/[couchdb]/scripts/reblog2couchdb.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /scripts/reblog2couchdb.pl

Parent Directory | Revision Log


Revision 15 - (hide annotations)
Thu Aug 28 21:53:49 2008 UTC (15 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 3339 byte(s)
rewrite tag import to go tag-by-tag
1 dpavlin 2 #!/usr/bin/perl -w
2    
3     use strict;
4     use DBI;
5     use CouchDB::Client;
6     use Data::Dump qw/dump/;
7    
8     $|++;
9    
# Name used both for the MySQL source database and the CouchDB target.
my $database = 'reblog';

# Any command-line argument at all turns debug mode on.
my $debug = @ARGV ? 1 : 0;

# DBI data source name; host and port point at a local forward
# (XXX over ssh).
my $dbi = join ';',
    "DBI:mysql:database=$database",
    'host=127.0.0.1',
    'port=13306';

# Items flagged as published, joined with their feed, in ascending id
# order and in batches of at most 1000.  The single placeholder is the
# id after which to resume.
my $sql = qq{
	select
		items.id as _id,
		items.*,
		feeds.url as feed_url,
		feeds.title as feed_title,
		feeds.link as feed_link,
		feeds.description as feed_description
	from items
	join items_userdata on items.id = item_id
	join feeds on items.feed_id = feeds.id
	where items.id > ? and items_userdata.label = 'published' and items_userdata.value_numeric = 1
	order by items.id asc
	limit 1000
};

# Tag strings for published items, in ascending item_id order so that
# they can be walked in step with the rows produced by $sql.  Takes the
# same placeholder as $sql.
my $sql_tags = qq{
	select
		items_userdata.item_id,
		t.value_long as tags,
		items_userdata.timestamp
	from items_userdata
	join items_userdata as t
		on items_userdata.item_id = t.item_id and t.label='tags'
	where
		items_userdata.item_id > ? and
		items_userdata.label = 'published' and
		items_userdata.value_numeric = 1
	order by items_userdata.item_id asc
};
47    
# Connect to the local CouchDB server and refuse to continue without it.
my $c = CouchDB::Client->new( uri => 'http://localhost:5984/' );
die "The server cannot be reached" unless $c->testConnection;
printf "CouchDB version %s\n", $c->serverInfo->{version};

# Target database, created on first use.
my $db = $c->newDB($database);
if ( !$c->dbExists($database) ) {
    $db->create;
}

# The '_sync' document stores the import checkpoint.  If it cannot be
# retrieved it is created.
my $status = $db->newDoc('_sync');
eval { $status->retrieve };
if ($@) {
    $status->create;
}

print "status ", dump( $status->{data} ), "\n";

# Resume after the last imported row id; debug mode always starts from 0.
my $last_row = $debug ? 0 : ( $status->{data}->{last_row_id} || 0 );
63 dpavlin 6
# Store the current value of $last_row in the '_sync' status document
# and push that document to CouchDB.  Takes no arguments; returns
# whatever $status->update returns.
sub commit_last_row {
    warn "commit_last_row $last_row\n";
    $status->{data}{last_row_id} = $last_row;
    return $status->update;
}
69    
# Empty user name and password; RaiseError makes every DBI failure fatal.
my $dbh = DBI->connect( $dbi, "", "", { RaiseError => 1 } );

print "Fetching items from $dbi id > $last_row\n";

# Cursor over the items to import, starting after the checkpoint.
my $sth = $dbh->prepare($sql);
$sth->execute($last_row);

# Column names of the items query.
warn dump( $sth->{NAME} );

# NOTE(review): DBI documents rows() as unreliable for SELECT handles
# before all rows are fetched -- confirm the driver in use reports it.
printf "found %s items to process...\n", $sth->rows;

# Cursor over the tag rows, starting from the same checkpoint.
my $sth_tags = $dbh->prepare($sql_tags);
$sth_tags->execute($last_row);
printf "found %s tags found...\n", $sth_tags->rows;

# Number of items handled so far in this run.
my $count = 0;

# Current row of the tags cursor (undef once the cursor is exhausted).
my $row_tags = $sth_tags->fetchrow_hashref();
88    
# Copy one SQL row into a CouchDB document.
#
# Arguments:
#   $row  - hashref describing one item (as returned by
#           fetchrow_hashref).  It is NOT modified, so the same row can
#           be converted again after the document has been re-read.
#   $doc  - document object to fill; must provide addAttachment and a
#           {data} slot.
#   $tags - optional hashref with item_id and tags keys; defaults to the
#           current row of the tags cursor ($row_tags).
#
# The xml and content columns become attachments instead of document
# fields.  When $tags belongs to this item, its whitespace-separated
# tag string is stored as an array under the tags field.
#
# Returns $doc so that calls can be chained.
sub row2doc {
    my ( $row, $doc, $tags ) = @_;
    $tags = $row_tags unless defined $tags;

    # Shallow copy: deleting the bulky columns from the caller's row
    # would leave nothing to attach on a second call for the same row.
    my %data = %$row;

    my $xml = delete $data{xml};
    $doc->addAttachment( 'item.xml', 'application/xhtml+xml', $xml ) if $xml;

    my $content = delete $data{content};
    $doc->addAttachment( 'content.html', 'text/html', $content ) if $content;

    if ( $tags && $tags->{item_id} == $row->{_id} ) {
        my $tag_string = defined $tags->{tags} ? $tags->{tags} : '';

        # split ' ' ignores leading whitespace, so no empty first tag.
        $data{tags} = [ split ' ', $tag_string ];

        # The item id is taken from the tags row: the items query does
        # not alias any column as item_id.
        warn "++ ", $tags->{item_id}, dump( $data{tags} );
    }

    $doc->{data} = \%data;
    warn "## ", dump( \%data ) if $debug;
    return $doc;
}

while ( my $row = $sth->fetchrow_hashref() ) {
    my $_id = $row->{_id} || die "row needs _id";
    my $doc = $db->newDoc($_id);

    # Both cursors are ordered by item id: skip tag rows that belong to
    # items which come before the current one.
    while ( $row_tags && $row_tags->{item_id} < $_id ) {
        $row_tags = $sth_tags->fetchrow_hashref();
        warn "## got tags: ", dump($row_tags) if $debug;
    }

    row2doc( $row, $doc, $row_tags );

    eval { $doc->create };
    if ($@) {

        # Creation failed -- presumably the document already exists --
        # so re-read it and update it instead.  The outcome is captured
        # by its own eval: $@ from the failed create says nothing about
        # whether the update worked.
        eval {
            $doc->retrieve;
            row2doc( $row, $doc, $row_tags )->update;
        };
        if ( my $update_error = $@ ) {
            warn "$count ERROR $_id $update_error\n";

            # Stay fail-fast, as before: the checkpoint must not move
            # past a row that was not stored.
            die $update_error;
        }
        warn "$count updated $_id\n";
    }
    else {
        warn "$count created $_id\n";
    }

    $last_row = $row->{id};
    $count++;

    commit_last_row() if $count % 100 == 0;    # checkpoint every 100 records
}

commit_last_row();
132 dpavlin 6
133 dpavlin 2 __END__
134    
135     $sql = qq{
136     update items_userdata
137     set value_numeric = 1
138     where label = 'read' and item_id in ($ids)
139     };
140    
141     $dbh->do( $sql );
142    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26