/[couchdb]/scripts/reblog2couchdb.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /scripts/reblog2couchdb.pl

Parent Directory | Revision Log | View Patch

revision 4 by dpavlin, Tue Aug 5 14:48:57 2008 UTC | revision 15 by dpavlin, Thu Aug 28 21:53:49 2008 UTC
# Line 7  use Data::Dump qw/dump/; Line 7  use Data::Dump qw/dump/;
7    
8  $|++;  $|++;
9    
10  my $database = 'reblog';  my $debug = @ARGV ? 1 : 0;
11    
12  my $connect = "DBI:mysql:database=$database";  my $database = 'reblog';
 #$connect = "DBI:mysql:database=$database;host=localhost;port=13306";  
13    
14  my $dbh = DBI->connect($connect,"","") || die $DBI::errstr;  my $dbi = "DBI:mysql:database=$database";
15    $dbi .= ";host=127.0.0.1;port=13306";   # XXX over ssh
16    
 # select all posts which have been read or unread  
17  my $sql = qq{  my $sql = qq{
18          select          select
19                  items.id as _id,                  items.id as _id,
# Line 25  my $sql = qq{ Line 24  my $sql = qq{
24                  feeds.description as feed_description                  feeds.description as feed_description
25          from items          from items
26          join items_userdata on items.id = item_id          join items_userdata on items.id = item_id
 --      where label = 'read' and value_numeric = ?  
27          join feeds on items.feed_id = feeds.id          join feeds on items.feed_id = feeds.id
28            where items.id > ? and items_userdata.label = 'published' and items_userdata.value_numeric = 1
29          order by items.id asc          order by items.id asc
30  --      limit 42          limit 1000
31  };  };
32    
33  my $sth = $dbh->prepare($sql) || die $dbh->errstr();  my $sql_tags = qq{
34  $sth->execute( 1 ) || die $sth->errstr();  select
35            items_userdata.item_id,
36  warn dump( $sth->{NAME} );          t.value_long as tags,
37            items_userdata.timestamp
38  print "found ",$sth->rows," items to process...";  from items_userdata
39    join items_userdata as t
40            on items_userdata.item_id = t.item_id and t.label='tags'
41    where
42            items_userdata.item_id > ? and
43            items_userdata.label = 'published' and
44            items_userdata.value_numeric = 1
45    order by items_userdata.item_id asc
46    };
47    
48  my $c = CouchDB::Client->new(uri => 'http://localhost:5984/');  my $c = CouchDB::Client->new(uri => 'http://localhost:5984/');
49    
50  $c->testConnection or die "The server cannot be reached";  $c->testConnection or die "The server cannot be reached";
51  print "Running version " . $c->serverInfo->{version} . "\n";  print "CouchDB version " . $c->serverInfo->{version} . "\n";
52  my $db = $c->newDB( $database );  my $db = $c->newDB( $database );
53  $db->create unless $c->dbExists( $database );  $db->create unless $c->dbExists( $database );
54    
55  my @docs = $db->listDocs;  my $status = $db->newDoc( '_sync' );
56  my $row_id = shift @docs || 0;  eval { $status->retrieve };
57    $status->create if $@;
58    
59    print "status ",dump( $status->{data} ), "\n";
60    
61    my $last_row = $status->{data}->{last_row_id} || 0;
62    $last_row = 0 if $debug;
63    
64    sub commit_last_row {
65            warn "commit_last_row $last_row\n";
66            $status->{data}->{last_row_id} = $last_row;
67            $status->update;
68    }
69    
70    my $dbh = DBI->connect($dbi,"","",{ RaiseError => 1 });
71    
72    print "Fetching items from $dbi id > $last_row\n";
73    
74    my $sth = $dbh->prepare($sql);
75    $sth->execute( $last_row );
76    
77  my $pk = 'id';  warn dump( $sth->{NAME} );
78    
79    print "found ",$sth->rows," items to process...\n";
80    
81    my $sth_tags = $dbh->prepare($sql_tags);
82    $sth_tags->execute( $last_row );
83    print "found ",$sth_tags->rows, " tags found...\n";
84    
85    my $count = 0;
86    
87    my $row_tags = $sth_tags->fetchrow_hashref();
88    
89  while (my $row = $sth->fetchrow_hashref() ) {  while (my $row = $sth->fetchrow_hashref() ) {
90          my $_id = $row->{_id} || die "row needs _id";          my $_id = $row->{_id} || die "row needs _id";
91          my $doc = $db->newDoc( $_id );          my $doc = $db->newDoc( $_id );
92    
93            while ( $row_tags && $row_tags->{item_id} < $row->{_id} ) {
94                    $row_tags = $sth_tags->fetchrow_hashref();
95                    warn "## got tags: ",dump( $row_tags ) if $debug;
96            }
97    
98          sub row2doc {          sub row2doc {
99                  my ( $row, $doc ) = @_;                  my ( $row, $doc ) = @_;
100                  my $a = delete( $row->{xml} );                  my $a = delete( $row->{xml} );
101                  $doc->addAttachment( 'item.xml', 'application/xhtml+xml', $a ) if $a;                  $doc->addAttachment( 'item.xml', 'application/xhtml+xml', $a ) if $a;
102                  my $a = delete( $row->{content} );                  $a = delete( $row->{content} );
103                  $doc->addAttachment( 'content.html', 'text/html', $a ) if $a;                  $doc->addAttachment( 'content.html', 'text/html', $a ) if $a;
104                    if ( $row_tags && $row_tags->{item_id} == $row->{_id} ) {
105                            $row->{tags} = [ split(/\s+/, $row_tags->{tags} ) ];
106                            warn "++ ",$row->{item_id}, dump( $row->{tags} );
107                    }
108                  $doc->{data} = $row;                  $doc->{data} = $row;
109                    warn "## ",dump( $row ) if $debug;
110                  return $doc;                  return $doc;
111          }          }
112    
# Line 71  while (my $row = $sth->fetchrow_hashref( Line 117  while (my $row = $sth->fetchrow_hashref(
117                  $doc->retrieve;                  $doc->retrieve;
118                  row2doc( $row, $doc )->update;                  row2doc( $row, $doc )->update;
119  #               eval { $doc->update };  #               eval { $doc->update };
120                  warn ( $@ ? "ERROR $_id $@" : "updated $_id" ), $/;                  warn $@ ? "$count ERROR $_id $@\n" : "$count updated $_id\n";
121          } else {          } else {
122                  warn "created ",dump( $row ),$/;                  warn "$count created $_id\n";
123          }          }
124    
125            $last_row = $row->{id};
126            $count++;
127    
128            commit_last_row if $count % 100 == 0    # checkpoint every 100 records
129  }  }
130    
131    commit_last_row;
132    
133  __END__  __END__
134    
135  $sql = qq{  $sql = qq{

Legend:
Removed from v.4  
changed lines
  Added in v.15

  ViewVC Help
Powered by ViewVC 1.1.26