--- scripts/reblog2couchdb.pl 2008/08/08 19:19:12 10 +++ scripts/reblog2couchdb.pl 2008/08/28 21:53:49 15 @@ -7,6 +7,8 @@ $|++; +my $debug = @ARGV ? 1 : 0; + my $database = 'reblog'; my $dbi = "DBI:mysql:database=$database"; @@ -20,16 +22,29 @@ feeds.title as feed_title, feeds.link as feed_link, feeds.description as feed_description --- t.value_long as tags from items join items_userdata on items.id = item_id join feeds on items.feed_id = feeds.id --- left outer join items_userdata as t on items.id = t.item_id and t.label='tags' where items.id > ? and items_userdata.label = 'published' and items_userdata.value_numeric = 1 order by items.id asc limit 1000 }; +my $sql_tags = qq{ +select + items_userdata.item_id, + t.value_long as tags, + items_userdata.timestamp +from items_userdata +join items_userdata as t + on items_userdata.item_id = t.item_id and t.label='tags' +where + items_userdata.item_id > ? and + items_userdata.label = 'published' and + items_userdata.value_numeric = 1 +order by items_userdata.item_id asc +}; + my $c = CouchDB::Client->new(uri => 'http://localhost:5984/'); $c->testConnection or die "The server cannot be reached"; @@ -44,6 +59,7 @@ print "status ",dump( $status->{data} ), "\n"; my $last_row = $status->{data}->{last_row_id} || 0; +$last_row = 0 if $debug; sub commit_last_row { warn "commit_last_row $last_row\n"; @@ -51,32 +67,46 @@ $status->update; } -my $dbh = DBI->connect($dbi,"","") || die $DBI::errstr; +my $dbh = DBI->connect($dbi,"","",{ RaiseError => 1 }); print "Fetching items from $dbi id > $last_row\n"; -my $sth = $dbh->prepare($sql) || die $dbh->errstr(); -$sth->execute( $last_row ) || die $sth->errstr(); +my $sth = $dbh->prepare($sql); +$sth->execute( $last_row ); warn dump( $sth->{NAME} ); print "found ",$sth->rows," items to process...\n"; -my $pk = 'id'; +my $sth_tags = $dbh->prepare($sql_tags); +$sth_tags->execute( $last_row ); +print "found ",$sth_tags->rows, " tags found...\n"; my $count = 0; +my $row_tags = $sth_tags->fetchrow_hashref(); + while (my $row = $sth->fetchrow_hashref() ) { my $_id = $row->{_id} || die "row needs _id"; my $doc = $db->newDoc( $_id ); + while ( $row_tags && $row_tags->{item_id} < $row->{_id} ) { + $row_tags = $sth_tags->fetchrow_hashref(); + warn "## got tags: ",dump( $row_tags ) if $debug; + } + sub row2doc { my ( $row, $doc ) = @_; my $a = delete( $row->{xml} ); $doc->addAttachment( 'item.xml', 'application/xhtml+xml', $a ) if $a; - my $a = delete( $row->{content} ); + $a = delete( $row->{content} ); $doc->addAttachment( 'content.html', 'text/html', $a ) if $a; + if ( $row_tags && $row_tags->{item_id} == $row->{_id} ) { + $row->{tags} = [ split(/\s+/, $row_tags->{tags} ) ]; + warn "++ ",$row->{item_id}, dump( $row->{tags} ); + } $doc->{data} = $row; + warn "## ",dump( $row ) if $debug; return $doc; }