/[couchdb]/scripts/reblog2couchdb.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /scripts/reblog2couchdb.pl

Parent Directory | Revision Log | View Patch

revision 2 by dpavlin, Tue Aug 5 13:28:54 2008 UTC | revision 36 by dpavlin, Tue Apr 28 21:34:58 2009 UTC
# Line 7  use Data::Dump qw/dump/; Line 7  use Data::Dump qw/dump/;
7    
8  $|++;  $|++;
9    
10    my $debug = @ARGV ? 1 : 0;
11    
12  my $database = 'reblog';  my $database = 'reblog';
13    
14  my $connect = "DBI:mysql:database=$database";  my $dbi = "DBI:mysql:database=$database";
15  #$connect = "DBI:mysql:database=$database;host=localhost;port=13306";  $dbi .= ";host=127.0.0.1;port=13306";   # XXX over ssh
16    
17    my $dbh = DBI->connect($dbi,"","",{ RaiseError => 1 });
18    
19  my $dbh = DBI->connect($connect,"","") || die $DBI::errstr;  $dbh->do(qq{
20            create temporary table published_items as
21            select
22                    item_id
23            from
24                    items_userdata
25            where
26                    label = 'published' and
27                    value_numeric = 1
28    });
29    
 # select all posts which have been read or unread  
30  my $sql = qq{  my $sql = qq{
31          select          select
32                  *                  i.id as item_id,
33          from items  --              i.guid as _id,
34  --      join items_userdata on id=item_id                  i.link as _id,
35  --      where label = 'read' and value_numeric = ?                  i.*,
36  --      limit 3                  f.url as feed_url,
37          order by id asc                  f.title as feed_title,
38                    f.link as feed_link,
39                    f.description as feed_description
40            from items i
41            join published_items p on i.id = p.item_id
42            join feeds f on i.feed_id = f.id
43            where i.id > ?
44            order by i.id asc
45            limit 1000
46  };  };
47    
48  my $sth = $dbh->prepare($sql) || die $dbh->errstr();  my $sql_tags = qq{
49  $sth->execute( 1 ) || die $sth->errstr();  select
50            items_userdata.item_id,
51  print "found ",$sth->rows," items to process...";          value_long as tags,
52            timestamp
53    from items_userdata
54    join published_items p
55            on items_userdata.item_id = p.item_id and label='tags'
56    where
57            items_userdata.item_id > ?
58    order by items_userdata.item_id asc
59    };
60    
61  my $c = CouchDB::Client->new(uri => 'http://localhost:5984/');  my $c = CouchDB::Client->new(uri => 'http://192.168.1.13:5984/');
62    
63  $c->testConnection or die "The server cannot be reached";  $c->testConnection or die "The server cannot be reached";
64  print "Running version " . $c->serverInfo->{version} . "\n";  print "CouchDB version " . $c->serverInfo->{version} . "\n";
65  my $db = $c->newDB( $database );  my $db = $c->newDB( $database );
66  $db->create unless $c->dbExists( $database );  $db->create unless $c->dbExists( $database );
67    
68  my @docs = $db->listDocs;  my $status = $db->newDoc( 'last_sync' );
69  my $row_id = shift @docs || 0;  eval { $status->retrieve };
70    $status->create if $@;
71    
72    print "status ",dump( $status->{data} ), "\n";
73    
74    my $last_row = $status->{data}->{last_row_id} || 0;
75    $last_row = 0 if $debug;
76    
77    sub commit_last_row {
78            warn "commit_last_row $last_row\n";
79            $status->{data}->{last_row_id} = $last_row;
80            $status->update;
81    }
82    
83    print "Fetching items from $dbi id > $last_row\n";
84    
85    my $sth = $dbh->prepare($sql);
86    $sth->execute( $last_row );
87    
88    warn dump( $sth->{NAME} );
89    
90    print "found ",$sth->rows," items to process...\n";
91    
92    my $sth_tags = $dbh->prepare($sql_tags);
93    $sth_tags->execute( $last_row );
94    print "found ",$sth_tags->rows, " tags found...\n";
95    
96  my $pk = 'id';  my $count = 0;
97    
98    my $row_tags = $sth_tags->fetchrow_hashref();
99    
100  while (my $row = $sth->fetchrow_hashref() ) {  while (my $row = $sth->fetchrow_hashref() ) {
101          my $_id = $row->{$pk};          my $_id = $row->{_id} || "c$count";
102            $_id =~ s{\W+}{_}g;
103            $_id =~ s{_+$}{};
104          my $doc = $db->newDoc( $_id );          my $doc = $db->newDoc( $_id );
105          $doc->{data} = $row;  
106            while ( $row_tags && $row_tags->{item_id} < $row->{item_id} ) {
107                    $row_tags = $sth_tags->fetchrow_hashref();
108                    warn "## got tags: ",dump( $row_tags ) if $debug;
109            }
110    
111            sub row2doc {
112                    my ( $row, $doc ) = @_;
113                    my $a = delete( $row->{xml} );
114                    $doc->addAttachment( 'item.xml', 'application/xhtml+xml', $a ) if $a;
115                    $a = delete( $row->{content} );
116                    $doc->addAttachment( 'content.html', 'text/html', $a ) if $a;
117                    if ( $row_tags && $row_tags->{item_id} == $row->{item_id} ) {
118                            $row->{tags} = [ split(/\s+/, $row_tags->{tags} ) ];
119                            warn "++ ",$row->{item_id}, dump( $row->{tags} );
120                    }
121                    $doc->{data} = $row;
122                    warn "## ",dump( $row ) if $debug;
123                    return $doc;
124            }
125    
126            row2doc( $row, $doc );
127    
128          eval { $doc->create };          eval { $doc->create };
129          if ( $@ ) {          if ( $@ ) {
130                  warn "can't create $_id $@";                  $doc->retrieve;
131                  eval { $doc->update };                  row2doc( $row, $doc )->update;
132                  warn $@ ? "can't update $_id $@" : "updated $_id";  #               eval { $doc->update };
133                    warn $@ ? "$count ERROR $_id $@\n" : "$count updated $_id\n";
134          } else {          } else {
135                  warn "created ",dump( $row );                  warn "$count created $_id\n";
136          }          }
137    
138            $last_row = $row->{id};
139            $count++;
140    
141            commit_last_row if $count % 100 == 0    # checkpoint every 100 records
142  }  }
143    
144    commit_last_row;
145    
146  __END__  __END__
147    
148  $sql = qq{  $sql = qq{

Legend:
Removed from v.2  
changed lines
  Added in v.36

  ViewVC Help
Powered by ViewVC 1.1.26