/[couchdb]/scripts/reblog2couchdb.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /scripts/reblog2couchdb.pl

Parent Directory | Revision Log


Revision 36 - (hide annotations)
Tue Apr 28 21:34:58 2009 UTC (15 years ago) by dpavlin
File MIME type: text/plain
File size: 3369 byte(s)
cleanup sql, use post url as key after mungling
#!/usr/bin/perl -w

# reblog2couchdb.pl - copy published items from the MySQL database 'reblog'
# into a CouchDB database of the same name, one document per item.
#
# The document id is the item's link with every run of non-word characters
# collapsed to '_'.  The id of the last processed row is stored in the
# CouchDB document 'last_sync' so that the next run continues from there.
#
# Usage: reblog2couchdb.pl [anything]
#   Any command-line argument enables debug mode: extra dumps are printed
#   and the sync restarts from row id 0.

use strict;
use warnings;
use DBI;
use CouchDB::Client;
use Data::Dump qw/dump/;

$|++;    # autoflush the currently selected output handle (STDOUT)

my $debug = @ARGV ? 1 : 0;

my $database = 'reblog';

my $dbi = "DBI:mysql:database=$database";
$dbi .= ";host=127.0.0.1;port=13306"; # XXX over ssh

my $dbh = DBI->connect($dbi,"","",{ RaiseError => 1 });

# Materialize the ids of published items once; both queries below join it.
$dbh->do(qq{
	create temporary table published_items as
	select
		item_id
	from
		items_userdata
	where
		label = 'published' and
		value_numeric = 1
});

# Published items newer than the checkpoint, together with their feed data.
my $sql = qq{
	select
		i.id as item_id,
		-- i.guid as _id,
		i.link as _id,
		i.*,
		f.url as feed_url,
		f.title as feed_title,
		f.link as feed_link,
		f.description as feed_description
	from items i
	join published_items p on i.id = p.item_id
	join feeds f on i.feed_id = f.id
	where i.id > ?
	order by i.id asc
	limit 1000
};

# Tags of published items, in the same item_id order as $sql so that both
# result sets can be walked in parallel.
my $sql_tags = qq{
	select
		items_userdata.item_id,
		value_long as tags,
		timestamp
	from items_userdata
	join published_items p
		on items_userdata.item_id = p.item_id and label='tags'
	where
		items_userdata.item_id > ?
	order by items_userdata.item_id asc
};

my $c = CouchDB::Client->new(uri => 'http://192.168.1.13:5984/');

$c->testConnection or die "The server cannot be reached";
print "CouchDB version " . $c->serverInfo->{version} . "\n";
my $db = $c->newDB( $database );
$db->create unless $c->dbExists( $database );

# Checkpoint document: fetch it, or create it when fetching fails.
my $status = $db->newDoc( 'last_sync' );
eval { $status->retrieve };
$status->create if $@;

print "status ",dump( $status->{data} ), "\n";

my $last_row = $status->{data}->{last_row_id} || 0;
$last_row = 0 if $debug;

# Store the current value of $last_row in the 'last_sync' document.
sub commit_last_row {
	warn "commit_last_row $last_row\n";
	$status->{data}->{last_row_id} = $last_row;
	$status->update;
}

print "Fetching items from $dbi id > $last_row\n";

my $sth = $dbh->prepare($sql);
$sth->execute( $last_row );

warn dump( $sth->{NAME} );

print "found ",$sth->rows," items to process...\n";

my $sth_tags = $dbh->prepare($sql_tags);
$sth_tags->execute( $last_row );
print "found ",$sth_tags->rows, " tags found...\n";

my $count = 0;

# Current row of the tags cursor; advanced by the main loop below and read
# by row2doc.
my $row_tags = $sth_tags->fetchrow_hashref();

# row2doc( $row, $doc ) - fill a CouchDB document from an item row.
#
# Moves the 'xml' and 'content' columns out of $row into attachments of
# $doc (when they are non-empty), adds a 'tags' array when the current tags
# row belongs to this item, and stores the remaining columns as the
# document data.  Modifies $row in place and returns $doc.
#
# NOTE(review): because 'xml' and 'content' are deleted from $row, a second
# call with the same $row adds no attachments - confirm this is intended
# for the update path.
sub row2doc {
	my ( $row, $doc ) = @_;

	my $xml = delete( $row->{xml} );
	$doc->addAttachment( 'item.xml', 'application/xhtml+xml', $xml ) if $xml;

	my $content = delete( $row->{content} );
	$doc->addAttachment( 'content.html', 'text/html', $content ) if $content;

	if ( $row_tags && $row_tags->{item_id} == $row->{item_id} ) {
		# a NULL tags value yields an empty list instead of an undef warning
		my $tags = defined $row_tags->{tags} ? $row_tags->{tags} : '';
		$row->{tags} = [ split(/\s+/, $tags ) ];
		warn "++ ",$row->{item_id}, dump( $row->{tags} );
	}

	$doc->{data} = $row;
	warn "## ",dump( $row ) if $debug;
	return $doc;
}

while (my $row = $sth->fetchrow_hashref() ) {
	# document id: the link with non-word runs collapsed to '_' and
	# trailing underscores removed; fall back to "c<count>" without a link
	my $_id = $row->{_id} || "c$count";
	$_id =~ s{\W+}{_}g;
	$_id =~ s{_+$}{};
	my $doc = $db->newDoc( $_id );

	# skip tag rows that belong to items before the current one
	while ( $row_tags && $row_tags->{item_id} < $row->{item_id} ) {
		$row_tags = $sth_tags->fetchrow_hashref();
		warn "## got tags: ",dump( $row_tags ) if $debug;
	}

	row2doc( $row, $doc );

	eval { $doc->create };
	if ( $@ ) {
		# create failed (presumably the document already exists): fetch
		# the stored document and update it instead.  The update runs in
		# its own eval so that the message below reports the outcome of
		# the update, not the stale error left behind by create.
		my $update_error;
		eval {
			$doc->retrieve;
			row2doc( $row, $doc )->update;
			1;
		} or $update_error = $@ || 'unknown error';
		warn $update_error ? "$count ERROR $_id $update_error\n" : "$count updated $_id\n";
	} else {
		warn "$count created $_id\n";
	}

	$last_row = $row->{id};
	$count++;

	commit_last_row() if $count % 100 == 0; # checkpoint every 100 records
}

commit_last_row();

# the tags cursor may not have been read to its end
$sth_tags->finish;
$dbh->disconnect;

__END__

$sql = qq{
	update items_userdata
	set value_numeric = 1
	where label = 'read' and item_id in ($ids)
};

$dbh->do( $sql );


Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26