/[Search-Estraier]/trunk/scripts/estcp-mt.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/scripts/estcp-mt.pl

Parent Directory | Revision Log


Revision 87 - (show annotations)
Sat Jan 21 17:37:07 2006 UTC (18 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 3300 byte(s)
fixed node URL extraction, put -1 marker on queue at end so that threads will finish
after all documents are processed
1 #!/usr/bin/perl -w
2
3 use strict;
4 use Search::Estraier;
5 use URI::Escape qw/uri_escape/;
6 use Time::HiRes;
7 use POSIX qw/strftime/;
8 use Config;
9 use threads;
10 use Thread::Queue;
11
=head1 NAME

estcp-mt.pl - copy Hyper Estraier index from one node to another

=head1 SYNOPSIS

  estcp-mt.pl http://localhost:1978/node/from http://remote.example.com:1978/node/to

=head1 DESCRIPTION

Copies documents from the first node to the second one using two worker
threads connected by queues, so perl has to be built with ithreads.

=cut

# the whole script is built around threads; refuse to start without them
if ( ! $Config{useithreads} ) {
    die "Your perl isn't compiled with support for ithreads\n";
}

# source and destination node URLs, both mandatory
my ($from, $to) = @ARGV;

if ( ! $from || ! $to ) {
    die "usage: $0 http://localhost:1978/node/from http://remote.example.com:1978/node/to\n";
}

my $debug = 0;      # handed to both nodes, also turns on trace output on STDERR
my $max   = 256;    # sent as max= with every list request
27
# Create and configure both nodes; they share every option except the URL.
my %node_opts = (
    croak_on_error => 1,
    debug          => $debug,
);
my $from_n = Search::Estraier::Node->new( url => $from, %node_opts );
my $to_n   = Search::Estraier::Node->new( url => $to,   %node_opts );

# If the destination node doesn't return a name (or the call dies), try to
# create it through the master interface found on the same server.
unless ( eval { $to_n->name } ) {
    if ( $to =~ m#^(http://.+)/node/([^/]+)$# ) {
        my ($url, $name) = ($1, $2);
        print "Creating '$name' on $url\n";
        my $status = $to_n->shuttle_url(
            $url . '/master?action=nodeadd',
            'application/x-www-form-urlencoded',
            'name=' . uri_escape($name) . '&label=' . uri_escape($name),
            undef,
        );
        # Same success criterion as the put_doc requests later in the script:
        # stop here instead of failing later with a less useful message.
        unless ( defined $status && $status == 200 ) {
            die "can't create node '$name' on $url (status "
                . ( defined $status ? $status : 'undef' ) . ")\n";
        }
    }
    else {
        die "can't extract node name from $to\n";
    }
}
53
# Number of documents stored on the destination so far. It is incremented by
# the put thread after each successful insert, so it has to start at 0 for the
# progress and final report to show the real count.
my $i : shared = 0;

# ids waiting to be fetched from the source node; a negative id marks the end
my $q_id = Thread::Queue->new;
# fetched drafts waiting to be stored; an empty string marks the end
my $q_drafts = Thread::Queue->new;

# Fetch thread: takes ids from $q_id, fetches the draft of each document from
# the source node and passes it on through $q_drafts.
my $get_thr = threads->new( sub {
    my $ok = eval {
        while ( my $id = $q_id->dequeue ) {
            #warn "get ", $id || 'undef',"\n";
            last if ( $id < 0 );    # end marker from the main thread
            print STDERR "get_thr, id: $id\n" if ($debug);
            my $doc_draft = $from_n->_fetch_doc( id => $id, chomp_resbody => 1 );
            # A false draft would be read as the end marker by the put thread
            # and silently stop the copy, so never put one on the queue.
            unless ($doc_draft) {
                warn "get_thr: empty draft for id $id, skipping\n";
                next;
            }
            $q_drafts->enqueue( $doc_draft );
        }
        1;
    };
    my $err = $@;

    # Always tell the put thread to finish, even when fetching failed;
    # otherwise it would block in dequeue forever and join would never return.
    $q_drafts->enqueue( '' );

    die $err unless ($ok);
} );
72
# start of the copy and moment of the last progress line
my $t = time();
my $t_refresh = time();
# document count reported by the source node; 1 as fallback keeps the
# percentage calculation below from dividing by zero
my $doc_num = $from_n->doc_num || 1;

# Store thread: takes drafts from $q_drafts and sends each one to the
# destination node, printing a progress line from time to time.
my $put_thr = threads->new( sub {
    # any false value (the '' end marker) ends the loop
    while ( my $doc_draft = $q_drafts->dequeue ) {
        print STDERR "put_thr, $doc_draft\n" if ($debug);

        my $status = $to_n->shuttle_url(
            $to_n->{url} . '/put_doc',
            'text/x-estraier-draft',
            $doc_draft,
            undef,
        );
        die "can't insert $doc_draft\n" unless ( $status == 200 );

        $i++;

        # report only when more than 3 seconds passed since the last report
        next unless ( time() - $t_refresh > 3 );

        my $elapsed = ( time() - $t ) || 1;
        my $rate    = $i / $elapsed;
        my $finish  = time() + int( ( $doc_num - $i ) / $rate );
        printf("%d records, %1.2f%% [%1.2f rec/s] estimated finish: %s\n",
            $i,
            ( $i * 100 / $doc_num ),
            $rate,
            strftime( "%Y-%m-%d %H:%M:%S", localtime($finish) ),
        );
        $t_refresh = time();
    }
} );
97
# one-line summary of what is going to be copied
my @summary = (
    "Copy from ", $from_n->name, " (", $from_n->label, ") to ",
    $to_n->name, " (", $to_n->label, ") - ",
    $from_n->doc_num, " documents (",
    $from_n->word_num, " words, ",
    $from_n->size, " bytes)\n",
);
print @summary;

# second column of the last list line seen; sent as prev= with the next
# request so that the listing continues from there
my $prev;

# Page through the list of the source node, $max entries per request, and
# hand every id over to the fetch thread.
while (1) {
    my $params = 'max=' . $max;
    $params .= '&prev=' . uri_escape( $prev ) if ($prev);

    my $res;
    $from_n->shuttle_url(
        $from_n->{url} . '/list',
        'application/x-www-form-urlencoded',
        $params,
        \$res,
    );

    # an empty (or missing) response body means there is nothing more to list
    last unless ($res);

    # each line is "id<TAB>rest"; fetching and storing is done by the threads
    foreach my $line ( split( /\n/, $res ) ) {
        ( my $id, $prev ) = split( /\t/, $line, 2 );
        $q_id->enqueue( $id );
    }
    warn "$prev\n" if ($debug);
}

# negative id: tells the fetch thread that no more ids will follow
$q_id->enqueue( -1 );
128
# wait for both workers before reporting
foreach my $thr ( $get_thr, $put_thr ) {
    $thr->join;
}

# final value of the shared counter and the average rate over the whole run
# (elapsed time of 0 is replaced by 1 to avoid division by zero)
my $elapsed = ( time() - $t ) || 1;
printf "Copy of %d records completed [%1.2f rec/s]\n", $i, $i / $elapsed;
134

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26