/[Search-Estraier]/trunk/scripts/estcp-mt.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/scripts/estcp-mt.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 141 - (hide annotations)
Wed May 10 14:52:28 2006 UTC (18 years ago) by dpavlin
File MIME type: text/plain
File size: 3421 byte(s)
estcp scripts cleanup for creating nodes (they now copy source label too)
1 dpavlin 80 #!/usr/bin/perl -w
2    
3     use strict;
4 dpavlin 141 use Search::Estraier 0.06;
5 dpavlin 80 use URI::Escape qw/uri_escape/;
6 dpavlin 82 use Time::HiRes;
7     use POSIX qw/strftime/;
8 dpavlin 86 use Config;
9     use threads;
10     use Thread::Queue;
11 dpavlin 80
12     =head1 NAME
13    
14     estcp.pl - copy Hyper Estraier index from one node to another
15    
16     =cut
17    
18 dpavlin 86 die "Your perl isn't compiled with support for ithreads\n" unless ($Config{useithreads});
19    
20    
21 dpavlin 80 my ($from,$to) = @ARGV;
22    
23     die "usage: $0 http://localhost:1978/node/from http://remote.example.com:1978/node/to\n" unless ($from && $to);
24    
25     my $debug = 0;
26 dpavlin 83 my $max = 256;
27 dpavlin 80
28     # create and configure node
29     my $from_n = new Search::Estraier::Node(
30     url => $from,
31     croak_on_error => 1,
32     debug => $debug,
33 dpavlin 141 user => 'admin',
34     passwd => 'admin',
35 dpavlin 80 );
36     my $to_n = new Search::Estraier::Node(
37     url => $to,
38     croak_on_error => 1,
39     debug => $debug,
40 dpavlin 141 user => 'admin',
41     passwd => 'admin',
42     create => 1,
43     label => $from_n->label,
44 dpavlin 80 );
45    
46 dpavlin 85 unless(eval{ $to_n->name }) {
47 dpavlin 87 if ($to =~ m#^(http://.+)/node/([^/]+)$#) {
48 dpavlin 85 my ($url,$name) = ($1,$2);
49     print "Creating '$name' on $url\n";
50     $to_n->shuttle_url( $url . '/master?action=nodeadd',
51     'application/x-www-form-urlencoded',
52     'name=' . uri_escape($name) . '&label=' . uri_escape( $name ),
53     undef,
54     );
55     } else {
56     die "can't extract node name from $to\n";
57     }
58     }
59    
60 dpavlin 86 # total processed elements
61     my $i : shared = 1;
62 dpavlin 80
63 dpavlin 86 my $q_id = Thread::Queue->new;
64     my $q_drafts = Thread::Queue->new;
65    
66     my $get_thr = threads->new( sub {
67     while (my $id = $q_id->dequeue) {
68 dpavlin 87 #warn "get ", $id || 'undef',"\n";
69     if ($id < 0) {
70     $q_drafts->enqueue( '' ); # abort put thread
71     last;
72     };
73 dpavlin 86 print STDERR "get_thr, id: $id\n" if ($debug);
74     my $doc_draft = $from_n->_fetch_doc( id => $id, chomp_resbody => 1 );
75     $q_drafts->enqueue( $doc_draft );
76     }
77     } );
78    
79     my $t = time();
80     my $t_refresh = time();
81 dpavlin 82 my $doc_num = $from_n->doc_num || 1;
82 dpavlin 80
83 dpavlin 86 my $put_thr = threads->new( sub {
84     while (my $doc_draft = $q_drafts->dequeue) {
85 dpavlin 87 last unless ($doc_draft);
86 dpavlin 86 print STDERR "put_thr, $doc_draft\n" if ($debug);
87     $to_n->shuttle_url( $to_n->{url} . '/put_doc', 'text/x-estraier-draft', $doc_draft, undef) == 200 or die "can't insert $doc_draft\n";
88    
89     $i++;
90     if (time() - $t_refresh > 3) {
91     my $rate = ( $i / ((time() - $t) || 1) );
92     printf("%d records, %1.2f%% [%1.2f rec/s] estimated finish: %s\n",
93     $i,
94     ($i * 100 / $doc_num),
95     $rate,
96     strftime("%Y-%m-%d %H:%M:%S", localtime( time() + int(($doc_num-$i) / $rate))),
97     );
98     $t_refresh = time();
99     }
100    
101     }
102     } );
103    
104     print "Copy from ",$from_n->name," (",$from_n->label,") to ",$to_n->name," (",$to_n->label,") - ",$from_n->doc_num," documents (",$from_n->word_num," words, ",$from_n->size," bytes)\n";
105    
106 dpavlin 80 my $prev;
107 dpavlin 83 my $more = 1;
108 dpavlin 80
109 dpavlin 83 while($more) {
110     my $res;
111 dpavlin 80 $from_n->shuttle_url( $from_n->{url} . '/list',
112     'application/x-www-form-urlencoded',
113 dpavlin 83 'max=' . $max . ( $prev ? '&prev=' . uri_escape( $prev ) : '' ),
114 dpavlin 80 \$res,
115     );
116 dpavlin 83 if (! $res || $res eq '') {
117     $more = 0;
118     last;
119     }
120 dpavlin 80 foreach my $l (split(/\n/,$res)) {
121     (my $id, $prev) = split(/\t/,$l, 2);
122 dpavlin 84
123     #$to_n->put_doc( $from_n->get_doc( $id ));
124    
125 dpavlin 86 #my $doc_draft = $from_n->_fetch_doc( id => $id, chomp_resbody => 1 );
126     #$to_n->shuttle_url( $to_n->{url} . '/put_doc', 'text/x-estraier-draft', $doc_draft, undef) == 200 or die "can't insert $doc_draft\n";
127 dpavlin 84
128 dpavlin 86 $q_id->enqueue( $id );
129 dpavlin 80 }
130 dpavlin 87 warn "$prev\n" if ($debug);
131 dpavlin 82
132 dpavlin 83 }
133 dpavlin 87 $q_id->enqueue( -1 ); # last one
134 dpavlin 80
135 dpavlin 86 $get_thr->join;
136     $put_thr->join;
137    
138 dpavlin 87 printf "Copy of %d records completed [%1.2f rec/s]\n", $i,
139     ( $i / ((time() - $t) || 1) );
140 dpavlin 83

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26