/[Search-Estraier]/trunk/scripts/estcp-mt.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/scripts/estcp-mt.pl

Parent Directory | Revision Log | View Patch

trunk/scripts/estcp.pl revision 82 by dpavlin, Tue Jan 17 00:17:50 2006 UTC trunk/scripts/estcp-mt.pl revision 141 by dpavlin, Wed May 10 14:52:28 2006 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2    
3  use strict;  use strict;
4  use Search::Estraier;  use Search::Estraier 0.06;
5  use URI::Escape qw/uri_escape/;  use URI::Escape qw/uri_escape/;
6  use Time::HiRes;  use Time::HiRes;
7  use POSIX qw/strftime/;  use POSIX qw/strftime/;
8    use Config;
9    use threads;
10    use Thread::Queue;
11    
12  =head1 NAME  =head1 NAME
13    
# Line 12  estcp.pl - copy Hyper Estraier index fro Line 15  estcp.pl - copy Hyper Estraier index fro
15    
16  =cut  =cut
17    
18    die "Your perl isn't compiled with support for ithreads\n" unless ($Config{useithreads});
19    
20    
21  my ($from,$to) = @ARGV;  my ($from,$to) = @ARGV;
22    
23  die "usage: $0 http://localhost:1978/node/from http://remote.example.com:1978/node/to\n" unless ($from && $to);  die "usage: $0 http://localhost:1978/node/from http://remote.example.com:1978/node/to\n" unless ($from && $to);
24    
25  my $debug = 0;  my $debug = 0;
26    my $max = 256;
27    
28  # create and configure node  # create and configure node
29  my $from_n = new Search::Estraier::Node(  my $from_n = new Search::Estraier::Node(
30          url => $from,          url => $from,
31          croak_on_error => 1,          croak_on_error => 1,
32          debug => $debug,          debug => $debug,
33            user => 'admin',
34            passwd => 'admin',
35  );  );
36  my $to_n = new Search::Estraier::Node(  my $to_n = new Search::Estraier::Node(
37          url => $to,          url => $to,
38          croak_on_error => 1,          croak_on_error => 1,
39          debug => $debug,          debug => $debug,
40            user => 'admin',
41            passwd => 'admin',
42            create => 1,
43            label => $from_n->label,
44  );  );
45    
46  print "Copy from ",$from_n->name," (",$from_n->label,") to ",$to_n->name," (",$to_n->label,") - ",$from_n->doc_num," documents (",$from_n->word_num," words, ",$from_n->size," bytes)\n";  unless(eval{ $to_n->name }) {
47            if ($to =~ m#^(http://.+)/node/([^/]+)$#) {
48                    my ($url,$name) = ($1,$2);
49                    print "Creating '$name' on $url\n";
50                    $to_n->shuttle_url( $url . '/master?action=nodeadd',
51                            'application/x-www-form-urlencoded',
52                            'name=' . uri_escape($name) . '&label=' . uri_escape( $name ),
53                            undef,
54                    );
55            } else {
56                    die "can't extract node name from $to\n";
57            }
58    }
59    
60  my $doc_num = $from_n->doc_num || 1;  # total processed elements
61    my $i : shared = 1;
62    
63  my $res;  my $q_id = Thread::Queue->new;
64  my $prev;  my $q_drafts = Thread::Queue->new;
65  my $i = 1;  
66    my $get_thr = threads->new( sub {
67            while (my $id = $q_id->dequeue) {
68                    #warn "get ", $id || 'undef',"\n";
69                    if ($id < 0) {
70                            $q_drafts->enqueue( '' );       # abort put thread
71                            last;
72                    };
73                    print STDERR "get_thr, id: $id\n" if ($debug);
74                    my $doc_draft = $from_n->_fetch_doc( id => $id, chomp_resbody => 1 );
75                    $q_drafts->enqueue( $doc_draft );
76            }
77    } );
78    
79  my $t = time();  my $t = time();
80    my $t_refresh = time();
81    my $doc_num = $from_n->doc_num || 1;
82    
83    my $put_thr = threads->new( sub {
84            while (my $doc_draft = $q_drafts->dequeue) {
85                    last unless ($doc_draft);
86                    print STDERR "put_thr, $doc_draft\n" if ($debug);
87                    $to_n->shuttle_url( $to_n->{url} . '/put_doc', 'text/x-estraier-draft', $doc_draft, undef) == 200 or die "can't insert $doc_draft\n";
88    
89  do {                  $i++;
90                    if (time() - $t_refresh > 3) {
91                            my $rate = ( $i / ((time() - $t) || 1) );
92                            printf("%d records, %1.2f%% [%1.2f rec/s] estimated finish: %s\n",
93                                    $i,
94                                    ($i * 100 / $doc_num),
95                                    $rate,
96                                    strftime("%Y-%m-%d %H:%M:%S", localtime( time() + int(($doc_num-$i) / $rate))),
97                            );
98                            $t_refresh = time();
99                    }
100    
101            }
102    } );
103    
104    print "Copy from ",$from_n->name," (",$from_n->label,") to ",$to_n->name," (",$to_n->label,") - ",$from_n->doc_num," documents (",$from_n->word_num," words, ",$from_n->size," bytes)\n";
105    
106    my $prev;
107    my $more = 1;
108    
109    while($more) {
110            my $res;
111          $from_n->shuttle_url( $from_n->{url} . '/list',          $from_n->shuttle_url( $from_n->{url} . '/list',
112                  'application/x-www-form-urlencoded',                  'application/x-www-form-urlencoded',
113                  'max=256' . ( $prev ? '&prev=' . uri_escape( $prev ) : '' ),                  'max=' . $max . ( $prev ? '&prev=' . uri_escape( $prev ) : '' ),
114                  \$res,                  \$res,
115          );          );
116          last unless ($res);          if (! $res || $res eq '') {
117                    $more = 0;
118                    last;
119            }
120          foreach my $l (split(/\n/,$res)) {          foreach my $l (split(/\n/,$res)) {
121                  (my $id, $prev) = split(/\t/,$l, 2);                  (my $id, $prev) = split(/\t/,$l, 2);
122                  $to_n->put_doc( $from_n->get_doc( $id ));  
123                  $i++;                  #$to_n->put_doc( $from_n->get_doc( $id ));
124    
125                    #my $doc_draft = $from_n->_fetch_doc( id => $id, chomp_resbody => 1 );
126                    #$to_n->shuttle_url( $to_n->{url} . '/put_doc', 'text/x-estraier-draft', $doc_draft, undef) == 200 or die "can't insert $doc_draft\n";
127    
128                    $q_id->enqueue( $id );
129          }          }
130          warn "$prev\n";          warn "$prev\n" if ($debug);
131    
132          my $rate = ( $i / (time() - $t) );  }
133          printf("%d records, %1.2f%% [%1.2f rec/s] estimated finish: %s\n",  $q_id->enqueue( -1 );   # last one
134                  $i,  
135                  ($i * 100 / $doc_num),  $get_thr->join;
136                  $rate,  $put_thr->join;
                 strftime("%Y-%m-%d %H:%M:%S", localtime( time() + int(($doc_num-$i) / $rate))),  
         );  
137    
138  } while ($res);  printf "Copy of %d records completed [%1.2f rec/s]\n", $i,
139            ( $i / ((time() - $t) || 1) );
140    

Legend:
Removed from v.82  
changed lines
  Added in v.141

  ViewVC Help
Powered by ViewVC 1.1.26