--- trunk/scripts/estcp.pl	2006/01/17 00:41:18	83
+++ trunk/scripts/estcp-mt.pl	2006/01/19 14:33:33	86
@@ -5,6 +5,9 @@
 use URI::Escape qw/uri_escape/;
 use Time::HiRes;
 use POSIX qw/strftime/;
+use Config;
+use threads;
+use Thread::Queue;
 
 =head1 NAME
 
@@ -12,6 +15,9 @@
 
 =cut
 
+die "Your perl isn't compiled with support for ithreads\n" unless ($Config{useithreads});
+
+
 my ($from,$to) = @ARGV;
 
 die "usage: $0 http://localhost:1978/node/from http://remote.example.com:1978/node/to\n" unless ($from && $to);
@@ -19,8 +25,6 @@
 my $debug = 0;
 my $max = 256;
 
-$max = 1024;
-
 # create and configure node
 my $from_n = new Search::Estraier::Node(
 	url => $from,
@@ -33,16 +37,67 @@
 	debug => $debug,
 );
 
-print "Copy from ",$from_n->name," (",$from_n->label,") to ",$to_n->name," (",$to_n->label,") - ",$from_n->doc_num," documents (",$from_n->word_num," words, ",$from_n->size," bytes)\n";
+unless(eval{ $to_n->name }) {
+	if ($to =~ m#^(http://.+)/node/(\w+)$#) {
+		my ($url,$name) = ($1,$2);
+		print "Creating '$name' on $url\n";
+		$to_n->shuttle_url( $url . '/master?action=nodeadd',
+			'application/x-www-form-urlencoded',
+			'name=' . uri_escape($name) . '&label=' . uri_escape( $name ),
+			undef,
+		);
+	} else {
+		die "can't extract node name from $to\n";
+	}
+}
+
+# total processed elements
+my $i : shared = 1;
+my $q_id = Thread::Queue->new;
+my $q_drafts = Thread::Queue->new;
+
+# fetcher thread: takes ids from $q_id, fetches each draft from the source node, queues it on $q_drafts
+my $get_thr = threads->new( sub {
+	while (my $id = $q_id->dequeue) {
+		print STDERR "get_thr, id: $id\n" if ($debug);
+		my $doc_draft = $from_n->_fetch_doc( id => $id, chomp_resbody => 1 );
+		$q_drafts->enqueue( $doc_draft );
+	}
+	# pass the end-of-input marker on, so the dequeue loop in put_thr terminates too
+	$q_drafts->enqueue( undef );
+} );
+
+my $t = time();
+my $t_refresh = time();
 
 my $doc_num = $from_n->doc_num || 1;
 
+# writer thread: puts each draft from $q_drafts into the destination node, printing progress every few seconds
+my $put_thr = threads->new( sub {
+	while (my $doc_draft = $q_drafts->dequeue) {
+		print STDERR "put_thr, $doc_draft\n" if ($debug);
+		$to_n->shuttle_url( $to_n->{url} . '/put_doc', 'text/x-estraier-draft', $doc_draft, undef) == 200 or die "can't insert $doc_draft\n";
+
+		$i++;
+		if (time() - $t_refresh > 3) {
+			my $rate = ( $i / ((time() - $t) || 1) );
+			printf("%d records, %1.2f%% [%1.2f rec/s] estimated finish: %s\n",
+				$i,
+				($i * 100 / $doc_num),
+				$rate,
+				strftime("%Y-%m-%d %H:%M:%S", localtime( time() + int(($doc_num-$i) / $rate))),
+			);
+			$t_refresh = time();
+		}
+
+	}
+} );
+
+print "Copy from ",$from_n->name," (",$from_n->label,") to ",$to_n->name," (",$to_n->label,") - ",$from_n->doc_num," documents (",$from_n->word_num," words, ",$from_n->size," bytes)\n";
+
 my $prev;
-my $i = 0;
 my $more = 1;
 
-my $t = time();
-
 while($more) {
 	my $res;
 	$from_n->shuttle_url( $from_n->{url} . '/list',
@@ -56,20 +111,24 @@
 	}
 	foreach my $l (split(/\n/,$res)) {
 		(my $id, $prev) = split(/\t/,$l, 2);
-		$to_n->put_doc( $from_n->get_doc( $id ));
-		$i++;
+
+		#$to_n->put_doc( $from_n->get_doc( $id ));
+
+		#my $doc_draft = $from_n->_fetch_doc( id => $id, chomp_resbody => 1 );
+		#$to_n->shuttle_url( $to_n->{url} . '/put_doc', 'text/x-estraier-draft', $doc_draft, undef) == 200 or die "can't insert $doc_draft\n";
+
+		$q_id->enqueue( $id );
 	}
 
 	warn "$prev\n";
 
-	my $rate = ( $i / (time() - $t) );
-	printf("%d records, %1.2f%% [%1.2f rec/s] estimated finish: %s\n",
-		$i,
-		($i * 100 / $doc_num),
-		$rate,
-		strftime("%Y-%m-%d %H:%M:%S", localtime( time() + int(($doc_num-$i) / $rate))),
-	);
-
 }
 
+# undef is the end-of-input marker: dequeue() blocks on an empty queue, so
+# without it the worker loops never finish and the join calls below hang forever
+$q_id->enqueue( undef );
+
+$get_thr->join;
+$put_thr->join;
+
 print "Copy completed.\n";