--- trunk/run.pl 2007/12/23 22:06:08 1084 +++ trunk/run.pl 2009/04/25 14:46:42 1164 @@ -22,8 +22,8 @@ use Data::Dump qw/dump/; use Storable qw/dclone/; use Pod::Usage qw/pod2usage/; +use LWP::Simple; -use Proc::Queue size => 1; use POSIX ":sys_wait_h"; # imports WNOHANG =head1 NAME @@ -103,6 +103,10 @@ Create merged index of databases which have links +=item --mirror http://www.example.com + +Tries to download input path files from mirror URI + =back =cut @@ -123,6 +127,7 @@ my $parallel = 0; my $only_links = 0; my $merge = 0; +my $mirror; my $help; my $log = _new WebPAC::Common()->_get_logger(); @@ -144,6 +149,7 @@ "parallel=i" => \$parallel, "only-links!" => \$only_links, "merge" => \$merge, + "mirror=s" => \$mirror, "help" => \$help, ); @@ -207,6 +213,8 @@ my @links; if ($parallel) { + eval 'use Proc::Queue size => 1;'; + die $@ if $@; $log->info("Using $parallel processes for speedup"); Proc::Queue::size($parallel); } @@ -349,10 +357,32 @@ $log->info("database $database doesn't have inputs defined"); } ); + if ( -e 'out/debug' ) { # FIXME flag? + my $out; + foreach my $i ( @inputs ) { + warn dump( $i ); + next unless defined $i->{normalize}; + warn dump( $i->{normalize} ); + foreach my $normalize ( @{ $i->{normalize} } ) { + my $path = $normalize->{path}; + $out .= qq/\n##\n## $path\n##\n\n/; + $out .= read_file( $path ); + } + } + my $all = "out/debug/all-normalize.pl"; + write_file( $all, $out ); + warn "### all normalize for this input saved to: $all"; + }; + foreach my $input (@inputs) { my $input_name = $input->{name} || $log->logdie("input without a name isn't valid: ",dump($input)); + if ( $input->{skip} ) { + $log->info("skip $input_name"); + next; + } + next if ($only_input && ($input_name !~ m#$only_input#i && $input->{type} !~ m#$only_input#i)); my $type = lc($input->{type}); @@ -380,9 +410,16 @@ delete($input->{modify_file}); } + if ( $mirror ) { + my $path = $input->{path} || die "no input path in ",dump( $input ); + $log->info( "mirror ", $path, " ", -s $path, " bytes" ); + + $log->warn( "$path not modified" ) + if mirror( "$mirror/$path", $path ) == RC_NOT_MODIFIED; + } + my $input_db = new WebPAC::Input( module => $input_module, - encoding => $config->webpac('webpac_encoding'), limit => $limit || $input->{limit}, offset => $offset, recode => $input->{recode}, @@ -427,7 +464,7 @@ my $maxmfn = $input_db->open( path => $input->{path}, - code_page => $input->{encoding}, # database encoding + input_encoding => $input->{encoding}, # database encoding lookup_coderef => $lookup_coderef, lookup => $lookup_jar, %{ $input }, @@ -651,7 +688,7 @@ close($report_fh) if ($report_fh); } - eval { $indexer->finish } if ($indexer && $indexer->can('finish')); + $indexer->finish if $indexer && $indexer->can('finish'); foreach my $out ( @output_modules ) { $out->finish if $out->can('finish');