/[webpac2]/trunk/run.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/run.pl

Parent Directory | Revision Log | View Patch

revision 301 by dpavlin, Mon Dec 19 21:26:04 2005 UTC revision 401 by dpavlin, Sun Feb 19 16:36:42 2006 UTC
# Line 13  use WebPAC::Input 0.03; Line 13  use WebPAC::Input 0.03;
13  use WebPAC::Store 0.03;  use WebPAC::Store 0.03;
14  use WebPAC::Normalize::XML;  use WebPAC::Normalize::XML;
15  use WebPAC::Output::TT;  use WebPAC::Output::TT;
16  use WebPAC::Output::Estraier 0.05;  use WebPAC::Output::Estraier '0.10';
17  use YAML qw/LoadFile/;  use YAML qw/LoadFile/;
18  use Getopt::Long;  use Getopt::Long;
19  use File::Path;  use File::Path;
20    use Time::HiRes qw/time/;
21    
22  =head1 NAME  =head1 NAME
23    
# Line 40  limit loading to 100 records Line 41  limit loading to 100 records
41    
42  remove database and Hyper Estraier index before indexing  remove database and Hyper Estraier index before indexing
43    
44    =item --one=database_name
45    
46    reindex just single database
47    
48  =item --config conf/config.yml  =item --config conf/config.yml
49    
50  path to YAML configuration file  path to YAML configuration file
# Line 54  my $limit; Line 59  my $limit;
59  my $clean = 0;  my $clean = 0;
60  my $config = 'conf/config.yml';  my $config = 'conf/config.yml';
61  my $debug = 0;  my $debug = 0;
62    my $one_db_name;
63    
64  GetOptions(  GetOptions(
65          "limit=i" => \$limit,          "limit=i" => \$limit,
66          "offset=i" => \$offset,          "offset=i" => \$offset,
67          "clean" => \$clean,          "clean" => \$clean,
68            "one=s" => \$one_db_name,
69          "config" => \$config,          "config" => \$config,
70          "debug" => \$debug,          "debug" => \$debug,
71  );  );
# Line 70  print "config = ",Dumper($config) if ($d Line 77  print "config = ",Dumper($config) if ($d
77  die "no databases in config file!\n" unless ($config->{databases});  die "no databases in config file!\n" unless ($config->{databases});
78    
79  my $total_rows = 0;  my $total_rows = 0;
80    my $start_t = time();
81    
82  while (my ($database, $db_config) = each %{ $config->{databases} }) {  while (my ($database, $db_config) = each %{ $config->{databases} }) {
83    
84            next if ($one_db_name && $database !~ m/$one_db_name/i);
85    
86          my $log = _new WebPAC::Common()->_get_logger();          my $log = _new WebPAC::Common()->_get_logger();
87    
88          #          #
# Line 81  while (my ($database, $db_config) = each Line 91  while (my ($database, $db_config) = each
91    
92          my $est_config = $config->{hyperestraier} || $log->logdie("can't find 'hyperestraier' part in confguration");          my $est_config = $config->{hyperestraier} || $log->logdie("can't find 'hyperestraier' part in confguration");
93          $est_config->{database} = $database;          $est_config->{database} = $database;
94            $est_config->{clean} = $clean;
95            $est_config->{label} = $db_config->{name};
96    
97          my $est = new WebPAC::Output::Estraier(          my $est = new WebPAC::Output::Estraier( %{ $est_config } );
                 %{ $est_config },  
         );  
   
         if ($clean) {  
                 $log->warn("creating new empty index $database");  
                 $est->master( action => 'nodedel', name => $database );  
                 $est->master( action => 'nodeadd', name => $database, label => $database );  
         }  
98    
99          #          #
100          # now WebPAC::Store          # now WebPAC::Store
# Line 104  while (my ($database, $db_config) = each Line 108  while (my ($database, $db_config) = each
108                  $log->info("creating new database $database in $db_path");                  $log->info("creating new database $database in $db_path");
109                  rmtree( $db_path ) || $log->warn("can't remove $db_path: $!");                  rmtree( $db_path ) || $log->warn("can't remove $db_path: $!");
110          } else {          } else {
111                  $log->info("working on $database in $db_path");                  $log->debug("working on $database in $db_path");
112          }          }
113    
114          my $db = new WebPAC::Store(          my $db = new WebPAC::Store(
# Line 141  while (my ($database, $db_config) = each Line 145  while (my ($database, $db_config) = each
145    
146                  my $input_module = $config->{webpac}->{inputs}->{$type};                  my $input_module = $config->{webpac}->{inputs}->{$type};
147    
148                  $log->info("working on input $input->{path} [$input->{type}] using $input_module");                  $log->info("working on input '$input->{path}' [$input->{type}] using $input_module lookup '$input->{lookup}'");
149    
150                  my $input_db = new WebPAC::Input(                  my $input_db = new WebPAC::Input(
151                          module => $input_module,                          module => $input_module,
# Line 204  while (my ($database, $db_config) = each Line 208  while (my ($database, $db_config) = each
208    
209          };          };
210    
211          $log->info("$total_rows records indexed");          my $dt = time() - $start_t;
212            $log->info("$total_rows records indexed in " .
213                    sprintf("%.2f sec [%.2f rec/sec]",
214                            $dt, ($total_rows / $dt)
215                    )
216            );
217    
218          #          #
219          # add Hyper Estraier links to other databases          # add Hyper Estraier links to other databases

Legend:
Removed from v.301  
changed lines
  Added in v.401

  ViewVC Help
Powered by ViewVC 1.1.26