/[wait]/cvs-head/script/index_ora
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /cvs-head/script/index_ora

Parent Directory | Revision Log | View Patch

revision 74 by laperla, Fri Mar 8 21:18:51 2002 UTC | revision 75 by laperla, Thu Mar 14 17:27:22 2002 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2  #                              -*- Mode: Perl -*-  #                              -*- Mode: Perl -*-
3  # $Basename$  # $Basename$
4  # $Revision: 1.11 $  # $Revision: 1.12 $
5  # Author          : Ulrich Pfeifer  # Author          : Ulrich Pfeifer
6  # Created On      : Mon Dec 31 13:57:11 2001  # Created On      : Mon Dec 31 13:57:11 2001
7  # Last Modified By: Ulrich Pfeifer  # Last Modified By: Ulrich Pfeifer
# Line 31  use Data::Dumper; Line 31  use Data::Dumper;
31    
32  $DB_BTREE->{'cachesize'} = 200_000 ;  $DB_BTREE->{'cachesize'} = 200_000 ;
33    
34    use lib "/usr/local/apache/lib";
35    use lib "/online/www/sites/ora/catalogsearch/run/lib";
36    use oreilly_de_catalog::config;
37    use oreilly_de_catalog::wait_filter;
38    
39  my %OPT = (  my %OPT = (
40             database => 'oreilly_de_catalog',             database => 'oreilly_de_catalog',
41             dir      => '/usr/local/apache/data',             dir      => oreilly_de_catalog::config::WAITDIR,
42             table    => 'ora',             table    => 'ora',
43            );            );
44    
45    my $droot = oreilly_de_catalog::config::CATALOG;
46    
47  GetOptions(\%OPT,  GetOptions(\%OPT,
48             'database=s',             'database=s',
49             'dir=s',             'dir=s',
# Line 53  my $db = WAIT::Database->create(name Line 60  my $db = WAIT::Database->create(name
60    
61  my $layout = new WAIT::Parse::Ora;  my $layout = new WAIT::Parse::Ora;
62    
 use lib "/usr/local/apache/lib";  
 use oreilly_de_catalog::wait_filter;  
   
63  my $stem  = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'stop', 'Stem'];  my $stem  = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'stop', 'Stem'];
64  my $text  = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'stop'];  # my $text  = ['OR_tr_20020124', 'split2', 'OR_minus_20020311', 'OR_lc_20020125', 'split2', 'stop'];
65  my $wplus = ['split2', 'OR_lc_20020125', 'OR_mixedonly_20020221'];  my $text  = ['OR_tr_20020124', 'split', 'OR_minus_20020311', 'OR_lc_20020125'];
66    my $wplus = ['split', 'OR_lc_20020125', 'OR_mixedonly_20020221'];
67  my $sound = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'Soundex'];  my $sound = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'Soundex'];
68  my $trigr = ['OR_lc_20020125', 'OR_trigrams_20020125'];  my $trigr = ['OR_lc_20020125', 'OR_trigrams_20020125'];
69  # split6 is better than split13 or split10: it allows them to enter  # split6 is better than split13 or split10: it allows them to enter
# Line 67  my $isbn  = ['split6', 'OR_isbn_20020127 Line 72  my $isbn  = ['split6', 'OR_isbn_20020127
72    
73  my $cwd = cwd;  my $cwd = cwd;
74    
 my $droot = shift or die "Usage: $0 <options> Document-Rootdirectories";  
   
75  my %D;  my %D;
76  my $access = tie %D, 'WAIT::Document::Ora', $droot,  my $access = tie %D, 'WAIT::Document::Ora', $droot,
77    or die "Couldn't tie to dir $droot: $!\n";    or die "Couldn't tie to dir $droot: $!\n";
# Line 117  my $todo = keys %D; Line 120  my $todo = keys %D;
120  my $lasttimeround = my $starttime = time;  my $lasttimeround = my $starttime = time;
121    
122  DOC: while (($did, $value) = each %D) {  DOC: while (($did, $value) = each %D) {
123      # next unless $did eq "jscook";
124    my $record   = $layout->split($value);    my $record   = $layout->split($value);
125    my $headline = $record->{title};    my $headline = $record->{title};
126    $headline =~ s/\s+/ /sg;    $headline =~ s/\s+/ /sg;
# Line 142  DOC: while (($did, $value) = each %D) { Line 146  DOC: while (($did, $value) = each %D) {
146      $ALL->{$did} = $record;      $ALL->{$did} = $record;
147      open F, ">:utf8", "$OPT{dir}/$OPT{database}-$jobid/debug.dump" or die;      open F, ">:utf8", "$OPT{dir}/$OPT{database}-$jobid/debug.dump" or die;
148      print F Data::Dumper::Dumper($ALL);      print F Data::Dumper::Dumper($ALL);
149      close F;      close F  or die "Couldn't close debug.dump: $!";;
150    }    }
151  }  }
152  undef $ALL;  undef $ALL;
# Line 178  for my $headline (@dictkeys) { Line 182  for my $headline (@dictkeys) {
182    $tritb->insert(docid => $headline, headline => $headline);    $tritb->insert(docid => $headline, headline => $headline);
183  }  }
184  $tritb->set(top=>1);  $tritb->set(top=>1);
185  $tritb->close;  $tritb->close or die "Couldn't close table: $!";
186  $tb->close();  $tb->close() or die "Couldn't close table: $!";
187  $db->close();  $db->close() or die "Couldn't close database: $!";
188    
189  # Atomically relinking symlink: now we have a new database with a very  # Atomically relinking symlink: now we have a new database with a very
190  # long name "$OPT{database}-$jobid" (e.g.  # long name "$OPT{database}-$jobid" (e.g.
# Line 198  rename $sltmp, $slwant or die "Couldn't Line 202  rename $sltmp, $slwant or die "Couldn't
202  warn "$slwant now points to $dir";  warn "$slwant now points to $dir";
203  system("chmod 777 $slwant/*/read")==0 or die;  system("chmod 777 $slwant/*/read")==0 or die;
204    
205    opendir DIR, "." or die "Could not opendir .: $!";
206    for my $dirent (readdir DIR) {
207      next if $dirent =~ /^\./;
208      next unless $dirent =~ /^$OPT{database}(.*)/;
209      my $ext = $1 or next;
210      next unless -M $dirent > 4;
211      warn "removing old index $dirent";
212      File::Path::rmtree($dirent);
213    }
214    closedir DIR;
215    
216  $WAIT::Config = $WAIT::Config; # make perl -w happy  $WAIT::Config = $WAIT::Config; # make perl -w happy
217    
218    

Legend:
Removed from v.74  
changed lines
  Added in v.75

  ViewVC Help
Powered by ViewVC 1.1.26