/[wait]/cvs-head/script/index_ora
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /cvs-head/script/index_ora

Parent Directory | Revision Log | View Patch

revision 75 by laperla, Thu Mar 14 17:27:22 2002 UTC revision 76 by laperla, Sat Apr 6 19:00:54 2002 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2  #                              -*- Mode: Perl -*-  #                              -*- Mode: Perl -*-
3  # $Basename$  # $Basename$
4  # $Revision: 1.12 $  # $Revision: 1.13 $
5  # Author          : Ulrich Pfeifer  # Author          : Ulrich Pfeifer
6  # Created On      : Mon Dec 31 13:57:11 2001  # Created On      : Mon Dec 31 13:57:11 2001
7  # Last Modified By: Ulrich Pfeifer  # Last Modified By: Ulrich Pfeifer
# Line 48  GetOptions(\%OPT, Line 48  GetOptions(\%OPT,
48             'database=s',             'database=s',
49             'dir=s',             'dir=s',
50             'table=s',             'table=s',
51               'verbose!',
52               'debug!',
53            ) || die "Usage: ...\n";            ) || die "Usage: ...\n";
54    
55  my @localtime = localtime;  my @localtime = localtime;
# Line 60  my $db = WAIT::Database->create(name Line 62  my $db = WAIT::Database->create(name
62    
63  my $layout = new WAIT::Parse::Ora;  my $layout = new WAIT::Parse::Ora;
64    
65  my $stem  = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'stop', 'Stem'];  # my $stem  = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'stop', 'Stem'];
66  # my $text  = ['OR_tr_20020124', 'split2', 'OR_minus_20020311', 'OR_lc_20020125', 'split2', 'stop'];  # my $text  = ['OR_tr_20020124', 'split2', 'OR_minus_20020311', 'OR_lc_20020125', 'split2', 'stop'];
67  my $text  = ['OR_tr_20020124', 'split', 'OR_minus_20020311', 'OR_lc_20020125'];  my $text  = ['OR_tr_20020124', 'split', 'OR_minus_20020311', 'OR_lc_20020125'];
68  my $wplus = ['split', 'OR_lc_20020125', 'OR_mixedonly_20020221'];  my $wplus = ['OR_split_20020401', 'OR_lc_20020125', 'OR_mixedonly_20020221'];
69  my $sound = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'Soundex'];  # my $sound = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'Soundex'];
70  my $trigr = ['OR_lc_20020125', 'OR_trigrams_20020125'];  my $trigr = ['OR_lc_20020125', 'OR_trigrams_20020125'];
71    
72  # split6 is better than split13 or split10: it allows them to enter  # split6 is better than split13 or split10: it allows them to enter
73  # shorter sequences when searching.  # shorter sequences when searching.
74  my $isbn  = ['split6', 'OR_isbn_20020127'];  my $isbn  = ['split6', 'OR_isbn_20020127'];
# Line 90  my $tb = $db->create_table(name     => $ Line 93  my $tb = $db->create_table(name     => $
93                              'abstract' => $text,                              'abstract' => $text,
94                              'abstract' => $wplus,                              'abstract' => $wplus,
95                              'author' => $text,                              'author' => $text,
96                                'chapter' => $text,
97                                'chapter' => $wplus,
98                              'colophon' => $text,                              'colophon' => $text,
99                              'colophon' => $wplus,                              'colophon' => $wplus,
100                              'desc'   => $text,                              'desc'   => $text,
# Line 114  die "Couldn't create table $OPT{table}: Line 119  die "Couldn't create table $OPT{table}:
119  my ($did, $value);  my ($did, $value);
120  binmode STDOUT, ":utf8";  binmode STDOUT, ":utf8";
121  my $ALL;  my $ALL;
122  my $traceALL = 0; # expensive  my $traceALL = 0;
123    $traceALL = 1 if $OPT{debug}; # expensive
124  my $done = 0;  my $done = 0;
125  my $todo = keys %D;  my $todo = keys %D;
126  my $lasttimeround = my $starttime = time;  my $lasttimeround = my $starttime = time;
127    
128  DOC: while (($did, $value) = each %D) {  DOC: while (($did, $value) = each %D) {
129    # next unless $did eq "jscook";    my $superdebug = 0;
130      if ($superdebug && $OPT{debug}) {
131        # next unless $did =~ /perl/;
132      }
133      printf "%15s...(%d)\n", $did, $done if $OPT{verbose};
134    my $record   = $layout->split($value);    my $record   = $layout->split($value);
135    my $headline = $record->{title};    my $headline = $record->{title};
136    $headline =~ s/\s+/ /sg;    $headline =~ s/\s+/ /sg;
137    # printf "%15s %s\n", $record->{isbn}, substr($headline,0,60);    # printf "%15s %s\n", $record->{isbn}, substr($headline,0,60);
138    printf "%15s %s...\n", $did, substr($headline,0,60);    printf "%15s %s...\n", $did, substr($headline,0,60) if $OPT{verbose};
139      if ($superdebug && $OPT{debug}) {
140        # $record = { chapter => $record->{chapter}};
141      }
142      $done++;
143      next if oreilly_de_catalog::config::DONT_INDEX_JUST_PARSE() ;
144    $tb->insert('docid'  => $did,    $tb->insert('docid'  => $did,
145                headline => $headline,                headline => $headline,
146                %{$record});                %{$record});
   $done++;  
147    my $spenttime = time - $starttime;    my $spenttime = time - $starttime;
148    my $averagetime = $spenttime/$done;    my $averagetime = $spenttime/$done;
149    my $left = $todo-$done;    my $left = $todo-$done;
# Line 140  DOC: while (($did, $value) = each %D) { Line 154  DOC: while (($did, $value) = each %D) {
154                 $spenttime,                 $spenttime,
155                 $averagetime,                 $averagetime,
156                 $left*$averagetime,                 $left*$averagetime,
157                );                ) if $OPT{verbose};
158    $lasttimeround = time;    $lasttimeround = time;
159    if ($traceALL) { # costs a lot when reaching the 100th file or so    if ($traceALL) { # costs a lot when reaching the 100th file or so
160      $ALL->{$did} = $record;      $ALL->{$did} = $record;
# Line 199  my $sltmp  = File::Spec->catdir($OPT{dir Line 213  my $sltmp  = File::Spec->catdir($OPT{dir
213  unlink $sltmp; # may fail  unlink $sltmp; # may fail
214  symlink $dir, $sltmp or die "Couldn't symlink $dir, $sltmp: $!";  symlink $dir, $sltmp or die "Couldn't symlink $dir, $sltmp: $!";
215  rename $sltmp, $slwant or die "Couldn't rename $sltmp, $slwant: $!";  rename $sltmp, $slwant or die "Couldn't rename $sltmp, $slwant: $!";
216  warn "$slwant now points to $dir";  print "$slwant now points to $dir\n" if $OPT{verbose};
217  system("chmod 777 $slwant/*/read")==0 or die;  system("chmod 777 $slwant/*/read")==0 or die;
218    
219  opendir DIR, "." or die "Could not opendir .: $!";  opendir DIR, "." or die "Could not opendir .: $!";

Legend:
Removed from v.75  
changed lines
  Added in v.76

  ViewVC Help
Powered by ViewVC 1.1.26