1 |
#!/usr/bin/perl -w |
#!/usr/bin/perl -w |
2 |
# -*- Mode: Perl -*- |
# -*- Mode: Perl -*- |
3 |
# $Basename$ |
# $Basename$ |
4 |
# $Revision: 1.11 $ |
# $Revision: 1.12 $ |
5 |
# Author : Ulrich Pfeifer |
# Author : Ulrich Pfeifer |
6 |
# Created On : Mon Dec 31 13:57:11 2001 |
# Created On : Mon Dec 31 13:57:11 2001 |
7 |
# Last Modified By: Ulrich Pfeifer |
# Last Modified By: Ulrich Pfeifer |
# NOTE(review): this text appears to be a side-by-side diff rendering rather
# than a runnable script: each row is a line number, then the old-revision
# text, then the new-revision text (the $Revision$ header reads 1.11 and 1.12).
# A blank cell means the line exists on only one side.
31 |
|
|
# Sets the 'cachesize' entry of $DB_BTREE to 200_000 (presumably DB_File's
# exported $DB_BTREE; the import is outside this view -- TODO confirm).
32 |
$DB_BTREE->{'cachesize'} = 200_000 ; |
$DB_BTREE->{'cachesize'} = 200_000 ; |
33 |
|
|
# Rows 34-37 exist only in the new revision: two library search paths are
# added and oreilly_de_catalog::config / ::wait_filter are loaded here.
34 |
|
use lib "/usr/local/apache/lib"; |
35 |
|
use lib "/online/www/sites/ora/catalogsearch/run/lib"; |
36 |
|
use oreilly_de_catalog::config; |
37 |
|
use oreilly_de_catalog::wait_filter; |
38 |
|
|
# Option defaults; the hash is handed to GetOptions below, which accepts
# 'database=s' and 'dir=s' overrides.
39 |
my %OPT = ( |
my %OPT = ( |
40 |
database => 'oreilly_de_catalog', |
database => 'oreilly_de_catalog', |
# Row 41: the new revision takes the default directory from
# oreilly_de_catalog::config::WAITDIR instead of the hard-coded path.
41 |
dir => '/usr/local/apache/data', |
dir => oreilly_de_catalog::config::WAITDIR, |
42 |
table => 'ora', |
table => 'ora', |
43 |
); |
); |
44 |
|
|
# Row 45 exists only in the new revision: $droot (later passed to tie) now
# comes from oreilly_de_catalog::config::CATALOG.
45 |
|
my $droot = oreilly_de_catalog::config::CATALOG; |
46 |
|
|
47 |
GetOptions(\%OPT, |
GetOptions(\%OPT, |
48 |
'database=s', |
'database=s', |
49 |
'dir=s', |
'dir=s', |
60 |
|
|
# NOTE(review): this text appears to be a side-by-side diff rendering: each
# row is a line number, then the old-revision text, then the new-revision
# text. Rows without a number exist only in the old revision.
#
# $layout is a WAIT::Parse::Ora object; its split method is called on each
# document value in the DOC loop further down.
# NOTE(review): `new WAIT::Parse::Ora` is indirect-object syntax;
# WAIT::Parse::Ora->new is the unambiguous spelling.
61 |
my $layout = new WAIT::Parse::Ora; |
my $layout = new WAIT::Parse::Ora; |
62 |
|
|
# The two unnumbered rows below exist only in the old revision: these `use`
# lines now sit at new lines 34 and 37.
|
use lib "/usr/local/apache/lib"; |
|
|
use oreilly_de_catalog::wait_filter; |
|
|
|
|
# Array refs of names, presumably filter pipelines applied in list order
# (TODO confirm; the code that consumes them is outside this view).
63 |
my $stem = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'stop', 'Stem']; |
my $stem = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'stop', 'Stem']; |
# Rows 64-65: $text changes from OR_tr, OR_lc, split2, stop to
# OR_tr, split, OR_minus, OR_lc. A third variant is left commented out on
# new line 64.
64 |
my $text = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'stop']; |
# my $text = ['OR_tr_20020124', 'split2', 'OR_minus_20020311', 'OR_lc_20020125', 'split2', 'stop']; |
65 |
my $wplus = ['split2', 'OR_lc_20020125', 'OR_mixedonly_20020221']; |
my $text = ['OR_tr_20020124', 'split', 'OR_minus_20020311', 'OR_lc_20020125']; |
# Row 66: $wplus now starts with 'split' instead of 'split2'.
66 |
|
my $wplus = ['split', 'OR_lc_20020125', 'OR_mixedonly_20020221']; |
67 |
my $sound = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'Soundex']; |
my $sound = ['OR_tr_20020124', 'OR_lc_20020125', 'split2', 'Soundex']; |
68 |
my $trigr = ['OR_lc_20020125', 'OR_trigrams_20020125']; |
my $trigr = ['OR_lc_20020125', 'OR_trigrams_20020125']; |
69 |
# split6 is better than split13 or split10: it allows them to enter |
# split6 is better than split13 or split10: it allows them to enter |
# NOTE(review): this text appears to be a side-by-side diff rendering: each
# row is a line number, then the old-revision text, then the new-revision
# text. Rows without a number exist only in the old revision.
72 |
|
|
# Captures the current working directory (cwd presumably comes from the Cwd
# module; the import is outside this view).
73 |
my $cwd = cwd; |
my $cwd = cwd; |
74 |
|
|
# Unnumbered row, old revision only: $droot used to be shifted off the
# argument list, dying with a usage message when the value was missing or
# false. The new revision sets $droot from the config module at new line 45.
|
my $droot = shift or die "Usage: $0 <options> Document-Rootdirectories"; |
|
|
|
|
# Ties %D to WAIT::Document::Ora with $droot as its argument and dies if tie
# returns false. $access receives tie's return value; %D is later walked
# with each() in the DOC loop.
75 |
my %D; |
my %D; |
76 |
my $access = tie %D, 'WAIT::Document::Ora', $droot, |
my $access = tie %D, 'WAIT::Document::Ora', $droot, |
77 |
or die "Couldn't tie to dir $droot: $!\n"; |
or die "Couldn't tie to dir $droot: $!\n"; |
120 |
my $lasttimeround = my $starttime = time; |
my $lasttimeround = my $starttime = time; |
121 |
|
|
122 |
DOC: while (($did, $value) = each %D) { |
DOC: while (($did, $value) = each %D) { |
123 |
|
# next unless $did eq "jscook"; |
124 |
my $record = $layout->split($value); |
my $record = $layout->split($value); |
125 |
my $headline = $record->{title}; |
my $headline = $record->{title}; |
126 |
$headline =~ s/\s+/ /sg; |
$headline =~ s/\s+/ /sg; |
146 |
$ALL->{$did} = $record; |
$ALL->{$did} = $record; |
147 |
open F, ">:utf8", "$OPT{dir}/$OPT{database}-$jobid/debug.dump" or die; |
open F, ">:utf8", "$OPT{dir}/$OPT{database}-$jobid/debug.dump" or die; |
148 |
print F Data::Dumper::Dumper($ALL); |
print F Data::Dumper::Dumper($ALL); |
149 |
close F; |
close F or die "Couldn't close debug.dump: $!";; |
150 |
} |
} |
151 |
} |
} |
152 |
undef $ALL; |
undef $ALL; |
182 |
$tritb->insert(docid => $headline, headline => $headline); |
$tritb->insert(docid => $headline, headline => $headline); |
183 |
} |
} |
# NOTE(review): this text appears to be a side-by-side diff rendering: each
# row is a line number, then the old-revision text, then the new-revision
# text.
184 |
$tritb->set(top=>1); |
$tritb->set(top=>1); |
# Rows 185-187: the new revision dies when any close call returns false; the
# old revision ignored the return values.
# NOTE(review): assumes these close methods return true on success and leave
# a meaningful $! on failure -- confirm against the classes behind $tritb,
# $tb and $db (not visible here).
185 |
$tritb->close; |
$tritb->close or die "Couldn't close table: $!"; |
186 |
$tb->close(); |
$tb->close() or die "Couldn't close table: $!"; |
187 |
$db->close(); |
$db->close() or die "Couldn't close database: $!"; |
188 |
|
|
189 |
# Atomically relinking symlink: now we have a new database with a very |
# Atomically relinking symlink: now we have a new database with a very |
190 |
# long name "$OPT{database}-$jobid" (e.g. |
# long name "$OPT{database}-$jobid" (e.g. |
# NOTE(review): this text appears to be a side-by-side diff rendering: each
# row is a line number, then the old-revision text, then the new-revision
# text. $slwant and $dir are assigned in lines elided from this view.
202 |
warn "$slwant now points to $dir"; |
warn "$slwant now points to $dir"; |
# Runs chmod 777 through the shell on every $slwant/*/read and dies on a
# non-zero exit status.
# NOTE(review): $slwant is interpolated into a shell command line and mode
# 777 is world-writable -- confirm both are intended.
203 |
system("chmod 777 $slwant/*/read")==0 or die; |
system("chmod 777 $slwant/*/read")==0 or die; |
204 |
|
|
# Rows 205-214 exist only in the new revision: remove old index directories.
# The loop scans the current directory and skips dot entries, entries whose
# name does not start with $OPT{database}, and entries with an empty (or
# otherwise false) suffix after that prefix. Anything left whose -M age
# (days since last modification, measured from script start) exceeds 4 is
# deleted with File::Path::rmtree.
# NOTE(review): $OPT{database} is interpolated into the pattern unquoted, so
# regex metacharacters in it would be interpreted; \Q...\E would match it
# literally.
# NOTE(review): the scan targets "." while the index is written under
# $OPT{dir}; this presumably relies on a chdir in the elided code -- confirm.
205 |
|
opendir DIR, "." or die "Could not opendir .: $!"; |
206 |
|
for my $dirent (readdir DIR) { |
207 |
|
next if $dirent =~ /^\./; |
208 |
|
next unless $dirent =~ /^$OPT{database}(.*)/; |
209 |
|
my $ext = $1 or next; |
210 |
|
next unless -M $dirent > 4; |
211 |
|
warn "removing old index $dirent"; |
212 |
|
File::Path::rmtree($dirent); |
213 |
|
} |
214 |
|
closedir DIR; |
215 |
|
|
# Self-assignment mentions $WAIT::Config a second time so that -w does not
# emit a "used only once" warning for it.
216 |
$WAIT::Config = $WAIT::Config; # make perl -w happy |
$WAIT::Config = $WAIT::Config; # make perl -w happy |
217 |
|
|
218 |
|
|