/[wait]/cvs-head/script/index_ora
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /cvs-head/script/index_ora

Parent Directory | Revision Log | View Patch

revision 72 by laperla, Mon Jan 28 21:35:39 2002 UTC | revision 73 by laperla, Tue Mar 5 13:40:38 2002 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2  #                              -*- Mode: Perl -*-  #                              -*- Mode: Perl -*-
3  # $Basename$  # $Basename$
4  # $Revision: 1.9 $  # $Revision: 1.10 $
5  # Author          : Ulrich Pfeifer  # Author          : Ulrich Pfeifer
6  # Created On      : Mon Dec 31 13:57:11 2001  # Created On      : Mon Dec 31 13:57:11 2001
7  # Last Modified By: Ulrich Pfeifer  # Last Modified By: Ulrich Pfeifer
8  # Last Modified On: Fri Jan  4 15:59:20 2002  # Last Modified On: Fri Jan  4 15:59:20 2002
9  # Language        : CPerl  # Language        : CPerl
10  #  #
11  # (C) Copyright 2001, UUNET Deutschland GmbH, Germany  # (C) Copyright 2001, Ulrich Pfeifer
12  #  #
13    
14  use 5.007;  use 5.007;
# Line 31  use WAIT::InvertedIndex; Line 31  use WAIT::InvertedIndex;
31  $DB_BTREE->{'cachesize'} = 200_000 ;  $DB_BTREE->{'cachesize'} = 200_000 ;
32    
33  my %OPT = (  my %OPT = (
34             database => 'DB',             database => 'oreilly_de_catalog',
35             dir      => $WAIT::Config->{WAIT_home} || '/tmp',             dir      => '/usr/local/apache/data',
36             table    => 'ora',             table    => 'ora',
37            );            );
38    
# Line 61  my $text  = [{ Line 61  my $text  = [{
61                'intervall' => ['OR_tr_20020124', 'OR_lc_20020124'],                'intervall' => ['OR_tr_20020124', 'OR_lc_20020124'],
62               },               },
63               'OR_tr_20020124', 'OR_lc_20020124', 'split2', 'stop'];               'OR_tr_20020124', 'OR_lc_20020124', 'split2', 'stop'];
64    my $wplus = ['split2', 'OR_lc_20020124', 'OR_mixedonly_20020221'];
65  my $sound = ['OR_tr_20020124', 'OR_lc_20020124', 'split2', 'Soundex'];  my $sound = ['OR_tr_20020124', 'OR_lc_20020124', 'split2', 'Soundex'];
66  my $trigr = ['OR_lc_20020124', 'OR_trigrams_20020125'];  my $trigr = ['OR_lc_20020124', 'OR_trigrams_20020125'];
67    # split6 is better than split13 or split10: it allows them to enter
68    # shorter sequences when searching.
69  my $isbn  = ['split6', 'OR_isbn_20020127'];  my $isbn  = ['split6', 'OR_isbn_20020127'];
70    
71  my $cwd = cwd;  my $cwd = cwd;
72    
73    my $droot = shift or die "Usage: $0 <options> Document-Rootdirectories";
74    
75  my %D;  my %D;
76  my $access = tie %D, 'WAIT::Document::Ora', @ARGV,  my $access = tie %D, 'WAIT::Document::Ora', $droot,
77    or die "Couldn't tie to file: $!\n";    or die "Couldn't tie to dir $droot: $!\n";
78    
79  my $tb = $db->create_table(name     => $OPT{table},  my $tb = $db->create_table(name     => $OPT{table},
80                             attr     => ['author', 'isbn', 'title',                             attr     => ['author', 'isbn', 'title',
# Line 79  my $tb = $db->create_table(name     => $ Line 84  my $tb = $db->create_table(name     => $
84                             invindex =>                             invindex =>
85                             [                             [
86                              'title'  => $text,                              'title'  => $text,
87                                'title'  => $wplus,
88                              # 'title'  => $stem,                              # 'title'  => $stem,
89                              'aboutauthor'  => $text,                              'aboutauthor'  => $text,
90                                'aboutauthor'  => $wplus,
91                              # 'aboutauthor'  => $stem,                              # 'aboutauthor'  => $stem,
92                              'desc'   => $text,                              'desc'   => $text,
93                                'desc'   => $wplus,
94                              'abstract' => $text,                              'abstract' => $text,
95                                'abstract' => $wplus,
96                              'author' => $text,                              'author' => $text,
97                              # 'author' => $sound,                              # 'author' => $sound,
98                              'colophon' => $text,                              'colophon' => $text,
99                                'colophon' => $wplus,
100                              'isbn'   => $isbn,                              'isbn'   => $isbn,
101                             ]                             ]
102                            );                            );
# Line 118  for my $f ($tb->fields) { Line 128  for my $f ($tb->fields) {
128    my(@idx) = @{$tb->table->{inverted}{$f} || []};    my(@idx) = @{$tb->table->{inverted}{$f} || []};
129    for my $idx (@idx) {    for my $idx (@idx) {
130      my $name = $idx->name;      my $name = $idx->name;
131      next if $name =~ /(_|\b)(Stem|Soundex)(\b|_)/; # irrelevant for alternatives      next if $name =~ /(_|\b)(mixedonly|Stem|Soundex)(\b|_)/;
132                  # irrelevant for alternatives
133      my @keys = $idx->keys;      my @keys = $idx->keys;
134      @dict{@keys} = ();      @dict{@keys} = ();
135    }    }
# Line 138  $tritb->close; Line 149  $tritb->close;
149  $tb->close();  $tb->close();
150  $db->close();  $db->close();
151    
152  # Now we have a new database with a very long name and we want that  # Atomically relinking symlink: now we have a new database with a very
153  # database to be accessible with the $OPT{database} name  # long name like oreilly_de_catalog-2002-01-28_16:12_16467 and we want
154    # that database to be accessible with the oreilly_de_catalog name.
155    
156  use File::Spec;  use File::Spec;
157  my $long_dir   = "$OPT{database}-$jobid";  my $dir    = "$OPT{database}-$jobid";
158  my $want_dir   = File::Spec->catdir($OPT{dir}, $OPT{database});  my $slwant = File::Spec->catdir($OPT{dir}, $OPT{database});
159  my $prel_slink = File::Spec->catdir($OPT{dir}, "$OPT{database}-$$");  my $sltmp  = File::Spec->catdir($OPT{dir}, "$OPT{database}-$$");
160  unlink $prel_slink; # may fail  unlink $sltmp; # may fail
161  symlink $long_dir, $prel_slink or die "Could not symlink $long_dir, $prel_slink: $!";  symlink $dir, $sltmp or die "Couldn't symlink $dir, $sltmp: $!";
162  rename $prel_slink, $want_dir or die "Could not rename $prel_slink, $want_dir: $!";  rename $sltmp, $slwant or die "Couldn't rename $sltmp, $slwant: $!";
163    warn "$slwant now points to $dir";
164  system("chmod 777 $want_dir/*/read")==0 or die;  system("chmod 777 $slwant/*/read")==0 or die;
165    
166  $WAIT::Config = $WAIT::Config; # make perl -w happy  $WAIT::Config = $WAIT::Config; # make perl -w happy
167    

Legend:
Removed from v.72  
changed lines
  Added in v.73

  ViewVC Help
Powered by ViewVC 1.1.26