/[wait]/cvs-head/script/index_ora
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /cvs-head/script/index_ora

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 65 by laperla, Wed Jan 23 12:22:54 2002 UTC revision 69 by laperla, Fri Jan 25 07:27:30 2002 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2  #                              -*- Mode: Perl -*-  #                              -*- Mode: Perl -*-
3  # $Basename$  # $Basename$
4  # $Revision: 1.4 $  # $Revision: 1.7 $
5  # Author          : Ulrich Pfeifer  # Author          : Ulrich Pfeifer
6  # Created On      : Mon Dec 31 13:57:11 2001  # Created On      : Mon Dec 31 13:57:11 2001
7  # Last Modified By: Ulrich Pfeifer  # Last Modified By: Ulrich Pfeifer
# Line 11  Line 11 
11  # (C) Copyright 2001, UUNET Deutschland GmbH, Germany  # (C) Copyright 2001, UUNET Deutschland GmbH, Germany
12  #  #
13    
14    use 5.007;
15    
16  use strict;  use strict;
17    
18  use File::Path;  use File::Path;
19  use DB_File;  use DB_File;
20  use Getopt::Long;  use Getopt::Long;
21  use Cwd;  use Cwd;
22    
23  require WAIT::Config;  BEGIN {require WAIT::Config;}
24  require WAIT::Database;  use WAIT::Database;
25  require WAIT::Parse::Ora;  use WAIT::Parse::Ora;
26  require WAIT::Document::Ora;  use WAIT::Document::Ora;
27  require WAIT::InvertedIndex;  use WAIT::InvertedIndex;
28    
29    
30  $DB_BTREE->{'cachesize'} = 200_000 ;  $DB_BTREE->{'cachesize'} = 200_000 ;
31    
32  my %OPT = (clean    => 0,  my %OPT = (
33             database => 'DB',             database => 'DB',
34             dir      => $WAIT::Config->{WAIT_home} || '/tmp',             dir      => $WAIT::Config->{WAIT_home} || '/tmp',
35             table    => 'ora',             table    => 'ora',
36            );            );
37    
38  GetOptions(\%OPT,  GetOptions(\%OPT,
            'clean!',  
39             'database=s',             'database=s',
40             'dir=s',             'dir=s',
41             'table=s',             'table=s',
42            ) || die "Usage: ...\n";            ) || die "Usage: ...\n";
43    
44  if ($OPT{clean} and -d "$OPT{dir}/$OPT{database}") {  my @localtime = localtime;
45    my $tmp = WAIT::Database->open(name        => $OPT{database},  $localtime[5] += 1900;
46                                   'directory' => $OPT{dir})  $localtime[4]++;
47      or die "Could not open table $OPT{table}: $@\n";  my $jobid = sprintf "%04s-%02s-%02s_%02s:%02s_%d", @localtime[5,4,3,2,1], $$;
48    my $tbl = $tmp->table(name => $OPT{table});  my $db = WAIT::Database->create(name      => "$OPT{database}-$jobid",
49    $tbl->drop if $tbl;                                  directory => $OPT{dir})
50    rmtree("$OPT{dir}/$OPT{database}/$OPT{table}", 1, 1)      or die "Could not create database $OPT{database}: $@\n";
     if -d "$OPT{dir}/$OPT{database}/$OPT{table}";  
   $tmp->close;  
 }  
   
 my $db;  
 unless (-d "$OPT{dir}/$OPT{database}") {  
   $db = WAIT::Database->create(name       => $OPT{database},  
                               'directory' => $OPT{dir})  
     or die "Could not open database $OPT{database}: $@\n";  
 }  
 else {  
   $db = WAIT::Database->open(name        => $OPT{database},  
                              'directory' => $OPT{dir})  
     or die "Could not open table $OPT{table}: $@\n";  
 }  
51    
52  my $layout = new WAIT::Parse::Ora;  my $layout = new WAIT::Parse::Ora;
53    
54  my $stem  = ['isotr', 'isolc', 'split2', 'stop', 'Stem'];  use lib "/usr/local/apache/lib";
55    use oreilly_de_catalog::wait_handler;
56    
57    my $stem  = ['OR_tr_20020124', 'OR_lc_20020124', 'split2', 'stop', 'Stem'];
58  my $text  = [{  my $text  = [{
59                'prefix'    => ['isotr', 'isolc'],                'prefix'    => ['OR_tr_20020124', 'OR_lc_20020124'],
60                'intervall' => ['isotr', 'isolc'],                'intervall' => ['OR_tr_20020124', 'OR_lc_20020124'],
61               },               },
62               'isotr', 'isolc', 'split2', 'stop'];               'OR_tr_20020124', 'OR_lc_20020124', 'split2', 'stop'];
63  my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],;  my $sound = ['OR_tr_20020124', 'OR_lc_20020124', 'split2', 'Soundex'],;
64    
65  my $cwd = cwd;  my $cwd = cwd;
66    
# Line 97  my $tb = $db->create_table(name     => $ Line 87  my $tb = $db->create_table(name     => $
87  die "Couldn't create table $OPT{table}: $@\n" unless $tb;  die "Couldn't create table $OPT{table}: $@\n" unless $tb;
88    
89  my ($did, $value);  my ($did, $value);
90    binmode STDOUT, ":utf8";
91  while (($did, $value) = each %D) {  while (($did, $value) = each %D) {
92    my $record   = $layout->split($value);    my $record   = $layout->split($value);
93    my $headline = $record->{title};    my $headline = $record->{title};
# Line 110  $tb->set(top=>1); Line 101  $tb->set(top=>1);
101  $tb->close();  $tb->close();
102  $db->close();  $db->close();
103    
104    # Now we have a new database with a very long name and we want that
105    # database to be accessible with the $OPT{database} name
106    
107    use File::Spec;
108    my $long_dir   = "$OPT{database}-$jobid";
109    my $want_dir   = File::Spec->catdir($OPT{dir}, $OPT{database});
110    my $prel_slink = File::Spec->catdir($OPT{dir}, "$OPT{database}-$$");
111    unlink $prel_slink; # may fail
112    symlink $long_dir, $prel_slink or die "Could not symlink $long_dir, $prel_slink: $!";
113    rename $prel_slink, $want_dir or die "Could not rename $prel_slink, $want_dir: $!";
114    
115  $WAIT::Config = $WAIT::Config; # make perl -w happy  $WAIT::Config = $WAIT::Config; # make perl -w happy
116    
117    
# Line 125  index_ora - generate an WAIT index for O Line 127  index_ora - generate an WAIT index for O
127  =head1 SYNOPSIS  =head1 SYNOPSIS
128    
129  B<index_ora>  B<index_ora>
 [B<-clean>] [B<-noclean>]  
130  [B<-database> I<dbname>]  [B<-database> I<dbname>]
131  [B<-dir> I<directory>]  [B<-dir> I<directory>]
132  [B<-table> I<table name>]  [B<-table> I<table name>]
# Line 137  I<directory> Line 138  I<directory>
138    
139  =over 5  =over 5
140    
 =item B<-clean> / B<-noclean>  
   
 Clean the table before indexing. Default is B<off>.  
   
141  =item B<-database> I<dbname>  =item B<-database> I<dbname>
142    
143  Specify database name. Default is F<DB>.  Specify database name. Default is F<DB>.

Legend:
Removed from v.65  
changed lines
  Added in v.69

  ViewVC Help
Powered by ViewVC 1.1.26