/[wait]/cvs-head/script/index_ora
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /cvs-head/script/index_ora

Parent Directory | Revision Log


Revision 68 - (hide annotations)
Thu Jan 24 01:53:25 2002 UTC (22 years, 3 months ago) by laperla
File size: 4112 byte(s)
- Enable reindexing with atomic symlinkhack

1 ulpfr 55 #!/usr/bin/perl -w
2     # -*- Mode: Perl -*-
3     # $Basename$
4 laperla 68 # $Revision: 1.6 $
5 ulpfr 55 # Author : Ulrich Pfeifer
6     # Created On : Mon Dec 31 13:57:11 2001
7     # Last Modified By: Ulrich Pfeifer
8 ulpfr 62 # Last Modified On: Fri Jan 4 15:59:20 2002
9 ulpfr 55 # Language : CPerl
10     #
11     # (C) Copyright 2001, UUNET Deutschland GmbH, Germany
12     #
13    
14     use strict;
15     use File::Path;
16     use DB_File;
17     use Getopt::Long;
18     use Cwd;
19    
20     require WAIT::Config;
21     require WAIT::Database;
22     require WAIT::Parse::Ora;
23     require WAIT::Document::Ora;
24     require WAIT::InvertedIndex;
25    
26    
# Cache-size setting applied to DB_File's shared BTREE info object.
# NOTE(review): DB_File documents cachesize as a size in bytes -- confirm
# that 200_000 is the intended unit.
$DB_BTREE->{cachesize} = 200_000;

# Option defaults.  The database directory falls back to /tmp when the
# WAIT configuration does not supply a WAIT_home entry.
my %OPT = (database => 'DB', table => 'ora');
$OPT{dir} = $WAIT::Config->{WAIT_home} || '/tmp';

# Each of the three options takes a string value; values given on the
# command line overwrite the defaults stored in %OPT.
GetOptions(\%OPT, map("$_=s", qw(database dir table)))
    or die "Usage: ...\n";
40    
# Identifier for this indexing run: local date and time down to the
# minute, followed by the process id (YYYY-MM-DD_HH:MM_PID).
my $jobid = do {
    # localtime fields 1..5 are minute, hour, day of month, month (0-based)
    # and year (offset from 1900).
    my ($minute, $hour, $mday, $month, $year) = (localtime)[1 .. 5];
    sprintf "%04s-%02s-%02s_%02s:%02s_%d",
        $year + 1900, $month + 1, $mday, $hour, $minute, $$;
};
# Create the database under a per-run name (base name plus job id) inside
# the configured directory; the plain base name is attached at the end of
# the script.
my $db = WAIT::Database->create(
    name      => join('-', $OPT{database}, $jobid),
    directory => $OPT{dir},
);
die "Could not create database $OPT{database}: $@\n" unless $db;
48 ulpfr 55
# Parser object for catalog entries.  It is handed to create_table() as
# the table layout and is also used to split every document before insert.
# Direct method-call syntax is used instead of the indirect-object form
# ("new CLASS"), which perl has to guess at while parsing.
my $layout = WAIT::Parse::Ora->new;

# Filter pipelines referenced by the inverted-index definitions below.
# Each is a list of WAIT filter names.
# NOTE(review): presumably the filters are applied in the order listed --
# confirm against WAIT::InvertedIndex.
my $stem = [qw(isotr isolc split2 stop Stem)];

# The leading hash in $text carries separate pipelines under the keys
# 'prefix' and 'intervall'.  The spelling 'intervall' is a runtime key
# and is kept exactly as written.
my $text = [
    {
        'prefix'    => [qw(isotr isolc)],
        'intervall' => [qw(isotr isolc)],
    },
    qw(isotr isolc split2 stop),
];

my $sound = [qw(isotr isolc split2 Soundex)];
58    
# NOTE(review): $cwd is captured here but is not referenced again in the
# visible part of this script.
my $cwd = cwd();

# Tie %D to WAIT::Document::Ora, passing along whatever arguments are left
# on the command line after option parsing (the SYNOPSIS lists a
# directory).  The object returned by tie is kept because create_table()
# receives it as its 'access' argument.
my %D;
my $access = tie(%D, 'WAIT::Document::Ora', @ARGV)
    or die "Couldn't tie to file: $!\n";
64    
# Inverted-index definitions as an ordered list of field => pipeline
# pairs.  This has to stay a list rather than a hash: 'author' is listed
# twice, once with the $text pipeline and once with $sound.
my @invindex = (
    'title'    => $stem,
    'about'    => $stem,
    'text'     => $text,
    'author'   => $text,
    'colophon' => $text,
    'author'   => $sound,
    'isbn'     => $text,
);

# Create the table in the freshly created database; give up with the
# table name in the message when no table object comes back.
my $tb = $db->create_table(
    name     => $OPT{table},
    attr     => [qw(author isbn title headline docid)],
    layout   => $layout,
    access   => $access,
    invindex => \@invindex,
) or die "Couldn't create table $OPT{table}: $@\n";
82    
# Walk every document exposed through the tied hash, split it into fields
# and insert it into the table.
while (my ($did, $value) = each %D) {
    my $record = $layout->split($value);

    # The headline is the title with every run of whitespace collapsed to
    # a single blank.
    (my $headline = $record->{title}) =~ s/\s+/ /sg;

    # Progress line: ISBN plus the first 60 characters of the headline.
    printf "%15s %s\n", $record->{isbn}, substr($headline, 0, 60);

    # Fields from the parsed record come last in the list, after the
    # explicit docid and headline pairs.
    $tb->insert(
        'docid'  => $did,
        headline => $headline,
        %$record,
    );
}

# NOTE(review): the meaning of the 'top' setting is defined by WAIT's
# table class and is not visible here -- confirm before changing.
$tb->set(top => 1);
$tb->close;
$db->close;
96    
# The new database lives in a directory with a long, per-run name.  Make
# it reachable under the plain $OPT{database} name by creating a symlink
# under a temporary name and then renaming that symlink onto the wanted
# name, so the wanted name is switched over in a single rename() step.

use File::Spec;

# Link target without a directory part: it is resolved relative to the
# directory that holds the link, which is $OPT{dir}.
my $long_dir   = "$OPT{database}-$jobid";
my $want_dir   = File::Spec->catdir($OPT{dir}, $OPT{database});
my $prel_slink = File::Spec->catdir($OPT{dir}, "$OPT{database}-$$");

unlink $prel_slink; # may fail (normally nothing is there to remove)

symlink($long_dir, $prel_slink)
    or die "Could not symlink $long_dir, $prel_slink: $!";

unless (rename($prel_slink, $want_dir)) {
    # Keep the rename error before unlink can overwrite $!, then remove
    # the temporary link so a failed run does not leave it behind.
    my $rename_error = $!;
    unlink $prel_slink;
    die "Could not rename $prel_slink, $want_dir: $rename_error";
}

# Second mention of the variable, so that perl -w does not warn about a
# name that is used only once.
$WAIT::Config = $WAIT::Config; # make perl -w happy
109    
110    
111     __END__
112     ## ###################################################################
113     ## pod
114     ## ###################################################################
115    
116     =head1 NAME
117    
118     index_ora - generate a WAIT index for the O'Reilly catalog
119    
120     =head1 SYNOPSIS
121    
122     B<index_ora>
123     [B<-database> I<dbname>]
124     [B<-dir> I<directory>]
125     [B<-table> I<table name>]
126     I<directory>
127    
128     =head1 DESCRIPTION
129    
130     =head1 OPTIONS
131    
132     =over 5
133    
134     =item B<-database> I<dbname>
135    
136     Specify database name. Default is F<DB>.
137    
138     =item B<-dir> I<directory>
139    
140     Alternate directory where databases are located. Default is the
141     directory specified during configuration of WAIT.
142    
143     =item B<-table> I<table name>
144    
145     Specify an alternate table name. Default is C<ora>.
146    
        =back

147     =head1 AUTHOR
148    
149     Ulrich Pfeifer E<lt>F<pfeifer@wait.de>E<gt>
150    

Properties

Name Value
cvs2svn:cvs-rev 1.6

  ViewVC Help
Powered by ViewVC 1.1.26