/[wait]/trunk/script/index_ora
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/script/index_ora

Parent Directory | Revision Log


Revision 68 - (show annotations)
Thu Jan 24 01:53:25 2002 UTC (22 years, 3 months ago) by laperla
Original Path: cvs-head/script/index_ora
File size: 4112 byte(s)
- Enable reindexing with atomic symlinkhack

1 #!/usr/bin/perl -w
2 # -*- Mode: Perl -*-
3 # $Basename$
4 # $Revision: 1.6 $
5 # Author : Ulrich Pfeifer
6 # Created On : Mon Dec 31 13:57:11 2001
7 # Last Modified By: Ulrich Pfeifer
8 # Last Modified On: Fri Jan 4 15:59:20 2002
9 # Language : CPerl
10 #
11 # (C) Copyright 2001, UUNET Deutschland GmbH, Germany
12 #
13
14 use strict;
15 use File::Path;
16 use DB_File;
17 use Getopt::Long;
18 use Cwd;
19
20 require WAIT::Config;
21 require WAIT::Database;
22 require WAIT::Parse::Ora;
23 require WAIT::Document::Ora;
24 require WAIT::InvertedIndex;
25
26
# Set the cache size hint on DB_File's shared BTREE info object.  This has
# to happen before any BTREE file is opened with it.
$DB_BTREE->{'cachesize'} = 200_000;

# Command line defaults.  'dir' falls back to /tmp when the WAIT
# configuration does not provide WAIT_home.
my %OPT = (
           database => 'DB',
           dir      => $WAIT::Config->{WAIT_home} || '/tmp',
           table    => 'ora',
          );

# Usage text mirrors the SYNOPSIS in the POD at the end of this file.
my $usage = "Usage: $0 [-database dbname] [-dir directory] "
          . "[-table tablename] directory\n";

# GetOptions stores recognised options straight into %OPT and leaves the
# remaining arguments in @ARGV.
GetOptions(\%OPT,
           'database=s',
           'dir=s',
           'table=s',
          ) or die $usage;
40
# Build a job id from the current local time (minute resolution) and our
# process id, e.g. "2002-01-24_01:53_12345".  Each run indexes into its own
# freshly named database instead of touching the one currently in use.
my @localtime = localtime;
$localtime[5] += 1900;          # localtime() counts years from 1900
$localtime[4]++;                # ... and months from 0
my $jobid = sprintf "%04d-%02d-%02d_%02d:%02d_%d", @localtime[5,4,3,2,1], $$;

# Report the name and directory we actually tried to create, not just the
# short name given on the command line.
my $db_name = "$OPT{database}-$jobid";
my $db = WAIT::Database->create(name      => $db_name,
                                directory => $OPT{dir})
  or die "Could not create database $db_name in $OPT{dir}: $@\n";
48
# Parser object; used as the table layout here and later to split each
# document into its fields.
my $layout = WAIT::Parse::Ora->new;

# Filter chains passed to create_table as inverted index specifications.
# NOTE(review): the individual filter names are defined by WAIT, not here;
# presumably they are applied in the order listed -- confirm against
# WAIT::InvertedIndex.
my $stem  = ['isotr', 'isolc', 'split2', 'stop', 'Stem'];
my $text  = [{
              'prefix'    => ['isotr', 'isolc'],
              'intervall' => ['isotr', 'isolc'], # key spelling kept as is
             },
             'isotr', 'isolc', 'split2', 'stop'];
my $sound = ['isotr', 'isolc', 'split2', 'Soundex'];

# Tie %D to the documents named on the command line.  The object returned
# by tie is also handed to the table as its access object.
my %D;
my $access = tie %D, 'WAIT::Document::Ora', @ARGV
  or die "Couldn't tie to file: $!\n";

# 'author' appears twice in invindex, once with $text and once with
# $sound.  invindex is an array reference, so both entries are passed on.
my $tb = $db->create_table(name     => $OPT{table},
                           attr     => ['author', 'isbn', 'title',
                                        'headline', 'docid'],
                           layout   => $layout,
                           access   => $access,
                           invindex =>
                           [
                            'title'    => $stem,
                            'about'    => $stem,
                            'text'     => $text,
                            'author'   => $text,
                            'colophon' => $text,
                            'author'   => $sound,
                            'isbn'     => $text,
                           ]
                          );
die "Couldn't create table $OPT{table}: $@\n" unless $tb;
82
# Walk over every document in the tied hash, split it into fields and
# insert it into the table.  One progress line (ISBN plus the first 60
# characters of the headline) is printed per document.
while (my ($did, $value) = each %D) {
  my $record = $layout->split($value);

  # The headline is the title with every run of whitespace collapsed
  # to a single blank.
  (my $headline = $record->{title}) =~ s/\s+/ /sg;

  printf "%15s %s\n", $record->{isbn}, substr($headline, 0, 60);

  # The record's own fields come last in the argument list.
  $tb->insert(docid    => $did,
              headline => $headline,
              %$record);
}

$tb->set(top => 1);
$tb->close;
$db->close;
96
# Now we have a new database with a very long name and we want that
# database to be accessible with the $OPT{database} name.
#
# This is done by creating a symlink under a temporary, pid-based name and
# then renaming it onto the wanted name.  The rename replaces an existing
# link in one step, so $want_dir never goes missing during a reindex.
#
# The link target is relative, so it is resolved relative to $OPT{dir},
# the directory holding the link.
# NOTE(review): assumes WAIT::Database->create placed the database at
# $OPT{dir}/$long_dir -- confirm.

use File::Spec;
my $long_dir   = "$OPT{database}-$jobid";
my $want_dir   = File::Spec->catdir($OPT{dir}, $OPT{database});
my $prel_slink = File::Spec->catdir($OPT{dir}, "$OPT{database}-$$");

unlink $prel_slink;             # may fail
symlink $long_dir, $prel_slink
  or die "Could not symlink $long_dir, $prel_slink: $!";

unless (rename $prel_slink, $want_dir) {
  my $err = $!;                 # save it, the unlink below may reset $!
  unlink $prel_slink;           # do not leave the temporary link behind
  die "Could not rename $prel_slink, $want_dir: $err";
}

$WAIT::Config = $WAIT::Config;  # make perl -w happy
109
110
111 __END__
112 ## ###################################################################
113 ## pod
114 ## ###################################################################
115
116 =head1 NAME
117
118 index_ora - generate a WAIT index for the O'Reilly catalog
119
120 =head1 SYNOPSIS
121
122 B<index_ora>
123 [B<-database> I<dbname>]
124 [B<-dir> I<directory>]
125 [B<-table> I<table name>]
126 I<directory>
127
128 =head1 DESCRIPTION
129
130 =head1 OPTIONS
131
132 =over 5
133
134 =item B<-database> I<dbname>
135
136 Specify database name. Default is F<DB>.
137
138 =item B<-dir> I<directory>
139
140 Alternate directory where databases are located. Default is the
141 directory specified during configuration of WAIT.
142
143 =item B<-table> I<table name>
144
145 Specify an alternate table name. Default is C<ora>.
146
=back

147 =head1 AUTHOR
148
149 Ulrich Pfeifer E<lt>F<pfeifer@wait.de>E<gt>
150

Properties

Name Value
cvs2svn:cvs-rev 1.6

  ViewVC Help
Powered by ViewVC 1.1.26