/[wait]/cvs-head/script/index_ora
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /cvs-head/script/index_ora

Parent Directory Parent Directory | Revision Log Revision Log


Revision 67 - (show annotations)
Thu Jan 24 00:40:43 2002 UTC (22 years, 3 months ago) by laperla
File size: 3665 byte(s)
- remove clean option

1 #!/usr/bin/perl -w
2 # -*- Mode: Perl -*-
3 # $Basename$
4 # $Revision: 1.5 $
5 # Author : Ulrich Pfeifer
6 # Created On : Mon Dec 31 13:57:11 2001
7 # Last Modified By: Ulrich Pfeifer
8 # Last Modified On: Fri Jan 4 15:59:20 2002
9 # Language : CPerl
10 #
11 # (C) Copyright 2001, UUNET Deutschland GmbH, Germany
12 #
13
14 use strict;
15 use File::Path;
16 use DB_File;
17 use Getopt::Long;
18 use Cwd;
19
20 require WAIT::Config;
21 require WAIT::Database;
22 require WAIT::Parse::Ora;
23 require WAIT::Document::Ora;
24 require WAIT::InvertedIndex;
25
26
# Set DB_File's B-tree `cachesize' before any index file gets opened.
$DB_BTREE->{'cachesize'} = 200_000;

# Option defaults; each one can be overridden on the command line.
#   database - name of the WAIT database
#   dir      - directory holding the databases: WAIT_home from the WAIT
#              configuration when that is set, otherwise /tmp
#   table    - name of the table to create
my %OPT = (
    database => 'DB',
    dir      => $WAIT::Config->{WAIT_home} || '/tmp',
    table    => 'ora',
);

# Unknown or malformed options are fatal; show the real synopsis instead of
# a placeholder so the user can see what is accepted.
GetOptions(\%OPT,
    'database=s',
    'dir=s',
    'table=s',
) || die "Usage: $0 [-database dbname] [-dir directory] [-table name] directory\n";
40
# Open the target database, creating it first when its directory does not
# exist yet.  Each failure message names the operation that actually failed.
my $db;
if (-d "$OPT{dir}/$OPT{database}") {
    $db = WAIT::Database->open(name        => $OPT{database},
                               'directory' => $OPT{dir})
        or die "Could not open database $OPT{database}: $@\n";
}
else {
    $db = WAIT::Database->create(name        => $OPT{database},
                                 'directory' => $OPT{dir})
        or die "Could not create database $OPT{database}: $@\n";
}
52
# Parser object; later used both as the table layout and to split each raw
# document into a record of fields.
my $layout = WAIT::Parse::Ora->new;

# Filter pipelines for the inverted indexes.  All three start with the same
# 'isotr', 'isolc', 'split2' steps and differ only in how they end:
#   $stem  - ... 'stop', 'Stem'
#   $text  - ... 'stop', plus a leading hash of extra pipelines used for
#            'prefix' and 'intervall' lookups
#   $sound - ... 'Soundex'
my $stem = ['isotr', 'isolc', 'split2', 'stop', 'Stem'];
my $text = [
    {
        'prefix'    => ['isotr', 'isolc'],
        # NOTE(review): 'intervall' (sic) is a runtime key, presumably the
        # spelling WAIT expects -- do not "correct" it without checking.
        'intervall' => ['isotr', 'isolc'],
    },
    'isotr', 'isolc', 'split2', 'stop',
];
my $sound = ['isotr', 'isolc', 'split2', 'Soundex'];
62
# Tie %D to the document source named by the remaining command-line
# arguments.  The object returned by tie() is kept because the table needs
# it as its access object.
my %D;
my $access = tie %D, 'WAIT::Document::Ora', @ARGV
    or die "Couldn't tie to file: $!\n";

# Create the table.  `invindex' is a flat list of field => pipeline pairs,
# not a hash: 'author' appears twice on purpose so that it gets both a text
# index and a Soundex index.
my $tb = $db->create_table(
    name     => $OPT{table},
    attr     => ['author', 'isbn', 'title', 'headline', 'docid'],
    layout   => $layout,
    access   => $access,
    invindex => [
        'title'    => $stem,
        'about'    => $stem,
        'text'     => $text,
        'author'   => $text,
        'colophon' => $text,
        'author'   => $sound,
        'isbn'     => $text,
    ],
);
die "Couldn't create table $OPT{table}: $@\n" unless $tb;
86
# Walk every document exposed through the tied hash, split it into fields,
# report it on STDOUT and insert it into the table.
while (my ($docid, $raw) = each %D) {
    my $fields = $layout->split($raw);

    # The headline is the title with every run of whitespace collapsed to
    # a single blank.
    (my $headline = $fields->{title}) =~ s/\s+/ /sg;

    printf "%15s %s\n", $fields->{isbn}, substr($headline, 0, 60);

    # The parsed fields come last, so they are added on top of the two
    # explicit attributes.
    $tb->insert(
        'docid'  => $docid,
        headline => $headline,
        %$fields,
    );
}

# Finish up: flag the table, then close table and database in that order.
$tb->set(top => 1);
$tb->close();
$db->close();

$WAIT::Config = $WAIT::Config;    # make perl -w happy
102
103
104 __END__
105 ## ###################################################################
106 ## pod
107 ## ###################################################################
108
109 =head1 NAME
110
111 index_ora - generate a WAIT index for the O'Reilly catalog
112
113 =head1 SYNOPSIS
114
115 B<index_ora>
116 [B<-database> I<dbname>]
117 [B<-dir> I<directory>]
118 [B<-table> I<table name>]
119 I<directory>
120
121 =head1 DESCRIPTION
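122
Indexes the O'Reilly catalog documents found in I<directory>. The WAIT
database is created if it does not exist yet; a table with inverted
indexes on the title, about, text, author, colophon and isbn fields is
then created in it and one record is inserted per document. The ISBN and
the headline of each record are printed as it is inserted.
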
123 =head1 OPTIONS
124
125 =over 5
126
127 =item B<-database> I<dbname>
128
129 Specify database name. Default is F<DB>.
130
131 =item B<-dir> I<directory>
132
133 Alternate directory where databases are located. Default is the
134 directory specified during configuration of WAIT.
135
136 =item B<-table> I<table name>
137
138 Specify an alternate table name. Default is C<ora>.
139
=back

140 =head1 AUTHOR
141
142 Ulrich Pfeifer E<lt>F<pfeifer@wait.de>E<gt>
143

Properties

Name Value
cvs2svn:cvs-rev 1.5

  ViewVC Help
Powered by ViewVC 1.1.26