/[wait]/cvs-head/script/index_ora
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /cvs-head/script/index_ora

Parent Directory Parent Directory | Revision Log Revision Log


Revision 67 - (hide annotations)
Thu Jan 24 00:40:43 2002 UTC (22 years, 4 months ago) by laperla
File size: 3665 byte(s)
- remove clean option

#!/usr/bin/perl -w
#                              -*- Mode: Perl -*-
# $Basename$
# $Revision: 1.5 $
# Author          : Ulrich Pfeifer
# Created On      : Mon Dec 31 13:57:11 2001
# Last Modified By: Ulrich Pfeifer
# Last Modified On: Fri Jan  4 15:59:20 2002
# Language        : CPerl
#
# (C) Copyright 2001, UUNET Deutschland GmbH, Germany
#

use strict;
use File::Path;
use DB_File;
use Getopt::Long;
use Cwd;

require WAIT::Config;
require WAIT::Database;
require WAIT::Parse::Ora;
require WAIT::Document::Ora;
require WAIT::InvertedIndex;

# Enlarge the DB_File B-tree cache used for every B-tree opened afterwards.
$DB_BTREE->{'cachesize'} = 200_000;

# Command line defaults; each of them can be overridden via GetOptions below.
my %OPT = (
           database => 'DB',
           dir      => $WAIT::Config->{WAIT_home} || '/tmp',
           table    => 'ora',
          );

GetOptions(\%OPT,
           'database=s',
           'dir=s',
           'table=s',
          )
  or die "Usage: $0 [-database dbname] [-dir directory] [-table name] directory\n";

# Create the database when its directory does not exist yet, otherwise
# open the existing one.
my $db;
if (-d "$OPT{dir}/$OPT{database}") {
  $db = WAIT::Database->open(name        => $OPT{database},
                             'directory' => $OPT{dir})
    or die "Could not open database $OPT{database}: $@\n";
}
else {
  $db = WAIT::Database->create(name        => $OPT{database},
                               'directory' => $OPT{dir})
    or die "Could not create database $OPT{database}: $@\n";
}

my $layout = WAIT::Parse::Ora->new;

# Filter pipelines handed to create_table() as inverted index
# specifications.  $text starts with a hash that gives separate pipelines
# for the 'prefix' and 'intervall' keys (the spelling is the key name
# used here and must not be "corrected").
my $stem  = ['isotr', 'isolc', 'split2', 'stop', 'Stem'];
my $text  = [{
              'prefix'    => ['isotr', 'isolc'],
              'intervall' => ['isotr', 'isolc'],
             },
             'isotr', 'isolc', 'split2', 'stop'];
my $sound = ['isotr', 'isolc', 'split2', 'Soundex'];

# The remaining command line arguments are passed on unchanged to
# WAIT::Document::Ora, which supplies the documents through the tied hash.
my %D;
my $access = tie(%D, 'WAIT::Document::Ora', @ARGV)
  or die "Couldn't tie to file: $!\n";

# 'invindex' is a list of field => pipeline pairs, not a hash: 'author'
# is deliberately listed twice to get both a text and a Soundex index.
my $tb = $db->create_table(name     => $OPT{table},
                           attr     => ['author', 'isbn', 'title',
                                        'headline', 'docid'],
                           layout   => $layout,
                           access   => $access,
                           invindex =>
                           [
                            'title'    => $stem,
                            'about'    => $stem,
                            'text'     => $text,
                            'author'   => $text,
                            'colophon' => $text,
                            'author'   => $sound,
                            'isbn'     => $text,
                           ]
                          );
die "Couldn't create table $OPT{table}: $@\n" unless $tb;

# Insert every document: the title, with runs of whitespace collapsed to a
# single blank, doubles as the headline.  A progress line is printed per
# record.
while (my ($did, $value) = each %D) {
  my $record   = $layout->split($value);
  my $headline = $record->{title};
  $headline =~ s/\s+/ /sg;
  printf "%15s %s\n", $record->{isbn}, substr($headline, 0, 60);
  # %$record comes last, so its keys win over 'docid'/'headline' on clash.
  $tb->insert('docid'  => $did,
              headline => $headline,
              %{$record});
}
$tb->set(top => 1);
$tb->close();
$db->close();

$WAIT::Config = $WAIT::Config; # make perl -w happy


__END__
## ###################################################################
## pod
## ###################################################################

=head1 NAME

index_ora - generate a WAIT index for O'Reilly catalog

=head1 SYNOPSIS

B<index_ora>
[B<-database> I<dbname>]
[B<-dir> I<directory>]
[B<-table> I<table name>]
I<directory>

=head1 DESCRIPTION

B<index_ora> creates the database if its directory does not exist yet
(otherwise it opens the existing one), creates the table with its
inverted indexes, and inserts every document delivered by
C<WAIT::Document::Ora>. The remaining command line arguments are handed
to C<WAIT::Document::Ora> unchanged. For each inserted document a line
with its ISBN and the first 60 characters of its title is printed.

=head1 OPTIONS

=over 5

=item B<-database> I<dbname>

Specify database name. Default is F<DB>.

=item B<-dir> I<directory>

Alternate directory where databases are located. Default is the
directory specified during configuration of WAIT.

=item B<-table> I<table name>

Specify an alternate table name. Default is C<ora>.

=back

=head1 AUTHOR

Ulrich Pfeifer E<lt>F<pfeifer@wait.de>E<gt>


Properties

Name Value
cvs2svn:cvs-rev 1.5

  ViewVC Help
Powered by ViewVC 1.1.26