/[wait]/trunk/script/index_ora
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/script/index_ora

Parent Directory | Revision Log


Revision 69 - (hide annotations)
Fri Jan 25 07:27:30 2002 UTC (22 years, 3 months ago) by laperla
Original Path: cvs-head/script/index_ora
File size: 4304 byte(s)
- Produced the first index that worked with 5.7.2@14354

1 ulpfr 55 #!/usr/bin/perl -w
2     # -*- Mode: Perl -*-
3     # $Basename$
4 laperla 69 # $Revision: 1.7 $
5 ulpfr 55 # Author : Ulrich Pfeifer
6     # Created On : Mon Dec 31 13:57:11 2001
7     # Last Modified By: Ulrich Pfeifer
8 ulpfr 62 # Last Modified On: Fri Jan 4 15:59:20 2002
9 ulpfr 55 # Language : CPerl
10     #
11     # (C) Copyright 2001, UUNET Deutschland GmbH, Germany
12     #
13    
14 laperla 69 use 5.007;
15    
16 ulpfr 55 use strict;
17 laperla 69
18 ulpfr 55 use File::Path;
19     use DB_File;
20     use Getopt::Long;
21     use Cwd;
22    
23 laperla 69 BEGIN {require WAIT::Config;}
24     use WAIT::Database;
25     use WAIT::Parse::Ora;
26     use WAIT::Document::Ora;
27     use WAIT::InvertedIndex;
28 ulpfr 55
29    
# Raise the cache size DB_File uses for BTREE databases before any
# index file is opened (the DB_File docs describe this as a size in bytes).
$DB_BTREE->{'cachesize'} = 200_000;

# Command line defaults; each key may be overridden by the option of
# the same name.  'dir' falls back to /tmp when WAIT::Config carries
# no WAIT_home entry.
my %OPT = (
    database => 'DB',
    dir      => $WAIT::Config->{WAIT_home} || '/tmp',
    table    => 'ora',
);

# Parse the options into %OPT.  Anything left in @ARGV afterwards is
# handed to the document tie further down.  On a bad option, print a
# usage line that matches the SYNOPSIS in the POD instead of a
# placeholder.
GetOptions(
    \%OPT,
    'database=s',
    'dir=s',
    'table=s',
) or die "Usage: $0 [-database dbname] [-dir directory] [-table table_name] directory\n";
43    
# Build a job id of the form YYYY-MM-DD_HH:MM_<pid> from the current
# local time.  localtime returns the year as an offset from 1900 and
# the month zero-based, hence the corrections below.
my ($minute, $hour, $mday, $month, $year) = (localtime)[1 .. 5];
my $jobid = sprintf "%04s-%02s-%02s_%02s:%02s_%d",
    $year + 1900, $month + 1, $mday, $hour, $minute, $$;

# Every run creates its own database named "<database>-<jobid>".
my $db = WAIT::Database->create(
    name      => "$OPT{database}-$jobid",
    directory => $OPT{dir},
) or die "Could not create database $OPT{database}: $@\n";
51 ulpfr 55
# Parser object used both as the table layout and to split each
# document into its fields.  Use the arrow form of the constructor
# call; the indirect object form ("new Class") is ambiguous to parse.
my $layout = WAIT::Parse::Ora->new;
53    
54 laperla 69 use lib "/usr/local/apache/lib";
55     use oreilly_de_catalog::wait_handler;
56    
# Filter pipelines for the inverted indexes.  Every pipeline starts
# with the same two date-stamped filters (presumably provided by
# oreilly_de_catalog::wait_handler -- confirm), so keep them in one
# place to stop the copies drifting apart when the stamp changes.
my @normalize = ('OR_tr_20020124', 'OR_lc_20020124');

# Stemmed word index.
my $stem = [@normalize, 'split2', 'stop', 'Stem'];

# Plain word index; the leading hash gives the pipelines applied to
# 'prefix' and 'intervall' lookups.
my $text = [
    {
        'prefix'    => [@normalize],
        'intervall' => [@normalize],
    },
    @normalize, 'split2', 'stop',
];

# Soundex index.
my $sound = [@normalize, 'split2', 'Soundex'];
64 ulpfr 55
# Tie %D to the document collection; the remaining command line
# arguments are passed straight through to WAIT::Document::Ora.
# The object returned by tie is kept because create_table needs it as
# the table's access object.
my %D;
my $access = tie(%D, 'WAIT::Document::Ora', @ARGV)
    or die "Couldn't tie to file: $!\n";
70    
# Attributes stored for every record.
my @table_attrs = qw(author isbn title headline docid);

# Field => pipeline pairs.  This is deliberately a list, not a hash:
# 'author' appears twice so that it is indexed both as text and by
# Soundex.
my @indexed_fields = (
    'title'    => $stem,
    'about'    => $stem,
    'text'     => $text,
    'author'   => $text,
    'colophon' => $text,
    'author'   => $sound,
    'isbn'     => $text,
);

# Create the table inside the freshly created database.
my $tb = $db->create_table(
    name     => $OPT{table},
    attr     => \@table_attrs,
    layout   => $layout,
    access   => $access,
    invindex => \@indexed_fields,
);
unless ($tb) {
    die "Couldn't create table $OPT{table}: $@\n";
}
88    
# Walk every document, print a one-line progress report and insert
# the parsed record into the table.
binmode STDOUT, ":utf8";
while (my ($did, $value) = each %D) {
    my $record = $layout->split($value);

    # The headline is the title with all whitespace runs collapsed.
    # A record without a title keeps an undefined headline; guard the
    # substitution and the report so that such a record does not raise
    # "uninitialized value" warnings under -w.
    my $headline = $record->{title};
    $headline =~ s/\s+/ /sg if defined $headline;

    printf "%15s %s\n",
        (defined $record->{isbn} ? $record->{isbn}          : ''),
        (defined $headline       ? substr($headline, 0, 60) : '');

    # %{$record} comes last, so fields from the parsed record take
    # precedence over the docid/headline given here if it has them.
    $tb->insert(
        'docid'  => $did,
        headline => $headline,
        %{$record},
    );
}

# Finalise the table and the database.
$tb->set(top => 1);
$tb->close();
$db->close();
103    
104 laperla 68 # Now we have a new database with a very long name and we want that
105     # database to be accessible with the $OPT{database} name
106    
107     use File::Spec;
# Publish the new database under its plain name.  A temporary symlink
# (made unique by the PID) is created next to the target and then
# renamed onto "<dir>/<database>".
my $versioned_name = "$OPT{database}-$jobid";
my ($stable_path, $temp_link) =
    map { File::Spec->catdir($OPT{dir}, $_) }
        $OPT{database}, "$OPT{database}-$$";

unlink $temp_link; # may fail
symlink($versioned_name, $temp_link)
    or die "Could not symlink $versioned_name, $temp_link: $!";
rename($temp_link, $stable_path)
    or die "Could not rename $temp_link, $stable_path: $!";

# Mention the variable a second time so that -w does not complain.
$WAIT::Config = $WAIT::Config; # make perl -w happy
116    
117    
118     __END__
119     ## ###################################################################
120     ## pod
121     ## ###################################################################
122    
123     =head1 NAME
124    
125     index_ora - generate a WAIT index for the O'Reilly catalog
126    
127     =head1 SYNOPSIS
128    
129     B<index_ora>
130     [B<-database> I<dbname>]
131     [B<-dir> I<directory>]
132     [B<-table> I<table name>]
133     I<directory>
134    
135     =head1 DESCRIPTION
136    
137     =head1 OPTIONS
138    
139     =over 5
140    
141     =item B<-database> I<dbname>
142    
143     Specify database name. Default is F<DB>.
144    
145     =item B<-dir> I<directory>
146    
147     Alternate directory where databases are located. Default is the
148     directory specified during configuration of WAIT.
149    
150     =item B<-table> I<table name>
151    
152     Specify an alternate table name. Default is C<ora>.
153    
=back

154     =head1 AUTHOR
155    
156     Ulrich Pfeifer E<lt>F<pfeifer@wait.de>E<gt>
157    

Properties

Name Value
cvs2svn:cvs-rev 1.7

  ViewVC Help
Powered by ViewVC 1.1.26