/[wait]/trunk/script/index_mail
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/script/index_mail

Parent Directory Parent Directory | Revision Log Revision Log


Revision 109 - (hide annotations)
Tue Jul 13 17:50:27 2004 UTC (19 years, 10 months ago) by dpavlin
File size: 4192 byte(s)
pod fixes

1 ulpfr 70 #!/usr/bin/perl -w
2 ulpfr 47 # -*- Mode: Perl -*-
3     # $Basename: index_mail $
4 ulpfr 70 # $Revision: 1.3 $
5 ulpfr 47 # Author : Ulrich Pfeifer
6     # Created On : Fri Apr 7 13:45:50 2000
7     # Last Modified By: Ulrich Pfeifer
8 ulpfr 48 # Last Modified On: Fri Dec 29 17:07:26 2000
9 ulpfr 47 # Language : CPerl
10     #
11     # (C) Copyright 2000, UUNET Deutschland GmbH, Germany
12     #
13    
14     use strict;
15     use File::Path;
16     use DB_File;
17     use Getopt::Long;
18     use Cwd;
19    
20     require WAIT::Config;
21     require WAIT::Database;
22     require WAIT::Parse::Overview;
23     require WAIT::Document::Split;
24     require WAIT::InvertedIndex;
25    
26    
# Main body of index_mail: open (or create) a WAIT database, create the
# mail table inside it, and insert one row per document produced by
# splitting the input named in @ARGV.

# Set the cache size on DB_File's shared $DB_BTREE settings object.
$DB_BTREE->{'cachesize'} = 200_000;

# Command-line defaults; GetOptions() below overwrites them in place.
my %OPT = (
    clean    => 0,
    database => 'DB',
    dir      => $WAIT::Config->{WAIT_home} || '/tmp',
    table    => 'mail',
);

GetOptions(
    \%OPT,
    'clean!',
    'database=s',
    'dir=s',
    'table=s',
) or die "Usage: $0 [-clean|-noclean] [-database dbname] [-dir directory] "
       . "[-table name] [file ...]\n";

# Directory that holds the database; its existence decides open vs. create.
my $db_path = "$OPT{dir}/$OPT{database}";

# With -clean, drop the table from an already existing database and remove
# whatever is left of its directory before re-indexing.
if ($OPT{clean} and -d $db_path) {
    my $tmp = WAIT::Database->open(
        name      => $OPT{database},
        directory => $OPT{dir},
    ) or die "Could not open database $OPT{database}: $@\n";

    my $tbl = $tmp->table(name => $OPT{table});
    $tbl->drop if $tbl;

    # Legacy File::Path interface: rmtree($path, $verbose, $safe).
    my $table_path = "$db_path/$OPT{table}";
    rmtree($table_path, 1, 1) if -d $table_path;

    $tmp->close;
}

# Open the database if its directory exists, otherwise create it.
my $db;
if (-d $db_path) {
    $db = WAIT::Database->open(
        name      => $OPT{database},
        directory => $OPT{dir},
    ) or die "Could not open database $OPT{database}: $@\n";
}
else {
    $db = WAIT::Database->create(
        name      => $OPT{database},
        directory => $OPT{dir},
    ) or die "Could not create database $OPT{database}: $@\n";
}

# Parser that turns one raw document into a hash of attributes.
my $layout = WAIT::Parse::Overview->new;

# Filter pipelines handed to the inverted indexes below.
my $stem = ['isotr', 'isolc', 'split2', 'stop', 'Stem'];
my $text = [
    {
        'prefix'    => ['isotr', 'isolc'],
        'intervall' => ['isotr', 'isolc'],
    },
    'isotr', 'isolc', 'split2', 'stop',
];

# %D maps document ids to raw documents; the remaining command-line
# arguments are passed straight through to WAIT::Document::Split.
my %D;
my $access = tie(%D, 'WAIT::Document::Split', 'end', '$', @ARGV)
    or die "Couldn't tie to file: $!\n";

my $tb = $db->create_table(
    name     => $OPT{table},
    attr     => ['from', 'to', 'subject', 'article', 'docid', 'headline'],
    layout   => $layout,
    access   => $access,
    invindex => [
        'subject' => $stem,
        'subject' => $text,
        'to'      => $text,
        'from'    => $text,
    ],
);
die "Couldn't create table $OPT{table}: $@\n" unless $tb;

# Insert every document; the headline is the subject with each run of
# whitespace collapsed to a single blank.
while (my ($did, $value) = each %D) {
    my $record   = $layout->split($value);
    my $headline = $record->{subject};

    # A record without a subject keeps an undefined headline; skipping the
    # substitution avoids an "uninitialized value" warning under -w.
    $headline =~ s/\s+/ /sg if defined $headline;

    #printf "%s\n", substr($headline,0,80);

    # %{$record} comes last, so attributes from the parsed record win over
    # the docid/headline pairs given before it.
    $tb->insert(
        'docid'  => $did,
        headline => $headline,
        %{$record},
    );

    # Print only the document ids that contain " 0 0".
    print $did, "\n" if $did =~ / 0 0/;
}

$tb->set(top => 1);
$tb->close();
$db->close();

$WAIT::Config = $WAIT::Config;    # make perl -w happy
110    
111    
112     __END__
113     ## ###################################################################
114     ## pod
115     ## ###################################################################
116    
117     =head1 NAME
118    
119     index_mail - generate a WAIT index for .overview files
120    
121     =head1 SYNOPSIS
122    
123     B<index_mail>
124     [B<-clean>] [B<-noclean>]
125     [B<-database> I<dbname>]
126     [B<-dir> I<directory>]
127     [B<-table> I<table name>]
128    
129     =head1 DESCRIPTION
130    
131     Indexes the files named on the command line: each one is split into
132     documents, which are parsed and inserted into the table (see B<-table>).
133    
134     =head1 OPTIONS
135    
136     =over 5
137    
138     =item B<-clean> / B<-noclean>
139    
140     Clean the table before indexing. Default is B<off>.
141    
142     =item B<-database> I<dbname>
143    
144     Specify database name. Default is F<DB>.
145    
146     =item B<-dir> I<directory>
147    
148     Alternate directory where databases are located. Default is the
149     directory specified during configuration of WAIT.
150    
151     =item B<-table> I<table name>
152    
153     Specify an alternate table name. Default is C<mail>.
154    
155 dpavlin 109 =back
156    
157 ulpfr 47 =head1 AUTHOR
158    
159     Ulrich Pfeifer E<lt>F<pfeifer@wait.de>E<gt>
160    

Properties

Name Value
cvs2svn:cvs-rev 1.3

  ViewVC Help
Powered by ViewVC 1.1.26