/[wait]/cvs-head/script/index_mail
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /cvs-head/script/index_mail

Parent Directory Parent Directory | Revision Log Revision Log


Revision 48 - (show annotations)
Fri Dec 29 16:09:58 2000 UTC (23 years, 4 months ago) by ulpfr
File size: 4166 byte(s)
Mail indexing works resonable

1 # -*- Mode: Perl -*-
2 # $Basename: index_mail $
3 # $Revision: 1.2 $
4 # Author : Ulrich Pfeifer
5 # Created On : Fri Apr 7 13:45:50 2000
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Fri Dec 29 17:07:26 2000
8 # Language : CPerl
9 #
10 # (C) Copyright 2000, UUNET Deutschland GmbH, Germany
11 #
12
13 use strict;
14 use File::Path;
15 use DB_File;
16 use Getopt::Long;
17 use Cwd;
18
19 require WAIT::Config;
20 require WAIT::Database;
21 require WAIT::Parse::Overview;
22 require WAIT::Document::Split;
23 require WAIT::InvertedIndex;
24
25
# Ask DB_File's B-tree backend for a larger memory cache than its default
# (DB_File documents `cachesize` as a suggested maximum size in bytes).
$DB_BTREE->{'cachesize'} = 200_000;

# Command-line defaults.  Each entry can be overridden by the option of the
# same name; `dir` falls back to /tmp when WAIT has no configured home.
my %OPT = (
    clean    => 0,
    database => 'DB',
    dir      => $WAIT::Config->{WAIT_home} || '/tmp',
    table    => 'mail',
);

# 'clean!' is negatable (-clean / -noclean); the others take a string value.
# Anything left in @ARGV afterwards is treated as input files further down.
GetOptions(\%OPT,
    'clean!',
    'database=s',
    'dir=s',
    'table=s',
) or die "Usage: $0 [-clean|-noclean] [-database dbname] [-dir directory]"
       . " [-table name] file ...\n";
40
# With -clean, remove any existing table of the requested name so that the
# indexing run below starts from scratch.  Nothing to do when the database
# directory does not exist yet.
if ($OPT{clean} and -d "$OPT{dir}/$OPT{database}") {
    # It is the *database* that is opened here, so report it as such.
    my $old_db = WAIT::Database->open(
        name        => $OPT{database},
        'directory' => $OPT{dir},
    ) or die "Could not open database $OPT{database}: $@\n";

    my $old_table = $old_db->table(name => $OPT{table});
    $old_table->drop if $old_table;

    # Remove the table directory if it is still on disk after the drop.
    # rmtree's legacy positional flags: 1 = verbose, 1 = "safe" mode.
    my $table_dir = "$OPT{dir}/$OPT{database}/$OPT{table}";
    rmtree($table_dir, 1, 1) if -d $table_dir;

    $old_db->close;
}
51
# Open the target database, creating it first when its directory does not
# exist.  Each failure message names the operation that actually failed.
my $db;
if (-d "$OPT{dir}/$OPT{database}") {
    $db = WAIT::Database->open(
        name        => $OPT{database},
        'directory' => $OPT{dir},
    ) or die "Could not open database $OPT{database}: $@\n";
}
else {
    $db = WAIT::Database->create(
        name        => $OPT{database},
        'directory' => $OPT{dir},
    ) or die "Could not create database $OPT{database}: $@\n";
}
63
# Parser used both as the table layout and to split each document into
# fields in the indexing loop.
my $layout = WAIT::Parse::Overview->new;

# Filter pipelines handed to create_table as inverted-index definitions.
# The filter names are resolved by WAIT itself.
my $stem = ['isotr', 'isolc', 'split2', 'stop', 'Stem'];

# NOTE(review): the leading hash presumably configures extra per-index
# pipelines ('prefix', 'intervall') -- confirm against WAIT::Table.
my $text = [
    {
        'prefix'    => ['isotr', 'isolc'],
        'intervall' => ['isotr', 'isolc'],
    },
    'isotr', 'isolc', 'split2', 'stop',
];

# Defined but not attached to any index in this script.
my $sound = ['isotr', 'isolc', 'split2', 'Soundex'];

# Tie %D to the input files named on the command line; the tied object is
# also passed to create_table as the table's access object.
my %D;
my $access = tie %D, 'WAIT::Document::Split', 'end', '$', @ARGV
    or die "Couldn't tie to file: $!\n";
79
# Create the table that receives one row per document.  Attribute names and
# inverted-index definitions are spelled out first so the call stays short.
my @attributes = qw(from to subject article docid headline);

# 'subject' is listed twice on purpose: once stemmed, once as plain text.
my @indexes = (
    'subject' => $stem,
    'subject' => $text,
    'to'      => $text,
    'from'    => $text,
);

my $tb = $db->create_table(
    name     => $OPT{table},
    attr     => \@attributes,
    layout   => $layout,
    access   => $access,
    invindex => \@indexes,
) or die "Couldn't create table $OPT{table}: $@\n";
92
# Walk every document exposed through the tied hash and insert it as a row.
while (my ($did, $value) = each %D) {
    my $record = $layout->split($value);

    # Headline is the subject with each run of whitespace squeezed to a
    # single blank.
    (my $headline = $record->{subject}) =~ s/\s+/ /sg;

    # Fields parsed from the document come last, so they win over the
    # docid/headline given here if the parser supplies the same keys.
    $tb->insert(
        'docid'  => $did,
        headline => $headline,
        %{$record},
    );

    # Print only the document ids that match / 0 0/.
    if ($did =~ / 0 0/) {
        print $did, "\n";
    }
}

$tb->set(top => 1);
$tb->close();
$db->close();

$WAIT::Config = $WAIT::Config; # make perl -w happy
109
110
111 __END__
112 ## ###################################################################
113 ## pod
114 ## ###################################################################
115
116 =head1 NAME
117
index_mail - generate a WAIT index for .overview files
119
120 =head1 SYNOPSIS
121
122 B<index_mail>
123 [B<-clean>] [B<-noclean>]
124 [B<-database> I<dbname>]
125 [B<-dir> I<directory>]
[B<-table> I<table name>]
I<file> ...
127
128 =head1 DESCRIPTION
129
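Indexes the files named on the command line into the table given by
B<-table>, creating the database first if it does not exist yet. Each
document is parsed with C<WAIT::Parse::Overview>, and inverted indexes
are built on the C<subject>, C<to> and C<from> fields.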
132
133 =head1 OPTIONS
134
135 =over 5
136
137 =item B<-clean> / B<-noclean>
138
139 Clean the table before indexing. Default is B<off>.
140
141 =item B<-database> I<dbname>
142
143 Specify database name. Default is F<DB>.
144
145 =item B<-dir> I<directory>
146
147 Alternate directory where databases are located. Default is the
148 directory specified during configuration of WAIT.
149
150 =item B<-table> I<table name>
151
Specify an alternate table name. Default is C<mail>.

=back

=head1 AUTHOR
155
156 Ulrich Pfeifer E<lt>F<pfeifer@wait.de>E<gt>
157

Properties

Name Value
cvs2svn:cvs-rev 1.2

  ViewVC Help
Powered by ViewVC 1.1.26