/[wait]/trunk/script/cpanwait
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/script/cpanwait

Parent Directory Parent Directory | Revision Log Revision Log


Revision 109 - (hide annotations)
Tue Jul 13 17:50:27 2004 UTC (19 years, 10 months ago) by dpavlin
File size: 26352 byte(s)
pod fixes

1 dpavlin 86 #!/usr/bin/perl -w
2 ulpfr 10 ######################### -*- Mode: Perl -*- #########################
3     ##
4     ## $Basename: cpanwait $
5     ## $Revision: 1.7 $
6     ##
7     ## Author : Ulrich Pfeifer
8     ## Created On : Sat Jan 4 18:09:28 1997
9     ##
10     ## Last Modified By : Ulrich Pfeifer
11     ## Last Modified On : Sun Nov 22 18:44:36 1998
12     ##
13     ## Copyright (c) 1996-1997, Ulrich Pfeifer
14     ##
15     ##
16     ######################################################################
17    
18     use strict;
19    
20     use File::Path;
21     use DB_File;
22     use Getopt::Long;
23     use File::Find;
24     use File::Basename;
25     use IO::File;
26 dpavlin 102 use IO::Zlib;
27 ulpfr 10
28 dpavlin 108 #use lib '/data/wait/lib';
29     use blib;
30 dpavlin 86
31 ulpfr 10 require WAIT::Config;
32     require WAIT::Database;
33     require WAIT::Parse::Pod;
34     require WAIT::Document::Tar;
35    
36 dpavlin 86 sub fname($);
37 ulpfr 10
38 dpavlin 86 # maximum number of archives to index (set to -1 for unlimited)
39     my $max = -1;
40    
41 ulpfr 10 my %OPT = (database => 'DB',
42     dir => $WAIT::Config->{WAIT_home} || '/tmp',
43     table => 'cpan',
44     clean => 0,
45     remove => [],
46     force => 0,
47 dpavlin 86 # cpan => '/usr/src/perl/CPAN/sources',
48     cpan => '/rest/cpan/CPAN/',
49 ulpfr 10 trust_mtime => 1,
50     match => 'authors/id/',
51     test => 0,
52     # cpan => 'ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN',
53 dpavlin 86 # cpan => 'ftp://ftp.uni-hamburg.de:/pub/soft/lang/perl/CPAN',
54     keep => '/tmp/CPAN/',
55 ulpfr 10 );
56    
57     GetOptions(\%OPT,
58     'database=s',
59     'dir=s',
60     'cpan=s',
61     'table=s',
62     'keep=s',
63     'match=s',
64     'clean!',
65     'test=i', # test level 0: normal
66     # 1: don't change db
67     # 2: don't look at archives even
68    
69     'remove=s@',
70     'force!', # force indexing even if seen
71     'trust_mtime!', # use mtime instead of version number
72 dpavlin 86 'max=i',
73     'reorg!',
74 ulpfr 10 ) || die "Usage: ...\n";
75    
76 dpavlin 86 $max ||= $OPT{max};
77 ulpfr 10
78     clean_database(
79     database => $OPT{database},
80     dir => $OPT{dir},
81     table => $OPT{table},
82     ) if $OPT{clean};
83    
84     my $db = WAIT::Database->open(
85     name => $OPT{database},
86     'directory' => $OPT{dir},
87     )
88     || WAIT::Database->create(
89     name => $OPT{database},
90     'directory' => $OPT{dir},
91     )
92     or die "Could not open/create database '$OPT{dir}/$OPT{database}': $@";
93    
94     my $layout= new WAIT::Parse::Pod;
95    
96     my $tb = $db->table(name => $OPT{table})
97     || create_table(db => $db, table => $OPT{table}, layout => $layout);
98    
99     # Map e.g. '.../latest' to 'perl'. Used in wanted(). Effects version
100     # considerations. Value *must* match common prefix. Aliasing should be
101     # used if CPAN contains serveral distributions with different name but
102     # same root directory.
103     # We still have a problem if there are different root directories!
104    
105     my %ALIAS = (# tar name real (root) name
106 dpavlin 102 'Games-Scrabble' => 'Games',
107     'HTML-ParseBrowser' => 'HTML',
108     'iodbc_ext' => 'iodbc-ext-0.1',
109     'sol-inst' => 'Solaris',
110     'WebService-Validator-CSS-223C' => 'WebService-Validator-CSS-W3C-0.02',
111     'MPEG-ID3212Tag' => 'MPEG-ID3v2Tag-0.36',
112     'WebService-GoogleHack' => 'WebService',
113     'Db-Mediasurface-ReadConfig' => 'ReadConfig',
114     'Tie-Array-RestrictUpdates' => 'Tie',
115     'HTML-Lister' => 'HTML',
116     'Net-253950-AsyncZ' => 'Net-Z3950-AsyncZ-0.08',
117     'ChildExit_0' => 'ChildExit-0.1',
118     'Tie-TieConstant' => 'TieConstant.pm',
119     'Crypt-OpenSSL-23509' => 'Crypt-OpenSSL-X509-0.2',
120     'subclustv' => 'blib',
121     'finance-yahooquote' => 'Finance-YahooQuote-0.20',
122     'HPUX-FS' => 'FS',
123     'Business-DE-Konto' => 'Business',
124     'Digest-MD5-124p' => 'Digest-MD5-M4p-0.01',
125     'AKDB_Okewo_de' => 'AKDB',
126     'ExtUtils-0577' => 'ExtUtils-F77-1.14',
127     'LispFmt' => 'Lisp::Fmt-0.00',
128     'Acme-Stegano' => 'Acme',
129     'Acme-RTB' => 'Acme',
130     'WWW-Search-PRWire' => 'work',
131     'Video-Capture-214l' => 'Video-Capture-V4l-0.224',
132     'Tie-DirHandle' => 'Tie',
133     'DB2' => 'DBD-DB2-0.71a',
134     'Tie-Scalar-RestrictUpdates' => 'Tie',
135     'Math-MVPoly' => 'MVPoly',
136     'PlugIn' => 'PlugIn.pm',
137     'Lingua-ID-Nums2Words' => 'Nums2Words-0.01',
138     'chronos-1.' => 'Chronos',
139     'jp_beta' => 'jperl_beta_r1',
140     'Bundle-223C-Validator' => 'Bundle-W3C-Validator-0.6.5',
141     'Text-199' => 'Text-T9-1.0',
142     'Games-Literati' => 'Games',
143     'VMS-IndexedFile' => 'VMS',
144     'authen-rbac' => 'Authen',
145     'Graphics-EPS' => 'EPS.pm',
146     'new.spirit-2.' => 'new.spirit',
147     'Tk-MListbox' => 'MListbox-1.11',
148     'DBD-SQLrelay' => 'SQLRelay.pm',
149     'Tie-RDBM-Cached' => 'RDBM',
150     'PDL_IO_HDF' => 'HDF',
151     'HPUX-LVM' => 'LVM',
152     'Parse-Nibbler' => 'Parse',
153     'Digest-Perl-MD4' => 'MD4',
154     'Crypt-Imail' => 'Imail',
155     'ubertext' => 'Text-UberText-0.95',
156     'MP3-123U' => 'M3U',
157     'Qmail-Control' => 'Qmail',
158     'T-LXS' => 'Text-LevenshteinXS-0.02',
159     'HTML-Paginator' => 'HTML',
160     'swig' => 'SWIG1.1p5',
161     'MIDI-Realtime' => 'MIDI',
162     'sparky-public' => 'Sparky-Public-1.06',
163     'Chemistry-MolecularMass' => 'Chemistry',
164     'Net-253950-SimpleServer' => 'Net-Z3950-SimpleServer-0.08',
165     'NewsClipper-OpenSource' => 'NewsClipper-1.32-OpenSource',
166     'Win32API-Resources' => 'Resources.pm',
167     'Unicode-Collate-Standard-2131_1' => 'Unicode-Collate-Standard-V3_1_1-0.1',
168     'Net-026Term' => 'Net-C6Term-0.11',
169     'BitArray1' => 'BitArray',
170     'Audio-Radio-214L' => 'Audio-Radio-V4L-0.01',
171     'Devel-AutoProfiler' => 'Devel',
172     'Brasil-Checar-CGC' => 'Brasil',
173     'AI-NeuralNet-SOM' => 'SOM.pm',
174     'Net-BitTorrent-File-fix' => 'Net-BitTorrent-File-1.01',
175     'VMS-FindFile' => 'VMS',
176     'LoadHtml.' => 'README',
177     'Time-Compare' => 'Time',
178     'ShiftJIS-230213-MapUTF' => 'ShiftJIS-X0213-MapUTF-0.21',
179     'Image-WMF' => 'Image',
180     'sdf-2.0.eta' => 'sdf-2.001beta1',
181     'Math-Expr-LATEST' => 'Math-Expr-0.4',
182     'MP3-Player-PktConcert' => 'MP3',
183     'Apache-OWA' => 'OWA',
184     'Audio-Gramofile' => 'Audio',
185     'DBIx-Copy' => 'Copy',
186     'P4-024' => 'P4-C4-2.021',
187     'Disassemble-2386' => 'Disassemble-X86-0.13',
188     'Proc-Swarm' => 'Swarm-0.5',
189     'Smil' => 'perlysmil',
190     'Net-SSH-2232Perl' => 'Net-SSH-W32Perl-0.05',
191     'Win32-SerialPort' => 'SerialPort-0.19',
192     'Lingua-ID-Words2Nums' => 'Words2Nums-0.01',
193     'Parse-Text' => 'Text',
194     'DBIx-HTMLView-LATEST' => 'DBIx-HTMLView-0.9',
195     'Apache-NNTPGateway' => 'NNTPGateway-0.9',
196     'XPathToXML' => 'XPathToXML.pm',
197     'XML-WMM-ASX' => 'XML',
198     'CGISession' => 'CGI',
199     'Net-SMS-142' => 'Net-SMS-O2-0.019',
200     'Search-253950' => 'Search-Z3950-0.05',
201     'Date-Christmas' => 'Christmas',
202     'Win32-InternetExplorer-Window' => 'Win32',
203     'Apache-WAP-MailPeek' => 'MailPeek',
204     'Statistics-Table-F' => 'Statistics',
205     'BerkeleyDB_Locks' => 'BerkeleyDB-Locks-0_2',
206     'HookPrePostCall' => 'PrePostCall-1.2',
207     'Oak-AAS-Service-DBI_13_PAM' => 'Oak-AAS-Service-DBI_N_PAM-1.8',
208     'Math-Vector' => 'Vector.pm',
209     'Audio-124pDecrypt' => 'Audio-M4pDecrypt-0.04',
210     'libao-perl_0.03' => 'libao-perl-0.03',
211     'CGI-EZForm' => 'EZForm',
212     'Data-Locations-fixed' => 'Data-Locations-5.2-fixed',
213     'HTML-Template-Filter-Dreamweaver' => 'Dreamweaver',
214     'LineByLine' => 'LineByLine.pm',
215     'Geo-0400' => 'Geo-E00-0.05',
216     'WebService-Validator-HTML-223C' => 'WebService-Validator-HTML-W3C-0.03',
217     'DateTime-Format-223CDTF' => 'DateTime-Format-W3CDTF-0.04',
218     'DBD_SQLFLEX' => 'DBD-Sqlflex',
219     'Text-Number' => 'Number',
220     'DBIx-DataLookup' => 'DBIx',
221     'MP3-ID3211Tag' => 'MP3-ID3v1Tag-1.11',
222     'Text-Striphigh' => 'Striphigh-0.02',
223     'Tie-SortHash' => 'SortHash',
224     'Apache-AccessAbuse' => 'AccessAbuse',
225     'MP3-123U-Parser' => 'MP3-M3U-Parser',
226     'Net-253950' => 'Net-Z3950-0.44',
227     'Net-RBLClient' => 'RBLCLient-0.2',
228     'CGI-EasyCGI' => 'CGI',
229     'http-handle' => 'HTTP::Handle',
230     'JPEG-Comment' => 'JPEG',
231     'router-lg' => 'Router',
232     'Db-Mediasurface' => 'Mediasurface',
233     'Text-BarGraph' => 'bargraph',
234     'TL' => 'Text-Levenshtein-0.04',
235     'Config-Vars' => 'Config-0.01',
236     'Tie-PerfectHash' => 'Tie',
237     'DNS-TinyDNS' => 'DNS',
238     'DesignPattern-Factory' => 'Factory',
239     'WWW-01_Rail' => 'WWW-B_Rail-0.01',
240     'Win32-Exchange' => 'blib',
241     'Math-RPN' => 'Math',
242     'Db-Mediasurface-Cache' => 'Cache',
243     'perl_archie.' => 'Archie.pm',
244     'Acme-PGPSign' => 'Acme',
245     'HTML-Widget-sideBar' => 'HTML-Widget-SideBar-1.00',
246     'log' => 'Games',
247     'File-List' => 'File',
248     'Schedule-Cronchik' => 'Schedule',
249     'Curses-Devkit' => 'Cdk',
250     'Pod-PalmDoc' => 'Pod',
251     'Easy-WML' => 'Easy WML 0.1',
252     'Interval.' => 'Date',
253     'Brasil-Checar-CPF' => 'Brasil',
254     'Apache-WAP-AutoIndex' => 'AutoIndex',
255    
256     'SOM.pm' => 'SOM.pm',
257     'PlugIn.pm' => 'PlugIn.pm',
258     'XPathToXML.pm' => 'XPathToXML.pm',
259     'Vector.pm' => 'Vector.pm',
260     'LineByLine.pm' => 'LineByLine.pm',
261     'Archie.pm' => 'Archie.pm',
262     'TieConstant.pm' => 'TieConstant.pm',
263     'EPS.pm' => 'EPS.pm',
264     'SQLRelay.pm' => 'SQLRelay.pm',
265     'Resources.pm' => 'Resources.pm',
266     'README' => 'README',
267    
268     );
269 ulpfr 10 my %NEW_ALIAS; # found in this pass
270    
271     # Map module names to pathes. Generated by wanted() doing alisaing.
272     my %ARCHIVE;
273    
274     # Map module names to latest version. Generated by wanted()
275     my %VERSION;
276    
277    
278     # Mapping for modules with common root not matching modules name that
279     # are not aliased. This is just for prefix stripping and not strictly
280     # necessary. Takes effect after version considerations.
281     my %TR = (# tar name root to strip
282     'Net_SSLeay.pm' => 'SSLeay/',
283     'EventDrivenServer' => 'Server/',
284     'bio_lib.pl.' => '',
285     'AlarmCall' => 'Sys/',
286     'Cdk-ext' => 'Cdk/',
287     'Sx' => '\d.\d/',
288     'DumpStack' => 'Devel/',
289     'StatisticsDescriptive'=> 'Statistics/',
290     'Term-Gnuplot' => 'Gnuplot/',
291     'iodbc_ext' => 'iodbc-ext-\d.\d/',
292     'UNIVERSAL' => '',
293     'Term-Query' => 'Query/',
294     'SelfStubber' => 'Devel/',
295     'CallerItem' => 'Devel/',
296     );
297    
298     my $DIR = $tb->dir;
299     my $DATA = $tb->dir . "/data";
300     my $LWP;
301    
302    
303     if (@{$OPT{remove}}) {
304     my $pod;
305     for $pod (@{$OPT{remove}}) {
306     unless (-e $pod) {
307     $pod = "$DIR/$pod";
308     }
309     index_pod(file => $pod, remove => 1) if -f $pod;
310     unlink $pod or warn "Could not unlink '$pod': $!\n";
311     #$tb->sync;
312     }
313     $tb->close;
314     $db->close;
315     exit;
316     }
317    
318     # Now get the beef
319     if ($OPT{cpan} =~ /^(http|ftp):/) {
320     $LWP = 1;
321     require LWP::Simple;
322     LWP::Simple->import();
323    
324     mkpath($DATA,1,0755) or
325     die "Could not generate '$DATA/': $!"
326     unless -d $DATA;
327    
328     if (! -f "$DATA/find-ls.gz" or -M "$DATA/find-ls.gz" > 0.5) {
329     my $status = mirror("$OPT{cpan}/indices/find-ls.gz", "$DATA/find-ls.gz");
330     if ($status != &RC_OK and $status != &RC_NOT_MODIFIED) {
331     # we could use Net:FTP here ...
332     die "Was unable to mirror '$OPT{cpan}/indices/find-ls.gz'\n";
333     }
334     }
335     my $fh = new IO::File "gzip -cd $DATA/find-ls.gz |";
336     die "Could not open 'gzip -cd $DATA/find-ls.gz': !$\n" unless $fh;
337    
338     my $line;
339     while (defined ($line = <$fh>)) {
340     chomp($line);
341     my ($mon, $mday, $time, $file, $is_link) = (split ' ', $line)[7..11];
342    
343     next if defined $is_link;
344     my $mtime = mtime($mon, $mday, $time);
345    
346     $file =~ s:^\./::;
347     ($_) = fileparse($file);
348     $File::Find::name = $file;
349     wanted($mtime);
350     }
351     } else {
352     find(sub {&wanted((stat($_))[9])}, $OPT{cpan});
353     }
354    
355     ARCHIVE:
356     for my $tar (sort keys %ARCHIVE) {
357     next if $OPT{match} and $ARCHIVE{$tar} !~ /$OPT{match}/o;
358     my $base = (split /\//, $ARCHIVE{$tar})[-1];
359     my $parent;
360    
361     # logging
362     if ($OPT{trust_mtime}) {
363     printf "%-20s %10s %s\t", $tar,
364     substr(scalar(localtime($VERSION{$tar})),0,10), $base;
365     } else {
366     printf "%-20s %10.5f %s\t", $tar, $VERSION{$tar}, $base;
367     }
368    
369     # Remember the archive
370     # We should have an extra table for the tar file data ...
371     if (!$OPT{force} and $tb->have(docid => $base)) {
372     print "skipping\n";
373     next ARCHIVE;
374     } else {
375     $parent = $tb->insert(docid => $base,
376     headline => $ARCHIVE{$tar}) unless $OPT{test};
377     print "indexing\n";
378     }
379    
380     next ARCHIVE if $OPT{test} > 1;
381    
382     my $TAR = myget($tar);
383    
384     next ARCHIVE unless $TAR; # not able to fetch it
385    
386     my %tar;
387     tie (%tar,
388     'WAIT::Document::Tar',
389 dpavlin 86 sub { $_[0] =~ /\.(pm|pod|PL)$/i or $_[0] =~ /readme/i},
390 ulpfr 10 #sub { $_[0] !~ m:/$: },
391     $TAR)
392     or warn "Could not tie '$TAR'\n";
393    
394     my $sloppy;
395     my ($key, $val);
396    
397     FILE:
398     while (($key, $val) = each %tar) {
399     my $file = fname($key);
400    
401     # don't index directories
402     next if $file =~ /\/$/;
403    
404     # is it a POD file?
405     next FILE unless $file =~ /readme/i or $val =~ /\n=head/;
406    
407     # remove directory prefix
408     unless ($sloppy # no common root
409     or $file =~ s:^\Q$tar\E[^/]*/:: # common root, maybe alias
410     or ($TR{$tar} # common root, not aliased
411     and $file =~ s:^\Q$TR{$tar}\E::)
412     ) {
413     # try to determine an alias
414     warn "Bad directory prefix: '$file'\n";
415     my ($prefix) = split /\//, $file;
416    
417     while ($key = (tied %tar)->NEXTKEY) {
418     my $file = fname($key);
419    
420     next if $file =~ /\/$/;
421     unless ($file =~ m:^$prefix/: or $file eq $prefix) {
422     warn "Archive contains different prefixes: $prefix,$file\n";
423     $prefix = '';
424     last;
425     }
426     }
427     if ($prefix) {
428     print "Please alias '$tar' to '$prefix' next time!\n";
429     print "See alias table later.\n";
430     $NEW_ALIAS{$tar} = $prefix;
431     $tb->delete_by_key($parent);
432     next ARCHIVE;
433     } else {
434     print "Assuming that tar file name $tar is a valid prefix\n";
435     $sloppy = 1;
436    
437     # We may reset too much here! But that this is not exact
438     # science anyway. Maybe we should ignore using 'next ARCHIVE'.
439    
440     $key = (tied %tar)->FIRSTKEY;
441     redo FILE;
442     }
443     }
444    
445     # remove /lib prefix
446     $file =~ s:^lib/::;
447    
448     # generate new path
449     my $path = "$DATA/$tar/$file";
450    
451     my ($sbase, $sdir) = fileparse($path);
452     my $fh;
453    
454     unless ($OPT{test}) {
455     if (-f $path) {
456     index_pod(file => $path, remove => 1);
457     unlink $path or warn "Could not unlink '$path' $!\n";
458     } elsif (!-d $sdir) {
459     mkpath($sdir,1,0755) or die "Could not mkpath($sdir): $!\n";
460     }
461 dpavlin 102 # $fh = new IO::File "> $path";
462     $fh = new IO::Zlib "$path.gz","wb";
463 ulpfr 10 die "Could not write '$path': $!\n" unless $fh;
464     }
465    
466     if ($file =~ /readme|install/i) { # make READMEs verbatim pods
467     $val =~ s/\n/\n /g;
468     $val = "=head1 NAME\n\n$tar $file\n\n=head1 DESCRIPTION\n\n $val"
469     unless $val =~ /^=head/m;
470     } else { # remove non-pod stuff
471     my $nval = $val; $val = '';
472     my $cutting = 1;
473    
474     for (split /\n/, $nval) {
475     if (/^=cut|!NO!SUBS!/) {
476     $cutting = 1;
477     } elsif ($cutting and /^=head/) {
478     $cutting = 0;
479     }
480     unless ($cutting) {
481     $val .= $_ . "\n";
482     }
483     }
484     }
485     unless ($OPT{test}) {
486     $fh->print($val);
487     index_pod(file => $path, parent => $parent,
488     text => $val, source => $ARCHIVE{$tar});
489     }
490     }
491    
492     if ($LWP and !$OPT{keep}) {
493     unlink $TAR or warn
494     "Could not unlink '$TAR': $!\n";
495     }
496     }
497    
498     if (%NEW_ALIAS) {
499     print "\%ALIAS = (\n";
500     for (keys %NEW_ALIAS) {
501     print "\t'$_'\t=> '$NEW_ALIAS{$_}',\n";
502     }
503     print "\t);\n";
504     }
505    
506 dpavlin 86 if ($OPT{reorg}) {
507     my $now = time;
508     warn "Starting reorg\n";
509     $tb->set(top=>1);
510     warn sprintf "Finished reorg %d seconds\n", time - $now;
511     }
512    
513 ulpfr 10 # we are done
514     $db->close();
515     exit;
516    
517     sub fname ($) {
518     my $key = shift;
519     my ($ntar, $file) = split $;, $key;
520    
521     # remove leading './' - shudder
522     $file =~ s/^\.\///;
523    
524     return($file);
525     }
526    
527     sub myget {
528     my $tar = shift;
529     my $TAR;
530    
531     if ($LWP) { # fetch the archive
532     if ($OPT{keep}) {
533     $TAR = "$OPT{keep}/$ARCHIVE{$tar}";
534     print "Keeping in '$TAR'\n" unless -e $TAR;
535     my ($base, $path) = fileparse($TAR);
536     unless (-d $path) {
537     mkpath($path,1,0755) or
538     die "Could not mkpath($path)\n";
539     }
540     } else {
541     $TAR = "/tmp/$tar.tar.gz";
542     }
543     unless (-e $TAR) { # lwp mirror seems to fetch ftp: in any case?
544     print "Fetching $OPT{cpan}/$ARCHIVE{$tar}\n";
545     my $status = mirror("$OPT{cpan}/$ARCHIVE{$tar}", $TAR);
546     if ($status != &RC_OK and $status != &RC_NOT_MODIFIED) {
547     warn "Was unable to mirror '$ARCHIVE{$tar}, skipping'\n";
548     return;
549     }
550     }
551 dpavlin 86 } else {
552     $TAR = $ARCHIVE{$tar};
553 ulpfr 10 }
554     $TAR;
555     }
556    
557     sub index_pod {
558     my %parm = @_;
559     my $did = $parm{file};
560     my $rel_did = $did;
561     my $abs_did = $did;
562    
563     if ($rel_did =~ s:$DIR/::) {
564     $abs_did = "$DIR/$rel_did";
565     }
566    
567     undef $did;
568    
569     # check for both variants
570     if ($tb->have('docid' => $rel_did)) {
571     $did = $rel_did;
572     } elsif ($tb->have('docid' => $abs_did)) {
573     $did = $abs_did;
574     }
575     if ($did) { # have it version
576     if (!$parm{remove}) {
577     warn "duplicate: $did\n";
578     return;
579     }
580     } else { # not seen yet
581     $did = $rel_did;
582     if ($parm{remove}) {
583     print "missing: $did\n";
584     return;
585     }
586     }
587    
588     $parm{'text'} ||= WAIT::Document::Find->FETCH($abs_did);
589    
590     unless (defined $parm{'text'}) {
591     print "unavailable: $did\n";
592     return;
593     }
594    
595     my $record = $layout->split($parm{'text'});
596 dpavlin 86
597     if (! $record) {
598     print "empty pod: $did\n";
599     return;
600     }
601    
602 ulpfr 10 $record->{size} = length($parm{'text'});
603     my $headline = $record->{name} || $did;
604    
605     $headline =~ s/^$DATA//o; # $did
606     $headline =~ s/\s+/ /g; $headline =~ s/^\s+//;
607    
608     printf "%s %s\n", ($parm{remove})?'-':'+', substr($headline,0,70);
609     if ($parm{remove}) {
610     $tb->delete('docid' => $did,
611     headline => $headline,
612     %{$record});
613     } else {
614     $tb->insert('docid' => $did,
615     headline => $headline,
616     source => $parm{source},
617     parent => $parm{parent},
618     %{$record});
619     }
620     }
621    
622     # This *must* remove the version in *any* case. It should compute a
623     # resonable version number - but usually mtimes should be used.
624     sub version {
625     local ($_) = @_;
626    
627     # remove alpha/beta postfix
628     s/([-_\d])(a|b|alpha|beta|src)$/$1/;
629    
630     # jperl1.3@4.019.tar.gz
631     s/@\d.\d+//;
632    
633     # oraperl-v2.4-gk.tar.gz
634     s/-v(\d)/$1/;
635    
636     # lettered versions - shudder
637     s/([-_\d\.])([a-z])([\d\._])/sprintf "$1%02d$3", ord(lc $2) - ord('a') /ei;
638     s/([-_\d\.])([a-z])$/sprintf "$1%02d", ord(lc $2) - ord('a') /ei;
639    
640     # thanks libwww-5b12 ;-)
641     s/(\d+)b/($1-1).'.'/e;
642     s/(\d+)a/($1-2).'.'/e;
643    
644     # replace '-pre' by '0.'
645     s/-pre([\.\d])/-0.$1/;
646     s/\.\././g;
647     s/(\d)_(\d)/$1$2/g;
648    
649     # chop '[-.]' and thelike
650     s/\W$//;
651    
652     # ram's versions Storable-0.4@p
653     s/\@/./;
654    
655     if (s/[-_]?(\d+)\.(0\d+)\.(\d+)$//) {
656     return($_, $1 + "0.$2" + $3 / 1000000);
657     } elsif (s/[-_]?(\d+)\.(\d+)\.(\d+)$//) {
658     return($_, $1 + $2/1000 + $3 / 1000000);
659     } elsif (s/[-_]?(\d+\.[\d_]+)$//) {
660     return($_, $1);
661     } elsif (s/[-_]?([\d_]+)$//) {
662     return($_, $1);
663     } elsif (s/-(\d+.\d+)-/-/) { # perl-4.019-ref-guide
664     return($_, $1);
665     } else {
666     if ($_ =~ /\d/) { # smells like an unknown scheme
667     warn "Odd version Numbering: '$File::Find::name'\n";
668     return($_, undef);
669     } else { # assume version 0
670     warn "No version Numbering: '$File::Find::name'\n";
671     return($_, 0);
672     }
673    
674     }
675     }
676    
677     sub wanted {
678     my $mtime = shift; # called by parse_file_ls();
679    
680     return unless /^(.*)\.tar(\.gz|\.Z)$/;
681 dpavlin 86
682     return if (! $max);
683     $max--;
684    
685 ulpfr 10 my ($archive, $version) = version($1);
686    
687     unless (defined $version) {
688     warn "Skipping $1\n";
689     return;
690     }
691    
692     # Check for file alias
693     $archive = $ALIAS{$archive} if $ALIAS{$archive};
694    
695     # Check for path alias.
696     if ($File::Find::name =~ m(/CPAN/(?:source/)?(.*\Q$archive\E))) {
697     if ($ALIAS{$1}) {
698     $archive = $ALIAS{$1};
699     }
700     }
701    
702     if ($OPT{trust_mtime}) {
703     $version = $mtime;
704     } else {
705     $version =~ s/(\d)_/$1/;
706     $version ||= $mtime; # mtime
707     }
708    
709     if (!exists $ARCHIVE{$archive}
710     or $VERSION{$archive} < $version) {
711     $ARCHIVE{$archive} = $File::Find::name;
712     $VERSION{$archive} = $version;
713     }
714     }
715    
716     sub clean_database {
717     my %parm = @_;
718    
719     my $db = WAIT::Database->open(
720     name => $parm{database},
721     'directory' => $parm{dir},
722     )
723     or die "Could not open database '$parm{dir}/$parm{database}': $@";
724     my $tbl = $db->table(name => $parm{table});
725     if ($tbl) {
726     $tbl->drop or
727     die "Could not open table '$parm{tabel}': $@";
728     }
729    
730     $db->close;
731     }
732    
733     sub create_table {
734     my %parm = @_;
735    
736     my $access = bless {}, 'WAIT::Document::Find';
737    
738     my $stem = [{
739     'prefix' => ['isotr', 'isolc'],
740     'intervall' => ['isotr', 'isolc'],
741     }, 'isotr', 'isolc', 'split2', 'stop', 'Stem'];
742     my $text = [{
743     'prefix' => ['isotr', 'isolc'],
744     'intervall' => ['isotr', 'isolc'],
745     },
746     'isotr', 'isolc', 'split2', 'stop'];
747     my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],;
748    
749     my $tb =
750     $parm{db}->create_table
751     (name => $parm{table},
752     attr => ['docid', 'headline', 'source', 'size', 'parent'],
753     keyset => [['docid']],
754     layout => $parm{layout},
755     access => $access,
756     invindex =>
757     [
758     'name' => $stem,
759     'synopsis' => $stem,
760     'bugs' => $stem,
761     'description' => $stem,
762     'text' => $stem,
763     'environment' => $text,
764     'example' => $text, 'example' => $stem,
765     'author' => $sound, 'author' => $stem,
766     ]
767     );
768     die "Could not create table '$parm{table}'" unless $tb;
769     $tb;
770     }
771    
772     my %MON;
773     my $YEAR;
774    
775     BEGIN {
776     my $i = 1;
777     for (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
778     $MON{$_} = $i++;
779     }
780     $YEAR = (localtime(time))[5];
781     }
782    
783     # We could/should use Date::GetDate here
784     use Time::Local;
785     sub mtime {
786     my ($mon, $mday, $time) = @_;
787     my ($hour, $min, $year, $monn) = (0,0);
788    
789     if ($time =~ /(\d+):(\d+)/) {
790     ($hour, $min) = ($1, $2);
791     $year = $YEAR;
792     } else {
793     $year = $time;
794     }
795     $monn = $MON{$mon} || $MON{ucfirst lc $mon} || warn "Unknown month: '$mon'";
796     my $guess = timelocal(0,$min,$hour,$mday,$monn-1,$year);
797     if ($guess > time) {
798     $guess = timelocal(0,$min,$hour,$mday,$monn-1,$year-1);
799     }
800     $guess;
801     }
802    
803 dpavlin 104 $WAIT::Config = $WAIT::Config;
804 ulpfr 10
805     __END__
806     ## ###################################################################
807     ## pod
808     ## ###################################################################
809    
810     =head1 NAME
811    
812 dpavlin 86 cpanwait - generate an WAIT index for CPAN
813 ulpfr 10
814     =head1 SYNOPSIS
815    
816 dpavlin 86 B<cpanwait>
817 ulpfr 10 [B<-clean>] [B<-noclean>]
818     [B<-cpan> I<url or directory>]
819     [B<-database> I<dbname>]
820     [B<-dir> I<directory>]
821     [B<-force>] [B<-noforce>]
822     [B<-keep> I<directory>]
823     [B<-match> I<regexp>]
824     [B<-table> I<table name>]
825     [B<-test> I<level>]
826     [B<-trust_mtime>] [B<-notrust_mtime>]
827    
828     =head1 DESCRIPTION
829    
830     TBS
831    
832     =head1 OPTIONS
833    
834     =over 5
835    
836     =item B<-clean> / B<-noclean>
837    
838     Clean the table befor indexing. Default is B<off>.
839    
840     =item B<-cpan> I<url or directory>
841    
842     Default directory or URL for indexing. If an URL is given, there
843     currently must be a file F<indices/find-ls.gz> relative to it which
844     contains the output of C<find . -ls | gzip>.
845     Default is F<ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN>.
846    
847    
848     =item B<-database> I<dbname>
849    
850     Specify database name. Default is F<DB>.
851    
852     =item B<-dir> I<directory>
853    
854     Alternate directory were databases are located. Default is the
855     directory specified during configuration of WAIT.
856    
857     =item B<-force>
858    
859     Force reindexing, even if B<cpan> thinks files are up to date.
860     Default is B<off>
861    
862     =item B<-keep> I<directory>
863    
864     If fetching from a remote server, keep files in I<directory>. Default is
865     F</app/unido-i06/src/share/lang/perl/96a/CPAN/sources>.
866    
867     =item B<-match> I<regexp>
868    
869     Limit to patches matching I<regexp>. Default is F<authors/id/>.
870    
871     =item B<-table> I<table name>
872    
873     Specify an alternate table name. Default is C<cpan>.
874    
875     =item B<-test> I<level>
876    
877     Set test level, were B<0> means normal operation, B<1> means, don't
878     really index and B<2> means, don't even get archives and examine them.
879    
880     =item B<-trust_mtime> / B<-notrust_mtime>
881    
882     If B<on>, the files mtimes are used to decide, which version of an
883     archive is the newest. If b<off>, the version extracted is used
884     (beware, there are far more version numbering schemes than B<cpan> can
885     parse).
886    
887 dpavlin 109 =back
888    
889 ulpfr 10 =head1 AUTHOR
890    
891     Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortumund.de>E<gt>

Properties

Name Value
cvs2svn:cvs-rev 1.1
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26