/[wait]/cvs-head/script/cpanwait
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /cvs-head/script/cpanwait

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (hide annotations)
Fri Apr 28 15:40:52 2000 UTC (24 years, 1 month ago) by ulpfr
File size: 19070 byte(s)
Initial revision

1 ulpfr 10 #!/usr/local/perl5.005_56.Mar06/bin/perl -w
2     ######################### -*- Mode: Perl -*- #########################
3     ##
4     ## $Basename: cpanwait $
5     ## $Revision: 1.7 $
6     ##
7     ## Author : Ulrich Pfeifer
8     ## Created On : Sat Jan 4 18:09:28 1997
9     ##
10     ## Last Modified By : Ulrich Pfeifer
11     ## Last Modified On : Sun Nov 22 18:44:36 1998
12     ##
13     ## Copyright (c) 1996-1997, Ulrich Pfeifer
14     ##
15     ##
16     ######################################################################
17    
18     eval 'exec perl -S $0 "$@"'
19     if 0;
20    
21    
22     use strict;
23    
24     use File::Path;
25     use DB_File;
26     use Getopt::Long;
27     use File::Find;
28     use File::Basename;
29     use IO::File;
30    
31     require WAIT::Config;
32     require WAIT::Database;
33     require WAIT::Parse::Pod;
34     require WAIT::Document::Tar;
35    
36    
37     my %OPT = (database => 'DB',
38     dir => $WAIT::Config->{WAIT_home} || '/tmp',
39     table => 'cpan',
40     clean => 0,
41     remove => [],
42     force => 0,
43     cpan => '/usr/src/perl/CPAN/sources',
44     trust_mtime => 1,
45     match => 'authors/id/',
46     test => 0,
47     # cpan => 'ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN',
48     cpan => 'ftp://ftp.uni-hamburg.de:/pub/soft/lang/perl/CPAN',
49     keep => '/app/unido-i06/src/share/lang/perl/96a/CPAN/sources',
50     );
51    
52     GetOptions(\%OPT,
53     'database=s',
54     'dir=s',
55     'cpan=s',
56     'table=s',
57     'keep=s',
58     'match=s',
59     'clean!',
60     'test=i', # test level 0: normal
61     # 1: don't change db
62     # 2: don't look at archives even
63    
64     'remove=s@',
65     'force!', # force indexing even if seen
66     'trust_mtime!', # use mtime instead of version number
67     ) || die "Usage: ...\n";
68    
69    
70     clean_database(
71     database => $OPT{database},
72     dir => $OPT{dir},
73     table => $OPT{table},
74     ) if $OPT{clean};
75    
76     my $db = WAIT::Database->open(
77     name => $OPT{database},
78     'directory' => $OPT{dir},
79     )
80     || WAIT::Database->create(
81     name => $OPT{database},
82     'directory' => $OPT{dir},
83     )
84     or die "Could not open/create database '$OPT{dir}/$OPT{database}': $@";
85    
86     my $layout= new WAIT::Parse::Pod;
87    
88     my $tb = $db->table(name => $OPT{table})
89     || create_table(db => $db, table => $OPT{table}, layout => $layout);
90    
91     # Map e.g. '.../latest' to 'perl'. Used in wanted(). Effects version
92     # considerations. Value *must* match common prefix. Aliasing should be
93     # used if CPAN contains serveral distributions with different name but
94     # same root directory.
95     # We still have a problem if there are different root directories!
96    
97     my %ALIAS = (# tar name real (root) name
98     'latest' => 'perl',
99     'perl5db-kit' => 'DB',
100     'SGI-FM' => 'FM',
101     'net-ext' => 'Net',
102     'VelocisSQL' => 'Velocis',
103     'Net-ext' => 'Net',
104     'Curses-DevKit' => 'Cdk',
105     'PostgresPerl' => 'Postgres',
106     'perlpdf' => 'PERLPDF',
107     'Des-perl' => 'Des',
108     'SGI-GL' => 'GL',
109     'DBD-DB2' => 'DB2',
110     );
111     my %NEW_ALIAS; # found in this pass
112    
113     # Map module names to pathes. Generated by wanted() doing alisaing.
114     my %ARCHIVE;
115    
116     # Map module names to latest version. Generated by wanted()
117     my %VERSION;
118    
119    
120     # Mapping for modules with common root not matching modules name that
121     # are not aliased. This is just for prefix stripping and not strictly
122     # necessary. Takes effect after version considerations.
123     my %TR = (# tar name root to strip
124     'Net_SSLeay.pm' => 'SSLeay/',
125     'EventDrivenServer' => 'Server/',
126     'bio_lib.pl.' => '',
127     'AlarmCall' => 'Sys/',
128     'Cdk-ext' => 'Cdk/',
129     'Sx' => '\d.\d/',
130     'DumpStack' => 'Devel/',
131     'StatisticsDescriptive'=> 'Statistics/',
132     'Term-Gnuplot' => 'Gnuplot/',
133     'iodbc_ext' => 'iodbc-ext-\d.\d/',
134     'UNIVERSAL' => '',
135     'Term-Query' => 'Query/',
136     'SelfStubber' => 'Devel/',
137     'CallerItem' => 'Devel/',
138     );
139    
140     my $DIR = $tb->dir;
141     my $DATA = $tb->dir . "/data";
142     my $LWP;
143    
144    
145     if (@{$OPT{remove}}) {
146     my $pod;
147     for $pod (@{$OPT{remove}}) {
148     unless (-e $pod) {
149     $pod = "$DIR/$pod";
150     }
151     index_pod(file => $pod, remove => 1) if -f $pod;
152     unlink $pod or warn "Could not unlink '$pod': $!\n";
153     #$tb->sync;
154     }
155     $tb->close;
156     $db->close;
157     exit;
158     }
159    
160     # Now get the beef
161     if ($OPT{cpan} =~ /^(http|ftp):/) {
162     $LWP = 1;
163     require LWP::Simple;
164     LWP::Simple->import();
165    
166     mkpath($DATA,1,0755) or
167     die "Could not generate '$DATA/': $!"
168     unless -d $DATA;
169    
170     if (! -f "$DATA/find-ls.gz" or -M "$DATA/find-ls.gz" > 0.5) {
171     my $status = mirror("$OPT{cpan}/indices/find-ls.gz", "$DATA/find-ls.gz");
172     if ($status != &RC_OK and $status != &RC_NOT_MODIFIED) {
173     # we could use Net:FTP here ...
174     die "Was unable to mirror '$OPT{cpan}/indices/find-ls.gz'\n";
175     }
176     }
177     my $fh = new IO::File "gzip -cd $DATA/find-ls.gz |";
178     die "Could not open 'gzip -cd $DATA/find-ls.gz': !$\n" unless $fh;
179    
180     my $line;
181     while (defined ($line = <$fh>)) {
182     chomp($line);
183     my ($mon, $mday, $time, $file, $is_link) = (split ' ', $line)[7..11];
184    
185     next if defined $is_link;
186     my $mtime = mtime($mon, $mday, $time);
187    
188     $file =~ s:^\./::;
189     ($_) = fileparse($file);
190     $File::Find::name = $file;
191     wanted($mtime);
192     }
193     } else {
194     find(sub {&wanted((stat($_))[9])}, $OPT{cpan});
195     }
196    
197     ARCHIVE:
198     for my $tar (sort keys %ARCHIVE) {
199     next if $OPT{match} and $ARCHIVE{$tar} !~ /$OPT{match}/o;
200     my $base = (split /\//, $ARCHIVE{$tar})[-1];
201     my $parent;
202    
203     # logging
204     if ($OPT{trust_mtime}) {
205     printf "%-20s %10s %s\t", $tar,
206     substr(scalar(localtime($VERSION{$tar})),0,10), $base;
207     } else {
208     printf "%-20s %10.5f %s\t", $tar, $VERSION{$tar}, $base;
209     }
210    
211     # Remember the archive
212     # We should have an extra table for the tar file data ...
213     if (!$OPT{force} and $tb->have(docid => $base)) {
214     print "skipping\n";
215     next ARCHIVE;
216     } else {
217     $parent = $tb->insert(docid => $base,
218     headline => $ARCHIVE{$tar}) unless $OPT{test};
219     print "indexing\n";
220     }
221    
222     next ARCHIVE if $OPT{test} > 1;
223    
224     my $TAR = myget($tar);
225    
226     next ARCHIVE unless $TAR; # not able to fetch it
227    
228     my %tar;
229     tie (%tar,
230     'WAIT::Document::Tar',
231     sub { $_[0] =~ /\.(pm|pod|PL)$/ or $_[0] =~ /readme/i},
232     #sub { $_[0] !~ m:/$: },
233     $TAR)
234     or warn "Could not tie '$TAR'\n";
235    
236     my $sloppy;
237     my ($key, $val);
238    
239     FILE:
240     while (($key, $val) = each %tar) {
241     my $file = fname($key);
242    
243     # don't index directories
244     next if $file =~ /\/$/;
245    
246     # is it a POD file?
247     next FILE unless $file =~ /readme/i or $val =~ /\n=head/;
248    
249     # remove directory prefix
250     unless ($sloppy # no common root
251     or $file =~ s:^\Q$tar\E[^/]*/:: # common root, maybe alias
252     or ($TR{$tar} # common root, not aliased
253     and $file =~ s:^\Q$TR{$tar}\E::)
254     ) {
255     # try to determine an alias
256     warn "Bad directory prefix: '$file'\n";
257     my ($prefix) = split /\//, $file;
258    
259     while ($key = (tied %tar)->NEXTKEY) {
260     my $file = fname($key);
261    
262     next if $file =~ /\/$/;
263     unless ($file =~ m:^$prefix/: or $file eq $prefix) {
264     warn "Archive contains different prefixes: $prefix,$file\n";
265     $prefix = '';
266     last;
267     }
268     }
269     if ($prefix) {
270     print "Please alias '$tar' to '$prefix' next time!\n";
271     print "See alias table later.\n";
272     $NEW_ALIAS{$tar} = $prefix;
273     $tb->delete_by_key($parent);
274     next ARCHIVE;
275     } else {
276     print "Assuming that tar file name $tar is a valid prefix\n";
277     $sloppy = 1;
278    
279     # We may reset too much here! But that this is not exact
280     # science anyway. Maybe we should ignore using 'next ARCHIVE'.
281    
282     $key = (tied %tar)->FIRSTKEY;
283     redo FILE;
284     }
285     }
286    
287     # remove /lib prefix
288     $file =~ s:^lib/::;
289    
290     # generate new path
291     my $path = "$DATA/$tar/$file";
292    
293     my ($sbase, $sdir) = fileparse($path);
294     my $fh;
295    
296     unless ($OPT{test}) {
297     if (-f $path) {
298     index_pod(file => $path, remove => 1);
299     unlink $path or warn "Could not unlink '$path' $!\n";
300     } elsif (!-d $sdir) {
301     mkpath($sdir,1,0755) or die "Could not mkpath($sdir): $!\n";
302     }
303     $fh = new IO::File "> $path";
304     die "Could not write '$path': $!\n" unless $fh;
305     }
306    
307     if ($file =~ /readme|install/i) { # make READMEs verbatim pods
308     $val =~ s/\n/\n /g;
309     $val = "=head1 NAME\n\n$tar $file\n\n=head1 DESCRIPTION\n\n $val"
310     unless $val =~ /^=head/m;
311     } else { # remove non-pod stuff
312     my $nval = $val; $val = '';
313     my $cutting = 1;
314    
315     for (split /\n/, $nval) {
316     if (/^=cut|!NO!SUBS!/) {
317     $cutting = 1;
318     } elsif ($cutting and /^=head/) {
319     $cutting = 0;
320     }
321     unless ($cutting) {
322     $val .= $_ . "\n";
323     }
324     }
325     }
326     unless ($OPT{test}) {
327     $fh->print($val);
328     index_pod(file => $path, parent => $parent,
329     text => $val, source => $ARCHIVE{$tar});
330     }
331     }
332    
333     if ($LWP and !$OPT{keep}) {
334     unlink $TAR or warn
335     "Could not unlink '$TAR': $!\n";
336     }
337     }
338    
339     if (%NEW_ALIAS) {
340     print "\%ALIAS = (\n";
341     for (keys %NEW_ALIAS) {
342     print "\t'$_'\t=> '$NEW_ALIAS{$_}',\n";
343     }
344     print "\t);\n";
345     }
346    
347     # we are done
348     $db->close();
349     exit;
350    
351     sub fname ($) {
352     my $key = shift;
353     my ($ntar, $file) = split $;, $key;
354    
355     # remove leading './' - shudder
356     $file =~ s/^\.\///;
357    
358     return($file);
359     }
360    
361     sub myget {
362     my $tar = shift;
363     my $TAR;
364    
365     if ($LWP) { # fetch the archive
366     if ($OPT{keep}) {
367     $TAR = "$OPT{keep}/$ARCHIVE{$tar}";
368     print "Keeping in '$TAR'\n" unless -e $TAR;
369     my ($base, $path) = fileparse($TAR);
370     unless (-d $path) {
371     mkpath($path,1,0755) or
372     die "Could not mkpath($path)\n";
373     }
374     } else {
375     $TAR = "/tmp/$tar.tar.gz";
376     }
377     unless (-e $TAR) { # lwp mirror seems to fetch ftp: in any case?
378     print "Fetching $OPT{cpan}/$ARCHIVE{$tar}\n";
379     my $status = mirror("$OPT{cpan}/$ARCHIVE{$tar}", $TAR);
380     if ($status != &RC_OK and $status != &RC_NOT_MODIFIED) {
381     warn "Was unable to mirror '$ARCHIVE{$tar}, skipping'\n";
382     return;
383     }
384     }
385     }
386     $TAR;
387     }
388    
389     sub index_pod {
390     my %parm = @_;
391     my $did = $parm{file};
392     my $rel_did = $did;
393     my $abs_did = $did;
394    
395     if ($rel_did =~ s:$DIR/::) {
396     $abs_did = "$DIR/$rel_did";
397     }
398    
399     undef $did;
400    
401     # check for both variants
402     if ($tb->have('docid' => $rel_did)) {
403     $did = $rel_did;
404     } elsif ($tb->have('docid' => $abs_did)) {
405     $did = $abs_did;
406     }
407     if ($did) { # have it version
408     if (!$parm{remove}) {
409     warn "duplicate: $did\n";
410     return;
411     }
412     } else { # not seen yet
413     $did = $rel_did;
414     if ($parm{remove}) {
415     print "missing: $did\n";
416     return;
417     }
418     }
419    
420     $parm{'text'} ||= WAIT::Document::Find->FETCH($abs_did);
421    
422     unless (defined $parm{'text'}) {
423     print "unavailable: $did\n";
424     return;
425     }
426    
427     my $record = $layout->split($parm{'text'});
428     $record->{size} = length($parm{'text'});
429     my $headline = $record->{name} || $did;
430    
431     $headline =~ s/^$DATA//o; # $did
432     $headline =~ s/\s+/ /g; $headline =~ s/^\s+//;
433    
434     printf "%s %s\n", ($parm{remove})?'-':'+', substr($headline,0,70);
435     if ($parm{remove}) {
436     $tb->delete('docid' => $did,
437     headline => $headline,
438     %{$record});
439     } else {
440     $tb->insert('docid' => $did,
441     headline => $headline,
442     source => $parm{source},
443     parent => $parm{parent},
444     %{$record});
445     }
446     }
447    
448     # This *must* remove the version in *any* case. It should compute a
449     # resonable version number - but usually mtimes should be used.
450     sub version {
451     local ($_) = @_;
452    
453     # remove alpha/beta postfix
454     s/([-_\d])(a|b|alpha|beta|src)$/$1/;
455    
456     # jperl1.3@4.019.tar.gz
457     s/@\d.\d+//;
458    
459     # oraperl-v2.4-gk.tar.gz
460     s/-v(\d)/$1/;
461    
462     # lettered versions - shudder
463     s/([-_\d\.])([a-z])([\d\._])/sprintf "$1%02d$3", ord(lc $2) - ord('a') /ei;
464     s/([-_\d\.])([a-z])$/sprintf "$1%02d", ord(lc $2) - ord('a') /ei;
465    
466     # thanks libwww-5b12 ;-)
467     s/(\d+)b/($1-1).'.'/e;
468     s/(\d+)a/($1-2).'.'/e;
469    
470     # replace '-pre' by '0.'
471     s/-pre([\.\d])/-0.$1/;
472     s/\.\././g;
473     s/(\d)_(\d)/$1$2/g;
474    
475     # chop '[-.]' and thelike
476     s/\W$//;
477    
478     # ram's versions Storable-0.4@p
479     s/\@/./;
480    
481     if (s/[-_]?(\d+)\.(0\d+)\.(\d+)$//) {
482     return($_, $1 + "0.$2" + $3 / 1000000);
483     } elsif (s/[-_]?(\d+)\.(\d+)\.(\d+)$//) {
484     return($_, $1 + $2/1000 + $3 / 1000000);
485     } elsif (s/[-_]?(\d+\.[\d_]+)$//) {
486     return($_, $1);
487     } elsif (s/[-_]?([\d_]+)$//) {
488     return($_, $1);
489     } elsif (s/-(\d+.\d+)-/-/) { # perl-4.019-ref-guide
490     return($_, $1);
491     } else {
492     if ($_ =~ /\d/) { # smells like an unknown scheme
493     warn "Odd version Numbering: '$File::Find::name'\n";
494     return($_, undef);
495     } else { # assume version 0
496     warn "No version Numbering: '$File::Find::name'\n";
497     return($_, 0);
498     }
499    
500     }
501     }
502    
503     sub wanted {
504     my $mtime = shift; # called by parse_file_ls();
505    
506     return unless /^(.*)\.tar(\.gz|\.Z)$/;
507     my ($archive, $version) = version($1);
508    
509     unless (defined $version) {
510     warn "Skipping $1\n";
511     return;
512     }
513    
514     # Check for file alias
515     $archive = $ALIAS{$archive} if $ALIAS{$archive};
516    
517     # Check for path alias.
518     if ($File::Find::name =~ m(/CPAN/(?:source/)?(.*\Q$archive\E))) {
519     if ($ALIAS{$1}) {
520     $archive = $ALIAS{$1};
521     }
522     }
523    
524     if ($OPT{trust_mtime}) {
525     $version = $mtime;
526     } else {
527     $version =~ s/(\d)_/$1/;
528     $version ||= $mtime; # mtime
529     }
530    
531     if (!exists $ARCHIVE{$archive}
532     or $VERSION{$archive} < $version) {
533     $ARCHIVE{$archive} = $File::Find::name;
534     $VERSION{$archive} = $version;
535     }
536     }
537    
538     sub clean_database {
539     my %parm = @_;
540    
541     my $db = WAIT::Database->open(
542     name => $parm{database},
543     'directory' => $parm{dir},
544     )
545     or die "Could not open database '$parm{dir}/$parm{database}': $@";
546     my $tbl = $db->table(name => $parm{table});
547     if ($tbl) {
548     $tbl->drop or
549     die "Could not open table '$parm{tabel}': $@";
550     }
551    
552     $db->close;
553     }
554    
555     sub create_table {
556     my %parm = @_;
557    
558     my $access = bless {}, 'WAIT::Document::Find';
559    
560     my $stem = [{
561     'prefix' => ['isotr', 'isolc'],
562     'intervall' => ['isotr', 'isolc'],
563     }, 'isotr', 'isolc', 'split2', 'stop', 'Stem'];
564     my $text = [{
565     'prefix' => ['isotr', 'isolc'],
566     'intervall' => ['isotr', 'isolc'],
567     },
568     'isotr', 'isolc', 'split2', 'stop'];
569     my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],;
570    
571     my $tb =
572     $parm{db}->create_table
573     (name => $parm{table},
574     attr => ['docid', 'headline', 'source', 'size', 'parent'],
575     keyset => [['docid']],
576     layout => $parm{layout},
577     access => $access,
578     invindex =>
579     [
580     'name' => $stem,
581     'synopsis' => $stem,
582     'bugs' => $stem,
583     'description' => $stem,
584     'text' => $stem,
585     'environment' => $text,
586     'example' => $text, 'example' => $stem,
587     'author' => $sound, 'author' => $stem,
588     ]
589     );
590     die "Could not create table '$parm{table}'" unless $tb;
591     $tb;
592     }
593    
594     my %MON;
595     my $YEAR;
596    
597     BEGIN {
598     my $i = 1;
599     for (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
600     $MON{$_} = $i++;
601     }
602     $YEAR = (localtime(time))[5];
603     }
604    
605     # We could/should use Date::GetDate here
606     use Time::Local;
607     sub mtime {
608     my ($mon, $mday, $time) = @_;
609     my ($hour, $min, $year, $monn) = (0,0);
610    
611     if ($time =~ /(\d+):(\d+)/) {
612     ($hour, $min) = ($1, $2);
613     $year = $YEAR;
614     } else {
615     $year = $time;
616     }
617     $monn = $MON{$mon} || $MON{ucfirst lc $mon} || warn "Unknown month: '$mon'";
618     my $guess = timelocal(0,$min,$hour,$mday,$monn-1,$year);
619     if ($guess > time) {
620     $guess = timelocal(0,$min,$hour,$mday,$monn-1,$year-1);
621     }
622     $guess;
623     }
624    
625    
626     __END__
627     ## ###################################################################
628     ## pod
629     ## ###################################################################
630    
631     =head1 NAME
632    
633     cpan - generate an WAIT index for CPAN
634    
635     =head1 SYNOPSIS
636    
637     B<cpan>
638     [B<-clean>] [B<-noclean>]
639     [B<-cpan> I<url or directory>]
640     [B<-database> I<dbname>]
641     [B<-dir> I<directory>]
642     [B<-force>] [B<-noforce>]
643     [B<-keep> I<directory>]
644     [B<-match> I<regexp>]
645     [B<-table> I<table name>]
646     [B<-test> I<level>]
647     [B<-trust_mtime>] [B<-notrust_mtime>]
648    
649     =head1 DESCRIPTION
650    
651     TBS
652    
653     =head1 OPTIONS
654    
655     =over 5
656    
657     =item B<-clean> / B<-noclean>
658    
659     Clean the table befor indexing. Default is B<off>.
660    
661     =item B<-cpan> I<url or directory>
662    
663     Default directory or URL for indexing. If an URL is given, there
664     currently must be a file F<indices/find-ls.gz> relative to it which
665     contains the output of C<find . -ls | gzip>.
666     Default is F<ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN>.
667    
668    
669     =item B<-database> I<dbname>
670    
671     Specify database name. Default is F<DB>.
672    
673     =item B<-dir> I<directory>
674    
675     Alternate directory were databases are located. Default is the
676     directory specified during configuration of WAIT.
677    
678     =item B<-force>
679    
680     Force reindexing, even if B<cpan> thinks files are up to date.
681     Default is B<off>
682    
683     =item B<-keep> I<directory>
684    
685     If fetching from a remote server, keep files in I<directory>. Default is
686     F</app/unido-i06/src/share/lang/perl/96a/CPAN/sources>.
687    
688     =item B<-match> I<regexp>
689    
690     Limit to patches matching I<regexp>. Default is F<authors/id/>.
691    
692     =item B<-table> I<table name>
693    
694     Specify an alternate table name. Default is C<cpan>.
695    
696     =item B<-test> I<level>
697    
698     Set test level, were B<0> means normal operation, B<1> means, don't
699     really index and B<2> means, don't even get archives and examine them.
700    
701     =item B<-trust_mtime> / B<-notrust_mtime>
702    
703     If B<on>, the files mtimes are used to decide, which version of an
704     archive is the newest. If b<off>, the version extracted is used
705     (beware, there are far more version numbering schemes than B<cpan> can
706     parse).
707    
708     =head1 AUTHOR
709    
710     Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortumund.de>E<gt>

Properties

Name Value
cvs2svn:cvs-rev 1.1
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26