/[webpac2]/trunk/sql/pgbits.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/sql/pgbits.pl

Parent Directory | Revision Log


Revision 27 - (hide annotations)
Sat Jul 23 19:21:07 2005 UTC (18 years, 10 months ago) by dpavlin
File MIME type: text/plain
File size: 2330 byte(s)
experimental parser for PgBits

1 dpavlin 27 #!/usr/bin/perl -w
2    
3     use File::Find;
4     use File::Slurp;
5     use Class::DBI::Loader;
6     #use Class::DBI::AbstractSearch;
7    
8     my $self = {    # script-wide settings: database connection parameters plus the catalog record to register
9     debug => 0,    # handed to Class::DBI::Loader as its debug option
10     dsn => 'dbi:Pg:dbname=webpac2',
11     user => 'dpavlin',
12     passwd => '',    # passed to the loader under the key "password"
13     catalog => {    # this hash is given as-is to find_or_create on the catalog_webarchive class
14     path => '/rest/references/PgGeneralBits/bits',    # same tree that find() scans at the end of this script; was misspelled 'PgGeneratBits' -- NOTE(review): a catalog row stored under the old spelling will no longer match, confirm before re-running against an existing database
15     title => 'PostgreSQL General Bits',    # also the prefix of every entry title built in issue()
16     uri => 'http://www.varlena.com/varlena/GeneralBits/archive.php',
17     },
18     };
19    
20     my $l = Class::DBI::Loader->new(    # loader built from the connection settings in $self; issue() asks it for classes via find_class()
21     debug => $self->{'debug'},
22     dsn => $self->{'dsn'},
23     user => $self->{'user'},
24     password => $self->{'passwd'},    # note the key rename: config uses "passwd", the loader option is "password"
25     namespace => "WebPAC::Input::PgBits::CDBI",
26     # additional_classes => qw/Class::DBI::AbstractSearch/,
27     # additional_base_classes => qw/My::Stuff/,
28     relationships => 1,
29     );
30    
31     my $this_catalog = $l->find_class('catalog_webarchive')->find_or_create( $self->{catalog} );    # fetch or insert the catalog record described by $self->{catalog}; its id is used by issue() for catalog_entry rows
32     $this_catalog->dbi_commit;
33    
34     sub issue {    # issue($file): parse one archived issue page and record the issue and each of its items in the database
35     my $file = shift || die;    # dies (without a message) when no argument, or a false one, is passed
36    # Uses the file-level lexicals $l (loader), $self (config) and $this_catalog (catalog row).
37     my $html = read_file($file);    # File::Slurp read_file in scalar context: the file content as one string
38    
39     my ($issue_no, $issue_date, $this_entry);
40    
41     if ($html =~ m#<!-- ISSUE Number/Date -->.+?(\d+-\w+-\d\d\d\d)\s+Issue:\s+(\d+)#s) {    # $1 = date in digits-word-4digits form, $2 = issue number; /s lets .+? cross newlines
42     $issue_no = $2;
43     $issue_date = $1;
44     print "## issue $issue_no on $issue_date [$file]\n";
45    
46     $this_entry = $l->find_class('entries_pgbits')->find_or_create(    # one entries_pgbits record per issue file
47     date => $issue_date,
48     issue => $issue_no,
49     path => $file,
50     title => $self->{catalog}->{title} . " :: $issue_no",
51     );
52     $this_entry->dbi_commit;
53    
54     $l->find_class('catalog_entry')->find_or_create(    # link the entry to the catalog record
55     catalog_id => $this_catalog->id,
56     entry_id => $this_entry->id,
57     e_type => 'pgbits',
58     )->dbi_commit;
59     } else {
60     warn "can't find issue number and date in $file, skipping\n";
61     return;    # nothing is written to the database for files without an issue header
62     }
63    
64    # Each successful substitution below deletes everything from the start of $html through the end of the matched item (its closing ICONT tag), so the next pass finds the following item; the loop stops when no item matches.
65     while($html =~ s#^.*?<!-- IKEY="([^"]+)" -->.+?<MYTITLE>\s*([^<]+)\s*</MYTITLE>.+?<ITITLE>\s*([^<]+)\s*</ITITLE>.+?<IDATE>\s*([^<]+)\s*</IDATE>.+?</TABLE>\s*(.+?)\s*<ICONT>\s*(.+?)\s*</ICONT>##si){    # captures: $1 IKEY, $2 MYTITLE, $3 ITITLE, $4 IDATE, $5 text between </TABLE> and <ICONT>, $6 ICONT content; /i ignores tag case
66     my $row = {
67     title => $1 . ( $2 ? " :: $2" : ""),    # IKEY, plus " :: MYTITLE"; $2 is never empty, so the suffix is dropped only when MYTITLE is the string "0"
68    
69     ikey => $1,
70     mytitle => $2,
71     ititle => $3,
72     date => $4, # idate
73     html => $5,    # body text found between </TABLE> and <ICONT>
74     contributors => $6,    # content of the ICONT element
75    
76     entry_id => $this_entry->id,    # ties the item to the issue record created above
77     i_type => 'pgbits',
78     };
79    
80     print $row->{title}," ", $row->{date},"\n";
81     $l->find_class('items_pgbits')->find_or_create( $row )->dbi_commit;    # committed per item
82     }
83     }
84    
85     find({ wanted => sub {    # File::Find driver: the callback runs for every entry found under the start directory
86     my $path = $File::Find::name;    # full path of the current entry as provided by File::Find
87     return unless ($path =~ m#\.php$#i && $path !~ m#\d+(?:po|es)\.#);    # keep only paths ending in .php (any case) and skip those containing digits followed by "po." or "es." -- presumably translated editions, TODO confirm
88     issue($path);
89     },
90     follow => 1 }, '/rest/references/PgGeneralBits/bits/');    # follow => 1 makes File::Find follow symbolic links

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26