1 |
#!/usr/bin/perl -w |
2 |
|
3 |
use File::Find; |
4 |
use File::Slurp; |
5 |
use Class::DBI::Loader; |
6 |
#use Class::DBI::AbstractSearch; |
7 |
|
8 |
# Script-wide settings.
#
#   debug, dsn, user, passwd - handed to Class::DBI::Loader->new when the
#                              database connection is set up
#   catalog                  - column values passed as-is to find_or_create
#                              on the 'catalog_webarchive' class, i.e. the
#                              catalog row that every imported issue is
#                              attached to
my $self = {
    debug   => 0,
    dsn     => 'dbi:Pg:dbname=webpac2',
    user    => 'dpavlin',
    passwd  => '',
    catalog => {
        # Previously spelled 'PgGeneratBits', which did not match the
        # directory this script actually scans ('PgGeneralBits', see the
        # find() call at the end of the file).
        # NOTE(review): a catalog row already stored under the misspelled
        # path will not be matched by find_or_create any more -- update or
        # remove it if such a row exists.
        path  => '/rest/references/PgGeneralBits/bits',
        title => 'PostgreSQL General Bits',
        uri   => 'http://www.varlena.com/varlena/GeneralBits/archive.php',
    },
};
19 |
|
20 |
# Build the Class::DBI::Loader instance used by the rest of the script.
# Connection parameters come from $self; the classes that are later looked
# up with $l->find_class(...) are placed under the namespace given here.
my %loader_options = (
    namespace     => "WebPAC::Input::PgBits::CDBI",
    relationships => 1,

    # connection settings
    debug    => $self->{'debug'},
    dsn      => $self->{'dsn'},
    user     => $self->{'user'},
    password => $self->{'passwd'},

    # optional extras, currently disabled:
#   additional_classes      => qw/Class::DBI::AbstractSearch/,
#   additional_base_classes => qw/My::Stuff/,
);

my $l = Class::DBI::Loader->new(%loader_options);
30 |
|
31 |
# Fetch the catalog row described by $self->{catalog}, creating it when it
# does not exist yet, and commit right away. issue() later reads
# $this_catalog->id to link each imported entry to this catalog.
my $catalog_class = $l->find_class('catalog_webarchive');
my $this_catalog  = $catalog_class->find_or_create( $self->{catalog} );
$this_catalog->dbi_commit;
33 |
|
34 |
# issue($file)
#
# Import a single issue page into the database.
#
# Reads $file, extracts the issue date and number from the text following
# the "<!-- ISSUE Number/Date -->" marker, and then:
#   * finds or creates the matching 'entries_pgbits' row,
#   * finds or creates the 'catalog_entry' row linking it to $this_catalog,
#   * finds or creates one 'items_pgbits' row per item found in the page
#     (an item starts at an <!-- IKEY="..." --> comment and ends at its
#     </ICONT> tag).
# Every row is committed as soon as it has been found or created.
#
# Parameters:
#   $file - path of the page to import (required).
#
# Returns nothing useful. Dies when $file is missing; warns and returns
# early when the issue number/date cannot be found in the file.
sub issue {
    my $file = shift;

    # Explicit check instead of "shift || die": gives a readable message and
    # does not reject a file that happens to be named "0".
    die "issue: missing file name argument\n"
        unless defined $file && length $file;

    my $html = read_file($file);

    # In list context the match returns its two captures: a date shaped like
    # <digits>-<word>-<4 digits>, then the number following "Issue:".
    my ( $issue_date, $issue_no ) =
        $html =~ m#<!-- ISSUE Number/Date -->.+?(\d+-\w+-\d\d\d\d)\s+Issue:\s+(\d+)#s;

    unless ( defined $issue_no ) {
        warn "can't find issue number and date in $file, skipping\n";
        return;
    }

    print "## issue $issue_no on $issue_date [$file]\n";

    my $this_entry = $l->find_class('entries_pgbits')->find_or_create(
        date  => $issue_date,
        issue => $issue_no,
        path  => $file,
        title => $self->{catalog}->{title} . " :: $issue_no",
    );
    $this_entry->dbi_commit;

    $l->find_class('catalog_entry')->find_or_create(
        catalog_id => $this_catalog->id,
        entry_id   => $this_entry->id,
        e_type     => 'pgbits',
    )->dbi_commit;

    # One item, anchored at the start of what is left of the page. Literal
    # spaces are written as [ ] because of /x.
    #
    # The three [^<]+? captures are deliberately non-greedy: a greedy [^<]+
    # swallows the whitespace in front of the closing tag (leaving the
    # following \s* with nothing to match), so titles and dates were stored
    # with trailing whitespace while the html/contributors captures, which
    # already used .+?, were trimmed.
    my $item_re = qr{
        \A .*?
        <!--[ ]IKEY="([^"]+)"[ ]-->                .+?
        <MYTITLE> \s* ([^<]+?) \s* </MYTITLE>      .+?
        <ITITLE>  \s* ([^<]+?) \s* </ITITLE>       .+?
        <IDATE>   \s* ([^<]+?) \s* </IDATE>        .+?
        </TABLE>  \s* (.+?)    \s*
        <ICONT>   \s* (.+?)    \s* </ICONT>
    }xsi;

    # Loop-invariant lookups, hoisted out of the item loop.
    my $items_class = $l->find_class('items_pgbits');
    my $entry_id    = $this_entry->id;

    # Each successful substitution removes everything up to and including
    # the matched item, so the next iteration sees the following item.
    while ( $html =~ s/$item_re// ) {

        # Copy the captures immediately, before anything else can run a
        # regex and overwrite $1..$6.
        my ( $ikey, $mytitle, $ititle, $idate, $body, $contributors ) =
            ( $1, $2, $3, $4, $5, $6 );

        my $row = {
            # length() rather than plain truth, so a subtitle of "0" is kept
            title => $mytitle . ( length($ititle) ? " :: $ititle" : "" ),

            ikey         => $ikey,
            mytitle      => $mytitle,
            ititle       => $ititle,
            date         => $idate,          # idate
            html         => $body,
            contributors => $contributors,

            entry_id => $entry_id,
            i_type   => 'pgbits',
        };

        print $row->{title}, " ", $row->{date}, "\n";
        $items_class->find_or_create($row)->dbi_commit;
    }

    return;
}
84 |
|
85 |
# Walk the archive directory (following symlinks) and hand every issue page
# to issue(). Only names ending in ".php" (any case) are considered, and
# names containing <digits>po. or <digits>es. are skipped.
my $import_if_issue_page = sub {
    my $candidate = $File::Find::name;

    return if $candidate !~ m#\.php$#i;
    return if $candidate =~ m#\d+(?:po|es)\.#;

    issue($candidate);
};

find(
    {
        wanted => $import_if_issue_page,
        follow => 1,
    },
    '/rest/references/PgGeneralBits/bits/'
);