1 |
dpavlin |
27 |
#!/usr/bin/perl -w |
2 |
|
|
|
3 |
|
|
use File::Find; |
4 |
|
|
use File::Slurp; |
5 |
|
|
use Class::DBI::Loader; |
6 |
|
|
#use Class::DBI::AbstractSearch; |
7 |
|
|
|
8 |
|
|
# Script-wide settings: the database connection parameters handed to
# Class::DBI::Loader, plus the catalog record that every imported issue
# gets linked to.
my $self = {
    debug  => 0,
    dsn    => 'dbi:Pg:dbname=webpac2',
    user   => 'dpavlin',
    passwd => '',

    # Passed as-is to find_or_create() on the catalog_webarchive class.
    catalog => {
        # Previously spelled 'PgGeneratBits', which did not match the
        # directory this script actually walks (PgGeneralBits).
        # NOTE: a catalog row created with the old spelling will no
        # longer match find_or_create(), so a new row is created.
        path  => '/rest/references/PgGeneralBits/bits',
        title => 'PostgreSQL General Bits',
        uri   => 'http://www.varlena.com/varlena/GeneralBits/archive.php',
    },
};
19 |
|
|
|
20 |
|
|
# Build the Class::DBI::Loader instance that the rest of the script uses
# (via find_class) to reach the per-table classes. Connection settings
# come from $self.
my $l = do {
    my %loader_options = (
        namespace     => "WebPAC::Input::PgBits::CDBI",
        relationships => 1,
        debug         => $self->{'debug'},
        dsn           => $self->{'dsn'},
        user          => $self->{'user'},
        password      => $self->{'passwd'},

        # Optional extras, currently disabled:
        #   additional_classes      => qw/Class::DBI::AbstractSearch/,
        #   additional_base_classes => qw/My::Stuff/,
    );
    Class::DBI::Loader->new(%loader_options);
};
30 |
|
|
|
31 |
|
|
# Fetch (or insert) the catalog row described by $self->{catalog} and
# commit immediately; issue() later reads this row's id when linking
# entries to the catalog.
my $catalog_class = $l->find_class('catalog_webarchive');
my $this_catalog  = $catalog_class->find_or_create( $self->{catalog} );
$this_catalog->dbi_commit;
33 |
|
|
|
34 |
|
|
# issue($file)
#
# Import a single issue page into the database.
#
#   $file - path of the page to read; required (dies when missing/false).
#
# Steps:
#   1. Read the whole file and pull the issue date and number out of the
#      "<!-- ISSUE Number/Date -->" header. If they cannot be found, a
#      warning is printed and the file is skipped.
#   2. find_or_create the entries_pgbits row for the issue and the
#      catalog_entry row linking it to $this_catalog; both are committed.
#   3. Repeatedly strip one item (IKEY / MYTITLE / ITITLE / IDATE / body /
#      ICONT) off the front of the page and find_or_create an
#      items_pgbits row for each one.
#
# Returns nothing useful. Relies on the file-level $l, $self and
# $this_catalog.
sub issue {
    my $file = shift || die "issue() called without a file name";

    my $html = read_file($file);

    # List-context match: both variables stay undef when the header is absent.
    my ($issue_date, $issue_no) = $html =~
        m#<!-- ISSUE Number/Date -->.+?(\d+-\w+-\d\d\d\d)\s+Issue:\s+(\d+)#s;

    unless (defined $issue_no) {
        warn "can't find issue number and date in $file, skipping\n";
        return;
    }

    print "## issue $issue_no on $issue_date [$file]\n";

    my $this_entry = $l->find_class('entries_pgbits')->find_or_create(
        date  => $issue_date,
        issue => $issue_no,
        path  => $file,
        title => $self->{catalog}->{title} . " :: $issue_no",
    );
    $this_entry->dbi_commit;

    $l->find_class('catalog_entry')->find_or_create(
        catalog_id => $this_catalog->id,
        entry_id   => $this_entry->id,
        e_type     => 'pgbits',
    )->dbi_commit;

    # Each successful substitution removes everything up to and including
    # one complete item, so the loop ends when no item is left in $html.
    while ($html =~ s#^.*?<!-- IKEY="([^"]+)" -->.+?<MYTITLE>\s*([^<]+)\s*</MYTITLE>.+?<ITITLE>\s*([^<]+)\s*</ITITLE>.+?<IDATE>\s*([^<]+)\s*</IDATE>.+?</TABLE>\s*(.+?)\s*<ICONT>\s*(.+?)\s*</ICONT>##si) {
        # Copy the captures right away; any later regex would clobber them.
        my ($ikey, $mytitle, $ititle, $idate, $body, $contributors)
            = ($1, $2, $3, $4, $5, $6);

        # The greedy [^<]+ captures also swallow the whitespace in front of
        # the closing tag (the trailing \s* in the pattern never gets any),
        # so strip it here.
        s/\s+\z// for $mytitle, $ititle, $idate;

        my $row = {
            # length() rather than plain truth so a title of "0" is kept.
            title => $ikey . ( length($mytitle) ? " :: $mytitle" : "" ),

            ikey         => $ikey,
            mytitle      => $mytitle,
            ititle       => $ititle,
            date         => $idate,          # idate
            html         => $body,
            contributors => $contributors,

            entry_id => $this_entry->id,
            i_type   => 'pgbits',
        };

        print $row->{title}," ", $row->{date},"\n";
        $l->find_class('items_pgbits')->find_or_create( $row )->dbi_commit;
    }
}
84 |
|
|
|
85 |
|
|
# Walk the mirror directory (following symlinks) and feed every issue
# page to issue().
my $import_if_issue_page = sub {
    my $candidate = $File::Find::name;

    # Only names ending in .php are considered.
    return if $candidate !~ m#\.php$#i;

    # Skip names with digits followed by "po." or "es."
    # (presumably translated editions -- TODO confirm).
    return if $candidate =~ m#\d+(?:po|es)\.#;

    issue($candidate);
};

find(
    { wanted => $import_if_issue_page, follow => 1 },
    '/rest/references/PgGeneralBits/bits/'
);