1 |
#!/usr/bin/perl -w |
2 |
|
3 |
use File::Find; |
4 |
use File::Slurp; |
5 |
use Class::DBI::Loader; |
6 |
#use Class::DBI::AbstractSearch; |
7 |
|
8 |
# Script-wide settings.
#
#   debug, dsn, user, passwd - handed to Class::DBI::Loader->new below.
#   catalog                  - column values for the top-level
#                              'topics_webarchive' row that every imported
#                              issue is attached to.
#
# NOTE(review): the whole catalog hash is the find_or_create() lookup key for
# that row, so a row stored with a different 'path' spelling will not be
# matched and a new one will be created instead -- check existing data.
my $self = {
    debug   => 0,
    dsn     => 'dbi:Pg:dbname=webpac2',
    user    => 'dpavlin',
    passwd  => '',
    catalog => {
        name => 'PostgreSQL General Bits',
        # Same directory that the File::Find walk at the end of the script
        # scans (without the trailing slash).
        path => '/rest/references/PgGeneralBits/bits',
        uri  => 'http://www.varlena.com/varlena/GeneralBits/archive.php',
        type => 'pgbits',
    },
};
20 |
|
21 |
# Build the Class::DBI::Loader instance used for all table access in this
# script.  Connection settings come from $self; generated classes are placed
# under the WebPAC::Input::PgBits::CDBI namespace with relationships enabled.
#
# Options that are intentionally left disabled:
#   additional_classes      => qw/Class::DBI::AbstractSearch/,
#   additional_base_classes => qw/My::Stuff/,
my %loader_options = (
    namespace     => "WebPAC::Input::PgBits::CDBI",
    relationships => 1,
    debug         => $self->{debug},
    dsn           => $self->{dsn},
    user          => $self->{user},
    password      => $self->{passwd},
);

my $l = Class::DBI::Loader->new(%loader_options);
31 |
|
32 |
# Look up (or insert) the top-level catalog row in 'topics_webarchive' from
# the values in $self->{catalog}, then commit.  $top is the parent of every
# issue topic created by issue().
my $webarchive_class = $l->find_class('topics_webarchive');
my $top              = $webarchive_class->find_or_create( $self->{catalog} );
$top->dbi_commit;
34 |
|
35 |
# issue($file)
#
# Import one issue page into the database.
#
# The file is read in full and the "ISSUE Number/Date" header is searched for
# the issue date and number.  A 'topics_pgbits' row for the issue is then
# found or created with the top-level catalog topic ($top) as its parent.
# After that every article block in the page (IKEY comment, MYTITLE, ITITLE,
# IDATE, body and ICONT sections) is found or created in 'items_pgbits' and
# linked to the issue through 'item_topics'.  Each row is committed as soon
# as it has been found or created.
#
# Parameters:
#   $file - path of the page to import; the sub dies if it is missing or
#           false.
#
# Returns nothing meaningful.  When the issue header cannot be found a
# warning is printed and the file is skipped without touching the database.
#
# Uses the file-level $l (Class::DBI::Loader instance) and $top.
sub issue {
    my $file = shift || die "issue(): no file name given";

    my $html = read_file($file);

    # List-context match: yields (date, number) on success and an empty
    # list on failure, so both variables stay undefined when there is no
    # header.
    my ($issue_date, $issue_no) =
        $html =~ m#<!-- ISSUE Number/Date -->.+?(\d+-\w+-\d\d\d\d)\s+Issue:\s+(\d+)#s;

    unless (defined $issue_no) {
        warn "can't find issue number and date in $file, skipping\n";
        return;
    }

    print "## issue $issue_no on $issue_date [$file]\n";

    # Lexical on purpose: keeps the topic row private to this call instead
    # of leaking it into a package global.
    my $issue = $l->find_class('topics_pgbits')->find_or_create(
        name      => "issue $issue_no",
        date      => $issue_date,
        path      => $file,
        issue     => $issue_no,
        type      => 'pgbits',
        parent_id => $top->id,
    );
    $issue->dbi_commit;

    # Each successful substitution removes everything up to and including
    # the first remaining article, so the loop consumes the page one article
    # at a time and stops when no further article matches.
    while ($html =~ s#^.*?<!-- IKEY="([^"]+)" -->.+?<MYTITLE>\s*([^<]+)\s*</MYTITLE>.+?<ITITLE>\s*([^<]+)\s*</ITITLE>.+?<IDATE>\s*([^<]+)\s*</IDATE>.+?</TABLE>\s*(.+?)\s*<ICONT>\s*(.+?)\s*</ICONT>##si) {
        # Copy the captures right away, before any other code can run a
        # regex and overwrite them.
        my ($ikey, $mytitle, $ititle, $idate, $body, $contributors) =
            ($1, $2, $3, $4, $5, $6);

        my $row = {
            name         => $mytitle . ( $ititle ? " :: $ititle" : "" ),
            ikey         => $ikey,
            mytitle      => $mytitle,
            ititle       => $ititle,
            date         => $idate,
            html         => $body,
            contributors => $contributors,
            type         => 'pgbits',
        };

        print $row->{name}, " ", $row->{date}, "\n";

        my $article = $l->find_class('items_pgbits')->find_or_create($row);
        $article->dbi_commit;

        # Link the article to its issue.
        $l->find_class('item_topics')->find_or_create(
            topic_id => $issue->id,
            item_id  => $article->id,
        )->dbi_commit;
    }

    return;
}
87 |
|
88 |
# Walk the directory tree (following symlinks) and hand every issue page to
# issue().  A file qualifies when its full path ends in ".php" (any case) and
# does not contain digits immediately followed by "po." or "es." --
# presumably the translated editions; verify against the archive layout.
my $import_issue_page = sub {
    my $candidate = $File::Find::name;

    return if $candidate !~ m#\.php$#i;
    return if $candidate =~ m#\d+(?:po|es)\.#;

    issue($candidate);
};

find(
    {
        wanted => $import_issue_page,
        follow => 1,
    },
    '/rest/references/PgGeneralBits/bits/'
);