1 |
dpavlin |
1.1 |
#!/usr/local/bin/perl -w |
2 |
|
|
|
3 |
|
|
use LWP::UserAgent; |
4 |
|
|
use strict; |
5 |
|
|
use DBI; |
6 |
|
|
|
7 |
|
|
my $debug=0; |
8 |
|
|
|
9 |
|
|
$debug++ if (lc($ARGV[0]) eq "-d"); |
10 |
|
|
|
11 |
|
|
sub print_debug { |
12 |
|
|
return if (! $debug); |
13 |
|
|
open(DEBUG,">> debug") || warn "can't open debug file!"; |
14 |
|
|
print DEBUG "###",@_; |
15 |
|
|
print @_; |
16 |
|
|
close(DEBUG); |
17 |
|
|
} |
18 |
|
|
|
19 |
|
|
print_debug("debug level $debug\n"); |
20 |
|
|
|
21 |
|
|
my $dbh = DBI->connect("DBI:Pg:dbname=informatika","","") || die $DBI::errstr; |
22 |
|
|
|
23 |
|
|
my $ua = new LWP::UserAgent; |
24 |
|
|
$ua->agent("pliva_harvester 0.0"); |
25 |
|
|
$ua->timeout(60); |
26 |
|
|
$ua->env_proxy(); |
27 |
|
|
$ua->proxy(['http', 'ftp'], 'http://proxy.pliva.hr:8080/'); |
28 |
|
|
|
29 |
|
|
my @val; |
30 |
|
|
|
31 |
|
|
sub insert { |
32 |
|
|
my $tpl = "?," x $#val; |
33 |
|
|
$tpl.="?"; |
34 |
|
|
my $sql="insert into imenik values ($tpl)"; |
35 |
|
|
print_debug("sql: $sql\n"); |
36 |
|
|
my $sth = $dbh->prepare("$sql") || warn "$sql\n".$DBI::errstr; |
37 |
|
|
$sth->execute(@val); |
38 |
|
|
} |
39 |
|
|
|
40 |
|
|
my $req = HTTP::Request->new(GET =>'http://tkcpdc.pliva.hr/imenik/traziNew/Intranet/export1.asp'); |
41 |
|
|
|
42 |
|
|
print_debug("getting url...\n"); |
43 |
|
|
my $res = $ua->request($req); |
44 |
|
|
if ($res->is_success) { |
45 |
|
|
print_debug("html size: ".length($res->content)."\n"); |
46 |
|
|
foreach (split(/[\n\r]+/, $res->content)) { |
47 |
|
|
chomp; |
48 |
|
|
print_debug("line: $_\n"); |
49 |
|
|
if (m,</tr>,i) { |
50 |
|
|
insert() if (@val); |
51 |
|
|
undef @val; |
52 |
|
|
} |
53 |
|
|
if (m,<th>([^>]+)</th>,i) { |
54 |
|
|
push @val,$1; |
55 |
|
|
print_debug("val: $1"); |
56 |
|
|
} |
57 |
|
|
|
58 |
|
|
} |
59 |
|
|
insert(); |
60 |
|
|
} else { |
61 |
|
|
warn "can't fetch imenik data"; |
62 |
|
|
} |
63 |
|
|
|
64 |
|
|
$dbh->disconnect; |