1 |
#!/usr/local/bin/perl -w |
2 |
|
3 |
use LWP::UserAgent; |
4 |
use strict; |
5 |
use DBI; |
6 |
use HTML::TreeBuilder; |
7 |
|
8 |
# Debug level. It starts at 1, so debug output is on by default; passing
# -d as the first command-line argument raises it to 2.
my $debug=1;

# Quote page that the stock values are scraped from.
my $url = 'http://custom.marketwatch.com/custom/alliance/ftmw/invrel.asp?siteid=!plivadd-0773-4F6D-DHID-QN1112NFTD0X&symb=PLVD';

# Look at the first argument only when one was given: lc() on an undefined
# $ARGV[0] raises an "uninitialized value" warning under -w.
$debug++ if (@ARGV && lc($ARGV[0]) eq "-d");

# Scraped quote fields, shared by the rest of the script
# (keys used in this file: date, curr, change, high, low, open).
my %val;
15 |
|
16 |
# Emit a debug message; does nothing when $debug is false.
#
# Arguments: list of strings, printed back to back.
# The message is appended to the file "debug" in the current directory
# (prefixed with "###") and then printed to STDOUT, each followed by a
# newline. A failure to open or close the file is reported with warn and
# does not stop the STDOUT copy from being printed.
sub print_debug {
    return if (! $debug);
    # Three-argument open with a lexical handle: the mode cannot be confused
    # with the file name and the handle cannot clash with a global DEBUG.
    # The file is only written to when the open actually succeeded.
    if (open(my $log, '>>', 'debug')) {
        print $log "###",@_,"\n";
        # Buffered write errors only surface at close time.
        close($log) || warn "can't close debug file: $!";
    } else {
        warn "can't open debug file: $!";
    }
    print @_,"\n";
}
23 |
|
24 |
# Dump the contents of %val to STDOUT, one "key: value" line per entry.
# Silent when $debug is false. Entries appear in whatever order keys()
# returns them.
sub print_val {
    return unless $debug;
    for my $field (keys %val) {
        print "$field: $val{$field}\n";
    }
}
30 |
|
31 |
print_debug("debug level $debug");

# Connect to the PostgreSQL database "corp" with empty user name and
# password; stop right away when the connection fails.
my $dbh = DBI->connect("DBI:Pg:dbname=corp","","") || die $DBI::errstr;

# HTTP client. Class->new is used instead of the indirect "new Class" form,
# which Perl can misparse when a sub named "new" is in scope.
my $ua = LWP::UserAgent->new;
$ua->agent("pliva_harvester 0.0");
$ua->timeout(60);
# Proxy settings are first read from the environment; the explicit proxy()
# call that follows then sets the proxy used for http and ftp.
$ua->env_proxy();
$ua->proxy(['http', 'ftp'], 'http://proxy.pliva.hr:8080/');

# NOTE(review): not referenced anywhere in the visible part of this file.
my $close_time='21:21:21 CET';
43 |
|
44 |
# Store the quote currently held in %val in the "stocks" table.
#
# Takes no arguments. Reads the file-level %val (keys date, curr, change,
# high, low, open) and uses the file-level $dbh.
#
# Nothing is inserted when a row with the same date already exists, or when
# any value is undefined or contains "n/a"; both cases are only reported
# through print_debug. Dies when the row count cannot be read or when the
# INSERT fails.
sub insert {
    my @bind = @val{qw(date curr change high low open)};

    # The values are text scraped from a web page, so they are handed to the
    # database through placeholders instead of being pasted into the SQL.
    my ($nr) = $dbh->selectrow_array(
        "select count(date) from stocks where date=?", undef, $val{date});
    # Without this check a failed query would look like "no such row yet".
    die "can't count rows in stocks\n".$DBI::errstr if (! defined $nr);

    if ($nr == 0) {
        my $sql = "insert into stocks values (?,'LSE',?,?,?,?,?)";

        # nuke + which pgsql doesn't like (numeric fields only, not the date)
        foreach my $number (@bind[1 .. $#bind]) {
            $number =~ s/^\+(\d)/$1/ if (defined $number);
        }

        # Printable form of the bound values for debug and error messages.
        my $shown = join(",", map { defined $_ ? $_ : 'undef' } @bind);

        if (grep { !defined($_) || m,n/a,i } @bind) {
            print_val();
            print_debug("undefined values found. not inserting in db");
        } else {
            $dbh->do($sql, undef, @bind) || die "$sql [$shown]\n".$DBI::errstr;
        }
        print_debug("sql: $sql [$shown]\n");
    } else {
        print_debug("skip: $val{date}\n");
    }
}
62 |
|
63 |
# Request object for the live fetch; only the commented-out code below
# refers to it.
my $req = HTTP::Request->new(GET => $url);
my $tree = HTML::TreeBuilder->new;

# Live fetch, currently switched off in favour of a saved copy of the page:
#my $res = $ua->request($req);
#if ($res->is_success) {
#    print_debug("html: ".$res->content."\n");
#    $tree->parse($res->content) || die "can't parse html file!";

if (1) {
    $tree->parse_file("out-without_proxy") || die "can't parse html file!";

    # --- quote timestamp: "H:MM D/M/YY" inside the ft-quotedate element ---
    my $stamp_el = $tree->look_down('class', 'ft-quotedate');
    die "can't find date in html" if (! $stamp_el);

    my $stamp = $stamp_el->as_text;
    my ($hour, $min, $day, $mon, $year) =
        ($stamp =~ m,(\d+):(\d+) (\d+)/(\d+)/(\d+),);
    die "can't recognise date format ".$stamp if (! defined $hour);

    # Shift the page's hour by 12 when it lags the local hour by more than
    # 12 (NOTE(review): presumably a 12-hour clock on the page; confirm).
    my $local_hour = (localtime(time))[2];
    $hour += 12 if ($local_hour - $hour > 12);

    # The page gives a two-digit year.
    my $date = ($year+2000)."-$mon-$day";
    my $time = "$hour:$min";
    print_debug("time: $time date: $date");
    $val{date} = "$date $time";

    # --- last value and change: exactly two ft-quoteLG elements ---
    my @large = $tree->look_down('class', 'ft-quoteLG');
    die "can't find class ft-quoteLG (last value and change)" unless @large;
    die "can't find 2 classes ft-quoteLG" unless @large == 2;

    my ($last_el, $change_el) = @large;
    $val{curr} = $last_el->as_text;
    $val{change} = $change_el->as_text;

    # --- high, low, open: 2nd to 4th of exactly eleven ft-quoteMd elements ---
    my @medium = $tree->look_down('class', 'ft-quoteMd');
    die "can't find class ft-quoteMd" unless @medium;
    die "can't find 11 classes ft-quoteMd" unless @medium == 11;

    my (undef, $high_el, $low_el, $open_el) = @medium;
    $val{high} = $high_el->as_text;
    $val{low} = $low_el->as_text;
    $val{open} = $open_el->as_text;

    insert();
} else {
    warn "can't fetch stock data";
}

print_val();

$dbh->disconnect;
127 |
|