/[corp]/esi/filter3.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /esi/filter3.pl

Parent Directory | Revision Log


Revision 1.1 - (show annotations)
Tue Sep 3 08:00:08 2002 UTC (21 years, 9 months ago) by dpavlin
Branch: MAIN
File MIME type: text/plain
new parser using HTML::TreeBuilder

1 #!/usr/local/bin/perl -w
2
3 use LWP::UserAgent;
4 use strict;
5 use DBI;
6 use HTML::TreeBuilder;
7
# Debug level.  A false value silences print_debug() and print_val(); it
# starts at 1 and is bumped to 2 when the first argument is -d (any case).
my $debug = 1;

# Quote page requested by the (currently commented-out) live fetch.
my $url = 'http://custom.marketwatch.com/custom/alliance/ftmw/invrel.asp?siteid=!plivadd-0773-4F6D-DHID-QN1112NFTD0X&symb=PLVD';

# Check @ARGV first so that running without arguments does not make lc()
# emit an "uninitialized value" warning under -w.
$debug++ if (@ARGV && lc($ARGV[0]) eq "-d");

# Scraped quote fields (date, curr, change, high, low, open).  Filled in by
# the parsing code and read by insert() and print_val().
my %val;
15
# Log a debug message when debugging is enabled.
#
# All arguments are concatenated and written twice: appended to the file
# "debug" in the current directory (prefixed with "###"), and printed to
# STDOUT.  Does nothing when $debug is false.
#
# If the log file cannot be opened a warning is issued and the message is
# still printed to STDOUT; nothing is written to an unopened handle.
sub print_debug {
    return if (! $debug);

    # Lexical handle + three-argument open: no global DEBUG bareword, and the
    # handle is only used when the open actually succeeded.
    if (open(my $log, '>>', 'debug')) {
        print {$log} "###", @_, "\n";
        close($log) || warn "can't close debug file: $!";
    } else {
        warn "can't open debug file: $!";
    }

    print @_, "\n";
}
23
# Dump every entry of %val to STDOUT as "key: value", one per line.
# Produces no output unless debugging is enabled.
sub print_val {
    return unless $debug;

    for my $field (keys %val) {
        print $field . ": " . $val{$field} . "\n";
    }
}
30
print_debug("debug level $debug");

# Connect to the PostgreSQL database "corp" with empty user name and password;
# abort with the DBI error text if the connection fails.
my $dbh = DBI->connect("DBI:Pg:dbname=corp","","") || die $DBI::errstr;

# HTTP client for fetching the quote page.  Uses the direct Class->new form
# instead of the indirect-object syntax "new LWP::UserAgent", which perl can
# mis-parse.
my $ua = LWP::UserAgent->new;
$ua->agent("pliva_harvester 0.0");
$ua->timeout(60);
# Load proxy settings from the environment first, then set an explicit proxy
# for the http and ftp schemes.
$ua->env_proxy();
$ua->proxy(['http', 'ftp'], 'http://proxy.pliva.hr:8080/');


# NOTE(review): not referenced anywhere else in the visible script.
my $close_time='21:21:21 CET';
43
# Store the scraped quote held in %val in the "stocks" table.
#
# Reads $val{date}, $val{curr}, $val{change}, $val{high}, $val{low} and
# $val{open}; the second column is always the literal 'LSE'.
#
# Nothing is inserted when a row with the same date already exists, or when
# any field is undefined or contains "n/a" (case-insensitive).  Every outcome
# is reported through print_debug().  Dies with the DBI error text if a
# statement cannot be prepared or executed.
sub insert {
    # The values come from a scraped remote HTML page, so they are passed as
    # bind parameters instead of being interpolated into the SQL text.
    my $sth = $dbh->prepare("select count(date) from stocks where date=?")
        || die $dbh->errstr;
    $sth->execute($val{date}) || die $sth->errstr;
    my ($nr) = $sth->fetchrow_array;
    $sth->finish;

    if ($nr == 0) {
        my $sql = "insert into stocks values (?,'LSE',?,?,?,?,?)";

        # Work on copies so that %val itself is left untouched.
        my ($date, @numbers) = @val{qw(date curr change high low open)};
        for my $number (@numbers) {
            # nuke leading + (e.g. "+1.25") which pgsql doesn't like
            $number =~ s/^\+(?=\d)// if (defined $number);
        }
        my @bind = ($date, @numbers);

        if (grep { !defined($_) || m,n/a,i } @bind) {
            print_val();
            print_debug("undefined values found. not inserting in db");
        } else {
            $dbh->do($sql, undef, @bind) || die "$sql\n".$DBI::errstr;
        }

        # Show the statement together with the values bound to it.
        my $shown = join(",", map { defined($_) ? $_ : 'undef' } @bind);
        print_debug("sql: $sql [$shown]\n");
    } else {
        print_debug("skip: $val{date}\n");
    }
}
62
# Request object for the live fetch; only the commented-out code below uses it.
my $req = HTTP::Request->new(GET => $url);
my $tree = HTML::TreeBuilder->new;

# Live fetching is disabled: the "if (1)" branch below parses a saved copy of
# the page from the local file "out-without_proxy" instead.
#my $res = $ua->request($req);
#if ($res->is_success) {
#	print_debug("html: ".$res->content."\n");
#	$tree->parse($res->content) || die "can't parse html file!";

if (1) {
    $tree->parse_file("out-without_proxy") || die "can't parse html file!";

    # find date: the first element with class ft-quotedate, whose text is
    # read as "H:M D/M/Y"
    my $t = $tree->look_down('class', 'ft-quotedate')
        || die "can't find date in html";
    if ($t->as_text =~ m,(\d+):(\d+) (\d+)/(\d+)/(\d+),) {
        my ($h,$m,$dd,$mm,$yy) = ($1,$2,$3,$4,$5);
        # Only two-digit years need the century added; a four-digit year is
        # used as it is (unconditionally adding 2000 would give e.g. 4002).
        $yy += 2000 if ($yy < 100);
        my $date="$yy-$mm-$dd";
        my $local_h = (localtime(time))[2];
        # NOTE(review): presumably the page shows a 12-hour clock without
        # am/pm; the hour is moved to the afternoon when the local hour is
        # more than 12 hours ahead of it -- confirm against the page.
        $h += 12 if ($local_h - $h > 12);
        my $time="$h:$m";
        print_debug("time: $time date: $date");
        $val{date}="$date $time";
    } else {
        die "can't recognise date format ".$t->as_text;
    }

    # last, change: exactly two elements with class ft-quoteLG, in that order
    my @large = $tree->look_down('class', 'ft-quoteLG');
    die "can't find class ft-quoteLG (last value and change)" if (! @large);
    die "can't find 2 classes ft-quoteLG" if (@large != 2);
    ($val{curr}, $val{change}) = map { $_->as_text } @large;

    # high, low, open: elements 1..3 of the eleven with class ft-quoteMd
    my @medium = $tree->look_down('class', 'ft-quoteMd');
    die "can't find class ft-quoteMd" if (! @medium);
    die "can't find 11 classes ft-quoteMd" if (@medium != 11);
    ($val{high}, $val{low}, $val{open}) = map { $_->as_text } @medium[1 .. 3];

    insert();
} else {
    warn "can't fetch stock data";
}

print_val();

# Release the parse tree explicitly once it is no longer needed.
$tree->delete;

$dbh->disconnect;
127

  ViewVC Help
Powered by ViewVC 1.1.26