/[corp]/esi/filter3.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /esi/filter3.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations)
Tue Sep 3 08:00:08 2002 UTC (21 years, 9 months ago) by dpavlin
Branch: MAIN
File MIME type: text/plain
new parser using HTML::TreeBuilder

1 dpavlin 1.1 #!/usr/local/bin/perl -w
2    
3     use LWP::UserAgent;
4     use strict;
5     use DBI;
6     use HTML::TreeBuilder;
7    
# Debug level. Starts at 1, so print_debug() and print_val() produce output
# by default; raised by one when the first command-line argument is "-d"
# (compared case-insensitively).
my $debug = 1;

# Address of the quote page (symb=PLVD).
my $url = 'http://custom.marketwatch.com/custom/alliance/ftmw/invrel.asp?siteid=!plivadd-0773-4F6D-DHID-QN1112NFTD0X&symb=PLVD';

# Make sure an argument exists before lower-casing it: with an empty @ARGV,
# lc(undef) raises an "uninitialized value" warning under -w.
$debug++ if (@ARGV && lc($ARGV[0]) eq "-d");

# Scraped values shared by the subs in this file. The script fills in the
# keys date, curr, change, high, low and open.
my %val;
15    
# print_debug(@message)
#
# Emits a debug message when $debug is true: the message, prefixed with
# "###", is appended to the file "debug" in the current directory, and the
# unprefixed message is echoed to STDOUT. Each copy is followed by a newline.
# Does nothing when $debug is false.
sub print_debug {
    return if (! $debug);

    # Lexical handle and three-argument open. If the log file cannot be
    # opened we warn and skip the file copy, rather than printing to (and
    # closing) a handle that was never opened.
    if (open(my $log, '>>', 'debug')) {
        print $log "###", @_, "\n";
        close($log) || warn "can't close debug file: $!";
    } else {
        warn "can't open debug file: $!";
    }

    print @_, "\n";
    return;
}
23    
# print_val()
#
# Debugging aid: writes every entry of the file-level %val hash to STDOUT as
# one "key: value" line, in the order keys() returns them. Prints nothing
# when $debug is false.
sub print_val {
    return unless $debug;

    for my $field (keys %val) {
        print "$field: $val{$field}\n";
    }
}
30    
print_debug("debug level $debug");

# Connect through the Pg driver to the database "corp" with empty user name
# and password strings; abort with the DBI error text if that fails.
my $dbh = DBI->connect("DBI:Pg:dbname=corp","","") || die $DBI::errstr;

# HTTP client. Constructed with Class->new instead of the indirect-object
# form "new Class", which perl can mis-parse.
my $ua = LWP::UserAgent->new;
$ua->agent("pliva_harvester 0.0");
$ua->timeout(60);
# Load proxy settings from the environment, then set the proxy for http and
# ftp explicitly. NOTE(review): the explicit call runs last, so it presumably
# overrides whatever env_proxy() picked up for those two schemes -- confirm.
$ua->env_proxy();
$ua->proxy(['http', 'ftp'], 'http://proxy.pliva.hr:8080/');


# NOTE(review): not referenced anywhere else in the visible source.
my $close_time='21:21:21 CET';
43    
# insert()
#
# Stores the quote held in the file-level %val hash (keys date, curr, change,
# high, low, open) as a new row of the "stocks" table, with the literal
# 'LSE' in the second column.
#
#   * If a row with the same date already exists, nothing is written and a
#     "skip:" debug message is emitted.
#   * If any value contains "n/a" (case-insensitive), the values are dumped
#     with print_val() and the row is not inserted.
#   * Otherwise the row is inserted; the script dies with the statement and
#     the DBI error text if the insert fails.
#
# The values come from a scraped web page, so they are passed to the database
# as bound parameters and never spliced into the SQL text.
sub insert {
    my $sth = $dbh->prepare("select count(date) from stocks where date=?");
    $sth->execute($val{date});
    my ($nr) = $sth->fetchrow_array;
    $sth->finish;

    if ($nr == 0) {
        my $sql = "insert into stocks values (?,'LSE',?,?,?,?,?)";

        # Work on copies so %val itself is left untouched.
        my @numbers = @val{qw(curr change high low open)};
        s/^\+(?=\d)// for @numbers;	# nuke + which pgsql doesn't like
        my @bind = ($val{date}, @numbers);

        if (grep { m,n/a,i } @bind) {
            print_val();
            print_debug("undefined values found. not inserting in db");
        } else {
            $dbh->do($sql, undef, @bind)
                || die "$sql\n".$DBI::errstr;
        }
        # Logged whether or not the row was written, like the statement text
        # always was; the bound values are appended for reference.
        print_debug("sql: $sql [".join(",", @bind)."]\n");
    } else {
        print_debug("skip: $val{date}\n");
    }
    return;
}
62    
# Main flow: parse the saved quote page, extract the timestamp and prices
# into %val, store them with insert(), then dump %val and disconnect.

# $req is only needed by the live fetch that is commented out below.
my $req = HTTP::Request->new(GET => $url);
my $tree = HTML::TreeBuilder->new;

#my $res = $ua->request($req);
#if ($res->is_success) {
#	print_debug("html: ".$res->content."\n");
#	$tree->parse($res->content) || die "can't parse html file!";

# The live fetch above is disabled and the page is read from a local file
# instead, so the else branch at the bottom cannot currently be reached.
if (1) {
    $tree->parse_file("out-without_proxy") || die "can't parse html file!";

    # find date
    #	$t = $tree->look_down('_tag', 'td');
    #	print "##td: ",$t->as_text,"\n";
    if (my $t = $tree->look_down('class', 'ft-quotedate')) {
        # Expected text: "H:M D/M/Y"
        if ($t->as_text =~ m,(\d+):(\d+) (\d+)/(\d+)/(\d+),) {
            my ($h,$m,$dd,$mm,$yy) = ($1,$2,$3,$4,$5);
            # A two-digit year is taken as 20yy; a year that already has
            # the century is used unchanged instead of getting 2000 added.
            $yy += 2000 if ($yy < 100);
            my $date="$yy-$mm-$dd";
            # NOTE(review): presumably turns a 12-hour page time into a
            # 24-hour one by comparing it with the local clock -- confirm.
            my (undef,undef,$local_h) = localtime(time);
            $h += 12 if ($local_h - $h > 12);
            my $time="$h:$m";
            print_debug("time: $time date: $date");
            $val{date}="$date $time";
        } else {
            die "can't recognise date format ".$t->as_text;
        }
    } else {
        die "can't find date in html";
    }

    # last, change: exactly two ft-quoteLG elements are expected
    if (my @large = $tree->look_down('class', 'ft-quoteLG')) {
        if (@large == 2) {
            $val{curr} = $large[0]->as_text;
            $val{change} = $large[1]->as_text;
        } else {
            die "can't find 2 classes ft-quoteLG";
        }
    } else {
        die "can't find class ft-quoteLG (last value and change)";
    }

    # high, low, open: exactly eleven ft-quoteMd elements are expected, of
    # which the second, third and fourth are used
    if (my @medium = $tree->look_down('class', 'ft-quoteMd')) {
        if (@medium == 11) {
            $val{high} = $medium[1]->as_text;
            $val{low} = $medium[2]->as_text;
            $val{open} = $medium[3]->as_text;
        } else {
            die "can't find 11 classes ft-quoteMd";
        }
    } else {
        die "can't find class ft-quoteMd";
    }

    insert();
} else {
    warn "can't fetch stock data";
}

# Release the parsed tree explicitly once it is no longer needed.
$tree->delete;

print_val();

$dbh->disconnect;
127    

  ViewVC Help
Powered by ViewVC 1.1.26