1 |
dpavlin |
1.1 |
#!/usr/bin/perl -w |
2 |
|
|
|
3 |
|
|
use LWP::UserAgent; |
4 |
|
|
use HTML::TreeBuilder; |
5 |
|
|
use URI::Escape; |
6 |
|
|
use strict; |
7 |
|
|
require "../common.pm"; |
8 |
|
|
|
9 |
|
|
my $debug=1; |
10 |
|
|
|
11 |
|
|
my $dir = open_data_files("ebsco"); |
12 |
|
|
my $last_tell=0; |
13 |
|
|
|
14 |
|
|
print MPS "M working...\n"; |
15 |
|
|
|
16 |
|
|
my %dbs = ( |
17 |
|
|
'http://www.epnet.com/TitleLists/html/ap_ft_h1.htm' => |
18 |
|
|
'Academic Search Premier', |
19 |
|
|
'http://www.epnet.com/TitleLists/html/bu_ft_h1.htm' => |
20 |
|
|
'Business Source Premier', |
21 |
|
|
'http://www.epnet.com/TitleLists/html/hc_h1.htm' => |
22 |
|
|
'Health Source: Nursing/Academic Edition', |
23 |
|
|
'http://www.epnet.com/images/html.gif' => |
24 |
|
|
'MasterFILE Premier', |
25 |
|
|
'http://www.epnet.com/TitleLists/html/bw_h1.htm' => |
26 |
|
|
'Regional Business News |
27 |
|
|
) |
28 |
|
|
|
29 |
|
|
my $url = 'http://www.epnet.com/TitleLists/html/ap_ft_h1.htm'; |
30 |
|
|
my $base_url = 'http://web13.epnet.com/HJAFDetail.asp?tb=1&_ug=dbs+0+ln+en%2Dus+sid+956028C9%2DC274%2D45E9%2DB9C3%2DE2EEDB566264%40Sessionmgr6+46E0&_uh=btn+Y+fst+Zoologica++Scripta+idb+aphish+jdb+aphjnh+lst+Zygon%3A++Journal++of++Religion++%26++Science+md+B+op+scan+shn+1+B7D1&_us=db+0+dstb+ES+fh+0+hd+0+hs+0+or+Date+sm+ES+ss+SO+BC5A&vw=D&rn=&st='; |
31 |
|
|
|
32 |
|
|
$debug++ if (lc($ARGV[0]) eq "-d"); |
33 |
|
|
|
34 |
|
|
sub print_debug { |
35 |
|
|
return if (! $debug); |
36 |
|
|
open(DEBUG,">> debug") || warn "can't open debug file!"; |
37 |
|
|
print DEBUG "###",@_; |
38 |
|
|
print @_; |
39 |
|
|
close(DEBUG); |
40 |
|
|
} |
41 |
|
|
|
42 |
|
|
print_debug("debug level $debug"); |
43 |
|
|
|
44 |
|
|
my $ua = new LWP::UserAgent; |
45 |
|
|
$ua->agent("Mjesec educational harvester -- contact mglavica\@ffzg.hr 0.0"); |
46 |
|
|
$ua->timeout(60); |
47 |
|
|
#$ua->env_proxy(); |
48 |
|
|
#$ua->proxy(['http', 'ftp'], 'http://proxy.carnet.hr:8001/'); |
49 |
|
|
|
50 |
|
|
my $req = HTTP::Request->new(GET => $url); |
51 |
|
|
|
52 |
|
|
my $tree = HTML::TreeBuilder->new; |
53 |
|
|
my $res = $ua->request($req); |
54 |
|
|
if ($res->is_success) { |
55 |
|
|
$tree->parse($res->content); |
56 |
|
|
# print_debug($res->content); |
57 |
|
|
|
58 |
|
|
#if (1) { |
59 |
|
|
# $tree->parse_file("list.html"); # ! |
60 |
|
|
|
61 |
|
|
print "parse...\n"; |
62 |
|
|
|
63 |
|
|
foreach my $tr ($tree->look_down('_tag', 'tr')) { |
64 |
|
|
my $link; |
65 |
|
|
if (1) { # oh, i'm lazy to fix indent... |
66 |
|
|
my @arr; |
67 |
|
|
foreach ($tr->look_down('_tag','td')) { |
68 |
|
|
push @arr,$_->as_text; |
69 |
|
|
} |
70 |
|
|
my ($issn,$title,$publisher) = @arr; |
71 |
|
|
print "#### $issn # $title # $publisher\n"; |
72 |
|
|
print "##",join("|",@arr),"\n"; |
73 |
|
|
if ($issn =~ m/^\d{4}\-\d{3}[\dX]/) { |
74 |
|
|
my $bib = ""; |
75 |
|
|
my $mps = ""; |
76 |
|
|
|
77 |
|
|
$bib .= "%tip Èasopis\n"; |
78 |
|
|
$bib .= "%tip on-line\n"; |
79 |
|
|
# url |
80 |
|
|
$bib .= "%856 $base_url".uri_escape($title)."\n"; |
81 |
|
|
|
82 |
|
|
# naslov |
83 |
|
|
$bib .= "%200+ $title\n"; |
84 |
|
|
$mps .= mps_expand(2,$title); |
85 |
|
|
|
86 |
|
|
# izdavanje |
87 |
|
|
$bib .= "%210+ $publisher\n"; |
88 |
|
|
$mps .= mps_expand(3,$publisher); |
89 |
|
|
|
90 |
|
|
# issn |
91 |
|
|
$bib .= "%ISSN $issn\n"; |
92 |
|
|
$mps .= mps_expand(2,$issn); |
93 |
|
|
|
94 |
|
|
# tip |
95 |
|
|
$mps .= mps_expand(17,"on-line"); |
96 |
|
|
|
97 |
|
|
$mps .= "H $title <i>(on-line, EBSCO Academic Search Premier)</i>\n"; |
98 |
|
|
|
99 |
|
|
print R $bib."\n"; |
100 |
|
|
$mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n"; |
101 |
|
|
$last_tell=tell(R); |
102 |
|
|
|
103 |
|
|
print R "\n"; |
104 |
|
|
|
105 |
|
|
$mps .= "E\n"; |
106 |
|
|
|
107 |
|
|
print S $mps; |
108 |
|
|
print MPS $mps; |
109 |
|
|
} |
110 |
|
|
} |
111 |
|
|
} |
112 |
|
|
|
113 |
|
|
$tree->delete; # clear memory! |
114 |
|
|
|
115 |
|
|
} else { |
116 |
|
|
warn "can't fetch web page from '$url'"; |
117 |
|
|
} |
118 |
|
|
|
119 |
|
|
print S "M over and out\nX\n"; |
120 |
|
|
print MPS "M over and out\nX\n"; |
121 |
|
|
close(MPS); |