# Script-level state and command-line option parsing.
# In this view each numbered row is followed by two versions of that line.
27 |
my $aname; ## anchor name on the original pages |
my $aname; ## anchor name on the original pages |
28 |
|
|
29 |
my $nn_dir="../"; # directory holding the wget-fetched files |
my $nn_dir="../"; # directory holding the wget-fetched files |
30 |
# URL template with god, br, mid and fragment placeholders; the two versions
# differ only in the mid placeholder (%d vs %s).
# NOTE(review): presumably expanded with sprintf -- confirm at the call site.
my $url="http://www.nn.hr/CijeliBrojS.asp?god=%d&br=%s&mid=%d#%d"; |
my $url="http://www.nn.hr/CijeliBrojS.asp?god=%d&br=%s&mid=%s#%d"; |
31 |
|
|
32 |
my %opts; |
my %opts; |
33 |
# Parse command-line switches into %opts.
# NOTE(review): if this is Getopt::Std's getopts (the import is outside this
# view), v, q and d are boolean flags and l takes a value -- confirm.
getopts("vqdl:", \%opts); |
getopts("vqdl:", \%opts); |
100 |
$naslov_czs = join(" ",$hr->alternatives(split(/ /,$naslov_czs))); |
$naslov_czs = join(" ",$hr->alternatives(split(/ /,$naslov_czs))); |
101 |
# $naslov_czs = $hr->minimal(split(/ /,$naslov_czs)); |
# $naslov_czs = $hr->minimal(split(/ /,$naslov_czs)); |
102 |
my $xml="<nn>\n<br>$br</br>\n<god>$god</god>\n<nr>$nr</nr>\n<aname>$aname</aname>\n"; |
my $xml="<nn>\n<br>$br</br>\n<god>$god</god>\n<nr>$nr</nr>\n<aname>$aname</aname>\n"; |
103 |
$xml.="<naslov>". $l2_map->tou($naslov)->utf8 ."</naslov>\n"; |
my $naslov_utf=$l2_map->tou($naslov)->utf8; |
104 |
|
|
105 |
|
# Escape <, >, & and " to produce valid XML |
106 |
|
# Map each XML-special character to its entity reference; a map from each
# character to itself would turn the substitution on $naslov_utf into a no-op.
my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
107 |
|
my $escape_re = join '|' => keys %escape; |
108 |
|
$naslov_utf =~ s/($escape_re)/$escape{$1}/g; |
109 |
|
|
110 |
|
$xml.="<naslov>$naslov_utf</naslov>\n"; |
111 |
$xml.="<naslov_czs>$naslov_czs</naslov_czs>\n</nn>\n\n"; |
$xml.="<naslov_czs>$naslov_czs</naslov_czs>\n</nn>\n\n"; |
112 |
dump_to_swish($xml,$god,$br,$nr,$aname); |
dump_to_swish($xml,$god,$br,$nr,$aname); |
113 |
|
|
117 |
} |
} |
118 |
|
|
119 |
if ($sadrzaj) { |
if ($sadrzaj) { |
120 |
if (s/<a href="#([^"]+)">\s*(\d+)\.\s*<[^>]+>//i) { |
if (s/<a href="#([^"]+)">\s*(\S+)\.\s*<[^>]+>//i) { |
121 |
($aname,$nr) = ($1,$2); |
($aname,$nr) = ($1,$2); |
122 |
} elsif (s/<a href="Javascript:Mojdok\((\d+),(\d+),'*(\w+)'*,(\d+)\)[^>]*>//i) { |
} elsif (s/<a href="Javascript:Mojdok\((\d+),(\d+),'*(\w+)'*,(\d+)\)[^>]*>//i) { |
123 |
($nr,$aname) = ($3,$4); |
($nr,$aname) = ($3,$4); |