/[nn]/swish/html2xml.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /swish/html2xml.pl

Parent Directory | Revision Log | View Patch

revision 1.5 by dpavlin, Fri Sep 13 09:20:52 2002 UTC | revision 1.8 by dpavlin, Tue Sep 9 08:20:53 2003 UTC
# Line 17  use strict; Line 17  use strict;
17  use Getopt::Std;  use Getopt::Std;
18  use Lingua::Spelling::Alternative;  use Lingua::Spelling::Alternative;
19  require Unicode::Map8;  require Unicode::Map8;
20    use GDBM_File;
21    
22  my $sadrzaj=0;  my $sadrzaj=0;
23  my $nr=0;  my $nr=0;
# Line 29  my $aname;     ## ancor name na originalnim Line 30  my $aname;     ## ancor name na originalnim
30  my $nn_dir="../";               # dir u kojem su wget-ani fileovi  my $nn_dir="../";               # dir u kojem su wget-ani fileovi
31  my $url="http://www.nn.hr/CijeliBrojS.asp?god=%d&br=%s&mid=%s#%d";  my $url="http://www.nn.hr/CijeliBrojS.asp?god=%d&br=%s&mid=%s#%d";
32    
33    my $gdbm_file="./brzakona.gdbm";
34    
35  my %opts;  my %opts;
36  getopts("vqdl:", \%opts);  getopts("vqdl:", \%opts);
37    
38  my $brojeva=0;  my $brojeva=0;
39  my $zakona=0;  my $zakona=0;
40    my $zak_u_broju;
41    
42  my $hr = new Lingua::Spelling::Alternative( DEBUG => $opts{d} );  my $hr = new Lingua::Spelling::Alternative( DEBUG => $opts{d} );
43  #$hr->load_affix("$nn_dir/search/croatian.aff");  #$hr->load_affix("$nn_dir/search/croatian.aff");
# Line 41  $hr->load_findaffix("$nn_dir/prvih_50.tx Line 45  $hr->load_findaffix("$nn_dir/prvih_50.tx
45    
46  my $l2_map = Unicode::Map8->new("ISO-8859-2") || die;  my $l2_map = Unicode::Map8->new("ISO-8859-2") || die;
47    
48    my %br_zakona;
49    tie %br_zakona, 'GDBM_File', $gdbm_file, &GDBM_NEWDB, 0644;
50    
51  #--------------------------------------------------------------------  #--------------------------------------------------------------------
52    
53    sub save_br_zak {
54            my $god = shift || return;
55            my $br = shift || return;
56            my $zak_u_broju = shift || return;
57            print STDERR "[$god/$br: $zak_u_broju zakona]\n" if (! $opts{q});
58            $br_zakona{sprintf("%04d/%02d",$god,$br)} = $zak_u_broju;
59    }
60    
61    #--------------------------------------------------------------------
62    
63  sub dump_to_swish {  sub dump_to_swish {
64          my $xml = shift @_;          my $xml = shift @_;
65          my ($god,$br,$nr,$aname) = @_;          my ($god,$br,$nr,$aname) = @_;
# Line 68  if ($opts{l}) { Line 86  if ($opts{l}) {
86  }  }
87  closedir(DIR);  closedir(DIR);
88    
89  foreach my $file (@files) {  foreach my $file (sort @files) {
90          open(IN,"$nn_dir/$file") || die "can't open '$nn_dir/$file': $!";          open(IN,"$nn_dir/$file") || die "can't open '$nn_dir/$file': $!";
91    
92          if ($file=~m/god=(\d+)\&br=(\d+)/) {          if ($file=~m/god=(\d+)\&br=(\d+)/) {
93                    save_br_zak($god,$br,$zak_u_broju);
94                    print STDERR "$file " if (! $opts{q});
95                  ($br,$god) = ($2,$1);                  ($br,$god) = ($2,$1);
                 print STDERR "$file -- $2 -- $1\n" if (! $opts{q});  
96                  $brojeva++;                  $brojeva++;
97                    $zak_u_broju = 0;
98          }          }
99    
100          while(<IN>) {          while(<IN>) {
# Line 114  foreach my $file (@files) { Line 134  foreach my $file (@files) {
134                          $naslov="";                          $naslov="";
135                          $nr=0;                          $nr=0;
136                          $zakona++;                          $zakona++;
137                            $zak_u_broju++;
138                  }                  }
139    
140                  if ($sadrzaj) {                  if ($sadrzaj) {
# Line 135  foreach my $file (@files) { Line 156  foreach my $file (@files) {
156          close(IN);          close(IN);
157  }  }
158    
159    save_br_zak($god,$br,$zak_u_broju);
160  print STDERR "Ukupno $brojeva brojeva NN, sa $zakona zakona...\n" if (! $opts{q});  print STDERR "Ukupno $brojeva brojeva NN, sa $zakona zakona...\n" if (! $opts{q});
161    
162    untie %br_zakona;

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.8

  ViewVC Help
Powered by ViewVC 1.1.26