--- trunk/all2xml.pl 2003/01/11 19:55:30 9
+++ trunk2/all2all.pl 2004/09/12 20:36:33 432
@@ -1,250 +1,421 @@
#!/usr/bin/perl -w
+=head1 NAME
+
+all2all.pl - basic script for all WebPAC needs
+
+=cut
+
use strict;
-use OpenIsis;
-use Getopt::Std;
use Data::Dumper;
-use XML::Simple;
-use Text::Unaccent 1.02; # 1.01 won't compile on my platform,
-require Unicode::Map8;
-use DBI;
-
-my $config=XMLin(undef, forcearray => [ 'isis' ], forcecontent => 1);
-my $dbh = DBI->connect("DBI:Pg:dbname=webpac","","") || die $DBI::errstr; # FIX
-# FIX; select relname from pg_class where relname like 'index_%' ;
-$dbh->begin_work || die $dbh->errstr();
-
-$dbh->do("delete from index_author") || die $dbh->errstr();
-$dbh->do("delete from index_title") || die $dbh->errstr();
-
-my %opts;
-
-# usage:
-# -d directory name
-# -m multiple directories
-# -q quiet
-# -s run swish
-
-getopts('d:m:qs', \%opts);
-
-my $db_dir = $opts{d} || "ps"; # FIX
-
-#die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts);
-
-#print Dumper($config->{indexer});
-#print "-" x 70,"\n";
-
-# how to convert isis code page to UTF8?
-my $isis_map = Unicode::Map8->new($config->{isis_codepage}) || die;
-
-sub isis2xml {
-
- my $row = shift @_;
-
- my $xml;
- $xml->{db_dir} = [ $db_dir ]; # FIX remove?
-
- sub isis_sf {
- my $row = shift @_;
- my $isis_id = shift @_;
- my $subfield = shift @_;
- if ($row->{$isis_id}->[0]) {
- my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);
- if (! defined $subfield || length($subfield) == 0) {
- # subfield list undef, empty or no defined subfields for this record
- my $all_sf = $row->{$isis_id}->[0];
- $all_sf =~ s/\^./ /g; nuke definirions
- return $all_sf;
- } elsif ($sf->{$subfield}) {
- return $sf->{$subfield};
- }
- }
- }
+use Carp;
- foreach my $field (keys %{$config->{indexer}}) {
+use lib './lib';
+use WebPAC;
+use WebPAC::jsFind;
+use WebPAC::Index;
- my $display_data = "";
- my $swish_data = "";
- my $index_data = "";
-
- foreach my $x (@{$config->{indexer}->{$field}->{isis}}) {
-
- my $display_tmp = "";
- my $swish_tmp = "";
- my $index_tmp = "";
-
- my $format = $x->{content};
- my $s = 1; # swish only
- my $d = 1; # display only
- my $i = 0; # index only
- $s = 0 if (lc($x->{type}) eq "display");
- $d = 0 if (lc($x->{type}) eq "swish");
- ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index");
-#print STDERR "## s: $s d: $d i: $i ## $format ##\n";
- # parse format
- my $prefix = "";
- if ($format =~ s/^([^\d]+)//) {
- $prefix = $1;
- }
- while ($format) {
- if ($format =~ s/^(\d\d\d)(\w?)//) {
- my $isis_tmp = isis_sf($row,$1,$2);
- if ($isis_tmp) {
-# $display_tmp .= $prefix . "/$1/$2/".$isis_tmp if ($d);
- $display_tmp .= $prefix . $isis_tmp if ($d);
- $swish_tmp .= $isis_tmp." " if ($s);
- $index_tmp .= $prefix . $isis_tmp if ($i);
-#print STDERR " $isis_tmp <--\n";
- }
- $prefix = "";
- } elsif ($format =~ s/^([^\d]+)//) {
- $prefix = $1;
- } else {
- print STDERR "WARNING: unparsed format '$format'\n";
- last;
- };
- }
- # add suffix
- $display_tmp .= $prefix if ($display_tmp);
- $index_tmp .= $prefix if ($index_tmp);
-
-# $display_data .= $display_tmp if ($display_tmp ne "");
-# $swish_data .= $swish_tmp if ($swish_tmp ne "");
- $display_data .= $display_tmp;
- $swish_data .= $swish_tmp;
- $index_data .= $index_tmp;
+my $webpac = new WebPAC(
+ code_page => 'ISO-8859-2',
+ limit_mfn => 500,
+ start_mfn => 6000,
+# debug => 1,
+# low_mem => 1,
+) || die;
- }
-#print "--display:$display_data\n--swish:$swish_data\n";
- #$xml->{$field."_display"} = $isis_map->tou($display_data)->utf8 if ($display_data);
- #$xml->{$field."_swish"} = unac_string($config->{isis_codepage},$swish_data) if ($swish_data);
- $xml->{$field."_display" } = [ $isis_map->tou($display_data)->utf8 ] if ($display_data);
- $xml->{$field."_swish"} = [ unac_string($config->{isis_codepage},$swish_data) ] if ($swish_data);
-
- # index
- if ($index_data && $index_data ne "") {
- my $sql = "select $field from index_$field where upper($field)=upper(?)";
- my $sth = $dbh->prepare($sql) || die $dbh->errstr();
- $sth->execute($index_data) || die "SQL: $sql; ".$dbh->errstr();
-#print STDERR "--->$index_data<---\n";
- if (! $sth->fetchrow_hashref) {
- my $sql = "insert into index_$field values (?)";
- my $sth = $dbh->prepare($sql) || die $dbh->errstr();
-#print STDERR "$sql: $index_data
+
+
+
+ search |
+ thesarus |
+ browse
+
+
+
+
+
+};
+
+my $l = $webpac->{'lookup'} || $log->logconfess("can't find lookup");
+
+my @tree = ({
+ # level 0
+ code_arr => sub { sort keys %{$l} },
+ filter_code => sub {
+ my $t = shift;
+ return $t if ($t =~ s/root://);
+ },
+ lookup_v900 => sub { shift @{$l->{"root:".$_[0]}} },
+ lookup_term => sub { shift @{$l->{"d:".$_[1]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[1]}} },
+ have_children => sub { defined($l->{"a:".$_[0]."::"}) },
+ child_code => sub { return $_[0] },
+ style => 'display: none',
+ },{
+ # 1
+ code_arr => sub { @{$l->{"a:".$_[0]."::"}} },
+ filter_code => sub { shift }, # nop
+ lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{"a:".$_[1].":"}) },
+ child_code => sub { return $_[1] },
+ style => 'display: none',
+ },{
+ # 2
+ code_arr => sub { @{$l->{"a:".$_[0].":"}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub { shift },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{"a:".$_[2].":".$_[1]}) },
+ child_code => sub { return "a:".$_[2].":".$_[1] },
+ style => 'display: none',
+ },{
+ # 3 uži pojam
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 4
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 5
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 6
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 7
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 8
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+# have_children => sub { defined($l->{$_[1]}) },
+# child_code => sub { return $_[1] },
+ have_children => sub { 0 },
+ child_code => sub { 0 },
+});
+
+my @show_ids;
+my @hide_ids;
+
+unroll(0,'');
+
+$log->debug("test filter: ",$tree[0]->{'filter_code'}->("root:99"));
+
+sub unroll {
+ my ($level,$start_code) = @_;
+
+ $log->logconfess("need level") unless (defined($level));
+
+ # all levels passed?
+ return if (! defined($tree[$level]));
+
+ $log->debug("unroll level $level, start code $start_code");
+
+ foreach my $code ($tree[$level]->{'code_arr'}->($start_code)) {
+
+ if ($code = $tree[$level]->{'filter_code'}->($code)) {
+
+ $log->debug("# $level filter passed code $code");
+
+ my $v900 = $tree[$level]->{'lookup_v900'}->($code,$start_code) || $log->warn("can't lookup_v900($code,$start_code)");
+ $log->debug("# $level lookup_v900($code,$start_code) = $v900");
+
+ my $term = $tree[$level]->{'lookup_term'}->($code,$v900) || $log->warn("can't lookup_term($code,$v900)");
+ $log->debug("# $level lookup_term($code,$v900) = $term");
+
+ my $mfn = $tree[$level]->{'lookup_mfn'}->($code,$v900) || $log->warn("can't lookup_mfn($code,$v900)");
+ $log->debug("# $level lookup_mfn($code,$v900) = $mfn");
+
+ $log->debug("$code -> $v900 : $term [$mfn]");
+
+ my ($link_start,$link_end) = ('','');
+
+ my $have_children = $tree[$level]->{'have_children'}->($code,$v900,$start_code);
+ if ($have_children) {
+ ($link_start,$link_end) = (qq{},qq{});
+ } else {
+ $log->debug("# $level doesn't have_children($code,$v900,$start_code)");
+ }
-This command will read ISIS data file using OpenIsis perl module and
-create XML file for usage with I
-indexer. Dispite it's name, this script B
-from isis files (isis allready has something like that). Output of this
-script is tailor-made for SWISH-E.
+ my $mfn_link = "thes/$mfn.html";
+ if (-e "out/$mfn_link") {
+ print HTML " " x $level .
+ qq{- ${link_start}${term}${link_end}}.
+ qq{ »
\n};
+ } else {
+ $log->warn("file 'out/$mfn_link' doesn't exist, skipping");
+ }
-=head1 AUTHOR
+ unless ($have_children) {
+ next;
+ }
+ my $style = $tree[$level]->{'style'};
-Dobrica Pavlinusic
+ print HTML " " x $level .
+ qq{\n \n};
+
+ if ($style) {
+ if ($style =~ m/display\s*:\s*none/i) {
+ push @hide_ids, "mfn$mfn";
+ } else {
+ push @show_ids, "mfn$mfn";
+ }
+ } else {
+ # default: show
+ push @show_ids, "mfn$mfn";
+ }
-=head1 COPYRIGHT
+ unroll($level+1, $tree[$level]->{'child_code'}->($code,$v900,$start_code));
+
+ print HTML " " x $level . qq{
\n};
-GNU Public License (GPL) v2 or later
+ }
+ }
+}
-=head1 SEE ALSO
+print HTML qq{
+
+
+