--- isis2stream.pl 2002/06/13 15:32:10 1.1
+++ isis2stream.pl 2002/06/16 19:39:42 1.8
@@ -3,58 +3,51 @@
use strict;
use OpenIsis;
use Getopt::Std;
-#use Data::Dumper;
-
-my $install_dir="/local/index";
-my $mpsindex="/local/mps-5.3/bin/mpsindex -l 9 -b";
-my $isis_data="/var/autofs/misc/isis_data/";
-#my $isis_data="/mnt/20020606/Isis/Data/"; # doma
+use Data::Dumper;
+use common;
my %opts;
-getopt('dD', \%opts);
+getopt('dm', \%opts);
-die "usage: $0 -d [database_dir] " if (! $opts{d});
+die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts);
my $db_dir = $opts{d};
-mkdir "$install_dir/$db_dir" if (!-e "$install_dir/$db_dir");
-mkdir "$install_dir/$db_dir/data" if (!-e "$install_dir/$db_dir/data");
+mkdir "$common::install_dir/$db_dir" if (!-e "$common::install_dir/$db_dir");
+mkdir "$common::install_dir/$db_dir/data" if (!-e "$common::install_dir/$db_dir/data");
-my $dir="$install_dir/$db_dir/data";
+my $dir="$common::install_dir/$db_dir/data";
-open(S,"> $dir/stream") || die "can't open output $dir/stram: $!";
+open(S,"> $dir/stream") || die "can't open output $dir/stream: $!";
open(R,"> $dir/bib") || die "can't open output $dir/bib: $!";
-open(MPS,"| $mpsindex -d $install_dir/$db_dir -autokey") || die "can't start MPS indexer $mpsindex: $!";
+open(MPS,"| $common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!";
#open(MPS,"> /tmp/mpsindex") || die "mps: $!";
-my $s="V 5 3
-L hr-HR
-F 700+ 1 Autor
-F 200+ 2 Naslov
-F 210 3 Izdavanje
-F 225 4 Nakladnika cjelina
-F 300+ 5 Napomene
-F 330 6 Sadraj
-F 464 7 Analitiki radovi
-F 610 8 Kljune rijei
-F 675 9 UDK
-F 686 10 CC
-F 990 11 Signatura
-F 991 12 Inventarni broj
-F 10 13 ISBN
-";
+print S $common::mps_header;
+print MPS $common::mps_header;
+
+#--------------------------------------------------------------------
+# init hash %in_mps_header (MPS field number => ISIS field) for config checks later
-print S $s;
-print MPS $s;
+my %in_mps_header;
+# Every "F <isis> <mps> <name>" line of the MPS header contributes one
+# entry, keyed by its third token with the second token as the value.
+for my $header_line (grep { /^F / } split(/\n/,$common::mps_header)) {
+    my @token = split(/ /,$header_line,4);
+    $in_mps_header{$token[2]} = $token[1];
+}
+require "./search/config.pm";
+#--------------------------------------------------------------------
#
# expand(nr,"space separated string");
#
sub expand {
my $nr = shift @_;
+ die "$nr is not in mps_header" if (!$in_mps_header{$nr});
my $out = "";
while (my $fld = c_852_iso(shift @_)) {
my @words=split(/\s+/,$fld);
@@ -71,97 +64,6 @@
#--------------------------------------------------------------------
-################### ERASE###############3
-
-# expand sub-fileds from ISIS field
-# (^a.....^b....)
-my %data; # FIX
-sub ex_sf {
- %data = ();
- my $in = $_[0];
- if (my $tmp = $in) {
-# $tmp =~ tr/џ/ƾ/; # ISIS -> iso-8859-2
- $tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/;
- if ($in =~ m/^\^/) {
- my @sub = split(/\^/,$in);
- foreach my $fld (@sub) {
- $data{$1} = $2 if ($fld =~ m/^(\w+)(.+)$/)
- }
- } else {
- $data{all} = $in."<-- iz polja bez podpolja";
- }
- }
-}
-
-# dump all sub-fields
-sub all_sf {
- my $nr = shift @_;
- my $out="";
- foreach my $k (sort keys %data) {
- $out.=expand($nr,$data{$k});
- }
- return $out;
-}
-
-sub all_sf_r {
- my $nr = shift @_;
- my $out="";
- foreach my $k (sort {$b cmp $a} keys %data) {
- $out.=expand($nr,$data{$k});
- }
- return $out;
-}
-
-sub all_sf2bib {
- my $nr = shift @_;
- my $max_in_line=shift @_ || 0;
- my $sep = shift @_ || ' ';
- my $out;
- my $i=0;
- my $bib = "";
- foreach my $k (sort keys %data) {
- if ($out) {
- $out.= $sep.$data{$k};
- } else {
- $out = $data{$k};
- }
- $i++;
- if ($i == $max_in_line) {
- $bib .= $nr." ".$out."\n" if ($out);
- $i=0;
- $out="";
- }
- }
- $bib .= $nr." ".$out."\n" if ($out);
- return $bib;
-}
-
-sub all_sf2bib_r {
- my $nr = shift @_;
- my $max_in_line=shift @_ || 0;
- my $sep = shift @_ || ' ';
- my $out;
- my $i=0;
- my $bib = "";
- foreach my $k (sort {$b cmp $a} keys %data) {
- if ($out) {
- $out.= $sep.$data{$k};
- } else {
- $out = $data{$k};
- }
- $i++;
- if ($i == $max_in_line) {
- $bib .= $nr." ".$out."\n" if ($out);
- $i=0;
- $out="";
- }
- }
- $bib .= $nr." ".$out."\n" if ($out);
- return $bib;
-}
-
-#--------------------------------------------------------------------
-
sub c_852_iso {
my $tmp = $_[0];
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/ if ($tmp);
@@ -177,29 +79,8 @@
}
#--------------------------------------------------------------------
-#
-# mps_expand(nr,"space separated string");
-#
-
-sub mps_expand {
- my $nr = shift @_;
- my $out = "";
- while (my $fld = shift @_) {
- if ($fld =~ m/\s+/) {
- foreach my $w (split(/\s+/,$fld)) {
- # FIX: this should be replaced by stemmer!
- $out .= "W $w $nr\n";
- }
- } else {
- $out .= "W $fld $nr\n";
- }
- }
- return c_852_czs($out);
-}
-
-#--------------------------------------------------------------------
-# $mps .= sf_to_mps($subfiled_hash,"subfields",mps_id);
+# $mps .= sf_to_mps($subfield_hash,"subfields",$mps_id);
#
# subfields options:
# * - all (no sort)
@@ -230,6 +111,7 @@
sub sf_to_mps {
my ($sf_hash,$subfields,$mps_id) = @_;
+ die "$mps_id is not in mps_header" if (!$in_mps_header{$mps_id});
my $out="";
my @sf_arr = subfields_str_2_arr($subfields,$sf_hash);
@@ -249,6 +131,8 @@
my $mps_id = shift @_ || die;
my $subfields = shift @_;
+ die "$mps_id is not in mps_header" if (!$in_mps_header{$mps_id});
+
my $i=0;
my $out = "";
@@ -292,6 +176,9 @@
my $group_sort = shift @_ || '';
my $sep = shift @_ || ' ';
+ my $fld = $bib_id; $fld =~ s/^%//;
+ die "$fld is not in FieldNames" if (!$default::FieldNames{$fld});
+
my $i=0;
my $bib="";
@@ -349,7 +236,7 @@
sub isis_sf {
my $row = shift @_;
my $isis_id = shift @_;
- my $subfield = shift @_ || 'a';
+ my $subfield = shift @_;
my $prefix = shift @_ || '';
my $postfix = shift @_ || '';
@@ -357,7 +244,10 @@
if ($row->{$isis_id}->[0]) {
my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);
- if (length($subfield) == 1) {
+ if (! defined $subfield || length($subfield) == 0) {
+ # subfield list undef, empty or no defined subfields for this record
+ return $prefix . $row->{$isis_id}->[0] . $postfix;
+ } elsif (length($subfield) == 1) {
if ($sf->{$subfield}) {
return $prefix . $sf->{$subfield} . $postfix;
} else {
@@ -381,23 +271,42 @@
}
#--------------------------------------------------------------------
-#--------------------------------------------------------------------
-
my $last_tell=0;
-my $db = OpenIsis::open( "$isis_data/$db_dir/LIBRI/LIBRI" );
+my @isis_dirs = ( '.' ); # use dirname as database name
-my $max_rowid = OpenIsis::maxRowid( $db );
+if ($opts{m}) {
+ @isis_dirs = split(/,/,$opts{m});
+}
+
+my @isis_dbs;
+
+# Each directory may hold a LIBRI and/or a PERI subdirectory; for every
+# one that exists, queue the path <dir>/<TYPE>/<TYPE> (LIBRI before PERI).
+foreach my $isis_dir (@isis_dirs) {
+    foreach my $type ('LIBRI', 'PERI') {
+        my $type_dir = "$common::isis_data/$db_dir/$isis_dir/$type";
+        push @isis_dbs, "$type_dir/$type" if (-e $type_dir);
+    }
+}
-my $last_pcnt = 0;
+foreach my $isis_db (@isis_dbs) {
-for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
+ print MPS "M reading ISIS from '$isis_db'...\n";
+
+ my $db = OpenIsis::open( "$isis_db" );
+
+ my $max_rowid = OpenIsis::maxRowid( $db );
+
+ my $last_pcnt = 0;
+
+ for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
my $row = OpenIsis::read( $db, $row_id );
if (my $tmp = $row->{'200'}->[0]) {
my $bib = "%MFN $row->{mfn}\n";
- my $mps;
+ my $mps = "W $row->{mfn} 14\n";
my $pcnt = int($row->{mfn} * 100 / $max_rowid);
if ($pcnt != $last_pcnt) {
@@ -422,6 +331,7 @@
$mps .= isis_to_mps($row,'711',1);
$mps .= isis_to_mps($row,'503',1);
$mps .= isis_to_mps($row,'702',1);
+ $mps .= isis_to_mps($row,'200',1,"fg");
$bib .= isis_to_bib($row,'205','%205');
@@ -437,17 +347,60 @@
$book .= ". ".$sf->{c} if ($sf->{c});
$book .= " / ".$sf->{x} if ($sf->{x});
$book .= " ; ".$sf->{y} if ($sf->{y});
- $bib .= "%200 $book\n" if ($book);
+ $bib .= "%200+ $book\n" if ($book);
$mps .= isis_to_mps($row,'200',2,"akcde");
$mps .= isis_to_mps($row,'532',2);
$mps .= isis_to_mps($row,'424',2);
+ $mps .= isis_to_mps($row,'230',2,"ae");
+ $mps .= isis_to_mps($row,'231',2,"ae");
+ $mps .= isis_to_mps($row,'232',2,"ae");
+ $mps .= isis_to_mps($row,'233',2,"ae");
+
+
+ # Four numbered slots N = 0..3 each contribute one %sv line to $bib,
+ # built from subfields v and a of field 23N followed by the whole
+ # fields 25N, 26N and 29N (the last one introduced by "ISBN ").
+ # Slots after the first start with a break before the 23N^v part.
+ # An empty slot produces no line at all.
+ # NOTE(review): the '<br>' separators are reconstructed -- these
+ # literals reached review split across two lines; confirm against
+ # the repository copy before applying.
+ my $tmp;
+ foreach my $slot (0 .. 3) {
+     $tmp = isis_sf($row,"23$slot",'v',($slot ? '<br>' : '')).
+         isis_sf($row,"23$slot",'a',' : ').
+         isis_sf($row,"25$slot",undef,'. - ').
+         isis_sf($row,"26$slot",undef,'. - ').
+         isis_sf($row,"29$slot",undef,'<br>ISBN ');
+     $bib .= "%sv $tmp\n" if ($tmp);
+ }
+
+ $mps .= isis_to_mps($row,'270',2);
+ $mps .= isis_to_mps($row,'271',2);
+ $mps .= isis_to_mps($row,'272',2);
+ $mps .= isis_to_mps($row,'273',2);
+
$headline .= isis_sf($row,'700','b'," ");
$headline .= isis_sf($row,'700','a'," ");
# izdavac
$mps .= isis_to_mps($row,'210',3);
+ $mps .= isis_to_mps($row,'250',3);
+ $mps .= isis_to_mps($row,'251',3);
+ $mps .= isis_to_mps($row,'252',3);
+ $mps .= isis_to_mps($row,'253',3);
# if (my $sf = OpenIsis::subfields($row->{'210'}->[0])) {
# my $tmp;
# $tmp .= $sf->{a} if ($sf->{a});
@@ -455,7 +408,7 @@
# $tmp .= ", ".$sf->{d} if ($sf->{d});
# $bib .= "%210 $tmp\n" if ($tmp);
# }
- $bib .= "%210 ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n";
+ $bib .= "%210+ ".isis_sf($row,'210','acd', '','', ('',' : ',', ') )."\n";
if (my $year = isis_sf($row,'210','d')) {
$year =~ s/^\s*cop\.*\s*//i;
@@ -464,7 +417,12 @@
$headline .= " ($year)";
}
- $bib .= isis_to_bib($row,'215','%215', '*', undef, undef, ', ');
+ $mps .= isis_to_mps($row,'215',15);
+ $mps .= isis_to_mps($row,'260',15);
+ $mps .= isis_to_mps($row,'261',15);
+ $mps .= isis_to_mps($row,'262',15);
+ $mps .= isis_to_mps($row,'263',15);
+ $bib .= isis_to_bib($row,'215','%215+', '*', undef, undef, ', ');
# $bib .= isis_to_bib($row,'225','%225', 'aehivw');
$bib .= "%225 ".isis_sf($row,'225','aevhiw', '(',')', ('',' : ',' ; ','. ',', ',' ; '))."\n";
@@ -476,6 +434,10 @@
$mps .= isis_to_mps($row,'300',5);
$mps .= isis_to_mps($row,'320',5);
$mps .= isis_to_mps($row,'327',5);
+ $mps .= isis_to_mps($row,'280',5);
+ $mps .= isis_to_mps($row,'281',5);
+ $mps .= isis_to_mps($row,'282',5);
+ $mps .= isis_to_mps($row,'283',5);
$bib .= isis_to_bib($row,'330','%330');
$mps .= isis_to_mps($row,'330',6);
@@ -497,15 +459,28 @@
$bib .= isis_to_bib($row,'991','%991');
$mps .= isis_to_mps($row,'991',12);
- # ISBN
- if (my $isbn = $row->{10}->[0]) {
- $isbn =~ s/ +//g; # remove spaces
- $mps .= "W $isbn 13\n";
- $bib .= "%ISBN $isbn\n";
- $isbn =~ s/-//g;
- $mps .= "W $isbn 13\n";
+ # store_isn($isn,$nr,$tag);
+ #
+ # Adds the number $isn to the record being built: spaces are removed,
+ # a "W" line for MPS field $nr goes to $mps and a "$tag $isn" line to
+ # $bib. If the number contains dashes, the dash-less form is indexed
+ # as well. Does nothing when $isn is empty.
+ #
+ # Installed as a fresh closure on every pass: a named
+ # "sub store_isn {...}" at this spot binds $mps and $bib only once, to
+ # the first record's variables, so later records would lose their lines.
+ *store_isn = sub {
+     if (my $isn = shift @_) {
+         my $nr = shift @_;
+         my $tag = shift @_;
+         $isn =~ s/ +//g; # remove spaces
+         $mps .= "W $isn $nr\n";
+         $bib .= "$tag $isn\n";
+         # s/// returns true only if at least one dash was removed
+         if ($isn =~ s/-//g) {
+             $mps .= "W $isn $nr\n";
+         }
+     }
- }
+ };
- $mps .= isis_to_mps($row,'10',12);
+
+ # ISBN
+ store_isn($row->{10}->[0],13,'%ISBN');
+ $mps .= isis_to_mps($row,'290',13);
+ $mps .= isis_to_mps($row,'291',13);
+ $mps .= isis_to_mps($row,'292',13);
+ $mps .= isis_to_mps($row,'293',13);
+
+ # ISSN
+ #store_isn($row->{11}->[0],14,'%ISSN');
$mps .= isis_to_mps($row,'532',1);
@@ -514,6 +489,9 @@
# headline
if ($headline) {
$headline .= " [".$row->{mfn}."]"; ## debug MFN!
+ # Entity-escape markup characters in the headline. '&' is handled
+ # first so the '&' of the entities inserted below is not escaped again.
+ $headline =~ s/&/&amp;/g;
+ $headline =~ s/</&lt;/g;
+ $headline =~ s/>/&gt;/g;
$mps .= "H ".c_852_iso($headline)."\n";
} else {
$mps .= "H nepoznato\n";
@@ -529,6 +507,15 @@
print R c_852_iso($bib);
+ # Verify that every field tag written to this record's bib entry is a
+ # key of %default::FieldNames; die on the first one that is not.
+ # The tag is everything between the leading '%' and the first
+ # whitespace, so tags with a '+' suffix (e.g. %200+) are checked too;
+ # a \w-only pattern cannot match them and would skip them silently.
+ foreach my $bib_line (split(/\n/,$bib)) {
+     if ($bib_line =~ /^%(\S+)\s/ && !$default::FieldNames{$1}) {
+         die "field $1 used but not in FieldNames";
+     }
+ }
+
+# print R "%perl ".Dumper($row)."\n";
+
$mps .= "T document text/plain ".(tell(R) - $last_tell)." $dir/bib $last_tell ".tell(R)."\n";
$last_tell=tell(R);
@@ -540,6 +527,7 @@
print S $mps;
print MPS $mps;
}
+ }
}
print S "M over and out\nX\n";
print MPS "M over and out\nX\n";