8 |
|
|
9 |
my %opts; |
my %opts; |
10 |
|
|
11 |
getopt('dm', \%opts); |
getopts('d:m:q', \%opts); |
12 |
|
|
13 |
die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); |
die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts); |
14 |
|
|
15 |
my $db_dir = $opts{d}; |
my $db_dir = $opts{d}; |
16 |
|
|
17 |
mkdir "$common::install_dir/$db_dir" if (!-e "$common::install_dir/$db_dir"); |
my $dir = open_data_files($db_dir); |
|
mkdir "$common::install_dir/$db_dir/data" if (!-e "$common::install_dir/$db_dir/data"); |
|
|
|
|
|
my $dir="$common::install_dir/$db_dir/data"; |
|
|
|
|
|
|
|
|
open(S,"> $dir/stream") || die "can't open output $dir/stream: $!"; |
|
|
open(R,"> $dir/bib") || die "can't open output $dir/bib: $!"; |
|
|
open(MPS,"| $common::mpsindex -d $common::install_dir/$db_dir -autokey") || die "can't start MPS indexer $common::mpsindex: $!"; |
|
|
#open(MPS,"> /tmp/mpsindex") || die "mps: $!"; |
|
|
|
|
|
print S $common::mps_header; |
|
|
print MPS $common::mps_header; |
|
18 |
|
|
19 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
20 |
# init array in_mps_header for config checks later |
# init array in_mps_header for config checks later |
29 |
require "./search/config.pm"; |
require "./search/config.pm"; |
30 |
|
|
31 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
32 |
|
# read database configuration, store database names |
33 |
|
open(CF,$common::database_cf) || die "$common::database_cf: $!"; |
34 |
|
my %DatabaseDescriptions; |
35 |
|
while(<CF>) { |
36 |
|
chomp; |
37 |
|
if (/^database-name:([^=]+)=(.*)$/) { |
38 |
|
my ($db_name,$db_desc) = ($1,$2); |
39 |
|
$db_desc=~s/^##\w+##//g; |
40 |
|
# c_iso_852 is a cludge so that output format would be |
41 |
|
# correct 8859-2 again... |
42 |
|
$DatabaseDescriptions{$db_name}=c_iso_852($db_desc); |
43 |
|
} |
44 |
|
} |
45 |
|
close(CF); |
46 |
|
|
47 |
|
#-------------------------------------------------------------------- |
48 |
# |
# |
49 |
# expand(nr,"space separated string"); |
# expand(nr,"space separated string"); |
50 |
# |
# |
58 |
foreach my $w (@words) { |
foreach my $w (@words) { |
59 |
# FIX: this should be replaced by stemmer! |
# FIX: this should be replaced by stemmer! |
60 |
#$w =~ tr/ƾ/sSdDcCcCzZ/; |
#$w =~ tr/ƾ/sSdDcCcCzZ/; |
61 |
$w =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/; |
$w =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'".'',"'Rr/; |
62 |
$w =~ s//ss/g; |
$w =~ s//ss/g; |
63 |
$out .= "W $w $nr\n"; |
$out .= "W $w $nr\n"; |
64 |
} |
} |
74 |
return $tmp; |
return $tmp; |
75 |
} |
} |
76 |
|
|
77 |
|
sub c_iso_852 { |
78 |
|
my $tmp = $_[0]; |
79 |
|
$tmp =~ tr/ܫꔼȺ̪㍐슂ٝ// if ($tmp); |
80 |
|
return $tmp; |
81 |
|
} |
82 |
|
|
83 |
sub c_852_czs { |
sub c_852_czs { |
84 |
my $tmp = $_[0]; |
my $tmp = $_[0]; |
85 |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; |
$tmp =~ tr//ܫꔼȺ̪㍐슂ٝ/; |
86 |
$tmp =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'-".'',"'Rr/; |
$tmp =~ tr/ܫȺ̪/CueaauccleOoiZACELlooLlSsOUTtLcaiouAaZzEezCsAAESZzAadDDEdNIIeTUOoNnnSsRUrUyYt'".'',"'Rr/; |
87 |
$tmp =~ s//ss/g; |
$tmp =~ s//ss/g; |
88 |
return $tmp; |
return $tmp; |
89 |
} |
} |
126 |
my @sf_arr = subfields_str_2_arr($subfields,$sf_hash); |
my @sf_arr = subfields_str_2_arr($subfields,$sf_hash); |
127 |
|
|
128 |
foreach (@sf_arr) { |
foreach (@sf_arr) { |
129 |
$out.=mps_expand($mps_id,$sf_hash->{$_}); |
$out.=mps_expand($mps_id,c_852_iso($sf_hash->{$_})); |
130 |
} |
} |
131 |
return $out; |
return $out; |
132 |
} |
} |
152 |
if (scalar keys %{$sf_hash} > 0) { |
if (scalar keys %{$sf_hash} > 0) { |
153 |
if ($subfields) { |
if ($subfields) { |
154 |
foreach (split(//,$subfields)) { |
foreach (split(//,$subfields)) { |
155 |
$out.=mps_expand($mps_id,$sf_hash->{$_}); |
$out.=mps_expand($mps_id,c_852_iso($sf_hash->{$_})); |
156 |
} |
} |
157 |
} else { |
} else { |
158 |
foreach (keys %{$sf_hash}) { |
foreach (keys %{$sf_hash}) { |
159 |
$out.=mps_expand($mps_id,$sf_hash->{$_}); |
$out.=mps_expand($mps_id,c_852_iso($sf_hash->{$_})); |
160 |
} |
} |
161 |
} |
} |
162 |
} else { |
} else { |
163 |
$out.=mps_expand($mps_id,$row->{$isis_id}->[$i]); |
$out.=mps_expand($mps_id,c_852_iso($row->{$isis_id}->[$i])); |
164 |
} |
} |
165 |
$i++; |
$i++; |
166 |
} |
} |
299 |
if (-e "$common::isis_data/$db_dir/$_/PERI") { |
if (-e "$common::isis_data/$db_dir/$_/PERI") { |
300 |
push @isis_dbs,"$common::isis_data/$db_dir/$_/PERI/PERI"; |
push @isis_dbs,"$common::isis_data/$db_dir/$_/PERI/PERI"; |
301 |
} |
} |
302 |
|
if (-e "$common::isis_data/$db_dir/$_/AMS") { |
303 |
|
push @isis_dbs,"$common::isis_data/$db_dir/$_/AMS/AMS"; |
304 |
|
} |
305 |
|
if (-e "$common::isis_data/$db_dir/$_/ARTI") { |
306 |
|
# push @isis_dbs,"$common::isis_data/$db_dir/$_/ARTI/ARTI"; |
307 |
|
} |
308 |
} |
} |
309 |
|
|
310 |
foreach my $isis_db (@isis_dbs) { |
foreach my $isis_db (@isis_dbs) { |
313 |
|
|
314 |
my $db = OpenIsis::open( "$isis_db" ); |
my $db = OpenIsis::open( "$isis_db" ); |
315 |
|
|
316 |
|
if (! defined $db) { |
317 |
|
die "can't open '$isis_db'"; |
318 |
|
} |
319 |
|
|
320 |
|
my $tip = $isis_db; $tip =~ s/^.+?\/([^\/]+)$/$1/; |
321 |
|
if (defined $default::tip{$tip}) { |
322 |
|
$tip=$default::tip{$tip}; |
323 |
|
} elsif ($tip eq "AMS") { |
324 |
|
$tip=$default::tip{'LIBRI'}; |
325 |
|
} else { |
326 |
|
die "can't find tip for database '$isis_db'"; |
327 |
|
} |
328 |
|
$tip = c_iso_852($tip); |
329 |
|
|
330 |
my $max_rowid = OpenIsis::maxRowid( $db ); |
my $max_rowid = OpenIsis::maxRowid( $db ); |
331 |
|
|
332 |
my $last_pcnt = 0; |
my $last_pcnt = 0; |
335 |
my $row = OpenIsis::read( $db, $row_id ); |
my $row = OpenIsis::read( $db, $row_id ); |
336 |
if (my $tmp = $row->{'200'}->[0]) { |
if (my $tmp = $row->{'200'}->[0]) { |
337 |
|
|
338 |
my $bib = "%MFN $row->{mfn}\n"; |
my $bib; |
339 |
my $mps = "W $row->{mfn} 14\n"; |
my $mps = "W $row->{mfn} 14\n"; |
340 |
|
|
341 |
|
# tip gradje |
342 |
|
$mps .= "W ".c_852_czs($tip)." 17\n"; |
343 |
|
$bib .= "%tip $tip\n"; |
344 |
|
|
345 |
my $pcnt = int($row->{mfn} * 100 / $max_rowid); |
my $pcnt = int($row->{mfn} * 100 / $max_rowid); |
346 |
if ($pcnt != $last_pcnt) { |
if ($pcnt != $last_pcnt) { |
347 |
printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt); |
printf MPS ("M %5d / %5d -- %-2d %%\n",$row->{mfn},$max_rowid,$pcnt) if (! $opts{q}); |
348 |
$last_pcnt = $pcnt; |
$last_pcnt = $pcnt; |
349 |
} |
} |
350 |
|
|
351 |
my $headline; |
my $headline; |
352 |
$headline .= isis_sf($row,'200','a',"'"); |
$headline .= isis_sf($row,'200','a'); |
353 |
$headline .= isis_sf($row,'200','e'," : ","'"); |
$headline .= isis_sf($row,'200','e'," : "); |
354 |
|
$headline .= isis_sf($row,'200','f'," / "); |
355 |
|
$headline .= isis_sf($row,'210','d'," , "); |
356 |
|
|
357 |
|
# remove newlines, compress spaces |
358 |
|
$headline =~ s/[\n\r]//g; |
359 |
|
$headline =~ s/^\s+//g; |
360 |
|
$headline =~ s/\s+$//g; |
361 |
|
|
362 |
# author |
# author |
363 |
$bib .= isis_to_bib($row,'700','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'700','%700+','ab',undef,'>',', '); |
364 |
$bib .= isis_to_bib($row,'701','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'701','%700+','ab',undef,'>',', '); |
365 |
$bib .= isis_to_bib($row,'710','%700+','*',2,'<'); |
$bib .= isis_to_bib($row,'701','%700+','cd',undef,'>',', '); |
366 |
$bib .= isis_to_bib($row,'711','%700+','*',2,'<'); |
|
367 |
$bib .= isis_to_bib($row,'503','%700+','*',2,'<'); |
my $tmp; |
368 |
|
$tmp = isis_sf($row,'710','abc', '', '', (' : ',', ')); |
369 |
|
$tmp .= isis_sf($row,'710','dfe', ' (', ')', ('', ' ; ',' ; ')); |
370 |
|
$bib .= "%700+ $tmp\n" if ($tmp); |
371 |
|
|
372 |
|
$tmp = isis_sf($row,'711','abc', '', '', (' : ',', ')); |
373 |
|
$tmp .= isis_sf($row,'711','dfe', ' (', ')', ('', ' ; ',' ; ')); |
374 |
|
$bib .= "%700+ $tmp\n" if ($tmp); |
375 |
|
|
376 |
|
$bib .= isis_to_bib($row,'503','%700+','ab',undef,'>',', '); |
377 |
|
|
378 |
$mps .= isis_to_mps($row,'700',1); |
$mps .= isis_to_mps($row,'700',1); |
379 |
$mps .= isis_to_mps($row,'701',1); |
$mps .= isis_to_mps($row,'701',1); |
409 |
$mps .= isis_to_mps($row,'233',2,"ae"); |
$mps .= isis_to_mps($row,'233',2,"ae"); |
410 |
|
|
411 |
|
|
|
my $tmp; |
|
412 |
$tmp = isis_sf($row,'230','v'). |
$tmp = isis_sf($row,'230','v'). |
413 |
isis_sf($row,'230','a',' : '). |
isis_sf($row,'230','a',' : '). |
414 |
isis_sf($row,'250',undef,'. - '). |
isis_sf($row,'250',undef,'. - '). |
441 |
$mps .= isis_to_mps($row,'272',2); |
$mps .= isis_to_mps($row,'272',2); |
442 |
$mps .= isis_to_mps($row,'273',2); |
$mps .= isis_to_mps($row,'273',2); |
443 |
|
|
|
$headline .= isis_sf($row,'700','b'," "); |
|
|
$headline .= isis_sf($row,'700','a'," "); |
|
|
|
|
444 |
# izdavac |
# izdavac |
445 |
$mps .= isis_to_mps($row,'210',3); |
$mps .= isis_to_mps($row,'210',3); |
446 |
$mps .= isis_to_mps($row,'250',3); |
$mps .= isis_to_mps($row,'250',3); |
459 |
if (my $year = isis_sf($row,'210','d')) { |
if (my $year = isis_sf($row,'210','d')) { |
460 |
$year =~ s/^\s*cop\.*\s*//i; |
$year =~ s/^\s*cop\.*\s*//i; |
461 |
$year =~ s/[\[\]]*//g; |
$year =~ s/[\[\]]*//g; |
462 |
|
$year =~ s/[\n\r]//g; # remove cr |
463 |
$mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); |
$mps .= "D ${year}\n" if ($year !~ m/\?/ && $year =~ /\d{4}/); |
|
$headline .= " ($year)"; |
|
464 |
} |
} |
465 |
|
|
466 |
$mps .= isis_to_mps($row,'215',15); |
$mps .= isis_to_mps($row,'215',15); |
494 |
$bib .= isis_to_bib($row,'610','%610'); |
$bib .= isis_to_bib($row,'610','%610'); |
495 |
$mps .= isis_to_mps($row,'610',8); |
$mps .= isis_to_mps($row,'610',8); |
496 |
|
|
497 |
$bib .= isis_to_bib($row,'675','%675+'); |
$bib .= isis_to_bib($row,'675','%675+','a'); |
498 |
$mps .= isis_to_mps($row,'675',9); |
$mps .= isis_to_mps($row,'675',9); |
499 |
$bib .= isis_to_bib($row,'686','%675+'); |
$bib .= isis_to_bib($row,'686','%675+','a'); |
500 |
$mps .= isis_to_mps($row,'686',10); |
$mps .= isis_to_mps($row,'686',10); |
501 |
|
|
502 |
$bib .= isis_to_bib($row,'990','%990'); |
$bib .= isis_to_bib($row,'990','%990'); |
505 |
$bib .= isis_to_bib($row,'991','%991'); |
$bib .= isis_to_bib($row,'991','%991'); |
506 |
$mps .= isis_to_mps($row,'991',12); |
$mps .= isis_to_mps($row,'991',12); |
507 |
|
|
508 |
sub store_isn { |
# Jezik |
509 |
if (my $isn = shift @_) { |
$bib .= isis_to_bib($row,'101','%101'); |
510 |
my $nr = shift @_; |
$mps .= isis_to_mps($row,'101',16); |
511 |
my $tag = shift @_; |
# Pismo |
512 |
|
$bib .= isis_to_bib($row,'998','%101', 'a'); |
513 |
|
|
514 |
|
sub isis_isn_to_mps { |
515 |
|
my $row = shift @_ || die; |
516 |
|
my $isis_id = shift @_ || die; |
517 |
|
my $nr = shift @_ || die; |
518 |
|
my $i=0; |
519 |
|
my $mps=''; |
520 |
|
while (my $isn=$row->{$isis_id}->[$i]) { |
521 |
$isn =~ s/ +//g; # remove spaces |
$isn =~ s/ +//g; # remove spaces |
522 |
|
$isn =~ s/[\n\r]//g; # remove cr |
523 |
$mps .= "W $isn $nr\n"; |
$mps .= "W $isn $nr\n"; |
|
$bib .= "$tag $isn\n"; |
|
524 |
if ($isn =~ s/-//g) { |
if ($isn =~ s/-//g) { |
525 |
$mps .= "W $isn $nr\n"; |
$mps .= "W $isn $nr\n"; |
526 |
} |
} |
527 |
|
$i++; |
528 |
} |
} |
529 |
|
return $mps; |
530 |
} |
} |
531 |
|
|
532 |
# ISBN |
# ISBN |
533 |
store_isn($row->{10}->[0],13,'%ISBN'); |
$bib .= isis_to_bib($row,'10','%ISBN'); |
534 |
$mps .= isis_to_mps($row,'290',13); |
$mps .= isis_isn_to_mps($row,'10',13); |
535 |
$mps .= isis_to_mps($row,'291',13); |
$mps .= isis_isn_to_mps($row,'290',13); |
536 |
$mps .= isis_to_mps($row,'292',13); |
$mps .= isis_isn_to_mps($row,'291',13); |
537 |
$mps .= isis_to_mps($row,'293',13); |
$mps .= isis_isn_to_mps($row,'292',13); |
538 |
|
$mps .= isis_isn_to_mps($row,'293',13); |
539 |
|
|
540 |
# ISSN |
# ISSN |
541 |
#store_isn($row->{11}->[0],14,'%ISSN'); |
$bib .= isis_to_bib($row,'11','%ISSN'); |
542 |
|
$mps .= isis_isn_to_mps($row,'11',13); |
543 |
|
|
544 |
$mps .= isis_to_mps($row,'532',1); |
$mps .= isis_to_mps($row,'532',1); |
545 |
|
|
546 |
$bib .= isis_to_bib($row,'994','%994a','a'); |
# Casopisi |
547 |
|
$tmp = isis_to_bib($row,'326','%326'); |
548 |
|
$tmp =~ s/g1/godinjak/; |
549 |
|
$tmp =~ s/g6/dvomjesenik/; |
550 |
|
$tmp =~ s/10/godinje 10 brojeva/; |
551 |
|
$tmp =~ s/m1/mjesenik/; |
552 |
|
$tmp =~ s/m2/polumjesenik/; |
553 |
|
$tmp =~ s/nr/neredovito/; |
554 |
|
$tmp =~ s/g4/etiri puta godinje/; |
555 |
|
$bib .= c_iso_852($tmp); |
556 |
|
$bib .= isis_to_bib($row,'992','%992'); |
557 |
|
|
558 |
|
$bib .= '%knjiz '.$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}."\n"; |
559 |
# headline |
# headline |
560 |
if ($headline) { |
if ($headline) { |
561 |
$headline .= " [".$row->{mfn}."]"; ## debug MFN! |
$headline .= " <i>(".$DatabaseDescriptions{$db_dir}.", ".$row->{mfn}.")</i>"; ## debug MFN! |
562 |
$headline =~ s/&/∧/g; |
$headline =~ s/&/∧/g; |
563 |
$headline =~ s/</</g; |
$headline =~ s/</</g; |
564 |
$headline =~ s/>/>/g; |
$headline =~ s/>/>/g; |
593 |
|
|
594 |
$mps .= "E\n"; |
$mps .= "E\n"; |
595 |
|
|
|
|
|
596 |
print S $mps; |
print S $mps; |
597 |
print MPS $mps; |
print MPS $mps; |
598 |
} |
} |