--- trunk/run.pl 2006/07/02 21:22:26 567 +++ trunk/run.pl 2006/07/10 10:16:11 595 @@ -7,20 +7,20 @@ use lib './lib'; use WebPAC::Common 0.02; -use WebPAC::Lookup; -use WebPAC::Input 0.03; +use WebPAC::Lookup 0.03; +use WebPAC::Input 0.07; use WebPAC::Store 0.03; -use WebPAC::Normalize; +use WebPAC::Normalize 0.11; use WebPAC::Output::TT; use WebPAC::Validate; +use WebPAC::Output::MARC; use YAML qw/LoadFile/; use Getopt::Long; use File::Path; use Time::HiRes qw/time/; use File::Slurp; -use MARC::Record 2.0; # need 2.0 for utf-8 encoding see marcpm.sf.net -use MARC::Lint; use Data::Dump qw/dump/; +use Storable qw/dclone/; =head1 NAME @@ -145,8 +145,6 @@ my @links; my $indexer; -my $lint = new MARC::Lint if ($marc_lint); - while (my ($database, $db_config) = each %{ $config->{databases} }) { my ($only_database,$only_input) = split(m#/#, $only_filter) if ($only_filter); @@ -224,9 +222,13 @@ die "I know only how to handle input types ", join(",", @supported_inputs), " not '$type'!\n" unless (grep(/$type/, @supported_inputs)); - my $lookup = new WebPAC::Lookup( - lookup_file => $input->{lookup}, - ) if ($input->{lookup}); + my $lookup; + if ($input->{lookup}) { + $lookup = new WebPAC::Lookup( + lookup_file => $input->{lookup}, + ); + delete( $input->{lookup} ); + } my $input_module = $config->{webpac}->{inputs}->{$type}; @@ -236,10 +238,13 @@ my $input_db = new WebPAC::Input( module => $input_module, - code_page => $config->{webpac}->{webpac_encoding}, + encoding => $config->{webpac}->{webpac_encoding}, limit => $limit || $input->{limit}, offset => $offset, - lookup => $lookup, + lookup_coderef => sub { + my $rec = shift || return; + $lookup->add( $rec ); + }, recode => $input->{recode}, stats => $stats, ); @@ -271,13 +276,11 @@ $log->info("Using $normalize_path for normalization..."); - my $marc_fh; - if (my $path = $normalize->{output}) { - open($marc_fh, '>', $path) || - $log->logdie("can't open MARC output $path: $!"); - - $log->info("Creating MARC export file $path", $marc_lint ? ' (with lint)' : '', "\n"); - } + my $marc = new WebPAC::Output::MARC( + path => $normalize->{output}, + lint => $marc_lint, + dump => $marc_dump, + ) if ($normalize->{output}); # reset position in database $input_db->seek(1); @@ -300,10 +303,22 @@ $log->error( "MFN $mfn validation errors:\n", join("\n", @errors) ) if (@errors); } + my $ds_config = dclone($db_config); + + # default values -> database key + $ds_config->{_} = $database; + + # current mfn + $ds_config->{_mfn} = $mfn; + + # attach current input + $ds_config->{input} = $input; + my $ds = WebPAC::Normalize::data_structure( row => $row, rules => $rules, lookup => $lookup ? $lookup->lookup_hash : undef, + config => $ds_config, marc_encoding => 'utf-8', ); @@ -319,47 +334,20 @@ type => $config->{$use_indexer}->{type}, ) if ($indexer && $ds); - if ($marc_fh) { - my $marc = new MARC::Record; - $marc->encoding( 'utf-8' ); - my @marc_fields = WebPAC::Normalize::_get_marc_fields(); - if (! @marc_fields) { - $log->warn("MARC record $mfn is empty, skipping"); - } else { - $marc->add_fields( @marc_fields ); - - # tweak leader - if (my $new_l = WebPAC::Normalize::marc_leader()) { - - my $leader = $marc->leader; - - foreach my $o ( keys %$new_l ) { - my $insert = $new_l->{$o}; - $leader = substr($leader, 0, $o) . - $insert . substr($leader, $o+length($insert)); - } - $marc->leader( $leader ); - } - - if ($marc_lint) { - $lint->check_record( $marc ); - my $err = join( "\n", $lint->warnings ); - $log->error("MARC lint detected warning on MFN $mfn\tleader:", $marc->leader(), "\n", - ">>>>> Original imput row:\n",dump($row), "\n", - ">>>>> Normalized MARC row:\n",dump(@marc_fields), "\n", - ">>>>> MARC lint warnings:\n",$err,"\n" - ) if ($err); - } - - if ($marc_dump) { - $log->info("MARC record on MFN $mfn\tleader:", $marc->leader(), "\n", - ">>>>> Original imput row:\n",dump($row), "\n", - ">>>>> Normalized MARC row:\n",dump(@marc_fields), "\n", - ); - } + if ($marc) { + my $i = 0; - print $marc_fh $marc->as_usmarc; + while (my $fields = WebPAC::Normalize::_get_marc_fields( fetch_next => 1 ) ) { + $marc->add( + id => $mfn . ( $i ? "/$i" : '' ), + fields => $fields, + leader => WebPAC::Normalize::marc_leader(), + row => $row, + ); + $i++; } + + $log->info("Created $i instances of MFN $mfn\n") if ($i > 1); } $total_rows++; @@ -368,7 +356,7 @@ $log->info("statistics of fields usage:\n", $input_db->stats) if ($stats); # close MARC file - close($marc_fh) if ($marc_fh); + $marc->finish if ($marc); }