--- trunk/run.pl 2006/07/03 22:12:13 576 +++ trunk/run.pl 2006/07/05 19:52:45 585 @@ -8,18 +8,17 @@ use WebPAC::Common 0.02; use WebPAC::Lookup; -use WebPAC::Input 0.03; +use WebPAC::Input 0.07; use WebPAC::Store 0.03; -use WebPAC::Normalize; +use WebPAC::Normalize 0.11; use WebPAC::Output::TT; use WebPAC::Validate; +use WebPAC::Output::MARC; use YAML qw/LoadFile/; use Getopt::Long; use File::Path; use Time::HiRes qw/time/; use File::Slurp; -use MARC::Record 2.0; # need 2.0 for utf-8 encoding see marcpm.sf.net -use MARC::Lint; use Data::Dump qw/dump/; =head1 NAME @@ -145,8 +144,6 @@ my @links; my $indexer; -my $lint = new MARC::Lint if ($marc_lint); - while (my ($database, $db_config) = each %{ $config->{databases} }) { my ($only_database,$only_input) = split(m#/#, $only_filter) if ($only_filter); @@ -236,10 +233,13 @@ my $input_db = new WebPAC::Input( module => $input_module, - code_page => $config->{webpac}->{webpac_encoding}, + encoding => $config->{webpac}->{webpac_encoding}, limit => $limit || $input->{limit}, offset => $offset, - lookup => $lookup, + lookup_coderef => sub { + my $rec = shift || return; + $lookup->add( $rec ); + }, recode => $input->{recode}, stats => $stats, ); @@ -271,13 +271,11 @@ $log->info("Using $normalize_path for normalization..."); - my $marc_fh; - if (my $path = $normalize->{output}) { - open($marc_fh, '>', $path) || - $log->logdie("can't open MARC output $path: $!"); - - $log->info("Creating MARC export file $path", $marc_lint ? ' (with lint)' : '', "\n"); - } + my $marc = new WebPAC::Output::MARC( + path => $normalize->{output}, + lint => $marc_lint, + dump => $marc_dump, + ) if ($normalize->{output}); # reset position in database $input_db->seek(1); @@ -319,47 +317,20 @@ type => $config->{$use_indexer}->{type}, ) if ($indexer && $ds); - if ($marc_fh) { - my $marc = new MARC::Record; - $marc->encoding( 'utf-8' ); - my @marc_fields = WebPAC::Normalize::_get_marc_fields(); - if (! @marc_fields) { - $log->warn("MARC record $mfn is empty, skipping"); - } else { - $marc->add_fields( @marc_fields ); - - # tweak leader - if (my $new_l = WebPAC::Normalize::marc_leader()) { - - my $leader = $marc->leader; - - foreach my $o ( keys %$new_l ) { - my $insert = $new_l->{$o}; - $leader = substr($leader, 0, $o) . - $insert . substr($leader, $o+length($insert)); - } - $marc->leader( $leader ); - } - - if ($marc_lint) { - $lint->check_record( $marc ); - my $err = join( "\n", $lint->warnings ); - $log->error("MARC lint detected warning on MFN $mfn\n", - "<<<<< Original imput row:\n",dump($row), "\n", - ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump(@marc_fields), "\n", - "!!!!! MARC lint warnings:\n",$err,"\n" - ) if ($err); - } - - if ($marc_dump) { - $log->info("MARC record on MFN $mfn\n", - "<<<<< Original imput row:\n",dump($row), "\n", - ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump(@marc_fields), "\n", - ); - } + if ($marc) { + my $i = 0; - print $marc_fh $marc->as_usmarc; + while (my $fields = WebPAC::Normalize::_get_marc_fields( fetch_next => 1 ) ) { + $marc->add( + id => $mfn . ( $i ? "/$i" : '' ), + fields => $fields, + leader => WebPAC::Normalize::marc_leader(), + row => $row, + ); + $i++; } + + $log->info("Created $i instances of MFN $mfn\n") if ($i > 1); } $total_rows++; @@ -368,7 +339,7 @@ $log->info("statistics of fields usage:\n", $input_db->stats) if ($stats); # close MARC file - close($marc_fh) if ($marc_fh); + $marc->finish if ($marc); }