--- trunk/lib/WebPAC/Input/ISIS.pm 2005/12/16 16:23:44 265 +++ trunk/lib/WebPAC/Input/ISIS.pm 2006/09/07 15:01:45 652 @@ -3,318 +3,123 @@ use warnings; use strict; -use WebPAC::Common; -use base qw/WebPAC::Input WebPAC::Common/; -use Text::Iconv; +use WebPAC::Input; +use Biblio::Isis 0.23; =head1 NAME -WebPAC::Input::ISIS - support for CDS/ISIS source files +WebPAC::Input::ISIS - support for CDS/ISIS database files =head1 VERSION -Version 0.01 +Version 0.07 =cut -our $VERSION = '0.01'; +our $VERSION = '0.07'; -# auto-configure - -my ($have_biblio_isis, $have_openisis) = (0,0); - -eval "use Biblio::Isis 0.13;"; -unless ($@) { - $have_biblio_isis = 1 -} else { - eval "use OpenIsis;"; - $have_openisis = 1 unless ($@); -} - =head1 SYNOPSIS -Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis -module and read all records to memory. +Open CDS/ISIS, WinISIS or IsisMarc database using C +and read all records to memory. my $isis = new WebPAC::Input::ISIS(); - $isis->open( filename => '/path/to/ISIS/ISIS' ); + $isis->open( path => '/path/to/ISIS/ISIS' ); =head1 FUNCTIONS -=head2 open - -This function will read whole database in memory and produce lookups. - - $isis->open( - filename => '/data/ISIS/ISIS', - code_page => '852', - limit_mfn => 500, - start_mfn => 6000, - lookup => $lookup_obj, - ); - -By default, ISIS code page is assumed to be C<852>. - -If optional parametar C is set, this will be first MFN to read -from database (so you can skip beginning of your database if you need to). - -If optional parametar C is set, it will read just 500 records -from database in example above. - -Returns size of database, regardless of C and C -parametars, see also C<$isis->size>. - -=cut - -sub open { - my $self = shift; - my $arg = {@_}; - - my $log = $self->_get_logger(); - - $log->logcroak("need filename") if (! $arg->{'filename'}); - my $code_page = $arg->{'code_page'} || '852'; - - # store data in object - $self->{'isis_code_page'} = $code_page; - foreach my $v (qw/isis_filename start_mfn limit_mfn/) { - $self->{$v} = $arg->{$v} if ($arg->{$v}); - } - - # create Text::Iconv object - my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); - - $log->info("reading ISIS database '",$arg->{'filename'},"'"); - $log->debug("isis code page: $code_page"); - - my ($isis_db,$db_size); - - if ($have_openisis) { - $log->debug("using OpenIsis perl bindings"); - $isis_db = OpenIsis::open($arg->{'filename'}); - $db_size = OpenIsis::maxRowid( $isis_db ) || 1; - } elsif ($have_biblio_isis) { - $log->debug("using Biblio::Isis"); - use Biblio::Isis; - $isis_db = new Biblio::Isis( - isisdb => $arg->{'filename'}, - include_deleted => 1, - hash_filter => sub { - my $l = shift || return; - $l = $cp->convert($l); - return $l; - }, - ) or $log->logdie("can't find database ",$arg->{'filename'}); - - $db_size = $isis_db->count; - - unless ($db_size) { - $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping..."); - return; - } - - } else { - $log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN."); - } - - - my $startmfn = 1; - my $maxmfn = $db_size; - - if (my $s = $self->{'start_mfn'}) { - $log->info("skipping to MFN $s"); - $startmfn = $s; - } else { - $self->{'start_mfn'} = $startmfn; - } - - if ($self->{limit_mfn}) { - $log->info("limiting to ",$self->{limit_mfn}," records"); - $maxmfn = $startmfn + $self->{limit_mfn} - 1; - $maxmfn = $db_size if ($maxmfn > $db_size); - } - - # store size for later - $self->{'size'} = ($maxmfn - $startmfn) ? ($maxmfn - $startmfn + 1) : 0; - - $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis')); - - - # read database - for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) { - - $log->debug("mfn: $mfn\n"); - - my $rec; - - if ($have_openisis) { - - # read record using OpenIsis - my $row = OpenIsis::read( $isis_db, $mfn ); - foreach my $k (keys %{$row}) { - if ($k ne "mfn") { - foreach my $l (@{$row->{$k}}) { - $l = $cp->convert($l); - # has subfields? - my $val; - if ($l =~ m/\^/) { - foreach my $t (split(/\^/,$l)) { - next if (! $t); - $val->{substr($t,0,1)} = substr($t,1); - } - } else { - $val = $l; - } - - push @{$rec->{$k}}, $val; - } - } else { - push @{$rec->{'000'}}, $mfn; - } - } - - } elsif ($have_biblio_isis) { - $rec = $isis_db->to_hash($mfn); - } else { - $log->logdie("hum? implementation missing?"); - } - - if (! $rec) { - $log->warn("record $mfn empty? skipping..."); - next; - } - - # store - if ($self->{'low_mem'}) { - $self->{'db'}->put($mfn, $rec); - } else { - $self->{'data'}->{$mfn} = $rec; - } - - # create lookup - $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'}); - - $self->progress_bar($mfn,$maxmfn); - - } - - $self->{'current_mfn'} = -1; - $self->{'last_pcnt'} = 0; +=head2 open_db - $log->debug("max mfn: $maxmfn"); +Returns handle to database and size in records - # store max mfn and return it. - $self->{'max_mfn'} = $maxmfn; + my ($db,$size) = $isis->open_db( + path => '/path/to/LIBRI' + filter => sub { + my ($l,$field_nr) = @_; + # do something with $l which is line of input file + return $l; + }, + } - return $db_size; -} +Options: -=head2 fetch +=over 4 -Fetch next record from database. It will also displays progress bar. +=item path - my $rec = $isis->fetch; +path to CDS/ISIS database -Record from this function should probably go to C for -normalisation. +=back =cut -sub fetch { +sub open_db { my $self = shift; - my $log = $self->_get_logger(); - - $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'}); - - if ($self->{'current_mfn'} == -1) { - $self->{'current_mfn'} = $self->{'start_mfn'}; - } else { - $self->{'current_mfn'}++; - } + my $arg = {@_}; - my $mfn = $self->{'current_mfn'}; + my $log = $self->_get_logger(); - if ($mfn > $self->{'max_mfn'}) { - $self->{'current_mfn'} = $self->{'max_mfn'}; - $log->debug("at EOF"); - return; - } + $log->info("opening ISIS database '$arg->{path}'"); - $self->progress_bar($mfn,$self->{'max_mfn'}); + $log->debug("using Biblio::Isis"); + my $isis_db = new Biblio::Isis( + isisdb => $arg->{path}, + include_deleted => 1, + hash_filter => $arg->{filter} ? sub { return $arg->{filter}->(@_); } : undef, + ) or $log->logdie("can't find database $arg->{path}"); - my $rec; + my $size = $isis_db->count; - if ($self->{'low_mem'}) { - $rec = $self->{'db'}->get($mfn); - } else { - $rec = $self->{'data'}->{$mfn}; - } + $self->{_isis_db} = $isis_db; - $rec ||= 0E0; + return ($isis_db, $size); } -=head2 pos - -Returns current record number (MFN). +=head2 fetch_rec - print $isis->pos; +Return record with ID C<$mfn> from database -First record in database has position 1. + my $rec = $self->fetch_rec( $mfn, $filter_coderef); =cut -sub pos { +sub fetch_rec { my $self = shift; - return $self->{'current_mfn'}; -} - - -=head2 size -Returns number of records in database + my ($mfn, $filter_coderef) = @_; - print $isis->size; + my $rec = $self->{_isis_db}->to_hash({ + mfn => $mfn, + include_subfields => 1, + hash_filter => $filter_coderef, +# hash_filter => sub { +# my ($l,$f) = @_; +# warn "## in hash_filter ($l,$f)\n"; +# my $o = $filter_coderef->($l,$f) if ($filter_coderef); +# warn "## out hash_filter = $o\n"; +# return $o; +# }, + }); -Result from this function can be used to loop through all records - - foreach my $mfn ( 1 ... $isis->size ) { ... } - -because it takes into account C and C. - -=cut - -sub size { - my $self = shift; - return $self->{'size'}; + return $rec; } -=head2 seek - -Seek to specified MFN in file. +=head2 dump_rec - $isis->seek(42); +Return dump of record ID C<$mfn> from database -First record in database has position 1. + my $rec = $self->dump_rec( $db, $mfn ); =cut -sub seek { +sub dump_rec { my $self = shift; - my $pos = shift || return; - - my $log = $self->_get_logger(); - if ($pos < 1) { - $log->warn("seek before first record"); - $pos = 1; - } elsif ($pos > $self->{'max_mfn'}) { - $log->warn("seek beyond last record"); - $pos = $self->{'max_mfn'}; - } + my $mfn = shift; - return $self->{'current_mfn'} = (($pos - 1) || -1); + return $self->{_isis_db}->to_ascii( $mfn ); } =head1 AUTHOR