use warnings;
use strict;

use WebPAC::Input;

=head1 NAME

WebPAC::Input::ISIS - support for CDS/ISIS database files

=head1 VERSION

Version 0.04

=cut

our $VERSION = '0.04';

=head1 SYNOPSIS

Open CDS/ISIS, WinISIS or IsisMarc database using C<Biblio::Isis> or
C<OpenIsis> module and read all records to memory.

 my $isis = new WebPAC::Input::ISIS();
 $isis->open( path => '/path/to/ISIS/ISIS' );

=head1 FUNCTIONS

=head2 init

Autoconfigure this module to use C<Biblio::Isis> or C<OpenIsis>.

 $isis->init;

C<Biblio::Isis> is tried first; C<OpenIsis> is tried only when
C<Biblio::Isis> cannot be loaded. The outcome is recorded in
C<< $self->{have_biblio_isis} >> or C<< $self->{have_openisis} >>. When
neither module can be loaded no flag is set and nothing is reported here;
C<open_db> and C<fetch_rec> die in that case.

Always returns a true value.

=cut

sub init {
    my $self = shift;

    # Block eval with require + import has the same run-time effect as
    # eval "use Module;" without compiling a string, and the trailing 1
    # lets us test success without inspecting $@.
    if ( eval { require Biblio::Isis; Biblio::Isis->import; 1 } ) {
        $self->{have_biblio_isis} = 1;
    }
    elsif ( eval { require OpenIsis; OpenIsis->import; 1 } ) {
        $self->{have_openisis} = 1;
    }

    return 1;
}

=head2 open_db

Returns handle to database and size in records

 my ($db,$size) = $isis->open_db(
    path => '/path/to/LIBRI',
    filter => sub {
        my ($l,$field_nr) = @_;
        # do something with $l which is line of input file
        return $l;
    },
 );

Options:

=over 4

=item path

path to CDS/ISIS database (required)

=item filter

optional code reference. With C<Biblio::Isis> it is installed as the
C<hash_filter> of the database object and is called with whatever arguments
C<Biblio::Isis> passes to that hook. It is not used with C<OpenIsis>.

=back

Dies (through the logger) when C<path> is missing, when C<Biblio::Isis>
cannot open the database, or when C<init> did not find a supported library.

=cut

sub open_db {
    my $self = shift;
    my $arg  = {@_};

    my $log = $self->_get_logger();

    my $path = $arg->{path};
    $log->logdie("need path to ISIS database")
        unless ( defined $path && length $path );

    $log->info("opening ISIS database '$path'");

    my ( $isis_db, $db_size );

    if ( $self->{have_openisis} ) {
        $log->debug("using OpenIsis perl bindings");
        $isis_db = OpenIsis::open($path);
        $db_size = OpenIsis::maxRowid($isis_db) || 1;
    }
    elsif ( $self->{have_biblio_isis} ) {
        $log->debug("using Biblio::Isis");

        # Load at run time. A "use" statement here would execute at compile
        # time regardless of this branch and make the whole module unusable
        # on systems that only have OpenIsis.
        require Biblio::Isis;

        my $filter = $arg->{filter};
        $isis_db = Biblio::Isis->new(
            isisdb          => $path,
            include_deleted => 1,
            hash_filter     => $filter ? sub { return $filter->(@_); } : undef,
        ) or $log->logdie("can't find database $path");

        $db_size = $isis_db->count;
    }
    else {
        $log->logdie("Can't find supported ISIS library for perl. I suggest that you install Biblio::Isis from CPAN.");
    }

    return ( $isis_db, $db_size );
}

=head2 fetch_rec

Return record with ID C<$mfn> from database

 my $rec = $self->fetch_rec( $db, $mfn );

C<$db> is the handle returned by C<open_db>.

With C<OpenIsis> the record is converted to a hash keyed by field, each
holding an array of values. A value containing C<^> is split into a hash of
subfields keyed by the first character of every C<^>-separated chunk; other
values are stored as plain strings. The MFN is stored under key C<000>. If
C<< $self->{iconv} >> is set, its C<convert> method is applied to every
value first.

With C<Biblio::Isis> the result of C<to_hash> (called with
C<include_subfields>) is returned.

Dies (through the logger) when C<init> did not find a supported library.

=cut

sub fetch_rec {
    my $self = shift;

    my ( $isis_db, $mfn ) = @_;

    my $rec;

    if ( $self->{have_openisis} ) {

        # read record using OpenIsis
        my $row = OpenIsis::read( $isis_db, $mfn );

        # convert record to hash
        foreach my $field ( keys %{$row} ) {

            if ( $field eq 'mfn' ) {
                push @{ $rec->{'000'} }, $mfn;
                next;
            }

            foreach my $raw ( @{ $row->{$field} } ) {

                # Work on a copy: the loop variable aliases the element
                # inside $row, and converting in place would rewrite the
                # data handed to us by OpenIsis.
                my $line = $raw;
                $line = $self->{iconv}->convert($line) if ( $self->{iconv} );

                my $val;
                if ( $line =~ m/\^/ ) {

                    # has subfields
                    # NOTE(review): text in front of the first '^' is keyed
                    # by its first character like any other chunk - confirm
                    # this is intended.
                    foreach my $chunk ( split( /\^/, $line ) ) {

                        # Skip only empty chunks; a plain truth test would
                        # also drop a chunk that is exactly "0".
                        next if ( $chunk eq '' );
                        $val->{ substr( $chunk, 0, 1 ) } = substr( $chunk, 1 );
                    }
                }
                else {
                    $val = $line;
                }

                push @{ $rec->{$field} }, $val;
            }
        }

    }
    elsif ( $self->{have_biblio_isis} ) {
        $rec = $isis_db->to_hash(
            {
                mfn               => $mfn,
                include_subfields => 1,
            }
        );
    }
    else {
        $self->_get_logger()->logdie("hum? implementation missing?");
    }

    return $rec;
}

=head1 AUTHOR