--- branches/Sack/lib/WebPAC/Input/ISI.pm 2009/09/21 19:04:14 1310 +++ branches/Sack/lib/WebPAC/Input/ISI.pm 2009/09/21 19:36:09 1311 @@ -7,6 +7,7 @@ use base qw/WebPAC::Common/; use Data::Dump qw/dump/; +use Carp qw/confess/; =head1 NAME @@ -14,7 +15,10 @@ =cut -our $VERSION = '0.03'; +our $VERSION = '0.04'; + +our $debug = 0; + =head1 SYNOPSIS @@ -77,9 +81,7 @@ my $arg = {@_}; - my $log = $self->_get_logger(); - - open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!"); + open( my $fh, '<', $arg->{path} ) || confess "can't open $arg->{path}: $!"; my ( $format, $version ); @@ -88,7 +90,7 @@ if ( $line =~ /^FN\s(.+)$/) { $format = $1; } else { - $log->logdie("first line of $arg->{path} has to be FN, but is: $line"); + die "first line of $arg->{path} has to be FN, but is: $line"; } $line = <$fh>; @@ -96,55 +98,73 @@ if ( $line =~ /^VR\s(.+)$/) { $version = $1; } else { - $log->logdie("second line of $arg->{path} has to be VN, but is: $line"); + die "second line of $arg->{path} has to be VN, but is: $line"; + } + + warn "I: $arg->{path} $format $version - generating record offsets\n"; + + $self->{fh} = $fh; + $self->{record_offset} = []; + + while( $line = <$fh> ) { + chomp($line); + next unless $line eq 'ER'; + push @{ $self->{record_offset} }, tell($fh); + last if $#{ $self->{record_offset} } > $self->{offset} + $self->{limit}; } + push @{ $self->{record_offset} }, tell($fh); # end of file + + warn "I $arg->{path} read ", tell($fh), " bytes $#{ $self->{record_offset} } records\n"; + + return $self; +} + + + +=head2 fetch_rec + +Return record with ID C<$mfn> from database + + my $rec = $input->fetch_rec( $mfn, $filter_coderef ); + +=cut + +sub fetch_rec { + my ( $self, $mfn, $filter_coderef ) = @_; - $log->info("opening $format $version database '$arg->{path}'"); + seek $self->{fh}, $self->{record_offset}->[ $mfn - 1 ], 0; my $tag; my $rec; - my $offset = $self->{offset} || 0; - my $limit = $self->{limit} || 0; + my $fh = $self->{fh}; - my $file_pos = 0; - my $end_pos = 0; - $end_pos = $offset + $limit if $limit; - - $self->{_rec} = []; - - warn "# offset: $offset limit: $limit end: $end_pos"; - - while( $line = <$fh> ) { + while( my $line = <$fh> ) { chomp($line); my $v; if ( $line eq 'EF' ) { - last; + return; } elsif ( $line eq 'ER' ) { - $file_pos++; - last if $end_pos && $file_pos > $end_pos; - if ( ! $offset || $file_pos > $offset ) { + $line = <$fh>; + chomp $line; + die "expected blank like in ",$self->{path}, " +$.: $line" unless $line eq ''; - # join tags - foreach ( qw/AB DE ID TI SO RP SC FU FX PA JI/ ) { - $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_}; - } - # split on ; - foreach ( qw/ID SC DE/ ) { - $rec->{$_} = [ split(/;\s/, $rec->{$_}) ] if defined $rec->{$_}; - } - $rec->{'000'} = [ $file_pos ]; - push @{ $self->{_rec} }, $rec; + # join tags + foreach ( qw/AB DE ID TI SO RP SC FU FX PA JI/ ) { + $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_}; } - $rec = {}; - $line = <$fh>; - chomp $line; - $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' ); - } elsif ( $offset && $file_pos < $offset ) { - next; + # split on ; + foreach ( qw/ID SC DE/ ) { + $rec->{$_} = [ split(/;\s/, $rec->{$_}) ] if defined $rec->{$_}; + } + + $rec->{'000'} = [ $mfn ]; + warn "## mfn $mfn" if $debug; + return $rec; + } elsif ( $line =~ /^(\S\S)\s(.+)$/ ) { $tag = $1; $v = $2; @@ -157,39 +177,21 @@ $v .= $doi; } } elsif ( $line =~ m{^(\S\S)\s*$} ) { - warn "# $arg->{path} +$. empty |$line|\n"; - } else { - $log->logdie("can't parse +$. $arg->{path} |$line|"); + warn "# $self->{path} +$. empty |$line|\n"; + } elsif ( $line ne '' ) { + warn "E: $self->{path} +$ | can't parse |$line|"; } if ( defined $v ) { $v = $subfields->{$tag}->($v) if defined $subfields->{$tag}; - $log->debug("$tag: ", sub { dump( $v ) }); + warn "## $tag: ", sub { dump( $v ) } if $debug; push @{ $rec->{$tag} }, $v; } - } - $log->debug("loaded ", $self->size, " records"); - - $self ? return $self : return undef; -} - -=head2 fetch_rec - -Return record with ID C<$mfn> from database - - my $rec = $input->fetch_rec( $mfn, $filter_coderef ); - -=cut - -sub fetch_rec { - my $self = shift; - - my ( $mfn, $filter_coderef ) = @_; - $mfn -= $self->{offset} if $self->{offset}; - return $self->{_rec}->[$mfn-1]; + warn "can't get full record $mfn got ", dump $rec; + return $rec; } @@ -203,20 +205,18 @@ sub size { my $self = shift; - $#{ $self->{_rec} } + 1; + $#{ $self->{record_offset} } - $self->{offset}; + # no need for +1 since we record end of file as last record } -=head1 SEE ALSO -L is only sane source of document format which Google could find... - =head1 AUTHOR Dobrica Pavlinusic, C<< >> =head1 COPYRIGHT & LICENSE -Copyright 2007 Dobrica Pavlinusic, All Rights Reserved. +Copyright 2009 Dobrica Pavlinusic, All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.