--- trunk/lib/WebPAC/Input/ISI.pm 2007/10/10 19:01:55 898 +++ trunk/lib/WebPAC/Input/ISI.pm 2009/05/27 09:31:35 1194 @@ -6,18 +6,19 @@ use WebPAC::Input; use base qw/WebPAC::Common/; +use Data::Dump qw/dump/; + =head1 NAME WebPAC::Input::ISI - support for ISI Export Format =head1 VERSION -Version 0.00 +Version 0.02 =cut -our $VERSION = '0.00'; - +our $VERSION = '0.02'; =head1 SYNOPSIS @@ -54,6 +55,24 @@ =cut +my $subfields = { + 'CR' => sub { + my $full_cr = shift; + my @v = split(/, /, $full_cr); + my $f = { full => $full_cr }; + foreach ( qw/author year reference volume page/ ) { + if ( my $tmp = shift @v ) { + $f->{$_} = $tmp; + } + } + if ( $f->{author} =~ /^\*(.+)/ ) { + delete $f->{author}; + $f->{institution} = $1; + } + return $f; + }, +}; + sub new { my $class = shift; my $self = {@_}; @@ -85,6 +104,49 @@ $log->info("opening $format $version database '$arg->{path}'"); + my $tag; + my $rec; + + $self->{size} = 0; + + while( $line = <$fh> ) { + chomp($line); + + my $v; + + if ( $line =~ /^(\S\S)\s(.+)$/ ) { + $tag = $1; + $v = $2; + } elsif ( $line =~ /^\s{3}(.+)$/ ) { + $v = $1; + } elsif ( $line eq 'ER' ) { + # join tags + foreach ( qw/AB DE ID TI/ ) { + $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_}; + } + $rec->{'000'} = [ ++$self->{size} ]; + push @{ $self->{_rec} }, $rec; + $rec = {}; + $line = <$fh>; + chomp $line; + $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' ); + } elsif ( $line eq 'EF' ) { + last; + } else { + $log->logdie("can't parse +$. $arg->{path} : $line"); + } + + if ( defined $v ) { + $v = $subfields->{$tag}->($v) if defined $subfields->{$tag}; + + $log->debug("$tag: ", sub { dump( $v ) }); + push @{ $rec->{$tag} }, $v; + } + + } + + $log->debug("loaded ", $self->size, " records"); + $self ? return $self : return undef; } @@ -92,18 +154,16 @@ Return record with ID C<$mfn> from database - my $rec = $input->fetch_rec( $mfn, $filter_coderef); + my $rec = $input->fetch_rec( $mfn, $filter_coderef ); =cut sub fetch_rec { my $self = shift; - my ($mfn, $filter_coderef) = @_; - - my $rec; + my ( $mfn, $filter_coderef ) = @_; - return $rec; + return $self->{_rec}->[$mfn-1]; } @@ -117,9 +177,13 @@ sub size { my $self = shift; - return 2; + return $self->{size}; } +=head1 SEE ALSO + +L is only sane source of document format which Google could find... + =head1 AUTHOR Dobrica Pavlinusic, C<< >>