--- trunk/lib/WebPAC/Input/ISI.pm 2009/05/27 09:31:35 1194 +++ trunk/lib/WebPAC/Input/ISI.pm 2009/09/20 19:05:56 1302 @@ -12,13 +12,9 @@ WebPAC::Input::ISI - support for ISI Export Format -=head1 VERSION - -Version 0.02 - =cut -our $VERSION = '0.02'; +our $VERSION = '0.03'; =head1 SYNOPSIS @@ -60,7 +56,7 @@ my $full_cr = shift; my @v = split(/, /, $full_cr); my $f = { full => $full_cr }; - foreach ( qw/author year reference volume page/ ) { + foreach ( qw/author year reference volume page doi/ ) { if ( my $tmp = shift @v ) { $f->{$_} = $tmp; } @@ -69,6 +65,7 @@ delete $f->{author}; $f->{institution} = $1; } + $f->{doi} =~ s{DOI\s+}{} if $f->{doi}; # strip DOI prefix return $f; }, }; @@ -108,6 +105,10 @@ my $rec; $self->{size} = 0; + my $max_size; + $max_size = ( $self->{offset} || 0 ) + $self->{limit} if $self->{limit}; + + warn "# max_size: $max_size"; while( $line = <$fh> ) { chomp($line); @@ -119,21 +120,36 @@ $v = $2; } elsif ( $line =~ /^\s{3}(.+)$/ ) { $v = $1; + if ( $tag eq 'CR' && $v =~ m{DOI$} ) { + my $doi = <$fh>; + chomp($doi); + $doi =~ s{^\s{3}}{ } || die "can't find DOI in: $doi"; + $v .= $doi; + } } elsif ( $line eq 'ER' ) { # join tags - foreach ( qw/AB DE ID TI/ ) { + foreach ( qw/AB DE ID TI SO RP SC FU FX PA JI/ ) { $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_}; } + # split on ; + foreach ( qw/ID SC DE/ ) { + $rec->{$_} = [ split(/;\s/, $rec->{$_}) ] if defined $rec->{$_}; + } $rec->{'000'} = [ ++$self->{size} ]; push @{ $self->{_rec} }, $rec; + + last if $max_size && $self->{size} == $max_size; + $rec = {}; $line = <$fh>; chomp $line; $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' ); } elsif ( $line eq 'EF' ) { last; + } elsif ( $line =~ m{^(\S\S)\s*$} ) { + warn "# $arg->{path} +$. empty |$line|\n"; } else { - $log->logdie("can't parse +$. $arg->{path} : $line"); + $log->logdie("can't parse +$. $arg->{path} |$line|"); } if ( defined $v ) {