--- trunk/lib/WebPAC/Input/ISI.pm 2007/10/10 19:46:58 900
+++ trunk/lib/WebPAC/Input/ISI.pm 2007/10/10 21:00:27 902
@@ -12,11 +12,11 @@
 
 =head1 VERSION
 
-Version 0.00
+Version 0.01
 
 =cut
 
-our $VERSION = '0.00';
+our $VERSION = '0.01';
 
 =head1 SYNOPSIS
 
@@ -54,6 +54,31 @@
 
 =cut
 
+# Per-tag parsers: maps a tag name to a code ref which receives the raw
+# value string of that tag and returns a structured replacement for it.
+my $subfields = {
+    # CR: positional fields separated by ", ".  Returns a hash ref keyed by
+    # author, year, reference, volume and page; only fields which are
+    # present and non-empty in the input are set.  A first field starting
+    # with "*" is stored under "institution" instead of "author".
+    'CR' => sub {
+        my @v = split(/, /, shift);
+        my $f = {};
+        foreach my $name ( qw/author year reference volume page/ ) {
+            my $tmp = shift @v;
+            # keep a literal "0", skip only missing or empty fields
+            $f->{$name} = $tmp if defined $tmp && length $tmp;
+        }
+        # author may be absent, so test definedness before matching to
+        # avoid an "uninitialized value" warning
+        if ( defined $f->{author} && $f->{author} =~ /^\*(.+)/ ) {
+            delete $f->{author};
+            $f->{institution} = $1;
+        }
+        return $f;
+    },
+};
+
 sub new {
     my $class = shift;
     my $self = {@_};
@@ -99,6 +124,10 @@
         } elsif ( $line =~ /^\s{3}(.+)$/ ) {
             $v = $1;
         } elsif ( $line eq 'ER' ) {
+            # join tags
+            foreach ( qw/AB DE ID TI/ ) {
+                $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
+            }
             push @{ $self->{_rec} }, $rec;
             $rec = {};
             $line = <$fh>;
@@ -110,6 +139,8 @@
             $log->logdie("can't parse +$. $arg->{path} : $line");
         }
 
+        $v = $subfields->{$tag}->($v) if defined $subfields->{$tag};
+
         push @{ $rec->{$tag} }, $v;
     }
 
@@ -152,7 +183,7 @@
 =head1 SEE ALSO
 
 L is only sane source of document format which Google could find...
-
+
 =head1 AUTHOR
 
 Dobrica Pavlinusic, C<< >>