/[webpac2]/trunk/lib/WebPAC/Input/ISI.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/ISI.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1194 - (hide annotations)
Wed May 27 09:31:35 2009 UTC (14 years, 11 months ago) by dpavlin
File size: 3463 byte(s)
 r1878@llin:  dpavlin | 2009-05-27 11:31:28 +0200
 CR field in ISI format now also contains 'full' line from original
 file as well as parsed components

1 dpavlin 898 package WebPAC::Input::ISI;
2    
3     use warnings;
4     use strict;
5    
6     use WebPAC::Input;
7     use base qw/WebPAC::Common/;
8    
9 dpavlin 904 use Data::Dump qw/dump/;
10    
11 dpavlin 898 =head1 NAME
12    
13     WebPAC::Input::ISI - support for ISI Export Format
14    
15     =head1 VERSION
16    
17 dpavlin 904 Version 0.02
18 dpavlin 898
19     =cut
20    
21 dpavlin 904 our $VERSION = '0.02';
22 dpavlin 898
23     =head1 SYNOPSIS
24    
25     Open file in ISI export fromat
26    
27     my $input = new WebPAC::Input::ISI(
28     path => '/path/to/ISI/records.txt',
29     );
30    
31     =head1 FUNCTIONS
32    
33     =head2 new
34    
35     Returns new low-level input API object
36    
37     my $input = new WebPAC::Input::ISI(
38     path => '/path/to/ISI/records.txt'
39     filter => sub {
40     my ($l,$field_nr) = @_;
41     # do something with $l which is line of input file
42     return $l;
43     },
44     }
45    
46     Options:
47    
48     =over 4
49    
50     =item path
51    
52     path to ISI export file
53    
54     =back
55    
56     =cut
57    
58 dpavlin 901 my $subfields = {
59     'CR' => sub {
60 dpavlin 1194 my $full_cr = shift;
61     my @v = split(/, /, $full_cr);
62     my $f = { full => $full_cr };
63 dpavlin 901 foreach ( qw/author year reference volume page/ ) {
64     if ( my $tmp = shift @v ) {
65     $f->{$_} = $tmp;
66     }
67     }
68     if ( $f->{author} =~ /^\*(.+)/ ) {
69     delete $f->{author};
70     $f->{institution} = $1;
71     }
72     return $f;
73     },
74     };
75    
76 dpavlin 898 sub new {
77     my $class = shift;
78     my $self = {@_};
79     bless($self, $class);
80    
81     my $arg = {@_};
82    
83     my $log = $self->_get_logger();
84    
85     open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
86    
87     my ( $format, $version );
88    
89     my $line = <$fh>;
90     chomp($line);
91     if ( $line =~ /^FN\s(.+)$/) {
92     $format = $1;
93     } else {
94     $log->logdie("first line of $arg->{path} has to be FN, but is: $line");
95     }
96    
97     $line = <$fh>;
98     chomp($line);
99     if ( $line =~ /^VR\s(.+)$/) {
100     $version = $1;
101     } else {
102     $log->logdie("second line of $arg->{path} has to be VN, but is: $line");
103     }
104    
105     $log->info("opening $format $version database '$arg->{path}'");
106    
107 dpavlin 899 my $tag;
108     my $rec;
109    
110 dpavlin 1077 $self->{size} = 0;
111    
112 dpavlin 899 while( $line = <$fh> ) {
113     chomp($line);
114    
115     my $v;
116    
117     if ( $line =~ /^(\S\S)\s(.+)$/ ) {
118     $tag = $1;
119     $v = $2;
120     } elsif ( $line =~ /^\s{3}(.+)$/ ) {
121     $v = $1;
122     } elsif ( $line eq 'ER' ) {
123 dpavlin 901 # join tags
124 dpavlin 902 foreach ( qw/AB DE ID TI/ ) {
125 dpavlin 901 $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
126     }
127 dpavlin 1077 $rec->{'000'} = [ ++$self->{size} ];
128 dpavlin 899 push @{ $self->{_rec} }, $rec;
129     $rec = {};
130     $line = <$fh>;
131     chomp $line;
132     $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' );
133     } elsif ( $line eq 'EF' ) {
134     last;
135     } else {
136     $log->logdie("can't parse +$. $arg->{path} : $line");
137     }
138    
139 dpavlin 904 if ( defined $v ) {
140     $v = $subfields->{$tag}->($v) if defined $subfields->{$tag};
141 dpavlin 901
142 dpavlin 904 $log->debug("$tag: ", sub { dump( $v ) });
143     push @{ $rec->{$tag} }, $v;
144     }
145 dpavlin 899
146     }
147    
148     $log->debug("loaded ", $self->size, " records");
149    
150 dpavlin 898 $self ? return $self : return undef;
151     }
152    
153     =head2 fetch_rec
154    
155     Return record with ID C<$mfn> from database
156    
157 dpavlin 899 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
158 dpavlin 898
159     =cut
160    
161     sub fetch_rec {
162     my $self = shift;
163    
164 dpavlin 899 my ( $mfn, $filter_coderef ) = @_;
165 dpavlin 898
166 dpavlin 899 return $self->{_rec}->[$mfn-1];
167 dpavlin 898 }
168    
169    
170     =head2 size
171    
172     Return number of records in database
173    
174     my $size = $input->size;
175    
176     =cut
177    
178     sub size {
179     my $self = shift;
180 dpavlin 1077 return $self->{size};
181 dpavlin 898 }
182    
183 dpavlin 900 =head1 SEE ALSO
184    
185     L<http://isibasic.com/help/helpprn.html> is only sane source of document format which Google could find...
186 dpavlin 901
187 dpavlin 898 =head1 AUTHOR
188    
189     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
190    
191     =head1 COPYRIGHT & LICENSE
192    
193     Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
194    
195     This program is free software; you can redistribute it and/or modify it
196     under the same terms as Perl itself.
197    
198     =cut
199    
200     1; # End of WebPAC::Input::ISI

  ViewVC Help
Powered by ViewVC 1.1.26