/[webpac2]/branches/Sack/lib/WebPAC/Input/ISI.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /branches/Sack/lib/WebPAC/Input/ISI.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 904 - (hide annotations)
Fri Oct 12 12:07:35 2007 UTC (16 years, 7 months ago) by dpavlin
Original Path: trunk/lib/WebPAC/Input/ISI.pm
File size: 3361 byte(s)
 r1355@llin:  dpavlin | 2007-10-12 14:07:29 +0200
 fix empty tags

1 dpavlin 898 package WebPAC::Input::ISI;
2    
3     use warnings;
4     use strict;
5    
6     use WebPAC::Input;
7     use base qw/WebPAC::Common/;
8    
9 dpavlin 904 use Data::Dump qw/dump/;
10    
11 dpavlin 898 =head1 NAME
12    
13     WebPAC::Input::ISI - support for ISI Export Format
14    
15     =head1 VERSION
16    
17 dpavlin 904 Version 0.02
18 dpavlin 898
19     =cut
20    
21 dpavlin 904 our $VERSION = '0.02';
22 dpavlin 898
23     =head1 SYNOPSIS
24    
25     Open file in ISI export fromat
26    
27     my $input = new WebPAC::Input::ISI(
28     path => '/path/to/ISI/records.txt',
29     );
30    
31     =head1 FUNCTIONS
32    
33     =head2 new
34    
35     Returns new low-level input API object
36    
37     my $input = new WebPAC::Input::ISI(
38     path => '/path/to/ISI/records.txt'
39     filter => sub {
40     my ($l,$field_nr) = @_;
41     # do something with $l which is line of input file
42     return $l;
43     },
44     }
45    
46     Options:
47    
48     =over 4
49    
50     =item path
51    
52     path to ISI export file
53    
54     =back
55    
56     =cut
57    
58 dpavlin 901 my $subfields = {
59     'CR' => sub {
60     my @v = split(/, /, shift);
61     my $f;
62     foreach ( qw/author year reference volume page/ ) {
63     if ( my $tmp = shift @v ) {
64     $f->{$_} = $tmp;
65     }
66     }
67     if ( $f->{author} =~ /^\*(.+)/ ) {
68     delete $f->{author};
69     $f->{institution} = $1;
70     }
71     return $f;
72     },
73     };
74    
75 dpavlin 898 sub new {
76     my $class = shift;
77     my $self = {@_};
78     bless($self, $class);
79    
80     my $arg = {@_};
81    
82     my $log = $self->_get_logger();
83    
84     open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
85    
86     my ( $format, $version );
87    
88     my $line = <$fh>;
89     chomp($line);
90     if ( $line =~ /^FN\s(.+)$/) {
91     $format = $1;
92     } else {
93     $log->logdie("first line of $arg->{path} has to be FN, but is: $line");
94     }
95    
96     $line = <$fh>;
97     chomp($line);
98     if ( $line =~ /^VR\s(.+)$/) {
99     $version = $1;
100     } else {
101     $log->logdie("second line of $arg->{path} has to be VN, but is: $line");
102     }
103    
104     $log->info("opening $format $version database '$arg->{path}'");
105    
106 dpavlin 899 my $tag;
107     my $rec;
108    
109     while( $line = <$fh> ) {
110     chomp($line);
111    
112     my $v;
113    
114     if ( $line =~ /^(\S\S)\s(.+)$/ ) {
115     $tag = $1;
116     $v = $2;
117     } elsif ( $line =~ /^\s{3}(.+)$/ ) {
118     $v = $1;
119     } elsif ( $line eq 'ER' ) {
120 dpavlin 901 # join tags
121 dpavlin 902 foreach ( qw/AB DE ID TI/ ) {
122 dpavlin 901 $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
123     }
124 dpavlin 899 push @{ $self->{_rec} }, $rec;
125     $rec = {};
126     $line = <$fh>;
127     chomp $line;
128     $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' );
129     } elsif ( $line eq 'EF' ) {
130     last;
131     } else {
132     $log->logdie("can't parse +$. $arg->{path} : $line");
133     }
134    
135 dpavlin 904 if ( defined $v ) {
136     $v = $subfields->{$tag}->($v) if defined $subfields->{$tag};
137 dpavlin 901
138 dpavlin 904 $log->debug("$tag: ", sub { dump( $v ) });
139     push @{ $rec->{$tag} }, $v;
140     }
141 dpavlin 899
142     }
143    
144     $log->debug("loaded ", $self->size, " records");
145    
146 dpavlin 898 $self ? return $self : return undef;
147     }
148    
149     =head2 fetch_rec
150    
151     Return record with ID C<$mfn> from database
152    
153 dpavlin 899 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
154 dpavlin 898
155     =cut
156    
157     sub fetch_rec {
158     my $self = shift;
159    
160 dpavlin 899 my ( $mfn, $filter_coderef ) = @_;
161 dpavlin 898
162 dpavlin 899 return $self->{_rec}->[$mfn-1];
163 dpavlin 898 }
164    
165    
166     =head2 size
167    
168     Return number of records in database
169    
170     my $size = $input->size;
171    
172     =cut
173    
174     sub size {
175     my $self = shift;
176 dpavlin 899 return $#{$self->{_rec}} + 1;
177 dpavlin 898 }
178    
179 dpavlin 900 =head1 SEE ALSO
180    
181     L<http://isibasic.com/help/helpprn.html> is only sane source of document format which Google could find...
182 dpavlin 901
183 dpavlin 898 =head1 AUTHOR
184    
185     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
186    
187     =head1 COPYRIGHT & LICENSE
188    
189     Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
190    
191     This program is free software; you can redistribute it and/or modify it
192     under the same terms as Perl itself.
193    
194     =cut
195    
196     1; # End of WebPAC::Input::ISI

  ViewVC Help
Powered by ViewVC 1.1.26