/[webpac2]/trunk/lib/WebPAC/Input/ISI.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/ISI.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 902 - (hide annotations)
Wed Oct 10 21:00:27 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 3262 byte(s)
more tags to join

1 dpavlin 898 package WebPAC::Input::ISI;
2    
3     use warnings;
4     use strict;
5    
6     use WebPAC::Input;
7     use base qw/WebPAC::Common/;
8    
9     =head1 NAME
10    
11     WebPAC::Input::ISI - support for ISI Export Format
12    
13     =head1 VERSION
14    
15 dpavlin 901 Version 0.01
16 dpavlin 898
17     =cut
18    
19 dpavlin 901 our $VERSION = '0.01';
20 dpavlin 898
21    
22     =head1 SYNOPSIS
23    
24     Open file in ISI export fromat
25    
26     my $input = new WebPAC::Input::ISI(
27     path => '/path/to/ISI/records.txt',
28     );
29    
30     =head1 FUNCTIONS
31    
32     =head2 new
33    
34     Returns new low-level input API object
35    
36     my $input = new WebPAC::Input::ISI(
37     path => '/path/to/ISI/records.txt'
38     filter => sub {
39     my ($l,$field_nr) = @_;
40     # do something with $l which is line of input file
41     return $l;
42     },
43     }
44    
45     Options:
46    
47     =over 4
48    
49     =item path
50    
51     path to ISI export file
52    
53     =back
54    
55     =cut
56    
57 dpavlin 901 my $subfields = {
58     'CR' => sub {
59     my @v = split(/, /, shift);
60     my $f;
61     foreach ( qw/author year reference volume page/ ) {
62     if ( my $tmp = shift @v ) {
63     $f->{$_} = $tmp;
64     }
65     }
66     if ( $f->{author} =~ /^\*(.+)/ ) {
67     delete $f->{author};
68     $f->{institution} = $1;
69     }
70     return $f;
71     },
72     };
73    
74 dpavlin 898 sub new {
75     my $class = shift;
76     my $self = {@_};
77     bless($self, $class);
78    
79     my $arg = {@_};
80    
81     my $log = $self->_get_logger();
82    
83     open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
84    
85     my ( $format, $version );
86    
87     my $line = <$fh>;
88     chomp($line);
89     if ( $line =~ /^FN\s(.+)$/) {
90     $format = $1;
91     } else {
92     $log->logdie("first line of $arg->{path} has to be FN, but is: $line");
93     }
94    
95     $line = <$fh>;
96     chomp($line);
97     if ( $line =~ /^VR\s(.+)$/) {
98     $version = $1;
99     } else {
100     $log->logdie("second line of $arg->{path} has to be VN, but is: $line");
101     }
102    
103     $log->info("opening $format $version database '$arg->{path}'");
104    
105 dpavlin 899 my $tag;
106     my $rec;
107    
108     while( $line = <$fh> ) {
109     chomp($line);
110    
111     my $v;
112    
113     if ( $line =~ /^(\S\S)\s(.+)$/ ) {
114     $tag = $1;
115     $v = $2;
116     } elsif ( $line =~ /^\s{3}(.+)$/ ) {
117     $v = $1;
118     } elsif ( $line eq 'ER' ) {
119 dpavlin 901 # join tags
120 dpavlin 902 foreach ( qw/AB DE ID TI/ ) {
121 dpavlin 901 $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
122     }
123 dpavlin 899 push @{ $self->{_rec} }, $rec;
124     $rec = {};
125     $line = <$fh>;
126     chomp $line;
127     $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' );
128     } elsif ( $line eq 'EF' ) {
129     last;
130     } else {
131     $log->logdie("can't parse +$. $arg->{path} : $line");
132     }
133    
134 dpavlin 901 $v = $subfields->{$tag}->($v) if defined $subfields->{$tag};
135    
136 dpavlin 899 push @{ $rec->{$tag} }, $v;
137    
138     }
139    
140     $log->debug("loaded ", $self->size, " records");
141    
142 dpavlin 898 $self ? return $self : return undef;
143     }
144    
145     =head2 fetch_rec
146    
147     Return record with ID C<$mfn> from database
148    
149 dpavlin 899 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
150 dpavlin 898
151     =cut
152    
153     sub fetch_rec {
154     my $self = shift;
155    
156 dpavlin 899 my ( $mfn, $filter_coderef ) = @_;
157 dpavlin 898
158 dpavlin 899 return $self->{_rec}->[$mfn-1];
159 dpavlin 898 }
160    
161    
162     =head2 size
163    
164     Return number of records in database
165    
166     my $size = $input->size;
167    
168     =cut
169    
170     sub size {
171     my $self = shift;
172 dpavlin 899 return $#{$self->{_rec}} + 1;
173 dpavlin 898 }
174    
175 dpavlin 900 =head1 SEE ALSO
176    
177     L<http://isibasic.com/help/helpprn.html> is only sane source of document format which Google could find...
178 dpavlin 901
179 dpavlin 898 =head1 AUTHOR
180    
181     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
182    
183     =head1 COPYRIGHT & LICENSE
184    
185     Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
186    
187     This program is free software; you can redistribute it and/or modify it
188     under the same terms as Perl itself.
189    
190     =cut
191    
192     1; # End of WebPAC::Input::ISI

  ViewVC Help
Powered by ViewVC 1.1.26