/[webpac2]/trunk/lib/WebPAC/Input/ISI.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Input/ISI.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 904 - (show annotations)
Fri Oct 12 12:07:35 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 3361 byte(s)
 r1355@llin:  dpavlin | 2007-10-12 14:07:29 +0200
 fix empty tags

1 package WebPAC::Input::ISI;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Input;
7 use base qw/WebPAC::Common/;
8
9 use Data::Dump qw/dump/;
10
11 =head1 NAME
12
13 WebPAC::Input::ISI - support for ISI Export Format
14
15 =head1 VERSION
16
17 Version 0.02
18
19 =cut
20
21 our $VERSION = '0.02';
22
23 =head1 SYNOPSIS
24
25 Open file in ISI export fromat
26
27 my $input = new WebPAC::Input::ISI(
28 path => '/path/to/ISI/records.txt',
29 );
30
31 =head1 FUNCTIONS
32
33 =head2 new
34
35 Returns new low-level input API object
36
37 my $input = new WebPAC::Input::ISI(
38 path => '/path/to/ISI/records.txt'
39 filter => sub {
40 my ($l,$field_nr) = @_;
41 # do something with $l which is line of input file
42 return $l;
43 },
44 }
45
46 Options:
47
48 =over 4
49
50 =item path
51
52 path to ISI export file
53
54 =back
55
56 =cut
57
58 my $subfields = {
59 'CR' => sub {
60 my @v = split(/, /, shift);
61 my $f;
62 foreach ( qw/author year reference volume page/ ) {
63 if ( my $tmp = shift @v ) {
64 $f->{$_} = $tmp;
65 }
66 }
67 if ( $f->{author} =~ /^\*(.+)/ ) {
68 delete $f->{author};
69 $f->{institution} = $1;
70 }
71 return $f;
72 },
73 };
74
75 sub new {
76 my $class = shift;
77 my $self = {@_};
78 bless($self, $class);
79
80 my $arg = {@_};
81
82 my $log = $self->_get_logger();
83
84 open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
85
86 my ( $format, $version );
87
88 my $line = <$fh>;
89 chomp($line);
90 if ( $line =~ /^FN\s(.+)$/) {
91 $format = $1;
92 } else {
93 $log->logdie("first line of $arg->{path} has to be FN, but is: $line");
94 }
95
96 $line = <$fh>;
97 chomp($line);
98 if ( $line =~ /^VR\s(.+)$/) {
99 $version = $1;
100 } else {
101 $log->logdie("second line of $arg->{path} has to be VN, but is: $line");
102 }
103
104 $log->info("opening $format $version database '$arg->{path}'");
105
106 my $tag;
107 my $rec;
108
109 while( $line = <$fh> ) {
110 chomp($line);
111
112 my $v;
113
114 if ( $line =~ /^(\S\S)\s(.+)$/ ) {
115 $tag = $1;
116 $v = $2;
117 } elsif ( $line =~ /^\s{3}(.+)$/ ) {
118 $v = $1;
119 } elsif ( $line eq 'ER' ) {
120 # join tags
121 foreach ( qw/AB DE ID TI/ ) {
122 $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
123 }
124 push @{ $self->{_rec} }, $rec;
125 $rec = {};
126 $line = <$fh>;
127 chomp $line;
128 $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' );
129 } elsif ( $line eq 'EF' ) {
130 last;
131 } else {
132 $log->logdie("can't parse +$. $arg->{path} : $line");
133 }
134
135 if ( defined $v ) {
136 $v = $subfields->{$tag}->($v) if defined $subfields->{$tag};
137
138 $log->debug("$tag: ", sub { dump( $v ) });
139 push @{ $rec->{$tag} }, $v;
140 }
141
142 }
143
144 $log->debug("loaded ", $self->size, " records");
145
146 $self ? return $self : return undef;
147 }
148
149 =head2 fetch_rec
150
151 Return record with ID C<$mfn> from database
152
153 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
154
155 =cut
156
157 sub fetch_rec {
158 my $self = shift;
159
160 my ( $mfn, $filter_coderef ) = @_;
161
162 return $self->{_rec}->[$mfn-1];
163 }
164
165
166 =head2 size
167
168 Return number of records in database
169
170 my $size = $input->size;
171
172 =cut
173
174 sub size {
175 my $self = shift;
176 return $#{$self->{_rec}} + 1;
177 }
178
179 =head1 SEE ALSO
180
181 L<http://isibasic.com/help/helpprn.html> is only sane source of document format which Google could find...
182
183 =head1 AUTHOR
184
185 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
186
187 =head1 COPYRIGHT & LICENSE
188
189 Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
190
191 This program is free software; you can redistribute it and/or modify it
192 under the same terms as Perl itself.
193
194 =cut
195
196 1; # End of WebPAC::Input::ISI

  ViewVC Help
Powered by ViewVC 1.1.26