/[webpac2]/trunk/lib/WebPAC/Input/ISI.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Input/ISI.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1077 - (show annotations)
Wed Nov 28 22:52:01 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 3414 byte(s)
fake mfn to make rest of WebPAC happy

1 package WebPAC::Input::ISI;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Input;
7 use base qw/WebPAC::Common/;
8
9 use Data::Dump qw/dump/;
10
11 =head1 NAME
12
13 WebPAC::Input::ISI - support for ISI Export Format
14
15 =head1 VERSION
16
17 Version 0.02
18
19 =cut
20
21 our $VERSION = '0.02';
22
23 =head1 SYNOPSIS
24
25 Open file in ISI export fromat
26
27 my $input = new WebPAC::Input::ISI(
28 path => '/path/to/ISI/records.txt',
29 );
30
31 =head1 FUNCTIONS
32
33 =head2 new
34
35 Returns new low-level input API object
36
37 my $input = new WebPAC::Input::ISI(
38 path => '/path/to/ISI/records.txt'
39 filter => sub {
40 my ($l,$field_nr) = @_;
41 # do something with $l which is line of input file
42 return $l;
43 },
44 }
45
46 Options:
47
48 =over 4
49
50 =item path
51
52 path to ISI export file
53
54 =back
55
56 =cut
57
58 my $subfields = {
59 'CR' => sub {
60 my @v = split(/, /, shift);
61 my $f;
62 foreach ( qw/author year reference volume page/ ) {
63 if ( my $tmp = shift @v ) {
64 $f->{$_} = $tmp;
65 }
66 }
67 if ( $f->{author} =~ /^\*(.+)/ ) {
68 delete $f->{author};
69 $f->{institution} = $1;
70 }
71 return $f;
72 },
73 };
74
75 sub new {
76 my $class = shift;
77 my $self = {@_};
78 bless($self, $class);
79
80 my $arg = {@_};
81
82 my $log = $self->_get_logger();
83
84 open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
85
86 my ( $format, $version );
87
88 my $line = <$fh>;
89 chomp($line);
90 if ( $line =~ /^FN\s(.+)$/) {
91 $format = $1;
92 } else {
93 $log->logdie("first line of $arg->{path} has to be FN, but is: $line");
94 }
95
96 $line = <$fh>;
97 chomp($line);
98 if ( $line =~ /^VR\s(.+)$/) {
99 $version = $1;
100 } else {
101 $log->logdie("second line of $arg->{path} has to be VN, but is: $line");
102 }
103
104 $log->info("opening $format $version database '$arg->{path}'");
105
106 my $tag;
107 my $rec;
108
109 $self->{size} = 0;
110
111 while( $line = <$fh> ) {
112 chomp($line);
113
114 my $v;
115
116 if ( $line =~ /^(\S\S)\s(.+)$/ ) {
117 $tag = $1;
118 $v = $2;
119 } elsif ( $line =~ /^\s{3}(.+)$/ ) {
120 $v = $1;
121 } elsif ( $line eq 'ER' ) {
122 # join tags
123 foreach ( qw/AB DE ID TI/ ) {
124 $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
125 }
126 $rec->{'000'} = [ ++$self->{size} ];
127 push @{ $self->{_rec} }, $rec;
128 $rec = {};
129 $line = <$fh>;
130 chomp $line;
131 $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' );
132 } elsif ( $line eq 'EF' ) {
133 last;
134 } else {
135 $log->logdie("can't parse +$. $arg->{path} : $line");
136 }
137
138 if ( defined $v ) {
139 $v = $subfields->{$tag}->($v) if defined $subfields->{$tag};
140
141 $log->debug("$tag: ", sub { dump( $v ) });
142 push @{ $rec->{$tag} }, $v;
143 }
144
145 }
146
147 $log->debug("loaded ", $self->size, " records");
148
149 $self ? return $self : return undef;
150 }
151
152 =head2 fetch_rec
153
154 Return record with ID C<$mfn> from database
155
156 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
157
158 =cut
159
160 sub fetch_rec {
161 my $self = shift;
162
163 my ( $mfn, $filter_coderef ) = @_;
164
165 return $self->{_rec}->[$mfn-1];
166 }
167
168
169 =head2 size
170
171 Return number of records in database
172
173 my $size = $input->size;
174
175 =cut
176
177 sub size {
178 my $self = shift;
179 return $self->{size};
180 }
181
182 =head1 SEE ALSO
183
184 L<http://isibasic.com/help/helpprn.html> is only sane source of document format which Google could find...
185
186 =head1 AUTHOR
187
188 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
189
190 =head1 COPYRIGHT & LICENSE
191
192 Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
193
194 This program is free software; you can redistribute it and/or modify it
195 under the same terms as Perl itself.
196
197 =cut
198
199 1; # End of WebPAC::Input::ISI

  ViewVC Help
Powered by ViewVC 1.1.26