/[webpac2]/branches/Sack/lib/WebPAC/Input/ISI.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /branches/Sack/lib/WebPAC/Input/ISI.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1194 - (show annotations)
Wed May 27 09:31:35 2009 UTC (14 years, 11 months ago) by dpavlin
Original Path: trunk/lib/WebPAC/Input/ISI.pm
File size: 3463 byte(s)
 r1878@llin:  dpavlin | 2009-05-27 11:31:28 +0200
 CR field in ISI format now also contains 'full' line from original
 file as well as parsed components

1 package WebPAC::Input::ISI;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Input;
7 use base qw/WebPAC::Common/;
8
9 use Data::Dump qw/dump/;
10
11 =head1 NAME
12
13 WebPAC::Input::ISI - support for ISI Export Format
14
15 =head1 VERSION
16
17 Version 0.02
18
19 =cut
20
21 our $VERSION = '0.02';
22
23 =head1 SYNOPSIS
24
25 Open file in ISI export fromat
26
27 my $input = new WebPAC::Input::ISI(
28 path => '/path/to/ISI/records.txt',
29 );
30
31 =head1 FUNCTIONS
32
33 =head2 new
34
35 Returns new low-level input API object
36
37 my $input = new WebPAC::Input::ISI(
38 path => '/path/to/ISI/records.txt'
39 filter => sub {
40 my ($l,$field_nr) = @_;
41 # do something with $l which is line of input file
42 return $l;
43 },
44 }
45
46 Options:
47
48 =over 4
49
50 =item path
51
52 path to ISI export file
53
54 =back
55
56 =cut
57
58 my $subfields = {
59 'CR' => sub {
60 my $full_cr = shift;
61 my @v = split(/, /, $full_cr);
62 my $f = { full => $full_cr };
63 foreach ( qw/author year reference volume page/ ) {
64 if ( my $tmp = shift @v ) {
65 $f->{$_} = $tmp;
66 }
67 }
68 if ( $f->{author} =~ /^\*(.+)/ ) {
69 delete $f->{author};
70 $f->{institution} = $1;
71 }
72 return $f;
73 },
74 };
75
76 sub new {
77 my $class = shift;
78 my $self = {@_};
79 bless($self, $class);
80
81 my $arg = {@_};
82
83 my $log = $self->_get_logger();
84
85 open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
86
87 my ( $format, $version );
88
89 my $line = <$fh>;
90 chomp($line);
91 if ( $line =~ /^FN\s(.+)$/) {
92 $format = $1;
93 } else {
94 $log->logdie("first line of $arg->{path} has to be FN, but is: $line");
95 }
96
97 $line = <$fh>;
98 chomp($line);
99 if ( $line =~ /^VR\s(.+)$/) {
100 $version = $1;
101 } else {
102 $log->logdie("second line of $arg->{path} has to be VN, but is: $line");
103 }
104
105 $log->info("opening $format $version database '$arg->{path}'");
106
107 my $tag;
108 my $rec;
109
110 $self->{size} = 0;
111
112 while( $line = <$fh> ) {
113 chomp($line);
114
115 my $v;
116
117 if ( $line =~ /^(\S\S)\s(.+)$/ ) {
118 $tag = $1;
119 $v = $2;
120 } elsif ( $line =~ /^\s{3}(.+)$/ ) {
121 $v = $1;
122 } elsif ( $line eq 'ER' ) {
123 # join tags
124 foreach ( qw/AB DE ID TI/ ) {
125 $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
126 }
127 $rec->{'000'} = [ ++$self->{size} ];
128 push @{ $self->{_rec} }, $rec;
129 $rec = {};
130 $line = <$fh>;
131 chomp $line;
132 $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' );
133 } elsif ( $line eq 'EF' ) {
134 last;
135 } else {
136 $log->logdie("can't parse +$. $arg->{path} : $line");
137 }
138
139 if ( defined $v ) {
140 $v = $subfields->{$tag}->($v) if defined $subfields->{$tag};
141
142 $log->debug("$tag: ", sub { dump( $v ) });
143 push @{ $rec->{$tag} }, $v;
144 }
145
146 }
147
148 $log->debug("loaded ", $self->size, " records");
149
150 $self ? return $self : return undef;
151 }
152
153 =head2 fetch_rec
154
155 Return record with ID C<$mfn> from database
156
157 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
158
159 =cut
160
161 sub fetch_rec {
162 my $self = shift;
163
164 my ( $mfn, $filter_coderef ) = @_;
165
166 return $self->{_rec}->[$mfn-1];
167 }
168
169
170 =head2 size
171
172 Return number of records in database
173
174 my $size = $input->size;
175
176 =cut
177
178 sub size {
179 my $self = shift;
180 return $self->{size};
181 }
182
183 =head1 SEE ALSO
184
185 L<http://isibasic.com/help/helpprn.html> is only sane source of document format which Google could find...
186
187 =head1 AUTHOR
188
189 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
190
191 =head1 COPYRIGHT & LICENSE
192
193 Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
194
195 This program is free software; you can redistribute it and/or modify it
196 under the same terms as Perl itself.
197
198 =cut
199
200 1; # End of WebPAC::Input::ISI

  ViewVC Help
Powered by ViewVC 1.1.26