/[webpac2]/branches/Sack/lib/WebPAC/Input/ISI.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /branches/Sack/lib/WebPAC/Input/ISI.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1077 - (hide annotations)
Wed Nov 28 22:52:01 2007 UTC (16 years, 5 months ago) by dpavlin
Original Path: trunk/lib/WebPAC/Input/ISI.pm
File size: 3414 byte(s)
fake mfn to make rest of WebPAC happy

1 dpavlin 898 package WebPAC::Input::ISI;
2    
3     use warnings;
4     use strict;
5    
6     use WebPAC::Input;
7     use base qw/WebPAC::Common/;
8    
9 dpavlin 904 use Data::Dump qw/dump/;
10    
11 dpavlin 898 =head1 NAME
12    
13     WebPAC::Input::ISI - support for ISI Export Format
14    
15     =head1 VERSION
16    
17 dpavlin 904 Version 0.02
18 dpavlin 898
19     =cut
20    
21 dpavlin 904 our $VERSION = '0.02';
22 dpavlin 898
23     =head1 SYNOPSIS
24    
25     Open file in ISI export fromat
26    
27     my $input = new WebPAC::Input::ISI(
28     path => '/path/to/ISI/records.txt',
29     );
30    
31     =head1 FUNCTIONS
32    
33     =head2 new
34    
35     Returns new low-level input API object
36    
37     my $input = new WebPAC::Input::ISI(
38     path => '/path/to/ISI/records.txt'
39     filter => sub {
40     my ($l,$field_nr) = @_;
41     # do something with $l which is line of input file
42     return $l;
43     },
44     }
45    
46     Options:
47    
48     =over 4
49    
50     =item path
51    
52     path to ISI export file
53    
54     =back
55    
56     =cut
57    
58 dpavlin 901 my $subfields = {
59     'CR' => sub {
60     my @v = split(/, /, shift);
61     my $f;
62     foreach ( qw/author year reference volume page/ ) {
63     if ( my $tmp = shift @v ) {
64     $f->{$_} = $tmp;
65     }
66     }
67     if ( $f->{author} =~ /^\*(.+)/ ) {
68     delete $f->{author};
69     $f->{institution} = $1;
70     }
71     return $f;
72     },
73     };
74    
75 dpavlin 898 sub new {
76     my $class = shift;
77     my $self = {@_};
78     bless($self, $class);
79    
80     my $arg = {@_};
81    
82     my $log = $self->_get_logger();
83    
84     open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
85    
86     my ( $format, $version );
87    
88     my $line = <$fh>;
89     chomp($line);
90     if ( $line =~ /^FN\s(.+)$/) {
91     $format = $1;
92     } else {
93     $log->logdie("first line of $arg->{path} has to be FN, but is: $line");
94     }
95    
96     $line = <$fh>;
97     chomp($line);
98     if ( $line =~ /^VR\s(.+)$/) {
99     $version = $1;
100     } else {
101     $log->logdie("second line of $arg->{path} has to be VN, but is: $line");
102     }
103    
104     $log->info("opening $format $version database '$arg->{path}'");
105    
106 dpavlin 899 my $tag;
107     my $rec;
108    
109 dpavlin 1077 $self->{size} = 0;
110    
111 dpavlin 899 while( $line = <$fh> ) {
112     chomp($line);
113    
114     my $v;
115    
116     if ( $line =~ /^(\S\S)\s(.+)$/ ) {
117     $tag = $1;
118     $v = $2;
119     } elsif ( $line =~ /^\s{3}(.+)$/ ) {
120     $v = $1;
121     } elsif ( $line eq 'ER' ) {
122 dpavlin 901 # join tags
123 dpavlin 902 foreach ( qw/AB DE ID TI/ ) {
124 dpavlin 901 $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
125     }
126 dpavlin 1077 $rec->{'000'} = [ ++$self->{size} ];
127 dpavlin 899 push @{ $self->{_rec} }, $rec;
128     $rec = {};
129     $line = <$fh>;
130     chomp $line;
131     $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' );
132     } elsif ( $line eq 'EF' ) {
133     last;
134     } else {
135     $log->logdie("can't parse +$. $arg->{path} : $line");
136     }
137    
138 dpavlin 904 if ( defined $v ) {
139     $v = $subfields->{$tag}->($v) if defined $subfields->{$tag};
140 dpavlin 901
141 dpavlin 904 $log->debug("$tag: ", sub { dump( $v ) });
142     push @{ $rec->{$tag} }, $v;
143     }
144 dpavlin 899
145     }
146    
147     $log->debug("loaded ", $self->size, " records");
148    
149 dpavlin 898 $self ? return $self : return undef;
150     }
151    
152     =head2 fetch_rec
153    
154     Return record with ID C<$mfn> from database
155    
156 dpavlin 899 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
157 dpavlin 898
158     =cut
159    
160     sub fetch_rec {
161     my $self = shift;
162    
163 dpavlin 899 my ( $mfn, $filter_coderef ) = @_;
164 dpavlin 898
165 dpavlin 899 return $self->{_rec}->[$mfn-1];
166 dpavlin 898 }
167    
168    
169     =head2 size
170    
171     Return number of records in database
172    
173     my $size = $input->size;
174    
175     =cut
176    
177     sub size {
178     my $self = shift;
179 dpavlin 1077 return $self->{size};
180 dpavlin 898 }
181    
182 dpavlin 900 =head1 SEE ALSO
183    
184     L<http://isibasic.com/help/helpprn.html> is only sane source of document format which Google could find...
185 dpavlin 901
186 dpavlin 898 =head1 AUTHOR
187    
188     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
189    
190     =head1 COPYRIGHT & LICENSE
191    
192     Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
193    
194     This program is free software; you can redistribute it and/or modify it
195     under the same terms as Perl itself.
196    
197     =cut
198    
199     1; # End of WebPAC::Input::ISI

  ViewVC Help
Powered by ViewVC 1.1.26