/[webpac2]/trunk/lib/WebPAC/Input/Ovid.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Input/Ovid.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1296 - (show annotations)
Sat Sep 19 23:29:23 2009 UTC (14 years, 8 months ago) by dpavlin
File size: 3043 byte(s)
- added support for Direct Export format to existing BRS/Tagged
- specify file glob (as from ovid-download-results.pl) for multiple files

1 package WebPAC::Input::Ovid;
2
3 use warnings;
4 use strict;
5
6 use lib 'lib';
7 use WebPAC::Input;
8 use base qw/WebPAC::Common/;
9
10 use Data::Dump qw/dump/;
11
12 =head1 NAME
13
14 WebPAC::Input::Ovid - support for Ovid citation export (BRS/Tagged and Direct Export)
15
16 =head1 VERSION
17
18 Version 0.01
19
20 =cut
21
22 our $VERSION = '0.01';
23 our $debug = 0;
24
25 =head1 SYNOPSIS
26
27 Open file in Ovid citation export fromat
28
29 my $input = new WebPAC::Input::Ovid(
30 path => '/path/to/ovid-cites.txt',
31 );
32
33 You can also specify file glob:
34
35 my $input = WebPAC::Input::Ovid->new( path => '/path/to/ovid.*.txt' );
36
37 =head1 FUNCTIONS
38
39 =head2 new
40
41 Returns new low-level input API object
42
43 my $input = new WebPAC::Input::Ovid(
44 path => '/path/to/ebsco.txt',
45 filter => sub {
46 my ($l,$field_nr) = @_;
47 # do something with $l which is line of input file
48 return $l;
49 },
50 }
51
52 Options:
53
54 =over 4
55
56 =item path
57
58 path to Ovid export file
59
60 =back
61
62 =cut
63
64 sub new {
65 my $class = shift;
66 my $self = {@_};
67 bless($self, $class);
68
69 my $arg = {@_};
70
71 my $log = $self->_get_logger();
72
73 my @paths;
74
75 if ( $arg->{path} =~ m/\*/ ) {
76 @paths = glob $arg->{path};
77 } else {
78 @paths = ( $arg->{path} );
79 }
80
81 my $size = 0;
82 $self->{_rec} = [];
83
84 my $max_size;
85 $max_size = ( $self->{offset} || 0 ) + $self->{limit} if $self->{limit};
86
87 foreach my $path ( @paths ) {
88
89 open( my $fh, '<', $path ) || $log->logconfess("can't open $path: $!");
90 $log->info("reading '$path'");
91
92 my $tag;
93 my $rec;
94
95 while( my $line = <$fh> ) {
96 $line =~ s{[\r\n]+$}{};
97 next if $line eq '';
98
99 warn "<< $line\n" if $debug;
100
101 if ( $line =~ m/^<(\d+)>$/ ) {
102 last if $max_size && $size > $max_size;
103
104 push @{ $self->{_rec} }, $rec if $rec;
105 warn "## rec = ",dump( $rec ),$/ if $debug;
106 my $expect_rec = $#{ $self->{_rec} } + 2;
107 warn "wrong Ovid record number: $1 != $expect_rec" if $debug && $1 != $expect_rec;
108 $rec = { '000' => [ ++$size ] };
109 } elsif ( $line =~ /^(\w+)\s+(-\s)?(.*)/ ) {
110 $tag = $1;
111 warn "++ $tag\n" if $debug;
112 $rec->{$tag} = [ $3 ] if $3;
113 } elsif ( $line =~ /^\s+(-\s)?(.+)/ ) {
114 push @{ $rec->{$tag} }, $2;
115 } else {
116 warn "### skip: '$line'\n" if $debug;
117 }
118
119 }
120
121 # save last rec
122 push @{ $self->{_rec} }, $rec if $rec;
123 warn "### rec ",dump $rec if $debug;
124
125 $log->debug("loaded ", $self->size, " records from $path");
126
127 }
128
129 $self ? return $self : return undef;
130 }
131
132 =head2 fetch_rec
133
134 Return record with ID C<$mfn> from database
135
136 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
137
138 =cut
139
140 sub fetch_rec {
141 my $self = shift;
142
143 my ( $mfn, $filter_coderef ) = @_;
144
145 return $self->{_rec}->[$mfn-1];
146 }
147
148
149 =head2 size
150
151 Return number of records in database
152
153 my $size = $input->size;
154
155 =cut
156
157 sub size {
158 my $self = shift;
159 return $#{ $self->{_rec} } + 1;
160 }
161
162 =head1 AUTHOR
163
164 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
165
166 =head1 COPYRIGHT & LICENSE
167
168 Copyright 2008 Dobrica Pavlinusic, All Rights Reserved.
169
170 This program is free software; you can redistribute it and/or modify it
171 under the same terms as Perl itself.
172
173 =cut
174
175 1; # End of WebPAC::Input::Ovid

  ViewVC Help
Powered by ViewVC 1.1.26