/[webpac2]/trunk/lib/WebPAC/Input/Ovid.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/Ovid.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1296 - (hide annotations)
Sat Sep 19 23:29:23 2009 UTC (14 years, 7 months ago) by dpavlin
File size: 3043 byte(s)
- added support for Direct Export format to existing BRS/Tagged
- specify file glob (as from ovid-download-results.pl) for multiple files

1 dpavlin 1124 package WebPAC::Input::Ovid;
2    
3     use warnings;
4     use strict;
5    
6     use lib 'lib';
7     use WebPAC::Input;
8     use base qw/WebPAC::Common/;
9    
10     use Data::Dump qw/dump/;
11    
12     =head1 NAME
13    
14 dpavlin 1296 WebPAC::Input::Ovid - support for Ovid citation export (BRS/Tagged and Direct Export)
15 dpavlin 1124
16     =head1 VERSION
17    
18     Version 0.01
19    
20     =cut
21    
22     our $VERSION = '0.01';
23 dpavlin 1130 our $debug = 0;
24 dpavlin 1124
25     =head1 SYNOPSIS
26    
27     Open file in Ovid citation export fromat
28    
29     my $input = new WebPAC::Input::Ovid(
30     path => '/path/to/ovid-cites.txt',
31     );
32    
33 dpavlin 1296 You can also specify file glob:
34    
35     my $input = WebPAC::Input::Ovid->new( path => '/path/to/ovid.*.txt' );
36    
37 dpavlin 1124 =head1 FUNCTIONS
38    
39     =head2 new
40    
41     Returns new low-level input API object
42    
43     my $input = new WebPAC::Input::Ovid(
44     path => '/path/to/ebsco.txt',
45     filter => sub {
46     my ($l,$field_nr) = @_;
47     # do something with $l which is line of input file
48     return $l;
49     },
50     }
51    
52     Options:
53    
54     =over 4
55    
56     =item path
57    
58     path to Ovid export file
59    
60     =back
61    
62     =cut
63    
64     sub new {
65     my $class = shift;
66     my $self = {@_};
67     bless($self, $class);
68    
69     my $arg = {@_};
70    
71     my $log = $self->_get_logger();
72    
73 dpavlin 1296 my @paths;
74 dpavlin 1124
75 dpavlin 1296 if ( $arg->{path} =~ m/\*/ ) {
76     @paths = glob $arg->{path};
77     } else {
78     @paths = ( $arg->{path} );
79     }
80 dpavlin 1124
81     my $size = 0;
82     $self->{_rec} = [];
83    
84 dpavlin 1296 my $max_size;
85     $max_size = ( $self->{offset} || 0 ) + $self->{limit} if $self->{limit};
86 dpavlin 1124
87 dpavlin 1296 foreach my $path ( @paths ) {
88 dpavlin 1124
89 dpavlin 1296 open( my $fh, '<', $path ) || $log->logconfess("can't open $path: $!");
90     $log->info("reading '$path'");
91    
92     my $tag;
93     my $rec;
94    
95     while( my $line = <$fh> ) {
96     $line =~ s{[\r\n]+$}{};
97     next if $line eq '';
98    
99     warn "<< $line\n" if $debug;
100    
101     if ( $line =~ m/^<(\d+)>$/ ) {
102     last if $max_size && $size > $max_size;
103    
104     push @{ $self->{_rec} }, $rec if $rec;
105     warn "## rec = ",dump( $rec ),$/ if $debug;
106     my $expect_rec = $#{ $self->{_rec} } + 2;
107     warn "wrong Ovid record number: $1 != $expect_rec" if $debug && $1 != $expect_rec;
108     $rec = { '000' => [ ++$size ] };
109     } elsif ( $line =~ /^(\w+)\s+(-\s)?(.*)/ ) {
110     $tag = $1;
111     warn "++ $tag\n" if $debug;
112     $rec->{$tag} = [ $3 ] if $3;
113     } elsif ( $line =~ /^\s+(-\s)?(.+)/ ) {
114     push @{ $rec->{$tag} }, $2;
115     } else {
116     warn "### skip: '$line'\n" if $debug;
117     }
118    
119 dpavlin 1124 }
120    
121 dpavlin 1296 # save last rec
122     push @{ $self->{_rec} }, $rec if $rec;
123     warn "### rec ",dump $rec if $debug;
124 dpavlin 1124
125 dpavlin 1296 $log->debug("loaded ", $self->size, " records from $path");
126 dpavlin 1124
127 dpavlin 1296 }
128 dpavlin 1124
129     $self ? return $self : return undef;
130     }
131    
132     =head2 fetch_rec
133    
134     Return record with ID C<$mfn> from database
135    
136     my $rec = $input->fetch_rec( $mfn, $filter_coderef );
137    
138     =cut
139    
140     sub fetch_rec {
141     my $self = shift;
142    
143     my ( $mfn, $filter_coderef ) = @_;
144    
145     return $self->{_rec}->[$mfn-1];
146     }
147    
148    
149     =head2 size
150    
151     Return number of records in database
152    
153     my $size = $input->size;
154    
155     =cut
156    
157     sub size {
158     my $self = shift;
159 dpavlin 1126 return $#{ $self->{_rec} } + 1;
160 dpavlin 1124 }
161    
162     =head1 AUTHOR
163    
164     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
165    
166     =head1 COPYRIGHT & LICENSE
167    
168     Copyright 2008 Dobrica Pavlinusic, All Rights Reserved.
169    
170     This program is free software; you can redistribute it and/or modify it
171     under the same terms as Perl itself.
172    
173     =cut
174    
175     1; # End of WebPAC::Input::Ovid

  ViewVC Help
Powered by ViewVC 1.1.26