1 |
dpavlin |
1366 |
package WebPAC::Input::OAI; |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use HTTP::OAI; |
7 |
|
|
use HTTP::OAI::Metadata::OAI_DC; |
8 |
|
|
use base qw/WebPAC::Common/; |
9 |
|
|
use Carp qw/confess/; |
10 |
|
|
use Data::Dump qw/dump/; |
11 |
|
|
|
12 |
|
|
=head1 NAME |
13 |
|
|
|
14 |
|
|
WebPAC::Input::OAI - read Dublin Core (oai_dc) records from an OAI-PMH repository |
15 |
|
|
|
16 |
|
|
=cut |
17 |
|
|
|
18 |
|
|
our $VERSION = '0.00'; |
19 |
|
|
|
20 |
|
|
=head1 FUNCTIONS |
21 |
|
|
|
22 |
|
|
=head2 new |
23 |
|
|
|
24 |
|
|
my $input = WebPAC::Input::OAI->new( |
25 |
|
|
url => 'http://arXiv.org/oai2', |
26 |
|
|
from => '2001-02-03', |
27 |
|
|
until => '2001-04-10', |
28 |
|
|
path => 'var/oai/arXiv', |
29 |
|
|
); |
30 |
|
|
|
31 |
|
|
=cut |
32 |
|
|
|
33 |
|
|
# Construct the input object and issue the initial OAI-PMH ListRecords
# request (metadataPrefix oai_dc).
#
# Arguments are key => value pairs; all of them are stored on the object:
#   url   - base URL of the OAI repository
#   from  - optional lower date bound forwarded to ListRecords
#   until - optional upper date bound forwarded to ListRecords
#   path  - local path; it must be openable for reading
#
# Returns the new object, or nothing when path is missing or cannot be
# opened. Dies through the logger (logdie) when the repository response
# reports an error.
sub new {
    my ( $class, %arg ) = @_;

    my $self = bless {%arg}, $class;

    my $log = $self->_get_logger();
    $log->debug( 'arg = ', dump( \%arg ) );

    my $path = $arg{path};
    unless ( defined $path ) {
        $log->error("can't open path: no path given");
        return;
    }

    # open() autovivifies the lexical handle before attempting the open, so
    # the handle is true even on failure; only open()'s return value tells
    # whether it succeeded.
    my $fh;
    unless ( open( $fh, '<', $path ) ) {
        $log->error("can't open $path: $!");
        return;
    }

    # The handle is only a readability probe; nothing is read from it.
    close($fh);

    my $harvester = HTTP::OAI::Harvester->new( baseURL => $self->{url} );

    # Forward only the date bounds that were actually supplied, so no
    # undefined arguments are handed to the harvester.
    my %range =
        map  { $_ => $self->{$_} }
        grep { defined $self->{$_} } qw( from until );

    $log->info( "ListRecords ", dump( \%range ) );

    my $response = $harvester->ListRecords(
        metadataPrefix => 'oai_dc',
        handlers       => { metadata => 'HTTP::OAI::Metadata::OAI_DC' },
        %range,
    );

    $log->debug( 'response = ', dump($response) );

    if ( $response->is_error ) {
        # Method calls are not interpolated inside double-quoted strings,
        # so the message has to be concatenated.
        $log->logdie(
            "Error harvesting $self->{url}: " . $response->message );
    }

    # Kept for fetch_rec and size, which read records and the resumption
    # token from this response.
    $self->{oai_response} = $response;

    return $self;
}
72 |
|
|
|
73 |
|
|
=head2 fetch_rec |
74 |
|
|
|
75 |
|
|
Return the next record from the OAI response, storing C<$mfn> in its field C<000> |
76 |
|
|
|
77 |
|
|
my $rec = $input->fetch_rec( $mfn ); |
78 |
|
|
|
79 |
|
|
=cut |
80 |
|
|
|
81 |
|
|
# Return the next harvested record.
#
#   $mfn - record number supplied by the caller; it is appended to the
#          record's '000' field and is NOT used to look anything up
#
# Records are consumed sequentially from the response stored by new().
# The returned value is whatever the record's metadata object returns from
# its dc method, with $mfn pushed onto the '000' entry.
#
# Returns nothing when the response has no further record, or when the
# record carries no metadata (presumably a record the repository marked as
# deleted -- verify against the OAI-PMH repository in use).
sub fetch_rec {
    my ( $self, $mfn ) = @_;

    # next() yields undef once the list is exhausted.
    my $rec = $self->{oai_response}->next;
    return unless defined $rec;

    my $metadata = $rec->metadata;
    return unless defined $metadata;

    my $row = $metadata->dc;
    push @{ $row->{'000'} }, $mfn;

    return $row;
}
94 |
|
|
|
95 |
|
|
=head2 size |
96 |
|
|
|
97 |
|
|
Return number of records in database |
98 |
|
|
|
99 |
|
|
my $size = $input->size; |
100 |
|
|
|
101 |
|
|
=cut |
102 |
|
|
|
103 |
|
|
# Return the total number of records announced by the repository.
#
# The value is read from the completeListSize of the resumption token on
# the response stored by new(). Returns undef when the response has no
# resumption token, or when the token has no completeListSize, i.e. when
# the total is unknown.
sub size {
    my $self = shift;

    my $token = $self->{oai_response}->resumptionToken;

    # Without a token there is nothing to ask for the size; calling a
    # method on undef would die here.
    return defined $token ? $token->completeListSize : undef;
}
107 |
|
|
|
108 |
|
|
|
109 |
|
|
=head1 AUTHOR |
110 |
|
|
|
111 |
|
|
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >> |
112 |
|
|
|
113 |
|
|
=head1 COPYRIGHT & LICENSE |
114 |
|
|
|
115 |
|
|
Copyright 2011 Dobrica Pavlinusic, All Rights Reserved. |
116 |
|
|
|
117 |
|
|
This program is free software; you can redistribute it and/or modify it |
118 |
|
|
under the same terms as Perl itself. |
119 |
|
|
|
120 |
|
|
=cut |
121 |
|
|
|
122 |
|
|
1; |