/[webpac2]/trunk/lib/WebPAC/Input/ISIS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Input/ISIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 8 - (show annotations)
Sat Jul 16 16:48:35 2005 UTC (18 years, 9 months ago) by dpavlin
File size: 5882 byte(s)
little cleanup and first cut into WebPAC::Normalize::XML

1 package WebPAC::Input::ISIS;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Common;
7 use base qw/WebPAC::Input WebPAC::Common/;
8
9 =head1 NAME
10
11 WebPAC::Input::ISIS - support for CDS/ISIS source files
12
13 =head1 VERSION
14
15 Version 0.01
16
17 =cut
18
19 our $VERSION = '0.01';
20
21
22 # auto-configure
23
24 my ($have_biblio_isis, $have_openisis) = (0,0);
25
26 eval "use Biblio::Isis 0.13;";
27 unless ($@) {
28 $have_biblio_isis = 1
29 } else {
30 eval "use OpenIsis;";
31 $have_openisis = 1 unless ($@);
32 }
33
34 =head1 SYNOPSIS
35
36 Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
37 module and read all records to memory.
38
39 my $isis = new WebPAC::Input::ISIS();
40 $isis->open( filename => '/path/to/ISIS/ISIS' );
41
42 =head1 FUNCTIONS
43
44 =head2 open
45
46 This function will read whole database in memory and produce lookups.
47
48 $isis->open(
49 filename => '/data/ISIS/ISIS',
50 code_page => '852',
51 limit_mfn => 500,
52 start_mfn => 6000,
53 lookup => $lookup_obj,
54 );
55
56 By default, ISIS code page is assumed to be C<852>.
57
58 If optional parametar C<start_mfn> is set, this will be first MFN to read
59 from database (so you can skip beginning of your database if you need to).
60
61 If optional parametar C<limit_mfn> is set, it will read just 500 records
62 from database in example above.
63
64 Returns number of last record read into memory (size of database, really).
65
66 =cut
67
68 sub open {
69 my $self = shift;
70 my $arg = {@_};
71
72 my $log = $self->_get_logger();
73
74 $log->logcroak("need filename") if (! $arg->{'filename'});
75 my $code_page = $arg->{'code_page'} || '852';
76
77 $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*'));
78
79 # store data in object
80 $self->{'isis_filename'} = $arg->{'filename'};
81 $self->{'isis_code_page'} = $code_page;
82
83 #$self->{'isis_code_page'} = $code_page;
84
85 # create Text::Iconv object
86 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
87
88 $log->info("reading ISIS database '",$arg->{'filename'},"'");
89 $log->debug("isis code page: $code_page");
90
91 my ($isis_db,$maxmfn);
92
93 if ($have_openisis) {
94 $log->debug("using OpenIsis perl bindings");
95 $isis_db = OpenIsis::open($arg->{'filename'});
96 $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
97 } elsif ($have_biblio_isis) {
98 $log->debug("using Biblio::Isis");
99 use Biblio::Isis;
100 $isis_db = new Biblio::Isis(
101 isisdb => $arg->{'filename'},
102 include_deleted => 1,
103 hash_filter => sub {
104 my $l = shift || return;
105 $l = $cp->convert($l);
106 return $l;
107 },
108 );
109 $maxmfn = $isis_db->count;
110
111 unless ($maxmfn) {
112 $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
113 return;
114 }
115
116 } else {
117 $log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN.");
118 }
119
120
121 my $startmfn = 1;
122
123 if (my $s = $self->{'start_mfn'}) {
124 $log->info("skipping to MFN $s");
125 $startmfn = $s;
126 } else {
127 $self->{'start_mfn'} = $startmfn;
128 }
129
130 $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn});
131
132 $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));
133
134
135 # read database
136 for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
137
138 $log->debug("mfn: $mfn\n");
139
140 my $rec;
141
142 if ($have_openisis) {
143
144 # read record using OpenIsis
145 my $row = OpenIsis::read( $isis_db, $mfn );
146 foreach my $k (keys %{$row}) {
147 if ($k ne "mfn") {
148 foreach my $l (@{$row->{$k}}) {
149 $l = $cp->convert($l);
150 # has subfields?
151 my $val;
152 if ($l =~ m/\^/) {
153 foreach my $t (split(/\^/,$l)) {
154 next if (! $t);
155 $val->{substr($t,0,1)} = substr($t,1);
156 }
157 } else {
158 $val = $l;
159 }
160
161 push @{$rec->{$k}}, $val;
162 }
163 } else {
164 push @{$rec->{'000'}}, $mfn;
165 }
166 }
167
168 } elsif ($have_biblio_isis) {
169 $rec = $isis_db->to_hash($mfn);
170 } else {
171 $log->logdie("hum? implementation missing?");
172 }
173
174 $log->confess("record $mfn empty?") unless ($rec);
175
176 # store
177 if ($self->{'low_mem'}) {
178 $self->{'db'}->put($mfn, $rec);
179 } else {
180 $self->{'data'}->{$mfn} = $rec;
181 }
182
183 # create lookup
184 $self->{'lookup'}->add( $rec ) if ($self->{'lookup'} && can($self->{'lookup'}->add));
185
186 $self->progress_bar($mfn,$maxmfn);
187
188 }
189
190 $self->{'current_mfn'} = -1;
191 $self->{'last_pcnt'} = 0;
192
193 $log->debug("max mfn: $maxmfn");
194
195 # store max mfn and return it.
196 return $self->{'max_mfn'} = $maxmfn;
197 }
198
199 =head2 fetch_rec
200
201 Fetch next record from database. It will also displays progress bar.
202
203 my $rec = $webpac->fetch_rec;
204
205 You should rearly have the need to call this function directly. Instead use
206 C<fetch_data_structure> which returns normalised data.
207
208 =cut
209
210 sub fetch_rec {
211 my $self = shift;
212
213 my $log = $self->_get_logger();
214
215 $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
216
217 if ($self->{'current_mfn'} == -1) {
218 $self->{'current_mfn'} = $self->{'start_mfn'};
219 } else {
220 $self->{'current_mfn'}++;
221 }
222
223 my $mfn = $self->{'current_mfn'};
224
225 if ($mfn > $self->{'max_mfn'}) {
226 $self->{'current_mfn'} = $self->{'max_mfn'};
227 $log->debug("at EOF");
228 return;
229 }
230
231 $self->progress_bar($mfn,$self->{'max_mfn'});
232
233 if ($self->{'low_mem'}) {
234 return $self->{'db'}->get($mfn);
235 } else {
236 return $self->{'data'}->{$mfn};
237 }
238 }
239
240 =head2 fetch_data_structure
241
242 Fetch data structure of next record from database.
243
244 my @ds = $webpac->fetch_data_structure;
245
246 =cut
247
248 sub fetch_data_structure {
249 my $self = shift;
250
251 return $self->data_structure(
252 $self->fetch_rec(@_)
253 );
254 }
255
256 =head2 mfn
257
258 Returns current record number (MFN).
259
260 print $webpac->mfn;
261
262 =cut
263
264 sub mfn {
265 my $self = shift;
266 return $self->{'current_mfn'};
267 }
268
269 =head1 AUTHOR
270
271 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
272
273 =head1 COPYRIGHT & LICENSE
274
275 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
276
277 This program is free software; you can redistribute it and/or modify it
278 under the same terms as Perl itself.
279
280 =cut
281
282 1; # End of WebPAC::Input::ISIS

  ViewVC Help
Powered by ViewVC 1.1.26