/[webpac2]/trunk/lib/WebPAC/Input/ISIS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Input/ISIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 265 - (show annotations)
Fri Dec 16 16:23:44 2005 UTC (18 years, 4 months ago) by dpavlin
File size: 6694 byte(s)
 r11736@llin:  dpavlin | 2005-12-16 21:22:26 +0100
 die if database can't be opened, confirms to test

1 package WebPAC::Input::ISIS;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Common;
7 use base qw/WebPAC::Input WebPAC::Common/;
8 use Text::Iconv;
9
10 =head1 NAME
11
12 WebPAC::Input::ISIS - support for CDS/ISIS source files
13
14 =head1 VERSION
15
16 Version 0.01
17
18 =cut
19
20 our $VERSION = '0.01';
21
22
# auto-configure
#
# Probe for an ISIS backend at load time. Biblio::Isis (0.13 or newer) is
# tried first; OpenIsis is only probed when that fails. At most one of the
# two flags ends up set; if neither module loads both stay 0.

my $have_biblio_isis = 0;
my $have_openisis    = 0;

eval "use Biblio::Isis 0.13;";
if ($@) {
    # Biblio::Isis missing or too old -- fall back to OpenIsis bindings
    eval "use OpenIsis;";
    $have_openisis = 1 if ( !$@ );
}
else {
    $have_biblio_isis = 1;
}
34
35 =head1 SYNOPSIS
36
37 Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
38 module and read all records to memory.
39
40 my $isis = new WebPAC::Input::ISIS();
41 $isis->open( filename => '/path/to/ISIS/ISIS' );
42
43 =head1 FUNCTIONS
44
45 =head2 open
46
47 This function will read whole database in memory and produce lookups.
48
49 $isis->open(
50 filename => '/data/ISIS/ISIS',
51 code_page => '852',
52 limit_mfn => 500,
53 start_mfn => 6000,
54 lookup => $lookup_obj,
55 );
56
57 By default, ISIS code page is assumed to be C<852>.
58
59 If optional parameter C<start_mfn> is set, this will be the first MFN to read
60 from database (so you can skip beginning of your database if you need to).
61
62 If optional parameter C<limit_mfn> is set, it will read just 500 records
63 from database in example above.
64
65 Returns size of database, regardless of C<start_mfn> and C<limit_mfn>
66 parameters, see also C<< $isis->size >>.
67
68 =cut
69
# open( filename => $path, [ code_page, start_mfn, limit_mfn, lookup ] )
#
# Reads records start_mfn .. max_mfn from the ISIS database into
# $self->{data} (or into $self->{db} when $self->{low_mem} is set), feeding
# each record to the lookup object if one is available.
#
# Returns the database size as reported by the backend (not the number of
# records actually read -- that is stored in $self->{size}). Returns nothing
# when Biblio::Isis reports an empty database. Dies through the logger when
# the filename is missing, the database can't be opened by Biblio::Isis, or
# no backend module is installed.
sub open {
    my $self = shift;
    my $arg  = {@_};

    my $log = $self->_get_logger();

    $log->logcroak("need filename") if (! $arg->{'filename'});
    my $code_page = $arg->{'code_page'} || '852';

    # store data in object
    $self->{'isis_code_page'} = $code_page;

    # The caller passes the path as 'filename'; remember it under
    # 'isis_filename' (an explicit 'isis_filename' argument still wins).
    $self->{'isis_filename'} = $arg->{'isis_filename'} || $arg->{'filename'};

    # 'lookup' is a documented argument of open(), so honour it when given.
    # A value already present in the object is kept when the argument is absent.
    foreach my $v (qw/start_mfn limit_mfn lookup/) {
        $self->{$v} = $arg->{$v} if ($arg->{$v});
    }

    # create Text::Iconv object
    # NOTE(review): target encoding comes from $self->{code_page}, which is
    # presumably set by the constructor -- confirm it is always defined.
    my $cp = Text::Iconv->new($code_page,$self->{'code_page'});

    $log->info("reading ISIS database '",$arg->{'filename'},"'");
    $log->debug("isis code page: $code_page");

    my ($isis_db,$db_size);

    if ($have_openisis) {
        $log->debug("using OpenIsis perl bindings");
        $isis_db = OpenIsis::open($arg->{'filename'});
        $db_size = OpenIsis::maxRowid( $isis_db ) || 1;
    } elsif ($have_biblio_isis) {
        $log->debug("using Biblio::Isis");
        # No "use Biblio::Isis" here: "use" runs at compile time no matter
        # which branch is taken, which would make this module unloadable on
        # OpenIsis-only installs. The auto-configure eval already loaded it.
        $isis_db = Biblio::Isis->new(
            isisdb => $arg->{'filename'},
            include_deleted => 1,
            hash_filter => sub {
                # recode every field value to the output code page
                my $value = shift || return;
                $value = $cp->convert($value);
                return $value;
            },
        ) or $log->logdie("can't find database ",$arg->{'filename'});

        $db_size = $isis_db->count;

        unless ($db_size) {
            $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
            return;
        }

    } else {
        $log->logdie("Can't find supported ISIS library for perl. I suggest that you install Biblio::Isis from CPAN.");
    }


    my $startmfn = 1;
    my $maxmfn = $db_size;

    if (my $s = $self->{'start_mfn'}) {
        $log->info("skipping to MFN $s");
        $startmfn = $s;
    } else {
        $self->{'start_mfn'} = $startmfn;
    }

    if ($self->{limit_mfn}) {
        $log->info("limiting to ",$self->{limit_mfn}," records");
        $maxmfn = $startmfn + $self->{limit_mfn} - 1;
        $maxmfn = $db_size if ($maxmfn > $db_size);
    }

    # store size for later: inclusive range, so a single record
    # (start == max) counts as 1, and a start beyond the end counts as 0
    # instead of going negative
    my $size = $maxmfn - $startmfn + 1;
    $size = 0 if ($size < 0);
    $self->{'size'} = $size;

    $log->info("processing ",$size." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));


    # read database
    foreach my $mfn ($startmfn .. $maxmfn) {

        $log->debug("mfn: $mfn\n");

        my $rec;

        if ($have_openisis) {

            # read record using OpenIsis
            my $row = OpenIsis::read( $isis_db, $mfn );
            foreach my $k (keys %{$row}) {
                if ($k eq "mfn") {
                    # MFN is exposed as pseudo-field 000
                    push @{$rec->{'000'}}, $mfn;
                    next;
                }

                foreach my $raw (@{$row->{$k}}) {
                    my $line = $cp->convert($raw);
                    my $val;
                    # has subfields?
                    if ($line =~ m/\^/) {
                        # each ^-separated chunk: first char is the
                        # subfield code, the rest is its value
                        foreach my $t (split(/\^/,$line)) {
                            next if (! $t);
                            $val->{substr($t,0,1)} = substr($t,1);
                        }
                    } else {
                        $val = $line;
                    }

                    push @{$rec->{$k}}, $val;
                }
            }

        } elsif ($have_biblio_isis) {
            $rec = $isis_db->to_hash($mfn);
        } else {
            $log->logdie("hum? implementation missing?");
        }

        if (! $rec) {
            $log->warn("record $mfn empty? skipping...");
            next;
        }

        # store
        if ($self->{'low_mem'}) {
            $self->{'db'}->put($mfn, $rec);
        } else {
            $self->{'data'}->{$mfn} = $rec;
        }

        # create lookup
        $self->{'lookup'}->add( $rec ) if ($self->{'lookup'});

        $self->progress_bar($mfn,$maxmfn);

    }

    # -1 means "positioned before the first record" for fetch()
    $self->{'current_mfn'} = -1;
    $self->{'last_pcnt'} = 0;

    $log->debug("max mfn: $maxmfn");

    # store max mfn and return database size
    $self->{'max_mfn'} = $maxmfn;

    return $db_size;
}
212
213 =head2 fetch
214
215 Fetch next record from database. It will also display progress bar.
216
217 my $rec = $isis->fetch;
218
219 Record from this function should probably go to C<data_structure> for
220 normalisation.
221
222 =cut
223
# Advance to the next MFN and return its record.
#
# Returns the stored record; undef/empty list once past max_mfn; and the
# literal 0E0 when the slot for that MFN holds nothing.
# NOTE(review): 0E0 is written unquoted, so it is the number 0 (defined but
# false), not the "zero but true" string '0E0' -- confirm what callers expect.
sub fetch {
    my ($self) = @_;

    my $log = $self->_get_logger();

    if ( !$self->{current_mfn} ) {
        $log->logconfess("it seems that you didn't load database!");
    }

    # -1 marks "before first record": start at start_mfn, otherwise step on
    my $mfn = ( $self->{current_mfn} == -1 )
        ? $self->{start_mfn}
        : $self->{current_mfn} + 1;
    $self->{current_mfn} = $mfn;

    my $last = $self->{max_mfn};
    if ( $mfn > $last ) {
        # park the cursor on the last record so repeated calls stay at EOF
        $self->{current_mfn} = $last;
        $log->debug("at EOF");
        return;
    }

    $self->progress_bar( $mfn, $last );

    my $rec = $self->{low_mem}
        ? $self->{db}->get($mfn)
        : $self->{data}->{$mfn};

    return $rec || 0E0;
}
257
258 =head2 pos
259
260 Returns current record number (MFN).
261
262 print $isis->pos;
263
264 First record in database has position 1.
265
266 =cut
267
# Accessor: current MFN cursor as stored in $self->{current_mfn}.
sub pos {
    my ($self) = @_;
    return $self->{current_mfn};
}
272
273
274 =head2 size
275
276 Returns number of records in database
277
278 print $isis->size;
279
280 Result from this function can be used to loop through all records
281
282 foreach my $mfn ( 1 ... $isis->size ) { ... }
283
284 because it takes into account C<start_mfn> and C<limit_mfn>.
285
286 =cut
287
# Accessor: number of records in the loaded range, as stored in
# $self->{size}.
sub size {
    my ($self) = @_;
    return $self->{size};
}
292
293 =head2 seek
294
295 Seek to specified MFN in file.
296
297 $isis->seek(42);
298
299 First record in database has position 1.
300
301 =cut
302
# Position the cursor so that the next fetch() returns record $pos.
#
# A false $pos (undef, 0, '') is a no-op returning nothing. Positions below 1
# are clamped to 1 and positions above max_mfn to max_mfn, each with a
# warning. Returns the new cursor value: $pos - 1, or -1 when that is 0
# (fetch() treats -1 as "before the first record").
sub seek {
    my ( $self, $pos ) = @_;
    return unless $pos;

    my $log = $self->_get_logger();

    if ( $pos < 1 ) {
        $log->warn("seek before first record");
        $pos = 1;
    }
    elsif ( $pos > $self->{max_mfn} ) {
        $log->warn("seek beyond last record");
        $pos = $self->{max_mfn};
    }

    my $cursor = $pos - 1;
    $cursor = -1 unless $cursor;

    $self->{current_mfn} = $cursor;
    return $self->{current_mfn};
}
319
320 =head1 AUTHOR
321
322 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
323
324 =head1 COPYRIGHT & LICENSE
325
326 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
327
328 This program is free software; you can redistribute it and/or modify it
329 under the same terms as Perl itself.
330
331 =cut
332
333 1; # End of WebPAC::Input::ISIS

  ViewVC Help
Powered by ViewVC 1.1.26