/[webpac2]/trunk/lib/WebPAC/Input/ISIS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/ISIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 265 - (hide annotations)
Fri Dec 16 16:23:44 2005 UTC (18 years, 4 months ago) by dpavlin
File size: 6694 byte(s)
 r11736@llin:  dpavlin | 2005-12-16 21:22:26 +0100
 die if database can't be opened, confirms to test

1 dpavlin 6 package WebPAC::Input::ISIS;
2    
3     use warnings;
4     use strict;
5    
6     use WebPAC::Common;
7     use base qw/WebPAC::Input WebPAC::Common/;
8 dpavlin 9 use Text::Iconv;
9 dpavlin 6
10     =head1 NAME
11    
12     WebPAC::Input::ISIS - support for CDS/ISIS source files
13    
14     =head1 VERSION
15    
16     Version 0.01
17    
18     =cut
19    
20     our $VERSION = '0.01';
21    
22    
# auto-configure: detect which ISIS backend is installed.
# Biblio::Isis (0.13 or newer) is preferred; OpenIsis is probed only when
# loading Biblio::Isis fails.  At most one of the two flags ends up set.

my $have_biblio_isis = 0;
my $have_openisis    = 0;

eval "use Biblio::Isis 0.13;";
if ($@) {
    # Biblio::Isis is missing or older than 0.13: try the OpenIsis bindings
    eval "use OpenIsis;";
    $have_openisis = 1 if (! $@);
}
else {
    $have_biblio_isis = 1;
}
34    
35     =head1 SYNOPSIS
36    
37     Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
38     module and read all records to memory.
39    
40     my $isis = new WebPAC::Input::ISIS();
41     $isis->open( filename => '/path/to/ISIS/ISIS' );
42    
43     =head1 FUNCTIONS
44    
45     =head2 open
46    
47     This function will read whole database in memory and produce lookups.
48    
49     $isis->open(
50     filename => '/data/ISIS/ISIS',
51     code_page => '852',
52     limit_mfn => 500,
53     start_mfn => 6000,
54     lookup => $lookup_obj,
55     );
56    
57     By default, ISIS code page is assumed to be C<852>.
58    
59     If the optional parameter C<start_mfn> is set, this will be the first MFN read
60     from the database (so you can skip the beginning of your database if you need to).
61    
62     If the optional parameter C<limit_mfn> is set, it will read just 500 records
63     from the database in the example above.
64    
65 dpavlin 21 Returns the size of the database, regardless of the C<start_mfn> and C<limit_mfn>
66     parameters; see also C<< $isis->size >>.
67 dpavlin 6
68     =cut
69    
# open - load an ISIS database into this object.
#
# Named arguments:
#   filename  - path to the ISIS database (required; croaks when missing)
#   code_page - code page of the ISIS data, defaults to '852'
#   start_mfn - first MFN to read, defaults to 1
#   limit_mfn - maximum number of records to read
#
# Every non-empty record is stored in $self->{'data'}{$mfn}, or handed to
# $self->{'db'}->put($mfn, $rec) when $self->{'low_mem'} is set, and is
# passed to $self->{'lookup'}->add when a lookup object is present.
# Also sets $self->{'size'}, {'max_mfn'}, {'start_mfn'}, {'current_mfn'}
# and {'last_pcnt'}.
#
# Returns the full database size as reported by the backend, or nothing
# when Biblio::Isis reports a database without records.  Dies (through
# the logger) when the database can't be opened or no backend is installed.
sub open {
    my $self = shift;
    my $arg = {@_};

    my $log = $self->_get_logger();

    $log->logcroak("need filename") if (! $arg->{'filename'});
    my $code_page = $arg->{'code_page'} || '852';

    # store data in object
    # NOTE(review): 'isis_filename' is copied here but the documented
    # argument is 'filename', and the documented 'lookup' argument is not
    # copied at all -- confirm against callers before changing.
    $self->{'isis_code_page'} = $code_page;
    foreach my $v (qw/isis_filename start_mfn limit_mfn/) {
        $self->{$v} = $arg->{$v} if ($arg->{$v});
    }

    # create Text::Iconv object
    # (target encoding $self->{'code_page'} is presumably set by the
    # constructor -- it is not assigned anywhere in this sub)
    my $cp = Text::Iconv->new($code_page, $self->{'code_page'});

    $log->info("reading ISIS database '",$arg->{'filename'},"'");
    $log->debug("isis code page: $code_page");

    my ($isis_db, $db_size);

    if ($have_openisis) {
        $log->debug("using OpenIsis perl bindings");
        $isis_db = OpenIsis::open($arg->{'filename'});
        $db_size = OpenIsis::maxRowid( $isis_db ) || 1;
    } elsif ($have_biblio_isis) {
        $log->debug("using Biblio::Isis");
        # No "use Biblio::Isis" here: "use" runs at compile time even inside
        # a conditional branch, so it made this whole module fail to load
        # when only OpenIsis was installed.  The module has already been
        # loaded by the auto-configure eval whenever $have_biblio_isis is set.
        $isis_db = Biblio::Isis->new(
            isisdb => $arg->{'filename'},
            include_deleted => 1,
            hash_filter => sub {
                my $line = shift || return;
                $line = $cp->convert($line);
                return $line;
            },
        ) or $log->logdie("can't find database ",$arg->{'filename'});

        $db_size = $isis_db->count;

        unless ($db_size) {
            $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
            return;
        }

    } else {
        $log->logdie("Can't find supported ISIS library for perl. I suggest that you install Biblio::Isis from CPAN.");
    }


    my $startmfn = 1;
    my $maxmfn = $db_size;

    if (my $s = $self->{'start_mfn'}) {
        $log->info("skipping to MFN $s");
        $startmfn = $s;
    } else {
        $self->{'start_mfn'} = $startmfn;
    }

    if ($self->{limit_mfn}) {
        $log->info("limiting to ",$self->{limit_mfn}," records");
        $maxmfn = $startmfn + $self->{limit_mfn} - 1;
        $maxmfn = $db_size if ($maxmfn > $db_size);
    }

    # store size for later: number of MFNs in the inclusive range
    # $startmfn .. $maxmfn.  A one-record range counts as 1 (it used to be
    # reported as 0) and a start past the end counts as 0 (it used to come
    # out negative).
    my $count = ($maxmfn >= $startmfn) ? ($maxmfn - $startmfn + 1) : 0;
    $self->{'size'} = $count;

    $log->info("processing ",$count." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));


    # read database
    for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {

        $log->debug("mfn: $mfn\n");

        my $rec;

        if ($have_openisis) {

            # read record using OpenIsis
            my $row = OpenIsis::read( $isis_db, $mfn );
            foreach my $k (keys %{$row}) {
                if ($k ne "mfn") {
                    foreach my $line (@{$row->{$k}}) {
                        $line = $cp->convert($line);
                        # has subfields?
                        my $val;
                        if ($line =~ m/\^/) {
                            # "^aFoo^bBar" becomes { a => 'Foo', b => 'Bar' }
                            foreach my $t (split(/\^/,$line)) {
                                next if (! $t);
                                $val->{substr($t,0,1)} = substr($t,1);
                            }
                        } else {
                            $val = $line;
                        }

                        push @{$rec->{$k}}, $val;
                    }
                } else {
                    # the MFN itself is exposed as field 000
                    push @{$rec->{'000'}}, $mfn;
                }
            }

        } elsif ($have_biblio_isis) {
            $rec = $isis_db->to_hash($mfn);
        } else {
            $log->logdie("hum? implementation missing?");
        }

        if (! $rec) {
            $log->warn("record $mfn empty? skipping...");
            next;
        }

        # store
        if ($self->{'low_mem'}) {
            $self->{'db'}->put($mfn, $rec);
        } else {
            $self->{'data'}->{$mfn} = $rec;
        }

        # create lookup ($rec is known to be true here, see the check above)
        $self->{'lookup'}->add( $rec ) if ($self->{'lookup'});

        $self->progress_bar($mfn,$maxmfn);

    }

    # -1 tells fetch() to start from start_mfn on its first call
    $self->{'current_mfn'} = -1;
    $self->{'last_pcnt'} = 0;

    $log->debug("max mfn: $maxmfn");

    # store max mfn and return the full database size.
    $self->{'max_mfn'} = $maxmfn;

    return $db_size;
}
212    
213 dpavlin 10 =head2 fetch
214 dpavlin 6
215     Fetch the next record from the database. It will also display a progress bar.
216    
217 dpavlin 10 my $rec = $isis->fetch;
218 dpavlin 6
219 dpavlin 10 Record from this function should probably go to C<data_structure> for
220     normalisation.
221 dpavlin 8
222 dpavlin 6 =cut
223    
# fetch - advance to the next MFN and return its record.
#
# The first call after open() (current_mfn == -1) starts at start_mfn;
# later calls step forward by one.  Past max_mfn the position is pinned to
# max_mfn and nothing is returned.  The record comes from
# $self->{'db'}->get when $self->{'low_mem'} is set, otherwise from
# $self->{'data'}.
sub fetch {
    my ($self) = @_;

    my $log = $self->_get_logger();

    if (! $self->{'current_mfn'}) {
        $log->logconfess("it seems that you didn't load database!");
    }

    if ($self->{'current_mfn'} != -1) {
        $self->{'current_mfn'}++;
    } else {
        $self->{'current_mfn'} = $self->{'start_mfn'};
    }

    my $mfn  = $self->{'current_mfn'};
    my $last = $self->{'max_mfn'};

    if ($mfn > $last) {
        $self->{'current_mfn'} = $last;
        $log->debug("at EOF");
        return;
    }

    $self->progress_bar($mfn, $last);

    my $rec = $self->{'low_mem'}
        ? $self->{'db'}->get($mfn)
        : $self->{'data'}->{$mfn};

    # NOTE(review): bare 0E0 is a numeric literal, i.e. plain (false) 0, not
    # the "0E0" zero-but-true string -- a missing record is returned as 0.
    return $rec || 0E0;
}
257    
258 dpavlin 10 =head2 pos
259 dpavlin 8
260 dpavlin 10 Returns current record number (MFN).
261 dpavlin 8
262 dpavlin 10 print $isis->pos;
263 dpavlin 8
264 dpavlin 10 First record in database has position 1.
265    
266 dpavlin 8 =cut
267    
# pos - return the current record number (MFN) kept in $self->{'current_mfn'}.
sub pos {
    my ($self) = @_;
    return $self->{'current_mfn'};
}
272 dpavlin 8
273 dpavlin 10
274     =head2 size
275    
276     Returns number of records in database
277    
278     print $isis->size;
279    
280 dpavlin 21 Result from this function can be used to loop through all records
281    
282     foreach my $mfn ( 1 ... $isis->size ) { ... }
283    
284     because it takes into account C<start_mfn> and C<limit_mfn>.
285    
286 dpavlin 10 =cut
287    
# size - return the record count stored in $self->{'size'} by open().
sub size {
    my ($self) = @_;
    return $self->{'size'};
}
292    
293 dpavlin 10 =head2 seek
294 dpavlin 6
295 dpavlin 10 Seek to specified MFN in file.
296 dpavlin 6
297 dpavlin 10 $isis->seek(42);
298 dpavlin 6
299 dpavlin 10 First record in database has position 1.
300    
301 dpavlin 6 =cut
302    
# seek - position the object so that the next fetch() returns MFN $pos.
#
# A false $pos (undef, 0, '') returns nothing and leaves the position
# alone.  Values below 1 are clamped to 1 and values above
# $self->{'max_mfn'} to max_mfn, each with a warning.  Returns the new
# value of $self->{'current_mfn'}.
sub seek {
    my ($self, $pos) = @_;
    return unless $pos;

    my $log = $self->_get_logger();

    if ($pos < 1) {
        $log->warn("seek before first record");
        $pos = 1;
    } elsif ($pos > $self->{'max_mfn'}) {
        $log->warn("seek beyond last record");
        $pos = $self->{'max_mfn'};
    }

    # fetch() pre-increments, so park one record earlier; 0 would read as
    # "database not loaded" there, hence the -1 start marker instead.
    my $before = $pos - 1;
    $before = -1 unless $before;

    return $self->{'current_mfn'} = $before;
}
319    
320     =head1 AUTHOR
321    
322     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
323    
324     =head1 COPYRIGHT & LICENSE
325    
326     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
327    
328     This program is free software; you can redistribute it and/or modify it
329     under the same terms as Perl itself.
330    
331     =cut
332    
333     1; # End of WebPAC::Input::ISIS

  ViewVC Help
Powered by ViewVC 1.1.26