/[webpac2]/trunk/lib/WebPAC/Input/ISIS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/ISIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (hide annotations)
Sat Jul 16 20:35:30 2005 UTC (18 years, 9 months ago) by dpavlin
File size: 6228 byte(s)
ISIS input is finished, low_mem option has code (and not only documentation :-)

1 dpavlin 6 package WebPAC::Input::ISIS;
2    
3     use warnings;
4     use strict;
5    
6     use WebPAC::Common;
7     use base qw/WebPAC::Input WebPAC::Common/;
8 dpavlin 9 use Text::Iconv;
9 dpavlin 6
10     =head1 NAME
11    
12     WebPAC::Input::ISIS - support for CDS/ISIS source files
13    
14     =head1 VERSION
15    
16     Version 0.01
17    
18     =cut
19    
20     our $VERSION = '0.01';
21    
22    
23     # auto-configure
24    
25     my ($have_biblio_isis, $have_openisis) = (0,0);
26    
27     eval "use Biblio::Isis 0.13;";
28     unless ($@) {
29     $have_biblio_isis = 1
30     } else {
31     eval "use OpenIsis;";
32     $have_openisis = 1 unless ($@);
33     }
34    
35     =head1 SYNOPSIS
36    
37     Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
38     module and read all records to memory.
39    
40     my $isis = new WebPAC::Input::ISIS();
41     $isis->open( filename => '/path/to/ISIS/ISIS' );
42    
43     =head1 FUNCTIONS
44    
45     =head2 open
46    
47     This function will read whole database in memory and produce lookups.
48    
49     $isis->open(
50     filename => '/data/ISIS/ISIS',
51     code_page => '852',
52     limit_mfn => 500,
53     start_mfn => 6000,
54     lookup => $lookup_obj,
55     );
56    
57     By default, ISIS code page is assumed to be C<852>.
58    
59     If optional parametar C<start_mfn> is set, this will be first MFN to read
60     from database (so you can skip beginning of your database if you need to).
61    
62     If optional parametar C<limit_mfn> is set, it will read just 500 records
63     from database in example above.
64    
65     Returns number of last record read into memory (size of database, really).
66    
67     =cut
68    
69     sub open {
70     my $self = shift;
71     my $arg = {@_};
72    
73     my $log = $self->_get_logger();
74    
75     $log->logcroak("need filename") if (! $arg->{'filename'});
76     my $code_page = $arg->{'code_page'} || '852';
77    
78     $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*'));
79    
80     # store data in object
81     $self->{'isis_filename'} = $arg->{'filename'};
82     $self->{'isis_code_page'} = $code_page;
83    
84     #$self->{'isis_code_page'} = $code_page;
85    
86     # create Text::Iconv object
87     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
88    
89     $log->info("reading ISIS database '",$arg->{'filename'},"'");
90     $log->debug("isis code page: $code_page");
91    
92     my ($isis_db,$maxmfn);
93    
94     if ($have_openisis) {
95     $log->debug("using OpenIsis perl bindings");
96     $isis_db = OpenIsis::open($arg->{'filename'});
97     $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
98     } elsif ($have_biblio_isis) {
99     $log->debug("using Biblio::Isis");
100     use Biblio::Isis;
101     $isis_db = new Biblio::Isis(
102     isisdb => $arg->{'filename'},
103     include_deleted => 1,
104     hash_filter => sub {
105     my $l = shift || return;
106     $l = $cp->convert($l);
107     return $l;
108     },
109     );
110     $maxmfn = $isis_db->count;
111    
112     unless ($maxmfn) {
113     $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
114     return;
115     }
116    
117     } else {
118     $log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN.");
119     }
120    
121    
122     my $startmfn = 1;
123    
124     if (my $s = $self->{'start_mfn'}) {
125     $log->info("skipping to MFN $s");
126     $startmfn = $s;
127     } else {
128     $self->{'start_mfn'} = $startmfn;
129     }
130    
131     $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn});
132    
133     $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));
134    
135    
136     # read database
137     for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
138    
139     $log->debug("mfn: $mfn\n");
140    
141     my $rec;
142    
143     if ($have_openisis) {
144    
145     # read record using OpenIsis
146     my $row = OpenIsis::read( $isis_db, $mfn );
147     foreach my $k (keys %{$row}) {
148     if ($k ne "mfn") {
149     foreach my $l (@{$row->{$k}}) {
150     $l = $cp->convert($l);
151     # has subfields?
152     my $val;
153     if ($l =~ m/\^/) {
154     foreach my $t (split(/\^/,$l)) {
155     next if (! $t);
156     $val->{substr($t,0,1)} = substr($t,1);
157     }
158     } else {
159     $val = $l;
160     }
161    
162     push @{$rec->{$k}}, $val;
163     }
164     } else {
165     push @{$rec->{'000'}}, $mfn;
166     }
167     }
168    
169     } elsif ($have_biblio_isis) {
170     $rec = $isis_db->to_hash($mfn);
171     } else {
172     $log->logdie("hum? implementation missing?");
173     }
174    
175     $log->confess("record $mfn empty?") unless ($rec);
176    
177     # store
178     if ($self->{'low_mem'}) {
179     $self->{'db'}->put($mfn, $rec);
180     } else {
181     $self->{'data'}->{$mfn} = $rec;
182     }
183    
184     # create lookup
185     $self->{'lookup'}->add( $rec ) if ($self->{'lookup'} && can($self->{'lookup'}->add));
186    
187     $self->progress_bar($mfn,$maxmfn);
188    
189     }
190    
191     $self->{'current_mfn'} = -1;
192     $self->{'last_pcnt'} = 0;
193    
194     $log->debug("max mfn: $maxmfn");
195    
196     # store max mfn and return it.
197     return $self->{'max_mfn'} = $maxmfn;
198     }
199    
200 dpavlin 10 =head2 fetch
201 dpavlin 6
202     Fetch next record from database. It will also displays progress bar.
203    
204 dpavlin 10 my $rec = $isis->fetch;
205 dpavlin 6
206 dpavlin 10 Record from this function should probably go to C<data_structure> for
207     normalisation.
208 dpavlin 8
209 dpavlin 6 =cut
210    
211 dpavlin 10 sub fetch {
212 dpavlin 6 my $self = shift;
213    
214     my $log = $self->_get_logger();
215    
216     $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
217    
218     if ($self->{'current_mfn'} == -1) {
219     $self->{'current_mfn'} = $self->{'start_mfn'};
220     } else {
221     $self->{'current_mfn'}++;
222     }
223    
224     my $mfn = $self->{'current_mfn'};
225    
226     if ($mfn > $self->{'max_mfn'}) {
227     $self->{'current_mfn'} = $self->{'max_mfn'};
228     $log->debug("at EOF");
229     return;
230     }
231    
232     $self->progress_bar($mfn,$self->{'max_mfn'});
233    
234     if ($self->{'low_mem'}) {
235     return $self->{'db'}->get($mfn);
236     } else {
237     return $self->{'data'}->{$mfn};
238     }
239     }
240    
241 dpavlin 10 =head2 pos
242 dpavlin 8
243 dpavlin 10 Returns current record number (MFN).
244 dpavlin 8
245 dpavlin 10 print $isis->pos;
246 dpavlin 8
247 dpavlin 10 First record in database has position 1.
248    
249 dpavlin 8 =cut
250    
251 dpavlin 10 sub pos {
252 dpavlin 8 my $self = shift;
253 dpavlin 10 return $self->{'current_mfn'};
254     }
255 dpavlin 8
256 dpavlin 10
257     =head2 size
258    
259     Returns number of records in database
260    
261     print $isis->size;
262    
263     =cut
264    
265     sub size {
266     my $self = shift;
267     return $self->{'max_mfn'};
268 dpavlin 8 }
269    
270 dpavlin 10 =head2 seek
271 dpavlin 6
272 dpavlin 10 Seek to specified MFN in file.
273 dpavlin 6
274 dpavlin 10 $isis->seek(42);
275 dpavlin 6
276 dpavlin 10 First record in database has position 1.
277    
278 dpavlin 6 =cut
279    
280 dpavlin 10 sub seek {
281 dpavlin 6 my $self = shift;
282 dpavlin 10 my $pos = shift || return;
283    
284     my $log = $self->_get_logger();
285    
286     if ($pos < 1) {
287     $log->warn("seek before first record");
288     $pos = 1;
289     } elsif ($pos > $self->{'max_mfn'}) {
290     $log->warn("seek beyond last record");
291     $pos = $self->{'max_mfn'};
292     }
293    
294     return $self->{'current_mfn'} = (($pos - 1) || -1);
295 dpavlin 6 }
296    
297     =head1 AUTHOR
298    
299     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
300    
301     =head1 COPYRIGHT & LICENSE
302    
303     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
304    
305     This program is free software; you can redistribute it and/or modify it
306     under the same terms as Perl itself.
307    
308     =cut
309    
310     1; # End of WebPAC::Input::ISIS

  ViewVC Help
Powered by ViewVC 1.1.26