/[webpac2]/trunk/lib/WebPAC/Input/ISIS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Input/ISIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (show annotations)
Sat Jul 16 20:35:30 2005 UTC (18 years, 8 months ago) by dpavlin
File size: 6228 byte(s)
ISIS input is finished, low_mem option has code (and not only documentation :-)

1 package WebPAC::Input::ISIS;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Common;
7 use base qw/WebPAC::Input WebPAC::Common/;
8 use Text::Iconv;
9
10 =head1 NAME
11
12 WebPAC::Input::ISIS - support for CDS/ISIS source files
13
14 =head1 VERSION
15
16 Version 0.01
17
18 =cut
19
20 our $VERSION = '0.01';
21
22
23 # auto-configure
24
25 my ($have_biblio_isis, $have_openisis) = (0,0);
26
27 eval "use Biblio::Isis 0.13;";
28 unless ($@) {
29 $have_biblio_isis = 1
30 } else {
31 eval "use OpenIsis;";
32 $have_openisis = 1 unless ($@);
33 }
34
35 =head1 SYNOPSIS
36
37 Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
38 module and read all records to memory.
39
40 my $isis = new WebPAC::Input::ISIS();
41 $isis->open( filename => '/path/to/ISIS/ISIS' );
42
43 =head1 FUNCTIONS
44
45 =head2 open
46
47 This function will read whole database in memory and produce lookups.
48
49 $isis->open(
50 filename => '/data/ISIS/ISIS',
51 code_page => '852',
52 limit_mfn => 500,
53 start_mfn => 6000,
54 lookup => $lookup_obj,
55 );
56
57 By default, ISIS code page is assumed to be C<852>.
58
59 If optional parametar C<start_mfn> is set, this will be first MFN to read
60 from database (so you can skip beginning of your database if you need to).
61
62 If optional parametar C<limit_mfn> is set, it will read just 500 records
63 from database in example above.
64
65 Returns number of last record read into memory (size of database, really).
66
67 =cut
68
69 sub open {
70 my $self = shift;
71 my $arg = {@_};
72
73 my $log = $self->_get_logger();
74
75 $log->logcroak("need filename") if (! $arg->{'filename'});
76 my $code_page = $arg->{'code_page'} || '852';
77
78 $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*'));
79
80 # store data in object
81 $self->{'isis_filename'} = $arg->{'filename'};
82 $self->{'isis_code_page'} = $code_page;
83
84 #$self->{'isis_code_page'} = $code_page;
85
86 # create Text::Iconv object
87 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
88
89 $log->info("reading ISIS database '",$arg->{'filename'},"'");
90 $log->debug("isis code page: $code_page");
91
92 my ($isis_db,$maxmfn);
93
94 if ($have_openisis) {
95 $log->debug("using OpenIsis perl bindings");
96 $isis_db = OpenIsis::open($arg->{'filename'});
97 $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
98 } elsif ($have_biblio_isis) {
99 $log->debug("using Biblio::Isis");
100 use Biblio::Isis;
101 $isis_db = new Biblio::Isis(
102 isisdb => $arg->{'filename'},
103 include_deleted => 1,
104 hash_filter => sub {
105 my $l = shift || return;
106 $l = $cp->convert($l);
107 return $l;
108 },
109 );
110 $maxmfn = $isis_db->count;
111
112 unless ($maxmfn) {
113 $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
114 return;
115 }
116
117 } else {
118 $log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN.");
119 }
120
121
122 my $startmfn = 1;
123
124 if (my $s = $self->{'start_mfn'}) {
125 $log->info("skipping to MFN $s");
126 $startmfn = $s;
127 } else {
128 $self->{'start_mfn'} = $startmfn;
129 }
130
131 $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn});
132
133 $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));
134
135
136 # read database
137 for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
138
139 $log->debug("mfn: $mfn\n");
140
141 my $rec;
142
143 if ($have_openisis) {
144
145 # read record using OpenIsis
146 my $row = OpenIsis::read( $isis_db, $mfn );
147 foreach my $k (keys %{$row}) {
148 if ($k ne "mfn") {
149 foreach my $l (@{$row->{$k}}) {
150 $l = $cp->convert($l);
151 # has subfields?
152 my $val;
153 if ($l =~ m/\^/) {
154 foreach my $t (split(/\^/,$l)) {
155 next if (! $t);
156 $val->{substr($t,0,1)} = substr($t,1);
157 }
158 } else {
159 $val = $l;
160 }
161
162 push @{$rec->{$k}}, $val;
163 }
164 } else {
165 push @{$rec->{'000'}}, $mfn;
166 }
167 }
168
169 } elsif ($have_biblio_isis) {
170 $rec = $isis_db->to_hash($mfn);
171 } else {
172 $log->logdie("hum? implementation missing?");
173 }
174
175 $log->confess("record $mfn empty?") unless ($rec);
176
177 # store
178 if ($self->{'low_mem'}) {
179 $self->{'db'}->put($mfn, $rec);
180 } else {
181 $self->{'data'}->{$mfn} = $rec;
182 }
183
184 # create lookup
185 $self->{'lookup'}->add( $rec ) if ($self->{'lookup'} && can($self->{'lookup'}->add));
186
187 $self->progress_bar($mfn,$maxmfn);
188
189 }
190
191 $self->{'current_mfn'} = -1;
192 $self->{'last_pcnt'} = 0;
193
194 $log->debug("max mfn: $maxmfn");
195
196 # store max mfn and return it.
197 return $self->{'max_mfn'} = $maxmfn;
198 }
199
200 =head2 fetch
201
202 Fetch next record from database. It will also displays progress bar.
203
204 my $rec = $isis->fetch;
205
206 Record from this function should probably go to C<data_structure> for
207 normalisation.
208
209 =cut
210
211 sub fetch {
212 my $self = shift;
213
214 my $log = $self->_get_logger();
215
216 $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
217
218 if ($self->{'current_mfn'} == -1) {
219 $self->{'current_mfn'} = $self->{'start_mfn'};
220 } else {
221 $self->{'current_mfn'}++;
222 }
223
224 my $mfn = $self->{'current_mfn'};
225
226 if ($mfn > $self->{'max_mfn'}) {
227 $self->{'current_mfn'} = $self->{'max_mfn'};
228 $log->debug("at EOF");
229 return;
230 }
231
232 $self->progress_bar($mfn,$self->{'max_mfn'});
233
234 if ($self->{'low_mem'}) {
235 return $self->{'db'}->get($mfn);
236 } else {
237 return $self->{'data'}->{$mfn};
238 }
239 }
240
241 =head2 pos
242
243 Returns current record number (MFN).
244
245 print $isis->pos;
246
247 First record in database has position 1.
248
249 =cut
250
251 sub pos {
252 my $self = shift;
253 return $self->{'current_mfn'};
254 }
255
256
257 =head2 size
258
259 Returns number of records in database
260
261 print $isis->size;
262
263 =cut
264
265 sub size {
266 my $self = shift;
267 return $self->{'max_mfn'};
268 }
269
270 =head2 seek
271
272 Seek to specified MFN in file.
273
274 $isis->seek(42);
275
276 First record in database has position 1.
277
278 =cut
279
280 sub seek {
281 my $self = shift;
282 my $pos = shift || return;
283
284 my $log = $self->_get_logger();
285
286 if ($pos < 1) {
287 $log->warn("seek before first record");
288 $pos = 1;
289 } elsif ($pos > $self->{'max_mfn'}) {
290 $log->warn("seek beyond last record");
291 $pos = $self->{'max_mfn'};
292 }
293
294 return $self->{'current_mfn'} = (($pos - 1) || -1);
295 }
296
297 =head1 AUTHOR
298
299 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
300
301 =head1 COPYRIGHT & LICENSE
302
303 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
304
305 This program is free software; you can redistribute it and/or modify it
306 under the same terms as Perl itself.
307
308 =cut
309
310 1; # End of WebPAC::Input::ISIS

  ViewVC Help
Powered by ViewVC 1.1.26