/[webpac2]/trunk/lib/WebPAC/Input/ISIS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/ISIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 8 - (hide annotations)
Sat Jul 16 16:48:35 2005 UTC (18 years, 10 months ago) by dpavlin
File size: 5882 byte(s)
little cleanup and first cut into WebPAC::Normalize::XML

1 dpavlin 6 package WebPAC::Input::ISIS;
2    
3     use warnings;
4     use strict;
5    
6     use WebPAC::Common;
7     use base qw/WebPAC::Input WebPAC::Common/;
8    
9     =head1 NAME
10    
11     WebPAC::Input::ISIS - support for CDS/ISIS source files
12    
13     =head1 VERSION
14    
15     Version 0.01
16    
17     =cut
18    
19     our $VERSION = '0.01';
20    
21    
22     # auto-configure
23    
24     my ($have_biblio_isis, $have_openisis) = (0,0);
25    
26     eval "use Biblio::Isis 0.13;";
27     unless ($@) {
28     $have_biblio_isis = 1
29     } else {
30     eval "use OpenIsis;";
31     $have_openisis = 1 unless ($@);
32     }
33    
34     =head1 SYNOPSIS
35    
36     Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
37     module and read all records to memory.
38    
39     my $isis = new WebPAC::Input::ISIS();
40     $isis->open( filename => '/path/to/ISIS/ISIS' );
41    
42     =head1 FUNCTIONS
43    
44     =head2 open
45    
46     This function will read whole database in memory and produce lookups.
47    
48     $isis->open(
49     filename => '/data/ISIS/ISIS',
50     code_page => '852',
51     limit_mfn => 500,
52     start_mfn => 6000,
53     lookup => $lookup_obj,
54     );
55    
56     By default, ISIS code page is assumed to be C<852>.
57    
58     If optional parametar C<start_mfn> is set, this will be first MFN to read
59     from database (so you can skip beginning of your database if you need to).
60    
61     If optional parametar C<limit_mfn> is set, it will read just 500 records
62     from database in example above.
63    
64     Returns number of last record read into memory (size of database, really).
65    
66     =cut
67    
68     sub open {
69     my $self = shift;
70     my $arg = {@_};
71    
72     my $log = $self->_get_logger();
73    
74     $log->logcroak("need filename") if (! $arg->{'filename'});
75     my $code_page = $arg->{'code_page'} || '852';
76    
77     $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*'));
78    
79     # store data in object
80     $self->{'isis_filename'} = $arg->{'filename'};
81     $self->{'isis_code_page'} = $code_page;
82    
83     #$self->{'isis_code_page'} = $code_page;
84    
85     # create Text::Iconv object
86     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
87    
88     $log->info("reading ISIS database '",$arg->{'filename'},"'");
89     $log->debug("isis code page: $code_page");
90    
91     my ($isis_db,$maxmfn);
92    
93     if ($have_openisis) {
94     $log->debug("using OpenIsis perl bindings");
95     $isis_db = OpenIsis::open($arg->{'filename'});
96     $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
97     } elsif ($have_biblio_isis) {
98     $log->debug("using Biblio::Isis");
99     use Biblio::Isis;
100     $isis_db = new Biblio::Isis(
101     isisdb => $arg->{'filename'},
102     include_deleted => 1,
103     hash_filter => sub {
104     my $l = shift || return;
105     $l = $cp->convert($l);
106     return $l;
107     },
108     );
109     $maxmfn = $isis_db->count;
110    
111     unless ($maxmfn) {
112     $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
113     return;
114     }
115    
116     } else {
117     $log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN.");
118     }
119    
120    
121     my $startmfn = 1;
122    
123     if (my $s = $self->{'start_mfn'}) {
124     $log->info("skipping to MFN $s");
125     $startmfn = $s;
126     } else {
127     $self->{'start_mfn'} = $startmfn;
128     }
129    
130     $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn});
131    
132     $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));
133    
134    
135     # read database
136     for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
137    
138     $log->debug("mfn: $mfn\n");
139    
140     my $rec;
141    
142     if ($have_openisis) {
143    
144     # read record using OpenIsis
145     my $row = OpenIsis::read( $isis_db, $mfn );
146     foreach my $k (keys %{$row}) {
147     if ($k ne "mfn") {
148     foreach my $l (@{$row->{$k}}) {
149     $l = $cp->convert($l);
150     # has subfields?
151     my $val;
152     if ($l =~ m/\^/) {
153     foreach my $t (split(/\^/,$l)) {
154     next if (! $t);
155     $val->{substr($t,0,1)} = substr($t,1);
156     }
157     } else {
158     $val = $l;
159     }
160    
161     push @{$rec->{$k}}, $val;
162     }
163     } else {
164     push @{$rec->{'000'}}, $mfn;
165     }
166     }
167    
168     } elsif ($have_biblio_isis) {
169     $rec = $isis_db->to_hash($mfn);
170     } else {
171     $log->logdie("hum? implementation missing?");
172     }
173    
174     $log->confess("record $mfn empty?") unless ($rec);
175    
176     # store
177     if ($self->{'low_mem'}) {
178     $self->{'db'}->put($mfn, $rec);
179     } else {
180     $self->{'data'}->{$mfn} = $rec;
181     }
182    
183     # create lookup
184     $self->{'lookup'}->add( $rec ) if ($self->{'lookup'} && can($self->{'lookup'}->add));
185    
186     $self->progress_bar($mfn,$maxmfn);
187    
188     }
189    
190     $self->{'current_mfn'} = -1;
191     $self->{'last_pcnt'} = 0;
192    
193     $log->debug("max mfn: $maxmfn");
194    
195     # store max mfn and return it.
196     return $self->{'max_mfn'} = $maxmfn;
197     }
198    
199     =head2 fetch_rec
200    
201     Fetch next record from database. It will also displays progress bar.
202    
203     my $rec = $webpac->fetch_rec;
204    
205 dpavlin 8 You should rearly have the need to call this function directly. Instead use
206     C<fetch_data_structure> which returns normalised data.
207    
208 dpavlin 6 =cut
209    
210     sub fetch_rec {
211     my $self = shift;
212    
213     my $log = $self->_get_logger();
214    
215     $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
216    
217     if ($self->{'current_mfn'} == -1) {
218     $self->{'current_mfn'} = $self->{'start_mfn'};
219     } else {
220     $self->{'current_mfn'}++;
221     }
222    
223     my $mfn = $self->{'current_mfn'};
224    
225     if ($mfn > $self->{'max_mfn'}) {
226     $self->{'current_mfn'} = $self->{'max_mfn'};
227     $log->debug("at EOF");
228     return;
229     }
230    
231     $self->progress_bar($mfn,$self->{'max_mfn'});
232    
233     if ($self->{'low_mem'}) {
234     return $self->{'db'}->get($mfn);
235     } else {
236     return $self->{'data'}->{$mfn};
237     }
238     }
239    
240 dpavlin 8 =head2 fetch_data_structure
241    
242     Fetch data structure of next record from database.
243    
244     my @ds = $webpac->fetch_data_structure;
245    
246     =cut
247    
248     sub fetch_data_structure {
249     my $self = shift;
250    
251     return $self->data_structure(
252     $self->fetch_rec(@_)
253     );
254     }
255    
256 dpavlin 6 =head2 mfn
257    
258     Returns current record number (MFN).
259    
260     print $webpac->mfn;
261    
262     =cut
263    
264     sub mfn {
265     my $self = shift;
266     return $self->{'current_mfn'};
267     }
268    
269     =head1 AUTHOR
270    
271     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
272    
273     =head1 COPYRIGHT & LICENSE
274    
275     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
276    
277     This program is free software; you can redistribute it and/or modify it
278     under the same terms as Perl itself.
279    
280     =cut
281    
282     1; # End of WebPAC::Input::ISIS

  ViewVC Help
Powered by ViewVC 1.1.26