/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 11 - (hide annotations)
Wed Dec 29 17:03:52 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 9554 byte(s)
documentation and dependency improvements, inline Read32 to get some more
performance.

1 dpavlin 1 package IsisDB;
2     use strict;
3    
4     use Carp;
5     use Data::Dumper;
6    
7     BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 9 $VERSION = 0.03;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21     IsisDB - Read CDS/ISIS database
22    
23     =head1 SYNOPSIS
24    
25 dpavlin 11 use IsisDB;
26    
27 dpavlin 1 my $isis = new IsisDB(
28     isisdb => './cds/cds',
29     );
30    
31 dpavlin 11 for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
32     print $isis->to_ascii($mfn),"\n";
33     }
34    
35 dpavlin 1 =head1 DESCRIPTION
36    
37     This module will read CDS/ISIS databases and create hash values out of it.
38     It can be used as perl-only alternative to OpenIsis module.
39    
40 dpavlin 11 This will module will always be slower that OpenIsis module which use C
41     library. However, since it's written in perl, it's platform independent (so
42     you don't need C compiler), and can be easily modified.
43    
44     Unique feature of this module is ability to C<include_deleted> records.
45     It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
46     fields which are zero sized will be filled with random junk from memory).
47    
48 dpavlin 1 =head1 METHODS
49    
50     =cut
51    
52     # my $ORDN; # Nodes Order
53     # my $ORDF; # Leafs Order
54     # my $N; # Number of Memory buffers for nodes
55     # my $K; # Number of buffers for first level index
56     # my $LIV; # Current number of Index Levels
57     # my $POSRX; # Pointer to Root Record in N0x
58     # my $NMAXPOS; # Next Available position in N0x
59     # my $FMAXPOS; # Next available position in L0x
60     # my $ABNORMAL; # Formal BTree normality indicator
61    
62     #
63     # some binary reads
64     #
65    
66     =head2 new
67    
68     Open CDS/ISIS database
69    
70     my $isis = new IsisDB(
71     isisdb => './cds/cds',
72     read_fdt => 1,
73     debug => 1,
74 dpavlin 9 include_deleted => 1,
75 dpavlin 1 );
76    
77 dpavlin 2 Options are described below:
78    
79     =over 5
80    
81 dpavlin 1 =item isisdb
82    
83     Prefix path to CDS/ISIS. It should contain full or relative path to database
84     and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
85    
86     =item read_fdt
87    
88     Boolean flag to specify if field definition table should be read. It's off
89     by default.
90    
91     =item debug
92    
93     Dump a C<lot> of debugging output.
94    
95 dpavlin 9 =item include_deleted
96    
97 dpavlin 11 Don't skip logically deleted records in ISIS.
98 dpavlin 9
99 dpavlin 2 =back
100    
101     It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
102    
103 dpavlin 1 =cut
104    
105     sub new {
106     my $class = shift;
107     my $self = {};
108     bless($self, $class);
109    
110 dpavlin 9 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
111 dpavlin 1
112 dpavlin 9 foreach my $v (qw{isisdb debug include_deleted}) {
113     $self->{$v} = {@_}->{$v};
114     }
115 dpavlin 1
116     # if you want to read .FDT file use read_fdt argument when creating class!
117     if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
118    
119     # read the $db.FDT file for tags
120     my $fieldzone=0;
121    
122     open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
123    
124     while (<fileFDT>) {
125     chomp;
126     if ($fieldzone) {
127     my $name=substr($_,0,30);
128     my $tag=substr($_,50,3);
129    
130     $name =~ s/\s+$//;
131     $tag =~ s/\s+$//;
132    
133     $self->{'TagName'}->{$tag}=$name;
134     }
135    
136     if (/^\*\*\*/) {
137     $fieldzone=1;
138     }
139     }
140    
141     close(fileFDT);
142     }
143    
144     # Get the Maximum MFN from $db.MST
145    
146     open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
147    
148     # MST format: (* = 32 bit signed)
149     # CTLMFN* always 0
150     # NXTMFN* MFN to be assigned to the next record created
151     # NXTMFB* last block allocated to master file
152     # NXTMFP offset to next available position in last block
153     # MFTYPE always 0 for user db file (1 for system)
154     seek(fileMST,4,0);
155    
156 dpavlin 11 my $buff;
157    
158     read(fileMST, $buff, 4);
159     $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
160    
161 dpavlin 2 # save maximum MFN
162     $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
163    
164 dpavlin 1 close(fileMST);
165    
166     # Get the index information from $db.CNT
167    
168     open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
169    
170     # There is two 26 Bytes fixed lenght records
171    
172     # 0: IDTYPE BTree type 16
173     # 2: ORDN Nodes Order 16
174     # 4: ORDF Leafs Order 16
175     # 6: N Number of Memory buffers for nodes 16
176     # 8: K Number of buffers for first level index 16
177     # 10: LIV Current number of Index Levels 16
178     # 12: POSRX* Pointer to Root Record in N0x 32
179     # 16: NMAXPOS* Next Available position in N0x 32
180     # 20: FMAXPOS* Next available position in L0x 32
181     # 24: ABNORMAL Formal BTree normality indicator 16
182     # length: 26 bytes
183    
184     sub unpack_cnt {
185     my $self = shift;
186    
187     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
188    
189     my $buff = shift || return;
190     my @arr = unpack("ssssssllls", $buff);
191    
192 dpavlin 9 print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
193    
194 dpavlin 1 my $IDTYPE = shift @arr;
195     foreach (@flds) {
196     $self->{$IDTYPE}->{$_} = abs(shift @arr);
197     }
198     }
199    
200     read(fileCNT, $buff, 26);
201     $self->unpack_cnt($buff);
202    
203     read(fileCNT, $buff, 26);
204     $self->unpack_cnt($buff);
205    
206    
207     close(fileCNT);
208    
209     print Dumper($self) if ($self->{debug});
210    
211 dpavlin 7 # open files for later
212     open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
213    
214     open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
215    
216 dpavlin 1 $self ? return $self : return undef;
217     }
218    
219 dpavlin 7 =head2 fetch
220 dpavlin 1
221 dpavlin 2 Read record with selected MFN
222 dpavlin 1
223 dpavlin 7 my $rec = $isis->fetch(55);
224 dpavlin 2
225     Returns hash with keys which are field names and values are unpacked values
226     for that field.
227    
228     =cut
229    
230 dpavlin 7 sub fetch {
231 dpavlin 1 my $self = shift;
232    
233 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
234 dpavlin 1
235 dpavlin 7 print "fetch: $mfn\n" if ($self->{debug});
236 dpavlin 1
237     # XXX check this?
238     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
239    
240     print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
241 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
242 dpavlin 1
243 dpavlin 11 my $buff;
244    
245 dpavlin 1 # read XRFMFB abd XRFMFP
246 dpavlin 11 read($self->{'fileXRF'}, $buff, 4);
247     my $pointer=unpack("l",$buff) || carp "pointer is null";
248 dpavlin 1
249     my $XRFMFB = int($pointer/2048);
250     my $XRFMFP = $pointer - ($XRFMFB*2048);
251    
252     print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
253    
254     # XXX fix this to be more readable!!
255     # e.g. (XRFMFB - 1) * 512 + XRFMFP
256    
257     my $offset = $pointer;
258     my $offset2=int($offset/2048)-1;
259     my $offset22=int($offset/4096);
260     my $offset3=$offset-($offset22*4096);
261     if ($offset3>512) {
262     $offset3=$offset3-2048;
263     }
264     my $offset4=($offset2*512)+$offset3;
265    
266     print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
267    
268     # Get Record Information
269    
270 dpavlin 7 seek($self->{'fileMST'},$offset4,0);
271 dpavlin 1
272 dpavlin 11 read($self->{'fileMST'}, $buff, 4);
273     my $value=unpack("l",$buff);
274 dpavlin 1
275     if ($value!=$mfn) {
276     print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
277     return -1; # XXX deleted record?
278     }
279    
280     # $MFRL=$self->Read16($fileMST);
281     # $MFBWB=$self->Read32($fileMST);
282     # $MFBWP=$self->Read16($fileMST);
283     # $BASE=$self->Read16($fileMST);
284     # $NVF=$self->Read16($fileMST);
285     # $STATUS=$self->Read16($fileMST);
286    
287 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
288 dpavlin 1
289     my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
290    
291     print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
292    
293 dpavlin 9 # delete old record
294     delete $self->{record};
295    
296     if (! $self->{'include_deleted'} && $MFRL < 0) {
297     print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
298     return;
299     }
300    
301 dpavlin 1 # Get Directory Format
302    
303     my @FieldPOS;
304     my @FieldLEN;
305     my @FieldTAG;
306    
307 dpavlin 8 read($self->{'fileMST'}, $buff, 6 * $NVF);
308    
309     my $fld_len = 0;
310    
311 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
312    
313     # $TAG=$self->Read16($fileMST);
314     # $POS=$self->Read16($fileMST);
315     # $LEN=$self->Read16($fileMST);
316    
317 dpavlin 8 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
318 dpavlin 1
319     print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
320    
321     # The TAG does not exists in .FDT so we set it to 0.
322     #
323     # XXX This is removed from perl version; .FDT file is updated manually, so
324     # you will often have fields in .MST file which aren't in .FDT. On the other
325     # hand, IsisMarc doesn't use .FDT files at all!
326    
327     #if (! $self->{TagName}->{$TAG}) {
328     # $TAG=0;
329     #}
330    
331     push @FieldTAG,$TAG;
332     push @FieldPOS,$POS;
333     push @FieldLEN,$LEN;
334 dpavlin 8
335     $fld_len += $LEN;
336 dpavlin 1 }
337    
338     # Get Variable Fields
339    
340 dpavlin 8 read($self->{'fileMST'},$buff,$fld_len);
341    
342 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
343 dpavlin 10 # skip zero-sized fields
344     next if ($FieldLEN[$i] == 0);
345    
346 dpavlin 8 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
347 dpavlin 1 }
348     close(fileMST);
349    
350     print Dumper($self) if ($self->{debug});
351    
352 dpavlin 2 return $self->{'record'};
353 dpavlin 1 }
354    
355 dpavlin 2 =head2 to_ascii
356    
357     Dump ascii output of selected MFN
358    
359     print $isis->to_ascii(55);
360    
361     =cut
362    
363     sub to_ascii {
364     my $self = shift;
365    
366     my $mfn = shift || croak "need MFN";
367    
368 dpavlin 7 my $rec = $self->fetch($mfn);
369 dpavlin 2
370     my $out = "0\t$mfn";
371    
372     foreach my $f (sort keys %{$rec}) {
373     $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
374     }
375    
376     $out .= "\n";
377    
378     return $out;
379     }
380    
381 dpavlin 1 #
382     # XXX porting from php left-over:
383     #
384     # do I *REALLY* need those methods, or should I use
385     # $self->{something} directly?
386     #
387     # Probably direct usage is better!
388     #
389    
390 dpavlin 7 sub TagName {
391 dpavlin 1 my $self = shift;
392     return $self->{TagName};
393     }
394    
395 dpavlin 7 sub NextMFN {
396 dpavlin 1 my $self = shift;
397     return $self->{NXTMFN};
398     }
399    
400     1;
401    
402     =head1 BUGS
403    
404     This module has been very lightly tested. Use with caution and report bugs.
405    
406     =head1 AUTHOR
407    
408     Dobrica Pavlinusic
409     CPAN ID: DPAVLIN
410     dpavlin@rot13.org
411     http://www.rot13.org/~dpavlin/
412    
413     This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
414     written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
415    
416     =head1 COPYRIGHT
417    
418     This program is free software; you can redistribute
419     it and/or modify it under the same terms as Perl itself.
420    
421     The full text of the license can be found in the
422     LICENSE file included with this module.
423    
424    
425     =head1 SEE ALSO
426    
427     L<http://www.openisis.org|OpenIsis>, perl(1).
428    

  ViewVC Help
Powered by ViewVC 1.1.26