/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 8 - (show annotations)
Wed Dec 29 15:17:59 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 8777 byte(s)
another speedup (7845.71/s)

1 package IsisDB;
2 use strict;
3
4 use Carp;
5 use Data::Dumper;
6
7 BEGIN {
8 use Exporter ();
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 $VERSION = 0.02;
11 @ISA = qw (Exporter);
12 #Give a hoot don't pollute, do not export more than needed by default
13 @EXPORT = qw ();
14 @EXPORT_OK = qw ();
15 %EXPORT_TAGS = ();
16
17 }
18
19 =head1 NAME
20
21 IsisDB - Read CDS/ISIS database
22
23 =head1 SYNOPSIS
24
25 use IsisDB
26 my $isis = new IsisDB(
27 isisdb => './cds/cds',
28 );
29
30 =head1 DESCRIPTION
31
32 This module will read CDS/ISIS databases and create hash values out of it.
33 It can be used as perl-only alternative to OpenIsis module.
34
35 =head1 METHODS
36
37 =cut
38
39 # my $ORDN; # Nodes Order
40 # my $ORDF; # Leafs Order
41 # my $N; # Number of Memory buffers for nodes
42 # my $K; # Number of buffers for first level index
43 # my $LIV; # Current number of Index Levels
44 # my $POSRX; # Pointer to Root Record in N0x
45 # my $NMAXPOS; # Next Available position in N0x
46 # my $FMAXPOS; # Next available position in L0x
47 # my $ABNORMAL; # Formal BTree normality indicator
48
49 #
50 # some binary reads
51 #
52
53 sub Read32 {
54 my $self = shift;
55
56 my $f = shift || die "Read32 needs file handle";
57 read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58 return unpack("l",$b);
59 }
60
61 =head2 new
62
63 Open CDS/ISIS database
64
65 my $isis = new IsisDB(
66 isisdb => './cds/cds',
67 read_fdt => 1,
68 debug => 1,
69 );
70
71 Options are described below:
72
73 =over 5
74
75 =item isisdb
76
77 Prefix path to CDS/ISIS. It should contain full or relative path to database
78 and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
79
80 =item read_fdt
81
82 Boolean flag to specify if field definition table should be read. It's off
83 by default.
84
85 =item debug
86
87 Dump a C<lot> of debugging output.
88
89 =back
90
91 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
92
93 =cut
94
95 sub new {
96 my $class = shift;
97 my $self = {};
98 bless($self, $class);
99
100 $self->{isisdb} = {@_}->{isisdb} || croak "new needs database name as argument!";
101
102 $self->{debug} = {@_}->{debug};
103
104 # if you want to read .FDT file use read_fdt argument when creating class!
105 if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
106
107 # read the $db.FDT file for tags
108 my $fieldzone=0;
109
110 open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
111
112 while (<fileFDT>) {
113 chomp;
114 if ($fieldzone) {
115 my $name=substr($_,0,30);
116 my $tag=substr($_,50,3);
117
118 $name =~ s/\s+$//;
119 $tag =~ s/\s+$//;
120
121 $self->{'TagName'}->{$tag}=$name;
122 }
123
124 if (/^\*\*\*/) {
125 $fieldzone=1;
126 }
127 }
128
129 close(fileFDT);
130 }
131
132 # Get the Maximum MFN from $db.MST
133
134 open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
135
136 # MST format: (* = 32 bit signed)
137 # CTLMFN* always 0
138 # NXTMFN* MFN to be assigned to the next record created
139 # NXTMFB* last block allocated to master file
140 # NXTMFP offset to next available position in last block
141 # MFTYPE always 0 for user db file (1 for system)
142 seek(fileMST,4,0);
143 $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
144
145 # save maximum MFN
146 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
147
148 close(fileMST);
149
150 # Get the index information from $db.CNT
151
152 open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
153
154 # There is two 26 Bytes fixed lenght records
155
156 # 0: IDTYPE BTree type 16
157 # 2: ORDN Nodes Order 16
158 # 4: ORDF Leafs Order 16
159 # 6: N Number of Memory buffers for nodes 16
160 # 8: K Number of buffers for first level index 16
161 # 10: LIV Current number of Index Levels 16
162 # 12: POSRX* Pointer to Root Record in N0x 32
163 # 16: NMAXPOS* Next Available position in N0x 32
164 # 20: FMAXPOS* Next available position in L0x 32
165 # 24: ABNORMAL Formal BTree normality indicator 16
166 # length: 26 bytes
167
168 sub unpack_cnt {
169 my $self = shift;
170
171 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
172
173 my $buff = shift || return;
174 my @arr = unpack("ssssssllls", $buff);
175
176 my $IDTYPE = shift @arr;
177 foreach (@flds) {
178 $self->{$IDTYPE}->{$_} = abs(shift @arr);
179 }
180 }
181
182 my $buff;
183 read(fileCNT, $buff, 26);
184 $self->unpack_cnt($buff);
185
186 read(fileCNT, $buff, 26);
187 $self->unpack_cnt($buff);
188
189
190 close(fileCNT);
191
192 print Dumper($self) if ($self->{debug});
193
194 # open files for later
195 open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
196
197 open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
198
199 $self ? return $self : return undef;
200 }
201
202 =head2 fetch
203
204 Read record with selected MFN
205
206 my $rec = $isis->fetch(55);
207
208 Returns hash with keys which are field names and values are unpacked values
209 for that field.
210
211 =cut
212
213 sub fetch {
214 my $self = shift;
215
216 my $mfn = shift || croak "fetch needs MFN as argument!";
217
218 print "fetch: $mfn\n" if ($self->{debug});
219
220 # XXX check this?
221 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
222
223 print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
224 seek($self->{'fileXRF'},$mfnpos,0);
225
226 # read XRFMFB abd XRFMFP
227 my $pointer=$self->Read32(\*{$self->{'fileXRF'}});
228
229 my $XRFMFB = int($pointer/2048);
230 my $XRFMFP = $pointer - ($XRFMFB*2048);
231
232 print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
233
234 # XXX fix this to be more readable!!
235 # e.g. (XRFMFB - 1) * 512 + XRFMFP
236
237 my $offset = $pointer;
238 my $offset2=int($offset/2048)-1;
239 my $offset22=int($offset/4096);
240 my $offset3=$offset-($offset22*4096);
241 if ($offset3>512) {
242 $offset3=$offset3-2048;
243 }
244 my $offset4=($offset2*512)+$offset3;
245
246 print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
247
248 # Get Record Information
249
250 seek($self->{'fileMST'},$offset4,0);
251
252 my $value=$self->Read32(\*{$self->{'fileMST'}});
253
254 if ($value!=$mfn) {
255 print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
256 return -1; # XXX deleted record?
257 }
258
259 # $MFRL=$self->Read16($fileMST);
260 # $MFBWB=$self->Read32($fileMST);
261 # $MFBWP=$self->Read16($fileMST);
262 # $BASE=$self->Read16($fileMST);
263 # $NVF=$self->Read16($fileMST);
264 # $STATUS=$self->Read16($fileMST);
265
266 my $buff;
267 read($self->{'fileMST'}, $buff, 14);
268
269 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
270
271 print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
272
273 # Get Directory Format
274
275 my @FieldPOS;
276 my @FieldLEN;
277 my @FieldTAG;
278
279 read($self->{'fileMST'}, $buff, 6 * $NVF);
280
281 my $fld_len = 0;
282
283 for (my $i = 0 ; $i < $NVF ; $i++) {
284
285 # $TAG=$self->Read16($fileMST);
286 # $POS=$self->Read16($fileMST);
287 # $LEN=$self->Read16($fileMST);
288
289 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
290
291 print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
292
293 # The TAG does not exists in .FDT so we set it to 0.
294 #
295 # XXX This is removed from perl version; .FDT file is updated manually, so
296 # you will often have fields in .MST file which aren't in .FDT. On the other
297 # hand, IsisMarc doesn't use .FDT files at all!
298
299 #if (! $self->{TagName}->{$TAG}) {
300 # $TAG=0;
301 #}
302
303 push @FieldTAG,$TAG;
304 push @FieldPOS,$POS;
305 push @FieldLEN,$LEN;
306
307 $fld_len += $LEN;
308 }
309
310 # Get Variable Fields
311
312 delete $self->{record};
313
314 read($self->{'fileMST'},$buff,$fld_len);
315
316 for (my $i = 0 ; $i < $NVF ; $i++) {
317 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
318 }
319 close(fileMST);
320
321 # The record is marked for deletion
322 if ($STATUS==1) {
323 return -1;
324 }
325
326 print Dumper($self) if ($self->{debug});
327
328 return $self->{'record'};
329 }
330
331 =head2 to_ascii
332
333 Dump ascii output of selected MFN
334
335 print $isis->to_ascii(55);
336
337 =cut
338
339 sub to_ascii {
340 my $self = shift;
341
342 my $mfn = shift || croak "need MFN";
343
344 my $rec = $self->fetch($mfn);
345
346 my $out = "0\t$mfn";
347
348 foreach my $f (sort keys %{$rec}) {
349 $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
350 }
351
352 $out .= "\n";
353
354 return $out;
355 }
356
357 #
358 # XXX porting from php left-over:
359 #
360 # do I *REALLY* need those methods, or should I use
361 # $self->{something} directly?
362 #
363 # Probably direct usage is better!
364 #
365
366 sub TagName {
367 my $self = shift;
368 return $self->{TagName};
369 }
370
371 sub NextMFN {
372 my $self = shift;
373 return $self->{NXTMFN};
374 }
375
376 1;
377
378 =head1 BUGS
379
380 This module has been very lightly tested. Use with caution and report bugs.
381
382 =head1 AUTHOR
383
384 Dobrica Pavlinusic
385 CPAN ID: DPAVLIN
386 dpavlin@rot13.org
387 http://www.rot13.org/~dpavlin/
388
389 This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
390 written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
391
392 =head1 COPYRIGHT
393
394 This program is free software; you can redistribute
395 it and/or modify it under the same terms as Perl itself.
396
397 The full text of the license can be found in the
398 LICENSE file included with this module.
399
400
401 =head1 SEE ALSO
402
403 L<http://www.openisis.org|OpenIsis>, perl(1).
404

  ViewVC Help
Powered by ViewVC 1.1.26