/[Biblio-Isis]/trunk/IsisDB.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/IsisDB.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (show annotations)
Wed Dec 29 16:04:07 2004 UTC (17 years, 6 months ago) by dpavlin
File size: 9127 byte(s)
skip fields with length 0, OpenIsis produce binary junk in this case.

1 package IsisDB;
2 use strict;
3
4 use Carp;
5 use Data::Dumper;
6
7 BEGIN {
8 use Exporter ();
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 $VERSION = 0.03;
11 @ISA = qw (Exporter);
12 #Give a hoot don't pollute, do not export more than needed by default
13 @EXPORT = qw ();
14 @EXPORT_OK = qw ();
15 %EXPORT_TAGS = ();
16
17 }
18
19 =head1 NAME
20
21 IsisDB - Read CDS/ISIS database
22
23 =head1 SYNOPSIS
24
25 use IsisDB
26 my $isis = new IsisDB(
27 isisdb => './cds/cds',
28 );
29
30 =head1 DESCRIPTION
31
32 This module will read CDS/ISIS databases and create hash values out of it.
33 It can be used as perl-only alternative to OpenIsis module.
34
35 =head1 METHODS
36
37 =cut
38
39 # my $ORDN; # Nodes Order
40 # my $ORDF; # Leafs Order
41 # my $N; # Number of Memory buffers for nodes
42 # my $K; # Number of buffers for first level index
43 # my $LIV; # Current number of Index Levels
44 # my $POSRX; # Pointer to Root Record in N0x
45 # my $NMAXPOS; # Next Available position in N0x
46 # my $FMAXPOS; # Next available position in L0x
47 # my $ABNORMAL; # Formal BTree normality indicator
48
49 #
50 # some binary reads
51 #
52
53 sub Read32 {
54 my $self = shift;
55
56 my $f = shift || die "Read32 needs file handle";
57 read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58 return unpack("l",$b);
59 }
60
61 =head2 new
62
63 Open CDS/ISIS database
64
65 my $isis = new IsisDB(
66 isisdb => './cds/cds',
67 read_fdt => 1,
68 debug => 1,
69 include_deleted => 1,
70 );
71
72 Options are described below:
73
74 =over 5
75
76 =item isisdb
77
78 Prefix path to CDS/ISIS. It should contain full or relative path to database
79 and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
80
81 =item read_fdt
82
83 Boolean flag to specify if field definition table should be read. It's off
84 by default.
85
86 =item debug
87
88 Dump a C<lot> of debugging output.
89
90 =item include_deleted
91
92 Don't skip logically deleted records.
93
94 =back
95
96 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
97
98 =cut
99
100 sub new {
101 my $class = shift;
102 my $self = {};
103 bless($self, $class);
104
105 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
106
107 foreach my $v (qw{isisdb debug include_deleted}) {
108 $self->{$v} = {@_}->{$v};
109 }
110
111 # if you want to read .FDT file use read_fdt argument when creating class!
112 if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
113
114 # read the $db.FDT file for tags
115 my $fieldzone=0;
116
117 open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
118
119 while (<fileFDT>) {
120 chomp;
121 if ($fieldzone) {
122 my $name=substr($_,0,30);
123 my $tag=substr($_,50,3);
124
125 $name =~ s/\s+$//;
126 $tag =~ s/\s+$//;
127
128 $self->{'TagName'}->{$tag}=$name;
129 }
130
131 if (/^\*\*\*/) {
132 $fieldzone=1;
133 }
134 }
135
136 close(fileFDT);
137 }
138
139 # Get the Maximum MFN from $db.MST
140
141 open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
142
143 # MST format: (* = 32 bit signed)
144 # CTLMFN* always 0
145 # NXTMFN* MFN to be assigned to the next record created
146 # NXTMFB* last block allocated to master file
147 # NXTMFP offset to next available position in last block
148 # MFTYPE always 0 for user db file (1 for system)
149 seek(fileMST,4,0);
150 $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
151
152 # save maximum MFN
153 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
154
155 close(fileMST);
156
157 # Get the index information from $db.CNT
158
159 open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
160
161 # There is two 26 Bytes fixed lenght records
162
163 # 0: IDTYPE BTree type 16
164 # 2: ORDN Nodes Order 16
165 # 4: ORDF Leafs Order 16
166 # 6: N Number of Memory buffers for nodes 16
167 # 8: K Number of buffers for first level index 16
168 # 10: LIV Current number of Index Levels 16
169 # 12: POSRX* Pointer to Root Record in N0x 32
170 # 16: NMAXPOS* Next Available position in N0x 32
171 # 20: FMAXPOS* Next available position in L0x 32
172 # 24: ABNORMAL Formal BTree normality indicator 16
173 # length: 26 bytes
174
175 sub unpack_cnt {
176 my $self = shift;
177
178 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
179
180 my $buff = shift || return;
181 my @arr = unpack("ssssssllls", $buff);
182
183 print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
184
185 my $IDTYPE = shift @arr;
186 foreach (@flds) {
187 $self->{$IDTYPE}->{$_} = abs(shift @arr);
188 }
189 }
190
191 my $buff;
192 read(fileCNT, $buff, 26);
193 $self->unpack_cnt($buff);
194
195 read(fileCNT, $buff, 26);
196 $self->unpack_cnt($buff);
197
198
199 close(fileCNT);
200
201 print Dumper($self) if ($self->{debug});
202
203 # open files for later
204 open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
205
206 open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
207
208 $self ? return $self : return undef;
209 }
210
211 =head2 fetch
212
213 Read record with selected MFN
214
215 my $rec = $isis->fetch(55);
216
217 Returns hash with keys which are field names and values are unpacked values
218 for that field.
219
220 =cut
221
222 sub fetch {
223 my $self = shift;
224
225 my $mfn = shift || croak "fetch needs MFN as argument!";
226
227 print "fetch: $mfn\n" if ($self->{debug});
228
229 # XXX check this?
230 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
231
232 print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
233 seek($self->{'fileXRF'},$mfnpos,0);
234
235 # read XRFMFB abd XRFMFP
236 my $pointer=$self->Read32(\*{$self->{'fileXRF'}});
237
238 my $XRFMFB = int($pointer/2048);
239 my $XRFMFP = $pointer - ($XRFMFB*2048);
240
241 print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
242
243 # XXX fix this to be more readable!!
244 # e.g. (XRFMFB - 1) * 512 + XRFMFP
245
246 my $offset = $pointer;
247 my $offset2=int($offset/2048)-1;
248 my $offset22=int($offset/4096);
249 my $offset3=$offset-($offset22*4096);
250 if ($offset3>512) {
251 $offset3=$offset3-2048;
252 }
253 my $offset4=($offset2*512)+$offset3;
254
255 print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
256
257 # Get Record Information
258
259 seek($self->{'fileMST'},$offset4,0);
260
261 my $value=$self->Read32(\*{$self->{'fileMST'}});
262
263 if ($value!=$mfn) {
264 print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
265 return -1; # XXX deleted record?
266 }
267
268 # $MFRL=$self->Read16($fileMST);
269 # $MFBWB=$self->Read32($fileMST);
270 # $MFBWP=$self->Read16($fileMST);
271 # $BASE=$self->Read16($fileMST);
272 # $NVF=$self->Read16($fileMST);
273 # $STATUS=$self->Read16($fileMST);
274
275 my $buff;
276 read($self->{'fileMST'}, $buff, 14);
277
278 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
279
280 print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
281
282 # delete old record
283 delete $self->{record};
284
285 if (! $self->{'include_deleted'} && $MFRL < 0) {
286 print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
287 return;
288 }
289
290 # Get Directory Format
291
292 my @FieldPOS;
293 my @FieldLEN;
294 my @FieldTAG;
295
296 read($self->{'fileMST'}, $buff, 6 * $NVF);
297
298 my $fld_len = 0;
299
300 for (my $i = 0 ; $i < $NVF ; $i++) {
301
302 # $TAG=$self->Read16($fileMST);
303 # $POS=$self->Read16($fileMST);
304 # $LEN=$self->Read16($fileMST);
305
306 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
307
308 print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
309
310 # The TAG does not exists in .FDT so we set it to 0.
311 #
312 # XXX This is removed from perl version; .FDT file is updated manually, so
313 # you will often have fields in .MST file which aren't in .FDT. On the other
314 # hand, IsisMarc doesn't use .FDT files at all!
315
316 #if (! $self->{TagName}->{$TAG}) {
317 # $TAG=0;
318 #}
319
320 push @FieldTAG,$TAG;
321 push @FieldPOS,$POS;
322 push @FieldLEN,$LEN;
323
324 $fld_len += $LEN;
325 }
326
327 # Get Variable Fields
328
329 read($self->{'fileMST'},$buff,$fld_len);
330
331 for (my $i = 0 ; $i < $NVF ; $i++) {
332 # skip zero-sized fields
333 next if ($FieldLEN[$i] == 0);
334
335 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
336 }
337 close(fileMST);
338
339 print Dumper($self) if ($self->{debug});
340
341 return $self->{'record'};
342 }
343
344 =head2 to_ascii
345
346 Dump ascii output of selected MFN
347
348 print $isis->to_ascii(55);
349
350 =cut
351
352 sub to_ascii {
353 my $self = shift;
354
355 my $mfn = shift || croak "need MFN";
356
357 my $rec = $self->fetch($mfn);
358
359 my $out = "0\t$mfn";
360
361 foreach my $f (sort keys %{$rec}) {
362 $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
363 }
364
365 $out .= "\n";
366
367 return $out;
368 }
369
370 #
371 # XXX porting from php left-over:
372 #
373 # do I *REALLY* need those methods, or should I use
374 # $self->{something} directly?
375 #
376 # Probably direct usage is better!
377 #
378
379 sub TagName {
380 my $self = shift;
381 return $self->{TagName};
382 }
383
384 sub NextMFN {
385 my $self = shift;
386 return $self->{NXTMFN};
387 }
388
389 1;
390
391 =head1 BUGS
392
393 This module has been very lightly tested. Use with caution and report bugs.
394
395 =head1 AUTHOR
396
397 Dobrica Pavlinusic
398 CPAN ID: DPAVLIN
399 dpavlin@rot13.org
400 http://www.rot13.org/~dpavlin/
401
402 This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
403 written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
404
405 =head1 COPYRIGHT
406
407 This program is free software; you can redistribute
408 it and/or modify it under the same terms as Perl itself.
409
410 The full text of the license can be found in the
411 LICENSE file included with this module.
412
413
414 =head1 SEE ALSO
415
416 L<http://www.openisis.org|OpenIsis>, perl(1).
417

  ViewVC Help
Powered by ViewVC 1.1.26