/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2 - (hide annotations)
Tue Dec 28 01:41:45 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 16080 byte(s)
first working version:
- add support for repeatable fields (so all hash values becomed arrays, even
  with single element)
- scripts to dump CDS/ISIS database using this module and OpenIsis
- to_ascii method which dumps ascii output of record

1 dpavlin 1 package IsisDB;
2     use strict;
3    
4     use Carp;
5     use Data::Dumper;
6    
7     BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10     $VERSION = 0.01;
11     @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21     IsisDB - Read CDS/ISIS database
22    
23     =head1 SYNOPSIS
24    
25     use IsisDB
26     my $isis = new IsisDB(
27     isisdb => './cds/cds',
28     );
29    
30     =head1 DESCRIPTION
31    
32     This module will read CDS/ISIS databases and create hash values out of it.
33     It can be used as perl-only alternative to OpenIsis module.
34    
35     =head1 METHODS
36    
37     =cut
38    
39     # my $ORDN; # Nodes Order
40     # my $ORDF; # Leafs Order
41     # my $N; # Number of Memory buffers for nodes
42     # my $K; # Number of buffers for first level index
43     # my $LIV; # Current number of Index Levels
44     # my $POSRX; # Pointer to Root Record in N0x
45     # my $NMAXPOS; # Next Available position in N0x
46     # my $FMAXPOS; # Next available position in L0x
47     # my $ABNORMAL; # Formal BTree normality indicator
48    
49     #
50     # some binary reads
51     #
52    
53     sub Read32 {
54     my $self = shift;
55    
56     my $f = shift || die "Read32 needs file handle";
57     read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58     return unpack("l",$b);
59     }
60    
61     =head2 new
62    
63     Open CDS/ISIS database
64    
65     my $isis = new IsisDB(
66     isisdb => './cds/cds',
67     read_fdt => 1,
68     debug => 1,
69     );
70    
71 dpavlin 2 Options are described below:
72    
73     =over 5
74    
75 dpavlin 1 =item isisdb
76    
77     Prefix path to CDS/ISIS. It should contain full or relative path to database
78     and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
79    
80     =item read_fdt
81    
82     Boolean flag to specify if field definition table should be read. It's off
83     by default.
84    
85     =item debug
86    
87     Dump a C<lot> of debugging output.
88    
89 dpavlin 2 =back
90    
91     It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
92    
93 dpavlin 1 =cut
94    
95     sub new {
96     my $class = shift;
97     my $self = {};
98     bless($self, $class);
99    
100     $self->{isisdb} = {@_}->{isisdb} || croak "new needs database name as argument!";
101    
102 dpavlin 2 $self->{debug} = {@_}->{debug};
103 dpavlin 1
104     # if you want to read .FDT file use read_fdt argument when creating class!
105     if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
106    
107     # read the $db.FDT file for tags
108     my $fieldzone=0;
109    
110     open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
111    
112     while (<fileFDT>) {
113     chomp;
114     if ($fieldzone) {
115     my $name=substr($_,0,30);
116     my $tag=substr($_,50,3);
117    
118     $name =~ s/\s+$//;
119     $tag =~ s/\s+$//;
120    
121     $self->{'TagName'}->{$tag}=$name;
122     }
123    
124     if (/^\*\*\*/) {
125     $fieldzone=1;
126     }
127     }
128    
129     close(fileFDT);
130     }
131    
132     # Get the Maximum MFN from $db.MST
133    
134     open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
135    
136     # MST format: (* = 32 bit signed)
137     # CTLMFN* always 0
138     # NXTMFN* MFN to be assigned to the next record created
139     # NXTMFB* last block allocated to master file
140     # NXTMFP offset to next available position in last block
141     # MFTYPE always 0 for user db file (1 for system)
142     seek(fileMST,4,0);
143     $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
144    
145 dpavlin 2 # save maximum MFN
146     $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
147    
148 dpavlin 1 close(fileMST);
149    
150     # Get the index information from $db.CNT
151    
152     open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
153    
154     # There is two 26 Bytes fixed lenght records
155    
156     # 0: IDTYPE BTree type 16
157     # 2: ORDN Nodes Order 16
158     # 4: ORDF Leafs Order 16
159     # 6: N Number of Memory buffers for nodes 16
160     # 8: K Number of buffers for first level index 16
161     # 10: LIV Current number of Index Levels 16
162     # 12: POSRX* Pointer to Root Record in N0x 32
163     # 16: NMAXPOS* Next Available position in N0x 32
164     # 20: FMAXPOS* Next available position in L0x 32
165     # 24: ABNORMAL Formal BTree normality indicator 16
166     # length: 26 bytes
167    
168     sub unpack_cnt {
169     my $self = shift;
170    
171     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
172    
173     my $buff = shift || return;
174     my @arr = unpack("ssssssllls", $buff);
175    
176     my $IDTYPE = shift @arr;
177     foreach (@flds) {
178     $self->{$IDTYPE}->{$_} = abs(shift @arr);
179     }
180     }
181    
182     my $buff;
183     read(fileCNT, $buff, 26);
184     $self->unpack_cnt($buff);
185    
186     read(fileCNT, $buff, 26);
187     $self->unpack_cnt($buff);
188    
189    
190     close(fileCNT);
191    
192     print Dumper($self) if ($self->{debug});
193    
194     $self ? return $self : return undef;
195     }
196    
197 dpavlin 2 =head2 GetMFN
198 dpavlin 1
199 dpavlin 2 Read record with selected MFN
200 dpavlin 1
201 dpavlin 2 my $rec = $isis->GetMFN(55);
202    
203     Returns hash with keys which are field names and values are unpacked values
204     for that field.
205    
206     =cut
207    
208 dpavlin 1 sub GetMFN {
209     my $self = shift;
210    
211     my $mfn = shift || croak "GetMFN needs MFN as argument!";
212    
213     print "GetMFN: $mfn\n" if ($self->{debug});
214    
215     open(fileXRF, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
216    
217     # XXX check this?
218     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
219    
220     print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
221     seek(fileXRF,$mfnpos,0);
222    
223     # read XRFMFB abd XRFMFP
224     my $pointer=$self->Read32(\*fileXRF);
225    
226     my $XRFMFB = int($pointer/2048);
227     my $XRFMFP = $pointer - ($XRFMFB*2048);
228    
229     print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
230    
231     # XXX fix this to be more readable!!
232     # e.g. (XRFMFB - 1) * 512 + XRFMFP
233    
234     my $offset = $pointer;
235     my $offset2=int($offset/2048)-1;
236     my $offset22=int($offset/4096);
237     my $offset3=$offset-($offset22*4096);
238     if ($offset3>512) {
239     $offset3=$offset3-2048;
240     }
241     my $offset4=($offset2*512)+$offset3;
242    
243     print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
244    
245     close(fileXRF);
246    
247     # Get Record Information
248    
249     open(fileMST, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
250    
251     seek(fileMST,$offset4,0);
252    
253     my $value=$self->Read32(\*fileMST);
254    
255     if ($value!=$mfn) {
256     print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
257     return -1; # XXX deleted record?
258     }
259    
260     # $MFRL=$self->Read16($fileMST);
261     # $MFBWB=$self->Read32($fileMST);
262     # $MFBWP=$self->Read16($fileMST);
263     # $BASE=$self->Read16($fileMST);
264     # $NVF=$self->Read16($fileMST);
265     # $STATUS=$self->Read16($fileMST);
266    
267     my $buff;
268     read(fileMST, $buff, 14);
269    
270     my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
271    
272     print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
273    
274     # Get Directory Format
275    
276     my @FieldPOS;
277     my @FieldLEN;
278     my @FieldTAG;
279    
280     for (my $i = 0 ; $i < $NVF ; $i++) {
281    
282     # $TAG=$self->Read16($fileMST);
283     # $POS=$self->Read16($fileMST);
284     # $LEN=$self->Read16($fileMST);
285    
286     read(fileMST, $buff, 6);
287     my ($TAG,$POS,$LEN) = unpack("sss", $buff);
288    
289     print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
290    
291     # The TAG does not exists in .FDT so we set it to 0.
292     #
293     # XXX This is removed from perl version; .FDT file is updated manually, so
294     # you will often have fields in .MST file which aren't in .FDT. On the other
295     # hand, IsisMarc doesn't use .FDT files at all!
296    
297     #if (! $self->{TagName}->{$TAG}) {
298     # $TAG=0;
299     #}
300    
301     push @FieldTAG,$TAG;
302     push @FieldPOS,$POS;
303     push @FieldLEN,$LEN;
304     }
305    
306     # Get Variable Fields
307    
308 dpavlin 2 delete $self->{record};
309    
310 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
311     my $rec;
312     read(fileMST,$rec,$FieldLEN[$i]);
313 dpavlin 2 push @{$self->{record}->{$FieldTAG[$i]}}, $rec;
314 dpavlin 1 }
315     close(fileMST);
316    
317     # The record is marked for deletion
318     if ($STATUS==1) {
319     return -1;
320     }
321    
322     print Dumper($self) if ($self->{debug});
323    
324 dpavlin 2 return $self->{'record'};
325 dpavlin 1 }
326    
327 dpavlin 2 =head2 to_ascii
328    
329     Dump ascii output of selected MFN
330    
331     print $isis->to_ascii(55);
332    
333     =cut
334    
335     sub to_ascii {
336     my $self = shift;
337    
338     my $mfn = shift || croak "need MFN";
339    
340     my $rec = $self->GetMFN($mfn);
341    
342     print STDERR Dumper($rec);
343    
344     my $out = "0\t$mfn";
345    
346     foreach my $f (sort keys %{$rec}) {
347     $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
348     }
349    
350     $out .= "\n";
351    
352     return $out;
353     }
354    
355     ################# old cruft which is not ported from php to perl
356    
357 dpavlin 1 =begin php
358    
359     # Load the dictionary from the $db.L0x files.
360     # Not usefull Yet
361    
362     sub LoadDictionary()
363     {
364     $fileL01=fopen($self->{isisdb}.".L01","r");
365     rewind($fileL01);
366    
367     do
368     {
369    
370     $POS=$self->Read32($fileL01);
371     $OCK=$self->Read16($fileL01);
372     $IT=$self->Read16($fileL01);
373     $PS=$self->Read32($fileL01);
374     print "<br>PS:".$PS." ".$self->{ORDF}->{1}." ";
375     for ($i=0;$i<$OCK;$i++)
376     {
377     $KEY=fread($fileL01,10);
378    
379     print $KEY." ### ";
380    
381     $INFO1=$self->Read32($fileL01);
382     $INFO2=$self->Read32($fileL01);
383    
384     #L01Key->{$key}=array($INFO1,$INFO2);
385     }
386    
387     rewind($fileL01);
388     $offset=($PS-1)*(12+$self->{ORDF}->{1}*18*2);
389     fseek($fileL01,$offset);
390    
391     } While (!feof($fileL01));
392    
393     fclose($fileL01);
394     }
395    
396     # self function search through the tree and returns an array of pointers to IFP
397     # The function must be recursive
398    
399     sub SearchTree($search,$fileNB,$PUNT)
400     {
401     $offset=(($PUNT-1)*(8+2*$self->{ORDN}->{1}*14));
402    
403     rewind($fileNB1);
404    
405     fseek($fileNB,$offset);
406    
407     $POS=$self->Read32($fileNB);
408     $OCK=$self->Read16($fileNB);
409     $IT=$self->Read16($fileNB);
410    
411     #print "<br>".$POS." - ".$OCK." - ".$IT;
412    
413     $OLDPUNT=$POS;
414     $j=0;
415     for ($i=0;$i<$OCK;$i++)
416     {
417     $KEY=fread($fileNB,10);
418    
419     $PUNT=$self->Read32($fileNB);
420    
421     #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
422    
423     If (strcmp($search,chop($KEY))<0)
424     {
425     break;
426     }
427     $OLDPUNT=$PUNT;
428     }
429     #print $OLDPUNT;
430     Return $OLDPUNT;
431     }
432    
433     # Search ISIS for record containing search
434     # Return a sorted array of MFN
435    
436     sub Search($search)
437     {
438    
439     $search=strtoupper($search);
440     #print "Searching....".$search." - ".$self->{POSRX}->{1}."<br>";
441     # first search .x01
442    
443    
444     # Search in .N01
445    
446    
447     $fileN01=fopen($self->{isisdb}.".N01","r");
448     $offset=(($self->{POSRX}->{1}-1)*(8+2*$self->{ORDN}->{1}*14));
449    
450     do
451     {
452     rewind($fileN01);
453    
454     fseek($fileN01,$offset);
455    
456     $POS=$self->Read32($fileN01);
457     $OCK=$self->Read16($fileN01);
458     $IT=$self->Read16($fileN01);
459    
460     #print "<br>".$POS." - ".$OCK." - ".$IT;
461    
462     $OLDPUNT=$POS;
463     for ($i=0;$i<$OCK;$i++)
464     {
465     $KEY=fread($fileN01,10);
466    
467     $PUNT=$self->Read32($fileN01);
468    
469     #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
470    
471     If (strcmp($search,chop($KEY))<0)
472     {
473     break;
474     }
475     $OLDPUNT=$PUNT;
476     }
477     $offset=(($OLDPUNT-1)*(8+2*$self->{ORDN}->{1}*14));
478     } while ($OLDPUNT>0);
479     #print $OLDPUNT;
480    
481    
482     fclose($fileN01);
483    
484     # Now look for records in .L01 file
485     $fileL01=fopen($self->{isisdb}.".L01","r");
486     rewind($fileL01);
487    
488     $offset=(-$OLDPUNT-1)*(12+$self->{ORDF}->{1}*18*2);
489     fseek($fileL01,$offset);
490    
491     $POS=$self->Read32($fileL01);
492     $OCK=$self->Read16($fileL01);
493     $IT=$self->Read16($fileL01);
494     $PS=$self->Read32($fileL01);
495     #print "<br>POS:".$POS." ".$self->{ORDF}->{1}." ";
496     for ($i=0;$i<$OCK;$i++)
497     {
498     $KEY=fread($fileL01,10);
499    
500     #print $KEY." ### ";
501    
502     $INFO1=$self->Read32($fileL01);
503     $INFO2=$self->Read32($fileL01);
504    
505     If (strcmp($search,chop($KEY))==0)
506     {
507     break;
508     }
509     }
510    
511     fclose($fileL01);
512    
513     #print $INFO1."--".$INFO2;
514    
515     # Now look in .IFP for the MFN
516     $fileIFP=fopen($self->{isisdb}.".IFP","r");
517     rewind($fileIFP);
518     $offset=($INFO1-1)*512+($INFO2*4);
519     fseek($fileIFP,$offset);
520    
521     $IFPBLK=$self->Read32($fileIFP);
522    
523     $IFPNXTB=$self->Read32($fileIFP);
524     $IFPNXTP=$self->Read32($fileIFP);
525     $IFPTOTP=$self->Read32($fileIFP);
526     $IFPSEGP=$self->Read32($fileIFP);
527     $IFPSEGC=$self->Read32($fileIFP);
528    
529    
530     #print "<br>IFP:".$IFPBLK." # ".$IFPNXTB." - ".$IFPNXTP." - ".$IFPTOTP." - ".$IFPSEGP." - ".$IFPSEGC;
531    
532     rewind($fileIFP);
533     $offset=($INFO1-1)*512+24+($INFO2*4);
534     fseek($fileIFP,$offset);
535    
536     $j=24+($INFO2*4);
537     $k=0;
538     $l=1;
539     $OLDPMFN="";
540     for ($i=0;$i<$IFPSEGP;$i++)
541     {
542     $B1=$self->Read8($fileIFP);
543     $B2=$self->Read8($fileIFP);
544     $B3=$self->Read8($fileIFP);
545     $B4=$self->Read8($fileIFP);
546     $B5=$self->Read8($fileIFP);
547     $B6=$self->Read8($fileIFP);
548     $B7=$self->Read8($fileIFP);
549     $B8=$self->Read8($fileIFP);
550    
551     $PMFN=$B1*65536+$B2*256+$B3;
552     $PTAG=$B4*256+$B5;
553     $POCC=$B6;
554     $PCNT=$B7*256+$B8;
555    
556     if ($OLDPMFN!=$PMFN)
557     {
558     if ($PMFN!=0)
559     {
560     $self->{MFNArray}->{$l}=$PMFN;
561     $OLDPMFN=$PMFN;
562     $l+=1;
563     }
564     }
565    
566     $j=$j+8;
567     #print "<br>".$PMFN."-".$PTAG." - ".$POCC." - ".$PCNT;
568     #print "@@".$j."@@@@";
569     if ($j>=504)
570     {
571     if ($IFPNXTB==0 && $IFPNXTP==0)
572     {
573     $k=$k+1;
574     rewind($fileIFP);
575     $offset=($INFO1-1+$k)*512;
576     fseek($fileIFP,$offset);
577     $B=$self->Read32($fileIFP);
578     #print "<br>-".$B."-<br>";
579     $j=0;
580     } else
581     {
582     rewind($fileIFP);
583     $offset=($IFPNXTB-1)*512;
584     fseek($fileIFP,$offset);
585    
586     $OLDIFPNXTB=$IFPNXTB;
587     $OLDIFPNXTP=$IFPNXTP;
588    
589     $IFPBLK=$self->Read32($fileIFP);
590    
591     $IFPNXTB=$self->Read32($fileIFP);
592     $IFPNXTP=$self->Read32($fileIFP);
593     $IFPTOTP=$self->Read32($fileIFP);
594     $IFPSEGP=$self->Read32($fileIFP);
595     $IFPSEGC=$self->Read32($fileIFP);
596    
597     rewind($fileIFP);
598     $offset=($OLDIFPNXTB-1)*512+24+($OLDIFPNXTP*4);
599     fseek($fileIFP,$offset);
600    
601     $j=24+($OLDIFPNXTP*4);
602     $k=0;
603     $j=0;
604     }
605     }
606    
607     }
608     fclose($fileIFP);
609     return $l-1;
610     }
611    
612     =cut
613    
614     #
615     # XXX porting from php left-over:
616     #
617     # do I *REALLY* need those methods, or should I use
618     # $self->{something} directly?
619     #
620     # Probably direct usage is better!
621     #
622    
623     sub GetFieldName {
624     my $self = shift;
625     return $self->{FieldName};
626     }
627    
628     sub GetTagName {
629     my $self = shift;
630     return $self->{TagName};
631     }
632    
633     sub GetFieldTag {
634     my $self = shift;
635     return $self->{FieldTAG};
636     }
637    
638     sub GetNextMFN {
639     my $self = shift;
640     return $self->{NXTMFN};
641     }
642    
643     sub GetMFNArray {
644     my $self = shift;
645     return $self->{MFNArray};
646     }
647     =begin php
648    
649     sub Read32($fileNB)
650     {
651     $B1=ord(fread($fileNB,1));
652     $B2=ord(fread($fileNB,1));
653     $B3=ord(fread($fileNB,1));
654     $B4=ord(fread($fileNB,1));
655    
656     if ($B4<=128)
657     {
658     $value=$B1+$B2*256+$B3*65536+$B4*16777216;
659     } else
660     {
661     $value=$self->Not8($B1)+$self->Not8($B2)*256+$self->Not8($B3)*65536+$self->Not8($B4)*16777216;
662     $value=-($value+1);
663     }
664     # print "(".$B1.",".$B2.",".$B3.",".$B4.":".$value.")";
665    
666     return $value;
667     }
668    
669     sub Read24($fileNB)
670     {
671     $B1=ord(fread($fileNB,1));
672     $B2=ord(fread($fileNB,1));
673     $B3=ord(fread($fileNB,1));
674    
675     $value=$B1+$B2*256+$B3*65536;
676    
677     # print "(".$B1.",".$B2.",".$B3.":".$value.")";
678    
679     return $value;
680     }
681    
682     sub Read16($fileNB)
683     {
684     $B1=ord(fread($fileNB,1));
685     $B2=ord(fread($fileNB,1));
686    
687     $value=$B1+$B2*256;
688     # print "(".$B1.",".$B2.":".$value.")";
689    
690     return $value;
691     }
692    
693     sub Read8($fileNB)
694     {
695     $B1=ord(fread($fileNB,1));
696    
697     $value=$B1;
698     # print "(".$value.")";
699    
700     return $value;
701     }
702    
703     sub Not8($value)
704     {
705     $value=decbin($value);
706     if (strlen($value)<8)
707     {
708     $buffer="";
709     for($i=0;$i<(8-strlen($value));$i++)
710     {
711     $buffer.="0";
712     }
713     $value=$buffer.$value;
714     }
715     $value=ereg_replace("0","3",$value);
716     $value=ereg_replace("1","0",$value);
717     $value=ereg_replace("3","1",$value);
718     $value=bindec($value);
719     return $value;
720     }
721     }
722    
723     =cut
724    
725     1;
726     __END__
727    
728     =head1 BUGS
729    
730     This module has been very lightly tested. Use with caution and report bugs.
731    
732     =head1 AUTHOR
733    
734     Dobrica Pavlinusic
735     CPAN ID: DPAVLIN
736     dpavlin@rot13.org
737     http://www.rot13.org/~dpavlin/
738    
739     This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
740     written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
741    
742     =head1 COPYRIGHT
743    
744     This program is free software; you can redistribute
745     it and/or modify it under the same terms as Perl itself.
746    
747     The full text of the license can be found in the
748     LICENSE file included with this module.
749    
750    
751     =head1 SEE ALSO
752    
753     L<http://www.openisis.org|OpenIsis>, perl(1).
754    

  ViewVC Help
Powered by ViewVC 1.1.26