/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (hide annotations)
Tue Dec 28 01:48:44 2004 UTC (19 years, 3 months ago) by dpavlin
Original Path: trunk/IsisDB.pm
File size: 16052 byte(s)
remove debugging

1 dpavlin 1 package IsisDB;
2     use strict;
3    
4     use Carp;
5     use Data::Dumper;
6    
7     BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10     $VERSION = 0.01;
11     @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21     IsisDB - Read CDS/ISIS database
22    
23     =head1 SYNOPSIS
24    
25     use IsisDB
26     my $isis = new IsisDB(
27     isisdb => './cds/cds',
28     );
29    
30     =head1 DESCRIPTION
31    
32     This module will read CDS/ISIS databases and create hash values out of it.
33     It can be used as perl-only alternative to OpenIsis module.
34    
35     =head1 METHODS
36    
37     =cut
38    
39     # my $ORDN; # Nodes Order
40     # my $ORDF; # Leafs Order
41     # my $N; # Number of Memory buffers for nodes
42     # my $K; # Number of buffers for first level index
43     # my $LIV; # Current number of Index Levels
44     # my $POSRX; # Pointer to Root Record in N0x
45     # my $NMAXPOS; # Next Available position in N0x
46     # my $FMAXPOS; # Next available position in L0x
47     # my $ABNORMAL; # Formal BTree normality indicator
48    
49     #
50     # some binary reads
51     #
52    
53     sub Read32 {
54     my $self = shift;
55    
56     my $f = shift || die "Read32 needs file handle";
57     read($$f,$b,4) || die "can't read 4 bytes from $$f from position ".tell($f);
58     return unpack("l",$b);
59     }
60    
61     =head2 new
62    
63     Open CDS/ISIS database
64    
65     my $isis = new IsisDB(
66     isisdb => './cds/cds',
67     read_fdt => 1,
68     debug => 1,
69     );
70    
71 dpavlin 2 Options are described below:
72    
73     =over 5
74    
75 dpavlin 1 =item isisdb
76    
77     Prefix path to CDS/ISIS. It should contain full or relative path to database
78     and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
79    
80     =item read_fdt
81    
82     Boolean flag to specify if field definition table should be read. It's off
83     by default.
84    
85     =item debug
86    
87     Dump a C<lot> of debugging output.
88    
89 dpavlin 2 =back
90    
91     It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
92    
93 dpavlin 1 =cut
94    
95     sub new {
96     my $class = shift;
97     my $self = {};
98     bless($self, $class);
99    
100     $self->{isisdb} = {@_}->{isisdb} || croak "new needs database name as argument!";
101    
102 dpavlin 2 $self->{debug} = {@_}->{debug};
103 dpavlin 1
104     # if you want to read .FDT file use read_fdt argument when creating class!
105     if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
106    
107     # read the $db.FDT file for tags
108     my $fieldzone=0;
109    
110     open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
111    
112     while (<fileFDT>) {
113     chomp;
114     if ($fieldzone) {
115     my $name=substr($_,0,30);
116     my $tag=substr($_,50,3);
117    
118     $name =~ s/\s+$//;
119     $tag =~ s/\s+$//;
120    
121     $self->{'TagName'}->{$tag}=$name;
122     }
123    
124     if (/^\*\*\*/) {
125     $fieldzone=1;
126     }
127     }
128    
129     close(fileFDT);
130     }
131    
132     # Get the Maximum MFN from $db.MST
133    
134     open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
135    
136     # MST format: (* = 32 bit signed)
137     # CTLMFN* always 0
138     # NXTMFN* MFN to be assigned to the next record created
139     # NXTMFB* last block allocated to master file
140     # NXTMFP offset to next available position in last block
141     # MFTYPE always 0 for user db file (1 for system)
142     seek(fileMST,4,0);
143     $self->{'NXTMFN'}=$self->Read32(\*fileMST) || carp "NXTNFN is zero";
144    
145 dpavlin 2 # save maximum MFN
146     $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
147    
148 dpavlin 1 close(fileMST);
149    
150     # Get the index information from $db.CNT
151    
152     open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
153    
154     # There is two 26 Bytes fixed lenght records
155    
156     # 0: IDTYPE BTree type 16
157     # 2: ORDN Nodes Order 16
158     # 4: ORDF Leafs Order 16
159     # 6: N Number of Memory buffers for nodes 16
160     # 8: K Number of buffers for first level index 16
161     # 10: LIV Current number of Index Levels 16
162     # 12: POSRX* Pointer to Root Record in N0x 32
163     # 16: NMAXPOS* Next Available position in N0x 32
164     # 20: FMAXPOS* Next available position in L0x 32
165     # 24: ABNORMAL Formal BTree normality indicator 16
166     # length: 26 bytes
167    
168     sub unpack_cnt {
169     my $self = shift;
170    
171     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
172    
173     my $buff = shift || return;
174     my @arr = unpack("ssssssllls", $buff);
175    
176     my $IDTYPE = shift @arr;
177     foreach (@flds) {
178     $self->{$IDTYPE}->{$_} = abs(shift @arr);
179     }
180     }
181    
182     my $buff;
183     read(fileCNT, $buff, 26);
184     $self->unpack_cnt($buff);
185    
186     read(fileCNT, $buff, 26);
187     $self->unpack_cnt($buff);
188    
189    
190     close(fileCNT);
191    
192     print Dumper($self) if ($self->{debug});
193    
194     $self ? return $self : return undef;
195     }
196    
197 dpavlin 2 =head2 GetMFN
198 dpavlin 1
199 dpavlin 2 Read record with selected MFN
200 dpavlin 1
201 dpavlin 2 my $rec = $isis->GetMFN(55);
202    
203     Returns hash with keys which are field names and values are unpacked values
204     for that field.
205    
206     =cut
207    
208 dpavlin 1 sub GetMFN {
209     my $self = shift;
210    
211     my $mfn = shift || croak "GetMFN needs MFN as argument!";
212    
213     print "GetMFN: $mfn\n" if ($self->{debug});
214    
215     open(fileXRF, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
216    
217     # XXX check this?
218     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
219    
220     print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
221     seek(fileXRF,$mfnpos,0);
222    
223     # read XRFMFB abd XRFMFP
224     my $pointer=$self->Read32(\*fileXRF);
225    
226     my $XRFMFB = int($pointer/2048);
227     my $XRFMFP = $pointer - ($XRFMFB*2048);
228    
229     print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
230    
231     # XXX fix this to be more readable!!
232     # e.g. (XRFMFB - 1) * 512 + XRFMFP
233    
234     my $offset = $pointer;
235     my $offset2=int($offset/2048)-1;
236     my $offset22=int($offset/4096);
237     my $offset3=$offset-($offset22*4096);
238     if ($offset3>512) {
239     $offset3=$offset3-2048;
240     }
241     my $offset4=($offset2*512)+$offset3;
242    
243     print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
244    
245     close(fileXRF);
246    
247     # Get Record Information
248    
249     open(fileMST, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
250    
251     seek(fileMST,$offset4,0);
252    
253     my $value=$self->Read32(\*fileMST);
254    
255     if ($value!=$mfn) {
256     print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
257     return -1; # XXX deleted record?
258     }
259    
260     # $MFRL=$self->Read16($fileMST);
261     # $MFBWB=$self->Read32($fileMST);
262     # $MFBWP=$self->Read16($fileMST);
263     # $BASE=$self->Read16($fileMST);
264     # $NVF=$self->Read16($fileMST);
265     # $STATUS=$self->Read16($fileMST);
266    
267     my $buff;
268     read(fileMST, $buff, 14);
269    
270     my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
271    
272     print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
273    
274     # Get Directory Format
275    
276     my @FieldPOS;
277     my @FieldLEN;
278     my @FieldTAG;
279    
280     for (my $i = 0 ; $i < $NVF ; $i++) {
281    
282     # $TAG=$self->Read16($fileMST);
283     # $POS=$self->Read16($fileMST);
284     # $LEN=$self->Read16($fileMST);
285    
286     read(fileMST, $buff, 6);
287     my ($TAG,$POS,$LEN) = unpack("sss", $buff);
288    
289     print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
290    
291     # The TAG does not exists in .FDT so we set it to 0.
292     #
293     # XXX This is removed from perl version; .FDT file is updated manually, so
294     # you will often have fields in .MST file which aren't in .FDT. On the other
295     # hand, IsisMarc doesn't use .FDT files at all!
296    
297     #if (! $self->{TagName}->{$TAG}) {
298     # $TAG=0;
299     #}
300    
301     push @FieldTAG,$TAG;
302     push @FieldPOS,$POS;
303     push @FieldLEN,$LEN;
304     }
305    
306     # Get Variable Fields
307    
308 dpavlin 2 delete $self->{record};
309    
310 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
311     my $rec;
312     read(fileMST,$rec,$FieldLEN[$i]);
313 dpavlin 2 push @{$self->{record}->{$FieldTAG[$i]}}, $rec;
314 dpavlin 1 }
315     close(fileMST);
316    
317     # The record is marked for deletion
318     if ($STATUS==1) {
319     return -1;
320     }
321    
322     print Dumper($self) if ($self->{debug});
323    
324 dpavlin 2 return $self->{'record'};
325 dpavlin 1 }
326    
327 dpavlin 2 =head2 to_ascii
328    
329     Dump ascii output of selected MFN
330    
331     print $isis->to_ascii(55);
332    
333     =cut
334    
335     sub to_ascii {
336     my $self = shift;
337    
338     my $mfn = shift || croak "need MFN";
339    
340     my $rec = $self->GetMFN($mfn);
341    
342     my $out = "0\t$mfn";
343    
344     foreach my $f (sort keys %{$rec}) {
345     $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
346     }
347    
348     $out .= "\n";
349    
350     return $out;
351     }
352    
353     ################# old cruft which is not ported from php to perl
354    
355 dpavlin 1 =begin php
356    
357     # Load the dictionary from the $db.L0x files.
358     # Not usefull Yet
359    
360     sub LoadDictionary()
361     {
362     $fileL01=fopen($self->{isisdb}.".L01","r");
363     rewind($fileL01);
364    
365     do
366     {
367    
368     $POS=$self->Read32($fileL01);
369     $OCK=$self->Read16($fileL01);
370     $IT=$self->Read16($fileL01);
371     $PS=$self->Read32($fileL01);
372     print "<br>PS:".$PS." ".$self->{ORDF}->{1}." ";
373     for ($i=0;$i<$OCK;$i++)
374     {
375     $KEY=fread($fileL01,10);
376    
377     print $KEY." ### ";
378    
379     $INFO1=$self->Read32($fileL01);
380     $INFO2=$self->Read32($fileL01);
381    
382     #L01Key->{$key}=array($INFO1,$INFO2);
383     }
384    
385     rewind($fileL01);
386     $offset=($PS-1)*(12+$self->{ORDF}->{1}*18*2);
387     fseek($fileL01,$offset);
388    
389     } While (!feof($fileL01));
390    
391     fclose($fileL01);
392     }
393    
394     # self function search through the tree and returns an array of pointers to IFP
395     # The function must be recursive
396    
397     sub SearchTree($search,$fileNB,$PUNT)
398     {
399     $offset=(($PUNT-1)*(8+2*$self->{ORDN}->{1}*14));
400    
401     rewind($fileNB1);
402    
403     fseek($fileNB,$offset);
404    
405     $POS=$self->Read32($fileNB);
406     $OCK=$self->Read16($fileNB);
407     $IT=$self->Read16($fileNB);
408    
409     #print "<br>".$POS." - ".$OCK." - ".$IT;
410    
411     $OLDPUNT=$POS;
412     $j=0;
413     for ($i=0;$i<$OCK;$i++)
414     {
415     $KEY=fread($fileNB,10);
416    
417     $PUNT=$self->Read32($fileNB);
418    
419     #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
420    
421     If (strcmp($search,chop($KEY))<0)
422     {
423     break;
424     }
425     $OLDPUNT=$PUNT;
426     }
427     #print $OLDPUNT;
428     Return $OLDPUNT;
429     }
430    
431     # Search ISIS for record containing search
432     # Return a sorted array of MFN
433    
434     sub Search($search)
435     {
436    
437     $search=strtoupper($search);
438     #print "Searching....".$search." - ".$self->{POSRX}->{1}."<br>";
439     # first search .x01
440    
441    
442     # Search in .N01
443    
444    
445     $fileN01=fopen($self->{isisdb}.".N01","r");
446     $offset=(($self->{POSRX}->{1}-1)*(8+2*$self->{ORDN}->{1}*14));
447    
448     do
449     {
450     rewind($fileN01);
451    
452     fseek($fileN01,$offset);
453    
454     $POS=$self->Read32($fileN01);
455     $OCK=$self->Read16($fileN01);
456     $IT=$self->Read16($fileN01);
457    
458     #print "<br>".$POS." - ".$OCK." - ".$IT;
459    
460     $OLDPUNT=$POS;
461     for ($i=0;$i<$OCK;$i++)
462     {
463     $KEY=fread($fileN01,10);
464    
465     $PUNT=$self->Read32($fileN01);
466    
467     #print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## ";
468    
469     If (strcmp($search,chop($KEY))<0)
470     {
471     break;
472     }
473     $OLDPUNT=$PUNT;
474     }
475     $offset=(($OLDPUNT-1)*(8+2*$self->{ORDN}->{1}*14));
476     } while ($OLDPUNT>0);
477     #print $OLDPUNT;
478    
479    
480     fclose($fileN01);
481    
482     # Now look for records in .L01 file
483     $fileL01=fopen($self->{isisdb}.".L01","r");
484     rewind($fileL01);
485    
486     $offset=(-$OLDPUNT-1)*(12+$self->{ORDF}->{1}*18*2);
487     fseek($fileL01,$offset);
488    
489     $POS=$self->Read32($fileL01);
490     $OCK=$self->Read16($fileL01);
491     $IT=$self->Read16($fileL01);
492     $PS=$self->Read32($fileL01);
493     #print "<br>POS:".$POS." ".$self->{ORDF}->{1}." ";
494     for ($i=0;$i<$OCK;$i++)
495     {
496     $KEY=fread($fileL01,10);
497    
498     #print $KEY." ### ";
499    
500     $INFO1=$self->Read32($fileL01);
501     $INFO2=$self->Read32($fileL01);
502    
503     If (strcmp($search,chop($KEY))==0)
504     {
505     break;
506     }
507     }
508    
509     fclose($fileL01);
510    
511     #print $INFO1."--".$INFO2;
512    
513     # Now look in .IFP for the MFN
514     $fileIFP=fopen($self->{isisdb}.".IFP","r");
515     rewind($fileIFP);
516     $offset=($INFO1-1)*512+($INFO2*4);
517     fseek($fileIFP,$offset);
518    
519     $IFPBLK=$self->Read32($fileIFP);
520    
521     $IFPNXTB=$self->Read32($fileIFP);
522     $IFPNXTP=$self->Read32($fileIFP);
523     $IFPTOTP=$self->Read32($fileIFP);
524     $IFPSEGP=$self->Read32($fileIFP);
525     $IFPSEGC=$self->Read32($fileIFP);
526    
527    
528     #print "<br>IFP:".$IFPBLK." # ".$IFPNXTB." - ".$IFPNXTP." - ".$IFPTOTP." - ".$IFPSEGP." - ".$IFPSEGC;
529    
530     rewind($fileIFP);
531     $offset=($INFO1-1)*512+24+($INFO2*4);
532     fseek($fileIFP,$offset);
533    
534     $j=24+($INFO2*4);
535     $k=0;
536     $l=1;
537     $OLDPMFN="";
538     for ($i=0;$i<$IFPSEGP;$i++)
539     {
540     $B1=$self->Read8($fileIFP);
541     $B2=$self->Read8($fileIFP);
542     $B3=$self->Read8($fileIFP);
543     $B4=$self->Read8($fileIFP);
544     $B5=$self->Read8($fileIFP);
545     $B6=$self->Read8($fileIFP);
546     $B7=$self->Read8($fileIFP);
547     $B8=$self->Read8($fileIFP);
548    
549     $PMFN=$B1*65536+$B2*256+$B3;
550     $PTAG=$B4*256+$B5;
551     $POCC=$B6;
552     $PCNT=$B7*256+$B8;
553    
554     if ($OLDPMFN!=$PMFN)
555     {
556     if ($PMFN!=0)
557     {
558     $self->{MFNArray}->{$l}=$PMFN;
559     $OLDPMFN=$PMFN;
560     $l+=1;
561     }
562     }
563    
564     $j=$j+8;
565     #print "<br>".$PMFN."-".$PTAG." - ".$POCC." - ".$PCNT;
566     #print "@@".$j."@@@@";
567     if ($j>=504)
568     {
569     if ($IFPNXTB==0 && $IFPNXTP==0)
570     {
571     $k=$k+1;
572     rewind($fileIFP);
573     $offset=($INFO1-1+$k)*512;
574     fseek($fileIFP,$offset);
575     $B=$self->Read32($fileIFP);
576     #print "<br>-".$B."-<br>";
577     $j=0;
578     } else
579     {
580     rewind($fileIFP);
581     $offset=($IFPNXTB-1)*512;
582     fseek($fileIFP,$offset);
583    
584     $OLDIFPNXTB=$IFPNXTB;
585     $OLDIFPNXTP=$IFPNXTP;
586    
587     $IFPBLK=$self->Read32($fileIFP);
588    
589     $IFPNXTB=$self->Read32($fileIFP);
590     $IFPNXTP=$self->Read32($fileIFP);
591     $IFPTOTP=$self->Read32($fileIFP);
592     $IFPSEGP=$self->Read32($fileIFP);
593     $IFPSEGC=$self->Read32($fileIFP);
594    
595     rewind($fileIFP);
596     $offset=($OLDIFPNXTB-1)*512+24+($OLDIFPNXTP*4);
597     fseek($fileIFP,$offset);
598    
599     $j=24+($OLDIFPNXTP*4);
600     $k=0;
601     $j=0;
602     }
603     }
604    
605     }
606     fclose($fileIFP);
607     return $l-1;
608     }
609    
610     =cut
611    
612     #
613     # XXX porting from php left-over:
614     #
615     # do I *REALLY* need those methods, or should I use
616     # $self->{something} directly?
617     #
618     # Probably direct usage is better!
619     #
620    
621     sub GetFieldName {
622     my $self = shift;
623     return $self->{FieldName};
624     }
625    
626     sub GetTagName {
627     my $self = shift;
628     return $self->{TagName};
629     }
630    
631     sub GetFieldTag {
632     my $self = shift;
633     return $self->{FieldTAG};
634     }
635    
636     sub GetNextMFN {
637     my $self = shift;
638     return $self->{NXTMFN};
639     }
640    
641     sub GetMFNArray {
642     my $self = shift;
643     return $self->{MFNArray};
644     }
645     =begin php
646    
647     sub Read32($fileNB)
648     {
649     $B1=ord(fread($fileNB,1));
650     $B2=ord(fread($fileNB,1));
651     $B3=ord(fread($fileNB,1));
652     $B4=ord(fread($fileNB,1));
653    
654     if ($B4<=128)
655     {
656     $value=$B1+$B2*256+$B3*65536+$B4*16777216;
657     } else
658     {
659     $value=$self->Not8($B1)+$self->Not8($B2)*256+$self->Not8($B3)*65536+$self->Not8($B4)*16777216;
660     $value=-($value+1);
661     }
662     # print "(".$B1.",".$B2.",".$B3.",".$B4.":".$value.")";
663    
664     return $value;
665     }
666    
667     sub Read24($fileNB)
668     {
669     $B1=ord(fread($fileNB,1));
670     $B2=ord(fread($fileNB,1));
671     $B3=ord(fread($fileNB,1));
672    
673     $value=$B1+$B2*256+$B3*65536;
674    
675     # print "(".$B1.",".$B2.",".$B3.":".$value.")";
676    
677     return $value;
678     }
679    
680     sub Read16($fileNB)
681     {
682     $B1=ord(fread($fileNB,1));
683     $B2=ord(fread($fileNB,1));
684    
685     $value=$B1+$B2*256;
686     # print "(".$B1.",".$B2.":".$value.")";
687    
688     return $value;
689     }
690    
691     sub Read8($fileNB)
692     {
693     $B1=ord(fread($fileNB,1));
694    
695     $value=$B1;
696     # print "(".$value.")";
697    
698     return $value;
699     }
700    
701     sub Not8($value)
702     {
703     $value=decbin($value);
704     if (strlen($value)<8)
705     {
706     $buffer="";
707     for($i=0;$i<(8-strlen($value));$i++)
708     {
709     $buffer.="0";
710     }
711     $value=$buffer.$value;
712     }
713     $value=ereg_replace("0","3",$value);
714     $value=ereg_replace("1","0",$value);
715     $value=ereg_replace("3","1",$value);
716     $value=bindec($value);
717     return $value;
718     }
719     }
720    
721     =cut
722    
723     1;
724     __END__
725    
726     =head1 BUGS
727    
728     This module has been very lightly tested. Use with caution and report bugs.
729    
730     =head1 AUTHOR
731    
732     Dobrica Pavlinusic
733     CPAN ID: DPAVLIN
734     dpavlin@rot13.org
735     http://www.rot13.org/~dpavlin/
736    
737     This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
738     written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
739    
740     =head1 COPYRIGHT
741    
742     This program is free software; you can redistribute
743     it and/or modify it under the same terms as Perl itself.
744    
745     The full text of the license can be found in the
746     LICENSE file included with this module.
747    
748    
749     =head1 SEE ALSO
750    
751     L<http://www.openisis.org|OpenIsis>, perl(1).
752    

  ViewVC Help
Powered by ViewVC 1.1.26