/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/IsisDB.pm revision 32 by dpavlin, Wed Jan 5 15:46:26 2005 UTC trunk/lib/Biblio/Isis.pm revision 41 by dpavlin, Sat Mar 12 21:05:29 2005 UTC
# Line 1  Line 1 
1  package IsisDB;  package Biblio::Isis;
2  use strict;  use strict;
3    
4  use Carp;  use Carp;
# Line 9  use Data::Dumper; Line 9  use Data::Dumper;
9  BEGIN {  BEGIN {
10          use Exporter ();          use Exporter ();
11          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12          $VERSION     = 0.09;          $VERSION     = 0.13;
13          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
14          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
15          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 20  BEGIN { Line 20  BEGIN {
20    
21  =head1 NAME  =head1 NAME
22    
23  IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database  Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
24    
25  =head1 SYNOPSIS  =head1 SYNOPSIS
26    
27    use IsisDB;    use Biblio::Isis;
28    
29    my $isis = new IsisDB(    my $isis = new Biblio::Isis(
30          isisdb => './cds/cds',          isisdb => './cds/cds',
31    );    );
32    
# Line 81  rarely an issue). Line 81  rarely an issue).
81    
82  Open ISIS database  Open ISIS database
83    
84   my $isis = new IsisDB(   my $isis = new Biblio::Isis(
85          isisdb => './cds/cds',          isisdb => './cds/cds',
86          read_fdt => 1,          read_fdt => 1,
87          include_deleted => 1,          include_deleted => 1,
# Line 147  sub new { Line 147  sub new {
147          push @must_exist, "fdt" if ($self->{read_fdt});          push @must_exist, "fdt" if ($self->{read_fdt});
148    
149          foreach my $ext (@must_exist) {          foreach my $ext (@must_exist) {
150                  croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});                  unless ($self->{$ext."_file"}) {
151                            carp "missing ",uc($ext)," file in ",$self->{isisdb};
152                            return;
153                    }
154          }          }
155    
156          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});
# Line 158  sub new { Line 161  sub new {
161                  # read the $db.FDT file for tags                  # read the $db.FDT file for tags
162                  my $fieldzone=0;                  my $fieldzone=0;
163    
164                  open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";                  open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
165                    binmode($fileFDT);
166    
167                  while (<fileFDT>) {                  while (<$fileFDT>) {
168                          chomp;                          chomp;
169                          if ($fieldzone) {                          if ($fieldzone) {
170                                  my $name=substr($_,0,30);                                  my $name=substr($_,0,30);
# Line 177  sub new { Line 181  sub new {
181                          }                          }
182                  }                  }
183                                    
184                  close(fileFDT);                  close($fileFDT);
185          }          }
186    
187          # Get the Maximum MFN from $db.MST          # Get the Maximum MFN from $db.MST
188    
189          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
190            binmode($self->{'fileMST'});
191    
192          # MST format:   (* = 32 bit signed)          # MST format:   (* = 32 bit signed)
193          # CTLMFN*       always 0          # CTLMFN*       always 0
# Line 190  sub new { Line 195  sub new {
195          # NXTMFB*       last block allocated to master file          # NXTMFB*       last block allocated to master file
196          # NXTMFP        offset to next available position in last block          # NXTMFP        offset to next available position in last block
197          # MFTYPE        always 0 for user db file (1 for system)          # MFTYPE        always 0 for user db file (1 for system)
198          seek($self->{'fileMST'},4,0);          seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
199    
200          my $buff;          my $buff;
201    
202          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
203          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
   
   
   
204    
205          print STDERR Dumper($self),"\n" if ($self->{debug});          print STDERR Dumper($self),"\n" if ($self->{debug});
206    
207          # open files for later          # open files for later
208          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
209            binmode($self->{'fileXRF'});
210    
211          $self ? return $self : return undef;          $self ? return $self : return undef;
212  }  }
# Line 221  sub count { Line 224  sub count {
224          return $self->{'NXTMFN'} - 1;          return $self->{'NXTMFN'} - 1;
225  }  }
226    
 =head2 read_cnt  
   
 Read content of C<.CNT> file and return hash containing it.  
   
   print Dumper($isis->read_cnt);  
   
 This function is not used by module (C<.CNT> files are not required for this  
 module to work), but it can be useful to examine your index (while debugging  
 for example).  
   
 =cut  
   
 sub read_cnt  {  
         my $self = shift;  
   
         croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});  
   
         # Get the index information from $db.CNT  
     
         open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";  
   
         my $buff;  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         close(fileCNT);  
   
         return $self->{cnt};  
 }  
   
 =head2 unpack_cnt  
   
 Unpack one of two 26 bytes fixed length record in C<.CNT> file.  
   
 Here is definition of record:  
   
  off key        description                             size  
   0: IDTYPE     BTree type                              s  
   2: ORDN       Nodes Order                             s  
   4: ORDF       Leafs Order                             s  
   6: N          Number of Memory buffers for nodes      s  
   8: K          Number of buffers for first level index s  
  10: LIV        Current number of Index Levels          s  
  12: POSRX      Pointer to Root Record in N0x           l  
  16: NMAXPOS    Next Available position in N0x          l  
  20: FMAXPOS    Next available position in L0x          l  
  24: ABNORMAL   Formal BTree normality indicator        s  
  length: 26 bytes  
   
 This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.  
   
 =cut  
   
 sub unpack_cnt {  
         my $self = shift;  
   
         my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
         my $buff = shift || return;  
         my @arr = unpack("ssssssllls", $buff);  
   
         print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
         my $IDTYPE = shift @arr;  
         foreach (@flds) {  
                 $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
         }  
 }  
   
227  =head2 fetch  =head2 fetch
228    
229  Read record with selected MFN  Read record with selected MFN
# Line 334  sub fetch { Line 264  sub fetch {
264    
265          # read XRFMFB abd XRFMFP          # read XRFMFB abd XRFMFP
266          read($self->{'fileXRF'}, $buff, 4);          read($self->{'fileXRF'}, $buff, 4);
267          my $pointer=unpack("l",$buff) || carp "pointer is null";          my $pointer=unpack("V",$buff);
268            if (! $pointer) {
269                    if ($self->{include_deleted}) {
270                            return;
271                    } else {
272                            warn "pointer for MFN $mfn is null\n";
273                            return;
274                    }
275            }
276    
277          # check for logically deleted record          # check for logically deleted record
278          if ($pointer < 0) {          if ($pointer & 0x80000000) {
279                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
280                  $self->{deleted} = $mfn;                  $self->{deleted} = $mfn;
281    
282                  return unless $self->{include_deleted};                  return unless $self->{include_deleted};
283    
284                  $pointer = abs($pointer);                  # abs
285                    $pointer = ($pointer ^ 0xffffffff) + 1;
286          }          }
287    
288          my $XRFMFB = int($pointer/2048);          my $XRFMFB = int($pointer/2048);
# Line 358  sub fetch { Line 297  sub fetch {
297    
298          # Get Record Information          # Get Record Information
299    
300          seek($self->{'fileMST'},$blk_off,0);          seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
301    
302          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
303          my $value=unpack("l",$buff);          my $value=unpack("V",$buff);
304    
305          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
306    
# Line 378  sub fetch { Line 317  sub fetch {
317    
318          read($self->{'fileMST'}, $buff, 14);          read($self->{'fileMST'}, $buff, 14);
319    
320          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
321    
322          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
323    
# Line 398  sub fetch { Line 337  sub fetch {
337    
338          for (my $i = 0 ; $i < $NVF ; $i++) {          for (my $i = 0 ; $i < $NVF ; $i++) {
339    
340                  my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));                  my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
341    
342                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
343    
# Line 462  sub to_ascii { Line 401  sub to_ascii {
401    
402          my $mfn = shift || croak "need MFN";          my $mfn = shift || croak "need MFN";
403    
404          my $rec = $self->fetch($mfn);          my $rec = $self->fetch($mfn) || return;
405    
406          my $out = "0\t$mfn";          my $out = "0\t$mfn";
407    
# Line 530  sub to_hash { Line 469  sub to_hash {
469          # init record to include MFN as field 000          # init record to include MFN as field 000
470          my $rec = { '000' => [ $mfn ] };          my $rec = { '000' => [ $mfn ] };
471    
472          my $row = $self->fetch($mfn);          my $row = $self->fetch($mfn) || return;
473    
474          foreach my $k (keys %{$row}) {          foreach my $k (keys %{$row}) {
475                  foreach my $l (@{$row->{$k}}) {                  foreach my $l (@{$row->{$k}}) {
# Line 574  sub tag_name { Line 513  sub tag_name {
513          return $self->{'TagName'}->{$tag} || $tag;          return $self->{'TagName'}->{$tag} || $tag;
514  }  }
515    
516    
517    =head2 read_cnt
518    
519    Read content of C<.CNT> file and return hash containing it.
520    
521      print Dumper($isis->read_cnt);
522    
523    This function is not used by module (C<.CNT> files are not required for this
524    module to work), but it can be useful to examine your index (while debugging
525    for example).
526    
527    =cut
528    
529    sub read_cnt  {
530            my $self = shift;
531    
532            croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
533    
534            # Get the index information from $db.CNT
535      
536            open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
537            binmode($fileCNT);
538    
539            my $buff;
540    
541            read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
542            $self->unpack_cnt($buff);
543    
544            read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
545            $self->unpack_cnt($buff);
546    
547            close($fileCNT);
548    
549            return $self->{cnt};
550    }
551    
552    =head2 unpack_cnt
553    
554    Unpack one of two 26 bytes fixed length record in C<.CNT> file.
555    
556    Here is definition of record:
557    
558     off key        description                             size
559      0: IDTYPE     BTree type                              s
560      2: ORDN       Nodes Order                             s
561      4: ORDF       Leafs Order                             s
562      6: N          Number of Memory buffers for nodes      s
563      8: K          Number of buffers for first level index s
564     10: LIV        Current number of Index Levels          s
565     12: POSRX      Pointer to Root Record in N0x           l
566     16: NMAXPOS    Next Available position in N0x          l
567     20: FMAXPOS    Next available position in L0x          l
568     24: ABNORMAL   Formal BTree normality indicator        s
569     length: 26 bytes
570    
571    This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
572    
573    =cut
574    
575    sub unpack_cnt {
576            my $self = shift;
577    
578            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
579    
580            my $buff = shift || return;
581            my @arr = unpack("vvvvvvVVVv", $buff);
582    
583            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
584    
585            my $IDTYPE = shift @arr;
586            foreach (@flds) {
587                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
588            }
589    }
590    
591  1;  1;
592    
593  =head1 BUGS  =head1 BUGS

Legend:
Removed from v.32  
changed lines
  Added in v.41

  ViewVC Help
Powered by ViewVC 1.1.26