/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/IsisDB.pm revision 27 by dpavlin, Sat Jan 1 22:29:35 2005 UTC trunk/lib/Biblio/Isis.pm revision 37 by dpavlin, Fri Jan 7 20:57:56 2005 UTC
# Line 1  Line 1 
1  package IsisDB;  package Biblio::Isis;
2  use strict;  use strict;
3    
4  use Carp;  use Carp;
# Line 9  use Data::Dumper; Line 9  use Data::Dumper;
9  BEGIN {  BEGIN {
10          use Exporter ();          use Exporter ();
11          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12          $VERSION     = 0.08;          $VERSION     = 0.11;
13          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
14          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
15          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 20  BEGIN { Line 20  BEGIN {
20    
21  =head1 NAME  =head1 NAME
22    
23  IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database  Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
24    
25  =head1 SYNOPSIS  =head1 SYNOPSIS
26    
27    use IsisDB;    use Biblio::Isis;
28    
29    my $isis = new IsisDB(    my $isis = new Biblio::Isis(
30          isisdb => './cds/cds',          isisdb => './cds/cds',
31    );    );
32    
33    for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {    for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
34          print $isis->to_ascii($mfn),"\n";          print $isis->to_ascii($mfn),"\n";
35    }    }
36    
# Line 81  rarely an issue). Line 81  rarely an issue).
81    
82  Open ISIS database  Open ISIS database
83    
84   my $isis = new IsisDB(   my $isis = new Biblio::Isis(
85          isisdb => './cds/cds',          isisdb => './cds/cds',
86          read_fdt => 1,          read_fdt => 1,
87          include_deleted => 1,          include_deleted => 1,
# Line 123  Dump a B<lot> of debugging output. Line 123  Dump a B<lot> of debugging output.
123    
124  =back  =back
125    
 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.  
   
126  =cut  =cut
127    
128  sub new {  sub new {
# Line 160  sub new { Line 158  sub new {
158                  # read the $db.FDT file for tags                  # read the $db.FDT file for tags
159                  my $fieldzone=0;                  my $fieldzone=0;
160    
161                  open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";                  open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
162                    binmode($fileFDT);
163    
164                  while (<fileFDT>) {                  while (<$fileFDT>) {
165                          chomp;                          chomp;
166                          if ($fieldzone) {                          if ($fieldzone) {
167                                  my $name=substr($_,0,30);                                  my $name=substr($_,0,30);
# Line 179  sub new { Line 178  sub new {
178                          }                          }
179                  }                  }
180                                    
181                  close(fileFDT);                  close($fileFDT);
182          }          }
183    
184          # Get the Maximum MFN from $db.MST          # Get the Maximum MFN from $db.MST
185    
186          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
187            binmode($self->{'fileMST'});
188    
189          # MST format:   (* = 32 bit signed)          # MST format:   (* = 32 bit signed)
190          # CTLMFN*       always 0          # CTLMFN*       always 0
# Line 192  sub new { Line 192  sub new {
192          # NXTMFB*       last block allocated to master file          # NXTMFB*       last block allocated to master file
193          # NXTMFP        offset to next available position in last block          # NXTMFP        offset to next available position in last block
194          # MFTYPE        always 0 for user db file (1 for system)          # MFTYPE        always 0 for user db file (1 for system)
195          seek($self->{'fileMST'},4,0);          seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
196    
197          my $buff;          my $buff;
198    
199          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
200          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
   
         # save maximum MFN  
         $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;  
   
   
   
201    
202          print STDERR Dumper($self),"\n" if ($self->{debug});          print STDERR Dumper($self),"\n" if ($self->{debug});
203    
204          # open files for later          # open files for later
205          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
206            binmode($self->{'fileXRF'});
207    
208          $self ? return $self : return undef;          $self ? return $self : return undef;
209  }  }
210    
211  =head2 read_cnt  =head2 count
212    
213  Read content of C<.CNT> file and return hash containing it.  Return number of records in database
214    
215    print Dumper($isis->read_cnt);    print $isis->count;
   
 This function is not used by module (C<.CNT> files are not required for this  
 module to work), but it can be useful to examine your index (while debugging  
 for example).  
216    
217  =cut  =cut
218    
219  sub read_cnt  {  sub count {
220          my $self = shift;          my $self = shift;
221            return $self->{'NXTMFN'} - 1;
         croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});  
   
         # Get the index information from $db.CNT  
     
         open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";  
   
         # There is two 26 Bytes fixed lenght records  
   
         #  0: IDTYPE    BTree type                              16  
         #  2: ORDN      Nodes Order                             16  
         #  4: ORDF      Leafs Order                             16  
         #  6: N         Number of Memory buffers for nodes      16  
         #  8: K         Number of buffers for first level index 16  
         # 10: LIV       Current number of Index Levels          16  
         # 12: POSRX*    Pointer to Root Record in N0x           32  
         # 16: NMAXPOS*  Next Available position in N0x          32  
         # 20: FMAXPOS*  Next available position in L0x          32  
         # 24: ABNORMAL  Formal BTree normality indicator        16  
         # length: 26 bytes  
   
         sub unpack_cnt {  
                 my $self = shift;  
   
                 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
                 my $buff = shift || return;  
                 my @arr = unpack("ssssssllls", $buff);  
   
                 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
                 my $IDTYPE = shift @arr;  
                 foreach (@flds) {  
                         $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
                 }  
         }  
   
         my $buff;  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         close(fileCNT);  
   
         return $self->{cnt};  
222  }  }
223    
224  =head2 fetch  =head2 fetch
# Line 317  sub fetch { Line 261  sub fetch {
261    
262          # read XRFMFB abd XRFMFP          # read XRFMFB abd XRFMFP
263          read($self->{'fileXRF'}, $buff, 4);          read($self->{'fileXRF'}, $buff, 4);
264          my $pointer=unpack("l",$buff) || carp "pointer is null";          my $pointer=unpack("V",$buff) || croak "pointer is null";
265    
266          # check for logically deleted record          # check for logically deleted record
267          if ($pointer < 0) {          if ($pointer & 0x80000000) {
268                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
269                  $self->{deleted} = $mfn;                  $self->{deleted} = $mfn;
270    
271                  return unless $self->{include_deleted};                  return unless $self->{include_deleted};
272    
273                  $pointer = abs($pointer);                  # abs
274                    $pointer = ($pointer ^ 0xffffffff) + 1;
275          }          }
276    
277          my $XRFMFB = int($pointer/2048);          my $XRFMFB = int($pointer/2048);
# Line 341  sub fetch { Line 286  sub fetch {
286    
287          # Get Record Information          # Get Record Information
288    
289          seek($self->{'fileMST'},$blk_off,0);          seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
290    
291          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
292          my $value=unpack("l",$buff);          my $value=unpack("V",$buff);
293    
294          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
295    
# Line 361  sub fetch { Line 306  sub fetch {
306    
307          read($self->{'fileMST'}, $buff, 14);          read($self->{'fileMST'}, $buff, 14);
308    
309          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
310    
311          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
312    
# Line 381  sub fetch { Line 326  sub fetch {
326    
327          for (my $i = 0 ; $i < $NVF ; $i++) {          for (my $i = 0 ; $i < $NVF ; $i++) {
328    
329                  my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));                  my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
330    
331                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
332    
# Line 557  sub tag_name { Line 502  sub tag_name {
502          return $self->{'TagName'}->{$tag} || $tag;          return $self->{'TagName'}->{$tag} || $tag;
503  }  }
504    
505    
506    =head2 read_cnt
507    
508    Read content of C<.CNT> file and return hash containing it.
509    
510      print Dumper($isis->read_cnt);
511    
512    This function is not used by module (C<.CNT> files are not required for this
513    module to work), but it can be useful to examine your index (while debugging
514    for example).
515    
516    =cut
517    
518    sub read_cnt  {
519            my $self = shift;
520    
521            croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
522    
523            # Get the index information from $db.CNT
524      
525            open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
526            binmode($fileCNT);
527    
528            my $buff;
529    
530            read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
531            $self->unpack_cnt($buff);
532    
533            read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
534            $self->unpack_cnt($buff);
535    
536            close($fileCNT);
537    
538            return $self->{cnt};
539    }
540    
541    =head2 unpack_cnt
542    
543    Unpack one of two 26 bytes fixed length record in C<.CNT> file.
544    
545    Here is definition of record:
546    
547     off key        description                             size
548      0: IDTYPE     BTree type                              s
549      2: ORDN       Nodes Order                             s
550      4: ORDF       Leafs Order                             s
551      6: N          Number of Memory buffers for nodes      s
552      8: K          Number of buffers for first level index s
553     10: LIV        Current number of Index Levels          s
554     12: POSRX      Pointer to Root Record in N0x           l
555     16: NMAXPOS    Next Available position in N0x          l
556     20: FMAXPOS    Next available position in L0x          l
557     24: ABNORMAL   Formal BTree normality indicator        s
558     length: 26 bytes
559    
560    This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
561    
562    =cut
563    
564    sub unpack_cnt {
565            my $self = shift;
566    
567            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
568    
569            my $buff = shift || return;
570            my @arr = unpack("vvvvvvVVVv", $buff);
571    
572            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
573    
574            my $IDTYPE = shift @arr;
575            foreach (@flds) {
576                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
577            }
578    }
579    
580  1;  1;
581    
582  =head1 BUGS  =head1 BUGS

Legend:
Removed from v.27  
changed lines
  Added in v.37

  ViewVC Help
Powered by ViewVC 1.1.26