/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/IsisDB.pm revision 27 by dpavlin, Sat Jan 1 22:29:35 2005 UTC trunk/lib/Biblio/Isis.pm revision 54 by dpavlin, Fri Jul 7 23:45:12 2006 UTC
# Line 1  Line 1 
1  package IsisDB;  package Biblio::Isis;
2  use strict;  use strict;
3    
4  use Carp;  use Carp;
5  use File::Glob qw(:globally :nocase);  use File::Glob qw(:globally :nocase);
6    
 use Data::Dumper;  
   
7  BEGIN {  BEGIN {
8          use Exporter ();          use Exporter ();
9          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10          $VERSION     = 0.08;          $VERSION     = 0.20;
11          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
12          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
13          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 20  BEGIN { Line 18  BEGIN {
18    
19  =head1 NAME  =head1 NAME
20    
21  IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database  Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
25    use IsisDB;    use Biblio::Isis;
26    
27    my $isis = new IsisDB(    my $isis = new Biblio::Isis(
28          isisdb => './cds/cds',          isisdb => './cds/cds',
29    );    );
30    
31    for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {    for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
32          print $isis->to_ascii($mfn),"\n";          print $isis->to_ascii($mfn),"\n";
33    }    }
34    
# Line 81  rarely an issue). Line 79  rarely an issue).
79    
80  Open ISIS database  Open ISIS database
81    
82   my $isis = new IsisDB(   my $isis = new Biblio::Isis(
83          isisdb => './cds/cds',          isisdb => './cds/cds',
84          read_fdt => 1,          read_fdt => 1,
85          include_deleted => 1,          include_deleted => 1,
# Line 119  Filter code ref which will be used befor Line 117  Filter code ref which will be used befor
117    
118  =item debug  =item debug
119    
120  Dump a B<lot> of debugging output.  Dump a B<lot> of debugging output even at level 1. For even more increase level.
121    
122  =back  =back
123    
 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.  
   
124  =cut  =cut
125    
126  sub new {  sub new {
# Line 149  sub new { Line 145  sub new {
145          push @must_exist, "fdt" if ($self->{read_fdt});          push @must_exist, "fdt" if ($self->{read_fdt});
146    
147          foreach my $ext (@must_exist) {          foreach my $ext (@must_exist) {
148                  croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});                  unless ($self->{$ext."_file"}) {
149                            carp "missing ",uc($ext)," file in ",$self->{isisdb};
150                            return;
151                    }
152          }          }
153    
154          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});          if ($self->{debug}) {
155                    print STDERR "## using files: ",join(" ",@isis_files),"\n";
156                    eval "use Data::Dump";
157    
158                    if (! $@) {
159                            *Dumper = *Data::Dump::dump;
160                    } else {
161                            use Data::Dumper;
162                    }
163            }
164    
165          # if you want to read .FDT file use read_fdt argument when creating class!          # if you want to read .FDT file use read_fdt argument when creating class!
166          if ($self->{read_fdt} && -e $self->{fdt_file}) {          if ($self->{read_fdt} && -e $self->{fdt_file}) {
# Line 160  sub new { Line 168  sub new {
168                  # read the $db.FDT file for tags                  # read the $db.FDT file for tags
169                  my $fieldzone=0;                  my $fieldzone=0;
170    
171                  open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";                  open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
172                    binmode($fileFDT);
173    
174                  while (<fileFDT>) {                  while (<$fileFDT>) {
175                          chomp;                          chomp;
176                          if ($fieldzone) {                          if ($fieldzone) {
177                                  my $name=substr($_,0,30);                                  my $name=substr($_,0,30);
# Line 179  sub new { Line 188  sub new {
188                          }                          }
189                  }                  }
190                                    
191                  close(fileFDT);                  close($fileFDT);
192          }          }
193    
194          # Get the Maximum MFN from $db.MST          # Get the Maximum MFN from $db.MST
195    
196          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
197            binmode($self->{'fileMST'});
198    
199          # MST format:   (* = 32 bit signed)          # MST format:   (* = 32 bit signed)
200          # CTLMFN*       always 0          # CTLMFN*       always 0
# Line 192  sub new { Line 202  sub new {
202          # NXTMFB*       last block allocated to master file          # NXTMFB*       last block allocated to master file
203          # NXTMFP        offset to next available position in last block          # NXTMFP        offset to next available position in last block
204          # MFTYPE        always 0 for user db file (1 for system)          # MFTYPE        always 0 for user db file (1 for system)
205          seek($self->{'fileMST'},4,0);          seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
206    
207          my $buff;          my $buff;
208    
209          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
210          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
   
         # save maximum MFN  
         $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;  
211    
212            print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
   
   
         print STDERR Dumper($self),"\n" if ($self->{debug});  
213    
214          # open files for later          # open files for later
215          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
216            binmode($self->{'fileXRF'});
217    
218          $self ? return $self : return undef;          $self ? return $self : return undef;
219  }  }
220    
221  =head2 read_cnt  =head2 count
222    
223  Read content of C<.CNT> file and return hash containing it.  Return number of records in database
224    
225    print Dumper($isis->read_cnt);    print $isis->count;
   
 This function is not used by module (C<.CNT> files are not required for this  
 module to work), but it can be useful to examine your index (while debugging  
 for example).  
226    
227  =cut  =cut
228    
229  sub read_cnt  {  sub count {
230          my $self = shift;          my $self = shift;
231            return $self->{'NXTMFN'} - 1;
         croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});  
   
         # Get the index information from $db.CNT  
     
         open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";  
   
         # There is two 26 Bytes fixed lenght records  
   
         #  0: IDTYPE    BTree type                              16  
         #  2: ORDN      Nodes Order                             16  
         #  4: ORDF      Leafs Order                             16  
         #  6: N         Number of Memory buffers for nodes      16  
         #  8: K         Number of buffers for first level index 16  
         # 10: LIV       Current number of Index Levels          16  
         # 12: POSRX*    Pointer to Root Record in N0x           32  
         # 16: NMAXPOS*  Next Available position in N0x          32  
         # 20: FMAXPOS*  Next available position in L0x          32  
         # 24: ABNORMAL  Formal BTree normality indicator        16  
         # length: 26 bytes  
   
         sub unpack_cnt {  
                 my $self = shift;  
   
                 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
                 my $buff = shift || return;  
                 my @arr = unpack("ssssssllls", $buff);  
   
                 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
                 my $IDTYPE = shift @arr;  
                 foreach (@flds) {  
                         $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
                 }  
         }  
   
         my $buff;  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         read(fileCNT, $buff, 26);  
         $self->unpack_cnt($buff);  
   
         close(fileCNT);  
   
         return $self->{cnt};  
232  }  }
233    
234  =head2 fetch  =head2 fetch
# Line 317  sub fetch { Line 271  sub fetch {
271    
272          # read XRFMFB abd XRFMFP          # read XRFMFB abd XRFMFP
273          read($self->{'fileXRF'}, $buff, 4);          read($self->{'fileXRF'}, $buff, 4);
274          my $pointer=unpack("l",$buff) || carp "pointer is null";          my $pointer=unpack("V",$buff);
275            if (! $pointer) {
276                    if ($self->{include_deleted}) {
277                            return;
278                    } else {
279                            warn "pointer for MFN $mfn is null\n";
280                            return;
281                    }
282            }
283    
284          # check for logically deleted record          # check for logically deleted record
285          if ($pointer < 0) {          if ($pointer & 0x80000000) {
286                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});                  print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
287                  $self->{deleted} = $mfn;                  $self->{deleted} = $mfn;
288    
289                  return unless $self->{include_deleted};                  return unless $self->{include_deleted};
290    
291                  $pointer = abs($pointer);                  # abs
292                    $pointer = ($pointer ^ 0xffffffff) + 1;
293          }          }
294    
295          my $XRFMFB = int($pointer/2048);          my $XRFMFB = int($pointer/2048);
# Line 341  sub fetch { Line 304  sub fetch {
304    
305          # Get Record Information          # Get Record Information
306    
307          seek($self->{'fileMST'},$blk_off,0);          seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
308    
309          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
310          my $value=unpack("l",$buff);          my $value=unpack("V",$buff);
311    
312          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
313    
# Line 361  sub fetch { Line 324  sub fetch {
324    
325          read($self->{'fileMST'}, $buff, 14);          read($self->{'fileMST'}, $buff, 14);
326    
327          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
328    
329          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
330    
# Line 381  sub fetch { Line 344  sub fetch {
344    
345          for (my $i = 0 ; $i < $NVF ; $i++) {          for (my $i = 0 ; $i < $NVF ; $i++) {
346    
347                  my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));                  my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
348    
349                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
350    
# Line 422  sub fetch { Line 385  sub fetch {
385          return $self->{'record'};          return $self->{'record'};
386  }  }
387    
388    =head2 mfn
389    
390    Returns current MFN position
391    
392      my $mfn = $isis->mfn;
393    
394    =cut
395    
396    # This function should be simple return $self->{current_mfn},
397    # but if new is called with _hack_mfn it becomes setter.
398    # It's useful in tests when setting $isis->{record} directly
399    
400    sub mfn {
401            my $self = shift;
402            return $self->{current_mfn};
403    };
404    
405    
406  =head2 to_ascii  =head2 to_ascii
407    
408  Returns ASCII output of record with specified MFN  Returns ASCII output of record with specified MFN
# Line 445  sub to_ascii { Line 426  sub to_ascii {
426    
427          my $mfn = shift || croak "need MFN";          my $mfn = shift || croak "need MFN";
428    
429          my $rec = $self->fetch($mfn);          my $rec = $self->fetch($mfn) || return;
430    
431          my $out = "0\t$mfn";          my $out = "0\t$mfn";
432    
# Line 501  which will be used for identifiers, C<i1 Line 482  which will be used for identifiers, C<i1
482               }               }
483             ],             ],
484    
485    In case there are repeatable subfields in record, this will create
486    following structure:
487    
488      '900' => [ {
489            'a' => [ 'foo', 'bar', 'baz' ],
490      }]
491    
492  This method will also create additional field C<000> with MFN.  This method will also create additional field C<000> with MFN.
493    
494  =cut  =cut
# Line 513  sub to_hash { Line 501  sub to_hash {
501          # init record to include MFN as field 000          # init record to include MFN as field 000
502          my $rec = { '000' => [ $mfn ] };          my $rec = { '000' => [ $mfn ] };
503    
504          my $row = $self->fetch($mfn);          my $row = $self->fetch($mfn) || return;
505    
506          foreach my $k (keys %{$row}) {          foreach my $k (keys %{$row}) {
507                  foreach my $l (@{$row->{$k}}) {                  foreach my $l (@{$row->{$k}}) {
508    
509                          # filter output                          # filter output
510                          $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});                          if ($self->{'hash_filter'}) {
511                                    $l = $self->{'hash_filter'}->($l);
512                                    next unless defined($l);
513                            }
514    
515                          my $val;                          my $val;
516    
# Line 530  sub to_hash { Line 521  sub to_hash {
521                          if ($l =~ m/\^/) {                          if ($l =~ m/\^/) {
522                                  foreach my $t (split(/\^/,$l)) {                                  foreach my $t (split(/\^/,$l)) {
523                                          next if (! $t);                                          next if (! $t);
524                                          $val->{substr($t,0,1)} = substr($t,1);                                          my ($sf,$v) = (substr($t,0,1), substr($t,1));
525                                            # FIXME make this option !
526                                            next unless ($v);
527    #                                       warn "### $k^$sf:$v",$/ if ($self->{debug} > 1);
528    
529                                            # FIXME array return optional, by default unroll to ' ; '
530                                            if (ref( $val->{$sf} ) eq 'ARRAY') {
531    
532                                                    push @{ $val->{$sf} }, $v;
533                                            } elsif (defined( $val->{$sf} )) {
534                                                    # convert scalar field to array
535                                                    $val->{$sf} = [ $val->{$sf}, $v ];
536                                            } else {
537                                                    $val->{$sf} = $v;
538                                            }
539                                  }                                  }
540                          } else {                          } else {
541                                  $val = $l;                                  $val = $l;
# Line 557  sub tag_name { Line 562  sub tag_name {
562          return $self->{'TagName'}->{$tag} || $tag;          return $self->{'TagName'}->{$tag} || $tag;
563  }  }
564    
565    
566    =head2 read_cnt
567    
568    Read content of C<.CNT> file and return hash containing it.
569    
570      print Dumper($isis->read_cnt);
571    
572    This function is not used by module (C<.CNT> files are not required for this
573    module to work), but it can be useful to examine your index (while debugging
574    for example).
575    
576    =cut
577    
578    sub read_cnt  {
579            my $self = shift;
580    
581            croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
582    
583            # Get the index information from $db.CNT
584      
585            open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
586            binmode($fileCNT);
587    
588            my $buff;
589    
590            read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
591            $self->unpack_cnt($buff);
592    
593            read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
594            $self->unpack_cnt($buff);
595    
596            close($fileCNT);
597    
598            return $self->{cnt};
599    }
600    
601    =head2 unpack_cnt
602    
603    Unpack one of two 26 bytes fixed length record in C<.CNT> file.
604    
605    Here is definition of record:
606    
607     off key        description                             size
608      0: IDTYPE     BTree type                              s
609      2: ORDN       Nodes Order                             s
610      4: ORDF       Leafs Order                             s
611      6: N          Number of Memory buffers for nodes      s
612      8: K          Number of buffers for first level index s
613     10: LIV        Current number of Index Levels          s
614     12: POSRX      Pointer to Root Record in N0x           l
615     16: NMAXPOS    Next Available position in N0x          l
616     20: FMAXPOS    Next available position in L0x          l
617     24: ABNORMAL   Formal BTree normality indicator        s
618     length: 26 bytes
619    
620    This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
621    
622    =cut
623    
624    sub unpack_cnt {
625            my $self = shift;
626    
627            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
628    
629            my $buff = shift || return;
630            my @arr = unpack("vvvvvvVVVv", $buff);
631    
632            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
633    
634            my $IDTYPE = shift @arr;
635            foreach (@flds) {
636                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
637            }
638    }
639    
640  1;  1;
641    
642  =head1 BUGS  =head1 BUGS
# Line 574  module with databases from programs othe Line 654  module with databases from programs othe
654  tested this against ouput of one C<isis.dll>-based application, but I don't  tested this against ouput of one C<isis.dll>-based application, but I don't
655  know any details about it's version.  know any details about it's version.
656    
657    =head1 VERSIONS
658    
659    You can find version dependencies documented here
660    
661    =over 8
662    
663    =item 0.20
664    
665    Added C<< $isis->mfn >> and support for repeatable subfields
666    
667    =back
668    
669  =head1 AUTHOR  =head1 AUTHOR
670    
671          Dobrica Pavlinusic          Dobrica Pavlinusic

Legend:
Removed from v.27  
changed lines
  Added in v.54

  ViewVC Help
Powered by ViewVC 1.1.26