/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/IsisDB.pm revision 34 by dpavlin, Thu Jan 6 00:40:07 2005 UTC trunk/lib/Biblio/Isis.pm revision 50 by dpavlin, Fri Jul 7 21:11:01 2006 UTC
# Line 1  Line 1 
1  package IsisDB;  package Biblio::Isis;
2  use strict;  use strict;
3    
4  use Carp;  use Carp;
5  use File::Glob qw(:globally :nocase);  use File::Glob qw(:globally :nocase);
6    
 use Data::Dumper;  
   
7  BEGIN {  BEGIN {
8          use Exporter ();          use Exporter ();
9          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10          $VERSION     = 0.09;          $VERSION     = 0.20;
11          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
12          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
13          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 20  BEGIN { Line 18  BEGIN {
18    
19  =head1 NAME  =head1 NAME
20    
21  IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database  Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
25    use IsisDB;    use Biblio::Isis;
26    
27    my $isis = new IsisDB(    my $isis = new Biblio::Isis(
28          isisdb => './cds/cds',          isisdb => './cds/cds',
29    );    );
30    
# Line 81  rarely an issue). Line 79  rarely an issue).
79    
80  Open ISIS database  Open ISIS database
81    
82   my $isis = new IsisDB(   my $isis = new Biblio::Isis(
83          isisdb => './cds/cds',          isisdb => './cds/cds',
84          read_fdt => 1,          read_fdt => 1,
85          include_deleted => 1,          include_deleted => 1,
# Line 147  sub new { Line 145  sub new {
145          push @must_exist, "fdt" if ($self->{read_fdt});          push @must_exist, "fdt" if ($self->{read_fdt});
146    
147          foreach my $ext (@must_exist) {          foreach my $ext (@must_exist) {
148                  croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});                  unless ($self->{$ext."_file"}) {
149                            carp "missing ",uc($ext)," file in ",$self->{isisdb};
150                            return;
151                    }
152          }          }
153    
154          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});          if ($self->{debug}) {
155                    print STDERR "## using files: ",join(" ",@isis_files),"\n";
156                    eval "use Data::Dump";
157    
158                    if (! $@) {
159                            *Dumper = *Data::Dump::dump;
160                    } else {
161                            use Data::Dumper;
162                    }
163            }
164    
165          # if you want to read .FDT file use read_fdt argument when creating class!          # if you want to read .FDT file use read_fdt argument when creating class!
166          if ($self->{read_fdt} && -e $self->{fdt_file}) {          if ($self->{read_fdt} && -e $self->{fdt_file}) {
# Line 199  sub new { Line 209  sub new {
209          read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";          read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
210          $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
211    
212          print STDERR Dumper($self),"\n" if ($self->{debug});          print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
213    
214          # open files for later          # open files for later
215          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
# Line 221  sub count { Line 231  sub count {
231          return $self->{'NXTMFN'} - 1;          return $self->{'NXTMFN'} - 1;
232  }  }
233    
 =head2 read_cnt  
   
 Read content of C<.CNT> file and return hash containing it.  
   
   print Dumper($isis->read_cnt);  
   
 This function is not used by module (C<.CNT> files are not required for this  
 module to work), but it can be useful to examine your index (while debugging  
 for example).  
   
 =cut  
   
 sub read_cnt  {  
         my $self = shift;  
   
         croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});  
   
         # Get the index information from $db.CNT  
     
         open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";  
         binmode($fileCNT);  
   
         my $buff;  
   
         read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";  
         $self->unpack_cnt($buff);  
   
         read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";  
         $self->unpack_cnt($buff);  
   
         close($fileCNT);  
   
         return $self->{cnt};  
 }  
   
 =head2 unpack_cnt  
   
 Unpack one of two 26 bytes fixed length record in C<.CNT> file.  
   
 Here is definition of record:  
   
  off key        description                             size  
   0: IDTYPE     BTree type                              s  
   2: ORDN       Nodes Order                             s  
   4: ORDF       Leafs Order                             s  
   6: N          Number of Memory buffers for nodes      s  
   8: K          Number of buffers for first level index s  
  10: LIV        Current number of Index Levels          s  
  12: POSRX      Pointer to Root Record in N0x           l  
  16: NMAXPOS    Next Available position in N0x          l  
  20: FMAXPOS    Next available position in L0x          l  
  24: ABNORMAL   Formal BTree normality indicator        s  
  length: 26 bytes  
   
 This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.  
   
 =cut  
   
 sub unpack_cnt {  
         my $self = shift;  
   
         my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
         my $buff = shift || return;  
         my @arr = unpack("vvvvvvVVVv", $buff);  
   
         print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
         my $IDTYPE = shift @arr;  
         foreach (@flds) {  
                 $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
         }  
 }  
   
234  =head2 fetch  =head2 fetch
235    
236  Read record with selected MFN  Read record with selected MFN
# Line 335  sub fetch { Line 271  sub fetch {
271    
272          # read XRFMFB abd XRFMFP          # read XRFMFB abd XRFMFP
273          read($self->{'fileXRF'}, $buff, 4);          read($self->{'fileXRF'}, $buff, 4);
274          my $pointer=unpack("V",$buff) || croak "pointer is null";          my $pointer=unpack("V",$buff);
275            if (! $pointer) {
276                    if ($self->{include_deleted}) {
277                            return;
278                    } else {
279                            warn "pointer for MFN $mfn is null\n";
280                            return;
281                    }
282            }
283    
284          # check for logically deleted record          # check for logically deleted record
285          if ($pointer & 0x80000000) {          if ($pointer & 0x80000000) {
# Line 464  sub to_ascii { Line 408  sub to_ascii {
408    
409          my $mfn = shift || croak "need MFN";          my $mfn = shift || croak "need MFN";
410    
411          my $rec = $self->fetch($mfn);          my $rec = $self->fetch($mfn) || return;
412    
413          my $out = "0\t$mfn";          my $out = "0\t$mfn";
414    
# Line 520  which will be used for identifiers, C<i1 Line 464  which will be used for identifiers, C<i1
464               }               }
465             ],             ],
466    
467    In case there are repeatable subfields in record, this will create
468    following structure:
469    
470      '900' => [ {
471            'a' => [ 'foo', 'bar', 'baz' ],
472      }]
473    
474  This method will also create additional field C<000> with MFN.  This method will also create additional field C<000> with MFN.
475    
476  =cut  =cut
# Line 532  sub to_hash { Line 483  sub to_hash {
483          # init record to include MFN as field 000          # init record to include MFN as field 000
484          my $rec = { '000' => [ $mfn ] };          my $rec = { '000' => [ $mfn ] };
485    
486          my $row = $self->fetch($mfn);          my $row = $self->fetch($mfn) || return;
487    
488          foreach my $k (keys %{$row}) {          foreach my $k (keys %{$row}) {
489                  foreach my $l (@{$row->{$k}}) {                  foreach my $l (@{$row->{$k}}) {
490    
491                          # filter output                          # filter output
492                          $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});                          if ($self->{'hash_filter'}) {
493                                    $l = $self->{'hash_filter'}->($l);
494                                    next unless defined($l);
495                            }
496    
497                          my $val;                          my $val;
498    
# Line 549  sub to_hash { Line 503  sub to_hash {
503                          if ($l =~ m/\^/) {                          if ($l =~ m/\^/) {
504                                  foreach my $t (split(/\^/,$l)) {                                  foreach my $t (split(/\^/,$l)) {
505                                          next if (! $t);                                          next if (! $t);
506                                          $val->{substr($t,0,1)} = substr($t,1);                                          my ($sf,$v) = (substr($t,0,1), substr($t,1));
507                                            warn "### $k^$sf:$v",$/ if ($self->{debug} > 1);
508                                            if (ref( $val->{$sf} ) eq 'ARRAY') {
509                                                    push @{ $val->{$sf} }, $v;
510                                            } elsif (defined( $val->{$sf} )) {
511                                                    # convert scalar field to array
512                                                    $val->{$sf} = [ $val->{$sf}, $v ];
513                                            } else {
514                                                    $val->{$sf} = $v;
515                                            }
516                                  }                                  }
517                          } else {                          } else {
518                                  $val = $l;                                  $val = $l;
# Line 576  sub tag_name { Line 539  sub tag_name {
539          return $self->{'TagName'}->{$tag} || $tag;          return $self->{'TagName'}->{$tag} || $tag;
540  }  }
541    
542    
543    =head2 read_cnt
544    
545    Read content of C<.CNT> file and return hash containing it.
546    
547      print Dumper($isis->read_cnt);
548    
549    This function is not used by module (C<.CNT> files are not required for this
550    module to work), but it can be useful to examine your index (while debugging
551    for example).
552    
553    =cut
554    
555    sub read_cnt  {
556            my $self = shift;
557    
558            croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
559    
560            # Get the index information from $db.CNT
561      
562            open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
563            binmode($fileCNT);
564    
565            my $buff;
566    
567            read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
568            $self->unpack_cnt($buff);
569    
570            read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
571            $self->unpack_cnt($buff);
572    
573            close($fileCNT);
574    
575            return $self->{cnt};
576    }
577    
578    =head2 unpack_cnt
579    
580    Unpack one of two 26 bytes fixed length record in C<.CNT> file.
581    
582    Here is definition of record:
583    
584     off key        description                             size
585      0: IDTYPE     BTree type                              s
586      2: ORDN       Nodes Order                             s
587      4: ORDF       Leafs Order                             s
588      6: N          Number of Memory buffers for nodes      s
589      8: K          Number of buffers for first level index s
590     10: LIV        Current number of Index Levels          s
591     12: POSRX      Pointer to Root Record in N0x           l
592     16: NMAXPOS    Next Available position in N0x          l
593     20: FMAXPOS    Next available position in L0x          l
594     24: ABNORMAL   Formal BTree normality indicator        s
595     length: 26 bytes
596    
597    This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
598    
599    =cut
600    
601    sub unpack_cnt {
602            my $self = shift;
603    
604            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
605    
606            my $buff = shift || return;
607            my @arr = unpack("vvvvvvVVVv", $buff);
608    
609            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
610    
611            my $IDTYPE = shift @arr;
612            foreach (@flds) {
613                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
614            }
615    }
616    
617  1;  1;
618    
619  =head1 BUGS  =head1 BUGS

Legend:
Removed from v.34  
changed lines
  Added in v.50

  ViewVC Help
Powered by ViewVC 1.1.26