/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 18 by dpavlin, Thu Dec 30 22:40:53 2004 UTC revision 32 by dpavlin, Wed Jan 5 15:46:26 2005 UTC
# Line 9  use Data::Dumper; Line 9  use Data::Dumper;
9  BEGIN {  BEGIN {
10          use Exporter ();          use Exporter ();
11          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12          $VERSION     = 0.07;          $VERSION     = 0.09;
13          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
14          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
15          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 30  IsisDB - Read CDS/ISIS, WinISIS and Isis Line 30  IsisDB - Read CDS/ISIS, WinISIS and Isis
30          isisdb => './cds/cds',          isisdb => './cds/cds',
31    );    );
32    
33    for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {    for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
34          print $isis->to_ascii($mfn),"\n";          print $isis->to_ascii($mfn),"\n";
35    }    }
36    
37  =head1 DESCRIPTION  =head1 DESCRIPTION
38    
39  This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or  This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
40  IsisMarc. It can be used as perl-only alternative to OpenIsis module.  IsisMarc. It can be used as perl-only alternative to OpenIsis module which
41    seems to deprecate its old C<XS> bindings for perl.
42    
43  It can create hash values from data in ISIS database (using C<to_hash>),  It can create hash values from data in ISIS database (using C<to_hash>),
44  ASCII dump (using C<to_ascii>) or just hash with field names and packed  ASCII dump (using C<to_ascii>) or just hash with field names and packed
# Line 50  fields which are zero sized will be fill Line 51  fields which are zero sized will be fill
51  It also has support for identifiers (only if ISIS database is created by  It also has support for identifiers (only if ISIS database is created by
52  IsisMarc), see C<to_hash>.  IsisMarc), see C<to_hash>.
53    
54  This will module will always be slower than OpenIsis module which use C  This module will always be slower than OpenIsis module which use C
55  library. However, since it's written in perl, it's platform independent (so  library. However, since it's written in perl, it's platform independent (so
56  you don't need C compiler), and can be easily modified. I hope that it  you don't need C compiler), and can be easily modified. I hope that it
57  creates data structures which are easier to use than ones created by  creates data structures which are easier to use than ones created by
# Line 122  Dump a B<lot> of debugging output. Line 123  Dump a B<lot> of debugging output.
123    
124  =back  =back
125    
 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.  
   
126  =cut  =cut
127    
128  sub new {  sub new {
# Line 148  sub new { Line 147  sub new {
147          push @must_exist, "fdt" if ($self->{read_fdt});          push @must_exist, "fdt" if ($self->{read_fdt});
148    
149          foreach my $ext (@must_exist) {          foreach my $ext (@must_exist) {
150                  confess "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});                  croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});
151          }          }
152    
153          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});          print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});
# Line 198  sub new { Line 197  sub new {
197          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4);
198          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
199    
         # save maximum MFN  
         $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;  
   
200    
201    
202    
# Line 212  sub new { Line 208  sub new {
208          $self ? return $self : return undef;          $self ? return $self : return undef;
209  }  }
210    
211    =head2 count
212    
213    Return number of records in database
214    
215      print $isis->count;
216    
217    =cut
218    
219    sub count {
220            my $self = shift;
221            return $self->{'NXTMFN'} - 1;
222    }
223    
224  =head2 read_cnt  =head2 read_cnt
225    
226  This function is not really used by module, but can be useful to find info  Read content of C<.CNT> file and return hash containing it.
 about your index (if debugging it for example).  
227    
228    print Dumper($isis->read_cnt);    print Dumper($isis->read_cnt);
229    
230    This function is not used by module (C<.CNT> files are not required for this
231    module to work), but it can be useful to examine your index (while debugging
232    for example).
233    
234  =cut  =cut
235    
236  sub read_cnt  {  sub read_cnt  {
237          my $self = shift;          my $self = shift;
238    
239          confess "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});          croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
240    
241          # Get the index information from $db.CNT          # Get the index information from $db.CNT
242        
243          open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";          open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
244    
         # There is two 26 Bytes fixed lenght records  
   
         #  0: IDTYPE    BTree type                              16  
         #  2: ORDN      Nodes Order                             16  
         #  4: ORDF      Leafs Order                             16  
         #  6: N         Number of Memory buffers for nodes      16  
         #  8: K         Number of buffers for first level index 16  
         # 10: LIV       Current number of Index Levels          16  
         # 12: POSRX*    Pointer to Root Record in N0x           32  
         # 16: NMAXPOS*  Next Available position in N0x          32  
         # 20: FMAXPOS*  Next available position in L0x          32  
         # 24: ABNORMAL  Formal BTree normality indicator        16  
         # length: 26 bytes  
   
         sub unpack_cnt {  
                 my $self = shift;  
   
                 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
                 my $buff = shift || return;  
                 my @arr = unpack("ssssssllls", $buff);  
   
                 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
                 my $IDTYPE = shift @arr;  
                 foreach (@flds) {  
                         $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
                 }  
         }  
   
245          my $buff;          my $buff;
246    
247          read(fileCNT, $buff, 26);          read(fileCNT, $buff, 26);
# Line 273  sub read_cnt  { Line 255  sub read_cnt  {
255          return $self->{cnt};          return $self->{cnt};
256  }  }
257    
258    =head2 unpack_cnt
259    
260    Unpack one of the two 26-byte fixed-length records in C<.CNT> file.
261    
262    Here is definition of record:
263    
264     off key        description                             size
265      0: IDTYPE     BTree type                              s
266      2: ORDN       Nodes Order                             s
267      4: ORDF       Leafs Order                             s
268      6: N          Number of Memory buffers for nodes      s
269      8: K          Number of buffers for first level index s
270     10: LIV        Current number of Index Levels          s
271     12: POSRX      Pointer to Root Record in N0x           l
272     16: NMAXPOS    Next Available position in N0x          l
273     20: FMAXPOS    Next available position in L0x          l
274     24: ABNORMAL   Formal BTree normality indicator        s
275     length: 26 bytes
276    
277    This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
278    
279    =cut
280    
281    sub unpack_cnt {
282            my $self = shift;
283    
284            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
285    
286            my $buff = shift || return;
287            my @arr = unpack("ssssssllls", $buff);
288    
289            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
290    
291            my $IDTYPE = shift @arr;
292            foreach (@flds) {
293                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
294            }
295    }
296    
297  =head2 fetch  =head2 fetch
298    
299  Read record with selected MFN  Read record with selected MFN
# Line 296  sub fetch { Line 317  sub fetch {
317    
318          # is mfn allready in memory?          # is mfn allready in memory?
319          my $old_mfn = $self->{'current_mfn'} || -1;          my $old_mfn = $self->{'current_mfn'} || -1;
320          return if ($mfn == $old_mfn);          return $self->{record} if ($mfn == $old_mfn);
321    
322          print STDERR "## fetch: $mfn\n" if ($self->{debug});          print STDERR "## fetch: $mfn\n" if ($self->{debug});
323    
# Line 308  sub fetch { Line 329  sub fetch {
329    
330          my $buff;          my $buff;
331    
332            # delete old record
333            delete $self->{record};
334    
335          # read XRFMFB abd XRFMFP          # read XRFMFB abd XRFMFP
336          read($self->{'fileXRF'}, $buff, 4);          read($self->{'fileXRF'}, $buff, 4);
337          my $pointer=unpack("l",$buff) || carp "pointer is null";          my $pointer=unpack("l",$buff) || carp "pointer is null";
338    
339            # check for logically deleted record
340            if ($pointer < 0) {
341                    print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
342                    $self->{deleted} = $mfn;
343    
344                    return unless $self->{include_deleted};
345    
346                    $pointer = abs($pointer);
347            }
348    
349          my $XRFMFB = int($pointer/2048);          my $XRFMFB = int($pointer/2048);
350          my $XRFMFP = $pointer - ($XRFMFB*2048);          my $XRFMFP = $pointer - ($XRFMFB*2048);
351    
   
352          # (XRFMFB - 1) * 512 + XRFMFP          # (XRFMFB - 1) * 512 + XRFMFP
353          # why do i have to do XRFMFP % 1024 ?          # why do i have to do XRFMFP % 1024 ?
354    
355          my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 1024);          my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
356    
357          print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});          print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
358    
# Line 333  sub fetch { Line 366  sub fetch {
366          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
367    
368          if ($value!=$mfn) {          if ($value!=$mfn) {
369                  carp "Error: MFN ".$mfn." not found in MST(".$value.")";                      if ($value == 0) {
370                  #return;                # XXX deleted record?                          print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
371          }                          $self->{deleted} = $mfn;
372                            return;
373                    }
374    
375  #       $MFRL=$self->Read16($fileMST);                  carp "Error: MFN ".$mfn." not found in MST file, found $value";    
376  #       $MFBWB=$self->Read32($fileMST);                  return;
377  #       $MFBWP=$self->Read16($fileMST);          }
 #       $BASE=$self->Read16($fileMST);  
 #       $NVF=$self->Read16($fileMST);  
 #       $STATUS=$self->Read16($fileMST);  
378    
379          read($self->{'fileMST'}, $buff, 14);          read($self->{'fileMST'}, $buff, 14);
380    
# Line 350  sub fetch { Line 382  sub fetch {
382    
383          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
384    
385          # delete old record          warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
         delete $self->{record};  
   
         ## FIXME this is a bug  
         if (! $self->{'include_deleted'} && $MFRL < 0) {  
                 print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});  
                 return;  
         }  
386    
387          warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);          warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
388    
# Line 373  sub fetch { Line 398  sub fetch {
398    
399          for (my $i = 0 ; $i < $NVF ; $i++) {          for (my $i = 0 ; $i < $NVF ; $i++) {
400    
 #               $TAG=$self->Read16($fileMST);  
 #               $POS=$self->Read16($fileMST);  
 #               $LEN=$self->Read16($fileMST);  
   
401                  my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));                  my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
402    
403                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
# Line 413  sub fetch { Line 434  sub fetch {
434    
435          $self->{'current_mfn'} = $mfn;          $self->{'current_mfn'} = $mfn;
436    
437          print Dumper($self),"\n" if ($self->{debug});          print STDERR Dumper($self),"\n" if ($self->{debug});
438    
439          return $self->{'record'};          return $self->{'record'};
440  }  }
441    
442  =head2 to_ascii  =head2 to_ascii
443    
444  Dump ASCII output of record with specified MFN  Returns ASCII output of record with specified MFN
445    
446    print $isis->to_ascii(42);    print $isis->to_ascii(42);
447    
448  It outputs something like this:  This outputs something like this:
449    
450    210   ^aNew York^cNew York University press^dcop. 1988    210   ^aNew York^cNew York University press^dcop. 1988
451    990   2140    990   2140
# Line 461  Read record with specified MFN and conve Line 482  Read record with specified MFN and conve
482    
483    my $hash = $isis->to_hash($mfn);    my $hash = $isis->to_hash($mfn);
484    
485  It has ability to convert characters (using C<hash_filter> from ISIS  It has ability to convert characters (using C<hash_filter>) from ISIS
486  database before creating structures enabling character re-mapping or quick  database before creating structures enabling character re-mapping or quick
487  fix-up of data.  fix-up of data.
488    
# Line 520  sub to_hash { Line 541  sub to_hash {
541                          my $val;                          my $val;
542    
543                          # has identifiers?                          # has identifiers?
544                          ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])//);                          ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
545    
546                          # has subfields?                          # has subfields?
547                          if ($l =~ m/\^/) {                          if ($l =~ m/\^/) {
# Line 557  sub tag_name { Line 578  sub tag_name {
578    
579  =head1 BUGS  =head1 BUGS
580    
581  This module has been very lightly tested. Use with caution and report bugs.  Some parts of CDS/ISIS documentation are not detailed enough to explain
582    some variations in input databases which have been tested with this module.
583    When I was in doubt, I assumed that OpenIsis's implementation was right
584    (except for obvious bugs).
585    
586    However, every effort has been made to test this module with as many
587    databases (and programs that create them) as possible.
588    
589    I would be very grateful for success or failure reports about usage of this
590    module with databases from programs other than WinIsis and IsisMarc. I had
591    tested this against output of one C<isis.dll>-based application, but I don't
592    know any details about its version.
593    
594  =head1 AUTHOR  =head1 AUTHOR
595    

Legend:
Removed from v.18  
changed lines
  Added in v.32

  ViewVC Help
Powered by ViewVC 1.1.26