/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 19 by dpavlin, Thu Dec 30 23:16:20 2004 UTC revision 33 by dpavlin, Wed Jan 5 21:23:04 2005 UTC
# Line 9  use Data::Dumper; Line 9  use Data::Dumper;
9  BEGIN {  BEGIN {
10          use Exporter ();          use Exporter ();
11          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12          $VERSION     = 0.07;          $VERSION     = 0.09;
13          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
14          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
15          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 30  IsisDB - Read CDS/ISIS, WinISIS and Isis Line 30  IsisDB - Read CDS/ISIS, WinISIS and Isis
30          isisdb => './cds/cds',          isisdb => './cds/cds',
31    );    );
32    
33    for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {    for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
34          print $isis->to_ascii($mfn),"\n";          print $isis->to_ascii($mfn),"\n";
35    }    }
36    
37  =head1 DESCRIPTION  =head1 DESCRIPTION
38    
39  This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or  This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
40  IsisMarc. It can be used as perl-only alternative to OpenIsis module.  IsisMarc. It can be used as perl-only alternative to OpenIsis module which
41    seems to depriciate it's old C<XS> bindings for perl.
42    
43  It can create hash values from data in ISIS database (using C<to_hash>),  It can create hash values from data in ISIS database (using C<to_hash>),
44  ASCII dump (using C<to_ascii>) or just hash with field names and packed  ASCII dump (using C<to_ascii>) or just hash with field names and packed
# Line 50  fields which are zero sized will be fill Line 51  fields which are zero sized will be fill
51  It also has support for identifiers (only if ISIS database is created by  It also has support for identifiers (only if ISIS database is created by
52  IsisMarc), see C<to_hash>.  IsisMarc), see C<to_hash>.
53    
54  This will module will always be slower than OpenIsis module which use C  This module will always be slower than OpenIsis module which use C
55  library. However, since it's written in perl, it's platform independent (so  library. However, since it's written in perl, it's platform independent (so
56  you don't need C compiler), and can be easily modified. I hope that it  you don't need C compiler), and can be easily modified. I hope that it
57  creates data structures which are easier to use than ones created by  creates data structures which are easier to use than ones created by
# Line 122  Dump a B<lot> of debugging output. Line 123  Dump a B<lot> of debugging output.
123    
124  =back  =back
125    
 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.  
   
126  =cut  =cut
127    
128  sub new {  sub new {
# Line 159  sub new { Line 158  sub new {
158                  # read the $db.FDT file for tags                  # read the $db.FDT file for tags
159                  my $fieldzone=0;                  my $fieldzone=0;
160    
161                  open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";                  open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
162                    binmode($fileFDT);
163    
164                  while (<fileFDT>) {                  while (<$fileFDT>) {
165                          chomp;                          chomp;
166                          if ($fieldzone) {                          if ($fieldzone) {
167                                  my $name=substr($_,0,30);                                  my $name=substr($_,0,30);
# Line 178  sub new { Line 178  sub new {
178                          }                          }
179                  }                  }
180                                    
181                  close(fileFDT);                  close($fileFDT);
182          }          }
183    
184          # Get the Maximum MFN from $db.MST          # Get the Maximum MFN from $db.MST
185    
186          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";          open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
187            binmode($self->{'fileMST'});
188    
189          # MST format:   (* = 32 bit signed)          # MST format:   (* = 32 bit signed)
190          # CTLMFN*       always 0          # CTLMFN*       always 0
# Line 191  sub new { Line 192  sub new {
192          # NXTMFB*       last block allocated to master file          # NXTMFB*       last block allocated to master file
193          # NXTMFP        offset to next available position in last block          # NXTMFP        offset to next available position in last block
194          # MFTYPE        always 0 for user db file (1 for system)          # MFTYPE        always 0 for user db file (1 for system)
195          seek($self->{'fileMST'},4,0);          seek($self->{'fileMST'},4,0) || carp "can't seek to offset 0 in MST: $!";
196    
197          my $buff;          my $buff;
198    
199          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || carp "can't read NXTMFN from MST: $!";
200          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("V",$buff) || carp "NXTNFN is zero";
   
         # save maximum MFN  
         $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;  
   
   
   
201    
202          print STDERR Dumper($self),"\n" if ($self->{debug});          print STDERR Dumper($self),"\n" if ($self->{debug});
203    
204          # open files for later          # open files for later
205          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";          open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
206            binmode($self->{'fileXRF'});
207    
208          $self ? return $self : return undef;          $self ? return $self : return undef;
209  }  }
210    
211    =head2 count
212    
213    Return number of records in database
214    
215      print $isis->count;
216    
217    =cut
218    
219    sub count {
220            my $self = shift;
221            return $self->{'NXTMFN'} - 1;
222    }
223    
224  =head2 read_cnt  =head2 read_cnt
225    
226  This function is not really used by module, but can be useful to find info  Read content of C<.CNT> file and return hash containing it.
 about your index (if debugging it for example).  
227    
228    print Dumper($isis->read_cnt);    print Dumper($isis->read_cnt);
229    
230    This function is not used by module (C<.CNT> files are not required for this
231    module to work), but it can be useful to examine your index (while debugging
232    for example).
233    
234  =cut  =cut
235    
236  sub read_cnt  {  sub read_cnt  {
# Line 228  sub read_cnt  { Line 240  sub read_cnt  {
240    
241          # Get the index information from $db.CNT          # Get the index information from $db.CNT
242        
243          open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";          open(my $fileCNT, $self->{cnt_file}) || carp "can't read '$self->{cnt_file}': $!";
244            binmode($fileCNT);
         # There is two 26 Bytes fixed lenght records  
   
         #  0: IDTYPE    BTree type                              16  
         #  2: ORDN      Nodes Order                             16  
         #  4: ORDF      Leafs Order                             16  
         #  6: N         Number of Memory buffers for nodes      16  
         #  8: K         Number of buffers for first level index 16  
         # 10: LIV       Current number of Index Levels          16  
         # 12: POSRX*    Pointer to Root Record in N0x           32  
         # 16: NMAXPOS*  Next Available position in N0x          32  
         # 20: FMAXPOS*  Next available position in L0x          32  
         # 24: ABNORMAL  Formal BTree normality indicator        16  
         # length: 26 bytes  
   
         sub unpack_cnt {  
                 my $self = shift;  
   
                 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
                 my $buff = shift || return;  
                 my @arr = unpack("ssssssllls", $buff);  
   
                 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
                 my $IDTYPE = shift @arr;  
                 foreach (@flds) {  
                         $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
                 }  
         }  
245    
246          my $buff;          my $buff;
247    
248          read(fileCNT, $buff, 26);          read($fileCNT, $buff, 26) || carp "can't read first table from CNT: $!";
249          $self->unpack_cnt($buff);          $self->unpack_cnt($buff);
250    
251          read(fileCNT, $buff, 26);          read($fileCNT, $buff, 26) || carp "can't read second table from CNT: $!";
252          $self->unpack_cnt($buff);          $self->unpack_cnt($buff);
253    
254          close(fileCNT);          close($fileCNT);
255    
256          return $self->{cnt};          return $self->{cnt};
257  }  }
258    
259    =head2 unpack_cnt
260    
261    Unpack one of two 26 bytes fixed length record in C<.CNT> file.
262    
263    Here is definition of record:
264    
265     off key        description                             size
266      0: IDTYPE     BTree type                              s
267      2: ORDN       Nodes Order                             s
268      4: ORDF       Leafs Order                             s
269      6: N          Number of Memory buffers for nodes      s
270      8: K          Number of buffers for first level index s
271     10: LIV        Current number of Index Levels          s
272     12: POSRX      Pointer to Root Record in N0x           l
273     16: NMAXPOS    Next Available position in N0x          l
274     20: FMAXPOS    Next available position in L0x          l
275     24: ABNORMAL   Formal BTree normality indicator        s
276     length: 26 bytes
277    
278    This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
279    
280    =cut
281    
282    sub unpack_cnt {
283            my $self = shift;
284    
285            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
286    
287            my $buff = shift || return;
288            my @arr = unpack("vvvvvvVVVv", $buff);
289    
290            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
291    
292            my $IDTYPE = shift @arr;
293            foreach (@flds) {
294                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
295            }
296    }
297    
298  =head2 fetch  =head2 fetch
299    
300  Read record with selected MFN  Read record with selected MFN
# Line 296  sub fetch { Line 318  sub fetch {
318    
319          # is mfn allready in memory?          # is mfn allready in memory?
320          my $old_mfn = $self->{'current_mfn'} || -1;          my $old_mfn = $self->{'current_mfn'} || -1;
321          return if ($mfn == $old_mfn);          return $self->{record} if ($mfn == $old_mfn);
322    
323          print STDERR "## fetch: $mfn\n" if ($self->{debug});          print STDERR "## fetch: $mfn\n" if ($self->{debug});
324    
# Line 308  sub fetch { Line 330  sub fetch {
330    
331          my $buff;          my $buff;
332    
333            # delete old record
334            delete $self->{record};
335    
336          # read XRFMFB abd XRFMFP          # read XRFMFB abd XRFMFP
337          read($self->{'fileXRF'}, $buff, 4);          read($self->{'fileXRF'}, $buff, 4);
338          my $pointer=unpack("l",$buff) || carp "pointer is null";          my $pointer=unpack("V",$buff) || carp "pointer is null";
339    
340            # check for logically deleted record
341            if ($pointer & 0x80000000) {
342                    print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
343                    $self->{deleted} = $mfn;
344    
345                    return unless $self->{include_deleted};
346    
347                    # abs
348                    $pointer = ($pointer ^ 0xffffffff) + 1;
349            }
350    
351          my $XRFMFB = int($pointer/2048);          my $XRFMFB = int($pointer/2048);
352          my $XRFMFP = $pointer - ($XRFMFB*2048);          my $XRFMFP = $pointer - ($XRFMFB*2048);
353    
   
354          # (XRFMFB - 1) * 512 + XRFMFP          # (XRFMFB - 1) * 512 + XRFMFP
355          # why do i have to do XRFMFP % 1024 ?          # why do i have to do XRFMFP % 1024 ?
356    
357          my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 1024);          my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
358    
359          print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});          print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
360    
361          # Get Record Information          # Get Record Information
362    
363          seek($self->{'fileMST'},$blk_off,0);          seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
364    
365          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
366          my $value=unpack("l",$buff);          my $value=unpack("V",$buff);
367    
368          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});          print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
369    
370          if ($value!=$mfn) {          if ($value!=$mfn) {
371                  carp "Error: MFN ".$mfn." not found in MST(".$value.")";                      if ($value == 0) {
372                  #return;                # XXX deleted record?                          print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
373          }                          $self->{deleted} = $mfn;
374                            return;
375                    }
376    
377  #       $MFRL=$self->Read16($fileMST);                  carp "Error: MFN ".$mfn." not found in MST file, found $value";    
378  #       $MFBWB=$self->Read32($fileMST);                  return;
379  #       $MFBWP=$self->Read16($fileMST);          }
 #       $BASE=$self->Read16($fileMST);  
 #       $NVF=$self->Read16($fileMST);  
 #       $STATUS=$self->Read16($fileMST);  
380    
381          read($self->{'fileMST'}, $buff, 14);          read($self->{'fileMST'}, $buff, 14);
382    
383          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);          my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
384    
385          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});          print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
386    
387          # delete old record          warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
         delete $self->{record};  
   
         ## FIXME this is a bug  
         if (! $self->{'include_deleted'} && $MFRL < 0) {  
                 print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});  
                 return;  
         }  
388    
389          warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);          warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
390    
# Line 373  sub fetch { Line 400  sub fetch {
400    
401          for (my $i = 0 ; $i < $NVF ; $i++) {          for (my $i = 0 ; $i < $NVF ; $i++) {
402    
403  #               $TAG=$self->Read16($fileMST);                  my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
 #               $POS=$self->Read16($fileMST);  
 #               $LEN=$self->Read16($fileMST);  
   
                 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));  
404    
405                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});                  print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
406    
# Line 413  sub fetch { Line 436  sub fetch {
436    
437          $self->{'current_mfn'} = $mfn;          $self->{'current_mfn'} = $mfn;
438    
439          print Dumper($self),"\n" if ($self->{debug});          print STDERR Dumper($self),"\n" if ($self->{debug});
440    
441          return $self->{'record'};          return $self->{'record'};
442  }  }
443    
444  =head2 to_ascii  =head2 to_ascii
445    
446  Dump ASCII output of record with specified MFN  Returns ASCII output of record with specified MFN
447    
448    print $isis->to_ascii(42);    print $isis->to_ascii(42);
449    
450  It outputs something like this:  This outputs something like this:
451    
452    210   ^aNew York^cNew York University press^dcop. 1988    210   ^aNew York^cNew York University press^dcop. 1988
453    990   2140    990   2140
# Line 461  Read record with specified MFN and conve Line 484  Read record with specified MFN and conve
484    
485    my $hash = $isis->to_hash($mfn);    my $hash = $isis->to_hash($mfn);
486    
487  It has ability to convert characters (using C<hash_filter> from ISIS  It has ability to convert characters (using C<hash_filter>) from ISIS
488  database before creating structures enabling character re-mapping or quick  database before creating structures enabling character re-mapping or quick
489  fix-up of data.  fix-up of data.
490    
# Line 520  sub to_hash { Line 543  sub to_hash {
543                          my $val;                          my $val;
544    
545                          # has identifiers?                          # has identifiers?
546                          ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])//);                          ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
547    
548                          # has subfields?                          # has subfields?
549                          if ($l =~ m/\^/) {                          if ($l =~ m/\^/) {
# Line 557  sub tag_name { Line 580  sub tag_name {
580    
581  =head1 BUGS  =head1 BUGS
582    
583  This module has been very lightly tested. Use with caution and report bugs.  Some parts of CDS/ISIS documentation are not detailed enough to exmplain
584    some variations in input databases which has been tested with this module.
585    When I was in doubt, I assumed that OpenIsis's implementation was right
586    (except for obvious bugs).
587    
588    However, every effort has been made to test this module with as much
589    databases (and programs that create them) as possible.
590    
591    I would be very greatful for success or failure reports about usage of this
592    module with databases from programs other than WinIsis and IsisMarc. I had
593    tested this against ouput of one C<isis.dll>-based application, but I don't
594    know any details about it's version.
595    
596  =head1 AUTHOR  =head1 AUTHOR
597    

Legend:
Removed from v.19  
changed lines
  Added in v.33

  ViewVC Help
Powered by ViewVC 1.1.26