/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 26 by dpavlin, Fri Dec 31 07:16:02 2004 UTC revision 32 by dpavlin, Wed Jan 5 15:46:26 2005 UTC
# Line 9  use Data::Dumper; Line 9  use Data::Dumper;
9  BEGIN {  BEGIN {
10          use Exporter ();          use Exporter ();
11          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12          $VERSION     = 0.08;          $VERSION     = 0.09;
13          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
14          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
15          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 30  IsisDB - Read CDS/ISIS, WinISIS and Isis Line 30  IsisDB - Read CDS/ISIS, WinISIS and Isis
30          isisdb => './cds/cds',          isisdb => './cds/cds',
31    );    );
32    
33    for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {    for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
34          print $isis->to_ascii($mfn),"\n";          print $isis->to_ascii($mfn),"\n";
35    }    }
36    
37  =head1 DESCRIPTION  =head1 DESCRIPTION
38    
39  This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or  This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
40  IsisMarc. It can be used as perl-only alternative to OpenIsis module.  IsisMarc. It can be used as perl-only alternative to OpenIsis module which
41    seems to deprecate its old C<XS> bindings for perl.
42    
43  It can create hash values from data in ISIS database (using C<to_hash>),  It can create hash values from data in ISIS database (using C<to_hash>),
44  ASCII dump (using C<to_ascii>) or just hash with field names and packed  ASCII dump (using C<to_ascii>) or just hash with field names and packed
# Line 50  fields which are zero sized will be fill Line 51  fields which are zero sized will be fill
51  It also has support for identifiers (only if ISIS database is created by  It also has support for identifiers (only if ISIS database is created by
52  IsisMarc), see C<to_hash>.  IsisMarc), see C<to_hash>.
53    
54  This will module will always be slower than OpenIsis module which use C  This module will always be slower than OpenIsis module which use C
55  library. However, since it's written in perl, it's platform independent (so  library. However, since it's written in perl, it's platform independent (so
56  you don't need C compiler), and can be easily modified. I hope that it  you don't need C compiler), and can be easily modified. I hope that it
57  creates data structures which are easier to use than ones created by  creates data structures which are easier to use than ones created by
# Line 122  Dump a B<lot> of debugging output. Line 123  Dump a B<lot> of debugging output.
123    
124  =back  =back
125    
 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.  
   
126  =cut  =cut
127    
128  sub new {  sub new {
# Line 198  sub new { Line 197  sub new {
197          read($self->{'fileMST'}, $buff, 4);          read($self->{'fileMST'}, $buff, 4);
198          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";          $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
199    
         # save maximum MFN  
         $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;  
   
200    
201    
202    
# Line 212  sub new { Line 208  sub new {
208          $self ? return $self : return undef;          $self ? return $self : return undef;
209  }  }
210    
211    =head2 count
212    
213    Return number of records in database
214    
215      print $isis->count;
216    
217    =cut
218    
219    sub count {
220            my $self = shift;
221            return $self->{'NXTMFN'} - 1;
222    }
223    
224  =head2 read_cnt  =head2 read_cnt
225    
226  This function is not really used by module, but can be useful to find info  Read content of C<.CNT> file and return hash containing it.
 about your index (if debugging it for example).  
227    
228    print Dumper($isis->read_cnt);    print Dumper($isis->read_cnt);
229    
230    This function is not used by module (C<.CNT> files are not required for this
231    module to work), but it can be useful to examine your index (while debugging
232    for example).
233    
234  =cut  =cut
235    
236  sub read_cnt  {  sub read_cnt  {
# Line 230  sub read_cnt  { Line 242  sub read_cnt  {
242        
243          open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";          open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
244    
         # There is two 26 Bytes fixed lenght records  
   
         #  0: IDTYPE    BTree type                              16  
         #  2: ORDN      Nodes Order                             16  
         #  4: ORDF      Leafs Order                             16  
         #  6: N         Number of Memory buffers for nodes      16  
         #  8: K         Number of buffers for first level index 16  
         # 10: LIV       Current number of Index Levels          16  
         # 12: POSRX*    Pointer to Root Record in N0x           32  
         # 16: NMAXPOS*  Next Available position in N0x          32  
         # 20: FMAXPOS*  Next available position in L0x          32  
         # 24: ABNORMAL  Formal BTree normality indicator        16  
         # length: 26 bytes  
   
         sub unpack_cnt {  
                 my $self = shift;  
   
                 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);  
   
                 my $buff = shift || return;  
                 my @arr = unpack("ssssssllls", $buff);  
   
                 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});  
   
                 my $IDTYPE = shift @arr;  
                 foreach (@flds) {  
                         $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);  
                 }  
         }  
   
245          my $buff;          my $buff;
246    
247          read(fileCNT, $buff, 26);          read(fileCNT, $buff, 26);
# Line 273  sub read_cnt  { Line 255  sub read_cnt  {
255          return $self->{cnt};          return $self->{cnt};
256  }  }
257    
258    =head2 unpack_cnt
259    
260    Unpack one of the two 26-byte fixed-length records in C<.CNT> file.
261    
262    Here is definition of record:
263    
264     off key        description                             size
265      0: IDTYPE     BTree type                              s
266      2: ORDN       Nodes Order                             s
267      4: ORDF       Leafs Order                             s
268      6: N          Number of Memory buffers for nodes      s
269      8: K          Number of buffers for first level index s
270     10: LIV        Current number of Index Levels          s
271     12: POSRX      Pointer to Root Record in N0x           l
272     16: NMAXPOS    Next Available position in N0x          l
273     20: FMAXPOS    Next available position in L0x          l
274     24: ABNORMAL   Formal BTree normality indicator        s
275     length: 26 bytes
276    
277    This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
278    
279    =cut
280    
281    sub unpack_cnt {
282            my $self = shift;
283    
284            my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
285    
286            my $buff = shift || return;
287            my @arr = unpack("ssssssllls", $buff);
288    
289            print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
290    
291            my $IDTYPE = shift @arr;
292            foreach (@flds) {
293                    $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
294            }
295    }
296    
297  =head2 fetch  =head2 fetch
298    
299  Read record with selected MFN  Read record with selected MFN
# Line 420  sub fetch { Line 441  sub fetch {
441    
442  =head2 to_ascii  =head2 to_ascii
443    
444  Dump ASCII output of record with specified MFN  Returns ASCII output of record with specified MFN
445    
446    print $isis->to_ascii(42);    print $isis->to_ascii(42);
447    
448  It outputs something like this:  This outputs something like this:
449    
450    210   ^aNew York^cNew York University press^dcop. 1988    210   ^aNew York^cNew York University press^dcop. 1988
451    990   2140    990   2140
# Line 461  Read record with specified MFN and conve Line 482  Read record with specified MFN and conve
482    
483    my $hash = $isis->to_hash($mfn);    my $hash = $isis->to_hash($mfn);
484    
485  It has ability to convert characters (using C<hash_filter> from ISIS  It has ability to convert characters (using C<hash_filter>) from ISIS
486  database before creating structures enabling character re-mapping or quick  database before creating structures enabling character re-mapping or quick
487  fix-up of data.  fix-up of data.
488    
# Line 557  sub tag_name { Line 578  sub tag_name {
578    
579  =head1 BUGS  =head1 BUGS
580    
581  This module has been very lightly tested. Use with caution and report bugs.  Some parts of CDS/ISIS documentation are not detailed enough to explain
582    some variations in input databases which have been tested with this module.
583    When I was in doubt, I assumed that OpenIsis's implementation was right
584    (except for obvious bugs).
585    
586    However, every effort has been made to test this module with as many
587    databases (and programs that create them) as possible.
588    
589    I would be very grateful for success or failure reports about usage of this
590    module with databases from programs other than WinIsis and IsisMarc. I had
591    tested this against output of one C<isis.dll>-based application, but I don't
592    know any details about its version.
593    
594  =head1 AUTHOR  =head1 AUTHOR
595    

Legend:
Removed from v.26  
changed lines
  Added in v.32

  ViewVC Help
Powered by ViewVC 1.1.26