Biblio-Isis/trunk/IsisDB.pm

package IsisDB;
use strict;

use Carp;
use Data::Dumper;

BEGIN {
        use Exporter ();
        use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
        $VERSION     = 0.05;
        @ISA         = qw (Exporter);
        #Give a hoot don't pollute, do not export more than needed by default
        @EXPORT      = qw ();
        @EXPORT_OK   = qw ();
        %EXPORT_TAGS = ();

}

=head1 NAME

IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database

=head1 SYNOPSIS

  use IsisDB;

  my $isis = new IsisDB(
        isisdb => './cds/cds',
  );

  for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
        print $isis->to_ascii($mfn),"\n";
  }

=head1 DESCRIPTION

This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
IsisMarc. It can be used as perl-only alternative to OpenIsis module.

It can create hash values from data in ISIS database (using C<to_hash>),
ASCII dump (using C<to_ascii>) or just hash with field names and packed
values (like C<^asomething^belse>).

Unique feature of this module is ability to C<include_deleted> records.
It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
fields which are zero sized will be filled with random junk from memory).

It also has support for identifiers (only if ISIS database is created by
IsisMarc), see C<to_hash>.

This will module will always be slower than OpenIsis module which use C
library. However, since it's written in perl, it's platform independent (so
you don't need C compiler), and can be easily modified. I hope that it
creates data structures which are easier to use than ones created by
OpenIsis, so reduced time in other parts of the code should compensate for
slower performance of this module (speed of reading ISIS database is
rarely an issue).

=head1 METHODS

=cut

#  my $ORDN;            # Nodes Order
#  my $ORDF;            # Leafs Order
#  my $N;               # Number of Memory buffers for nodes
#  my $K;               # Number of buffers for first level index
#  my $LIV;             # Current number of Index Levels
#  my $POSRX;           # Pointer to Root Record in N0x
#  my $NMAXPOS;         # Next Available position in N0x
#  my $FMAXPOS;         # Next available position in L0x
#  my $ABNORMAL;        # Formal BTree normality indicator

#
# some binary reads
#

=head2 new

Open ISIS database

 my $isis = new IsisDB(
        isisdb => './cds/cds',
        read_fdt => 1,
        include_deleted => 1,
        hash_filter => sub {
                my $v = shift;
                $v =~ s#foo#bar#g;
        },
        debug => 1,
 );

Options are described below:

=over 5

=item isisdb

This is full or relative path to ISIS database files which include
common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.

In this example it uses C<./cds/cds.MST> and related files.

=item read_fdt

Boolean flag to specify if field definition table should be read. It's off
by default.

=item include_deleted

Don't skip logically deleted records in ISIS.

=item hash_filter

Filter code ref which will be used before data is converted to hash.

=item debug

Dump a B<lot> of debugging output.

=back

It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.

=cut

sub new {
        my $class = shift;
        my $self = {};
        bless($self, $class);

        croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});

        foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
                $self->{$v} = {@_}->{$v};
        }

        # if you want to read .FDT file use read_fdt argument when creating class!
        if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {

                # read the $db.FDT file for tags
                my $fieldzone=0;

                open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";

                while (<fileFDT>) {
                        chomp;
                        if ($fieldzone) {
                                my $name=substr($_,0,30);
                                my $tag=substr($_,50,3);

                                $name =~ s/\s+$//;
                                $tag =~ s/\s+$//;

                                $self->{'TagName'}->{$tag}=$name;  
                        }

                        if (/^\*\*\*/) {
                                $fieldzone=1;
                        }
                }
                
                close(fileFDT);
        }

        # Get the Maximum MFN from $db.MST

        open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";

        # MST format:   (* = 32 bit signed)
        # CTLMFN*       always 0
        # NXTMFN*       MFN to be assigned to the next record created
        # NXTMFB*       last block allocated to master file
        # NXTMFP        offset to next available position in last block
        # MFTYPE        always 0 for user db file (1 for system)
        seek(fileMST,4,0);

        my $buff;

        read(fileMST, $buff, 4);
        $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";

        # save maximum MFN
        $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;

        close(fileMST);

        # Get the index information from $db.CNT
   
        open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";

        # There is two 26 Bytes fixed lenght records

        #  0: IDTYPE    BTree type                              16
        #  2: ORDN      Nodes Order                             16
        #  4: ORDF      Leafs Order                             16
        #  6: N         Number of Memory buffers for nodes      16
        #  8: K         Number of buffers for first level index 16
        # 10: LIV       Current number of Index Levels          16
        # 12: POSRX*    Pointer to Root Record in N0x           32
        # 16: NMAXPOS*  Next Available position in N0x          32
        # 20: FMAXPOS*  Next available position in L0x          32
        # 24: ABNORMAL  Formal BTree normality indicator        16
        # length: 26 bytes

        sub unpack_cnt {
                my $self = shift;

                my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);

                my $buff = shift || return;
                my @arr = unpack("ssssssllls", $buff);

                print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});

                my $IDTYPE = shift @arr;
                foreach (@flds) {
                        $self->{$IDTYPE}->{$_} = abs(shift @arr);
                }
        }

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);

        read(fileCNT, $buff, 26);
        $self->unpack_cnt($buff);


        close(fileCNT);

        print Dumper($self),"\n" if ($self->{debug});

        # open files for later
        open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";

        open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";

        $self ? return $self : return undef;
}

=head2 fetch

Read record with selected MFN

  my $rec = $isis->fetch(55);

Returns hash with keys which are field names and values are unpacked values
for that field like this:

  $rec = {
    '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
    '990' => [ '2140', '88', 'HAY' ],
  };

=cut

sub fetch {
        my $self = shift;

        my $mfn = shift || croak "fetch needs MFN as argument!";

        print "fetch: $mfn\n" if ($self->{debug});

        # XXX check this?
        my $mfnpos=($mfn+int(($mfn-1)/127))*4;

        print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
        seek($self->{'fileXRF'},$mfnpos,0);

        my $buff;

        # read XRFMFB abd XRFMFP
        read($self->{'fileXRF'}, $buff, 4);
        my $pointer=unpack("l",$buff) || carp "pointer is null";

        my $XRFMFB = int($pointer/2048);
        my $XRFMFP = $pointer - ($XRFMFB*2048);

        print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});

        # XXX fix this to be more readable!!
        # e.g. (XRFMFB - 1) * 512 + XRFMFP

        my $offset = $pointer;
        my $offset2=int($offset/2048)-1;
        my $offset22=int($offset/4096);
        my $offset3=$offset-($offset22*4096);
        if ($offset3>512) {
                $offset3=$offset3-2048;
        }
        my $offset4=($offset2*512)+$offset3;

        print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});

        # Get Record Information

        seek($self->{'fileMST'},$offset4,0);

        read($self->{'fileMST'}, $buff, 4);
        my $value=unpack("l",$buff);

        if ($value!=$mfn) {
print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");    
                return -1;      # XXX deleted record?
        }

#       $MFRL=$self->Read16($fileMST);
#       $MFBWB=$self->Read32($fileMST);
#       $MFBWP=$self->Read16($fileMST);
#       $BASE=$self->Read16($fileMST);
#       $NVF=$self->Read16($fileMST);
#       $STATUS=$self->Read16($fileMST);

        read($self->{'fileMST'}, $buff, 14);

        my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);

        print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});

        # delete old record
        delete $self->{record};

        if (! $self->{'include_deleted'} && $MFRL < 0) {
                print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
                return;
        }

        # Get Directory Format

        my @FieldPOS;
        my @FieldLEN;
        my @FieldTAG;

        read($self->{'fileMST'}, $buff, 6 * $NVF);

        my $fld_len = 0;

        for (my $i = 0 ; $i < $NVF ; $i++) {

#               $TAG=$self->Read16($fileMST);
#               $POS=$self->Read16($fileMST);
#               $LEN=$self->Read16($fileMST);

                my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));

                print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});

                # The TAG does not exists in .FDT so we set it to 0.
                #
                # XXX This is removed from perl version; .FDT file is updated manually, so
                # you will often have fields in .MST file which aren't in .FDT. On the other
                # hand, IsisMarc doesn't use .FDT files at all!

                #if (! $self->{TagName}->{$TAG}) {
                #       $TAG=0;
                #}

                push @FieldTAG,$TAG;
                push @FieldPOS,$POS;
                push @FieldLEN,$LEN;

                $fld_len += $LEN;
        }

        # Get Variable Fields

        read($self->{'fileMST'},$buff,$fld_len);

        for (my $i = 0 ; $i < $NVF ; $i++) {
                # skip zero-sized fields
                next if ($FieldLEN[$i] == 0);

                push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
        }
        close(fileMST);

        print Dumper($self),"\n" if ($self->{debug});

        return $self->{'record'};
}

=head2 to_ascii

Dump ASCII output of record with specified MFN

  print $isis->to_ascii(42);

It outputs something like this:

  210   ^aNew York^cNew York University press^dcop. 1988
  990   2140
  990   88
  990   HAY

If C<read_fdt> is specified when calling C<new> it will display field names
from C<.FDT> file instead of numeric tags.

=cut

sub to_ascii {
        my $self = shift;

        my $mfn = shift || croak "need MFN";

        my $rec = $self->fetch($mfn);

        my $out = "0\t$mfn";

        foreach my $f (sort keys %{$rec}) {
                my $fn = $self->tag_name($f);
                $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
        }

        $out .= "\n";

        return $out;
}

=head2 to_hash

Read record with specified MFN and convert it to hash

  my $hash = $isis->to_hash($mfn);

It has ability to convert characters (using C<hash_filter> from ISIS
database before creating structures enabling character re-mapping or quick
fix-up of data.

This function returns hash which is like this:

  $hash = {
    '210' => [
               {
                 'c' => 'New York University press',
                 'a' => 'New York',
                 'd' => 'cop. 1988'
               }
             ],
    '990' => [
               '2140',
               '88',
               'HAY'
             ],
  };

You can later use that hash to produce any output from ISIS data.

If database is created using IsisMarc, it will also have to special fields
which will be used for identifiers, C<i1> and C<i2> like this:

  '200' => [
             {
               'i1' => '1',
               'i2' => ' '
               'a' => 'Goa',
               'f' => 'Valdo D\'Arienzo',
               'e' => 'tipografie e tipografi nel XVI secolo',
             }
           ],

This method will also create additional field C<000> with MFN.

=cut

sub to_hash {
        my $self = shift;

        my $mfn = shift || confess "need mfn!";

        # init record to include MFN as field 000
        my $rec = { '000' => $mfn };

        my $row = $self->fetch($mfn);

        foreach my $k (keys %{$row}) {
                foreach my $l (@{$row->{$k}}) {

                        # filter output
                        $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});

                        my $val;

                        # has identifiers?
                        ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])//);

                        # has subfields?
                        if ($l =~ m/\^/) {
                                foreach my $t (split(/\^/,$l)) {
                                        next if (! $t);
                                        $val->{substr($t,0,1)} = substr($t,1);
                                }
                        } else {
                                $val = $l;
                        }

                        push @{$rec->{$k}}, $val;
                }
        }

        return $rec;
}

=head2 tag_name

Return name of selected tag

 print $isis->tag_name('200');

=cut

sub tag_name {
        my $self = shift;
        my $tag = shift || return;
        return $self->{'TagName'}->{$tag} || $tag;
}

1;

=head1 BUGS

This module has been very lightly tested. Use with caution and report bugs.

=head1 AUTHOR

        Dobrica Pavlinusic
        CPAN ID: DPAVLIN
        dpavlin@rot13.org
        http://www.rot13.org/~dpavlin/

This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.

=head1 COPYRIGHT

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.


=head1 SEE ALSO

OpenIsis web site L<http://www.openisis.org>

perl4lib site L<http://perl4lib.perl.org>

1	dpavlin	1	package IsisDB;
2			use strict;
3
4			use Carp;
5			use Data::Dumper;
6
7			BEGIN {
8			use Exporter ();
9			use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10	dpavlin	15	$VERSION = 0.05;
11	dpavlin	1	@ISA = qw (Exporter);
12			#Give a hoot don't pollute, do not export more than needed by default
13			@EXPORT = qw ();
14			@EXPORT_OK = qw ();
15			%EXPORT_TAGS = ();
16
17			}
18
19			=head1 NAME
20
21	dpavlin	15	IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database
22	dpavlin	1
23			=head1 SYNOPSIS
24
25	dpavlin	11	use IsisDB;
26
27	dpavlin	1	my $isis = new IsisDB(
28			isisdb => './cds/cds',
29			);
30
31	dpavlin	11	for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
32			print $isis->to_ascii($mfn),"\n";
33			}
34
35	dpavlin	1	=head1 DESCRIPTION
36
37	dpavlin	15	This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
38			IsisMarc. It can be used as perl-only alternative to OpenIsis module.
39	dpavlin	1
40	dpavlin	15	It can create hash values from data in ISIS database (using C<to_hash>),
41			ASCII dump (using C<to_ascii>) or just hash with field names and packed
42			values (like C<^asomething^belse>).
43	dpavlin	11
44			Unique feature of this module is ability to C<include_deleted> records.
45			It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
46			fields which are zero sized will be filled with random junk from memory).
47
48	dpavlin	15	It also has support for identifiers (only if ISIS database is created by
49			IsisMarc), see C<to_hash>.
50
51			This will module will always be slower than OpenIsis module which use C
52			library. However, since it's written in perl, it's platform independent (so
53			you don't need C compiler), and can be easily modified. I hope that it
54			creates data structures which are easier to use than ones created by
55			OpenIsis, so reduced time in other parts of the code should compensate for
56			slower performance of this module (speed of reading ISIS database is
57			rarely an issue).
58
59	dpavlin	1	=head1 METHODS
60
61			=cut
62
63			# my $ORDN; # Nodes Order
64			# my $ORDF; # Leafs Order
65			# my $N; # Number of Memory buffers for nodes
66			# my $K; # Number of buffers for first level index
67			# my $LIV; # Current number of Index Levels
68			# my $POSRX; # Pointer to Root Record in N0x
69			# my $NMAXPOS; # Next Available position in N0x
70			# my $FMAXPOS; # Next available position in L0x
71			# my $ABNORMAL; # Formal BTree normality indicator
72
73			#
74			# some binary reads
75			#
76
77			=head2 new
78
79	dpavlin	15	Open ISIS database
80	dpavlin	1
81			my $isis = new IsisDB(
82			isisdb => './cds/cds',
83			read_fdt => 1,
84	dpavlin	12	include_deleted => 1,
85			hash_filter => sub {
86			my $v = shift;
87			$v =~ s#foo#bar#g;
88			},
89	dpavlin	1	debug => 1,
90			);
91
92	dpavlin	2	Options are described below:
93
94			=over 5
95
96	dpavlin	1	=item isisdb
97
98	dpavlin	15	This is full or relative path to ISIS database files which include
99			common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
100	dpavlin	1
101	dpavlin	15	In this example it uses C<./cds/cds.MST> and related files.
102
103	dpavlin	1	=item read_fdt
104
105			Boolean flag to specify if field definition table should be read. It's off
106			by default.
107
108	dpavlin	9	=item include_deleted
109
110	dpavlin	11	Don't skip logically deleted records in ISIS.
111	dpavlin	9
112	dpavlin	12	=item hash_filter
113
114			Filter code ref which will be used before data is converted to hash.
115
116			=item debug
117
118			Dump a B<lot> of debugging output.
119
120	dpavlin	2	=back
121
122			It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
123
124	dpavlin	1	=cut
125
126			sub new {
127			my $class = shift;
128			my $self = {};
129			bless($self, $class);
130
131	dpavlin	9	croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
132	dpavlin	1
133	dpavlin	12	foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
134	dpavlin	9	$self->{$v} = {@_}->{$v};
135			}
136	dpavlin	1
137			# if you want to read .FDT file use read_fdt argument when creating class!
138			if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
139
140			# read the $db.FDT file for tags
141			my $fieldzone=0;
142
143			open(fileFDT, $self->{isisdb}.".FDT") \|\| croak "can't read '$self->{isisdb}.FDT': $!";
144
145			while (<fileFDT>) {
146			chomp;
147			if ($fieldzone) {
148			my $name=substr($_,0,30);
149			my $tag=substr($_,50,3);
150
151			$name =~ s/\s+$//;
152			$tag =~ s/\s+$//;
153
154			$self->{'TagName'}->{$tag}=$name;
155			}
156
157			if (/^\\\*/) {
158			$fieldzone=1;
159			}
160			}
161
162			close(fileFDT);
163			}
164
165			# Get the Maximum MFN from $db.MST
166
167			open(fileMST,$self->{isisdb}.".MST") \|\| croak "can't read '$self->{isisdb}.MST': $!";
168
169			# MST format: (* = 32 bit signed)
170			# CTLMFN* always 0
171			# NXTMFN* MFN to be assigned to the next record created
172			# NXTMFB* last block allocated to master file
173			# NXTMFP offset to next available position in last block
174			# MFTYPE always 0 for user db file (1 for system)
175			seek(fileMST,4,0);
176
177	dpavlin	11	my $buff;
178
179			read(fileMST, $buff, 4);
180			$self->{'NXTMFN'}=unpack("l",$buff) \|\| carp "NXTNFN is zero";
181
182	dpavlin	2	# save maximum MFN
183			$self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
184
185	dpavlin	1	close(fileMST);
186
187			# Get the index information from $db.CNT
188
189			open(fileCNT, $self->{isisdb}.".CNT") \|\| croak "can't read '$self->{isisdb}.CNT': $!";
190
191			# There is two 26 Bytes fixed lenght records
192
193			# 0: IDTYPE BTree type 16
194			# 2: ORDN Nodes Order 16
195			# 4: ORDF Leafs Order 16
196			# 6: N Number of Memory buffers for nodes 16
197			# 8: K Number of buffers for first level index 16
198			# 10: LIV Current number of Index Levels 16
199			# 12: POSRX* Pointer to Root Record in N0x 32
200			# 16: NMAXPOS* Next Available position in N0x 32
201			# 20: FMAXPOS* Next available position in L0x 32
202			# 24: ABNORMAL Formal BTree normality indicator 16
203			# length: 26 bytes
204
205			sub unpack_cnt {
206			my $self = shift;
207
208			my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
209
210			my $buff = shift \|\| return;
211			my @arr = unpack("ssssssllls", $buff);
212
213	dpavlin	9	print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
214
215	dpavlin	1	my $IDTYPE = shift @arr;
216			foreach (@flds) {
217			$self->{$IDTYPE}->{$_} = abs(shift @arr);
218			}
219			}
220
221			read(fileCNT, $buff, 26);
222			$self->unpack_cnt($buff);
223
224			read(fileCNT, $buff, 26);
225			$self->unpack_cnt($buff);
226
227
228			close(fileCNT);
229
230	dpavlin	15	print Dumper($self),"\n" if ($self->{debug});
231	dpavlin	1
232	dpavlin	7	# open files for later
233			open($self->{'fileXRF'}, $self->{isisdb}.".XRF") \|\| croak "can't open '$self->{isisdb}.XRF': $!";
234
235			open($self->{'fileMST'}, $self->{isisdb}.".MST") \|\| croak "can't open '$self->{isisdb}.MST': $!";
236
237	dpavlin	1	$self ? return $self : return undef;
238			}
239
240	dpavlin	7	=head2 fetch
241	dpavlin	1
242	dpavlin	2	Read record with selected MFN
243	dpavlin	1
244	dpavlin	7	my $rec = $isis->fetch(55);
245	dpavlin	2
246			Returns hash with keys which are field names and values are unpacked values
247	dpavlin	15	for that field like this:
248	dpavlin	2
249	dpavlin	15	$rec = {
250			'210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
251			'990' => [ '2140', '88', 'HAY' ],
252			};
253
254	dpavlin	2	=cut
255
256	dpavlin	7	sub fetch {
257	dpavlin	1	my $self = shift;
258
259	dpavlin	7	my $mfn = shift \|\| croak "fetch needs MFN as argument!";
260	dpavlin	1
261	dpavlin	7	print "fetch: $mfn\n" if ($self->{debug});
262	dpavlin	1
263			# XXX check this?
264			my $mfnpos=($mfn+int(($mfn-1)/127))*4;
265
266			print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
267	dpavlin	7	seek($self->{'fileXRF'},$mfnpos,0);
268	dpavlin	1
269	dpavlin	11	my $buff;
270
271	dpavlin	1	# read XRFMFB abd XRFMFP
272	dpavlin	11	read($self->{'fileXRF'}, $buff, 4);
273			my $pointer=unpack("l",$buff) \|\| carp "pointer is null";
274	dpavlin	1
275			my $XRFMFB = int($pointer/2048);
276			my $XRFMFP = $pointer - ($XRFMFB*2048);
277
278			print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
279
280			# XXX fix this to be more readable!!
281			# e.g. (XRFMFB - 1) * 512 + XRFMFP
282
283			my $offset = $pointer;
284			my $offset2=int($offset/2048)-1;
285			my $offset22=int($offset/4096);
286			my $offset3=$offset-($offset22*4096);
287			if ($offset3>512) {
288			$offset3=$offset3-2048;
289			}
290			my $offset4=($offset2*512)+$offset3;
291
292			print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
293
294			# Get Record Information
295
296	dpavlin	7	seek($self->{'fileMST'},$offset4,0);
297	dpavlin	1
298	dpavlin	11	read($self->{'fileMST'}, $buff, 4);
299			my $value=unpack("l",$buff);
300	dpavlin	1
301			if ($value!=$mfn) {
302			print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
303			return -1; # XXX deleted record?
304			}
305
306			# $MFRL=$self->Read16($fileMST);
307			# $MFBWB=$self->Read32($fileMST);
308			# $MFBWP=$self->Read16($fileMST);
309			# $BASE=$self->Read16($fileMST);
310			# $NVF=$self->Read16($fileMST);
311			# $STATUS=$self->Read16($fileMST);
312
313	dpavlin	7	read($self->{'fileMST'}, $buff, 14);
314	dpavlin	1
315			my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
316
317			print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
318
319	dpavlin	9	# delete old record
320			delete $self->{record};
321
322			if (! $self->{'include_deleted'} && $MFRL < 0) {
323			print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
324			return;
325			}
326
327	dpavlin	1	# Get Directory Format
328
329			my @FieldPOS;
330			my @FieldLEN;
331			my @FieldTAG;
332
333	dpavlin	8	read($self->{'fileMST'}, $buff, 6 * $NVF);
334
335			my $fld_len = 0;
336
337	dpavlin	1	for (my $i = 0 ; $i < $NVF ; $i++) {
338
339			# $TAG=$self->Read16($fileMST);
340			# $POS=$self->Read16($fileMST);
341			# $LEN=$self->Read16($fileMST);
342
343	dpavlin	8	my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
344	dpavlin	1
345			print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
346
347			# The TAG does not exists in .FDT so we set it to 0.
348			#
349			# XXX This is removed from perl version; .FDT file is updated manually, so
350			# you will often have fields in .MST file which aren't in .FDT. On the other
351			# hand, IsisMarc doesn't use .FDT files at all!
352
353			#if (! $self->{TagName}->{$TAG}) {
354			# $TAG=0;
355			#}
356
357			push @FieldTAG,$TAG;
358			push @FieldPOS,$POS;
359			push @FieldLEN,$LEN;
360	dpavlin	8
361			$fld_len += $LEN;
362	dpavlin	1	}
363
364			# Get Variable Fields
365
366	dpavlin	8	read($self->{'fileMST'},$buff,$fld_len);
367
368	dpavlin	1	for (my $i = 0 ; $i < $NVF ; $i++) {
369	dpavlin	10	# skip zero-sized fields
370			next if ($FieldLEN[$i] == 0);
371
372	dpavlin	8	push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
373	dpavlin	1	}
374			close(fileMST);
375
376	dpavlin	15	print Dumper($self),"\n" if ($self->{debug});
377	dpavlin	1
378	dpavlin	2	return $self->{'record'};
379	dpavlin	1	}
380
381	dpavlin	2	=head2 to_ascii
382
383	dpavlin	15	Dump ASCII output of record with specified MFN
384	dpavlin	2
385	dpavlin	15	print $isis->to_ascii(42);
386	dpavlin	2
387	dpavlin	15	It outputs something like this:
388
389			210 ^aNew York^cNew York University press^dcop. 1988
390			990 2140
391			990 88
392			990 HAY
393
394			If C<read_fdt> is specified when calling C<new> it will display field names
395			from C<.FDT> file instead of numeric tags.
396
397	dpavlin	2	=cut
398
399			sub to_ascii {
400			my $self = shift;
401
402			my $mfn = shift \|\| croak "need MFN";
403
404	dpavlin	7	my $rec = $self->fetch($mfn);
405	dpavlin	2
406			my $out = "0\t$mfn";
407
408			foreach my $f (sort keys %{$rec}) {
409	dpavlin	15	my $fn = $self->tag_name($f);
410			$out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
411	dpavlin	2	}
412
413			$out .= "\n";
414
415			return $out;
416			}
417
418	dpavlin	12	=head2 to_hash
419
420	dpavlin	15	Read record with specified MFN and convert it to hash
421	dpavlin	12
422			my $hash = $isis->to_hash($mfn);
423
424			It has ability to convert characters (using C<hash_filter> from ISIS
425	dpavlin	15	database before creating structures enabling character re-mapping or quick
426			fix-up of data.
427	dpavlin	12
428			This function returns hash which is like this:
429
430			$hash = {
431			'210' => [
432			{
433			'c' => 'New York University press',
434			'a' => 'New York',
435			'd' => 'cop. 1988'
436			}
437			],
438			'990' => [
439			'2140',
440			'88',
441			'HAY'
442			],
443			};
444
445	dpavlin	15	You can later use that hash to produce any output from ISIS data.
446	dpavlin	12
447	dpavlin	15	If database is created using IsisMarc, it will also have to special fields
448			which will be used for identifiers, C<i1> and C<i2> like this:
449
450			'200' => [
451			{
452			'i1' => '1',
453			'i2' => ' '
454			'a' => 'Goa',
455			'f' => 'Valdo D\'Arienzo',
456			'e' => 'tipografie e tipografi nel XVI secolo',
457			}
458			],
459
460			This method will also create additional field C<000> with MFN.
461
462	dpavlin	12	=cut
463
464			sub to_hash {
465			my $self = shift;
466
467			my $mfn = shift \|\| confess "need mfn!";
468
469	dpavlin	15	# init record to include MFN as field 000
470			my $rec = { '000' => $mfn };
471
472	dpavlin	12	my $row = $self->fetch($mfn);
473
474			foreach my $k (keys %{$row}) {
475			foreach my $l (@{$row->{$k}}) {
476
477			# filter output
478			$l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
479
480	dpavlin	15	my $val;
481
482			# has identifiers?
483			($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])//);
484
485	dpavlin	12	# has subfields?
486			if ($l =~ m/\^/) {
487			foreach my $t (split(/\^/,$l)) {
488			next if (! $t);
489			$val->{substr($t,0,1)} = substr($t,1);
490			}
491			} else {
492			$val = $l;
493			}
494
495			push @{$rec->{$k}}, $val;
496			}
497			}
498
499			return $rec;
500			}
501
502	dpavlin	15	=head2 tag_name
503	dpavlin	1
504	dpavlin	15	Return name of selected tag
505	dpavlin	1
506	dpavlin	15	print $isis->tag_name('200');
507
508			=cut
509
510			sub tag_name {
511	dpavlin	1	my $self = shift;
512	dpavlin	15	my $tag = shift \|\| return;
513			return $self->{'TagName'}->{$tag} \|\| $tag;
514	dpavlin	1	}
515
516			1;
517
518			=head1 BUGS
519
520			This module has been very lightly tested. Use with caution and report bugs.
521
522			=head1 AUTHOR
523
524			Dobrica Pavlinusic
525			CPAN ID: DPAVLIN
526			dpavlin@rot13.org
527			http://www.rot13.org/~dpavlin/
528
529	dpavlin	15	This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
530			written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
531	dpavlin	1
532			=head1 COPYRIGHT
533
534			This program is free software; you can redistribute
535			it and/or modify it under the same terms as Perl itself.
536
537			The full text of the license can be found in the
538			LICENSE file included with this module.
539
540
541			=head1 SEE ALSO
542
543	dpavlin	15	OpenIsis web site L<http://www.openisis.org>
544	dpavlin	1
545	dpavlin	15	perl4lib site L<http://perl4lib.perl.org>
546