/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 59 - (hide annotations)
Sun Jul 9 12:22:09 2006 UTC (17 years, 8 months ago) by dpavlin
File size: 18835 byte(s)
added link to Biblio::Isis::Manual
1 dpavlin 36 package Biblio::Isis;
2 dpavlin 1 use strict;
3    
4     use Carp;
5 dpavlin 18 use File::Glob qw(:globally :nocase);
6    
7 dpavlin 1 BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 57 $VERSION = 0.21;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21 dpavlin 36 Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22 dpavlin 1
23     =head1 SYNOPSIS
24    
25 dpavlin 36 use Biblio::Isis;
26 dpavlin 11
27 dpavlin 36 my $isis = new Biblio::Isis(
28 dpavlin 1 isisdb => './cds/cds',
29     );
30    
31 dpavlin 32 for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
32 dpavlin 11 print $isis->to_ascii($mfn),"\n";
33     }
34    
35 dpavlin 1 =head1 DESCRIPTION
36    
37 dpavlin 15 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
38 dpavlin 27 IsisMarc. It can be used as perl-only alternative to OpenIsis module which
39     seems to depriciate it's old C<XS> bindings for perl.
40 dpavlin 1
41 dpavlin 15 It can create hash values from data in ISIS database (using C<to_hash>),
42     ASCII dump (using C<to_ascii>) or just hash with field names and packed
43     values (like C<^asomething^belse>).
44 dpavlin 11
45     Unique feature of this module is ability to C<include_deleted> records.
46     It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
47     fields which are zero sized will be filled with random junk from memory).
48    
49 dpavlin 15 It also has support for identifiers (only if ISIS database is created by
50     IsisMarc), see C<to_hash>.
51    
52 dpavlin 27 This module will always be slower than OpenIsis module which use C
53 dpavlin 15 library. However, since it's written in perl, it's platform independent (so
54     you don't need C compiler), and can be easily modified. I hope that it
55     creates data structures which are easier to use than ones created by
56     OpenIsis, so reduced time in other parts of the code should compensate for
57     slower performance of this module (speed of reading ISIS database is
58     rarely an issue).
59    
60 dpavlin 1 =head1 METHODS
61    
62     =cut
63    
64     # my $ORDN; # Nodes Order
65     # my $ORDF; # Leafs Order
66     # my $N; # Number of Memory buffers for nodes
67     # my $K; # Number of buffers for first level index
68     # my $LIV; # Current number of Index Levels
69     # my $POSRX; # Pointer to Root Record in N0x
70     # my $NMAXPOS; # Next Available position in N0x
71     # my $FMAXPOS; # Next available position in L0x
72     # my $ABNORMAL; # Formal BTree normality indicator
73    
74     #
75     # some binary reads
76     #
77    
78     =head2 new
79    
80 dpavlin 15 Open ISIS database
81 dpavlin 1
82 dpavlin 36 my $isis = new Biblio::Isis(
83 dpavlin 1 isisdb => './cds/cds',
84     read_fdt => 1,
85 dpavlin 12 include_deleted => 1,
86     hash_filter => sub {
87     my $v = shift;
88     $v =~ s#foo#bar#g;
89     },
90 dpavlin 1 debug => 1,
91 dpavlin 57 join_subfields_with => ' ; ',
92 dpavlin 1 );
93    
94 dpavlin 2 Options are described below:
95    
96     =over 5
97    
98 dpavlin 1 =item isisdb
99    
100 dpavlin 15 This is full or relative path to ISIS database files which include
101 dpavlin 18 common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
102     C<read_fdt> option) files.
103 dpavlin 1
104 dpavlin 15 In this example it uses C<./cds/cds.MST> and related files.
105    
106 dpavlin 1 =item read_fdt
107    
108     Boolean flag to specify if field definition table should be read. It's off
109     by default.
110    
111 dpavlin 9 =item include_deleted
112    
113 dpavlin 11 Don't skip logically deleted records in ISIS.
114 dpavlin 9
115 dpavlin 12 =item hash_filter
116    
117     Filter code ref which will be used before data is converted to hash.
118    
119     =item debug
120    
121 dpavlin 54 Dump a B<lot> of debugging output even at level 1. For even more increase level.
122 dpavlin 12
123 dpavlin 57 =item join_subfields_with
124    
125     Define delimiter which will be used to join repeatable subfields. This
126     option is included to support lagacy application written against version
127     older than 0.21 of this module. By default, it disabled. See L</to_hash>.
128    
129 dpavlin 2 =back
130    
131 dpavlin 1 =cut
132    
133     sub new {
134     my $class = shift;
135     my $self = {};
136     bless($self, $class);
137    
138 dpavlin 9 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
139 dpavlin 1
140 dpavlin 12 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
141 dpavlin 9 $self->{$v} = {@_}->{$v};
142     }
143 dpavlin 1
144 dpavlin 18 my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
145    
146     foreach my $f (@isis_files) {
147     my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
148     $self->{lc($ext)."_file"} = $f;
149     }
150    
151     my @must_exist = qw(mst xrf);
152     push @must_exist, "fdt" if ($self->{read_fdt});
153    
154     foreach my $ext (@must_exist) {
155 dpavlin 39 unless ($self->{$ext."_file"}) {
156     carp "missing ",uc($ext)," file in ",$self->{isisdb};
157     return;
158     }
159 dpavlin 18 }
160    
161 dpavlin 45 if ($self->{debug}) {
162     print STDERR "## using files: ",join(" ",@isis_files),"\n";
163     eval "use Data::Dump";
164 dpavlin 18
165 dpavlin 45 if (! $@) {
166     *Dumper = *Data::Dump::dump;
167     } else {
168     use Data::Dumper;
169     }
170     }
171    
172 dpavlin 1 # if you want to read .FDT file use read_fdt argument when creating class!
173 dpavlin 18 if ($self->{read_fdt} && -e $self->{fdt_file}) {
174 dpavlin 1
175     # read the $db.FDT file for tags
176     my $fieldzone=0;
177    
178 dpavlin 33 open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
179     binmode($fileFDT);
180 dpavlin 1
181 dpavlin 33 while (<$fileFDT>) {
182 dpavlin 1 chomp;
183     if ($fieldzone) {
184     my $name=substr($_,0,30);
185     my $tag=substr($_,50,3);
186    
187     $name =~ s/\s+$//;
188     $tag =~ s/\s+$//;
189    
190     $self->{'TagName'}->{$tag}=$name;
191     }
192    
193     if (/^\*\*\*/) {
194     $fieldzone=1;
195     }
196     }
197    
198 dpavlin 33 close($fileFDT);
199 dpavlin 1 }
200    
201     # Get the Maximum MFN from $db.MST
202    
203 dpavlin 18 open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
204 dpavlin 33 binmode($self->{'fileMST'});
205 dpavlin 1
206     # MST format: (* = 32 bit signed)
207     # CTLMFN* always 0
208     # NXTMFN* MFN to be assigned to the next record created
209     # NXTMFB* last block allocated to master file
210     # NXTMFP offset to next available position in last block
211     # MFTYPE always 0 for user db file (1 for system)
212 dpavlin 34 seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
213 dpavlin 1
214 dpavlin 11 my $buff;
215    
216 dpavlin 34 read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
217     $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
218 dpavlin 11
219 dpavlin 45 print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
220 dpavlin 18
221     # open files for later
222     open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
223 dpavlin 33 binmode($self->{'fileXRF'});
224 dpavlin 18
225     $self ? return $self : return undef;
226     }
227    
228 dpavlin 32 =head2 count
229    
230     Return number of records in database
231    
232     print $isis->count;
233    
234     =cut
235    
236     sub count {
237     my $self = shift;
238     return $self->{'NXTMFN'} - 1;
239     }
240    
241 dpavlin 7 =head2 fetch
242 dpavlin 1
243 dpavlin 2 Read record with selected MFN
244 dpavlin 1
245 dpavlin 7 my $rec = $isis->fetch(55);
246 dpavlin 2
247     Returns hash with keys which are field names and values are unpacked values
248 dpavlin 15 for that field like this:
249 dpavlin 2
250 dpavlin 15 $rec = {
251     '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
252     '990' => [ '2140', '88', 'HAY' ],
253     };
254    
255 dpavlin 2 =cut
256    
257 dpavlin 7 sub fetch {
258 dpavlin 1 my $self = shift;
259    
260 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
261 dpavlin 1
262 dpavlin 16 # is mfn allready in memory?
263     my $old_mfn = $self->{'current_mfn'} || -1;
264 dpavlin 25 return $self->{record} if ($mfn == $old_mfn);
265 dpavlin 1
266 dpavlin 16 print STDERR "## fetch: $mfn\n" if ($self->{debug});
267    
268 dpavlin 1 # XXX check this?
269     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
270    
271 dpavlin 18 print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
272 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
273 dpavlin 1
274 dpavlin 11 my $buff;
275    
276 dpavlin 25 # delete old record
277     delete $self->{record};
278    
279 dpavlin 1 # read XRFMFB abd XRFMFP
280 dpavlin 11 read($self->{'fileXRF'}, $buff, 4);
281 dpavlin 41 my $pointer=unpack("V",$buff);
282     if (! $pointer) {
283     if ($self->{include_deleted}) {
284     return;
285     } else {
286     warn "pointer for MFN $mfn is null\n";
287     return;
288     }
289     }
290 dpavlin 1
291 dpavlin 25 # check for logically deleted record
292 dpavlin 33 if ($pointer & 0x80000000) {
293 dpavlin 25 print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
294     $self->{deleted} = $mfn;
295    
296     return unless $self->{include_deleted};
297    
298 dpavlin 33 # abs
299     $pointer = ($pointer ^ 0xffffffff) + 1;
300 dpavlin 25 }
301    
302 dpavlin 1 my $XRFMFB = int($pointer/2048);
303     my $XRFMFP = $pointer - ($XRFMFB*2048);
304    
305 dpavlin 16 # (XRFMFB - 1) * 512 + XRFMFP
306     # why do i have to do XRFMFP % 1024 ?
307 dpavlin 1
308 dpavlin 26 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
309 dpavlin 1
310 dpavlin 16 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
311 dpavlin 1
312     # Get Record Information
313    
314 dpavlin 33 seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
315 dpavlin 1
316 dpavlin 33 read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
317     my $value=unpack("V",$buff);
318 dpavlin 1
319 dpavlin 16 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
320    
321 dpavlin 1 if ($value!=$mfn) {
322 dpavlin 26 if ($value == 0) {
323     print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
324     $self->{deleted} = $mfn;
325     return;
326     }
327    
328     carp "Error: MFN ".$mfn." not found in MST file, found $value";
329     return;
330 dpavlin 1 }
331    
332 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
333 dpavlin 1
334 dpavlin 33 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
335 dpavlin 1
336 dpavlin 16 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
337 dpavlin 1
338 dpavlin 25 warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
339 dpavlin 9
340 dpavlin 16 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
341    
342 dpavlin 1 # Get Directory Format
343    
344     my @FieldPOS;
345     my @FieldLEN;
346     my @FieldTAG;
347    
348 dpavlin 8 read($self->{'fileMST'}, $buff, 6 * $NVF);
349    
350 dpavlin 16 my $rec_len = 0;
351 dpavlin 8
352 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
353    
354 dpavlin 33 my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
355 dpavlin 1
356 dpavlin 16 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
357 dpavlin 1
358     # The TAG does not exists in .FDT so we set it to 0.
359     #
360     # XXX This is removed from perl version; .FDT file is updated manually, so
361     # you will often have fields in .MST file which aren't in .FDT. On the other
362     # hand, IsisMarc doesn't use .FDT files at all!
363    
364     #if (! $self->{TagName}->{$TAG}) {
365     # $TAG=0;
366     #}
367    
368     push @FieldTAG,$TAG;
369     push @FieldPOS,$POS;
370     push @FieldLEN,$LEN;
371 dpavlin 8
372 dpavlin 16 $rec_len += $LEN;
373 dpavlin 1 }
374    
375     # Get Variable Fields
376    
377 dpavlin 16 read($self->{'fileMST'},$buff,$rec_len);
378 dpavlin 8
379 dpavlin 16 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
380    
381 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
382 dpavlin 10 # skip zero-sized fields
383     next if ($FieldLEN[$i] == 0);
384    
385 dpavlin 8 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
386 dpavlin 1 }
387    
388 dpavlin 16 $self->{'current_mfn'} = $mfn;
389    
390 dpavlin 25 print STDERR Dumper($self),"\n" if ($self->{debug});
391 dpavlin 1
392 dpavlin 2 return $self->{'record'};
393 dpavlin 1 }
394    
395 dpavlin 54 =head2 mfn
396    
397     Returns current MFN position
398    
399     my $mfn = $isis->mfn;
400    
401     =cut
402    
403     # This function should be simple return $self->{current_mfn},
404     # but if new is called with _hack_mfn it becomes setter.
405     # It's useful in tests when setting $isis->{record} directly
406    
407     sub mfn {
408     my $self = shift;
409     return $self->{current_mfn};
410     };
411    
412    
413 dpavlin 2 =head2 to_ascii
414    
415 dpavlin 27 Returns ASCII output of record with specified MFN
416 dpavlin 2
417 dpavlin 15 print $isis->to_ascii(42);
418 dpavlin 2
419 dpavlin 27 This outputs something like this:
420 dpavlin 15
421     210 ^aNew York^cNew York University press^dcop. 1988
422     990 2140
423     990 88
424     990 HAY
425    
426     If C<read_fdt> is specified when calling C<new> it will display field names
427     from C<.FDT> file instead of numeric tags.
428    
429 dpavlin 2 =cut
430    
431     sub to_ascii {
432     my $self = shift;
433    
434     my $mfn = shift || croak "need MFN";
435    
436 dpavlin 41 my $rec = $self->fetch($mfn) || return;
437 dpavlin 2
438     my $out = "0\t$mfn";
439    
440     foreach my $f (sort keys %{$rec}) {
441 dpavlin 15 my $fn = $self->tag_name($f);
442     $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
443 dpavlin 2 }
444    
445     $out .= "\n";
446    
447     return $out;
448     }
449    
450 dpavlin 12 =head2 to_hash
451    
452 dpavlin 15 Read record with specified MFN and convert it to hash
453 dpavlin 12
454     my $hash = $isis->to_hash($mfn);
455    
456 dpavlin 27 It has ability to convert characters (using C<hash_filter>) from ISIS
457 dpavlin 15 database before creating structures enabling character re-mapping or quick
458     fix-up of data.
459 dpavlin 12
460     This function returns hash which is like this:
461    
462     $hash = {
463     '210' => [
464     {
465     'c' => 'New York University press',
466     'a' => 'New York',
467     'd' => 'cop. 1988'
468     }
469     ],
470     '990' => [
471     '2140',
472     '88',
473     'HAY'
474     ],
475     };
476    
477 dpavlin 15 You can later use that hash to produce any output from ISIS data.
478 dpavlin 12
479 dpavlin 15 If database is created using IsisMarc, it will also have to special fields
480     which will be used for identifiers, C<i1> and C<i2> like this:
481    
482     '200' => [
483     {
484     'i1' => '1',
485     'i2' => ' '
486     'a' => 'Goa',
487     'f' => 'Valdo D\'Arienzo',
488     'e' => 'tipografie e tipografi nel XVI secolo',
489     }
490     ],
491    
492 dpavlin 50 In case there are repeatable subfields in record, this will create
493     following structure:
494    
495     '900' => [ {
496     'a' => [ 'foo', 'bar', 'baz' ],
497     }]
498    
499 dpavlin 57 Or in more complex example of
500    
501     902 ^aa1^aa2^aa3^bb1^aa4^bb2^cc1^aa5
502    
503     it will create
504    
505     902 => [
506     { a => ["a1", "a2", "a3", "a4", "a5"], b => ["b1", "b2"], c => "c1" },
507     ],
508    
509     This behaviour can be changed using C<join_subfields_with> option to L</new>,
510     in which case C<to_hash> will always create single value for each subfield.
511     This will change result to:
512    
513    
514    
515 dpavlin 15 This method will also create additional field C<000> with MFN.
516    
517 dpavlin 56 There is also more elaborative way to call C<to_hash> like this:
518    
519     my $hash = $isis->to_hash({
520     mfn => 42,
521 dpavlin 57 include_subfields => 1,
522 dpavlin 56 });
523    
524 dpavlin 57 Each option controll creation of hash:
525    
526     =over 4
527    
528     =item mfn
529    
530     Specify MFN number of record
531    
532     =item include_subfields
533    
534     This option will create additional key in hash called C<subfields> which will
535     have original record subfield order and index to that subfield like this:
536    
537     902 => [ {
538     a => ["a1", "a2", "a3", "a4", "a5"],
539     b => ["b1", "b2"],
540     c => "c1",
541     subfields => ["a", 0, "a", 1, "a", 2, "b", 0, "a", 3, "b", 1, "c", 0, "a", 4],
542     } ],
543    
544     =item join_subfields_with
545    
546     Define delimiter which will be used to join repeatable subfields. You can
547 dpavlin 58 specify option here instead in L</new> if you want to have per-record control.
548 dpavlin 57
549     =back
550    
551 dpavlin 12 =cut
552    
553     sub to_hash {
554     my $self = shift;
555    
556 dpavlin 56
557 dpavlin 12 my $mfn = shift || confess "need mfn!";
558 dpavlin 56 my $arg;
559 dpavlin 12
560 dpavlin 56 if (ref($mfn) eq 'HASH') {
561     $arg = $mfn;
562     $mfn = $arg->{mfn} || confess "need mfn in arguments";
563     }
564    
565 dpavlin 15 # init record to include MFN as field 000
566 dpavlin 16 my $rec = { '000' => [ $mfn ] };
567 dpavlin 15
568 dpavlin 41 my $row = $self->fetch($mfn) || return;
569 dpavlin 12
570 dpavlin 58 my $j_rs = $arg->{join_subfields_with};
571     $j_rs = $self->{join_subfields_with} unless(defined($j_rs));
572 dpavlin 57 my $i_sf = $arg->{include_subfields};
573 dpavlin 12
574 dpavlin 57 foreach my $f_nr (keys %{$row}) {
575     foreach my $l (@{$row->{$f_nr}}) {
576    
577 dpavlin 12 # filter output
578 dpavlin 44 if ($self->{'hash_filter'}) {
579     $l = $self->{'hash_filter'}->($l);
580     next unless defined($l);
581     }
582 dpavlin 12
583 dpavlin 15 my $val;
584 dpavlin 57 my $r_sf; # repeatable subfields in this record
585 dpavlin 15
586     # has identifiers?
587 dpavlin 23 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
588 dpavlin 15
589 dpavlin 12 # has subfields?
590     if ($l =~ m/\^/) {
591     foreach my $t (split(/\^/,$l)) {
592     next if (! $t);
593 dpavlin 50 my ($sf,$v) = (substr($t,0,1), substr($t,1));
594 dpavlin 57 # XXX this might be option, but why?
595 dpavlin 54 next unless ($v);
596 dpavlin 57 # warn "### $f_nr^$sf:$v",$/ if ($self->{debug} > 1);
597 dpavlin 54
598 dpavlin 50 if (ref( $val->{$sf} ) eq 'ARRAY') {
599 dpavlin 54
600 dpavlin 50 push @{ $val->{$sf} }, $v;
601 dpavlin 57
602     # record repeatable subfield it it's offset
603     push @{ $val->{subfields} }, ( $sf, $#{ $val->{$sf} } ) if (! $j_rs && $i_sf);
604     $r_sf->{$sf}++;
605    
606 dpavlin 50 } elsif (defined( $val->{$sf} )) {
607 dpavlin 57
608 dpavlin 50 # convert scalar field to array
609     $val->{$sf} = [ $val->{$sf}, $v ];
610 dpavlin 57
611     push @{ $val->{subfields} }, ( $sf, 1 ) if (! $j_rs && $i_sf);
612     $r_sf->{$sf}++;
613    
614 dpavlin 50 } else {
615     $val->{$sf} = $v;
616 dpavlin 57 push @{ $val->{subfields} }, ( $sf, 0 ) if ($i_sf);
617 dpavlin 50 }
618 dpavlin 12 }
619     } else {
620     $val = $l;
621     }
622    
623 dpavlin 57 if ($j_rs) {
624     map {
625     $val->{$_} = join($j_rs, @{ $val->{$_} });
626     } keys %$r_sf
627     }
628    
629     push @{$rec->{$f_nr}}, $val;
630 dpavlin 12 }
631     }
632    
633     return $rec;
634     }
635    
636 dpavlin 15 =head2 tag_name
637 dpavlin 1
638 dpavlin 15 Return name of selected tag
639 dpavlin 1
640 dpavlin 15 print $isis->tag_name('200');
641    
642     =cut
643    
644     sub tag_name {
645 dpavlin 1 my $self = shift;
646 dpavlin 15 my $tag = shift || return;
647     return $self->{'TagName'}->{$tag} || $tag;
648 dpavlin 1 }
649    
650 dpavlin 35
651     =head2 read_cnt
652    
653     Read content of C<.CNT> file and return hash containing it.
654    
655     print Dumper($isis->read_cnt);
656    
657     This function is not used by module (C<.CNT> files are not required for this
658     module to work), but it can be useful to examine your index (while debugging
659     for example).
660    
661     =cut
662    
663     sub read_cnt {
664     my $self = shift;
665    
666     croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
667    
668     # Get the index information from $db.CNT
669    
670     open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
671     binmode($fileCNT);
672    
673     my $buff;
674    
675     read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
676     $self->unpack_cnt($buff);
677    
678     read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
679     $self->unpack_cnt($buff);
680    
681     close($fileCNT);
682    
683     return $self->{cnt};
684     }
685    
686     =head2 unpack_cnt
687    
688     Unpack one of two 26 bytes fixed length record in C<.CNT> file.
689    
690     Here is definition of record:
691    
692     off key description size
693     0: IDTYPE BTree type s
694     2: ORDN Nodes Order s
695     4: ORDF Leafs Order s
696     6: N Number of Memory buffers for nodes s
697     8: K Number of buffers for first level index s
698     10: LIV Current number of Index Levels s
699     12: POSRX Pointer to Root Record in N0x l
700     16: NMAXPOS Next Available position in N0x l
701     20: FMAXPOS Next available position in L0x l
702     24: ABNORMAL Formal BTree normality indicator s
703     length: 26 bytes
704    
705     This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
706    
707     =cut
708    
709     sub unpack_cnt {
710     my $self = shift;
711    
712     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
713    
714     my $buff = shift || return;
715     my @arr = unpack("vvvvvvVVVv", $buff);
716    
717     print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
718    
719     my $IDTYPE = shift @arr;
720     foreach (@flds) {
721     $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
722     }
723     }
724    
725 dpavlin 1 1;
726    
727     =head1 BUGS
728    
729 dpavlin 27 Some parts of CDS/ISIS documentation are not detailed enough to exmplain
730     some variations in input databases which has been tested with this module.
731     When I was in doubt, I assumed that OpenIsis's implementation was right
732     (except for obvious bugs).
733 dpavlin 1
734 dpavlin 27 However, every effort has been made to test this module with as much
735     databases (and programs that create them) as possible.
736    
737     I would be very greatful for success or failure reports about usage of this
738     module with databases from programs other than WinIsis and IsisMarc. I had
739     tested this against ouput of one C<isis.dll>-based application, but I don't
740     know any details about it's version.
741    
742 dpavlin 54 =head1 VERSIONS
743    
744 dpavlin 57 As this is young module, new features are added in subsequent version. It's
745     a good idea to specify version when using this module like this:
746 dpavlin 54
747 dpavlin 57 use Biblio::Isis 0.21
748    
749     Below is list of changes in specific version of module (so you can target
750     older versions if you really have to):
751    
752 dpavlin 54 =over 8
753    
754 dpavlin 57 =item 0.21
755    
756     Added C<join_subfields_with> to L</new> and L</to_hash>.
757    
758     Added C<include_subfields> to L</to_hash>.
759    
760 dpavlin 54 =item 0.20
761    
762 dpavlin 56 Added C<< $isis->mfn >>, support for repeatable subfields and
763     C<< $isis->to_hash({ mfn => 42, ... }) >> calling convention
764 dpavlin 54
765     =back
766    
767 dpavlin 1 =head1 AUTHOR
768    
769     Dobrica Pavlinusic
770     CPAN ID: DPAVLIN
771     dpavlin@rot13.org
772     http://www.rot13.org/~dpavlin/
773    
774 dpavlin 15 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
775     written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
776 dpavlin 1
777     =head1 COPYRIGHT
778    
779     This program is free software; you can redistribute
780     it and/or modify it under the same terms as Perl itself.
781    
782     The full text of the license can be found in the
783     LICENSE file included with this module.
784    
785    
786     =head1 SEE ALSO
787    
788 dpavlin 59 L<Biblio::Isis::Manual> for CDS/ISIS manual appendix F, G and H which describe file format
789    
790 dpavlin 15 OpenIsis web site L<http://www.openisis.org>
791 dpavlin 1
792 dpavlin 15 perl4lib site L<http://perl4lib.perl.org>
793    

  ViewVC Help
Powered by ViewVC 1.1.26