/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 66 - (hide annotations)
Fri Aug 25 10:20:58 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 19242 byte(s)
added hash_filter to to_hash [0.23]
1 dpavlin 36 package Biblio::Isis;
2 dpavlin 1 use strict;
3    
4     use Carp;
5 dpavlin 18 use File::Glob qw(:globally :nocase);
6    
7 dpavlin 1 BEGIN {
8     use Exporter ();
9     use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10 dpavlin 66 $VERSION = 0.23;
11 dpavlin 1 @ISA = qw (Exporter);
12     #Give a hoot don't pollute, do not export more than needed by default
13     @EXPORT = qw ();
14     @EXPORT_OK = qw ();
15     %EXPORT_TAGS = ();
16    
17     }
18    
19     =head1 NAME
20    
21 dpavlin 36 Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
22 dpavlin 1
23     =head1 SYNOPSIS
24    
25 dpavlin 36 use Biblio::Isis;
26 dpavlin 11
27 dpavlin 36 my $isis = new Biblio::Isis(
28 dpavlin 1 isisdb => './cds/cds',
29     );
30    
31 dpavlin 32 for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
32 dpavlin 11 print $isis->to_ascii($mfn),"\n";
33     }
34    
35 dpavlin 1 =head1 DESCRIPTION
36    
37 dpavlin 15 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
38 dpavlin 27 IsisMarc. It can be used as perl-only alternative to OpenIsis module which
39     seems to depriciate it's old C<XS> bindings for perl.
40 dpavlin 1
41 dpavlin 15 It can create hash values from data in ISIS database (using C<to_hash>),
42     ASCII dump (using C<to_ascii>) or just hash with field names and packed
43     values (like C<^asomething^belse>).
44 dpavlin 11
45     Unique feature of this module is ability to C<include_deleted> records.
46     It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
47     fields which are zero sized will be filled with random junk from memory).
48    
49 dpavlin 15 It also has support for identifiers (only if ISIS database is created by
50     IsisMarc), see C<to_hash>.
51    
52 dpavlin 27 This module will always be slower than OpenIsis module which use C
53 dpavlin 15 library. However, since it's written in perl, it's platform independent (so
54     you don't need C compiler), and can be easily modified. I hope that it
55     creates data structures which are easier to use than ones created by
56     OpenIsis, so reduced time in other parts of the code should compensate for
57     slower performance of this module (speed of reading ISIS database is
58     rarely an issue).
59    
60 dpavlin 1 =head1 METHODS
61    
62     =cut
63    
64     # my $ORDN; # Nodes Order
65     # my $ORDF; # Leafs Order
66     # my $N; # Number of Memory buffers for nodes
67     # my $K; # Number of buffers for first level index
68     # my $LIV; # Current number of Index Levels
69     # my $POSRX; # Pointer to Root Record in N0x
70     # my $NMAXPOS; # Next Available position in N0x
71     # my $FMAXPOS; # Next available position in L0x
72     # my $ABNORMAL; # Formal BTree normality indicator
73    
74     #
75     # some binary reads
76     #
77    
78     =head2 new
79    
80 dpavlin 15 Open ISIS database
81 dpavlin 1
82 dpavlin 36 my $isis = new Biblio::Isis(
83 dpavlin 1 isisdb => './cds/cds',
84     read_fdt => 1,
85 dpavlin 12 include_deleted => 1,
86     hash_filter => sub {
87 dpavlin 64 my ($v,$field_number) = @_;
88 dpavlin 12 $v =~ s#foo#bar#g;
89     },
90 dpavlin 1 debug => 1,
91 dpavlin 57 join_subfields_with => ' ; ',
92 dpavlin 1 );
93    
94 dpavlin 2 Options are described below:
95    
96     =over 5
97    
98 dpavlin 1 =item isisdb
99    
100 dpavlin 15 This is full or relative path to ISIS database files which include
101 dpavlin 18 common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
102     C<read_fdt> option) files.
103 dpavlin 1
104 dpavlin 15 In this example it uses C<./cds/cds.MST> and related files.
105    
106 dpavlin 1 =item read_fdt
107    
108     Boolean flag to specify if field definition table should be read. It's off
109     by default.
110    
111 dpavlin 9 =item include_deleted
112    
113 dpavlin 11 Don't skip logically deleted records in ISIS.
114 dpavlin 9
115 dpavlin 12 =item hash_filter
116    
117 dpavlin 65 Filter code ref which will be used before data is converted to hash. It will
118     receive two arguments, whole line from current field (in C<< $_[0] >>) and
119     field number (in C<< $_[1] >>).
120 dpavlin 12
121     =item debug
122    
123 dpavlin 54 Dump a B<lot> of debugging output even at level 1. For even more increase level.
124 dpavlin 12
125 dpavlin 57 =item join_subfields_with
126    
127     Define delimiter which will be used to join repeatable subfields. This
128     option is included to support lagacy application written against version
129     older than 0.21 of this module. By default, it disabled. See L</to_hash>.
130    
131 dpavlin 2 =back
132    
133 dpavlin 1 =cut
134    
135     sub new {
136     my $class = shift;
137     my $self = {};
138     bless($self, $class);
139    
140 dpavlin 9 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
141 dpavlin 1
142 dpavlin 12 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
143 dpavlin 9 $self->{$v} = {@_}->{$v};
144     }
145 dpavlin 1
146 dpavlin 18 my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
147    
148     foreach my $f (@isis_files) {
149     my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
150     $self->{lc($ext)."_file"} = $f;
151     }
152    
153     my @must_exist = qw(mst xrf);
154     push @must_exist, "fdt" if ($self->{read_fdt});
155    
156     foreach my $ext (@must_exist) {
157 dpavlin 39 unless ($self->{$ext."_file"}) {
158     carp "missing ",uc($ext)," file in ",$self->{isisdb};
159     return;
160     }
161 dpavlin 18 }
162    
163 dpavlin 45 if ($self->{debug}) {
164     print STDERR "## using files: ",join(" ",@isis_files),"\n";
165     eval "use Data::Dump";
166 dpavlin 18
167 dpavlin 45 if (! $@) {
168     *Dumper = *Data::Dump::dump;
169     } else {
170     use Data::Dumper;
171     }
172     }
173    
174 dpavlin 1 # if you want to read .FDT file use read_fdt argument when creating class!
175 dpavlin 18 if ($self->{read_fdt} && -e $self->{fdt_file}) {
176 dpavlin 1
177     # read the $db.FDT file for tags
178     my $fieldzone=0;
179    
180 dpavlin 33 open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
181     binmode($fileFDT);
182 dpavlin 1
183 dpavlin 33 while (<$fileFDT>) {
184 dpavlin 1 chomp;
185     if ($fieldzone) {
186     my $name=substr($_,0,30);
187     my $tag=substr($_,50,3);
188    
189     $name =~ s/\s+$//;
190     $tag =~ s/\s+$//;
191    
192     $self->{'TagName'}->{$tag}=$name;
193     }
194    
195     if (/^\*\*\*/) {
196     $fieldzone=1;
197     }
198     }
199    
200 dpavlin 33 close($fileFDT);
201 dpavlin 1 }
202    
203     # Get the Maximum MFN from $db.MST
204    
205 dpavlin 18 open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
206 dpavlin 33 binmode($self->{'fileMST'});
207 dpavlin 1
208     # MST format: (* = 32 bit signed)
209     # CTLMFN* always 0
210     # NXTMFN* MFN to be assigned to the next record created
211     # NXTMFB* last block allocated to master file
212     # NXTMFP offset to next available position in last block
213     # MFTYPE always 0 for user db file (1 for system)
214 dpavlin 34 seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
215 dpavlin 1
216 dpavlin 11 my $buff;
217    
218 dpavlin 34 read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
219     $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
220 dpavlin 11
221 dpavlin 45 print STDERR "## self ",Dumper($self),"\n" if ($self->{debug});
222 dpavlin 18
223     # open files for later
224     open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
225 dpavlin 33 binmode($self->{'fileXRF'});
226 dpavlin 18
227     $self ? return $self : return undef;
228     }
229    
230 dpavlin 32 =head2 count
231    
232     Return number of records in database
233    
234     print $isis->count;
235    
236     =cut
237    
238     sub count {
239     my $self = shift;
240     return $self->{'NXTMFN'} - 1;
241     }
242    
243 dpavlin 7 =head2 fetch
244 dpavlin 1
245 dpavlin 2 Read record with selected MFN
246 dpavlin 1
247 dpavlin 7 my $rec = $isis->fetch(55);
248 dpavlin 2
249     Returns hash with keys which are field names and values are unpacked values
250 dpavlin 15 for that field like this:
251 dpavlin 2
252 dpavlin 15 $rec = {
253     '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
254     '990' => [ '2140', '88', 'HAY' ],
255     };
256    
257 dpavlin 2 =cut
258    
259 dpavlin 7 sub fetch {
260 dpavlin 1 my $self = shift;
261    
262 dpavlin 7 my $mfn = shift || croak "fetch needs MFN as argument!";
263 dpavlin 1
264 dpavlin 16 # is mfn allready in memory?
265     my $old_mfn = $self->{'current_mfn'} || -1;
266 dpavlin 25 return $self->{record} if ($mfn == $old_mfn);
267 dpavlin 1
268 dpavlin 16 print STDERR "## fetch: $mfn\n" if ($self->{debug});
269    
270 dpavlin 1 # XXX check this?
271     my $mfnpos=($mfn+int(($mfn-1)/127))*4;
272    
273 dpavlin 18 print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
274 dpavlin 7 seek($self->{'fileXRF'},$mfnpos,0);
275 dpavlin 1
276 dpavlin 11 my $buff;
277    
278 dpavlin 25 # delete old record
279     delete $self->{record};
280    
281 dpavlin 1 # read XRFMFB abd XRFMFP
282 dpavlin 11 read($self->{'fileXRF'}, $buff, 4);
283 dpavlin 41 my $pointer=unpack("V",$buff);
284     if (! $pointer) {
285     if ($self->{include_deleted}) {
286     return;
287     } else {
288     warn "pointer for MFN $mfn is null\n";
289     return;
290     }
291     }
292 dpavlin 1
293 dpavlin 25 # check for logically deleted record
294 dpavlin 33 if ($pointer & 0x80000000) {
295 dpavlin 25 print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
296     $self->{deleted} = $mfn;
297    
298     return unless $self->{include_deleted};
299    
300 dpavlin 33 # abs
301     $pointer = ($pointer ^ 0xffffffff) + 1;
302 dpavlin 25 }
303    
304 dpavlin 1 my $XRFMFB = int($pointer/2048);
305     my $XRFMFP = $pointer - ($XRFMFB*2048);
306    
307 dpavlin 16 # (XRFMFB - 1) * 512 + XRFMFP
308     # why do i have to do XRFMFP % 1024 ?
309 dpavlin 1
310 dpavlin 26 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
311 dpavlin 1
312 dpavlin 16 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
313 dpavlin 1
314     # Get Record Information
315    
316 dpavlin 33 seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
317 dpavlin 1
318 dpavlin 33 read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
319     my $value=unpack("V",$buff);
320 dpavlin 1
321 dpavlin 16 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
322    
323 dpavlin 1 if ($value!=$mfn) {
324 dpavlin 26 if ($value == 0) {
325     print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
326     $self->{deleted} = $mfn;
327     return;
328     }
329    
330     carp "Error: MFN ".$mfn." not found in MST file, found $value";
331     return;
332 dpavlin 1 }
333    
334 dpavlin 7 read($self->{'fileMST'}, $buff, 14);
335 dpavlin 1
336 dpavlin 33 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
337 dpavlin 1
338 dpavlin 16 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
339 dpavlin 1
340 dpavlin 25 warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
341 dpavlin 9
342 dpavlin 16 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
343    
344 dpavlin 1 # Get Directory Format
345    
346     my @FieldPOS;
347     my @FieldLEN;
348     my @FieldTAG;
349    
350 dpavlin 8 read($self->{'fileMST'}, $buff, 6 * $NVF);
351    
352 dpavlin 16 my $rec_len = 0;
353 dpavlin 8
354 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
355    
356 dpavlin 33 my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
357 dpavlin 1
358 dpavlin 16 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
359 dpavlin 1
360     # The TAG does not exists in .FDT so we set it to 0.
361     #
362     # XXX This is removed from perl version; .FDT file is updated manually, so
363     # you will often have fields in .MST file which aren't in .FDT. On the other
364     # hand, IsisMarc doesn't use .FDT files at all!
365    
366     #if (! $self->{TagName}->{$TAG}) {
367     # $TAG=0;
368     #}
369    
370     push @FieldTAG,$TAG;
371     push @FieldPOS,$POS;
372     push @FieldLEN,$LEN;
373 dpavlin 8
374 dpavlin 16 $rec_len += $LEN;
375 dpavlin 1 }
376    
377     # Get Variable Fields
378    
379 dpavlin 16 read($self->{'fileMST'},$buff,$rec_len);
380 dpavlin 8
381 dpavlin 16 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
382    
383 dpavlin 1 for (my $i = 0 ; $i < $NVF ; $i++) {
384 dpavlin 10 # skip zero-sized fields
385     next if ($FieldLEN[$i] == 0);
386    
387 dpavlin 8 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
388 dpavlin 1 }
389    
390 dpavlin 16 $self->{'current_mfn'} = $mfn;
391    
392 dpavlin 25 print STDERR Dumper($self),"\n" if ($self->{debug});
393 dpavlin 1
394 dpavlin 2 return $self->{'record'};
395 dpavlin 1 }
396    
397 dpavlin 54 =head2 mfn
398    
399     Returns current MFN position
400    
401     my $mfn = $isis->mfn;
402    
403     =cut
404    
405     # This function should be simple return $self->{current_mfn},
406     # but if new is called with _hack_mfn it becomes setter.
407     # It's useful in tests when setting $isis->{record} directly
408    
409     sub mfn {
410     my $self = shift;
411     return $self->{current_mfn};
412     };
413    
414    
415 dpavlin 2 =head2 to_ascii
416    
417 dpavlin 27 Returns ASCII output of record with specified MFN
418 dpavlin 2
419 dpavlin 15 print $isis->to_ascii(42);
420 dpavlin 2
421 dpavlin 27 This outputs something like this:
422 dpavlin 15
423     210 ^aNew York^cNew York University press^dcop. 1988
424     990 2140
425     990 88
426     990 HAY
427    
428     If C<read_fdt> is specified when calling C<new> it will display field names
429     from C<.FDT> file instead of numeric tags.
430    
431 dpavlin 2 =cut
432    
433     sub to_ascii {
434     my $self = shift;
435    
436     my $mfn = shift || croak "need MFN";
437    
438 dpavlin 41 my $rec = $self->fetch($mfn) || return;
439 dpavlin 2
440     my $out = "0\t$mfn";
441    
442     foreach my $f (sort keys %{$rec}) {
443 dpavlin 15 my $fn = $self->tag_name($f);
444     $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
445 dpavlin 2 }
446    
447     $out .= "\n";
448    
449     return $out;
450     }
451    
452 dpavlin 12 =head2 to_hash
453    
454 dpavlin 15 Read record with specified MFN and convert it to hash
455 dpavlin 12
456     my $hash = $isis->to_hash($mfn);
457    
458 dpavlin 27 It has ability to convert characters (using C<hash_filter>) from ISIS
459 dpavlin 15 database before creating structures enabling character re-mapping or quick
460     fix-up of data.
461 dpavlin 12
462     This function returns hash which is like this:
463    
464     $hash = {
465     '210' => [
466     {
467     'c' => 'New York University press',
468     'a' => 'New York',
469     'd' => 'cop. 1988'
470     }
471     ],
472     '990' => [
473     '2140',
474     '88',
475     'HAY'
476     ],
477     };
478    
479 dpavlin 15 You can later use that hash to produce any output from ISIS data.
480 dpavlin 12
481 dpavlin 15 If database is created using IsisMarc, it will also have to special fields
482     which will be used for identifiers, C<i1> and C<i2> like this:
483    
484     '200' => [
485     {
486     'i1' => '1',
487     'i2' => ' '
488     'a' => 'Goa',
489     'f' => 'Valdo D\'Arienzo',
490     'e' => 'tipografie e tipografi nel XVI secolo',
491     }
492     ],
493    
494 dpavlin 50 In case there are repeatable subfields in record, this will create
495     following structure:
496    
497     '900' => [ {
498     'a' => [ 'foo', 'bar', 'baz' ],
499     }]
500    
501 dpavlin 57 Or in more complex example of
502    
503     902 ^aa1^aa2^aa3^bb1^aa4^bb2^cc1^aa5
504    
505     it will create
506    
507     902 => [
508     { a => ["a1", "a2", "a3", "a4", "a5"], b => ["b1", "b2"], c => "c1" },
509     ],
510    
511     This behaviour can be changed using C<join_subfields_with> option to L</new>,
512     in which case C<to_hash> will always create single value for each subfield.
513     This will change result to:
514    
515    
516    
517 dpavlin 15 This method will also create additional field C<000> with MFN.
518    
519 dpavlin 56 There is also more elaborative way to call C<to_hash> like this:
520    
521     my $hash = $isis->to_hash({
522     mfn => 42,
523 dpavlin 57 include_subfields => 1,
524 dpavlin 56 });
525    
526 dpavlin 57 Each option controll creation of hash:
527    
528     =over 4
529    
530     =item mfn
531    
532     Specify MFN number of record
533    
534     =item include_subfields
535    
536     This option will create additional key in hash called C<subfields> which will
537     have original record subfield order and index to that subfield like this:
538    
539     902 => [ {
540     a => ["a1", "a2", "a3", "a4", "a5"],
541     b => ["b1", "b2"],
542     c => "c1",
543     subfields => ["a", 0, "a", 1, "a", 2, "b", 0, "a", 3, "b", 1, "c", 0, "a", 4],
544     } ],
545    
546     =item join_subfields_with
547    
548     Define delimiter which will be used to join repeatable subfields. You can
549 dpavlin 58 specify option here instead in L</new> if you want to have per-record control.
550 dpavlin 57
551 dpavlin 66 =item hash_filter
552    
553     You can override C<hash_filter> defined in L</new> using this option.
554    
555 dpavlin 57 =back
556    
557 dpavlin 12 =cut
558    
559     sub to_hash {
560     my $self = shift;
561    
562 dpavlin 56
563 dpavlin 12 my $mfn = shift || confess "need mfn!";
564 dpavlin 56 my $arg;
565 dpavlin 12
566 dpavlin 66 my $hash_filter = $self->{hash_filter};
567    
568 dpavlin 56 if (ref($mfn) eq 'HASH') {
569     $arg = $mfn;
570     $mfn = $arg->{mfn} || confess "need mfn in arguments";
571 dpavlin 66 $hash_filter = $arg->{hash_filter} if ($arg->{hash_filter});
572 dpavlin 56 }
573    
574 dpavlin 15 # init record to include MFN as field 000
575 dpavlin 16 my $rec = { '000' => [ $mfn ] };
576 dpavlin 15
577 dpavlin 41 my $row = $self->fetch($mfn) || return;
578 dpavlin 12
579 dpavlin 58 my $j_rs = $arg->{join_subfields_with};
580     $j_rs = $self->{join_subfields_with} unless(defined($j_rs));
581 dpavlin 57 my $i_sf = $arg->{include_subfields};
582 dpavlin 12
583 dpavlin 57 foreach my $f_nr (keys %{$row}) {
584     foreach my $l (@{$row->{$f_nr}}) {
585    
586 dpavlin 12 # filter output
587 dpavlin 66 $l = $hash_filter->($l, $f_nr) if ($hash_filter);
588     next unless defined($l);
589 dpavlin 12
590 dpavlin 15 my $val;
591 dpavlin 57 my $r_sf; # repeatable subfields in this record
592 dpavlin 15
593     # has identifiers?
594 dpavlin 23 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
595 dpavlin 15
596 dpavlin 12 # has subfields?
597     if ($l =~ m/\^/) {
598     foreach my $t (split(/\^/,$l)) {
599     next if (! $t);
600 dpavlin 50 my ($sf,$v) = (substr($t,0,1), substr($t,1));
601 dpavlin 57 # XXX this might be option, but why?
602 dpavlin 54 next unless ($v);
603 dpavlin 57 # warn "### $f_nr^$sf:$v",$/ if ($self->{debug} > 1);
604 dpavlin 54
605 dpavlin 50 if (ref( $val->{$sf} ) eq 'ARRAY') {
606 dpavlin 54
607 dpavlin 50 push @{ $val->{$sf} }, $v;
608 dpavlin 57
609     # record repeatable subfield it it's offset
610     push @{ $val->{subfields} }, ( $sf, $#{ $val->{$sf} } ) if (! $j_rs && $i_sf);
611     $r_sf->{$sf}++;
612    
613 dpavlin 50 } elsif (defined( $val->{$sf} )) {
614 dpavlin 57
615 dpavlin 50 # convert scalar field to array
616     $val->{$sf} = [ $val->{$sf}, $v ];
617 dpavlin 57
618     push @{ $val->{subfields} }, ( $sf, 1 ) if (! $j_rs && $i_sf);
619     $r_sf->{$sf}++;
620    
621 dpavlin 50 } else {
622     $val->{$sf} = $v;
623 dpavlin 57 push @{ $val->{subfields} }, ( $sf, 0 ) if ($i_sf);
624 dpavlin 50 }
625 dpavlin 12 }
626     } else {
627     $val = $l;
628     }
629    
630 dpavlin 57 if ($j_rs) {
631     map {
632     $val->{$_} = join($j_rs, @{ $val->{$_} });
633     } keys %$r_sf
634     }
635    
636     push @{$rec->{$f_nr}}, $val;
637 dpavlin 12 }
638     }
639    
640     return $rec;
641     }
642    
643 dpavlin 15 =head2 tag_name
644 dpavlin 1
645 dpavlin 15 Return name of selected tag
646 dpavlin 1
647 dpavlin 15 print $isis->tag_name('200');
648    
649     =cut
650    
651     sub tag_name {
652 dpavlin 1 my $self = shift;
653 dpavlin 15 my $tag = shift || return;
654     return $self->{'TagName'}->{$tag} || $tag;
655 dpavlin 1 }
656    
657 dpavlin 35
658     =head2 read_cnt
659    
660     Read content of C<.CNT> file and return hash containing it.
661    
662     print Dumper($isis->read_cnt);
663    
664     This function is not used by module (C<.CNT> files are not required for this
665     module to work), but it can be useful to examine your index (while debugging
666     for example).
667    
668     =cut
669    
670     sub read_cnt {
671     my $self = shift;
672    
673     croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
674    
675     # Get the index information from $db.CNT
676    
677     open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
678     binmode($fileCNT);
679    
680     my $buff;
681    
682     read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
683     $self->unpack_cnt($buff);
684    
685     read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
686     $self->unpack_cnt($buff);
687    
688     close($fileCNT);
689    
690     return $self->{cnt};
691     }
692    
693     =head2 unpack_cnt
694    
695     Unpack one of two 26 bytes fixed length record in C<.CNT> file.
696    
697     Here is definition of record:
698    
699     off key description size
700     0: IDTYPE BTree type s
701     2: ORDN Nodes Order s
702     4: ORDF Leafs Order s
703     6: N Number of Memory buffers for nodes s
704     8: K Number of buffers for first level index s
705     10: LIV Current number of Index Levels s
706     12: POSRX Pointer to Root Record in N0x l
707     16: NMAXPOS Next Available position in N0x l
708     20: FMAXPOS Next available position in L0x l
709     24: ABNORMAL Formal BTree normality indicator s
710     length: 26 bytes
711    
712     This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
713    
714     =cut
715    
716     sub unpack_cnt {
717     my $self = shift;
718    
719     my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
720    
721     my $buff = shift || return;
722     my @arr = unpack("vvvvvvVVVv", $buff);
723    
724     print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
725    
726     my $IDTYPE = shift @arr;
727     foreach (@flds) {
728     $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
729     }
730     }
731    
732 dpavlin 1 1;
733    
734     =head1 BUGS
735    
736 dpavlin 27 Some parts of CDS/ISIS documentation are not detailed enough to exmplain
737     some variations in input databases which has been tested with this module.
738     When I was in doubt, I assumed that OpenIsis's implementation was right
739     (except for obvious bugs).
740 dpavlin 1
741 dpavlin 27 However, every effort has been made to test this module with as much
742     databases (and programs that create them) as possible.
743    
744     I would be very greatful for success or failure reports about usage of this
745     module with databases from programs other than WinIsis and IsisMarc. I had
746     tested this against ouput of one C<isis.dll>-based application, but I don't
747     know any details about it's version.
748    
749 dpavlin 54 =head1 VERSIONS
750    
751 dpavlin 57 As this is young module, new features are added in subsequent version. It's
752     a good idea to specify version when using this module like this:
753 dpavlin 54
754 dpavlin 66 use Biblio::Isis 0.23
755 dpavlin 57
756     Below is list of changes in specific version of module (so you can target
757     older versions if you really have to):
758    
759 dpavlin 54 =over 8
760    
761 dpavlin 66 =item 0.23
762    
763     Added C<hash_filter> to L</to_hash>
764    
765 dpavlin 65 =item 0.22
766    
767     Added field number when calling C<hash_filter>
768    
769 dpavlin 57 =item 0.21
770    
771     Added C<join_subfields_with> to L</new> and L</to_hash>.
772    
773     Added C<include_subfields> to L</to_hash>.
774    
775 dpavlin 54 =item 0.20
776    
777 dpavlin 56 Added C<< $isis->mfn >>, support for repeatable subfields and
778     C<< $isis->to_hash({ mfn => 42, ... }) >> calling convention
779 dpavlin 54
780     =back
781    
782 dpavlin 1 =head1 AUTHOR
783    
784     Dobrica Pavlinusic
785     CPAN ID: DPAVLIN
786     dpavlin@rot13.org
787     http://www.rot13.org/~dpavlin/
788    
789 dpavlin 15 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
790     written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
791 dpavlin 1
792     =head1 COPYRIGHT
793    
794     This program is free software; you can redistribute
795     it and/or modify it under the same terms as Perl itself.
796    
797     The full text of the license can be found in the
798     LICENSE file included with this module.
799    
800    
801     =head1 SEE ALSO
802    
803 dpavlin 59 L<Biblio::Isis::Manual> for CDS/ISIS manual appendix F, G and H which describe file format
804    
805 dpavlin 15 OpenIsis web site L<http://www.openisis.org>
806 dpavlin 1
807 dpavlin 15 perl4lib site L<http://perl4lib.perl.org>
808    

  ViewVC Help
Powered by ViewVC 1.1.26