/[Biblio-Isis]/trunk/lib/Biblio/Isis.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Biblio/Isis.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 56 by dpavlin, Sat Jul 8 16:03:52 2006 UTC revision 70 by dpavlin, Fri May 18 20:26:01 2007 UTC
# Line 7  use File::Glob qw(:globally :nocase); Line 7  use File::Glob qw(:globally :nocase);
7  BEGIN {  BEGIN {
8          use Exporter ();          use Exporter ();
9          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
10          $VERSION     = 0.20;          $VERSION     = 0.24_1;
11          @ISA         = qw (Exporter);          @ISA         = qw (Exporter);
12          #Give a hoot don't pollute, do not export more than needed by default          #Give a hoot don't pollute, do not export more than needed by default
13          @EXPORT      = qw ();          @EXPORT      = qw ();
# Line 84  Open ISIS database Line 84  Open ISIS database
84          read_fdt => 1,          read_fdt => 1,
85          include_deleted => 1,          include_deleted => 1,
86          hash_filter => sub {          hash_filter => sub {
87                  my $v = shift;                  my ($v,$field_number) = @_;
88                  $v =~ s#foo#bar#g;                  $v =~ s#foo#bar#g;
89          },          },
90          debug => 1,          debug => 1,
91            join_subfields_with => ' ; ',
92   );   );
93    
94  Options are described below:  Options are described below:
# Line 113  Don't skip logically deleted records in Line 114  Don't skip logically deleted records in
114    
115  =item hash_filter  =item hash_filter
116    
117  Filter code ref which will be used before data is converted to hash.  Filter code ref which will be used before data is converted to hash. It will
118    receive two arguments, whole line from current field (in C<< $_[0] >>) and
119    field number (in C<< $_[1] >>).
120    
121  =item debug  =item debug
122    
123  Dump a B<lot> of debugging output even at level 1. For even more increase level.  Dump a B<lot> of debugging output even at level 1. For even more increase level.
124    
125    =item join_subfields_with
126    
127    Define delimiter which will be used to join repeatable subfields. This
128    option is included to support legacy applications written against versions
129    older than 0.21 of this module. By default, it is disabled. See L</to_hash>.
130    
131    =item ignore_empty_subfields
132    
133    Remove all empty subfields while reading from ISIS file.
134    
135  =back  =back
136    
137  =cut  =cut
# Line 130  sub new { Line 143  sub new {
143    
144          croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});          croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
145    
146          foreach my $v (qw{isisdb debug include_deleted hash_filter}) {          foreach my $v (qw{isisdb debug include_deleted hash_filter join_subfields_with ignore_empty_subfields}) {
147                  $self->{$v} = {@_}->{$v};                  $self->{$v} = {@_}->{$v} if defined({@_}->{$v});
148          }          }
149    
150          my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));          my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
# Line 375  sub fetch { Line 388  sub fetch {
388                  # skip zero-sized fields                  # skip zero-sized fields
389                  next if ($FieldLEN[$i] == 0);                  next if ($FieldLEN[$i] == 0);
390    
391                  push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);                  my $v = substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
392    
393                    if ( $self->{ignore_empty_subfields} ) {
394                            $v =~ s/(\^\w)+(\^\w)/$2/g;
395                            $v =~ s/\^\w$//;                        # last on line?
396                            next if ($v eq '');
397                    }
398    
399                    push @{$self->{record}->{$FieldTAG[$i]}}, $v;
400          }          }
401    
402          $self->{'current_mfn'} = $mfn;          $self->{'current_mfn'} = $mfn;
# Line 489  following structure: Line 510  following structure:
510          'a' => [ 'foo', 'bar', 'baz' ],          'a' => [ 'foo', 'bar', 'baz' ],
511    }]    }]
512    
513    Or in more complex example of
514    
515      902   ^aa1^aa2^aa3^bb1^aa4^bb2^cc1^aa5
516    
517    it will create
518    
519      902   => [
520            { a => ["a1", "a2", "a3", "a4", "a5"], b => ["b1", "b2"], c => "c1" },
521      ],
522    
523    This behaviour can be changed using C<join_subfields_with> option to L</new>,
524    in which case C<to_hash> will always create single value for each subfield.
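525    This will change the result to (here with C<join_subfields_with> set to C<' ; '>):
526    
527      902   => [ { a => "a1 ; a2 ; a3 ; a4 ; a5", b => "b1 ; b2", c => "c1" } ],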
528    
529  This method will also create additional field C<000> with MFN.  This method will also create additional field C<000> with MFN.
530    
531  There is also more elaborative way to call C<to_hash> like this:  There is also more elaborative way to call C<to_hash> like this:
532    
533    my $hash = $isis->to_hash({    my $hash = $isis->to_hash({
534          mfn => 42,          mfn => 42,
535          include_empty_subfields => 1,          include_subfields => 1,
536    });    });
537    
538    Each option controls creation of the hash:
539    
540    =over 4
541    
542    =item mfn
543    
544    Specify MFN number of record
545    
546    =item include_subfields
547    
548    This option will create additional key in hash called C<subfields> which will
549    have original record subfield order and index to that subfield like this:
550    
551      902   => [ {
552            a => ["a1", "a2", "a3", "a4", "a5"],
553            b => ["b1", "b2"],
554            c => "c1",
555            subfields => ["a", 0, "a", 1, "a", 2, "b", 0, "a", 3, "b", 1, "c", 0, "a", 4],
556      } ],
557    
558    =item join_subfields_with
559    
560    Define delimiter which will be used to join repeatable subfields. You can
561    specify option here instead in L</new> if you want to have per-record control.
562    
563    =item hash_filter
564    
565    You can override C<hash_filter> defined in L</new> using this option.
566    
567    =back
568    
569  =cut  =cut
570    
571  sub to_hash {  sub to_hash {
# Line 507  sub to_hash { Line 575  sub to_hash {
575          my $mfn = shift || confess "need mfn!";          my $mfn = shift || confess "need mfn!";
576          my $arg;          my $arg;
577    
578            my $hash_filter = $self->{hash_filter};
579    
580          if (ref($mfn) eq 'HASH') {          if (ref($mfn) eq 'HASH') {
581                  $arg = $mfn;                  $arg = $mfn;
582                  $mfn = $arg->{mfn} || confess "need mfn in arguments";                  $mfn = $arg->{mfn} || confess "need mfn in arguments";
583                    $hash_filter = $arg->{hash_filter} if ($arg->{hash_filter});
584          }          }
585    
586          # init record to include MFN as field 000          # init record to include MFN as field 000
# Line 517  sub to_hash { Line 588  sub to_hash {
588    
589          my $row = $self->fetch($mfn) || return;          my $row = $self->fetch($mfn) || return;
590    
591          foreach my $k (keys %{$row}) {          my $j_rs = $arg->{join_subfields_with} || $self->{join_subfields_with};
592                  foreach my $l (@{$row->{$k}}) {          $j_rs = $self->{join_subfields_with} unless(defined($j_rs));
593            my $i_sf = $arg->{include_subfields};
594    
595            foreach my $f_nr (keys %{$row}) {
596                    foreach my $l (@{$row->{$f_nr}}) {
597    
598                          # filter output                          # filter output
599                          if ($self->{'hash_filter'}) {                          $l = $hash_filter->($l, $f_nr) if ($hash_filter);
600                                  $l = $self->{'hash_filter'}->($l);                          next unless defined($l);
                                 next unless defined($l);  
                         }  
601    
602                          my $val;                          my $val;
603                            my $r_sf;       # repeatable subfields in this record
604    
605                          # has identifiers?                          # has identifiers?
606                          ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);                          ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
# Line 536  sub to_hash { Line 610  sub to_hash {
610                                  foreach my $t (split(/\^/,$l)) {                                  foreach my $t (split(/\^/,$l)) {
611                                          next if (! $t);                                          next if (! $t);
612                                          my ($sf,$v) = (substr($t,0,1), substr($t,1));                                          my ($sf,$v) = (substr($t,0,1), substr($t,1));
613                                          # FIXME make this option !                                          # XXX this might be option, but why?
614                                          next unless ($v);                                          next unless (defined($v) && $v ne '');
615  #                                       warn "### $k^$sf:$v",$/ if ($self->{debug} > 1);  #                                       warn "### $f_nr^$sf:$v",$/ if ($self->{debug} > 1);
616    
                                         # FIXME array return optional, by default unroll to ' ; '  
617                                          if (ref( $val->{$sf} ) eq 'ARRAY') {                                          if (ref( $val->{$sf} ) eq 'ARRAY') {
618    
619                                                  push @{ $val->{$sf} }, $v;                                                  push @{ $val->{$sf} }, $v;
620    
621                                                    # record repeatable subfield and its offset
622                                                    push @{ $val->{subfields} }, ( $sf, $#{ $val->{$sf} } ) if (! $j_rs && $i_sf);
623                                                    $r_sf->{$sf}++;
624    
625                                          } elsif (defined( $val->{$sf} )) {                                          } elsif (defined( $val->{$sf} )) {
626    
627                                                  # convert scalar field to array                                                  # convert scalar field to array
628                                                  $val->{$sf} = [ $val->{$sf}, $v ];                                                  $val->{$sf} = [ $val->{$sf}, $v ];
629    
630                                                    push @{ $val->{subfields} }, ( $sf, 1 ) if (! $j_rs && $i_sf);
631                                                    $r_sf->{$sf}++;
632    
633                                          } else {                                          } else {
634                                                  $val->{$sf} = $v;                                                  $val->{$sf} = $v;
635                                                    push @{ $val->{subfields} }, ( $sf, 0 ) if ($i_sf);
636                                          }                                          }
637                                  }                                  }
638                          } else {                          } else {
639                                  $val = $l;                                  $val = $l;
640                          }                          }
641    
642                          push @{$rec->{$k}}, $val;                          if ($j_rs) {
643                                    map {
644                                            $val->{$_} = join($j_rs, @{ $val->{$_} });
645                                    } keys %$r_sf
646                            }
647    
648                            push @{$rec->{$f_nr}}, $val;
649                  }                  }
650          }          }
651    
# Line 670  know any details about it's version. Line 760  know any details about it's version.
760    
761  =head1 VERSIONS  =head1 VERSIONS
762    
763  You can find version dependencies documented here  As this is a young module, new features are added in subsequent versions. It's
764    a good idea to specify a version when using this module like this:
765    
766      use Biblio::Isis 0.23;
767    
768    Below is a list of changes in specific versions of the module (so you can target
769    older versions if you really have to):
770    
771  =over 8  =over 8
772    
773    =item 0.24
774    
775    Added C<ignore_empty_subfields>
776    
777    =item 0.23
778    
779    Added C<hash_filter> to L</to_hash>
780    
781    Fixed bug with documented C<join_subfields_with> in L</new> which wasn't
782    implemented
783    
784    =item 0.22
785    
786    Added field number when calling C<hash_filter>
787    
788    =item 0.21
789    
790    Added C<join_subfields_with> to L</new> and L</to_hash>.
791    
792    Added C<include_subfields> to L</to_hash>.
793    
794  =item 0.20  =item 0.20
795    
796  Added C<< $isis->mfn >>, support for repeatable subfields and  Added C<< $isis->mfn >>, support for repeatable subfields and
# Line 702  LICENSE file included with this module. Line 819  LICENSE file included with this module.
819    
820  =head1 SEE ALSO  =head1 SEE ALSO
821    
822    L<Biblio::Isis::Manual> for CDS/ISIS manual appendix F, G and H which describe file format
823    
824  OpenIsis web site L<http://www.openisis.org>  OpenIsis web site L<http://www.openisis.org>
825    
826  perl4lib site L<http://perl4lib.perl.org>  perl4lib site L<http://perl4lib.perl.org>

Legend:
Removed from v.56  
changed lines
  Added in v.70

  ViewVC Help
Powered by ViewVC 1.1.26