/[MARC-Fast]/trunk/lib/MARC/Fast.pm

This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!

Diff of /trunk/lib/MARC/Fast.pm

Parent Directory | Revision Log | View Patch Patch

-trunk/Fast.pm
revision 23 by dpavlin,
Sun Nov  4 22:44:42 2007 UTC
+trunk/lib/MARC/Fast.pm
revision 47 by dpavlin,
Thu Aug 22 11:24:36 2013 UTC
 Line 2 
 package MARC::Fast;
  use strict;
  use Carp;
- use Data::Dumper;
+ use Data::Dump qw/dump/;
  BEGIN {
          use Exporter ();
          use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
-         $VERSION     = 0.09;
+         $VERSION     = 0.12;
          @ISA         = qw (Exporter);
          #Give a hoot don't pollute, do not export more than needed by default
          @EXPORT      = qw ();
 Line 87 
 sub new {
                  my $len = read($self->{fh}, $leader, 24);
                  if ($len < 24) {
-                         carp "short read of leader, aborting\n";
+                         warn "short read of leader, aborting\n";
+                         $self->{count}--;
                          last;
                  }
-Line 124 
 sub new {
+Line 125 
 sub new {
                  # skip to next record
                  my $o = substr($leader,0,5);
+                 warn "# in record ", $self->{count}," record length isn't number but: ",dump($o),"\n" unless $o =~ m/^\d+$/;
                  if ($o > 24) {
                          seek($self->{fh},$o-24,1) if ($o);
                  } else {
-Line 275 
 sub last_leader {
+Line 277 
 sub last_leader {
  Read record with specified MFN and convert it to hash
-   my $hash = $marc->to_hash( $mfn, include_subfields => 1, );
+   my $hash = $marc->to_hash( $mfn, include_subfields => 1,
+         hash_filter => sub { my ($l,$tag) = @_; return $l; }
+   );
  It has ability to convert characters (using C<hash_filter>) from MARC
  database before creating structures enabling character re-mapping or quick
- fix-up of data.
+ fix-up of data. If you specify C<hash_filter> in both C<new> and C<to_hash>,
+ only the one from C<to_hash> will be used.
  This function returns hash which is like this:
-Line 303 
 sub to_hash {
+Line 308 
 sub to_hash {
          my $mfn = shift || confess "need mfn!";
          my $args = {@_};
+         my $filter_coderef = $args->{'hash_filter'} || $self->{'hash_filter'};
          # init record to include MFN as field 000
          my $rec = { '000' => [ $mfn ] };
          my $row = $self->fetch($mfn) || return;
-         foreach my $rec_nr (keys %{$row}) {
+         foreach my $tag (keys %{$row}) {
-                 foreach my $l (@{$row->{$rec_nr}}) {
+                 foreach my $l (@{$row->{$tag}}) {
                          # remove end marker
                          $l =~ s/\x1E$//;
                          # filter output
-                         $l = $self->{'hash_filter'}->($l, $rec_nr) if ($self->{'hash_filter'});
+                         $l = $filter_coderef->($l, $tag) if $filter_coderef;
                          my $val;
-Line 331 
 sub to_hash {
+Line 337 
 sub to_hash {
                                  foreach my $t (split(/\x1F/,$l)) {
                                          next if (! $t);
                                          my $f = substr($t,0,1);
+                                         my $v = substr($t,1);
                                          push @subfields, ( $f, $sf_usage->{$f}++ || 0 );
                                          # repeatable subfield -- convert it to array
-                                         if ($val->{$f}) {
+                                         if ( defined $val->{$f} ) {
-                                                 if ( $sf_usage->{$f} == 2 ) {
+                                                 if ( ref($val->{$f}) ne 'ARRAY' ) {
-                                                         $val->{$f} = [ $val->{$f}, $val ];
+                                                         $val->{$f} = [ $val->{$f}, $v ];
                                                  } else {
-                                                         push @{$val->{$f}}, $val;
+                                                         push @{$val->{$f}}, $v;
                                                  }
+                                         } else {
+                                                 $val->{$f} = $v;
                                          }
-                                         $val->{substr($t,0,1)} = substr($t,1);
                                  }
                                  $val->{subfields} = [ @subfields ] if $args->{include_subfields};
                          } else {
                                  $val = $l;
                          }
-                         push @{$rec->{$rec_nr}}, $val;
+                         push @{$rec->{$tag}}, $val;
                  }
          }
-Line 383 
 sub to_ascii {
+Line 391 
 sub to_ascii {
 ;
  __END__
+ =head1 UTF-8 ENCODING
+ This module does not perform any encoding conversion. Because the MARC format
+ is byte-oriented even when using UTF-8 (which uses a variable number of bytes
+ for each character), the file is opened in binary mode.
+ As a result, scalars returned to Perl don't have the UTF-8 flag set. The solution
+ is to use C<hash_filter> and L<Encode> to decode the UTF-8 data like this:
+   use Encode;
+   my $marc = new MARC::Fast(
+         marcdb => 'utf8.marc',
+         hash_filter => sub {
+                 Encode::decode( 'utf-8', $_[0] );
+         },
+   );
+ This will affect C<to_hash>, but C<fetch> will still return the binary
+ representation since it doesn't support C<hash_filter>.
  =head1 AUTHOR
          Dobrica Pavlinusic

 Legend:



Removed from v.23
 


changed lines


 
Added in v.47
 Legend:



Removed from v.23
 


changed lines


 
Added in v.47
-Removed from v.23
+Added in v.47

	ViewVC Help
Powered by ViewVC 1.1.26