2 |
|
|
3 |
use strict; |
use strict; |
4 |
use Carp; |
use Carp; |
5 |
use Data::Dumper; |
use Data::Dump qw/dump/; |
6 |
|
|
7 |
BEGIN { |
BEGIN { |
8 |
use Exporter (); |
use Exporter (); |
9 |
use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
10 |
$VERSION = 0.08; |
$VERSION = 0.10; |
11 |
@ISA = qw (Exporter); |
@ISA = qw (Exporter); |
12 |
#Give a hoot don't pollute, do not export more than needed by default |
#Give a hoot don't pollute, do not export more than needed by default |
13 |
@EXPORT = qw (); |
@EXPORT = qw (); |
87 |
my $len = read($self->{fh}, $leader, 24); |
my $len = read($self->{fh}, $leader, 24); |
88 |
|
|
89 |
if ($len < 24) { |
if ($len < 24) { |
90 |
carp "short read of leader, aborting\n"; |
warn "short read of leader, aborting\n"; |
91 |
|
$self->{count}--; |
92 |
last; |
last; |
93 |
} |
} |
94 |
|
|
125 |
|
|
126 |
# skip to next record |
# skip to next record |
127 |
my $o = substr($leader,0,5); |
my $o = substr($leader,0,5); |
128 |
|
warn "# in record ", $self->{count}," record length isn't number but: ",dump($o),"\n" unless $o =~ m/^\d+$/; |
129 |
if ($o > 24) { |
if ($o > 24) { |
130 |
seek($self->{fh},$o-24,1) if ($o); |
seek($self->{fh},$o-24,1) if ($o); |
131 |
} else { |
} else { |
277 |
|
|
278 |
Read record with specified MFN and convert it to hash |
Read record with specified MFN and convert it to hash |
279 |
|
|
280 |
my $hash = $marc->to_hash($mfn); |
my $hash = $marc->to_hash( $mfn, include_subfields => 1, ); |
281 |
|
|
282 |
It has ability to convert characters (using C<hash_filter>) from MARC |
It has ability to convert characters (using C<hash_filter>) from MARC |
283 |
database before creating structures enabling character re-mapping or quick |
database before creating structures enabling character re-mapping or quick |
304 |
|
|
305 |
my $mfn = shift || confess "need mfn!"; |
my $mfn = shift || confess "need mfn!"; |
306 |
|
|
307 |
|
my $args = {@_}; |
308 |
|
|
309 |
# init record to include MFN as field 000 |
# init record to include MFN as field 000 |
310 |
my $rec = { '000' => [ $mfn ] }; |
my $rec = { '000' => [ $mfn ] }; |
311 |
|
|
312 |
my $row = $self->fetch($mfn) || return; |
my $row = $self->fetch($mfn) || return; |
313 |
|
|
314 |
foreach my $rec_nr (keys %{$row}) { |
foreach my $tag (keys %{$row}) { |
315 |
foreach my $l (@{$row->{$rec_nr}}) { |
foreach my $l (@{$row->{$tag}}) { |
316 |
|
|
317 |
# remove end marker |
# remove end marker |
318 |
$l =~ s/\x1E$//; |
$l =~ s/\x1E$//; |
319 |
|
|
320 |
# filter output |
# filter output |
321 |
$l = $self->{'hash_filter'}->($l, $rec_nr) if ($self->{'hash_filter'}); |
$l = $self->{'hash_filter'}->($l, $tag) if ($self->{'hash_filter'}); |
322 |
|
|
323 |
my $val; |
my $val; |
324 |
|
|
325 |
# has identifiers? |
# has identifiers? |
326 |
($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\x1F/\x1F/); |
($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\x1F/\x1F/); |
327 |
|
|
328 |
|
my $sf_usage; |
329 |
|
my @subfields; |
330 |
|
|
331 |
# has subfields? |
# has subfields? |
332 |
if ($l =~ m/\x1F/) { |
if ($l =~ m/\x1F/) { |
333 |
foreach my $t (split(/\x1F/,$l)) { |
foreach my $t (split(/\x1F/,$l)) { |
334 |
next if (! $t); |
next if (! $t); |
335 |
my $f = substr($t,0,1); |
my $f = substr($t,0,1); |
336 |
# repeatable subfields. When we hit first one,
|
337 |
# store CURRENT (up to that) in first repetition |
push @subfields, ( $f, $sf_usage->{$f}++ || 0 ); |
338 |
# of this record. Then, new record with same |
|
339 |
# identifiers will be created. |
# repeatable subfield -- convert it to array
340 |
if ($val->{$f}) { |
if ($val->{$f}) { |
341 |
push @{$rec->{$rec_nr}}, $val; |
if ( ref($val->{$f}) ne 'ARRAY' ) { |
342 |
$val = { |
$val->{$f} = [ $val->{$f}, $val ]; |
343 |
i1 => $val->{i1}, |
} else { |
344 |
i2 => $val->{i2}, |
push @{$val->{$f}}, $val; |
345 |
}; |
} |
346 |
} |
} |
347 |
$val->{substr($t,0,1)} = substr($t,1); |
$val->{substr($t,0,1)} = substr($t,1); |
348 |
} |
} |
349 |
|
$val->{subfields} = [ @subfields ] if $args->{include_subfields}; |
350 |
} else { |
} else { |
351 |
$val = $l; |
$val = $l; |
352 |
} |
} |
353 |
|
|
354 |
push @{$rec->{$rec_nr}}, $val; |
push @{$rec->{$tag}}, $val; |
355 |
} |
} |
356 |
} |
} |
357 |
|
|
385 |
1; |
1; |
386 |
__END__ |
__END__ |
387 |
|
|
388 |
|
=head1 UTF-8 ENCODING |
389 |
|
|
390 |
|
This module does nothing with encoding. But, since MARC format is byte |
391 |
|
oriented even when using UTF-8 which has variable number of bytes for each |
392 |
|
character, the file is opened in binary mode.
393 |
|
|
394 |
|
As a result, all scalars returned to perl don't have the utf-8 flag. The solution is
395 |
|
to use C<hash_filter> and L<Encode> to decode utf-8 encoding like this: |
396 |
|
|
397 |
|
use Encode; |
398 |
|
|
399 |
|
my $marc = new MARC::Fast( |
400 |
|
marcdb => 'utf8.marc', |
401 |
|
hash_filter => sub { |
402 |
|
Encode::decode( 'utf-8', $_[0] ); |
403 |
|
}, |
404 |
|
); |
405 |
|
|
406 |
|
This will affect C<to_hash>, but C<fetch> will still return binary representation |
407 |
|
since it doesn't support C<hash_filter>. |
408 |
|
|
409 |
=head1 AUTHOR |
=head1 AUTHOR |
410 |
|
|
411 |
Dobrica Pavlinusic |
Dobrica Pavlinusic |