7 |
BEGIN { |
BEGIN { |
8 |
use Exporter (); |
use Exporter (); |
9 |
use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
10 |
$VERSION = 0.22_2; |
$VERSION = 0.24_1; |
11 |
@ISA = qw (Exporter); |
@ISA = qw (Exporter); |
12 |
#Give a hoot don't pollute, do not export more than needed by default |
#Give a hoot don't pollute, do not export more than needed by default |
13 |
@EXPORT = qw (); |
@EXPORT = qw (); |
84 |
read_fdt => 1, |
read_fdt => 1, |
85 |
include_deleted => 1, |
include_deleted => 1, |
86 |
hash_filter => sub { |
hash_filter => sub { |
87 |
my $v = shift; |
my ($v,$field_number) = @_; |
88 |
$v =~ s#foo#bar#g; |
$v =~ s#foo#bar#g; |
89 |
}, |
}, |
90 |
debug => 1, |
debug => 1, |
91 |
join_subfields_with => ' ; ', |
join_subfields_with => ' ; ', |
|
regexps => [ |
|
|
's/something/else/g', |
|
|
], |
|
92 |
); |
); |
93 |
|
|
94 |
Options are described below: |
Options are described below: |
114 |
|
|
115 |
=item hash_filter |
=item hash_filter |
116 |
|
|
117 |
Filter code ref which will be used before data is converted to hash. |
Filter code ref which will be used before data is converted to hash. It will |
118 |
|
receive two arguments, whole line from current field (in C<< $_[0] >>) and |
119 |
|
field number (in C<< $_[1] >>). |
120 |
|
|
121 |
=item debug |
=item debug |
122 |
|
|
128 |
option is included to support lagacy application written against version |
option is included to support lagacy application written against version |
129 |
older than 0.21 of this module. By default, it disabled. See L</to_hash>. |
older than 0.21 of this module. By default, it disabled. See L</to_hash>. |
130 |
|
|
131 |
=item regexpes |
=item ignore_empty_subfields |
132 |
|
|
133 |
Define (any number) of regexpes to apply at field values before they are |
Remove all empty subfields while reading from ISIS file. |
|
splitted into subfield. This is great place to split subfields in input to |
|
|
mulitple subfields if needed or rename subfields. |
|
134 |
|
|
135 |
=back |
=back |
136 |
|
|
143 |
|
|
144 |
croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb}); |
croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb}); |
145 |
|
|
146 |
foreach my $v (qw{isisdb debug include_deleted hash_filter}) { |
foreach my $v (qw{isisdb debug include_deleted hash_filter join_subfields_with ignore_empty_subfields}) { |
147 |
$self->{$v} = {@_}->{$v}; |
$self->{$v} = {@_}->{$v} if defined({@_}->{$v}); |
148 |
} |
} |
149 |
|
|
150 |
my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*")); |
my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*")); |
388 |
# skip zero-sized fields |
# skip zero-sized fields |
389 |
next if ($FieldLEN[$i] == 0); |
next if ($FieldLEN[$i] == 0); |
390 |
|
|
391 |
push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]); |
my $v = substr($buff,$FieldPOS[$i],$FieldLEN[$i]); |
392 |
|
|
393 |
|
if ( $self->{ignore_empty_subfields} ) { |
394 |
|
$v =~ s/(\^\w)+(\^\w)/$2/g; |
395 |
|
$v =~ s/\^\w$//; # last on line? |
396 |
|
next if ($v eq ''); |
397 |
|
} |
398 |
|
|
399 |
|
push @{$self->{record}->{$FieldTAG[$i]}}, $v; |
400 |
} |
} |
401 |
|
|
402 |
$self->{'current_mfn'} = $mfn; |
$self->{'current_mfn'} = $mfn; |
533 |
my $hash = $isis->to_hash({ |
my $hash = $isis->to_hash({ |
534 |
mfn => 42, |
mfn => 42, |
535 |
include_subfields => 1, |
include_subfields => 1, |
|
regexps => [ |
|
|
's/something/else/g', |
|
|
], |
|
536 |
}); |
}); |
537 |
|
|
538 |
Each option controll creation of hash: |
Each option controll creation of hash: |
560 |
Define delimiter which will be used to join repeatable subfields. You can |
Define delimiter which will be used to join repeatable subfields. You can |
561 |
specify option here instead in L</new> if you want to have per-record control. |
specify option here instead in L</new> if you want to have per-record control. |
562 |
|
|
563 |
=item regexpes |
=item hash_filter |
564 |
|
|
565 |
Override C<regexpes> specified in L</new>. |
You can override C<hash_filter> defined in L</new> using this option. |
566 |
|
|
567 |
=back |
=back |
568 |
|
|
575 |
my $mfn = shift || confess "need mfn!"; |
my $mfn = shift || confess "need mfn!"; |
576 |
my $arg; |
my $arg; |
577 |
|
|
578 |
|
my $hash_filter = $self->{hash_filter}; |
579 |
|
|
580 |
if (ref($mfn) eq 'HASH') { |
if (ref($mfn) eq 'HASH') { |
581 |
$arg = $mfn; |
$arg = $mfn; |
582 |
$mfn = $arg->{mfn} || confess "need mfn in arguments"; |
$mfn = $arg->{mfn} || confess "need mfn in arguments"; |
583 |
|
$hash_filter = $arg->{hash_filter} if ($arg->{hash_filter}); |
584 |
} |
} |
585 |
|
|
|
$arg->{regexpes} ||= $self->{regexpes}; |
|
|
|
|
|
confess "regexps must be HASH" if ($arg->{regexps} && ref($arg->{regexps}) ne 'HASH'); |
|
|
|
|
586 |
# init record to include MFN as field 000 |
# init record to include MFN as field 000 |
587 |
my $rec = { '000' => [ $mfn ] }; |
my $rec = { '000' => [ $mfn ] }; |
588 |
|
|
589 |
my $row = $self->fetch($mfn) || return; |
my $row = $self->fetch($mfn) || return; |
590 |
|
|
591 |
my $j_rs = $arg->{join_subfields_with}; |
my $j_rs = $arg->{join_subfields_with} || $self->{join_subfields_with}; |
592 |
$j_rs = $self->{join_subfields_with} unless(defined($j_rs)); |
$j_rs = $self->{join_subfields_with} unless(defined($j_rs)); |
593 |
my $i_sf = $arg->{include_subfields}; |
my $i_sf = $arg->{include_subfields}; |
594 |
|
|
596 |
foreach my $l (@{$row->{$f_nr}}) { |
foreach my $l (@{$row->{$f_nr}}) { |
597 |
|
|
598 |
# filter output |
# filter output |
599 |
if ($self->{'hash_filter'}) { |
$l = $hash_filter->($l, $f_nr) if ($hash_filter); |
600 |
$l = $self->{'hash_filter'}->($l); |
next unless defined($l); |
|
next unless defined($l); |
|
|
} |
|
|
|
|
|
# apply regexps |
|
|
if ($arg->{regexps} && defined($arg->{regexps}->{$f_nr})) { |
|
|
confess "regexps->{$f_nr} must be ARRAY" if (ref($arg->{regexps}->{$f_nr}) ne 'ARRAY'); |
|
|
my $c = 0; |
|
|
foreach my $r (@{ $arg->{regexps}->{$f_nr} }) { |
|
|
while ( eval '$l =~ ' . $r ) { $c++ }; |
|
|
} |
|
|
warn "## field $f_nr triggered $c regexpes\n" if ($c && $self->{debug}); |
|
|
} |
|
601 |
|
|
602 |
my $val; |
my $val; |
603 |
my $r_sf; # repeatable subfields in this record |
my $r_sf; # repeatable subfields in this record |
611 |
next if (! $t); |
next if (! $t); |
612 |
my ($sf,$v) = (substr($t,0,1), substr($t,1)); |
my ($sf,$v) = (substr($t,0,1), substr($t,1)); |
613 |
# XXX this might be option, but why? |
# XXX this might be option, but why? |
614 |
next unless ($v); |
next unless (defined($v) && $v ne ''); |
615 |
# warn "### $f_nr^$sf:$v",$/ if ($self->{debug} > 1); |
# warn "### $f_nr^$sf:$v",$/ if ($self->{debug} > 1); |
616 |
|
|
617 |
if (ref( $val->{$sf} ) eq 'ARRAY') { |
if (ref( $val->{$sf} ) eq 'ARRAY') { |
763 |
As this is young module, new features are added in subsequent version. It's |
As this is young module, new features are added in subsequent version. It's |
764 |
a good idea to specify version when using this module like this: |
a good idea to specify version when using this module like this: |
765 |
|
|
766 |
use Biblio::Isis 0.21 |
use Biblio::Isis 0.23 |
767 |
|
|
768 |
Below is list of changes in specific version of module (so you can target |
Below is list of changes in specific version of module (so you can target |
769 |
older versions if you really have to): |
older versions if you really have to): |
770 |
|
|
771 |
=over 8 |
=over 8 |
772 |
|
|
773 |
|
=item 0.24 |
774 |
|
|
775 |
|
Added C<ignore_empty_subfields> |
776 |
|
|
777 |
|
=item 0.23 |
778 |
|
|
779 |
|
Added C<hash_filter> to L</to_hash> |
780 |
|
|
781 |
|
Fixed bug with documented C<join_subfields_with> in L</new> which wasn't |
782 |
|
implemented |
783 |
|
|
784 |
|
=item 0.22 |
785 |
|
|
786 |
|
Added field number when calling C<hash_filter> |
787 |
|
|
788 |
=item 0.21 |
=item 0.21 |
789 |
|
|
790 |
Added C<join_subfields_with> to L</new> and L</to_hash>. |
Added C<join_subfields_with> to L</new> and L</to_hash>. |