1 |
dpavlin |
578 |
package WebPAC::Output::MARC; |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use base qw/WebPAC::Common/; |
7 |
|
|
|
8 |
|
|
use MARC::Record 2.0; # need 2.0 for utf-8 encoding see marcpm.sf.net |
9 |
|
|
use MARC::Lint; |
10 |
|
|
use Data::Dump qw/dump/; |
11 |
dpavlin |
626 |
use Encode qw/from_to decode/; |
12 |
dpavlin |
578 |
|
13 |
|
|
=head1 NAME |
14 |
|
|
|
15 |
|
|
WebPAC::Output::MARC - Create MARC records from C<marc_*> normalisation rules |
16 |
|
|
|
17 |
|
|
=head1 VERSION |
18 |
|
|
|
19 |
dpavlin |
753 |
Version 0.04 |
20 |
dpavlin |
578 |
|
21 |
|
|
=cut |
22 |
|
|
|
23 |
dpavlin |
753 |
our $VERSION = '0.04'; |
24 |
dpavlin |
578 |
|
25 |
|
|
=head1 SYNOPSIS |
26 |
|
|
|
27 |
|
|
Create MARC records from C<marc_*> normalisation rules described in |
28 |
|
|
L<WebPAC::Normalize>. |
29 |
|
|
|
30 |
|
|
|
31 |
|
|
=head1 FUNCTIONS |
32 |
|
|
|
33 |
|
|
=head2 new |
34 |
|
|
|
35 |
|
|
my $marc = new WebPAC::Output::MARC( |
36 |
|
|
path => '/path/to/output.marc', |
37 |
dpavlin |
626 |
native_encoding => 'iso-8859-2', |
38 |
|
|
marc_encoding => 'utf-8', |
39 |
dpavlin |
578 |
lint => 1, |
40 |
|
|
dump => 0, |
41 |
|
|
) |
42 |
|
|
|
43 |
|
|
=cut |
44 |
|
|
|
45 |
|
|
# Constructor: create a MARC writer bound to the output file given in "path".
#
# Named arguments (all of them are stored on the object as given):
#   path            - output file name, required; opened with mode '>'
#   native_encoding - encoding of incoming field data, defaults to 'iso-8859-2'
#   marc_encoding   - encoding set on generated records, defaults to 'utf-8'
#   lint            - true to check every added record with MARC::Lint
#   dump            - true to make add() log every generated record
#
# Returns the blessed object. Reports a missing path through logconfess and
# a failed open through logdie.
sub new {
    my $class = shift;
    my $self  = {@_};
    bless( $self, $class );

    my $log = $self->_get_logger;

    if ( $self->{lint} ) {
        # The boolean flag is replaced by the checker object itself, so a
        # failed construction leaves a false value and add() skips linting.
        # Written as Class->new: this package defines its own new(), which
        # makes the indirect-object form "new MARC::Lint" depend on parser
        # heuristics.
        $self->{lint} = MARC::Lint->new
            or $log->warn("Can't create MARC::Lint object, linting is disabled");
    }

    if ( my $path = $self->{path} ) {
        open( $self->{fh}, '>', $path )
            or $log->logdie("can't open MARC output $path: $!");

        # records are printed through a :utf8 layer
        binmode( $self->{fh}, ':utf8' );

        $log->info( "Creating MARC export file $path", $self->{lint} ? ' (with lint)' : '', "\n" );
    }
    else {
        $log->logconfess("new called without path");
    }

    $self->{native_encoding} ||= 'iso-8859-2';
    $self->{marc_encoding}   ||= 'utf-8';

    # $self is a blessed reference and therefore always true; the former
    # "$self ? return $self : return undef" could never take its second arm.
    return $self;
}
72 |
|
|
|
73 |
|
|
=head2 add |
74 |
|
|
|
75 |
|
|
$marc->add( |
76 |
|
|
id => $mfn, |
77 |
|
|
fields => WebPAC::Normalize::_get_marc_fields(), |
78 |
|
|
leader => WebPAC::Normalize::marc_leader(), |
79 |
dpavlin |
582 |
row => $row, |
80 |
dpavlin |
578 |
); |
81 |
|
|
|
82 |
dpavlin |
582 |
C<row> is an optional parameter which is used when dumping the original row to |
83 |
|
|
error log. |
84 |
|
|
|
85 |
dpavlin |
578 |
=cut |
86 |
|
|
|
87 |
|
|
# Build one MARC record from normalised fields and print it to the output file.
#
# Named arguments:
#   id     - record identifier, required (must be defined); used in log messages
#   fields - required array ref of field array refs, handed to
#            MARC::Record's add_fields. NOTE: the referenced array is recoded
#            and sorted in place, so the caller's data is modified.
#   leader - optional hash ref mapping a leader offset to the string that
#            should overwrite the default leader at that offset
#   row    - optional original input row; only dumped into log messages
#
# Reports missing id/fields, or fields that is not an array ref, through
# logconfess.
sub add {
    my $self = shift;

    my $arg = {@_};

    my $log = $self->_get_logger;

    $log->logconfess("add needs fields and id arguments")
        unless ( $arg->{fields} && defined $arg->{id} );

    # Class->new instead of "new MARC::Record": this package has its own
    # new(), which makes the indirect-object form rely on parser heuristics.
    my $marc = MARC::Record->new;
    $marc->encoding( $self->{marc_encoding} );

    my $id = $arg->{id};

    $log->logconfess("fields isn't array") unless ( ref( $arg->{fields} ) eq 'ARRAY' );

    my $fields = $arg->{fields};

    $log->debug( "original fields = ", sub { dump($fields) } );

    # Decode the values at positions 4, 6, 8, ... of every field from
    # native_encoding. Fields with fewer than five elements are left
    # untouched. The loop is bounded by the real last index: the earlier
    # "0 .. (last - 3) / 2" range was truncated toward zero and could touch
    # one slot past the end, appending undef elements to short or
    # odd-length fields.
    foreach my $field (@$fields) {
        for ( my $pos = 4 ; $pos <= $#$field ; $pos += 2 ) {
            my $octets = $field->[$pos];
            $field->[$pos] = decode( $self->{native_encoding}, $octets );
        }
    }

    # sort fields numerically by their first element (the tag)
    @$fields = sort { $a->[0] <=> $b->[0] } @$fields;

    $log->debug( "recode fields = ", sub { dump($fields) } );

    $marc->add_fields(@$fields);

    # tweak leader: splice every requested string over the default leader
    if ( my $new_l = $arg->{leader} ) {

        my $leader = $marc->leader;

        foreach my $o ( keys %$new_l ) {
            my $insert = $new_l->{$o};
            $leader = substr( $leader, 0, $o )
                . $insert
                . substr( $leader, $o + length($insert) );
        }
        $marc->leader($leader);
    }

    if ( $self->{lint} ) {
        $self->{lint}->check_record($marc);
        my @w = $self->{lint}->warnings;
        if (@w) {
            $log->error(
                "MARC lint detected warning on record $id\n",
                "<<<<< Original input row:\n", dump( $arg->{row} ), "\n",
                ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump($fields), "\n",
                "!!!!! MARC lint warnings:\n", join( "\n", @w ), "\n"
            );

            # per-message counters, summarised later by finish()
            $self->{_marc_lint_warnings}->{$_}++ for @w;
        }
    }

    if ( $self->{dump} ) {
        $log->info(
            "MARC record on record $id\n",
            "<<<<< Original imput row:\n", dump( $arg->{row} ), "\n",
            ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump($fields), "\n",
        );
    }

    print { $self->{fh} } $marc->as_usmarc;

}
160 |
|
|
|
161 |
|
|
=head2 finish |
162 |
|
|
|
163 |
|
|
Close MARC output file |
164 |
|
|
|
165 |
|
|
$marc->finish; |
166 |
|
|
|
167 |
dpavlin |
582 |
It will also log a summary of MARC lint warnings if the object was created with C<lint>. |
168 |
|
|
|
169 |
dpavlin |
578 |
=cut |
170 |
|
|
|
171 |
|
|
# Close the MARC output file and, if add() recorded any MARC::Lint warnings,
# log a summary: one "count<TAB>warning" line per distinct warning, highest
# count first.
#
# Takes no arguments. Reports a failed close through logdie.
sub finish {
    my $self = shift;

    # _get_logger (leading underscore) is the accessor new() and add() use;
    # this method previously called get_logger, which nothing in this
    # package defines.
    my $log = $self->_get_logger;

    close( $self->{fh} ) or $log->logdie( "can't close ", $self->{path}, ": $!" );

    if ( my $w = $self->{_marc_lint_warnings} ) {
        $log->error(
            "MARC lint warnings summary:\n",
            join( "\n",
                map  { $w->{$_} . "\t" . $_ }
                sort { $w->{$b} <=> $w->{$a} } keys %$w
            )
        );
    }
}
187 |
|
|
|
188 |
|
|
=head1 AUTHOR |
189 |
|
|
|
190 |
|
|
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >> |
191 |
|
|
|
192 |
|
|
=head1 COPYRIGHT & LICENSE |
193 |
|
|
|
194 |
|
|
Copyright 2006 Dobrica Pavlinusic, All Rights Reserved. |
195 |
|
|
|
196 |
|
|
This program is free software; you can redistribute it and/or modify it |
197 |
|
|
under the same terms as Perl itself. |
198 |
|
|
|
199 |
|
|
=cut |
200 |
|
|
|
201 |
|
|
1; # End of WebPAC::Output::MARC |