#!/usr/bin/perl -w

# This utility will convert some (or all, depending on the definition in
# the XML configuration file) fields and subfields, with remapping, into a
# MARC file from one or more CDS/ISIS files.
#
# 2004-02-23 Dobrica Pavlinusic <dpavlin@rot13.org>
#
#
# Run without parameters for usage instructions, or run without parameters
# and redirect STDOUT to a file to create an example configuration file like
# this:
#
#	./isis2marc.pl > config.xml
#
# If you want to create unique records, you need to define one or more
# fields as a key (which will be used to produce just one record for each
# key).
#
# Keys are global for one run of the script (that means for all ISIS
# databases used in one run), but you can write arbitrary values (as opposed
# to field names) inside the key tag to produce a unique key. For example,
#
#	<key>author</key>
#	<key>700$a</key>
#
# WARNING: When using the <key> tag you can enter a field with a subfield
# (in the format 700$a), just a field name (for fields which don't have
# subfields, like 005) or a literal value. Fields which don't exist in a
# record will be skipped, and if the key is empty no output record will be
# produced.
#
# So, the best way to produce just a few records in the output is to specify
# as the key a field which doesn't exist at all in the ISIS database, or
# just one literal value!!
#
#
# If ISIS databases are named the same as the directories in which they
# reside, you can specify just the directories (so that shell globbing
# works) like this:
#
#	./isis2marc.pl config.xml all.marc /mnt2/*/LIBRI
#

use strict;
use OpenIsis;
use MARC;
use XML::Simple;
use Data::Dumper;

# At least three arguments are needed: configuration file, output MARC file
# and one ISIS database. With fewer arguments, print usage instructions to
# STDERR, dump a template configuration file to STDOUT (so that it can be
# redirected into a file) and exit with a non-zero status.
if (@ARGV < 3) {
	print STDERR "Usage: $0 config.xml marc_file.iso isis_db [isis_db ...|isis_dir]\n";
	print STDERR <<'_END_OF_USAGE_';

isis_db can be path to directory (if ISIS database is called
same as directory) which will make shell globing work
or full path to ISIS database (without any extension)

Example configuration file will be dumped to standard output
after this, so you can just re-direct output of this script
to produce config file like this:

$ ./isis2marc.pl > config.xml

_END_OF_USAGE_

	# The XML declaration has to be the very first thing in the document:
	# nothing (not even an empty line) may be printed before it, or the
	# generated file is not well-formed and can't be parsed back.
	print <<'_END_OF_CONFIG_';
<?xml version="1.0" encoding="ISO-8859-2"?>
<!-- template configuration file -->
<mapping>
	<record>
		<key>700$a</key>
		<key>700$b</key>
		<field tag="700">
			<indicator1>0</indicator1>
			<indicator2>#</indicator2>
			<subfield id="a">700$a</subfield>
			<subfield id="b">700$b</subfield>
		</field>
		<field tag="009">
			<nosubfield>900</nosubfield>
		</field>
	</record>

</mapping>

_END_OF_CONFIG_

	exit 1;
}

# Parse the XML configuration file (first command line argument) which
# describes how ISIS fields are remapped into MARC fields.
my $xml = XML::Simple->new();

my $config_file = shift @ARGV || die "no config file?";

my $config = $xml->XMLin($config_file,
	# fold <subfield id="x"> elements into a hash keyed by their id
	KeyAttr => { subfield => 'id' },
	# 'key' has to be forced into an array as well: a record with just one
	# <key> element would otherwise be returned as a plain scalar and
	# dereferencing it as an array dies under "use strict"
	ForceArray => [ 'record', 'key', 'field', 'subfield', 'nosubfield' ],
	ContentKey => '-content',
) || die "can't open configuration file '$config_file': $!";

# second command line argument is the name of the output MARC file
my $marc_file = shift @ARGV || die "no marc file?";

my $marc=MARC->new;

# it seems that I can't specify invalid template for 005 and prevent
# output from creating field 005
#$num->add_005s({record=>1});

# unbuffered STDOUT, so that progress output shows up immediately
select(STDOUT); $|=1;

# keys of records which were already produced (global for the whole run)
my %stored;
# total number of MARC records created from all databases
my $total = 0;

# True if the value is defined and not an empty string. Plain truthiness
# can't be used for field data because "0" is a legitimate value.
my $has_value = sub {
	my ($value) = @_;
	return (defined($value) && $value ne '');
};

# Indicator from configuration, or a blank if it's not specified. Using
# "||" here would turn the valid indicator "0" into a blank.
my $indicator = sub {
	my ($value) = @_;
	return (defined($value) && !ref($value) && $value ne '') ? $value : ' ';
};

# All remaining arguments are ISIS databases (or directories containing a
# database of the same name). Each one is read row by row and every row is
# converted into zero or more MARC records according to the configuration.
foreach my $db_file (@ARGV) {

	print "reading '$db_file'";

	# directory given: database is named the same as the directory
	if (-d $db_file) {
		$db_file =~ s,([^/]+)/*$,$1/$1,;
	}

	# NOTE(review): the result of OpenIsis::open is not checked, so a
	# database which can't be opened is reported as having one row --
	# confirm the error convention of OpenIsis::open and handle it here.
	my $db = OpenIsis::open( $db_file );
	my $maxmfn = OpenIsis::maxRowid( $db ) || 1;

	print " [rows: $maxmfn]\n";

	# print (about) $progress_len dots while the database is processed
	my $progress_len = 50;

	my $step = int($maxmfn/$progress_len);
	$step = 1 if ($step == 0);

	# number of MARC records created from this database
	my $new = 0;

	for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
		print "." if ($mfn % $step == 0);
		my $row = OpenIsis::read( $db, $mfn );

		# unroll this row into in-memory structure: keys are either
		# 'field$subfield' or just 'field' (for fields without
		# subfields); a later occurrence overwrites an earlier one
		my %data;

		# delete mfn from $row because it's literal value and
		# not array, so rest of code would croak
		delete($row->{mfn});

		foreach my $fld (keys %{$row}) {

			foreach my $rec_data (@{$row->{$fld}}) {

				# cut off subfields (^x followed by data) one by one
				while ($rec_data =~ s/\^(\w)([^\^]+)//) {
					$data{$fld.'$'.$1} = $2;

					# delete last subfield delimiter
					$rec_data = "" if ($rec_data =~ /(\^\w*$|\^\w\s*$)/);
				}

				# record data still exist? it's field without
				# subfields, then...
				if ($has_value->($rec_data)) {
					$data{$fld} = $rec_data;
				}
			}
		}

		# now, create output MARC record(s)

		foreach my $cfg_rec (@{$config->{record}}) {

			# key elements from configuration; a single <key> may
			# arrive as a plain scalar instead of an array
			my $cfg_keys = $cfg_rec->{key};
			my @key_parts = ref($cfg_keys) eq 'ARRAY' ? @{$cfg_keys}
				: defined($cfg_keys) ? ($cfg_keys)
				: ();

			# build unique key: value of the field if it exists in
			# this row, the literal text if it doesn't look like a
			# field (e.g. 700 or 700$a), and nothing otherwise
			my $key = '';
			foreach my $part (@key_parts) {
				if ($has_value->($data{$part})) {
					$key .= $data{$part};
				} elsif ($part !~ m/^\d{3,4}(\$\w)*$/) {
					$key .= $part;
				}
			}

			# no key or a record with this key already produced
			next if ($key eq '' || $stored{$key});

			# this will be new record (if needed)
			my $num;

			# with one or more fields
			foreach my $cfg_fld (@{$cfg_rec->{field}}) {

				my $new_fld = $cfg_fld->{tag};
				my $i1 = $indicator->($cfg_fld->{indicator1});
				my $i2 = $indicator->($cfg_fld->{indicator2});

				#
				# first create fields without subfields
				#

				foreach my $f (@{$cfg_fld->{nosubfield}}) {
					next unless $has_value->($data{$f});

					if (! $num) {
						$num=$marc->createrecord();
						$new++;
					}
					$marc->addfield({record=>$num,
						field=>$new_fld,
						i1=>$i1,
						i2=>$i2,
						value=>$data{$f}
					});
				}

				#
				# then create fields with subfields
				#

				# this will hold subfield id/value pairs
				my @values;

				# sorted, so that the order of subfields in output
				# doesn't depend on hash ordering
				foreach my $new_sf (sort keys %{$cfg_fld->{subfield}}) {
					# field$subfield
					my $f = $cfg_fld->{subfield}->{$new_sf};
					if ($has_value->($data{$f})) {
						push @values, $new_sf, $data{$f};
					}
				}
				next if (! @values);

				if (! $num) {
					$num=$marc->createrecord();
					$new++;
				}
				$marc->addfield({record=>$num,
					field=>$new_fld,
					i1=>$i1,
					i2=>$i2,
					value=>\@values}
				);
			}

			# remember the key only if a record was really created,
			# so that a row without any of the mapped fields doesn't
			# use up the key for rows which have them
			$stored{$key}++ if ($num);
		}
	}
	$total += $new;
	printf "\t%d (%0.2f%%) t: %d\n",$new,($new*100/$maxmfn),$total;
}

# Write all records collected in $marc into $marc_file (usmarc format).
$marc->output({
	file     => "> $marc_file",
	'format' => "usmarc",
});