1 |
#!/usr/bin/perl -w |
2 |
|
3 |
use strict; |
4 |
|
5 |
use Test::More tests => 89; |
6 |
use Test::Exception; |
7 |
use Cwd qw/abs_path/; |
8 |
use blib; |
9 |
use File::Slurp; |
10 |
use Getopt::Long; |
11 |
|
12 |
BEGIN { |
13 |
use_ok( 'WebPAC::Normalize' ); |
14 |
} |
15 |
|
16 |
use Data::Dump qw/dump/; |
17 |
|
18 |
# Verbosity of this script; every --debug on the command line raises it by one.
my $debug = 0;
GetOptions( "debug+", \$debug );

# The checks below expect _debug() to hand back the level it was given.
foreach my $level ( 1, 0 ) {
    cmp_ok( _debug($level), '==', $level, '_debug level' );
}

if ($debug) {
    diag "debug level for $0 is $debug";
}
# Debug levels above 2 are passed on (reduced by 2) to WebPAC::Normalize.
diag "debug level for WebPAC::Normalize is ", _debug( $debug - 2 ) if $debug > 2;

# Absolute path of this script with the file name stripped (the trailing
# slash is kept); used later to locate files relative to the script.
my $abs_path = abs_path($0);
ok( $abs_path, "abs_path" );
$abs_path =~ s#/[^/]*$#/#;
if ($debug) {
    diag "abs_path: $abs_path";
}
34 |
|
35 |
#throws_ok { new WebPAC::Normalize::XML( lookup_regex => 'foo' ) } qr/pair/, "lookup_regex without lookup"; |
36 |
|
37 |
# First sample record: field number => list of occurrences. An occurrence is
# either a hash of subfield => value or (field 900) a bare string.
my $rec1 = {
    '200' => [
        { 'a' => '200a',   'b' => '200b'   },
        { 'c' => '200c',   'd' => '200d'   },
        { 'a' => '200a*2', 'd' => '200d*2' },
    ],
    '201' => [
        { 'x' => '201x', 'y' => '201y' },
    ],
    '900' => [ '900-no_subfield' ],
    '901' => [ { 'a' => '900a' } ],
    '902' => [ { 'z' => '900' } ],
};
62 |
|
63 |
# Second sample record, shaped like $rec1: field number => list of
# occurrences, each either a subfield hash or a bare string.
my $rec2 = {
    '675' => [ { 'a' => '159.9' } ],
    '210' => [ {
        'c' => 'New York University press',
        'a' => 'New York',
        'd' => 'cop. 1988',
    } ],
    '700' => [ { 'a' => 'Haynal', 'b' => 'André' } ],
    '801' => [ 'FFZG' ],
    '991' => [ '8302' ],
    '000' => [ 1 ],
    '702' => [ { 'a' => 'Holder', 'b' => 'Elizabeth' } ],
    '215' => [ {
        'c' => 'ilustr',
        'a' => 'xix, 202 str',
        'd' => '23cm',
    } ],
    '990' => [ '2140', '88', 'HAY' ],
    '200' => [ {
        'e' => 'from Freud and Ferenczi to Michael balint',
        'a' => 'Controversies in psychoanalytic method',
        'g' => 'translated by Elizabeth Holder on the basisi of a first draft by Archie Hooton ; with a preface by Daniel N. Stern',
        'f' => 'by André E. Haynal',
    } ],
    '610' => [ 'povijest psihoanalize' ],
    '994' => [ { 'c' => '', 'a' => 'PS', 'b' => 'MG' } ],
    '320' => [ 'Kazalo' ],
    '101' => [ 'ENG' ],
    '686' => [ '2140' ],
    '300' => [ 'Prijevod djela: ' ],
};
110 |
|
111 |
|
112 |
# Lookup table whose key maps to a list of values.
my $lookup1 = { '00900' => [ 'lookup 1', 'lookup 2' ] };

# Lookup table whose key maps to a single plain value.
my $lookup2 = { '00900' => 'lookup' };
122 |
|
123 |
|
124 |
# Print a dump of the arguments followed by a separator line, then record one
# test which passes when at least one argument was supplied.
sub test {
    print dump( @_ ), ("-" x 78), "\n";
    # defined(@array) was deprecated for years and is a fatal compile-time
    # error since perl 5.22; an array in scalar context is true exactly when
    # it is non-empty, which is what this check is after.
    ok( scalar(@_), 'test called with arguments' );
}
128 |
|
129 |
# maximum number of characters of evaluated code shown in a test description
130 |
my $max_eval_output = 170; |
131 |
|
132 |
# Build a report for a failed eval: the error message followed by a numbered
# listing of the code that was evaluated.
#
#   $msg  - error text (normally the content of $@)
#   $code - source text that was passed to eval
#
# Returns the report as one string, every line terminated by "\n".
sub dump_error {
    my ($msg, $code) = @_;

    # Split on LF with an optional preceding CR, so CRLF input does not
    # produce phantom empty lines which would shift the numbering.
    my @lines = split(/\r?\n/, $code);
    my $out = "$msg\n";

    # Number lines from 1: perl reports "line N" in $@ counting from 1, so
    # the listing has to use the same numbering to be of any help.
    my $line_no = 0;
    foreach my $line (@lines) {
        $out .= sprintf("%2d: %s\n", ++$line_no, $line);
    }

    return $out;
}
144 |
|
145 |
# Evaluate a string of code and record one test which passes when the eval
# leaves $@ empty. On success the test is named after a whitespace-collapsed,
# length-limited copy of the code; on failure the name is the error report
# produced by dump_error(). Dies when called without (or with false) code.
sub test_s {
    my $t = shift or die;

    # One-line rendering of the code, cut to $max_eval_output characters.
    ( my $summary = $t ) =~ s/[\n\r\s]+/ /gs;
    if ( length($summary) > $max_eval_output ) {
        $summary = substr( $summary, 0, $max_eval_output ) . '...';
    }

    eval $t;

    my $description = $@ ? dump_error( $@, $t ) : "eval: $summary";
    ok( !$@, $description );
}
155 |
|
156 |
{ |
157 |
no strict 'subs';
use WebPAC::Normalize;

ok( !_set_lookup(undef), "set_lookup(undef)" );

# All checks in this section run against the first sample record.
_set_rec($rec1);

cmp_ok( join( ",", rec2('200','a') ), 'eq', '200a,200a*2', 'join rec2' );
cmp_ok(
    join( ",", rec2('200','a'), rec2('200','b') ),
    'eq', '200a,200a*2,200b', 'join rec2 rec2'
);
# An explicit comparator keeps perl from ever reading rec1 as the SUBNAME of
# "sort SUBNAME LIST"; without it the parse depends on there being no space
# between the function name and its opening parenthesis.
cmp_ok(
    join( " * ", sort { $a cmp $b } rec1('200'), rec1('201') ),
    'eq', '200a * 200a*2 * 200b * 200c * 200d * 200d*2 * 201x * 201y',
    'join sort rec1 rec1'
);

diag "is_deeply checks\n";
# Every check carries a description so a failure can be located in the output.
is_deeply(
    \[ rec1('200') ],
    \[ qw/200a 200b 200c 200d 200a*2 200d*2/ ],
    'rec1 returns all subfields of all occurrences'
);
is_deeply(
    \[ regex( 's/0/o/g', rec1('200') ) ],
    \[ qw/2ooa 2oob 2ooc 2ood 2ooa*2 2ood*2/ ],
    'regex applied to rec1'
);
is_deeply(
    \[ grep { /\*/ } regex( 's/0/o/g', rec1('200') ) ],
    \[ qw/2ooa*2 2ood*2/ ],
    'grep over regex over rec1'
);
is_deeply( \[ rec('902') ], \[ '900' ], 'rec without subfield' );

cmp_ok( rec('902'), 'eq', rec('902','z'), 'rec sf' );

# simple list manipulations
cmp_ok( join( '', prefix('ab', 'cd') ), 'eq', 'abcd', 'prefix' );
cmp_ok( join( '', suffix('xy', 'cd') ), 'eq', 'cdxy', 'suffix' );
cmp_ok(
    join( '', surround('->', '<-', 'a','b','c') ),
    'eq', '->a<-->b<-->c<-', 'surround'
);

# Lookup whose key maps to a list: the key is built from 902^z with "00" put
# in front, and the found values are joined with " i ".
_set_lookup($lookup1);

cmp_ok(
    join_with( " i ", lookup( regex( 's/^/00/', rec2('902','z') ) ) ),
    'eq', 'lookup 1 i lookup 2', 'join lookup regex rec2'
);
192 |
|
193 |
# check join_with operations |
194 |
|
195 |
# Join two subfields of field 201 with " <1> " and compare with the expected
# string. Arguments: first subfield, second subfield, expected result.
sub test_join_with_2 {
    my ($sf1, $sf2, $expected) = @_;

    my $name = "join_with $sf1 <1> $sf2 = $expected";

    cmp_ok(
        join_with( " <1> ", rec('201',$sf1), rec('201',$sf2) ),
        'eq', $expected, $name
    );
}
205 |
|
206 |
# Cases: first subfield, second subfield, expected result. The expectations
# treat '_' as a subfield that yields nothing for field 201.
test_join_with_2( @$_ ) foreach (
    [ '_', '_', ''              ],
    [ 'x', '_', '201x'          ],
    [ '_', 'x', '201x'          ],
    [ 'x', 'y', '201x <1> 201y' ],
);
210 |
|
211 |
# Nested variant: the second and third subfield of field 201 are joined with
# " <2> ", and that result is joined to the first subfield with " <1> ".
# Arguments: three subfields and the expected result.
sub test_join_with_3 {
    my ($sf1, $sf2, $sf3, $expected) = @_;

    my $name = "join_with $sf1 <1> $sf2 <2> $sf3 = $expected";

    cmp_ok(
        join_with( " <1> ",
            rec('201',$sf1),
            join_with( " <2> ", rec('201',$sf2), rec('201',$sf3) ),
        ),
        'eq', $expected, $name
    );
}
222 |
|
223 |
# Cases: three subfields and the expected result. The ('x','_','y') case is
# listed twice on purpose: each case is one planned test, so dropping the
# repetition would break the fixed test plan.
test_join_with_3( @$_ ) foreach (
    [ '_', '_', '_', ''                       ],
    [ 'x', '_', '_', '201x'                   ],
    [ '_', 'x', '_', '201x'                   ],
    [ '_', '_', 'x', '201x'                   ],
    [ 'x', 'y', '_', '201x <1> 201y'          ],
    [ 'x', '_', 'y', '201x <1> 201y'          ],
    [ '_', 'x', 'y', '201x <2> 201y'          ],
    [ 'x', '_', 'y', '201x <1> 201y'          ],
    [ 'x', 'y', 'x', '201x <1> 201y <2> 201x' ],
);
232 |
|
233 |
# test lookups against the table whose key maps to a single value

_set_lookup( $lookup2 );

is_deeply(
    \[ lookup( prefix( '00', rec('902') ) ) ],
    \[ 'lookup' ],
    'lookup prefix'
);

ok( !lookup('non-existent'), 'lookup non-existant' );

# Switch to the second sample record and evaluate a few rule snippets on it;
# each snippet is one test (it has to evaluate without an error).
_set_rec( $rec2 );

foreach my $snippet (
    qq{
        tag('Title',
            rec('200','a')
        );
    },
    qq{
        tag('Who',
            join_with(" ",
                rec('702','a'),
                rec('702','b')
            )
        );
    },
    qq{
        display('Publisher',
            rec('210','c')
        )
    },
    qq{
        search('Year',
            regex( 's/[^\\d]+//',
                rec('210','d')
            )
        )
    },
) {
    test_s($snippet);
}

# $ds stays in scope for the checks which follow.
my $ds = _get_ds();
ok( $ds, "get_ds" );
if ($debug) {
    diag "ds = ", dump($ds);
}
273 |
|
274 |
|
275 |
# Fetch the current data structure into the shared $ds and check that it has
# a 'something' entry and no 'empty' entry. When a key name is passed, also
# check that key below 'something' (so the number of tests recorded is three
# or four). Returns the fetched data structure.
sub test_check_ds {
    my ($t) = @_;

    $ds = _get_ds();
    ok( $ds, 'get_ds' );
    if ($debug) {
        diag dump( $ds );
    }

    ok( $ds && $ds->{something}, 'get_ds->something exists' );
    if ($t) {
        ok( $ds && $ds->{something}->{$t}, 'get_ds->something->'.$t.' exists' );
    }
    ok( $ds && !$ds->{empty}, 'get_ds->empty doesn\'t' );

    return $ds;
}
288 |
|
289 |
# For each rule function: start from a clean data structure, feed it one
# non-empty and one empty value, then check the listed keys with
# test_check_ds(). For tag() both 'search' and 'display' are checked.
foreach my $case (
    [ 'search',  'search' ],
    [ 'display', 'display' ],
    [ 'tag',     'search', 'display' ],
) {
    my ($function, @checked_keys) = @$case;

    _clean_ds();
    test_s(qq{ ${function}('something', '42'); });
    test_s(qq{ ${function}('empty', ''); });

    foreach my $key (@checked_keys) {
        test_check_ds($key);
    }
}
304 |
|
305 |
# Evaluate the rules file kept next to this script. "1;" is appended so the
# evaluated text ends with a true statement.
_clean_ds();
my $normalize_code = read_file( "$abs_path/data/normalize.pl" ) . "\n1;\n";
#diag "normalize code:\n$normalize_code\n";
test_s( $normalize_code );

$ds = _get_ds();
ok( $ds, "get_ds" );
if ($debug) {
    diag "ds = ", dump($ds);
}
313 |
|
314 |
# A one-occurrence record and a single rule, used twice: first step by step
# through _set_rec()/test_s()/_get_ds(), then through data_structure(). Both
# ways have to produce the same structure.
my $rec = {
    '200' => [ { 'a' => '200a', 'b' => '200b' } ],
};
my $rules = qq{ search('mixed', rec('200') ) };

_clean_ds();
_set_rec( $rec );
test_s( $rules );

$ds = _get_ds();
ok( $ds, "get_ds" );
is_deeply(
    $ds,
    {
        'mixed' => {
            'search' => [ '200a', '200b' ],
            'tag'    => 'mixed',
        },
    },
    'correct get_ds'
);

my $ds2 = WebPAC::Normalize::data_structure(
    row   => $rec,
    rules => $rules,
);
ok( $ds2, 'data_structure' );
is_deeply( $ds, $ds2, 'data_structure(s) same' );
338 |
|
339 |
# weird, not really valid structure (a subfield hash and a bare string in
# the same field) which is supported anyway
_clean_ds();
_set_rec( { '200' => [ { 'a' => '200a' }, '200-solo' ] } );
test_s(qq{ search('mixed', rec('200') ) });

$ds = _get_ds();
ok( $ds, "get_ds" );
is_deeply(
    $ds,
    {
        'mixed' => {
            'search' => [ '200a', '200-solo' ],
            'tag'    => 'mixed',
        },
    },
    'correct get_ds'
);
356 |
|
357 |
#
# MARC
#
# The record set just above (one subfield hash plus one bare string in field
# 200) is still the current record here.

# Field 900 with indicators 1 and 2: the expectation is one MARC field per
# value of field 200.
test_s(qq{ marc_indicators('900',1,2) });
test_s(qq{ marc('900','a', rec('200') ) });

# @marc stays in scope for the checks which follow.
my @marc = WebPAC::Normalize::_get_marc_fields();
ok( scalar(@marc), "_get_marc_fields" );
if ($debug) {
    diag dump( \@marc );
}

is_deeply(
    \@marc,
    [
        [ '900', 1, 2, 'a', '200a' ],
        [ '900', 1, 2, 'a', '200-solo' ],
    ],
    'correct marc with indicators'
);

# The same values again as a repeatable subfield, with other indicators.
test_s(qq{ marc_indicators('900',' ',9) });
test_s(qq{ marc_repeatable_subfield('900','a', rec('200') ) });

@marc = WebPAC::Normalize::_get_marc_fields();
ok( scalar(@marc), "_get_marc_fields" );
if ($debug) {
    diag dump( \@marc );
}

is_deeply(
    \@marc,
    [
        [ '900', 1, 2, 'a', '200a', 'a', '200-solo' ],
        [ '900', ' ', 9, 'a', '200a', 'a', '200-solo' ],
    ],
    'correct marc with repetable subfield'
);
382 |
|
383 |
# |
384 |
# test magic re-ordering of input data |
385 |
# |
386 |
|
387 |
# Run a set of rules against a record and compare the resulting MARC fields.
#
#   $msg    - description of the final comparison
#   record  - record handed to _set_rec()
#   rules   - rule statements separated by ';'
#   struct  - expected list of MARC fields (array reference)
#
# Every non-empty rule is one test (via test_s), followed by one test for
# fetching the MARC fields and one for the comparison.
sub test_rec_rules {
    my ($msg, $record, $rule_text, $expected) = @_;

    _clean_ds();
    _set_rec($record);

    for my $rule ( split(/;/, $rule_text) ) {
        # Collapse whitespace and trim both ends, so a rule spread over
        # several lines becomes a single line.
        $rule =~ s/[\s\n\r]+/ /gs;
        $rule =~ s/^\s+//gs;
        $rule =~ s/\s+$//gs;
        next unless $rule;
        test_s($rule);
    }

    # Stored in the shared @marc, as the checks before this sub do.
    @marc = WebPAC::Normalize::_get_marc_fields();
    ok( scalar(@marc), "_get_marc_fields" );
    if ($debug) {
        diag dump( \@marc );
    }

    is_deeply( \@marc, $expected, $msg );
}
405 |
|
406 |
{
    # Three occurrences of field 200, each with fewer subfields than the one
    # before.
    my $record = {
        '200' => [
            { 'a' => '200a-1', 'b' => '200b-1', 'c' => '200c-1' },
            { 'a' => '200a-2', 'b' => '200b-2' },
            { 'a' => '200a-3' },
        ],
    };

    # Subfields are fed one letter at a time.
    my $rule_text = qq{
        marc_indicators('900',1 ,0);
        marc('900','a', rec('200','a') );
        marc('900','b', rec('200','b') );
        marc('900','c', rec('200','c') );
    };

    # Expected: one 900 field per occurrence of the input field.
    my $expected = [
        [ '900', 1, 0, 'a', '200a-1', 'b', '200b-1', 'c', '200c-1' ],
        [ '900', 1, 0, 'a', '200a-2', 'b', '200b-2' ],
        [ '900', 1, 0, 'a', '200a-3' ],
    ];

    test_rec_rules(
        'correct marc with repetable subfield',
        $record, $rule_text, $expected
    );
}
432 |
|
433 |
|
434 |
{
    # Three occurrences of field 200; the last one has no subfield b.
    my $record = {
        '200' => [
            { 'a' => '200a-1', 'b' => '200b-1', 'c' => '200c-1' },
            { 'a' => '200a-2', 'b' => '200b-2', 'c' => '200c-2' },
            { 'a' => '200a-3', 'c' => '200c-3' },
        ],
    };

    # Subfield a is declared repeatable, b and c are fed as plain subfields.
    my $rule_text = qq{
        marc_indicators('900',1 ,0);
        marc_repeatable_subfield('900','a', rec('200','a') );
        marc('900','b', rec('200','b') );
        marc('900','c', rec('200','c') );
    };

    # Expected: every a value collected in the first 900 field, b and c
    # spread over one field per occurrence.
    my $expected = [
        [ '900', 1, 0, 'a', '200a-1', 'a', '200a-2', 'a', '200a-3', 'b', '200b-1', 'c', '200c-1' ],
        [ '900', 1, 0, 'b', '200b-2', 'c', '200c-2' ],
        [ '900', 1, 0, 'c', '200c-3' ],
    ];

    test_rec_rules(
        'marc_repeatable_subfield',
        $record, $rule_text, $expected
    );
}
462 |
} |
463 |
|