/[webpac2]/trunk/t/3-normalize.t
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/t/3-normalize.t

Parent Directory | Revision Log


Revision 579 - (show annotations)
Tue Jul 4 11:08:43 2006 UTC (17 years, 9 months ago) by dpavlin
File MIME type: application/x-troff
File size: 13689 byte(s)
 r798@llin:  dpavlin | 2006-07-04 13:08:44 +0200
 changed _get_marc_fields to return arrayref, tests and fix for marc_remove(field)

1 #!/usr/bin/perl -w
2
3 use strict;
4
5 use Test::More tests => 147;
6 use Test::Exception;
7 use Cwd qw/abs_path/;
8 use blib;
9 use File::Slurp;
10 use Getopt::Long;
11
12 BEGIN {
13 use_ok( 'WebPAC::Normalize' );
14 }
15
16 use Data::Dump qw/dump/;
17
# Verbosity of this test script; every --debug on the command line
# increments it.
18 my $debug = 0;
19 GetOptions(
20 "debug+", \$debug
21 );
22
# _debug() is expected to hand back the level it was just given.
23 cmp_ok(_debug(1), '==', 1, '_debug level');
24 cmp_ok(_debug(0), '==', 0, '_debug level');
25
26 diag "debug level for $0 is $debug" if ($debug);
# Levels above 2 are passed on to WebPAC::Normalize, reduced by 2.
27 if ($debug > 2) {
28 diag "debug level for WebPAC::Normalize is ", _debug( $debug - 2 );
29 }
30
# Directory of this script: the file name is cut off and the trailing
# slash is kept.
31 ok(my $abs_path = abs_path($0), "abs_path");
32 $abs_path =~ s#/[^/]*$#/#;
33 diag "abs_path: $abs_path" if ($debug);
34
35 #throws_ok { new WebPAC::Normalize::XML( lookup_regex => 'foo' ) } qr/pair/, "lookup_regex without lookup";
36
# Small input record used by the first group of checks.
# Keys are field tags; each value is a list of occurrences, where an
# occurrence is either a hash of subfield => value or a plain string
# (field 900).
37 my $rec1 = {
38 '200' => [{
39 'a' => '200a',
40 'b' => '200b',
41 },{
42 'c' => '200c',
43 'd' => '200d',
44 },{
45 'a' => '200a*2',
46 'd' => '200d*2',
47 }],
48 '201' => [{
49 'x' => '201x',
50 'y' => '201y',
51 }],
52 '900' => [
53 '900-no_subfield'
54 ],
55 '901' => [{
56 'a' => '900a',
57 }],
# The value of 902 z, prefixed with 00, is the key present in both lookup
# fixtures.
58 '902' => [{
59 'z' => '900',
60 }],
61 };
62
# Larger input record mixing subfield hashes with plain string occurrences.
# It is loaded with _set_rec( $rec2 ) before the tag / display / search
# checks further down in this script.
63 my $rec2 = {
64 '675' => [ {
65 'a' => '159.9'
66 } ],
67 '210' => [ {
68 'c' => 'New York University press',
69 'a' => 'New York',
70 'd' => 'cop. 1988'
71 } ],
72 '700' => [ {
73 'a' => 'Haynal',
74 'b' => 'André'
75 } ],
76 '801' => [ 'FFZG' ],
77 '991' => [ '8302' ],
78 '000' => [ 1 ],
79 '702' => [ {
80 'a' => 'Holder',
81 'b' => 'Elizabeth'
82 } ],
83 '215' => [ {
84 'c' => 'ilustr',
85 'a' => 'xix, 202 str',
86 'd' => '23cm'
87 } ],
88 '990' => [
89 '2140',
90 '88',
91 'HAY'
92 ],
93 '200' => [ {
94 'e' => 'from Freud and Ferenczi to Michael balint',
95 'a' => 'Controversies in psychoanalytic method',
96 'g' => 'translated by Elizabeth Holder on the basisi of a first draft by Archie Hooton ; with a preface by Daniel N. Stern',
97 'f' => 'by André E. Haynal'
98 } ],
99 '610' => [ 'povijest psihoanalize' ],
100 '994' => [ {
101 'c' => '',
102 'a' => 'PS',
103 'b' => 'MG'
104 } ],
105 '320' => [ 'Kazalo' ],
106 '101' => [ 'ENG' ],
107 '686' => [ '2140' ],
108 '300' => [ 'Prijevod djela: ' ],
109 };
110
111
# Lookup table whose single key maps to a list of two values.
112 my $lookup1 = {
113 '00900' => [
114 'lookup 1',
115 'lookup 2',
116 ],
117 };
118
# Lookup table with the same key mapped to a single scalar value.
119 my $lookup2 = {
120 '00900' => 'lookup',
121 };
122
123
# Debugging helper: print a dump of the arguments followed by a separator
# line, then record one test that passes when at least one argument was
# supplied.
sub test {
    print dump( @_ ), ("-" x 78), "\n";

    # defined(@array) is a fatal error since perl 5.22; comparing the
    # array numerically checks its element count instead
    ok( @_ > 0 );
}
128
129 # maximum number of characters of the evaluated code that test_s shows
# in a test description before cutting it off with '...'
130 my $max_eval_output = 170;
131
# Build a diagnostic message for code that failed to evaluate.
#
#   $msg  - error text (test_s passes $@)
#   $code - source code that was evaluated
#
# Returns $msg on a line of its own followed by a numbered listing of
# $code, one source line per output line.
sub dump_error {
    my ($msg, $code) = @_;

    # CRLF counts as a single line break so the listing gets no spurious
    # empty lines; a lone CR or LF still starts a new line
    my @lines = split /\r\n|[\n\r]/, $code;

    # number from 1, the way perl does in "at (eval N) line M", so the
    # listing can be matched against the error text
    my $line_no = 0;
    return join '', "$msg\n",
        map { sprintf("%2d: %s\n", ++$line_no, $_) } @lines;
}
144
# Evaluate a snippet of code as the right-hand side of an array assignment
# and record one test that passes when the eval raised no error.
#
#   $t - code to evaluate; the sub dies when it is false
#
# The test description is either the error plus a numbered listing of the
# code (dump_error), or a one-line, possibly truncated copy of the code
# followed by a dump of the values it produced.
#
# Returns a reference to the array the code was assigned to.
sub test_s {
    my $t = shift;
    die unless $t;

    # one-line preview of the code for the test description
    (my $eval_t = $t) =~ s/[\n\r\s]+/ /gs;
    if (length($eval_t) > $max_eval_output) {
        $eval_t = substr($eval_t, 0, $max_eval_output) . '...';
    }
    $eval_t =~ s/\\/\\\\/gs;

    # the evaluated string refers to @__ret by name
    my @__ret;
    eval "\@__ret = $t";

    my $passed = ! $@;
    my $name   = $@
        ? dump_error($@, $t)
        : "eval: $eval_t = " . dump(@__ret);
    ok($passed, $name);

    return \@__ret;
}
158
# Bare block holding all remaining checks; strict subs is switched off
# inside it.
159 {
160 no strict 'subs';
161 use WebPAC::Normalize;
162
163 ok(! _set_lookup( undef ), "set_lookup(undef)");
164
165 _set_rec( $rec1 );
166
# rec2(field, subfield) and rec1(field) checked against the values
# stored in $rec1
167 cmp_ok( join(",", rec2('200','a') ), 'eq', '200a,200a*2', 'join rec2' );
168 cmp_ok( join(",", rec2('200','a'), rec2('200','b') ), 'eq', '200a,200a*2,200b', 'join rec2 rec2' );
169 cmp_ok( join(" * ", sort(rec1('200'), rec1('201') )), 'eq', '200a * 200a*2 * 200b * 200c * 200d * 200d*2 * 201x * 201y', 'join sort rec1 rec1' );
170 diag "is_deeply checks\n";
171 is_deeply( \[ rec1('200') ], \[ qw/200a 200b 200c 200d 200a*2 200d*2/ ] );
172 is_deeply( \[ regex( 's/0/o/g', rec1('200') ) ], \[ qw/2ooa 2oob 2ooc 2ood 2ooa*2 2ood*2/ ]);
173 is_deeply( \[ grep { /\*/ } regex( 's/0/o/g', rec1('200') ) ], \[ qw/2ooa*2 2ood*2/ ]);
174 is_deeply( \[ rec('902') ], \[ '900' ] );
175
# rec() with and without an explicit subfield must agree for field 902
176 cmp_ok( rec('902'), 'eq', rec('902','z'), 'rec sf' );
177
178 # simple list manipulations
179 cmp_ok( join('', prefix('ab', 'cd') ), 'eq', 'abcd', 'prefix');
180 cmp_ok( join('', suffix('xy', 'cd') ), 'eq', 'cdxy', 'suffix');
181 cmp_ok( join('', surround('->', '<-', 'a','b','c') ), 'eq', '->a<-->b<-->c<-', 'surround');
182
183
184 _set_lookup( $lookup1 );
185
# 902 z is 900; the regex turns it into the key 00900, whose two lookup
# values are then joined
186 cmp_ok(
187 join_with(" i ",
188 lookup(
189 regex( 's/^/00/',
190 rec2('902','z')
191 )
192 )
193 ),
194 'eq', 'lookup 1 i lookup 2', 'join lookup regex rec2');
195
196 # check join_with operations
197
# Join two subfields of field 201 with " <1> " and compare the result
# with the expected string.
#
#   $a, $b - subfield names handed to rec('201', ...)
#   $e     - expected joined string
sub test_join_with_2 {
    my $a = shift;
    my $b = shift;
    my $e = shift;

    my $got = join_with(" <1> ",
        rec('201',$a),
        rec('201',$b),
    );

    cmp_ok($got, 'eq', $e, "join_with $a <1> $b = $e");
}
208
# Field 201 of $rec1 only has subfields x and y, so '_' stands for a
# missing subfield: none, first only, second only, both present.
209 test_join_with_2('_','_','');
210 test_join_with_2('x','_','201x');
211 test_join_with_2('_','x','201x');
212 test_join_with_2('x','y','201x <1> 201y');
213
# Join three subfields of field 201 with nested join_with calls and
# compare the result with the expected string: the inner call joins $b
# and $c with " <2> ", the outer one joins $a and the inner result with
# " <1> ".
#
#   $a, $b, $c - subfield names handed to rec('201', ...)
#   $e         - expected joined string
sub test_join_with_3 {
    my $a = shift;
    my $b = shift;
    my $c = shift;
    my $e = shift;

    my $got = join_with(" <1> ", rec('201',$a),
        join_with(" <2> ", rec('201',$b),
            rec('201',$c),
        )
    );

    cmp_ok($got, 'eq', $e, "join_with $a <1> $b <2> $c = $e");
}
225
# '_' stands for a subfield that field 201 of $rec1 does not have; a
# delimiter is expected only between values that are actually present.
226 test_join_with_3('_','_','_','');
227 test_join_with_3('x','_','_','201x');
228 test_join_with_3('_','x','_','201x');
229 test_join_with_3('_','_','x','201x');
230 test_join_with_3('x','y','_','201x <1> 201y');
231 test_join_with_3('x','_','y','201x <1> 201y');
232 test_join_with_3('_','x','y','201x <2> 201y');
# NOTE(review): same arguments as the case two lines above
233 test_join_with_3('x','_','y','201x <1> 201y');
234 test_join_with_3('x','y','x','201x <1> 201y <2> 201x');
235
236 # test lookups against $lookup2, which maps 00900 to a single value
237
238 _set_lookup( $lookup2 );
239
240 is_deeply( \[ lookup( prefix( '00', rec('902') ) ) ], \[ 'lookup' ], 'lookup prefix' );
241
# a key missing from the lookup table must give a false result
242 ok(! lookup('non-existent'), 'lookup non-existant' );
243
# Switch to the larger record and check that tag / display / search
# rules evaluate without error.
244 _set_rec( $rec2 );
245
246 test_s(qq{
247 tag('Title',
248 rec('200','a')
249 );
250 });
251 test_s(qq{
252 tag('Who',
253 join_with(" ",
254 rec('702','a'),
255 rec('702','b')
256 )
257 );
258 });
259
260 test_s(qq{
261 display('Publisher',
262 rec('210','c')
263 )
264 });
265
266 test_s(qq{
267 search('Year',
268 regex( 's/[^\\d]+//',
269 rec('210','d')
270 )
271 )
272 });
273
# $ds declared here is reused, and reassigned, by test_check_ds and by
# the later get_ds checks.
274 ok(my $ds = _get_ds(), "get_ds");
275 diag "ds = ", dump($ds) if ($debug);
276
277
# Fetch the current data structure into the file-level $ds and check it:
# the 'something' entry must exist, its slot named $t must be true when
# $t is given, and there must be no true 'empty' entry.
#
#   $t - optional name of the slot expected under 'something'
#
# Returns the data structure.
sub test_check_ds {
    my ($t) = @_;

    $ds = _get_ds();
    ok($ds, 'get_ds');
    if ($debug) {
        diag dump( $ds );
    }

    ok( $ds && $ds->{something}, 'get_ds->something exists' );
    if ($t) {
        ok( $ds && $ds->{something}->{$t}, 'get_ds->something->'.$t.' exists');
    }
    ok( $ds && !$ds->{empty}, 'get_ds->empty doesn\'t' );

    return $ds;
}
291
# For each rule type: start from a clean data structure, store one
# non-empty and one empty value, then verify which entries exist.
292 _clean_ds();
293 test_s(qq{ search('something', '42'); });
294 test_s(qq{ search('empty', ''); });
295 test_check_ds('search');
296
297 _clean_ds();
298 test_s(qq{ display('something', '42'); });
299 test_s(qq{ display('empty', ''); });
300 test_check_ds('display');
301
# tag() is checked for both the search and the display slot
302 _clean_ds();
303 test_s(qq{ tag('something', '42'); });
304 test_s(qq{ tag('empty', ''); });
305 test_check_ds('search');
306 test_check_ds('display');
307
# Evaluate the rule file shipped in the data directory next to this
# script ($abs_path already ends in a slash); a final true statement is
# appended to the rules before they are evaluated.
308 _clean_ds();
309 my $n = read_file( "$abs_path/data/normalize.pl" );
310 $n .= "\n1;\n";
311 #diag "normalize code:\n$n\n";
312 test_s( $n );
313
314 ok($ds = _get_ds(), "get_ds");
315 diag "ds = ", dump($ds) if ($debug);
316
# One record and one rule, run twice: first step by step through
# _set_rec / test_s / _get_ds, then through the data_structure() entry
# point; both results must be identical.
317 my $rec = {
318 '200' => [{
319 'a' => '200a',
320 'b' => '200b',
321 }],
322 };
323 my $rules = qq{ search('mixed', rec('200') ) };
324
325 _clean_ds();
326 _set_rec( $rec );
327 test_s( $rules );
328 ok($ds = _get_ds(), "get_ds");
329 is_deeply( $ds, {
330 'mixed' => {
331 'search' => [ '200a', '200b' ],
332 'tag' => 'mixed'
333 }
334 }, 'correct get_ds');
335
336 ok(my $ds2 = WebPAC::Normalize::data_structure(
337 row => $rec,
338 rules => $rules,
339 ), 'data_structure');
340 is_deeply( $ds, $ds2, 'data_structure(s) same');
341
342 # weird and invalid structure which is supported anyway: one field
# holding both a subfield hash and a plain string occurrence
343 _clean_ds();
344 _set_rec({
345 '200' => [{
346 'a' => '200a',
347 },
348 '200-solo'
349 ]
350 });
351 test_s(qq{ search('mixed', rec('200') ) });
352 ok($ds = _get_ds(), "get_ds");
353 is_deeply( $ds, {
354 'mixed' => {
355 'search' => [ '200a', '200-solo' ],
356 'tag' => 'mixed'
357 }
358 }, 'correct get_ds');
359
360 #
361 # MARC
362 #
# The record set just above (200 a plus the plain 200-solo occurrence)
# is still the current record here.
363 _debug( 4 );
364
365 test_s(qq{ marc_indicators('900',1,2) });
366 test_s(qq{ marc('900','a', rec('200') ) });
367 my $marc;
368 ok($marc = WebPAC::Normalize::_get_marc_fields(), "_get_marc_fields");
369 diag dump( $marc ) if ($debug);
370
# expected rows: field, first indicator, second indicator, then
# subfield / value pairs
371 is_deeply( $marc, [
372 [ '900', 1, 2, 'a', '200a' ],
373 [ '900', 1, 2, 'a', '200-solo' ]
374 ], 'correct marc with indicators');
375
376 test_s(qq{ marc_indicators('900',' ',9) });
377 test_s(qq{ marc_repeatable_subfield('900','a', rec('200') ) });
378
379 ok($marc = WebPAC::Normalize::_get_marc_fields(), "_get_marc_fields");
380 diag dump( $marc ) if ($debug);
381
382 is_deeply( $marc, [
383 [ '900', 1, 2, 'a', '200a', 'a', '200-solo' ],
384 [ '900', ' ', 9, 'a', '200a', 'a', '200-solo' ]
385 ], 'correct marc with repetable subfield');
386
387 #
388 # test magic re-ordering of input data
389 #
390
# Run a list of rules against a record and compare the resulting MARC
# fields with an expected structure.
#
#   $msg    - description for the final is_deeply check
#   $rec    - record handed to _set_rec
#   $rules  - rule statements separated by ';'; each statement is
#             whitespace-normalised and evaluated on its own through test_s
#   $struct - expected return value of _get_marc_fields
sub test_rec_rules {
    my ($msg, $rec, $rules, $struct) = @_;

    _clean_ds();
    _set_rec($rec);

    for my $chunk (split(/;/, $rules)) {
        # collapse whitespace runs and trim both ends
        (my $stmt = $chunk) =~ s/[\s\n\r]+/ /gs;
        $stmt =~ s/^\s+//gs;
        $stmt =~ s/\s+$//gs;
        next unless $stmt;
        test_s($stmt);
    }

    my $marc = WebPAC::Normalize::_get_marc_fields();
    ok($marc, "_get_marc_fields");
    if ($debug) {
        diag dump( $marc );
    }
    if ($debug > 1) {
        diag "expects:\n", dump($struct);
    }
    is_deeply( $marc, $struct, $msg );
}
409
# Three occurrences of field 200 with a shrinking set of subfields: each
# occurrence is expected to end up in its own 900 field.
410 test_rec_rules(
411 'correct marc with repetable subfield',
412 {
413 '200' => [{
414 'a' => '200a-1',
415 'b' => '200b-1',
416 'c' => '200c-1',
417 }, {
418 'a' => '200a-2',
419 'b' => '200b-2',
420 }, {
421 'a' => '200a-3',
422 }],
423 },
424 qq{
425 marc_indicators('900',1 ,0);
426 marc('900','a', rec('200','a') );
427 marc('900','b', rec('200','b') );
428 marc('900','c', rec('200','c') );
429 },
430 [
431 [ '900', 1, 0, 'a', '200a-1', 'b', '200b-1', 'c', '200c-1' ],
432 [ '900', 1, 0, 'a', '200a-2', 'b', '200b-2' ],
433 [ '900', 1, 0, 'a', '200a-3' ],
434 ],
435 );
436
437
# With marc_repeatable_subfield all values of subfield a are expected in
# the first 900 field, while b and c stay spread over separate fields.
438 test_rec_rules(
439 'marc_repeatable_subfield',
440 {
441 '200' => [{
442 'a' => '200a-1',
443 'b' => '200b-1',
444 'c' => '200c-1',
445 }, {
446 'a' => '200a-2',
447 'b' => '200b-2',
448 'c' => '200c-2',
449 }, {
450 'a' => '200a-3',
451 'c' => '200c-3',
452 }],
453 },
454 qq{
455 marc_indicators('900',1 ,0);
456 marc_repeatable_subfield('900','a', rec('200','a') );
457 marc('900','b', rec('200','b') );
458 marc('900','c', rec('200','c') );
459 },
460 [
461 [ '900', 1, 0, 'a', '200a-1', 'a', '200a-2', 'a', '200a-3', 'b', '200b-1', 'c', '200c-1' ],
462 [ '900', 1, 0, 'b', '200b-2', 'c', '200c-2' ],
463 [ '900', 1, 0, 'c', '200c-3' ],
464 ],
465 );
466
# marc_compose is expected to keep subfields in the order given (c, b, a)
# and, with no marc_indicators call, to use blank indicators.
467 test_rec_rules(
468 'marc_compose',
469 { '200' => [{ a => 'foo ; bar', b => 42, c => 'baz' }] },
470 qq{
471 marc_compose('900',
472 'c', rec(200,'b'),
473 'b', rec(200,'a'),
474 'a', rec(200,'c'),
475 );
476 },
477 [
478 [ '900', ' ', ' ', 'c', 42, 'b', 'foo ; bar', 'a', 'baz' ]
479 ],
480 );
481
482 #
483 # test rules
484 #
# Evaluate a single rule against a record and compare what the rule
# returned with an expected structure.
#
#   $msg    - description for the is_deeply check
#   $rec    - record handed to _set_rec
#   $rule   - code passed to test_s unchanged
#   $struct - expected value; test_s returns an array reference, so this
#             is an array reference as well
sub test_rule {
    my ($msg, $rec, $rule, $struct) = @_;

    _clean_ds();
    _set_rec( $rec );

    # The rule is evaluated as written. An earlier substitution here
    # replaced every backslash with a backslash, which changed nothing
    # and only suggested that escaping took place.
    my $r = test_s( $rule );

    diag "for ", dump($rec), " got:\n", dump($r), "\nexpect:\n" if ($debug > 1);
    diag dump($struct) if ($debug);
    is_deeply( $r, $struct, $msg );
}
495
496 # test split_rec_on: the optional last argument selects one part,
# counted from 1; without it all parts are returned
497 test_rule(
498 'split_rec_on',
499 { '200' => [{ a => 'foo ; bar', b => 42, c => 'baz' }] },
500 qq{ split_rec_on('200','a', qr/\\s*;\\s*/, 1) },
501 [ 'foo' ],
502 );
503 test_rule(
504 'split_rec_on',
505 { '200' => [{ a => 'foo ; bar', b => 42, c => 'baz' }] },
506 qq{ split_rec_on('200','a', qr/\\s*;\\s*/, 2) },
507 [ 'bar' ],
508 );
509 test_rule(
510 'split_rec_on no part',
511 { '200' => [{ a => 'foo ; bar', b => 42, c => 'baz' }] },
512 qq{ split_rec_on('200','a', qr/\\s*;\\s*/) },
513 [ 'foo', 'bar' ],
514 );
# an empty record is expected to give a single empty string
515 test_rule(
516 'split_rec_on no record',
517 {},
518 qq{ split_rec_on('200','a', qr/\\s*;\\s*/) },
519 [ '' ],
520 );
521
# split parts used as marc_compose values; subfield a appears twice
522 test_rec_rules(
523 'marc_compose+split_rec_on',
524 { '200' => [{ a => 'foo ! bar', b => 42, c => 'baz' }] },
525 qq{
526 marc_compose('900',
527 'a', split_rec_on(200,'a', qr/\\s*!\\s*/, 1),
528 'c', rec(200,'c'),
529 'a', split_rec_on(200,'a', qr/\\s*!\\s*/, 2),
530 'b', rec(200,'b'),
531 );
532 },
533 [
534 [ '900', ' ', ' ',
535 'a', 'foo',
536 'c', 'baz',
537 'a', 'bar',
538 'b', 42,
539 ]
540 ],
541 );
542
# marc_leader(offset, value) is expected to return the value it stored;
# without arguments it returns everything stored so far as a hash
543 cmp_ok(marc_leader('06',42), '==', 42, 'marc_leader');
544 cmp_ok(marc_leader('11',5), '==', 5, 'marc_leader');
545 ok(marc_leader(), 'marc_leader get');
546 diag "leader: ", dump(marc_leader()) if ($debug);
547 is_deeply(marc_leader(), { '06' => 42, 11 => 5 }, "marc_leader full");
548
549 test_rule(
550 'rec1(000)',
551 { '000' => [ 42 ]},
552 qq{ rec('000') },
553 [ 42 ],
554 );
555
# marc() called without a subfield: the expected row holds only the
# field and its value, with no indicators
556 test_rec_rules(
557 'marc(001,rec(000))',
558 { '000' => [ 42 ]},
559 qq{
560 marc('001', rec('000') );
561 },
562 [
563 [ '001', 42, ]
564 ],
565 );
566
# marc_remove(field, subfield): only the subfield added after the
# removals is expected to remain
567 test_rec_rules(
568 'marc_remove subfield',
569 { '200' => [{ a => 42, b => 'bar', c => 'baz' }] },
570 qq{
571 marc('900', 'a', rec('200','a') );
572 marc('900', 'b', rec('200','b') );
573 marc_remove('900','b');
574 marc('900', 'b', rec('200','c') );
575 marc_remove('900','a');
576 },
577 [
578 [ '900', ' ', ' ', 'b', 'baz' ],
579 ],
580 );
581
# marc_remove(field) without a subfield drops the whole 900 field and
# leaves 901 untouched
582 test_rec_rules(
583 'marc_remove field',
584 { '200' => [{ a => 42, b => 'bar', c => 'baz' }] },
585 qq{
586 marc('900', 'a', rec('200','a') );
587 marc('900', 'b', rec('200','b') );
588 marc('901', 'b', rec('200','b') );
589 marc('901', 'c', rec('200','c') );
590 marc_remove('900');
591 },
592 [
593 [ '901', ' ', ' ', 'b', 'bar', 'c', 'baz' ],
594 ],
595 );
# Three marc_duplicate calls, each followed by replacing subfield b; the
# resulting copies are inspected one by one in the loop that follows.
596 test_rec_rules(
597 'marc_duplicate',
598 { '200' => [{ a => 42, b => 'bar', c => 'baz', d => 'bing', e => 'bong' }] },
599 qq{
600 marc('900', 'a', rec('200','a') );
601 marc('900', 'b', rec('200','b') );
602 marc_duplicate;
603 marc_remove('900','b');
604 marc('900', 'b', rec('200','c') );
605 marc_duplicate;
606 marc_remove('900','b');
607 marc('900', 'b', rec('200','d') );
608 marc_duplicate;
609 marc_remove('900','b');
610 marc('900', 'b', rec('200','e') );
611 },
612 [
613 # this will return FIRST record
614 [ '900', ' ', ' ', 'a', 42, 'b', 'bar' ],
615 ],
616 );
617
# Walk the four records produced by the marc_duplicate rules: offset N
# selects the Nth copy, and the copies differ only in subfield b.
618 my $i = 0;
619 foreach my $v ( qw/bar baz bing bong/ ) {
620
621 ok($marc = WebPAC::Normalize::_get_marc_fields( offset => $i ),
622 "_get_marc_fields( offset => $i )"
623 );
624 diag "marc $i = ", dump( $marc ) if ($debug);
625 is_deeply( $marc,
626 [ [ '900', ' ', ' ', 'a', 42, 'b', $v ] ],
627 "MARC copy $i has $v",
628 );
629 $i++;
630 }
# end of the bare block that switched off strict subs
631 }
632

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26