/[webpac2]/trunk/t/3-normalize.t
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/t/3-normalize.t

Parent Directory Parent Directory | Revision Log Revision Log


Revision 571 - (hide annotations)
Mon Jul 3 14:30:22 2006 UTC (17 years, 9 months ago) by dpavlin
File MIME type: application/x-troff
File size: 12171 byte(s)
marc() now supports fields < 10 which don't have indicators and subfields

1 dpavlin 536 #!/usr/bin/perl -w
2    
3     use strict;
4    
5 dpavlin 571 use Test::More tests => 112;
6 dpavlin 536 use Test::Exception;
7     use Cwd qw/abs_path/;
8     use blib;
9     use File::Slurp;
10 dpavlin 555 use Getopt::Long;
11 dpavlin 536
12     BEGIN {
13     use_ok( 'WebPAC::Normalize' );
14     }
15    
16 dpavlin 555 use Data::Dump qw/dump/;
17    
18     my $debug = 0;
19     GetOptions(
20     "debug+", \$debug
21     );
22    
23     cmp_ok(_debug(1), '==', 1, '_debug level');
24     cmp_ok(_debug(0), '==', 0, '_debug level');
25    
26     diag "debug level for $0 is $debug" if ($debug);
27     if ($debug > 2) {
28     diag "debug level for WebPAC::Normalize is ", _debug( $debug - 2 );
29     }
30    
31 dpavlin 536 ok(my $abs_path = abs_path($0), "abs_path");
32     $abs_path =~ s#/[^/]*$#/#;
33     diag "abs_path: $abs_path" if ($debug);
34    
35     #throws_ok { new WebPAC::Normalize::XML( lookup_regex => 'foo' ) } qr/pair/, "lookup_regex without lookup";
36    
37     my $rec1 = {
38     '200' => [{
39     'a' => '200a',
40     'b' => '200b',
41     },{
42     'c' => '200c',
43     'd' => '200d',
44     },{
45     'a' => '200a*2',
46     'd' => '200d*2',
47     }],
48     '201' => [{
49     'x' => '201x',
50     'y' => '201y',
51     }],
52     '900' => [
53     '900-no_subfield'
54     ],
55     '901' => [{
56     'a' => '900a',
57     }],
58     '902' => [{
59     'z' => '900',
60     }],
61     };
62    
63     my $rec2 = {
64     '675' => [ {
65     'a' => '159.9'
66     } ],
67     '210' => [ {
68     'c' => 'New York University press',
69     'a' => 'New York',
70     'd' => 'cop. 1988'
71     } ],
72     '700' => [ {
73     'a' => 'Haynal',
74     'b' => 'AndrĂ©'
75     } ],
76     '801' => [ 'FFZG' ],
77     '991' => [ '8302' ],
78     '000' => [ 1 ],
79     '702' => [ {
80     'a' => 'Holder',
81     'b' => 'Elizabeth'
82     } ],
83     '215' => [ {
84     'c' => 'ilustr',
85     'a' => 'xix, 202 str',
86     'd' => '23cm'
87     } ],
88     '990' => [
89     '2140',
90     '88',
91     'HAY'
92     ],
93     '200' => [ {
94     'e' => 'from Freud and Ferenczi to Michael balint',
95     'a' => 'Controversies in psychoanalytic method',
96     'g' => 'translated by Elizabeth Holder on the basisi of a first draft by Archie Hooton ; with a preface by Daniel N. Stern',
97     'f' => 'by AndrĂ© E. Haynal'
98     } ],
99     '610' => [ 'povijest psihoanalize' ],
100     '994' => [ {
101     'c' => '',
102     'a' => 'PS',
103     'b' => 'MG'
104     } ],
105     '320' => [ 'Kazalo' ],
106     '101' => [ 'ENG' ],
107     '686' => [ '2140' ],
108     '300' => [ 'Prijevod djela: ' ],
109     };
110    
111    
112     my $lookup1 = {
113     '00900' => [
114     'lookup 1',
115     'lookup 2',
116     ],
117     };
118    
119     my $lookup2 = {
120     '00900' => 'lookup',
121     };
122    
123    
# Dump all arguments to STDOUT followed by a 78-character separator line,
# then record one (unnamed) test that passes when at least one argument
# was supplied.
sub test {
    print dump( @_ ), ("-" x 78), "\n";

    # defined(@array) is deprecated and a fatal error on perl >= 5.22;
    # testing the element count is the supported equivalent
    ok( scalar(@_) );
}
128    
129     # maximum number of characters of the evaled string to display
130     my $max_eval_output = 170;
131    
# Build a multi-line diagnostic for code that failed to evaluate: the
# message on the first line, followed by a numbered listing of the code.
#
#   $msg  - error text to show first (test_s passes $@ here)
#   $code - source string that was evaluated
#
# Returns the whole report as one string.  Lines are numbered from 1 so
# the listing matches the "line N" positions perl reports for string evals.
sub dump_error {
    my ($msg, $code) = @_;

    # perl counts lines by "\n"; accept CRLF too, so a Windows line ending
    # does not show up as an extra empty line and shift the numbering
    my @lines = split(/\r?\n/, $code);

    my $line_no = 0;
    return join('',
        "$msg\n",
        map { sprintf("%2d: %s\n", ++$line_no, $_) } @lines
    );
}
144    
# Evaluate a string of code in list context and record one test that
# passes when the evaluation did not die.
#
#   $t - code to evaluate (dies when missing or false)
#
# On success the test name shows a one-line, length-limited copy of the
# code plus a dump of the returned list; on failure it is the report
# produced by dump_error().  Returns a reference to the list of values
# the code returned.
sub test_s {
    my ($t) = @_;
    die unless $t;

    # one-line, truncated, backslash-escaped copy of the code for the test name
    (my $label = $t) =~ s/[\n\r\s]+/ /gs;
    if (length($label) > $max_eval_output) {
        $label = substr($label, 0, $max_eval_output) . '...';
    }
    $label =~ s/\\/\\\\/gs;

    my @__ret;    # referenced by name from inside the eval string below
    eval "\@__ret = $t";
    my $failure = $@;

    my $name = $failure
        ? dump_error($failure, $t)
        : "eval: $label = " . dump(@__ret);
    ok(! $failure, $name);

    return \@__ret;
}
158    
159     {
160     no strict 'subs';
161     use WebPAC::Normalize;
162    
163 dpavlin 538 ok(! _set_lookup( undef ), "set_lookup(undef)");
164 dpavlin 536
165 dpavlin 538 _set_rec( $rec1 );
166 dpavlin 536
167     cmp_ok( join(",", rec2('200','a') ), 'eq', '200a,200a*2', 'join rec2' );
168     cmp_ok( join(",", rec2('200','a'), rec2('200','b') ), 'eq', '200a,200a*2,200b', 'join rec2 rec2' );
169     cmp_ok( join(" * ", sort(rec1('200'), rec1('201') )), 'eq', '200a * 200a*2 * 200b * 200c * 200d * 200d*2 * 201x * 201y', 'join sort rec1 rec1' );
170     diag "is_deeply checks\n";
171     is_deeply( \[ rec1('200') ], \[ qw/200a 200b 200c 200d 200a*2 200d*2/ ] );
172     is_deeply( \[ regex( 's/0/o/g', rec1('200') ) ], \[ qw/2ooa 2oob 2ooc 2ood 2ooa*2 2ood*2/ ]);
173     is_deeply( \[ grep { /\*/ } regex( 's/0/o/g', rec1('200') ) ], \[ qw/2ooa*2 2ood*2/ ]);
174     is_deeply( \[ rec('902') ], \[ '900' ] );
175    
176     cmp_ok( rec('902'), 'eq', rec('902','z'), 'rec sf' );
177    
178     # simple list manipulations
179     cmp_ok( join('', prefix('ab', 'cd') ), 'eq', 'abcd', 'prefix');
180     cmp_ok( join('', suffix('xy', 'cd') ), 'eq', 'cdxy', 'suffix');
181     cmp_ok( join('', surround('->', '<-', 'a','b','c') ), 'eq', '->a<-->b<-->c<-', 'surround');
182    
183    
184 dpavlin 538 _set_lookup( $lookup1 );
185 dpavlin 536
186     cmp_ok(
187     join_with(" i ",
188     lookup(
189     regex( 's/^/00/',
190     rec2('902','z')
191     )
192     )
193     ),
194     'eq', 'lookup 1 i lookup 2', 'join lookup regex rec2');
195    
196     # check join_with operations
197    
# Check join_with() over two subfields of field 201.
#
#   $sf1, $sf2 - subfield names passed to rec('201', ...)
#   $expected  - string the " <1> "-joined result must equal
#
# The locals are deliberately not called $a/$b: lexical $a and $b mask
# the package variables that sort() relies on.
sub test_join_with_2 {
    my ($sf1, $sf2, $expected) = @_;

    cmp_ok(
        join_with(" <1> ",
            rec('201',$sf1),
            rec('201',$sf2),
        ),
        'eq', $expected, "join_with $sf1 <1> $sf2 = $expected");
}
208    
209     test_join_with_2('_','_','');
210     test_join_with_2('x','_','201x');
211     test_join_with_2('_','x','201x');
212     test_join_with_2('x','y','201x <1> 201y');
213    
# Check nested join_with() calls over three subfields of field 201:
# the first subfield is joined with " <1> " to the result of joining the
# second and third with " <2> ".
#
#   $sf1, $sf2, $sf3 - subfield names passed to rec('201', ...)
#   $expected        - string the combined result must equal
#
# The locals are deliberately not called $a/$b: lexical $a and $b mask
# the package variables that sort() relies on.
sub test_join_with_3 {
    my ($sf1, $sf2, $sf3, $expected) = @_;

    cmp_ok(
        join_with(" <1> ", rec('201',$sf1),
            join_with(" <2> ", rec('201',$sf2),
                rec('201',$sf3),
            )
        ),
        'eq', $expected, "join_with $sf1 <1> $sf2 <2> $sf3 = $expected");
}
225    
226     test_join_with_3('_','_','_','');
227     test_join_with_3('x','_','_','201x');
228     test_join_with_3('_','x','_','201x');
229     test_join_with_3('_','_','x','201x');
230     test_join_with_3('x','y','_','201x <1> 201y');
231     test_join_with_3('x','_','y','201x <1> 201y');
232     test_join_with_3('_','x','y','201x <2> 201y');
233     test_join_with_3('x','_','y','201x <1> 201y');
234     test_join_with_3('x','y','x','201x <1> 201y <2> 201x');
235    
236     # test lookups
237    
238 dpavlin 538 _set_lookup( $lookup2 );
239 dpavlin 536
240     is_deeply( \[ lookup( prefix( '00', rec('902') ) ) ], \[ 'lookup' ], 'lookup prefix' );
241    
242     ok(! lookup('non-existent'), 'lookup non-existant' );
243    
244 dpavlin 538 _set_rec( $rec2 );
245 dpavlin 536
246     test_s(qq{
247     tag('Title',
248     rec('200','a')
249     );
250     });
251     test_s(qq{
252     tag('Who',
253     join_with(" ",
254     rec('702','a'),
255     rec('702','b')
256     )
257     );
258     });
259    
260     test_s(qq{
261     display('Publisher',
262     rec('210','c')
263     )
264     });
265    
266     test_s(qq{
267     search('Year',
268     regex( 's/[^\\d]+//',
269     rec('210','d')
270     )
271     )
272     });
273    
274 dpavlin 538 ok(my $ds = _get_ds(), "get_ds");
275 dpavlin 550 diag "ds = ", dump($ds) if ($debug);
276 dpavlin 536
277    
# Fetch the current data structure via _get_ds() into the enclosing $ds
# and run sanity checks on it:
#   - it is true,
#   - it has a true 'something' entry,
#   - when $t is given, 'something' has a true $t entry,
#   - it has no true 'empty' entry.
# Returns the fetched data structure.
sub test_check_ds {
    my ($t) = @_;

    $ds = _get_ds();
    ok($ds, 'get_ds');
    if ($debug) {
        diag dump( $ds );
    }

    ok( $ds && $ds->{something}, 'get_ds->something exists' );
    if ($t) {
        ok( $ds && $ds->{something}->{$t}, 'get_ds->something->'.$t.' exists');
    }
    ok( $ds && !$ds->{empty}, 'get_ds->empty doesn\'t' );

    return $ds;
}
291    
292 dpavlin 538 _clean_ds();
293 dpavlin 536 test_s(qq{ search('something', '42'); });
294     test_s(qq{ search('empty', ''); });
295     test_check_ds('search');
296    
297 dpavlin 538 _clean_ds();
298 dpavlin 536 test_s(qq{ display('something', '42'); });
299     test_s(qq{ display('empty', ''); });
300     test_check_ds('display');
301    
302 dpavlin 538 _clean_ds();
303 dpavlin 536 test_s(qq{ tag('something', '42'); });
304     test_s(qq{ tag('empty', ''); });
305     test_check_ds('search');
306     test_check_ds('display');
307    
308 dpavlin 538 _clean_ds();
309 dpavlin 536 my $n = read_file( "$abs_path/data/normalize.pl" );
310     $n .= "\n1;\n";
311     #diag "normalize code:\n$n\n";
312     test_s( $n );
313    
314 dpavlin 538 ok($ds = _get_ds(), "get_ds");
315 dpavlin 550 diag "ds = ", dump($ds) if ($debug);
316 dpavlin 536
317     my $rec = {
318     '200' => [{
319     'a' => '200a',
320     'b' => '200b',
321     }],
322     };
323     my $rules = qq{ search('mixed', rec('200') ) };
324    
325 dpavlin 538 _clean_ds();
326     _set_rec( $rec );
327 dpavlin 536 test_s( $rules );
328 dpavlin 538 ok($ds = _get_ds(), "get_ds");
329 dpavlin 536 is_deeply( $ds, {
330     'mixed' => {
331     'search' => [ '200a', '200b' ],
332     'tag' => 'mixed'
333     }
334     }, 'correct get_ds');
335    
336     ok(my $ds2 = WebPAC::Normalize::data_structure(
337     row => $rec,
338     rules => $rules,
339     ), 'data_structure');
340     is_deeply( $ds, $ds2, 'data_structure(s) same');
341    
342     # weird and non-valid structure which is supported anyway
343 dpavlin 538 _clean_ds();
344     _set_rec({
345 dpavlin 536 '200' => [{
346     'a' => '200a',
347     },
348     '200-solo'
349     ]
350     });
351     test_s(qq{ search('mixed', rec('200') ) });
352 dpavlin 538 ok($ds = _get_ds(), "get_ds");
353 dpavlin 536 is_deeply( $ds, {
354     'mixed' => {
355     'search' => [ '200a', '200-solo' ],
356     'tag' => 'mixed'
357     }
358     }, 'correct get_ds');
359    
360 dpavlin 555 #
361 dpavlin 540 # MARC
362 dpavlin 555 #
363    
364 dpavlin 547 test_s(qq{ marc_indicators('900',1,2) });
365     test_s(qq{ marc('900','a', rec('200') ) });
366 dpavlin 540 my @marc;
367 dpavlin 547 ok(@marc = WebPAC::Normalize::_get_marc_fields(), "_get_marc_fields");
368 dpavlin 550 diag dump( \@marc ) if ($debug);
369 dpavlin 547
370     is_deeply( \@marc, [
371     [ '900', 1, 2, 'a', '200a' ],
372     [ '900', 1, 2, 'a', '200-solo' ]
373     ], 'correct marc with indicators');
374    
375     test_s(qq{ marc_indicators('900',' ',9) });
376     test_s(qq{ marc_repeatable_subfield('900','a', rec('200') ) });
377    
378     ok(@marc = WebPAC::Normalize::_get_marc_fields(), "_get_marc_fields");
379 dpavlin 550 diag dump( \@marc ) if ($debug);
380 dpavlin 547
381     is_deeply( \@marc, [
382     [ '900', 1, 2, 'a', '200a', 'a', '200-solo' ],
383     [ '900', ' ', 9, 'a', '200a', 'a', '200-solo' ]
384     ], 'correct marc with repetable subfield');
385 dpavlin 550
386 dpavlin 555 #
387     # test magic re-ordering of input data
388     #
389 dpavlin 550
# Run a set of rules against a record and compare the resulting MARC
# fields with an expected structure.
#
#   $msg    - name for the final is_deeply() test
#   $rec    - record passed to _set_rec()
#   $rules  - rule statements separated by ';'; each non-empty statement
#             is whitespace-normalised and evaluated through test_s()
#   $struct - expected list of MARC fields (array reference)
sub test_rec_rules {
    my ($msg, $rec, $rules, $struct) = @_;

    _clean_ds();
    _set_rec($rec);

    for my $stmt (split(/;/, $rules)) {
        # collapse runs of whitespace, then trim both ends
        $stmt =~ s/[\s\n\r]+/ /gs;
        $stmt =~ s/^\s+//gs;
        $stmt =~ s/\s+$//gs;
        next unless $stmt;
        test_s($stmt);
    }

    my @marc = WebPAC::Normalize::_get_marc_fields();
    ok(scalar(@marc), "_get_marc_fields");

    if ($debug) {
        diag dump( \@marc );
    }
    if ($debug > 1) {
        diag "expects:\n", dump($struct);
    }

    is_deeply( \@marc, $struct, $msg );
}
408    
409     test_rec_rules(
410     'correct marc with repetable subfield',
411     {
412     '200' => [{
413     'a' => '200a-1',
414     'b' => '200b-1',
415     'c' => '200c-1',
416     }, {
417     'a' => '200a-2',
418     'b' => '200b-2',
419     }, {
420     'a' => '200a-3',
421     }],
422     },
423     qq{
424     marc_indicators('900',1 ,0);
425     marc('900','a', rec('200','a') );
426     marc('900','b', rec('200','b') );
427     marc('900','c', rec('200','c') );
428     },
429     [
430     [ '900', 1, 0, 'a', '200a-1', 'b', '200b-1', 'c', '200c-1' ],
431     [ '900', 1, 0, 'a', '200a-2', 'b', '200b-2' ],
432     [ '900', 1, 0, 'a', '200a-3' ],
433     ],
434     );
435    
436    
437     test_rec_rules(
438     'marc_repeatable_subfield',
439     {
440     '200' => [{
441     'a' => '200a-1',
442     'b' => '200b-1',
443     'c' => '200c-1',
444     }, {
445     'a' => '200a-2',
446     'b' => '200b-2',
447     'c' => '200c-2',
448     }, {
449     'a' => '200a-3',
450     'c' => '200c-3',
451     }],
452     },
453     qq{
454     marc_indicators('900',1 ,0);
455     marc_repeatable_subfield('900','a', rec('200','a') );
456     marc('900','b', rec('200','b') );
457     marc('900','c', rec('200','c') );
458     },
459     [
460     [ '900', 1, 0, 'a', '200a-1', 'a', '200a-2', 'a', '200a-3', 'b', '200b-1', 'c', '200c-1' ],
461     [ '900', 1, 0, 'b', '200b-2', 'c', '200c-2' ],
462     [ '900', 1, 0, 'c', '200c-3' ],
463     ],
464     );
465 dpavlin 562
466     test_rec_rules(
467     'marc_compose',
468     { '200' => [{ a => 'foo ; bar', b => 42, c => 'baz' }] },
469     qq{
470     marc_compose('900',
471     'c', rec(200,'b'),
472     'b', rec(200,'a'),
473     'a', rec(200,'c'),
474     );
475     },
476     [
477     [ '900', ' ', ' ', 'c', 42, 'b', 'foo ; bar', 'a', 'baz' ]
478     ],
479     );
480    
481     #
482     # test rules
483     #
# Evaluate a single rule against a record and compare the values it
# returns with an expected structure.
#
#   $msg    - name for the is_deeply() test
#   $rec    - record passed to _set_rec()
#   $rule   - rule source, evaluated verbatim through test_s()
#   $struct - expected return values (array reference, as test_s returns one)
sub test_rule {
    my ($msg, $rec, $rule, $struct) = @_;

    _clean_ds();
    _set_rec( $rec );

    # The former "$rule =~ s/\\/\\/gs" replaced every backslash with
    # itself, i.e. did nothing, so the rule is passed on unchanged.
    my $r = test_s( $rule );

    diag "for ", dump($rec), " got:\n", dump($r), "\nexpect:\n" if ($debug > 1);
    diag dump($struct) if ($debug);
    is_deeply( $r, $struct, $msg );
}
494    
495     # test split_rec_on
496     test_rule(
497     'split_rec_on',
498     { '200' => [{ a => 'foo ; bar', b => 42, c => 'baz' }] },
499     qq{ split_rec_on('200','a', qr/\\s*;\\s*/, 1) },
500 dpavlin 571 [ 'foo' ],
501 dpavlin 562 );
502     test_rule(
503     'split_rec_on',
504     { '200' => [{ a => 'foo ; bar', b => 42, c => 'baz' }] },
505     qq{ split_rec_on('200','a', qr/\\s*;\\s*/, 2) },
506 dpavlin 571 [ 'bar' ],
507 dpavlin 562 );
508 dpavlin 566 test_rule(
509     'split_rec_on no part',
510     { '200' => [{ a => 'foo ; bar', b => 42, c => 'baz' }] },
511 dpavlin 571 qq{ split_rec_on('200','a', qr/\\s*;\\s*/) },
512 dpavlin 566 [ 'foo', 'bar' ],
513     );
514     test_rule(
515     'split_rec_on no record',
516     {},
517     qq{ split_rec_on('200','a', qr/\\s*;\\s*/) },
518 dpavlin 571 [ '' ],
519 dpavlin 566 );
520 dpavlin 562
521     test_rec_rules(
522     'marc_compose+split_rec_on',
523     { '200' => [{ a => 'foo ! bar', b => 42, c => 'baz' }] },
524     qq{
525     marc_compose('900',
526     'a', split_rec_on(200,'a', qr/\\s*!\\s*/, 1),
527     'c', rec(200,'c'),
528     'a', split_rec_on(200,'a', qr/\\s*!\\s*/, 2),
529     'b', rec(200,'b'),
530     );
531     },
532     [
533     [ '900', ' ', ' ',
534     'a', 'foo',
535     'c', 'baz',
536     'a', 'bar',
537     'b', 42,
538     ]
539     ],
540     );
541 dpavlin 564
542     cmp_ok(marc_leader('06',42), '==', 42, 'marc_leader');
543     cmp_ok(marc_leader('11',5), '==', 5, 'marc_leader');
544     ok(marc_leader(), 'marc_leader get');
545     diag "leader: ", dump(marc_leader()) if ($debug);
546     is_deeply(marc_leader(), { '06' => 42, 11 => 5 }, "marc_leader full");
547 dpavlin 571
548     _debug(2);
549     test_rule(
550     'rec1(000)',
551     { '000' => [ 42 ]},
552     qq{ rec('000') },
553     [ 42 ],
554     );
555    
556     test_rec_rules(
557     'marc_compose+split_rec_on',
558     { '000' => [ 42 ]},
559     qq{
560     marc('001', rec('000') );
561     },
562     [
563     [ '001', ' ', ' ', 42, ]
564     ],
565     );
566 dpavlin 536 }
567    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26