1 |
#!/usr/bin/perl -w |
#!/usr/bin/perl -w |
2 |
|
|
3 |
use Test::More tests => 8; |
use Test::More tests => 92; |
4 |
use Test::Exception; |
use Test::Exception; |
5 |
use Cwd qw/abs_path/; |
use Cwd qw/abs_path/; |
6 |
use blib; |
use blib; |
7 |
use strict; |
use strict; |
8 |
|
use Data::Dumper; |
9 |
|
|
10 |
BEGIN { |
BEGIN { |
11 |
use_ok( 'WebPAC::Normalize::XML' ); |
use_ok( 'WebPAC::Normalize::XML' ); |
13 |
|
|
14 |
ok(my $abs_path = abs_path($0), "abs_path"); |
ok(my $abs_path = abs_path($0), "abs_path"); |
15 |
$abs_path =~ s#/[^/]*$#/#; |
$abs_path =~ s#/[^/]*$#/#; |
16 |
diag "abs_path: $abs_path"; |
#diag "abs_path: $abs_path"; |
17 |
|
|
18 |
|
throws_ok { new WebPAC::Normalize::XML( lookup_regex => 'foo' ) } qr/pair/, "lookup_regex without lookup"; |
19 |
|
throws_ok { new WebPAC::Normalize::XML( lookup => 'bar' ) } qr/pair/, "lookup without lookup_regex"; |
20 |
|
|
21 |
ok(my $n = new WebPAC::Normalize::XML( debug => 0 ), "new"); |
ok(my $n = new WebPAC::Normalize::XML( |
22 |
|
debug => 0, |
23 |
|
filter => { |
24 |
|
regex => sub { |
25 |
|
my ($val, $regex) = @_; |
26 |
|
eval "\$val =~ $regex"; |
27 |
|
return $val; |
28 |
|
}, |
29 |
|
}, |
30 |
|
), "new"); |
31 |
|
|
32 |
throws_ok { $n->open() } qr/tag/, "open without tag"; |
throws_ok { $n->open() } qr/tag/, "open without tag"; |
33 |
throws_ok { $n->open( tag => 'isis' ) } qr/xml_file/, "open without xml_file"; |
throws_ok { $n->open( tag => 'isis' ) } qr/xml_file/, "open without xml_file"; |
34 |
throws_ok { $n->open( tag => 'isis', xml_file => 'foo' ) } qr/file.*doesn't exist/, "open with invalid xml_file"; |
throws_ok { $n->open( tag => 'isis', xml_file => '/foo/bar/baz' ) } qr/file.*doesn't exist/, "open with invalid xml_file"; |
35 |
ok( $n->open( |
ok( $n->open( |
36 |
tag => 'isis', |
tag => 'isis', |
37 |
xml_file => "$abs_path../conf/normalize/isis.xml", |
xml_file => "$abs_path/data/normalize.xml", |
38 |
), "open"); |
), "open"); |
39 |
|
|
40 |
my $rec = { |
my $rec = { |
41 |
'000' => [ '001' ], |
'675' => [ |
42 |
'800' => [ 'foo' ], |
{ |
43 |
'900' => [ 'bar' ], |
'a' => '159.9' |
44 |
|
} |
45 |
|
], |
46 |
|
'210' => [ |
47 |
|
{ |
48 |
|
'c' => 'New York University press', |
49 |
|
'a' => 'New York', |
50 |
|
'd' => 'cop. 1988' |
51 |
|
} |
52 |
|
], |
53 |
|
'700' => [ |
54 |
|
{ |
55 |
|
'a' => 'Haynal', |
56 |
|
'b' => 'André' |
57 |
|
} |
58 |
|
], |
59 |
|
'801' => [ |
60 |
|
'FFZG' |
61 |
|
], |
62 |
|
'991' => [ |
63 |
|
'8302' |
64 |
|
], |
65 |
|
'000' => [ |
66 |
|
1 |
67 |
|
], |
68 |
|
'702' => [ |
69 |
|
{ |
70 |
|
'a' => 'Holder', |
71 |
|
'b' => 'Elizabeth' |
72 |
|
} |
73 |
|
], |
74 |
|
'215' => [ |
75 |
|
{ |
76 |
|
'c' => 'ilustr', |
77 |
|
'a' => 'xix, 202 str', |
78 |
|
'd' => '23cm' |
79 |
|
} |
80 |
|
], |
81 |
|
'990' => [ |
82 |
|
'2140', |
83 |
|
'88', |
84 |
|
'HAY' |
85 |
|
], |
86 |
|
'200' => [ |
87 |
|
{ |
88 |
|
'e' => 'from Freud and Ferenczi to Michael balint', |
89 |
|
'a' => 'Controversies in psychoanalytic method', |
90 |
|
'g' => 'translated by Elizabeth Holder on the basisi of a first draft by Archie Hooton ; with a preface by Daniel N. Stern', |
91 |
|
'f' => 'by André E. Haynal' |
92 |
|
} |
93 |
|
], |
94 |
|
'610' => [ |
95 |
|
'povijest psihoanalize' |
96 |
|
], |
97 |
|
'994' => [ |
98 |
|
{ |
99 |
|
'c' => '', |
100 |
|
'a' => 'PS', |
101 |
|
'b' => 'MG' |
102 |
|
} |
103 |
|
], |
104 |
|
'320' => [ |
105 |
|
'Kazalo' |
106 |
|
], |
107 |
|
'101' => [ |
108 |
|
'ENG' |
109 |
|
], |
110 |
|
'686' => [ |
111 |
|
'2140' |
112 |
|
], |
113 |
|
'300' => [ |
114 |
|
'Prijevod djela: ' |
115 |
|
] |
116 |
}; |
}; |
117 |
|
|
118 |
ok(my @ds = $n->data_structure( $rec ), "data_structure"); |
foreach my $fld (keys %$rec) { |
119 |
|
my $r = 0; |
120 |
|
foreach my $item ($rec->{$fld}) { |
121 |
|
if (ref($item) eq 'HASH') { |
122 |
|
foreach my $sf (keys %$item) { |
123 |
|
my $found = 0; |
124 |
|
ok($n->get_data(\$rec, $fld, $sf, $r, \$found), "v${fld}^${sf}"); |
125 |
|
ok($found, "found"); |
126 |
|
} |
127 |
|
my $found = 0; |
128 |
|
ok(! $n->get_data(\$rec, $fld, 'x', $r, \$found), "no v${fld}^x"); |
129 |
|
ok(! $found, "not found"); |
130 |
|
} else { |
131 |
|
my $found = 0; |
132 |
|
ok($n->get_data(\$rec, $fld, '', $r, \$found), "v${fld}"); |
133 |
|
ok($found, "found"); |
134 |
|
} |
135 |
|
} |
136 |
|
my $found = 0; |
137 |
|
ok(! $n->get_data(\$rec, '999', '', $r, \$found), "no v${fld}"); |
138 |
|
ok(! $found, "not found"); |
139 |
|
} |
140 |
|
|
141 |
|
ok(my $ds = $n->data_structure( $rec ), "data_structure"); |
142 |
|
|
143 |
use Data::Dumper; |
#diag Dumper($rec, $ds); |
144 |
diag Dumper(\@ds); |
|
145 |
|
# fake load of our test normalisation data |
146 |
|
$n->{tag} = 'isis'; |
147 |
|
|
148 |
|
#diag Dumper($n->{import_xml}->{indexer}); |
149 |
|
|
150 |
|
|
151 |
|
$rec = { |
152 |
|
'900' => [ { |
153 |
|
'a' => '1', |
154 |
|
'b' => '2', |
155 |
|
'c' => '3', |
156 |
|
'x' => 'yap', |
157 |
|
} ], |
158 |
|
}; |
159 |
|
|
160 |
|
my $import = { |
161 |
|
'Tag' => { 'isis' => [ |
162 |
|
{ content => 'v900^a + v900^b = v900^c [v900^x]' }, |
163 |
|
] }, |
164 |
|
}; |
165 |
|
|
166 |
|
sub parse_test($$$$) { |
167 |
|
my ($import,$rec,$i,$r) = @_; |
168 |
|
$n->{import_xml}->{indexer} = $import; |
169 |
|
# erase internal cache (yak!) |
170 |
|
delete($n->{tags_by_order}); |
171 |
|
push @{$rec->{'000'}}, 42 unless ($rec->{'000'}); |
172 |
|
#diag "test normalisation of: ",Dumper($n->{import_xml}->{indexer}, $rec); |
173 |
|
ok(my $ds = $n->data_structure( $rec ), "data_structure"); |
174 |
|
#diag Dumper($ds); |
175 |
|
cmp_ok($ds->{Tag}->{display}->[$i], 'eq', $r, "parse $i: $r"); |
176 |
|
} |
177 |
|
|
178 |
|
parse_test($import, $rec, 0, '1 + 2 = 3 [yap]'); |
179 |
|
|
180 |
|
delete($rec->{'900'}->[0]->{'b'}); |
181 |
|
parse_test($import, $rec, 0, '1 = 3 [yap]'); |
182 |
|
|
183 |
|
$rec->{'900'}->[0]->{'b'} = 5; |
184 |
|
$rec->{'900'}->[0]->{'c'} = 6; |
185 |
|
parse_test($import, $rec, 0, '1 + 5 = 6 [yap]'); |
186 |
|
|
187 |
|
delete($rec->{'900'}->[0]->{'c'}); |
188 |
|
$rec->{'900'}->[0]->{'x'} = 'hmmm'; |
189 |
|
parse_test($import, $rec, 0, '1 + 5 [hmmm]'); |
190 |
|
|
191 |
|
$rec->{'900'}->[0]->{'x'} = 'nope!'; |
192 |
|
delete($rec->{'900'}->[0]->{'a'}); |
193 |
|
parse_test($import, $rec, 0, '5 [nope!]'); |
194 |
|
|
195 |
|
$rec = { |
196 |
|
'900' => [ { |
197 |
|
'b' => 'b1', |
198 |
|
'x' => 'b1', |
199 |
|
},{ |
200 |
|
'a' => 'a1', |
201 |
|
'b' => 'b2', |
202 |
|
'x' => 'a1_b2', |
203 |
|
},{ |
204 |
|
'b' => 'b3', |
205 |
|
'c' => 'c1', |
206 |
|
'x' => 'b3_c1', |
207 |
|
},{ |
208 |
|
'a' => 'a2', |
209 |
|
'b' => 'b4', |
210 |
|
'c' => 'c2', |
211 |
|
'x' => 'a2_b4_c2', |
212 |
|
} ], |
213 |
|
}; |
214 |
|
|
215 |
|
my $import = { |
216 |
|
'Tag' => { 'isis' => [ |
217 |
|
{ content => '0 v900^a 1 v900^b 2 v900^c 3 v900^x 4' }, |
218 |
|
] }, |
219 |
|
}; |
220 |
|
|
221 |
|
parse_test($import, $rec, 0, '0 b1 3 b1 4'); |
222 |
|
parse_test($import, $rec, 1, '0 a1 1 b2 3 a1_b2 4'); |
223 |
|
parse_test($import, $rec, 2, '0 b3 2 c1 3 b3_c1 4'); |
224 |
|
parse_test($import, $rec, 3, '0 a2 1 b4 2 c2 3 a2_b4_c2 4'); |