/[webpac2]/trunk/lib/WebPAC/Validate.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Validate.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 768 - (show annotations)
Fri Nov 3 19:41:28 2006 UTC (17 years, 5 months ago) by dpavlin
File size: 7625 byte(s)
 r1117@llin:  dpavlin | 2006-11-03 20:42:24 +0100
 cleanup API a bit. validate_errors in now validate_rec [0.10]

1 package WebPAC::Validate;
2
3 use warnings;
4 use strict;
5
6 use blib;
7
8 use base 'WebPAC::Common';
9 use File::Slurp;
10 use List::Util qw/first/;
11 use Data::Dump qw/dump/;
12 use WebPAC::Normalize qw/_pack_subfields_hash/;
13 use Storable qw/dclone/;
14
15 =head1 NAME
16
17 WebPAC::Validate - provide simple validation for records
18
19 =head1 VERSION
20
21 Version 0.10
22
23 =cut
24
25 our $VERSION = '0.10';
26
27 =head1 SYNOPSIS
28
29 This module provide a simple way to validate your file against a simple
30 configuration file in following format:
31
32 # field 10 doesn't have any subfields
33 10
34 # same with 101
35 101
36 # field 200 have valid subfields a-g
37 # and field e is repeatable
38 200 a b c d e* f g
39 # field 205 can have only subfield a
40 # and must exists
41 205! a
42 # while 210 can have a c or d
43 210 a c d
44 # field which is ignored in validation
45 999-
46
47 =head1 FUNCTIONS
48
49 =head2 new
50
51 Create new validation object
52
53 my $validate = new WebPAC::Validate(
54 path => 'conf/validate/file',
55 );
56
57 =cut
58
59 sub new {
60 my $class = shift;
61 my $self = {@_};
62 bless($self, $class);
63
64 my $log = $self->_get_logger();
65
66 foreach my $p (qw/path/) {
67 $log->logconfess("need $p") unless ($self->{$p});
68 }
69
70 my $v_file = read_file( $self->{path} ) ||
71 $log->logdie("can't open validate path $self->{path}: $!");
72
73 my $v;
74 my $curr_line = 1;
75
76 foreach my $l (split(/[\n\r]+/, $v_file)) {
77 $curr_line++;
78
79 # skip comments and whitespaces
80 next if ($l =~ /^#/ || $l =~ /^\s*$/);
81
82 $l =~ s/^\s+//;
83 $l =~ s/\s+$//;
84
85 my @d = split(/\s+/, $l);
86
87 my $fld = shift @d;
88
89 if ($fld =~ s/!$//) {
90 $self->{must_exist}->{$fld}++;
91 } elsif ($fld =~ s/-$//) {
92 $self->{dont_validate}->{$fld}++;
93 }
94
95 $log->logdie("need field name in line $curr_line: $l") unless (defined($fld));
96
97 if (@d) {
98 $v->{$fld} = [ map {
99 my $sf = $_;
100 if ( $sf =~ s/!(\*)?$/$1/ ) {
101 $self->{must_exist_sf}->{ $fld }->{ $sf }++;
102 };
103 $sf;
104 } @d ];
105 } else {
106 $v->{$fld} = 1;
107 }
108
109 }
110
111 $log->debug("current validation rules: ", dump($v));
112
113 $self->{rules} = $v;
114
115 $log->info("validation uses rules from $self->{path}");
116
117 $self ? return $self : return undef;
118 }
119
120 =head2 validate_rec
121
122 Validate record and return errors
123
124 my @errors = $validate->validate_rec( $rec, $rec_dump );
125
126 =cut
127
128 sub validate_rec {
129 my $self = shift;
130
131 my $log = $self->_get_logger();
132
133 my $rec = shift || $log->logdie("validate_rec need record");
134 my $rec_dump = shift;
135
136 $log->logdie("rec isn't HASH") unless (ref($rec) eq 'HASH');
137 $log->logdie("can't find validation rules") unless (my $r = $self->{rules});
138
139 my $errors;
140
141 $log->debug("rec = ", sub { dump($rec) }, "keys = ", keys %{ $rec });
142
143 my $fields;
144
145 foreach my $f (keys %{ $rec }) {
146
147 next if (!defined($f) || $f eq '' || $f eq '000');
148
149 next if (defined( $self->{dont_validate}->{$f} ));
150
151 # track field usage
152 $fields->{$f}++;
153
154 if ( ! defined($r->{$f}) ) {
155 $errors->{ $f }->{unexpected} = "this field is not expected";
156 next;
157 }
158
159
160 if (ref($rec->{$f}) ne 'ARRAY') {
161 $errors->{ $f }->{not_repeatable} = "probably bug in parsing input data";
162 next;
163 }
164
165 foreach my $v (@{ $rec->{$f} }) {
166 # can we have subfields?
167 if (ref($r->{$f}) eq 'ARRAY') {
168 # are values hashes? (has subfields)
169 if (! defined($v)) {
170 # $errors->{$f}->{empty} = undef;
171 # $errors->{dump} = $rec_dump if ($rec_dump);
172 } elsif (ref($v) ne 'HASH') {
173 $errors->{$f}->{missing_subfield} = join(",", @{ $r->{$f} }) . " required";
174 next;
175 } else {
176
177 my $h = dclone( $v );
178
179 my $sf_repeatable;
180
181 delete($v->{subfields}) if (defined($v->{subfields}));
182
183 my $subfields;
184
185 foreach my $sf (keys %{ $v }) {
186
187 $subfields->{ $sf }++;
188
189 # is non-repeatable but with multiple values?
190 if ( ! first { $_ eq $sf.'*' } @{$r->{$f}} ) {
191 if ( ref($v->{$sf}) eq 'ARRAY' ) {
192 $sf_repeatable->{$sf}++;
193 };
194 if (! first { $_ eq $sf } @{ $r->{$f} }) {
195 $errors->{ $f }->{subfield}->{extra}->{$sf}++;
196 }
197 }
198
199 }
200 if (my @r_sf = sort keys( %$sf_repeatable )) {
201
202 foreach my $sf (@r_sf) {
203 $errors->{$f}->{subfield}->{extra_repeatable}->{$sf}++;
204 $errors->{$f}->{dump} = _pack_subfields_hash( $h, 1 );
205 }
206
207 }
208
209 if ( defined( $self->{must_exist_sf}->{$f} ) ) {
210 foreach my $sf (sort keys %{ $self->{must_exist_sf}->{$f} }) {
211 #warn "====> $f $sf must exist\n";
212 $errors->{$f}->{subfield}->{missing}->{$sf}++
213 unless defined( $subfields->{$sf} );
214 }
215 }
216
217 }
218 } elsif (ref($v) eq 'HASH') {
219 $errors->{$f}->{unexpected_subfields}++;
220 $errors->{$f}->{dump} = _pack_subfields_hash( $v, 1 );
221 }
222 }
223 }
224
225 foreach my $must (sort keys %{ $self->{must_exist} }) {
226 next if ($fields->{$must});
227 $errors->{$must}->{missing}++;
228 $errors->{dump} = $rec_dump if ($rec_dump);
229 }
230
231 if ($errors) {
232 $log->debug("errors: ", $self->report_error( $errors ) );
233
234 my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", dump( $rec ), " doesn't have MFN");
235 $self->{errors}->{$mfn} = $errors;
236 }
237
238 #$log->logcluck("return from this function is ARRAY") unless wantarray;
239
240 return $errors;
241 }
242
243 =head2 reset_errors
244
245 Clean all accumulated errors for this input
246
247 $validate->reset_errors;
248
249 =cut
250
251 sub reset_errors {
252 my $self = shift;
253 delete ($self->{errors});
254 }
255
256 =head2 all_errors
257
258 Return hash with all errors
259
260 print dump( $validate->all_errors );
261
262 =cut
263
264 sub all_errors {
265 my $self = shift;
266 return $self->{errors};
267 }
268
269 =head2 report_error
270
271 Produce nice humanly readable report of single error
272
273 print $validate->report_error( $error_hash );
274
275 =cut
276
277 sub report_error {
278 my $self = shift;
279
280 my $h = shift || die "no hash?";
281
282 sub _unroll {
283 my ($self, $tree, $accumulated) = @_;
284
285 my $log = $self->_get_logger();
286
287 $log->debug("# ",
288 ( $tree ? "tree: $tree " : '' ),
289 ( $accumulated ? "accumulated: $accumulated " : '' ),
290 );
291
292 my $results;
293
294 if (ref($tree) ne 'HASH') {
295 return ("$accumulated\t($tree)", undef);
296 }
297
298 my $dump;
299
300 foreach my $k (sort keys %{ $tree }) {
301
302 if ($k eq 'dump') {
303 $dump = $tree->{dump};
304 # warn "## dump: ",dump($dump),"\n";
305 next;
306 }
307
308 $log->debug("current: $k");
309
310 my ($new_results, $new_dump) = $self->_unroll($tree->{$k},
311 $accumulated ? "$accumulated\t$k" : $k
312 );
313
314 $log->debug(
315 ( $new_results ? "new_results: " . dump($new_results) ." " : '' ),
316 );
317
318 push @$results, $new_results if ($new_results);
319 $dump = $new_dump if ($new_dump);
320
321 }
322
323 $log->debug(
324 ( $results ? "results: " . dump($results) ." " : '' ),
325 );
326
327 if ($#$results == 0) {
328 return ($results->[0], $dump);
329 } else {
330 return ($results, $dump);
331 }
332 }
333
334
335 sub _reformat {
336 my $l = shift;
337 $l =~ s/\t/ /g;
338 $l =~ s/_/ /;
339 return $l;
340 }
341
342 my $out = '';
343
344 for my $f (sort keys %{ $h }) {
345 $out .= "$f: ";
346
347 my ($r, $d) = $self->_unroll( $h->{$f} );
348 my $e;
349 if (ref($r) eq 'ARRAY') {
350 $e .= join(", ", map { _reformat( $_ ) } @$r);
351 } else {
352 $e .= _reformat( $r );
353 }
354 $e .= "\n\t$d" if ($d);
355
356 $out .= $e . "\n";
357 }
358 return $out;
359 }
360
361
362 =head2 report
363
364 Produce nice humanly readable report of errors
365
366 print $validate->report;
367
368 =cut
369
370 sub report {
371 my $self = shift;
372 my $e = $self->{errors} || return;
373
374 my $out;
375 foreach my $mfn (sort { $a <=> $b } keys %$e) {
376 $out .= "MFN $mfn\n" . $self->report_error( $e->{$mfn} ) . "\n";
377 }
378
379 return $out;
380
381 }
382
383 =head1 AUTHOR
384
385 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
386
387 =head1 COPYRIGHT & LICENSE
388
389 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
390
391 This program is free software; you can redistribute it and/or modify it
392 under the same terms as Perl itself.
393
394 =cut
395
396 1; # End of WebPAC::Validate

  ViewVC Help
Powered by ViewVC 1.1.26