--- trunk/lib/WebPAC/Validate.pm 2006/09/06 22:07:21 645 +++ trunk/lib/WebPAC/Validate.pm 2006/09/11 11:57:18 664 @@ -8,7 +8,7 @@ use base 'WebPAC::Common'; use File::Slurp; use List::Util qw/first/; -use Data::Dumper; +use Data::Dump qw/dump/; use WebPAC::Normalize qw/_pack_subfields_hash/; use Storable qw/dclone/; @@ -18,11 +18,11 @@ =head1 VERSION -Version 0.03 +Version 0.07 =cut -our $VERSION = '0.03'; +our $VERSION = '0.07'; =head1 SYNOPSIS @@ -91,14 +91,20 @@ $log->logdie("need field name in line $curr_line: $l") unless (defined($fld)); if (@d) { - $v->{$fld} = \@d; + $v->{$fld} = [ map { + my $sf = $_; + if ( $sf =~ s/!(\*)?$/$1/ ) { + $self->{must_exist_sf}->{ $fld }->{ $sf }++; + }; + $sf; + } @d ]; } else { $v->{$fld} = 1; } } - $log->debug("current validation rules: ", Dumper($v)); + $log->debug("current validation rules: ", dump($v)); $self->{rules} = $v; @@ -111,7 +117,7 @@ Validate record and return errors - my @errors = $validate->validate_errors( $rec ); + my @errors = $validate->validate_errors( $rec, $rec_dump ); =cut @@ -121,13 +127,14 @@ my $log = $self->_get_logger(); my $rec = shift || $log->logdie("validate_errors need record"); + my $rec_dump = shift; $log->logdie("rec isn't HASH") unless (ref($rec) eq 'HASH'); $log->logdie("can't find validation rules") unless (my $r = $self->{rules}); - my @errors; + my $errors; - $log->debug("rec = ", sub { Dumper($rec) }, "keys = ", keys %{ $rec }); + $log->debug("rec = ", sub { dump($rec) }, "keys = ", keys %{ $rec }); my $fields; @@ -138,13 +145,13 @@ $fields->{$f}++; if ( ! defined($r->{$f}) ) { - push @errors, "field '$f' shouldn't exists"; + $errors->{field}->{ $f }->{unexpected} = "this field is not expected"; next; } if (ref($rec->{$f}) ne 'ARRAY') { - push @errors, "field '$f' isn't repetable, probably bug in parsing input data"; + $errors->{field}->{ $f }->{not_repeatable} = "probably bug in parsing input data"; next; } @@ -152,8 +159,11 @@ # can we have subfields? if (ref($r->{$f}) eq 'ARRAY') { # are values hashes? (has subfields) - if (ref($v) ne 'HASH') { - push @errors, "$f has value without subfields: $v"; + if (! defined($v)) { +# $errors->{field}->{$f}->{empty} = undef; +# $errors->{dump} = $rec_dump if ($rec_dump); + } elsif (ref($v) ne 'HASH') { + $errors->{field}->{$f}->{missing_subfield} = join(",", @{ $r->{$f} }) . " required"; next; } else { @@ -163,47 +173,141 @@ delete($v->{subfields}) if (defined($v->{subfields})); + my $subfields; + foreach my $sf (keys %{ $v }) { + $subfields->{ $sf }++; + # is non-repeatable but with multiple values? if ( ! first { $_ eq $sf.'*' } @{$r->{$f}} ) { if ( ref($v->{$sf}) eq 'ARRAY' ) { $sf_repeatable->{$sf}++; }; if (! first { $_ eq $sf } @{ $r->{$f} }) { - push @errors, "$f has unknown subfield: $sf"; + $errors->{field}->{ $f }->{subfield}->{extra}->{$sf}++; } } } if (my @r_sf = sort keys( %$sf_repeatable )) { - my $plural = $#r_sf > 0 ? 1 : 0; - push @errors, "$f subfield" . - ( $plural ? 's ' : ' ' ) . - join(', ', @r_sf) . - ( $plural ? ' are ' : ' is ' ) . - 'repeatable in: ' . - join('', _pack_subfields_hash( $h, 1) ); + foreach my $sf (@r_sf) { + $errors->{field}->{$f}->{subfield}->{extra_repeatable}->{$sf}++; + $errors->{field}->{$f}->{dump} = + join('', _pack_subfields_hash( $h, 1 ) ); + } + + } + + if ( defined( $self->{must_exist_sf}->{$f} ) ) { + foreach my $sf (sort keys %{ $self->{must_exist_sf}->{$f} }) { +#warn "====> $f $sf must exist\n"; + $errors->{field}->{$f}->{subfield}->{missing}->{$sf}++ + unless defined( $subfields->{$sf} ); + } } + } } elsif (ref($v) eq 'HASH') { - push @errors, "$f has subfields which is not valid"; + $errors->{field}->{$f}->{unexpected_subfields}++; + $errors->{field}->{$f}->{dump} = + join('', _pack_subfields_hash( $v, 1 ) ); } } } foreach my $must (sort keys %{ $self->{must_exist} }) { next if ($fields->{$must}); - push @errors, - "field $must should exist, but it doesn't"; + $errors->{field}->{$must}->{missing}++; + $errors->{dump} = $rec_dump if ($rec_dump); + } + + if ($errors) { + $log->debug("errors: ", sub { dump( $errors ) } ); + + my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", dump( $rec ), " doesn't have MFN"); + $self->{errors}->{$mfn} = $errors; } #$log->logcluck("return from this function is ARRAY") unless wantarray; - $log->debug("errors: ", join(", ", @errors)) if (@errors); + return $errors; +} + +=head2 reset_errors + +Clean all accumulated errors for this input + + $validate->reset_errors; + +=cut + +sub reset_errors { + my $self = shift; + delete ($self->{errors}); +} + +=head2 all_errors + +Return hash with all errors + + print dump( $validate->all_errors ); + +=cut + +sub all_errors { + my $self = shift; + return $self->{errors}; +} + +=head2 report + +Produce nice humanly readable report of errors + + print $validate->report; + +=cut + +sub report { + my $self = shift; + + sub unroll { + my ($rest,$o, $dump) = @_; + +#warn "# rest: $rest o: $o\n"; + + return unless ($rest); + + if (ref($rest) ne 'HASH') { + $o .= "($rest)"; + return ($o,$dump); + } + + foreach my $k (sort keys %{ $rest }) { + + if ($k eq 'dump') { + $dump = $rest->{dump}; + warn "## dump: $dump\n"; + next; + } + my $u; + ($u, $dump) = unroll($rest->{$k}, $o, $dump); + $o .= "$k $u"; + + + } + return ($o,$dump); + } + + my $out = ''; + + foreach my $mfn (sort keys %{ $self->{errors} }) { + my ($msg,$dump) = unroll( $self->{errors}->{$mfn}, '', '' ); + $out .= "MFN $mfn\n$msg\t$dump\n\n"; + } - return @errors; + return $out; } =head1 AUTHOR