--- trunk/lib/WebPAC/Validate.pm 2006/09/11 12:32:51 666 +++ trunk/lib/WebPAC/Validate.pm 2007/05/24 12:44:43 836 @@ -18,11 +18,11 @@ =head1 VERSION -Version 0.07 +Version 0.11 =cut -our $VERSION = '0.07'; +our $VERSION = '0.11'; =head1 SYNOPSIS @@ -41,6 +41,8 @@ 205! a # while 210 can have a c or d 210 a c d + # field which is ignored in validation + 999- =head1 FUNCTIONS @@ -50,8 +52,13 @@ my $validate = new WebPAC::Validate( path => 'conf/validate/file', + delimiters => [ ' : ', ' / ', ' ; ', ' , ' ], ); +Optional parameter C<delimiters> turns on validation of delimiters. Be +careful here: those delimiters are interpolated into a regex as-is, so they can +contain L<perlre> regular expression syntax. + =cut sub new { @@ -86,6 +93,8 @@ if ($fld =~ s/!$//) { $self->{must_exist}->{$fld}++; + } elsif ($fld =~ s/-$//) { + $self->{dont_validate}->{$fld}++; } $log->logdie("need field name in line $curr_line: $l") unless (defined($fld)); @@ -110,23 +119,28 @@ $log->info("validation uses rules from $self->{path}"); + if ( $self->{delimiters} ) { + $self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')'; + $log->info("validation check delimiters with regex $self->{delimiters_regex}"); + } + $self ? 
return $self : return undef; } -=head2 validate_errors +=head2 validate_rec Validate record and return errors - my @errors = $validate->validate_errors( $rec, $rec_dump ); + my @errors = $validate->validate_rec( $rec, $rec_dump ); =cut -sub validate_errors { +sub validate_rec { my $self = shift; my $log = $self->_get_logger(); - my $rec = shift || $log->logdie("validate_errors need record"); + my $rec = shift || $log->logdie("validate_rec need record"); my $rec_dump = shift; $log->logdie("rec isn't HASH") unless (ref($rec) eq 'HASH'); @@ -142,6 +156,34 @@ next if (!defined($f) || $f eq '' || $f eq '000'); + # first check delimiters + if ( my $regex = $self->{delimiters_regex} ) { + + foreach my $v (@{ $rec->{$f} }) { + my $l = _pack_subfields_hash( $v, 1 ); + my $subfield_dump = $l; + my $template = ''; + $l =~ s/$regex/$template.=$1/eg; + #warn "## template: $template\n"; + + if ( $template ) { + $self->{_delimiters_templates}->{$f}->{$template}++; + + if ( my $v = $self->{_validate_delimiters_templates} ) { + if ( ! defined( $v->{$template} ) ) { + $errors->{$f}->{invalid_delimiters_combination} = $template; + $errors->{$f}->{dump} = $subfield_dump; + } else { + warn "## $f $template ok\n"; + } + } + } + } + } + + next if (defined( $self->{dont_validate}->{$f} )); + + # track field usage $fields->{$f}++; if ( ! 
defined($r->{$f}) ) { @@ -194,8 +236,7 @@ foreach my $sf (@r_sf) { $errors->{$f}->{subfield}->{extra_repeatable}->{$sf}++; - $errors->{$f}->{dump} = - join('', _pack_subfields_hash( $h, 1 ) ); + $errors->{$f}->{dump} = _pack_subfields_hash( $h, 1 ); } } @@ -211,12 +252,13 @@ } } elsif (ref($v) eq 'HASH') { $errors->{$f}->{unexpected_subfields}++; - $errors->{$f}->{dump} = - join('', _pack_subfields_hash( $v, 1 ) ); + $errors->{$f}->{dump} = _pack_subfields_hash( $v, 1 ); } } } + $log->debug("_delimiters_templates = ", dump( $self->{_delimiters_templates} ) ); + foreach my $must (sort keys %{ $self->{must_exist} }) { next if ($fields->{$must}); $errors->{$must}->{missing}++; @@ -224,7 +266,7 @@ } if ($errors) { - $log->debug("errors: ", sub { dump( $errors ) } ); + $log->debug("errors: ", $self->report_error( $errors ) ); my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", dump( $rec ), " doesn't have MFN"); $self->{errors}->{$mfn} = $errors; @@ -261,18 +303,20 @@ return $self->{errors}; } -=head2 report +=head2 report_error -Produce nice humanly readable report of errors +Produce nice humanly readable report of single error - print $validate->report; + print $validate->report_error( $error_hash ); =cut -sub report { +sub report_error { my $self = shift; - sub unroll { + my $h = shift || die "no hash?"; + + sub _unroll { my ($self, $tree, $accumulated) = @_; my $log = $self->_get_logger(); @@ -294,13 +338,13 @@ if ($k eq 'dump') { $dump = $tree->{dump}; - warn "## dump: ",dump($dump),"\n"; + #warn "## dump ",dump($dump),"\n"; next; } $log->debug("current: $k"); - my ($new_results, $new_dump) = $self->unroll($tree->{$k}, + my ($new_results, $new_dump) = $self->_unroll($tree->{$k}, $accumulated ? 
"$accumulated\t$k" : $k ); @@ -324,33 +368,99 @@ } } - my $log = $self->_get_logger(); - - my $out = ''; - my $e = $self->{errors} || return; - sub reformat { + sub _reformat { my $l = shift; $l =~ s/\t/ /g; - $l =~ s/_/ /; + $l =~ s/_/ /g; return $l; } - foreach my $mfn (sort keys %$e) { - $out .= "MFN $mfn\n"; + my $out = ''; - for my $f (sort keys %{ $e->{$mfn} }) { - my ($r, $d) = $self->unroll( $e->{$mfn}->{$f} ); - my $e = $f . ': '; - if (ref($r) eq 'ARRAY') { - $e .= join(", ", map { reformat( $_ ) } @$r); - } else { - $e .= reformat( $r ); - } - $e .= "\n\t$d" if ($d); - $e .= "\n"; - $log->debug("MFN $mfn | $e"); - $out .= $e; + for my $f (sort keys %{ $h }) { + $out .= "$f: "; + + my ($r, $d) = $self->_unroll( $h->{$f} ); + my $e; + if (ref($r) eq 'ARRAY') { + $e .= join(", ", map { _reformat( $_ ) } @$r); + } else { + $e .= _reformat( $r ); + } + $e .= "\n\t$d" if ($d); + + $out .= $e . "\n"; + } + return $out; +} + + +=head2 report + +Produce nice humanly readable report of errors + + print $validate->report; + +=cut + +sub report { + my $self = shift; + my $e = $self->{errors} || return; + + my $out; + foreach my $mfn (sort { $a <=> $b } keys %$e) { + $out .= "MFN $mfn\n" . $self->report_error( $e->{$mfn} ) . "\n"; + } + + return $out; + +} + +=head2 delimiters_templates + +Generate report of delimiter tamplates + + my $report = $validate->delimiter_teplates( + report => 1, + ); + +Options: + +=over 4 + +=item report + +Generate humanly readable report with single fields + +=back + +=cut + +sub delimiters_templates { + my $self = shift; + + my $args = {@_}; + + my $t = $self->{_delimiters_templates}; + + my $log = $self->_get_logger; + + unless ($t) { + $log->error("called without delimiters"); + return; + } + + my $out; + + foreach my $f (sort { $a <=> $b } keys %$t) { + $out .= "$f\n" if ( $args->{report} ); + foreach my $template (sort { $a cmp $b } keys %{ $t->{$f} }) { + my $count = $t->{$f}->{$template}; + $out .= + ( $count ? "" : "# " ) . 
+ ( $args->{report} ? "" : "$f" ) . + "\t$count\t$template\n"; } }