--- trunk/lib/WebPAC/Validate.pm	2006/11/03 19:41:28	768
+++ trunk/lib/WebPAC/Validate.pm	2007/06/26 17:46:49	875
@@ -18,11 +18,11 @@
 
 =head1 VERSION
 
-Version 0.10
+Version 0.12
 
 =cut
 
-our $VERSION = '0.10';
+our $VERSION = '0.12';
 
 =head1 SYNOPSIS
 
@@ -52,8 +52,17 @@
 
   my $validate = new WebPAC::Validate(
 	path => 'conf/validate/file',
+	delimiters => [ ' : ', ' / ', ' ; ', ' , ' ],
+	delimiters_path => 'conf/validate/delimiters/file',
   );
 
+Optional parameter C<delimiters> will turn on validating of delimiters. Be
+careful here, those delimiters are just stuck into regex, so they can
+contain L<perlre> regexps.
+
+C<path> and C<delimiters_path> can be specified by L</read_validate_file> and
+L</read_validate_delimiters_file> calls.
+
 =cut
 
 sub new {
@@ -63,14 +72,43 @@
 
 	my $log = $self->_get_logger();
 
-	foreach my $p (qw/path/) {
-		$log->logconfess("need $p") unless ($self->{$p});
+	$self->read_validate_file( $self->{path} ) if ( $self->{path} );
+
+	if ( $self->{delimiters} ) {
+		$self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')';
+		$log->info("validation check delimiters with regex $self->{delimiters_regex}");
 	}
 
-	my $v_file = read_file( $self->{path} ) ||
-		$log->logdie("can't open validate path $self->{path}: $!");
+	$self->read_validate_delimiters_file( $self->{delimiters_path} ) if ( $self->{delimiters_path} );
+
+	return $self;
+}
+
+
+=head2 read_validate_file
+
+Specify validate rules file
+
+  $validate->read_validate_file( 'conf/validate/file' );
+
+Returns number of lines in file
+
+=cut
+
+sub read_validate_file {
+	my $self = shift;
+
+	my $path = shift || die "no path?";
+
+	my $log = $self->_get_logger();
+
+	my $v_file = read_file( $path ) ||
+		$log->logdie("can't open validate path $path: $!");
 
 	my $v;
+	delete( $self->{must_exist} );
+	delete( $self->{must_exist_sf} );
+	delete( $self->{dont_validate} );
 	my $curr_line = 1;
 
 	foreach my $l (split(/[\n\r]+/, $v_file)) {
@@ -112,9 +150,45 @@
 
 	$self->{rules} = $v;
 
-	$log->info("validation uses rules from $self->{path}");
+	$log->info("validation uses rules from $path");
+
+	return $curr_line;
+}
+
+=head2 read_validate_delimiters_file
+
+  $validate->read_validate_delimiters_file( 'conf/validate/delimiters/file' );
+
+=cut
+
+sub read_validate_delimiters_file {
+	my $self = shift;
+
+	my $path = shift || die "no path?";
+
+	my $log = $self->_get_logger();
 
-	$self ? return $self : return undef;
+	delete( $self->{_validate_delimiters_templates} );
+	delete( $self->{_delimiters_templates} );
+
+	if ( -e $path ) {
+		$log->info("using delimiter validation rules from $path");
+		open(my $d, '<', $path) || $log->logdie("can't open $path: $!");
+		while(<$d>) {
+			chomp;	# strip the newline from the line in $_, not from the handle
+			if (/^\s*(#*)\s*(\d+)\t+(\d+)\t+(.*)$/) {
+				my ($comment,$field,$count,$template) = ($1,$2,$3,$4);
+				$self->{_validate_delimiters_templates}->{$field}->{$template} = $count unless ($comment);
+			} else {
+				warn "## ignored $_\n";
+			}
+		}
+		close($d);
+		#warn "_validate_delimiters_templates = ",dump( $self->{_validate_delimiters_templates} );
+	} else {
+		$log->warn("delimiters path $path doesn't exist, it will be created after this run");
+	}
+	$self->{delimiters_path} = $path;
 }
 
 =head2 validate_rec
@@ -134,7 +208,8 @@
 	my $rec_dump = shift;
 
 	$log->logdie("rec isn't HASH") unless (ref($rec) eq 'HASH');
-	$log->logdie("can't find validation rules") unless (my $r = $self->{rules});
+#	$log->logdie("can't find validation rules") unless (my $r = $self->{rules});
+	my $r = $self->{rules};
 
 	my $errors;
 
@@ -146,6 +221,33 @@
 
 		next if (!defined($f) || $f eq '' || $f eq '000');
 
+		# first check delimiters
+		if ( my $regex = $self->{delimiters_regex} ) {
+
+			foreach my $v (@{ $rec->{$f} }) {
+				my $l = _pack_subfields_hash( $v, 1 );
+				my $subfield_dump = $l;
+				my $template = '';
+				$l =~ s/$regex/$template.=$1/eg;
+				#warn "## template: $template\n";
+
+				if ( $template ) {
+					$self->{_delimiters_templates}->{$f}->{$template}++;
+
+					if ( my $valid_templates = $self->{_validate_delimiters_templates} ) {
+						if ( ! defined( $valid_templates->{$f}->{$template} ) ) {
+							$errors->{$f}->{potentially_invalid_combination} = $template;
+							$errors->{$f}->{dump} = $subfield_dump;
+						#} else {
+						#	warn "## $f $template ok\n";
+						}
+					}
+				}
+			}
+		}
+
+		next unless ( $r );	# skip validation if no rules are specified
+
 		next if (defined( $self->{dont_validate}->{$f} ));
 
 		# track field usage
@@ -171,6 +273,7 @@
 #				$errors->{dump} = $rec_dump if ($rec_dump);
 			} elsif (ref($v) ne 'HASH') {
 				$errors->{$f}->{missing_subfield} = join(",", @{ $r->{$f} }) . " required";
+				$errors->{$f}->{dump} = $v;
 				next;
 			} else {
 
@@ -222,6 +325,8 @@
 		}
 	}
 
+	$log->debug("_delimiters_templates = ", sub { dump( $self->{_delimiters_templates} ) } );
+
 	foreach my $must (sort keys %{ $self->{must_exist} }) {
 		next if ($fields->{$must});
 		$errors->{$must}->{missing}++;
@@ -231,7 +336,7 @@
 	if ($errors) {
 		$log->debug("errors: ", $self->report_error( $errors ) );
 
-		my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", dump( $rec ), " doesn't have MFN");
+		my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", sub { dump( $rec ) }, " doesn't have MFN");
 		$self->{errors}->{$mfn} = $errors;
 	}
 
@@ -240,17 +345,37 @@
 	return $errors;
 }
 
-=head2 reset_errors
+=head2 reset
+
+Clean all accumulated errors for this input and remember delimiter templates
+for L</save_delimiters_templates>
 
-Clean all accumulated errors for this input
+  $validate->reset;
 
-  $validate->reset_errors;
+This function B<must> be called after each input to provide accurate statistics.
 
 =cut
 
-sub reset_errors {
+sub reset {
 	my $self = shift;
+
+	my $log = $self->_get_logger;
+
 	delete ($self->{errors});
+
+	if ( ! $self->{_delimiters_templates} ) {
+		$log->debug("called without _delimiters_templates?");
+		return;
+	}
+
+	foreach my $f ( keys %{ $self->{_delimiters_templates} } ) {
+		foreach my $t ( keys %{ $self->{_delimiters_templates}->{$f} } ) {
+			$self->{_accumulated_delimiters_templates}->{$f}->{$t} +=
+				$self->{_delimiters_templates}->{$f}->{$t};
+		}
+	}
+	$log->debug("_accumulated_delimiters_templates = ", sub { dump( $self->{_accumulated_delimiters_templates} ) } );
+	delete ($self->{_delimiters_templates});
 }
 
 =head2 all_errors
@@ -301,7 +426,7 @@
 
 			if ($k eq 'dump') {
 				$dump = $tree->{dump};
-#				warn "## dump: ",dump($dump),"\n";
+				#warn "## dump ",dump($dump),"\n";
 				next;
 			}
 
@@ -311,18 +436,14 @@
 				$accumulated ? "$accumulated\t$k" : $k
 			);
 
-			$log->debug(
-				( $new_results ? "new_results: " . dump($new_results) ." " : '' ),
-			);
+			$log->debug( "new_results: ", sub { dump($new_results) } ) if ( $new_results );
 
 			push @$results, $new_results if ($new_results);
 			$dump = $new_dump if ($new_dump);
 
 		}
 
-		$log->debug(
-			( $results ? "results: " . dump($results) ." " : '' ),
-		);
+		$log->debug( "results: ", sub { dump($results) } ) if ( $results );
 
 		if ($#$results == 0) {
 			return ($results->[0], $dump);
@@ -335,7 +456,7 @@
 sub _reformat {
 	my $l = shift;
 	$l =~ s/\t/ /g;
-	$l =~ s/_/ /;
+	$l =~ s/_/ /g;
 	return $l;
 }
 
@@ -380,6 +501,100 @@
 }
 
 
+=head2 delimiters_templates
+
+Generate report of delimiter templates
+
+  my $report = $validate->delimiters_templates(
+	report => 1,
+	current_input => 1,
+  );
+
+Options:
+
+=over 4
+
+=item report
+
+Generate humanly readable report with single fields
+
+=item current_input
+
+Report just current_input and not accumulated data
+
+=back
+
+=cut
+
+sub delimiters_templates {
+	my $self = shift;
+
+	my $args = {@_};
+
+	my $t = $self->{_accumulated_delimiters_templates};
+	$t = $self->{_delimiters_templates} if ( $args->{current_input} );
+
+	my $log = $self->_get_logger;
+
+	unless ($t) {
+		$log->error("called without delimiters");
+		return;
+	}
+
+	my $out;
+
+	foreach my $f (sort { $a <=> $b } keys %$t) {
+		$out .= "$f\n" if ( $args->{report} );
+		foreach my $template (sort { $a cmp $b } keys %{ $t->{$f} }) {
+			my $count = $t->{$f}->{$template};
+			$out .=
+				( $count ? "" : "# " ) .
+				( $args->{report} ? "" : "$f" ) .
+				"\t$count\t$template\n";
+		}
+	}
+
+	return $out;
+}
+
+=head2 save_delimiters_templates
+
+Save accumulated delimiter templates
+
+  $validator->save_delimiters_templates( '/path/to/validate/delimiters' );
+
+=cut
+
+sub save_delimiters_templates {
+	my $self = shift;
+
+	my $path = shift;
+	$path ||= $self->{delimiters_path};
+
+	my $log = $self->_get_logger;
+
+	$log->logdie("need path") unless ( $path );
+
+
+	if ( ! $self->{_accumulated_delimiters_templates} ) {
+		$log->error('no _accumulated_delimiters_templates found, reset');
+		$self->reset;
+	}
+
+	if ( $self->{_delimiters_templates} ) {
+		$log->error('found _delimiters_templates, calling reset');
+		$self->reset;
+	}
+
+	$path .= '.new' if ( -e $path );
+
+	open(my $d, '>', $path) || $log->logdie("can't open $path: $!");
+	print $d $self->delimiters_templates;
+	close($d) || $log->logdie("can't close $path: $!");	# buffered write errors surface at close
+
+	$log->info("new delimiters templates saved to $path");
+}
+
 =head1 AUTHOR
 
 Dobrica Pavlinusic, C<< >>