--- trunk/lib/WebPAC/Validate.pm 2007/05/24 12:44:45 837 +++ trunk/lib/WebPAC/Validate.pm 2007/05/27 16:49:15 857 @@ -18,11 +18,11 @@ =head1 VERSION -Version 0.11 +Version 0.12 =cut -our $VERSION = '0.11'; +our $VERSION = '0.12'; =head1 SYNOPSIS @@ -60,6 +60,9 @@ careful here, those delimiters are just stuck into regex, so they can contain L regexpes. +C and C can be specified by L and +L calls. + =cut sub new { @@ -67,86 +70,118 @@ my $self = {@_}; bless($self, $class); -warn dump( @_ ); - my $log = $self->_get_logger(); - if ( $self->{path} ) { + $self->read_validate_file( $self->{path} ) if ( $self->{path} ); - my $v_file = read_file( $self->{path} ) || - $log->logdie("can't open validate path $self->{path}: $!"); + if ( $self->{delimiters} ) { + $self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')'; + $log->info("validation check delimiters with regex $self->{delimiters_regex}"); + } - my $v; - my $curr_line = 1; + $self->read_validate_delimiters_file( $self->{delimiters_path} ) if ( $self->{delimiters_path} ); - foreach my $l (split(/[\n\r]+/, $v_file)) { - $curr_line++; + return $self; +} - # skip comments and whitespaces - next if ($l =~ /^#/ || $l =~ /^\s*$/); - $l =~ s/^\s+//; - $l =~ s/\s+$//; +=head2 read_validate_file - my @d = split(/\s+/, $l); +Specify validate rules file - my $fld = shift @d; + $validate->read_validate_file( 'conf/validate/file' ); - if ($fld =~ s/!$//) { - $self->{must_exist}->{$fld}++; - } elsif ($fld =~ s/-$//) { - $self->{dont_validate}->{$fld}++; - } +Returns number of lines in file - $log->logdie("need field name in line $curr_line: $l") unless (defined($fld)); +=cut - if (@d) { - $v->{$fld} = [ map { - my $sf = $_; - if ( $sf =~ s/!(\*)?$/$1/ ) { - $self->{must_exist_sf}->{ $fld }->{ $sf }++; - }; - $sf; - } @d ]; - } else { - $v->{$fld} = 1; - } +sub read_validate_file { + my $self = shift; + + my $path = shift || die "no path?"; + + my $log = $self->_get_logger(); + + my $v_file = read_file( $path ) || + $log->logdie("can't open validate path $path: $!"); + + my $v; + my $curr_line = 1; + foreach my $l (split(/[\n\r]+/, $v_file)) { + $curr_line++; + + # skip comments and whitespaces + next if ($l =~ /^#/ || $l =~ /^\s*$/); + + $l =~ s/^\s+//; + $l =~ s/\s+$//; + + my @d = split(/\s+/, $l); + + my $fld = shift @d; + + if ($fld =~ s/!$//) { + $self->{must_exist}->{$fld}++; + } elsif ($fld =~ s/-$//) { + $self->{dont_validate}->{$fld}++; } - $log->debug("current validation rules: ", dump($v)); + $log->logdie("need field name in line $curr_line: $l") unless (defined($fld)); - $self->{rules} = $v; + if (@d) { + $v->{$fld} = [ map { + my $sf = $_; + if ( $sf =~ s/!(\*)?$/$1/ ) { + $self->{must_exist_sf}->{ $fld }->{ $sf }++; + }; + $sf; + } @d ]; + } else { + $v->{$fld} = 1; + } - $log->info("validation uses rules from $self->{path}"); } - if ( $self->{delimiters} ) { - $self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')'; - $log->info("validation check delimiters with regex $self->{delimiters_regex}"); - } + $log->debug("current validation rules: ", dump($v)); - if ( my $path = $self->{delimiters_path} ) { - if ( -e $path ) { - $log->info("using delimiter validation rules from $path"); - open(my $d, $path) || $log->fatal("can't open $path: $!"); - while(<$d>) { - chomp($d); - if (/^\s*(#*)\s*(\d+)\t+(\d+)\t+(.*)$/) { - my ($comment,$field,$count,$template) = ($1,$2,$3,$4); - $self->{_validate_delimiters_templates}->{$field}->{$template} = $count unless ($comment); - } else { - warn "## ignored $d\n"; - } + $self->{rules} = $v; + + $log->info("validation uses rules from $path"); + + return $curr_line; +} + +=head2 read_validate_delimiters_file + + $validate->read_validate_delimiters_file( 'conf/validate/delimiters/file' ); + +=cut + +sub read_validate_delimiters_file { + my $self = shift; + + my $path = shift || die "no path?"; + + my $log = $self->_get_logger(); + + if ( -e $path ) { + $log->info("using delimiter validation rules from $path"); + open(my $d, $path) || $log->fatal("can't open $path: $!"); + while(<$d>) { + chomp($d); + if (/^\s*(#*)\s*(\d+)\t+(\d+)\t+(.*)$/) { + my ($comment,$field,$count,$template) = ($1,$2,$3,$4); + $self->{_validate_delimiters_templates}->{$field}->{$template} = $count unless ($comment); + } else { + warn "## ignored $d\n"; } - close($d); - warn "_validate_delimiters_templates = ",dump( $self->{_validate_delimiters_templates} ); - } else { - $log->warn("delimiters path $path doesn't exist, it will be created after this run"); } + close($d); + #warn "_validate_delimiters_templates = ",dump( $self->{_validate_delimiters_templates} ); + } else { + $log->warn("delimiters path $path doesn't exist, it will be created after this run"); } - - $self ? return $self : return undef; } =head2 validate_rec @@ -193,11 +228,11 @@ $self->{_delimiters_templates}->{$f}->{$template}++; if ( my $v = $self->{_validate_delimiters_templates} ) { - if ( ! defined( $v->{$template} ) ) { - $errors->{$f}->{invalid_delimiters_combination} = $template; + if ( ! defined( $v->{$f}->{$template} ) ) { + $errors->{$f}->{potentially_invalid_combination} = $template; $errors->{$f}->{dump} = $subfield_dump; - } else { - warn "## $f $template ok\n"; + #} else { + # warn "## $f $template ok\n"; } } } @@ -282,7 +317,7 @@ } } - $log->debug("_delimiters_templates = ", dump( $self->{_delimiters_templates} ) ); + $log->debug("_delimiters_templates = ", sub { dump( $self->{_delimiters_templates} ) } ); foreach my $must (sort keys %{ $self->{must_exist} }) { next if ($fields->{$must}); @@ -293,7 +328,7 @@ if ($errors) { $log->debug("errors: ", $self->report_error( $errors ) ); - my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", dump( $rec ), " doesn't have MFN"); + my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", sub { dump( $rec ) }, " doesn't have MFN"); $self->{errors}->{$mfn} = $errors; } @@ -302,17 +337,37 @@ return $errors; } -=head2 reset_errors +=head2 reset + +Clean all accumulated errors for this input and remember delimiter templates +for L -Clean all accumulated errors for this input + $validate->reset; - $validate->reset_errors; +This function B be called after each input to provide accurate statistics. =cut -sub reset_errors { +sub reset { my $self = shift; + + my $log = $self->_get_logger; + delete ($self->{errors}); + + if ( ! $self->{_delimiters_templates} ) { + $log->debug("called without _delimiters_templates?"); + return; + } + + foreach my $f ( keys %{ $self->{_delimiters_templates} } ) { + foreach my $t ( keys %{ $self->{_delimiters_templates}->{$f} } ) { + $self->{_accumulated_delimiters_templates}->{$f}->{$t} += + $self->{_delimiters_templates}->{$f}->{$t}; + } + } + $log->debug("_accumulated_delimiters_templates = ", sub { dump( $self->{_accumulated_delimiter_templates} ) } ); + delete ($self->{_delimiters_templates}); } =head2 all_errors @@ -373,18 +428,14 @@ $accumulated ? "$accumulated\t$k" : $k ); - $log->debug( - ( $new_results ? "new_results: " . dump($new_results) ." " : '' ), - ); + $log->debug( "new_results: ", sub { dump($new_results) } ) if ( $new_results ); push @$results, $new_results if ($new_results); $dump = $new_dump if ($new_dump); } - $log->debug( - ( $results ? "results: " . dump($results) ." " : '' ), - ); + $log->debug( "results: ", sub { dump($results) } ) if ( $results ); if ($#$results == 0) { return ($results->[0], $dump); @@ -448,6 +499,7 @@ my $report = $validate->delimiter_teplates( report => 1, + current_input => 1, ); Options: @@ -458,6 +510,10 @@ Generate humanly readable report with single fields +=item current_input + +Report just current_input and not accumulated data + =back =cut @@ -467,7 +523,8 @@ my $args = {@_}; - my $t = $self->{_delimiters_templates}; + my $t = $self->{_accumulated_delimiters_templates}; + $t = $self->{_delimiters_templates} if ( $args->{current_input} ); my $log = $self->_get_logger; @@ -494,6 +551,10 @@ =head2 save_delimiters_templates +Save accumulated delimiter templates + + $validator->save_delimiters_template( '/path/to/validate/delimiters' ); + =cut sub save_delimiters_templates { @@ -505,6 +566,18 @@ my $log = $self->_get_logger; + if ( ! $self->{_accumulated_delimiters_templates} ) { + $log->error('no _accumulated_delimiters_templates found, reset'); + $self->reset; + } + + if ( ! $self->{_delimiters_templates} ) { + $log->error('found _delimiters_templates, calling reset'); + $self->reset; + } + + $path .= '.new' if ( -e $path ); + open(my $d, '>', $path) || $log->fatal("can't open $path: $!"); print $d $self->delimiters_templates; close($d);