--- trunk/lib/WebPAC/Validate.pm 2007/05/23 20:03:16 832 +++ trunk/lib/WebPAC/Validate.pm 2007/05/24 12:44:45 837 @@ -53,6 +53,7 @@ my $validate = new WebPAC::Validate( path => 'conf/validate/file', delimiters => [ ' : ', ' / ', ' ; ', ' , ' ], + delimiters_path => 'conf/validate/delimiters/file', ); Optional parametar C will turn on validating of delimiters. Be @@ -66,64 +67,85 @@ my $self = {@_}; bless($self, $class); +warn dump( @_ ); + my $log = $self->_get_logger(); - foreach my $p (qw/path/) { - $log->logconfess("need $p") unless ($self->{$p}); - } + if ( $self->{path} ) { - my $v_file = read_file( $self->{path} ) || - $log->logdie("can't open validate path $self->{path}: $!"); + my $v_file = read_file( $self->{path} ) || + $log->logdie("can't open validate path $self->{path}: $!"); - my $v; - my $curr_line = 1; + my $v; + my $curr_line = 1; - foreach my $l (split(/[\n\r]+/, $v_file)) { - $curr_line++; + foreach my $l (split(/[\n\r]+/, $v_file)) { + $curr_line++; - # skip comments and whitespaces - next if ($l =~ /^#/ || $l =~ /^\s*$/); + # skip comments and whitespaces + next if ($l =~ /^#/ || $l =~ /^\s*$/); - $l =~ s/^\s+//; - $l =~ s/\s+$//; + $l =~ s/^\s+//; + $l =~ s/\s+$//; - my @d = split(/\s+/, $l); + my @d = split(/\s+/, $l); - my $fld = shift @d; + my $fld = shift @d; - if ($fld =~ s/!$//) { - $self->{must_exist}->{$fld}++; - } elsif ($fld =~ s/-$//) { - $self->{dont_validate}->{$fld}++; - } + if ($fld =~ s/!$//) { + $self->{must_exist}->{$fld}++; + } elsif ($fld =~ s/-$//) { + $self->{dont_validate}->{$fld}++; + } - $log->logdie("need field name in line $curr_line: $l") unless (defined($fld)); + $log->logdie("need field name in line $curr_line: $l") unless (defined($fld)); - if (@d) { - $v->{$fld} = [ map { - my $sf = $_; - if ( $sf =~ s/!(\*)?$/$1/ ) { - $self->{must_exist_sf}->{ $fld }->{ $sf }++; - }; - $sf; - } @d ]; - } else { - $v->{$fld} = 1; - } + if (@d) { + $v->{$fld} = [ map { + my $sf = $_; + if ( $sf =~ s/!(\*)?$/$1/ ) { + $self->{must_exist_sf}->{ $fld }->{ $sf }++; + }; + $sf; + } @d ]; + } else { + $v->{$fld} = 1; + } - } + } - $log->debug("current validation rules: ", dump($v)); + $log->debug("current validation rules: ", dump($v)); - $self->{rules} = $v; + $self->{rules} = $v; - $log->info("validation uses rules from $self->{path}"); + $log->info("validation uses rules from $self->{path}"); + } if ( $self->{delimiters} ) { $self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')'; $log->info("validation check delimiters with regex $self->{delimiters_regex}"); } + if ( my $path = $self->{delimiters_path} ) { + if ( -e $path ) { + $log->info("using delimiter validation rules from $path"); + open(my $d, $path) || $log->fatal("can't open $path: $!"); + while(<$d>) { + chomp($d); + if (/^\s*(#*)\s*(\d+)\t+(\d+)\t+(.*)$/) { + my ($comment,$field,$count,$template) = ($1,$2,$3,$4); + $self->{_validate_delimiters_templates}->{$field}->{$template} = $count unless ($comment); + } else { + warn "## ignored $d\n"; + } + } + close($d); + warn "_validate_delimiters_templates = ",dump( $self->{_validate_delimiters_templates} ); + } else { + $log->warn("delimiters path $path doesn't exist, it will be created after this run"); + } + } + $self ? return $self : return undef; } @@ -144,7 +166,8 @@ my $rec_dump = shift; $log->logdie("rec isn't HASH") unless (ref($rec) eq 'HASH'); - $log->logdie("can't find validation rules") unless (my $r = $self->{rules}); +# $log->logdie("can't find validation rules") unless (my $r = $self->{rules}); + my $r = $self->{rules}; my $errors; @@ -160,15 +183,29 @@ if ( my $regex = $self->{delimiters_regex} ) { foreach my $v (@{ $rec->{$f} }) { - my $l = _pack_subfields_hash( $v, 1 ); - my $template = ''; - $l =~ s/$regex/$template.=$1/eg; -# warn "## template: $template\n"; - $self->{_delimiters_templates}->{$f}->{$template}++ if ( $template ); + my $l = _pack_subfields_hash( $v, 1 ); + my $subfield_dump = $l; + my $template = ''; + $l =~ s/$regex/$template.=$1/eg; + #warn "## template: $template\n"; + + if ( $template ) { + $self->{_delimiters_templates}->{$f}->{$template}++; + + if ( my $v = $self->{_validate_delimiters_templates} ) { + if ( ! defined( $v->{$template} ) ) { + $errors->{$f}->{invalid_delimiters_combination} = $template; + $errors->{$f}->{dump} = $subfield_dump; + } else { + warn "## $f $template ok\n"; + } + } + } } - } + next unless ( $r ); # skip validation of no rules are specified + next if (defined( $self->{dont_validate}->{$f} )); # track field usage @@ -326,7 +363,7 @@ if ($k eq 'dump') { $dump = $tree->{dump}; -# warn "## dump: ",dump($dump),"\n"; + #warn "## dump ",dump($dump),"\n"; next; } @@ -360,7 +397,7 @@ sub _reformat { my $l = shift; $l =~ s/\t/ /g; - $l =~ s/_/ /; + $l =~ s/_/ /g; return $l; } @@ -407,33 +444,74 @@ =head2 delimiters_templates +Generate report of delimiter tamplates + + my $report = $validate->delimiter_teplates( + report => 1, + ); + +Options: + +=over 4 + +=item report + +Generate humanly readable report with single fields + +=back =cut sub delimiters_templates { my $self = shift; + my $args = {@_}; + my $t = $self->{_delimiters_templates}; my $log = $self->_get_logger; unless ($t) { - $log->warn("called without delimiters"); + $log->error("called without delimiters"); return; } my $out; foreach my $f (sort { $a <=> $b } keys %$t) { - $out .= "$f\n"; - foreach my $sft (sort { $a cmp $b } keys %{ $t->{$f} }) { - $out .= "\t" . $t->{$f}->{$sft} . "\t$sft\n"; + $out .= "$f\n" if ( $args->{report} ); + foreach my $template (sort { $a cmp $b } keys %{ $t->{$f} }) { + my $count = $t->{$f}->{$template}; + $out .= + ( $count ? "" : "# " ) . + ( $args->{report} ? "" : "$f" ) . + "\t$count\t$template\n"; } } return $out; } +=head2 save_delimiters_templates + +=cut + +sub save_delimiters_templates { + my $self = shift; + + my $path = $self->{delimiters_path}; + + return unless ( $path ); + + my $log = $self->_get_logger; + + open(my $d, '>', $path) || $log->fatal("can't open $path: $!"); + print $d $self->delimiters_templates; + close($d); + + $log->info("new delimiters templates saved to $path"); +} + =head1 AUTHOR Dobrica Pavlinusic, C<< >>