--- trunk/lib/WebPAC/Normalize.pm 2006/01/08 21:16:27 371 +++ trunk/lib/WebPAC/Normalize.pm 2006/04/30 12:17:19 436 @@ -13,11 +13,11 @@ =head1 VERSION -Version 0.08 +Version 0.09 =cut -our $VERSION = '0.08'; +our $VERSION = '0.09'; =head1 SYNOPSIS @@ -137,6 +137,37 @@ $self ? return $self : return undef; } +=head2 all_tags + +Returns all tags in document in specified order + + my $sorted_tags = $self->all_tags(); + +=cut + +sub all_tags { + my $self = shift; + + if (! $self->{_tags_by_order}) { + + my $log = $self->_get_logger; + # sanity check + $log->logdie("can't find self->{inport_xml}->{indexer}") unless ($self->{import_xml}->{indexer}); + + my @tags = keys %{ $self->{'import_xml'}->{'indexer'}}; + $log->debug("unsorted tags: " . join(", ", @tags)); + + @tags = sort { $self->_sort_by_order } @tags; + + $log->debug("sorted tags: " . join(",", @tags) ); + + $self->{_tags_by_order} = \@tags; + } + + return $self->{_tags_by_order}; +} + + =head2 data_structure @@ -172,19 +203,13 @@ $log->debug("cache miss, creating"); } - my @sorted_tags; - if ($self->{tags_by_order}) { - @sorted_tags = @{$self->{tags_by_order}}; - } else { - @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}}; - $self->{tags_by_order} = \@sorted_tags; - } + my $tags = $self->all_tags(); - my $ds; + $log->debug("tags: ",sub { join(", ",@{ $tags }) }); - $log->debug("tags: ",sub { join(", ",@sorted_tags) }); + my $ds; - foreach my $field (@sorted_tags) { + foreach my $field (@{ $tags }) { my $row; @@ -198,13 +223,13 @@ my @v; if ($self->{'lookup_regex'} && $format =~ $self->{'lookup_regex'}) { - @v = $self->fill_in_to_arr($rec,$format); + @v = $self->_rec_to_arr($rec,$format,'fill_in'); } else { - @v = $self->parse_to_arr($rec,$format); + @v = $self->_rec_to_arr($rec,$format,'parse'); } if (! @v) { $log->debug("$field <",$self->{tag},"> format: $format no values"); -# next; + next; } else { $log->debug("$field <",$self->{tag},"> format: $format values: ", join(",", @v)); } @@ -364,7 +389,7 @@ # we will skip delimiter before first occurence of field! push @out, $del unless($found_any->{$fld_type} == 1); - push @out, $tmp; + push @out, $tmp if ($tmp); } $f_step++; } @@ -410,40 +435,6 @@ return $out; } -=head2 parse_to_arr - -Similar to C, but returns array of all repeatable fields - - my @arr = $webpac->parse_to_arr($rec,'v250^a'); - -=cut - -sub parse_to_arr { - my $self = shift; - - my ($rec, $format_utf8) = @_; - - my $log = $self->_get_logger(); - - $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); - return if (! $format_utf8); - - my $i = 0; - my @arr; - - my $rec_size = { '_' => '_' }; - - while (my $v = $self->parse($rec,$format_utf8,$i++,\$rec_size)) { - push @arr, $v; - warn "parse rec_size = ", Dumper($rec_size); - } - - $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr); - - return @arr; -} - - =head2 fill_in Workhourse of all: takes record from in-memory structure of database and @@ -505,15 +496,20 @@ # remove filter{...} from beginning $filter_name = $1 if ($format =~ s/^filter{([^}]+)}//s); - # do actual replacement of placeholders - # repeatable fields - if ($format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found,$rec_size)/ges) { - $just_single = 0; - } + { + # fix warnings + no warnings 'uninitialized'; + + # do actual replacement of placeholders + # repeatable fields + if ($format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found,$rec_size)/ges) { + $just_single = 0; + } - # non-repeatable fields - if ($format =~ s/s(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,0,\$found,$rec_size)/ges) { - return if ($i > 0 && $just_single); + # non-repeatable fields + if ($format =~ s/s(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,0,\$found,$rec_size)/ges) { + return if ($i > 0 && $just_single); + } } if ($found) { @@ -546,34 +542,47 @@ } -=head2 fill_in_to_arr +=head2 _rec_to_arr -Similar to C, but returns array of all repeatable fields. Usable +Similar to C and C, but returns array of all repeatable fields. Usable for fields which have lookups, so they shouldn't be parsed but rather -Ced. +Cd or Ced. Last argument is name of operation: C or C. - my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]'); + my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]','paste'); =cut -sub fill_in_to_arr { +sub _rec_to_arr { my $self = shift; - my ($rec, $format_utf8) = @_; + my ($rec, $format_utf8, $code) = @_; my $log = $self->_get_logger(); $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); return if (! $format_utf8); + $log->debug("using $code on $format_utf8"); + my $i = 0; + my $max = 0; my @arr; + my $rec_size = {}; - my $rec_size; - - while (my $v = $self->fill_in($rec,$format_utf8,$i,\$rec_size)) { - push @arr, $v; - warn "rec_size = ", Dumper($rec_size); + while ($i <= $max) { + my @v = $self->$code($rec,$format_utf8,$i++,\$rec_size); + if ($rec_size) { + foreach my $f (keys %{ $rec_size }) { + $max = $rec_size->{$f} if ($rec_size->{$f} > $max); + } + $log->debug("max set to $max"); + undef $rec_size; + } + if (@v) { + push @arr, @v; + } else { + push @arr, '' if ($max > $i); + } } $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr); @@ -649,7 +658,8 @@ if ($$rec->{$f}->[$i] =~ /HASH/o) { my $out; foreach my $k (keys %{$$rec->{$f}->[$i]}) { - $out .= '$' . $k .':' . $$rec->{$f}->[$i]->{$k}." "; + my $v = $$rec->{$f}->[$i]->{$k}; + $out .= '$' . $k .':' . $v if ($v); } return $out; } else {