--- trunk/lib/WebPAC/Input/PDF.pm 2007/11/20 10:08:00 1056 +++ trunk/lib/WebPAC/Input/PDF.pm 2007/11/20 10:08:02 1057 @@ -69,7 +69,7 @@ $log->info("opend $file with $pages pages"); - my @lines; + my @lines = (); foreach my $p ( 1 .. $pages ) { my $tree = $doc->getPageContentTree($p); @@ -123,7 +123,7 @@ } } - $self->size( $#lines ); + $self->{_lines} = \@lines; $log->debug("loaded ", $self->size, " records", sub { dump( @lines ) }); @@ -136,6 +136,10 @@ my $rec = $input->fetch_rec( $mfn, $filter_coderef ); +Records are returned as field C, C and so on... + +Last supported column is C. + =cut sub fetch_rec { @@ -143,7 +147,36 @@ my ( $mfn, $filter_coderef ) = @_; - return $self->{_rec}->[$mfn-1]; + my $rec = { + '000' => [ $mfn ], + }; + + my $line = $self->{_lines}->[ $mfn - 1 ] || return; + confess "expected ARRAY for _lines $mfn" unless ref($line) eq 'ARRAY'; + +# warn "## line = ",dump( $line ); + + my $col = 'A'; + my $c = 0; + foreach my $e ( @$line ) { + $rec->{$col} = $e; + $c++; + # FIXME what about columns > ZZ + if ( $col eq 'Z' ) { + $col .= 'AA'; + } elsif ( $col eq 'ZZ' ) { + $self->_get_logger()->logwarn("ignoring colums above ZZ (original ", $#$line + 1, " > $c max columns)"); + last; + } elsif ( $col =~ m/([A-Z])Z$/ ) { + $col .= $1++ . 'A'; + } else { + $col++; + } + } + +# warn "## rec = ",dump( $rec ); + + return $rec; } @@ -157,7 +190,7 @@ sub size { my $self = shift; - return $#{$self->{_rec}} + 1; + return $#{$self->{_lines}} + 1; } =head1 SEE ALSO