--- trunk/lib/WebPAC/Input/XML.pm 2007/11/02 13:59:10 968 +++ trunk/lib/WebPAC/Input/XML.pm 2007/11/04 13:26:06 989 @@ -8,6 +8,7 @@ use XML::Simple; use File::Find; +use File::Slurp; use Data::Dump qw/dump/; @@ -17,7 +18,7 @@ =cut -our $VERSION = '0.01'; +our $VERSION = '0.02'; =head1 FUNCTIONS @@ -32,6 +33,7 @@ # do something with $l which is line of input file return $l; }, + mungle => 'conf/mungle/xml-mungle.pl', } Options: @@ -42,6 +44,15 @@ path to directory with xml files ending in C<.xml> +=item mungle + +path to perl data_structure mungler which will be called to pre-normalize +hash produced by this module. + +It's ugly and souldn't be really here, but I didn't wanted to write separate +input module for each possible XML in the face of the earth, and having perl +power to transform hash is just... Best solution :-) + =back =cut @@ -53,6 +64,8 @@ my $arg = {@_}; +warn "#### arg = ",dump( $arg ); + my $log = $self->_get_logger(); $log->logdie("can't find path ", $arg->{path}, ": $!\n") unless -d $arg->{path}; @@ -72,7 +85,14 @@ $log->info("found ", $#files + 1, " XML files in ", $arg->{path}); - $self->{_files} = \@files; + $self->{_files} = [ sort @files ]; + + if ( my $path = $arg->{mungle} ) { + $log->logdie("can't find $path: $!") unless -r $path; + $log->info("using $path as mungle rules"); + + $self->{mungle_rules} = read_file( $path ) || $log->logdie("can't open $path: $!"); + } $self ? return $self : return undef; } @@ -92,8 +112,50 @@ my $path = $self->{_files}->[ $mfn - 1 ] || return; - warn "### fetch_rec( $mfn ) = $path"; + my $log = $self->_get_logger(); + + our $xml = XMLin( + $path, +# ForceArray => 1, +# ForceContent => 1, + KeepRoot => 1, +# SuppressEmpty => 1, # '' undef + + ) || $log->logdie("can't open $path: $!"); + + $log->debug("fetch_rec( $mfn ) => $path => xml is ",sub { dump($xml) }); + + our $ds; + + if ( my $rules = $self->{mungle_rules} ) { + + sub get_ds { + warn "### get_ds xml = ",dump($xml); + return $xml; + } + sub set_ds { + my $hash = {@_}; + warn "### set_ds hash = ",dump($hash); + foreach my $f ( keys %$hash ) { + warn "+++ $f ", dump( $hash->{$f} ),"\n"; + $ds->{$f} = $hash->{$f}; + } + warn "### set_ds mungle_ds = ",dump($ds); + } + eval "$rules"; + $log->logdie("mungle rules $path error: $@") if $@; + + warn "### set_ds after mungle_rules ds = ",dump($ds); + } else { + + $ds = $xml; + + } + + # add mfn + $ds->{'000'} = [ $mfn ]; + return $ds; }