8 |
|
|
9 |
use XML::Simple; |
use XML::Simple; |
10 |
use File::Find; |
use File::Find; |
11 |
|
use File::Slurp; |
12 |
|
|
13 |
use Data::Dump qw/dump/; |
use Data::Dump qw/dump/; |
14 |
|
|
18 |
|
|
19 |
=cut |
=cut |
20 |
|
|
21 |
our $VERSION = '0.01'; |
our $VERSION = '0.02'; |
22 |
|
|
23 |
=head1 FUNCTIONS |
=head1 FUNCTIONS |
24 |
|
|
33 |
# do something with $l which is line of input file |
# do something with $l which is line of input file |
34 |
return $l; |
return $l; |
35 |
}, |
}, |
36 |
|
mungle => 'conf/mungle/xml-mungle.pl', |
37 |
} |
} |
38 |
|
|
39 |
Options: |
Options: |
44 |
|
|
45 |
path to directory with xml files ending in C<.xml> |
path to directory with xml files ending in C<.xml> |
46 |
|
|
47 |
|
=item mungle |
48 |
|
|
49 |
|
path to perl data_structure mungler which will be called to pre-normalize |
50 |
|
hash produced by this module. |
51 |
|
|
52 |
|
It's ugly and souldn't be really here, but I didn't wanted to write separate |
53 |
|
input module for each possible XML in the face of the earth, and having perl |
54 |
|
power to transform hash is just... Best solution :-) |
55 |
|
|
56 |
=back |
=back |
57 |
|
|
58 |
=cut |
=cut |
64 |
|
|
65 |
my $arg = {@_}; |
my $arg = {@_}; |
66 |
|
|
67 |
|
warn "#### arg = ",dump( $arg ); |
68 |
|
|
69 |
my $log = $self->_get_logger(); |
my $log = $self->_get_logger(); |
70 |
|
|
71 |
$log->logdie("can't find path ", $arg->{path}, ": $!\n") unless -d $arg->{path}; |
$log->logdie("can't find path ", $arg->{path}, ": $!\n") unless -d $arg->{path}; |
85 |
|
|
86 |
$log->info("found ", $#files + 1, " XML files in ", $arg->{path}); |
$log->info("found ", $#files + 1, " XML files in ", $arg->{path}); |
87 |
|
|
88 |
$self->{_files} = \@files; |
$self->{_files} = [ sort @files ]; |
89 |
|
|
90 |
|
if ( my $path = $arg->{mungle} ) { |
91 |
|
$log->logdie("can't find $path: $!") unless -r $path; |
92 |
|
$log->info("using $path as mungle rules"); |
93 |
|
|
94 |
|
$self->{mungle_rules} = read_file( $path ) || $log->logdie("can't open $path: $!"); |
95 |
|
} |
96 |
|
|
97 |
$self ? return $self : return undef; |
$self ? return $self : return undef; |
98 |
} |
} |
112 |
|
|
113 |
my $path = $self->{_files}->[ $mfn - 1 ] || return; |
my $path = $self->{_files}->[ $mfn - 1 ] || return; |
114 |
|
|
115 |
warn "### fetch_rec( $mfn ) = $path"; |
my $log = $self->_get_logger(); |
116 |
|
|
117 |
|
our $xml = XMLin( |
118 |
|
$path, |
119 |
|
# ForceArray => 1, |
120 |
|
# ForceContent => 1, |
121 |
|
KeepRoot => 1, |
122 |
|
# SuppressEmpty => 1, # '' undef |
123 |
|
|
124 |
|
) || $log->logdie("can't open $path: $!"); |
125 |
|
|
126 |
|
$log->debug("fetch_rec( $mfn ) => $path => xml is ",sub { dump($xml) }); |
127 |
|
|
128 |
|
our $ds; |
129 |
|
|
130 |
|
if ( my $rules = $self->{mungle_rules} ) { |
131 |
|
|
132 |
|
sub get_ds { |
133 |
|
warn "### get_ds xml = ",dump($xml); |
134 |
|
return $xml; |
135 |
|
} |
136 |
|
sub set_ds { |
137 |
|
my $hash = {@_}; |
138 |
|
warn "### set_ds hash = ",dump($hash); |
139 |
|
foreach my $f ( keys %$hash ) { |
140 |
|
warn "+++ $f ", dump( $hash->{$f} ),"\n"; |
141 |
|
$ds->{$f} = $hash->{$f}; |
142 |
|
} |
143 |
|
warn "### set_ds mungle_ds = ",dump($ds); |
144 |
|
} |
145 |
|
eval "$rules"; |
146 |
|
$log->logdie("mungle rules $path error: $@") if $@; |
147 |
|
|
148 |
|
warn "### set_ds after mungle_rules ds = ",dump($ds); |
149 |
|
} else { |
150 |
|
|
151 |
|
$ds = $xml; |
152 |
|
|
153 |
|
} |
154 |
|
|
155 |
|
# add mfn |
156 |
|
$ds->{'000'} = [ $mfn ]; |
157 |
|
|
158 |
|
return $ds; |
159 |
} |
} |
160 |
|
|
161 |
|
|