/[webpac2]/trunk/lib/WebPAC/Input/XML.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Input/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 998 - (show annotations)
Sun Nov 4 16:47:03 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 3466 byte(s)
 r1536@llin:  dpavlin | 2007-11-04 17:47:03 +0100
 better handle invalid XML files

1 package WebPAC::Input::XML;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Input;
7 use base qw/WebPAC::Common/;
8
9 use XML::Simple;
10 use File::Find;
11 use File::Slurp;
12
13 use Data::Dump qw/dump/;
14
15 =head1 NAME
16
17 WebPAC::Input::XML - support for reading XML files
18
19 =cut
20
21 our $VERSION = '0.02';
22
23 =head1 FUNCTIONS
24
25 =head2 new
26
27 Returns new low-level input API object
28
29 my $input = new WebPAC::Input::XML(
30 path => '/path/to/XML/records.txt'
31 filter => sub {
32 my ($l,$field_nr) = @_;
33 # do something with $l which is line of input file
34 return $l;
35 },
36 mungle => 'conf/mungle/xml-mungle.pl',
37 }
38
39 Options:
40
41 =over 4
42
43 =item path
44
45 path to directory with xml files ending in C<.xml>
46
47 =item mungle
48
49 path to perl data_structure mungler which will be called to pre-normalize
50 hash produced by this module.
51
52 It's ugly and souldn't be really here, but I didn't wanted to write separate
53 input module for each possible XML in the face of the earth, and having perl
54 power to transform hash is just... Best solution :-)
55
56 =back
57
58 =cut
59
60 sub new {
61 my $class = shift;
62 my $self = {@_};
63 bless($self, $class);
64
65 my $arg = {@_};
66
67 #warn "#### arg = ",dump( $arg );
68
69 my $log = $self->_get_logger();
70
71 $log->logdie("can't find path ", $arg->{path}, ": $!\n") unless -d $arg->{path};
72
73 $log->info("collecting xml files from ", $arg->{path});
74
75 my @files;
76
77 find({
78 wanted => sub {
79 my $path = $File::Find::name;
80 return unless -f $path && $path =~ m/\.xml$/i;
81 push @files, $path;
82 },
83 follow => 1,
84 }, $arg->{path} );
85
86 $log->info("found ", $#files + 1, " XML files in ", $arg->{path});
87
88 $self->{_files} = [ sort @files ];
89
90 if ( my $path = $arg->{mungle} ) {
91 $log->logdie("can't find $path: $!") unless -r $path;
92 $log->info("using $path as mungle rules");
93
94 $self->{mungle_rules} = read_file( $path ) || $log->logdie("can't open $path: $!");
95 }
96
97 $self ? return $self : return undef;
98 }
99
100 =head2 fetch_rec
101
102 Return record with ID C<$mfn> from database
103
104 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
105
106 =cut
107
108 sub fetch_rec {
109 my $self = shift;
110
111 my ( $mfn, $filter_coderef ) = @_;
112
113 my $path = $self->{_files}->[ $mfn - 1 ] || return;
114
115 my $log = $self->_get_logger();
116
117 our $xml;
118 eval {
119 $xml = XMLin(
120 $path,
121 # ForceArray => 1,
122 # ForceContent => 1,
123 KeepRoot => 1,
124 # SuppressEmpty => 1, # '' undef
125 ) || die "can't open $path: $!";
126 };
127
128 if ( $@ ) {
129 $log->error("$@");
130 return;
131 }
132
133 $log->debug("fetch_rec( $mfn ) => $path => xml is ",sub { dump($xml) });
134
135 our $ds;
136
137 if ( my $rules = $self->{mungle_rules} ) {
138
139 sub get_ds {
140 # warn "### get_ds xml = ",dump($xml);
141 return $xml;
142 }
143 sub set_ds {
144 my $hash = {@_};
145 # warn "### set_ds hash = ",dump($hash);
146 foreach my $f ( keys %$hash ) {
147 # warn "+++ $f ", dump( $hash->{$f} ),"\n";
148 $ds->{$f} = $hash->{$f};
149 }
150 # warn "### set_ds mungle_ds = ",dump($ds);
151 }
152 eval "$rules";
153 $log->logdie("mungle rules $path error: $@") if $@;
154
155 # warn "### set_ds after mungle_rules ds = ",dump($ds);
156 } else {
157
158 $ds = $xml;
159
160 }
161
162 # add mfn
163 $ds->{'000'} = [ $mfn ];
164
165 return $ds;
166 }
167
168
169 =head2 size
170
171 Return number of records in database
172
173 my $size = $input->size;
174
175 =cut
176
177 sub size {
178 my $self = shift;
179 return $#{$self->{_files}} + 1;
180 }
181
182 =head1 AUTHOR
183
184 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
185
186 =head1 COPYRIGHT & LICENSE
187
188 Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
189
190 This program is free software; you can redistribute it and/or modify it
191 under the same terms as Perl itself.
192
193 =cut
194
195 1;

  ViewVC Help
Powered by ViewVC 1.1.26