/[webpac2]/trunk/lib/WebPAC/Input/XML.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 998 - (hide annotations)
Sun Nov 4 16:47:03 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 3466 byte(s)
 r1536@llin:  dpavlin | 2007-11-04 17:47:03 +0100
 better handle invalid XML files

1 dpavlin 968 package WebPAC::Input::XML;
2    
3     use warnings;
4     use strict;
5    
6     use WebPAC::Input;
7     use base qw/WebPAC::Common/;
8    
9     use XML::Simple;
10     use File::Find;
11 dpavlin 989 use File::Slurp;
12 dpavlin 968
13     use Data::Dump qw/dump/;
14    
15     =head1 NAME
16    
17     WebPAC::Input::XML - support for reading XML files
18    
19     =cut
20    
21 dpavlin 989 our $VERSION = '0.02';
22 dpavlin 968
23     =head1 FUNCTIONS
24    
25     =head2 new
26    
27     Returns new low-level input API object
28    
29     my $input = new WebPAC::Input::XML(
30     path => '/path/to/XML/records.txt'
31     filter => sub {
32     my ($l,$field_nr) = @_;
33     # do something with $l which is line of input file
34     return $l;
35     },
36 dpavlin 989 mungle => 'conf/mungle/xml-mungle.pl',
37 dpavlin 968 }
38    
39     Options:
40    
41     =over 4
42    
43     =item path
44    
45     path to directory with xml files ending in C<.xml>
46    
47 dpavlin 989 =item mungle
48    
49     path to perl data_structure mungler which will be called to pre-normalize
50     hash produced by this module.
51    
52     It's ugly and souldn't be really here, but I didn't wanted to write separate
53     input module for each possible XML in the face of the earth, and having perl
54     power to transform hash is just... Best solution :-)
55    
56 dpavlin 968 =back
57    
58     =cut
59    
60     sub new {
61     my $class = shift;
62     my $self = {@_};
63     bless($self, $class);
64    
65     my $arg = {@_};
66    
67 dpavlin 992 #warn "#### arg = ",dump( $arg );
68 dpavlin 989
69 dpavlin 968 my $log = $self->_get_logger();
70    
71     $log->logdie("can't find path ", $arg->{path}, ": $!\n") unless -d $arg->{path};
72    
73     $log->info("collecting xml files from ", $arg->{path});
74    
75     my @files;
76    
77     find({
78     wanted => sub {
79     my $path = $File::Find::name;
80     return unless -f $path && $path =~ m/\.xml$/i;
81     push @files, $path;
82     },
83     follow => 1,
84     }, $arg->{path} );
85    
86     $log->info("found ", $#files + 1, " XML files in ", $arg->{path});
87    
88 dpavlin 984 $self->{_files} = [ sort @files ];
89 dpavlin 968
90 dpavlin 989 if ( my $path = $arg->{mungle} ) {
91     $log->logdie("can't find $path: $!") unless -r $path;
92     $log->info("using $path as mungle rules");
93    
94     $self->{mungle_rules} = read_file( $path ) || $log->logdie("can't open $path: $!");
95     }
96    
97 dpavlin 968 $self ? return $self : return undef;
98     }
99    
100     =head2 fetch_rec
101    
102     Return record with ID C<$mfn> from database
103    
104     my $rec = $input->fetch_rec( $mfn, $filter_coderef );
105    
106     =cut
107    
108     sub fetch_rec {
109     my $self = shift;
110    
111     my ( $mfn, $filter_coderef ) = @_;
112    
113     my $path = $self->{_files}->[ $mfn - 1 ] || return;
114    
115 dpavlin 970 my $log = $self->_get_logger();
116 dpavlin 968
117 dpavlin 998 our $xml;
118     eval {
119     $xml = XMLin(
120     $path,
121     # ForceArray => 1,
122     # ForceContent => 1,
123     KeepRoot => 1,
124     # SuppressEmpty => 1, # '' undef
125     ) || die "can't open $path: $!";
126     };
127 dpavlin 970
128 dpavlin 998 if ( $@ ) {
129     $log->error("$@");
130     return;
131     }
132 dpavlin 981
133 dpavlin 984 $log->debug("fetch_rec( $mfn ) => $path => xml is ",sub { dump($xml) });
134 dpavlin 970
135 dpavlin 989 our $ds;
136    
137     if ( my $rules = $self->{mungle_rules} ) {
138    
139     sub get_ds {
140 dpavlin 992 # warn "### get_ds xml = ",dump($xml);
141 dpavlin 989 return $xml;
142     }
143     sub set_ds {
144     my $hash = {@_};
145 dpavlin 992 # warn "### set_ds hash = ",dump($hash);
146 dpavlin 989 foreach my $f ( keys %$hash ) {
147 dpavlin 992 # warn "+++ $f ", dump( $hash->{$f} ),"\n";
148 dpavlin 989 $ds->{$f} = $hash->{$f};
149     }
150 dpavlin 992 # warn "### set_ds mungle_ds = ",dump($ds);
151 dpavlin 989 }
152     eval "$rules";
153     $log->logdie("mungle rules $path error: $@") if $@;
154    
155 dpavlin 992 # warn "### set_ds after mungle_rules ds = ",dump($ds);
156 dpavlin 989 } else {
157    
158     $ds = $xml;
159    
160     }
161    
162 dpavlin 970 # add mfn
163 dpavlin 989 $ds->{'000'} = [ $mfn ];
164 dpavlin 970
165 dpavlin 989 return $ds;
166 dpavlin 968 }
167    
168    
169     =head2 size
170    
171     Return number of records in database
172    
173     my $size = $input->size;
174    
175     =cut
176    
177     sub size {
178     my $self = shift;
179     return $#{$self->{_files}} + 1;
180     }
181    
182     =head1 AUTHOR
183    
184     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
185    
186     =head1 COPYRIGHT & LICENSE
187    
188     Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
189    
190     This program is free software; you can redistribute it and/or modify it
191     under the same terms as Perl itself.
192    
193     =cut
194    
195     1;

  ViewVC Help
Powered by ViewVC 1.1.26