/[webpac2]/trunk/lib/WebPAC/Input/XML.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 992 - (hide annotations)
Sun Nov 4 13:47:02 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 3403 byte(s)
 r1523@llin:  dpavlin | 2007-11-04 14:39:39 +0100
 hush all kind of debugging output

1 dpavlin 968 package WebPAC::Input::XML;
2    
3     use warnings;
4     use strict;
5    
6     use WebPAC::Input;
7     use base qw/WebPAC::Common/;
8    
9     use XML::Simple;
10     use File::Find;
11 dpavlin 989 use File::Slurp;
12 dpavlin 968
13     use Data::Dump qw/dump/;
14    
15     =head1 NAME
16    
17     WebPAC::Input::XML - support for reading XML files
18    
19     =cut
20    
21 dpavlin 989 our $VERSION = '0.02';
22 dpavlin 968
23     =head1 FUNCTIONS
24    
25     =head2 new
26    
27     Returns new low-level input API object
28    
29     my $input = new WebPAC::Input::XML(
30     path => '/path/to/XML/records.txt'
31     filter => sub {
32     my ($l,$field_nr) = @_;
33     # do something with $l which is line of input file
34     return $l;
35     },
36 dpavlin 989 mungle => 'conf/mungle/xml-mungle.pl',
37 dpavlin 968 }
38    
39     Options:
40    
41     =over 4
42    
43     =item path
44    
45     path to directory with xml files ending in C<.xml>
46    
47 dpavlin 989 =item mungle
48    
49     path to perl data_structure mungler which will be called to pre-normalize
50     hash produced by this module.
51    
52     It's ugly and souldn't be really here, but I didn't wanted to write separate
53     input module for each possible XML in the face of the earth, and having perl
54     power to transform hash is just... Best solution :-)
55    
56 dpavlin 968 =back
57    
58     =cut
59    
60     sub new {
61     my $class = shift;
62     my $self = {@_};
63     bless($self, $class);
64    
65     my $arg = {@_};
66    
67 dpavlin 992 #warn "#### arg = ",dump( $arg );
68 dpavlin 989
69 dpavlin 968 my $log = $self->_get_logger();
70    
71     $log->logdie("can't find path ", $arg->{path}, ": $!\n") unless -d $arg->{path};
72    
73     $log->info("collecting xml files from ", $arg->{path});
74    
75     my @files;
76    
77     find({
78     wanted => sub {
79     my $path = $File::Find::name;
80     return unless -f $path && $path =~ m/\.xml$/i;
81     push @files, $path;
82     },
83     follow => 1,
84     }, $arg->{path} );
85    
86     $log->info("found ", $#files + 1, " XML files in ", $arg->{path});
87    
88 dpavlin 984 $self->{_files} = [ sort @files ];
89 dpavlin 968
90 dpavlin 989 if ( my $path = $arg->{mungle} ) {
91     $log->logdie("can't find $path: $!") unless -r $path;
92     $log->info("using $path as mungle rules");
93    
94     $self->{mungle_rules} = read_file( $path ) || $log->logdie("can't open $path: $!");
95     }
96    
97 dpavlin 968 $self ? return $self : return undef;
98     }
99    
100     =head2 fetch_rec
101    
102     Return record with ID C<$mfn> from database
103    
104     my $rec = $input->fetch_rec( $mfn, $filter_coderef );
105    
106     =cut
107    
108     sub fetch_rec {
109     my $self = shift;
110    
111     my ( $mfn, $filter_coderef ) = @_;
112    
113     my $path = $self->{_files}->[ $mfn - 1 ] || return;
114    
115 dpavlin 970 my $log = $self->_get_logger();
116 dpavlin 968
117 dpavlin 989 our $xml = XMLin(
118 dpavlin 981 $path,
119     # ForceArray => 1,
120     # ForceContent => 1,
121 dpavlin 989 KeepRoot => 1,
122 dpavlin 981 # SuppressEmpty => 1, # '' undef
123 dpavlin 970
124 dpavlin 981 ) || $log->logdie("can't open $path: $!");
125    
126 dpavlin 984 $log->debug("fetch_rec( $mfn ) => $path => xml is ",sub { dump($xml) });
127 dpavlin 970
128 dpavlin 989 our $ds;
129    
130     if ( my $rules = $self->{mungle_rules} ) {
131    
132     sub get_ds {
133 dpavlin 992 # warn "### get_ds xml = ",dump($xml);
134 dpavlin 989 return $xml;
135     }
136     sub set_ds {
137     my $hash = {@_};
138 dpavlin 992 # warn "### set_ds hash = ",dump($hash);
139 dpavlin 989 foreach my $f ( keys %$hash ) {
140 dpavlin 992 # warn "+++ $f ", dump( $hash->{$f} ),"\n";
141 dpavlin 989 $ds->{$f} = $hash->{$f};
142     }
143 dpavlin 992 # warn "### set_ds mungle_ds = ",dump($ds);
144 dpavlin 989 }
145     eval "$rules";
146     $log->logdie("mungle rules $path error: $@") if $@;
147    
148 dpavlin 992 # warn "### set_ds after mungle_rules ds = ",dump($ds);
149 dpavlin 989 } else {
150    
151     $ds = $xml;
152    
153     }
154    
155 dpavlin 970 # add mfn
156 dpavlin 989 $ds->{'000'} = [ $mfn ];
157 dpavlin 970
158 dpavlin 989 return $ds;
159 dpavlin 968 }
160    
161    
162     =head2 size
163    
164     Return number of records in database
165    
166     my $size = $input->size;
167    
168     =cut
169    
170     sub size {
171     my $self = shift;
172     return $#{$self->{_files}} + 1;
173     }
174    
175     =head1 AUTHOR
176    
177     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
178    
179     =head1 COPYRIGHT & LICENSE
180    
181     Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
182    
183     This program is free software; you can redistribute it and/or modify it
184     under the same terms as Perl itself.
185    
186     =cut
187    
188     1;

  ViewVC Help
Powered by ViewVC 1.1.26