/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 701 - (show annotations)
Mon Sep 25 12:51:47 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 8728 byte(s)
 r994@llin:  dpavlin | 2006-09-25 14:49:05 +0200
 refactore support for multiple normalize files in each lookup

1 package WebPAC::Parser;
2
3 use warnings;
4 use strict;
5
6
7 use PPI;
8 use PPI::Dumper;
9 use Data::Dump qw/dump/;
10 use File::Slurp;
11
12 use base qw/WebPAC::Common WebPAC::Normalize/;
13
14 =head1 NAME
15
16 WebPAC::Parser - parse perl normalization configuration files and mungle it
17
18 =head1 VERSION
19
20 Version 0.04
21
22 =cut
23
24 our $VERSION = '0.04';
25
26 =head1 SYNOPSIS
27
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization.
30
31 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32 reading about LISP. It might be a bit over-the board, but at least it removed
33 separate configuration files for lookups.
34
35 This is experimental code, but it replaces all older formats which where,
36 at one point in time, available in WebPAC.
37
38 FIXME
39
40 =head1 FUNCTIONS
41
42 =head2 new
43
44 Create new parser object.
45
46 my $parser = new WebPAC::Parser(
47 config => new WebPAC::Config(),
48 base_path => '/optional/path/to/conf',
49 );
50
51 =cut
52
53 sub new {
54 my $class = shift;
55 my $self = {@_};
56 bless($self, $class);
57
58 my $log = $self->_get_logger();
59
60 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61
62 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63
64 $self->read_sources;
65
66 $self ? return $self : return undef;
67 }
68
69 =head2 read_sources
70
71 my $source_files = $parser->read_sources;
72
73 Called by L</new>.
74
75 =cut
76
77 sub read_sources {
78 my $self = shift;
79
80 my $log = $self->_get_logger();
81
82 my $nr = 0;
83
84 my @lookups;
85
86 $self->{config}->iterate_inputs( sub {
87 my ($input, $database) = @_;
88
89 $log->debug("database: $database input = ", dump($input));
90
91 foreach my $normalize (@{ $input->{normalize} }) {
92
93 my $path = $normalize->{path};
94 return unless($path);
95 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
96
97 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
98
99 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
100
101 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
102
103 $log->debug("$database/$input_name: adding $path");
104
105 $self->{valid_inputs}->{$database}->{$input_name}++;
106
107 push @lookups, sub {
108 $self->parse_lookups( $database, $input_name, $full, $s );
109 };
110
111 $nr++;
112 }
113 } );
114
115 $log->debug("found $nr source files");
116
117 # parse all lookups
118 $_->() foreach (@lookups);
119
120 return $nr;
121 }
122
123 =head2 parse_lookups
124
125 $parser->parse_lookups($database,$input,$path,$source);
126
127 Called for each normalize source in each input by L</new>
128
129 It will report invalid databases and inputs in error log after parsing.
130
131 =cut
132
133 sub parse_lookups {
134 my $self = shift;
135 my ($database, $input, $path, $source) = @_;
136
137 $input = _input_name($input);
138
139 my $log = $self->_get_logger();
140
141 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
142 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
143
144 $log->logdie("no source found for database $database input $input path $path") unless ($source);
145
146 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
147
148 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
149
150 $Document->prune('PPI::Token::Whitespace');
151 #$Document->prune('PPI::Token::Operator');
152
153 # Find all the named subroutines
154
155 $self->{_lookup_errors} = ();
156
157 sub _lookup_error {
158 my $self = shift;
159 my $msg = shift;
160 $self->_get_logger->logconfess("error without message?") unless ($msg);
161 push @{ $self->{_lookup_errors} }, $msg;
162 return '';
163 }
164
165 $Document->find( sub {
166 my ($Document,$Element) = @_;
167
168 $Element->isa('PPI::Token::Word') or return '';
169 $Element->content eq 'lookup' or return '';
170
171 $log->debug("expansion: ", $Element->snext_sibling);
172
173 my $args = $Element->snext_sibling;
174
175 my @e = $args->child(0)->elements;
176 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
177
178 if ($log->is_debug) {
179 my $report = "found " . scalar @e . " elements:\n";
180
181 foreach my $i ( 0 .. $#e ) {
182 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
183 }
184
185 $log->debug($report);
186 }
187
188 my $key_element = $e[8]->clone;
189
190 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
191
192 $log->debug("key part: ", $key_element);
193
194 my @key;
195
196 $key_element->find( sub {
197 my $e = $_[1] || die "no element?";
198 $e->isa('PPI::Token::Word') or return '';
199 $e->content eq 'rec' or return '';
200
201 my $kf = $e->snext_sibling;
202
203 $log->debug("key fragment = $kf");
204
205 push @key, eval $kf;
206 $log->logdie("can't eval { $kf }: $@") if ($@);
207
208 return 1;
209 });
210
211 my $key = join('-', @key ) || $log->logdie("no key found!");
212
213 $log->debug("key = $key");
214
215 my $create = '
216 $coderef = ' . $e[7] . $e[8] . ';
217 foreach my $v ($coderef->()) {
218 next unless (defined($v) && $v ne \'\');
219 push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
220 }
221 ';
222
223 $log->debug("create: $create");
224
225 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
226 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
227
228 # save code to create this lookup
229 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) } .= $create;
230
231
232 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } )) {
233 my $dep_key = $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) };
234 $log->warn("dependency of $database/$input on $e[3]/$e[5] allready recorded as $dep_key, now changed to $key") if ($dep_key ne $key);
235 }
236
237 # save this dependency
238 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } .= $key;
239
240 if ($#e < 10) {
241 $e[8]->insert_after( $e[8]->clone );
242 $e[8]->insert_after( $e[7]->clone );
243 $e[8]->insert_after( $e[6]->clone );
244 }
245
246 $e[7]->remove;
247 $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
248 $e[8]->remove;
249
250
251 $log->debug(">>> ", $Element->snext_sibling);
252 });
253
254 my $normalize_source = $Document->serialize;
255 $log->debug("create: ", dump($self->{_lookup_create}) );
256 $log->debug("normalize: $normalize_source");
257
258 $self->{_normalize_source}->{$database}->{$input} = $normalize_source;
259
260 if ($self->{debug}) {
261 my $Dumper = PPI::Dumper->new( $Document );
262 $Dumper->print;
263 }
264
265 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
266
267 return 1;
268 }
269
270
271 =head2 lookup_create_rules
272
273 my $source = $parser->lookup_create_rules($database, $input);
274
275 =cut
276
277 sub lookup_create_rules {
278 my $self = shift;
279 my ($database,$input) = @_;
280 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
281 }
282
283 =head2 valid_database
284
285 my $ok = $parse->valid_database('key');
286
287 =cut
288
289 sub valid_database {
290 my $self = shift;
291
292 my $database = shift || return;
293
294 return defined($self->{valid_inputs}->{ _q($database) });
295 }
296
297 =head2 valid_database_input
298
299 my $ok = $parse->valid_database('database_key','input_name');
300
301 =cut
302
303 sub valid_database_input {
304 my $self = shift;
305
306 my ($database,$input) = @_;
307 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
308 }
309
310 =head2 depends
311
312 Return all databases and inputs on which specified one depends
313
314 $depends_on = $parser->depends('database','input');
315
316 =cut
317
318 sub depends {
319 my $self = shift;
320 my ($database,$input) = @_;
321 $self->_get_logger->debug("depends($database,$input)");
322 return unless (
323 defined( $self->{depends}->{ _q($database) } ) &&
324 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
325 );
326 return $self->{depends}->{ _q($database) }->{ _q($input) };
327 }
328
329 =head1 PRIVATE
330
331 =head2 _q
332
333 Strip single or double quotes around value
334
335 _q(qq/'foo'/) -> foo
336
337 =cut
338
339 sub _q {
340 my $v = shift || return;
341 $v =~ s/^['"]*//g;
342 $v =~ s/['"]*$//g;
343 return $v;
344 }
345
346 =head2 _input_name
347
348 Return C<name> value if HASH or arg if scalar
349
350 _input_name($input)
351
352 =cut
353
354 sub _input_name {
355 my $input = shift || return;
356 if (ref($input) eq 'HASH') {
357 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
358 return $input->{name};
359 } else {
360 return $input;
361 }
362 }
363
364
365 =head1 AUTHOR
366
367 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
368
369 =head1 COPYRIGHT & LICENSE
370
371 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
372
373 This program is free software; you can redistribute it and/or modify it
374 under the same terms as Perl itself.
375
376 =cut
377
378 1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26