/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 724 - (hide annotations)
Fri Sep 29 18:55:31 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 9706 byte(s)
 r1038@llin:  dpavlin | 2006-09-29 14:27:45 +0200
 remove comments from normalize rules when parsing

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 706 use base qw/WebPAC::Common/;
13 dpavlin 686
14     =head1 NAME
15    
16     WebPAC::Parser - parse perl normalization configuration files and mungle it
17    
18     =head1 VERSION
19    
20 dpavlin 717 Version 0.06
21 dpavlin 686
22     =cut
23    
24 dpavlin 717 our $VERSION = '0.06';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29     to produce lookups and normalization.
30    
31     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32     reading about LISP. It might be a bit over-the board, but at least it removed
33     separate configuration files for lookups.
34    
35     This is experimental code, but it replaces all older formats which where,
36     at one point in time, available in WebPAC.
37    
38 dpavlin 686 FIXME
39    
40     =head1 FUNCTIONS
41    
42     =head2 new
43    
44     Create new parser object.
45    
46     my $parser = new WebPAC::Parser(
47     config => new WebPAC::Config(),
48     base_path => '/optional/path/to/conf',
49     );
50    
51     =cut
52    
53     sub new {
54     my $class = shift;
55     my $self = {@_};
56     bless($self, $class);
57    
58     my $log = $self->_get_logger();
59    
60     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61    
62     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63    
64 dpavlin 703 $self->_read_sources;
65 dpavlin 686
66 dpavlin 691 $self ? return $self : return undef;
67     }
68    
69 dpavlin 703 =head2 valid_database
70    
71     my $ok = $parse->valid_database('key');
72    
73     =cut
74    
75     sub valid_database {
76     my $self = shift;
77    
78     my $database = shift || return;
79    
80     return defined($self->{valid_inputs}->{ _q($database) });
81     }
82    
83     =head2 valid_database_input
84    
85     my $ok = $parse->valid_database('database_key','input_name');
86    
87     =cut
88    
89     sub valid_database_input {
90     my $self = shift;
91     my ($database,$input) = @_;
92 dpavlin 705 $input = _input_name($input);
93 dpavlin 703 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
94     }
95    
96     =head2 depends
97    
98     Return all databases and inputs on which specified one depends
99    
100     $depends_on = $parser->depends('database','input');
101    
102     =cut
103    
104     sub depends {
105     my $self = shift;
106     my ($database,$input) = @_;
107 dpavlin 705 $input = _input_name($input);
108 dpavlin 703 $self->_get_logger->debug("depends($database,$input)");
109     return unless (
110     defined( $self->{depends}->{ _q($database) } ) &&
111     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
112     );
113     return $self->{depends}->{ _q($database) }->{ _q($input) };
114     }
115    
116 dpavlin 706 =head2 have_lookup_create
117    
118     my @keys = $parser->have_lookup_create($database, $input);
119    
120     =cut
121    
122     sub have_lookup_create {
123     my $self = shift;
124     my ($database,$input) = @_;
125     $input = _input_name($input);
126     return unless (
127     defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
128     defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
129     );
130     return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
131     }
132    
133    
134 dpavlin 705 =head2 lookup_create_rules
135    
136     my $source = $parser->lookup_create_rules($database, $input);
137    
138     =cut
139    
140     sub lookup_create_rules {
141     my $self = shift;
142     my ($database,$input) = @_;
143     $input = _input_name($input);
144     return unless (
145     defined( $self->{_lookup_create}->{ _q($database) } ) &&
146     defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
147     );
148     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
149     }
150    
151 dpavlin 717 =head2 normalize_rules
152    
153     my $source = $parser->normalize_rules($database, $input);
154    
155     =cut
156    
157     sub normalize_rules {
158     my $self = shift;
159     my ($database,$input) = @_;
160     $input = _input_name($input);
161     return unless (
162     defined( $self->{_normalize_source}->{ _q($database) } ) &&
163     defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
164     );
165     return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
166     }
167    
168 dpavlin 703 =head1 PRIVATE
169    
170     =head2 _read_sources
171    
172     my $source_files = $parser->_read_sources;
173    
174 dpavlin 691 Called by L</new>.
175    
176     =cut
177    
178 dpavlin 703 sub _read_sources {
179 dpavlin 691 my $self = shift;
180    
181     my $log = $self->_get_logger();
182    
183     my $nr = 0;
184    
185 dpavlin 701 my @lookups;
186    
187 dpavlin 691 $self->{config}->iterate_inputs( sub {
188     my ($input, $database) = @_;
189    
190 dpavlin 699 $log->debug("database: $database input = ", dump($input));
191    
192 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
193 dpavlin 691
194 dpavlin 698 my $path = $normalize->{path};
195     return unless($path);
196     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
197 dpavlin 691
198 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
199 dpavlin 691
200 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
201 dpavlin 691
202 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
203 dpavlin 691
204 dpavlin 698 $log->debug("$database/$input_name: adding $path");
205    
206 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
207 dpavlin 698
208 dpavlin 701 push @lookups, sub {
209 dpavlin 703 $self->_parse_lookups( $database, $input_name, $full, $s );
210 dpavlin 701 };
211 dpavlin 698
212     $nr++;
213     }
214 dpavlin 686 } );
215    
216 dpavlin 691 $log->debug("found $nr source files");
217 dpavlin 686
218 dpavlin 701 # parse all lookups
219     $_->() foreach (@lookups);
220    
221 dpavlin 691 return $nr;
222 dpavlin 686 }
223    
224 dpavlin 703 =head2 _parse_lookups
225 dpavlin 686
226 dpavlin 703 $parser->_parse_lookups($database,$input,$path,$source);
227 dpavlin 691
228 dpavlin 708 Called for each normalize source (rules) in each input by L</_read_sources>
229 dpavlin 698
230     It will report invalid databases and inputs in error log after parsing.
231    
232 dpavlin 686 =cut
233    
234 dpavlin 703 sub _parse_lookups {
235 dpavlin 686 my $self = shift;
236 dpavlin 701 my ($database, $input, $path, $source) = @_;
237 dpavlin 686
238 dpavlin 698 $input = _input_name($input);
239    
240 dpavlin 686 my $log = $self->_get_logger();
241    
242 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
243     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
244 dpavlin 686
245 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
246 dpavlin 686
247 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
248 dpavlin 691
249     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
250    
251 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
252 dpavlin 724 $Document->prune('PPI::Token::Comment');
253 dpavlin 686 #$Document->prune('PPI::Token::Operator');
254    
255     # Find all the named subroutines
256    
257 dpavlin 691 $self->{_lookup_errors} = ();
258 dpavlin 686
259 dpavlin 691 sub _lookup_error {
260     my $self = shift;
261     my $msg = shift;
262     $self->_get_logger->logconfess("error without message?") unless ($msg);
263     push @{ $self->{_lookup_errors} }, $msg;
264 dpavlin 690 return '';
265     }
266    
267 dpavlin 686 $Document->find( sub {
268     my ($Document,$Element) = @_;
269    
270     $Element->isa('PPI::Token::Word') or return '';
271     $Element->content eq 'lookup' or return '';
272    
273 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
274 dpavlin 686
275     my $args = $Element->snext_sibling;
276    
277     my @e = $args->child(0)->elements;
278 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
279 dpavlin 686
280 dpavlin 687 if ($log->is_debug) {
281     my $report = "found " . scalar @e . " elements:\n";
282 dpavlin 686
283 dpavlin 687 foreach my $i ( 0 .. $#e ) {
284     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
285     }
286    
287     $log->debug($report);
288 dpavlin 686 }
289    
290     my $key_element = $e[8]->clone;
291    
292 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
293 dpavlin 686
294 dpavlin 687 $log->debug("key part: ", $key_element);
295 dpavlin 686
296     my @key;
297    
298     $key_element->find( sub {
299     my $e = $_[1] || die "no element?";
300     $e->isa('PPI::Token::Word') or return '';
301     $e->content eq 'rec' or return '';
302    
303     my $kf = $e->snext_sibling;
304    
305 dpavlin 687 $log->debug("key fragment = $kf");
306 dpavlin 686
307     push @key, eval $kf;
308 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
309 dpavlin 686
310     return 1;
311     });
312    
313 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
314 dpavlin 686
315 dpavlin 687 $log->debug("key = $key");
316 dpavlin 686
317 dpavlin 712 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
318     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
319 dpavlin 686
320 dpavlin 712 my $create = qq{
321     save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
322     };
323    
324 dpavlin 687 $log->debug("create: $create");
325 dpavlin 686
326 dpavlin 698 # save code to create this lookup
327 dpavlin 706 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
328     $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
329 dpavlin 686
330 dpavlin 698
331 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
332     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
333 dpavlin 698 }
334    
335     # save this dependency
336 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
337 dpavlin 698
338 dpavlin 686 if ($#e < 10) {
339     $e[8]->insert_after( $e[8]->clone );
340     $e[8]->insert_after( $e[7]->clone );
341     $e[8]->insert_after( $e[6]->clone );
342     }
343    
344     $e[7]->remove;
345     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
346     $e[8]->remove;
347    
348    
349 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
350 dpavlin 686 });
351    
352 dpavlin 693 my $normalize_source = $Document->serialize;
353 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
354 dpavlin 693 $log->debug("normalize: $normalize_source");
355 dpavlin 686
356 dpavlin 720 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
357 dpavlin 692
358 dpavlin 687 if ($self->{debug}) {
359     my $Dumper = PPI::Dumper->new( $Document );
360     $Dumper->print;
361     }
362 dpavlin 686
363 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
364 dpavlin 690
365 dpavlin 687 return 1;
366 dpavlin 686 }
367    
368 dpavlin 692
369 dpavlin 698 =head2 _q
370    
371     Strip single or double quotes around value
372    
373     _q(qq/'foo'/) -> foo
374    
375     =cut
376    
377     sub _q {
378     my $v = shift || return;
379     $v =~ s/^['"]*//g;
380     $v =~ s/['"]*$//g;
381     return $v;
382     }
383    
384     =head2 _input_name
385    
386     Return C<name> value if HASH or arg if scalar
387    
388     _input_name($input)
389    
390     =cut
391    
392     sub _input_name {
393     my $input = shift || return;
394     if (ref($input) eq 'HASH') {
395     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
396     return $input->{name};
397     } else {
398     return $input;
399     }
400     }
401    
402    
403 dpavlin 686 =head1 AUTHOR
404    
405     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
406    
407     =head1 COPYRIGHT & LICENSE
408    
409     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
410    
411     This program is free software; you can redistribute it and/or modify it
412     under the same terms as Perl itself.
413    
414     =cut
415    
416     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26