/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 720 - (hide annotations)
Fri Sep 29 12:27:36 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 9664 byte(s)
 r1034@llin:  dpavlin | 2006-09-29 14:24:39 +0200
 important bugfix: this will collect ALL normalize rules for given
 database and input in single rule set

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 706 use base qw/WebPAC::Common/;
13 dpavlin 686
14     =head1 NAME
15    
16     WebPAC::Parser - parse perl normalization configuration files and mungle it
17    
18     =head1 VERSION
19    
20 dpavlin 717 Version 0.06
21 dpavlin 686
22     =cut
23    
24 dpavlin 717 our $VERSION = '0.06';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29     to produce lookups and normalization.
30    
31     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32     reading about LISP. It might be a bit over-the board, but at least it removed
33     separate configuration files for lookups.
34    
35     This is experimental code, but it replaces all older formats which where,
36     at one point in time, available in WebPAC.
37    
38 dpavlin 686 FIXME
39    
40     =head1 FUNCTIONS
41    
42     =head2 new
43    
44     Create new parser object.
45    
46     my $parser = new WebPAC::Parser(
47     config => new WebPAC::Config(),
48     base_path => '/optional/path/to/conf',
49     );
50    
51     =cut
52    
53     sub new {
54     my $class = shift;
55     my $self = {@_};
56     bless($self, $class);
57    
58     my $log = $self->_get_logger();
59    
60     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61    
62     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63    
64 dpavlin 703 $self->_read_sources;
65 dpavlin 686
66 dpavlin 691 $self ? return $self : return undef;
67     }
68    
69 dpavlin 703 =head2 valid_database
70    
71     my $ok = $parse->valid_database('key');
72    
73     =cut
74    
75     sub valid_database {
76     my $self = shift;
77    
78     my $database = shift || return;
79    
80     return defined($self->{valid_inputs}->{ _q($database) });
81     }
82    
83     =head2 valid_database_input
84    
85     my $ok = $parse->valid_database('database_key','input_name');
86    
87     =cut
88    
89     sub valid_database_input {
90     my $self = shift;
91     my ($database,$input) = @_;
92 dpavlin 705 $input = _input_name($input);
93 dpavlin 703 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
94     }
95    
96     =head2 depends
97    
98     Return all databases and inputs on which specified one depends
99    
100     $depends_on = $parser->depends('database','input');
101    
102     =cut
103    
104     sub depends {
105     my $self = shift;
106     my ($database,$input) = @_;
107 dpavlin 705 $input = _input_name($input);
108 dpavlin 703 $self->_get_logger->debug("depends($database,$input)");
109     return unless (
110     defined( $self->{depends}->{ _q($database) } ) &&
111     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
112     );
113     return $self->{depends}->{ _q($database) }->{ _q($input) };
114     }
115    
116 dpavlin 706 =head2 have_lookup_create
117    
118     my @keys = $parser->have_lookup_create($database, $input);
119    
120     =cut
121    
122     sub have_lookup_create {
123     my $self = shift;
124     my ($database,$input) = @_;
125     $input = _input_name($input);
126     return unless (
127     defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
128     defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
129     );
130     return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
131     }
132    
133    
134 dpavlin 705 =head2 lookup_create_rules
135    
136     my $source = $parser->lookup_create_rules($database, $input);
137    
138     =cut
139    
140     sub lookup_create_rules {
141     my $self = shift;
142     my ($database,$input) = @_;
143     $input = _input_name($input);
144     return unless (
145     defined( $self->{_lookup_create}->{ _q($database) } ) &&
146     defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
147     );
148     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
149     }
150    
151 dpavlin 717 =head2 normalize_rules
152    
153     my $source = $parser->normalize_rules($database, $input);
154    
155     =cut
156    
157     sub normalize_rules {
158     my $self = shift;
159     my ($database,$input) = @_;
160     $input = _input_name($input);
161     return unless (
162     defined( $self->{_normalize_source}->{ _q($database) } ) &&
163     defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
164     );
165     return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
166     }
167    
168 dpavlin 703 =head1 PRIVATE
169    
170     =head2 _read_sources
171    
172     my $source_files = $parser->_read_sources;
173    
174 dpavlin 691 Called by L</new>.
175    
176     =cut
177    
178 dpavlin 703 sub _read_sources {
179 dpavlin 691 my $self = shift;
180    
181     my $log = $self->_get_logger();
182    
183     my $nr = 0;
184    
185 dpavlin 701 my @lookups;
186    
187 dpavlin 691 $self->{config}->iterate_inputs( sub {
188     my ($input, $database) = @_;
189    
190 dpavlin 699 $log->debug("database: $database input = ", dump($input));
191    
192 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
193 dpavlin 691
194 dpavlin 698 my $path = $normalize->{path};
195     return unless($path);
196     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
197 dpavlin 691
198 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
199 dpavlin 691
200 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
201 dpavlin 691
202 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
203 dpavlin 691
204 dpavlin 698 $log->debug("$database/$input_name: adding $path");
205    
206 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
207 dpavlin 698
208 dpavlin 701 push @lookups, sub {
209 dpavlin 703 $self->_parse_lookups( $database, $input_name, $full, $s );
210 dpavlin 701 };
211 dpavlin 698
212     $nr++;
213     }
214 dpavlin 686 } );
215    
216 dpavlin 691 $log->debug("found $nr source files");
217 dpavlin 686
218 dpavlin 701 # parse all lookups
219     $_->() foreach (@lookups);
220    
221 dpavlin 691 return $nr;
222 dpavlin 686 }
223    
224 dpavlin 703 =head2 _parse_lookups
225 dpavlin 686
226 dpavlin 703 $parser->_parse_lookups($database,$input,$path,$source);
227 dpavlin 691
228 dpavlin 708 Called for each normalize source (rules) in each input by L</_read_sources>
229 dpavlin 698
230     It will report invalid databases and inputs in error log after parsing.
231    
232 dpavlin 686 =cut
233    
234 dpavlin 703 sub _parse_lookups {
235 dpavlin 686 my $self = shift;
236 dpavlin 701 my ($database, $input, $path, $source) = @_;
237 dpavlin 686
238 dpavlin 698 $input = _input_name($input);
239    
240 dpavlin 686 my $log = $self->_get_logger();
241    
242 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
243     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
244 dpavlin 686
245 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
246 dpavlin 686
247 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
248 dpavlin 691
249     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
250    
251 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
252     #$Document->prune('PPI::Token::Operator');
253    
254     # Find all the named subroutines
255    
256 dpavlin 691 $self->{_lookup_errors} = ();
257 dpavlin 686
258 dpavlin 691 sub _lookup_error {
259     my $self = shift;
260     my $msg = shift;
261     $self->_get_logger->logconfess("error without message?") unless ($msg);
262     push @{ $self->{_lookup_errors} }, $msg;
263 dpavlin 690 return '';
264     }
265    
266 dpavlin 686 $Document->find( sub {
267     my ($Document,$Element) = @_;
268    
269     $Element->isa('PPI::Token::Word') or return '';
270     $Element->content eq 'lookup' or return '';
271    
272 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
273 dpavlin 686
274     my $args = $Element->snext_sibling;
275    
276     my @e = $args->child(0)->elements;
277 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
278 dpavlin 686
279 dpavlin 687 if ($log->is_debug) {
280     my $report = "found " . scalar @e . " elements:\n";
281 dpavlin 686
282 dpavlin 687 foreach my $i ( 0 .. $#e ) {
283     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
284     }
285    
286     $log->debug($report);
287 dpavlin 686 }
288    
289     my $key_element = $e[8]->clone;
290    
291 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
292 dpavlin 686
293 dpavlin 687 $log->debug("key part: ", $key_element);
294 dpavlin 686
295     my @key;
296    
297     $key_element->find( sub {
298     my $e = $_[1] || die "no element?";
299     $e->isa('PPI::Token::Word') or return '';
300     $e->content eq 'rec' or return '';
301    
302     my $kf = $e->snext_sibling;
303    
304 dpavlin 687 $log->debug("key fragment = $kf");
305 dpavlin 686
306     push @key, eval $kf;
307 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
308 dpavlin 686
309     return 1;
310     });
311    
312 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
313 dpavlin 686
314 dpavlin 687 $log->debug("key = $key");
315 dpavlin 686
316 dpavlin 712 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
317     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
318 dpavlin 686
319 dpavlin 712 my $create = qq{
320     save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
321     };
322    
323 dpavlin 687 $log->debug("create: $create");
324 dpavlin 686
325 dpavlin 698 # save code to create this lookup
326 dpavlin 706 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
327     $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
328 dpavlin 686
329 dpavlin 698
330 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
331     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
332 dpavlin 698 }
333    
334     # save this dependency
335 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
336 dpavlin 698
337 dpavlin 686 if ($#e < 10) {
338     $e[8]->insert_after( $e[8]->clone );
339     $e[8]->insert_after( $e[7]->clone );
340     $e[8]->insert_after( $e[6]->clone );
341     }
342    
343     $e[7]->remove;
344     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
345     $e[8]->remove;
346    
347    
348 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
349 dpavlin 686 });
350    
351 dpavlin 693 my $normalize_source = $Document->serialize;
352 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
353 dpavlin 693 $log->debug("normalize: $normalize_source");
354 dpavlin 686
355 dpavlin 720 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
356 dpavlin 692
357 dpavlin 687 if ($self->{debug}) {
358     my $Dumper = PPI::Dumper->new( $Document );
359     $Dumper->print;
360     }
361 dpavlin 686
362 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
363 dpavlin 690
364 dpavlin 687 return 1;
365 dpavlin 686 }
366    
367 dpavlin 692
368 dpavlin 698 =head2 _q
369    
370     Strip single or double quotes around value
371    
372     _q(qq/'foo'/) -> foo
373    
374     =cut
375    
376     sub _q {
377     my $v = shift || return;
378     $v =~ s/^['"]*//g;
379     $v =~ s/['"]*$//g;
380     return $v;
381     }
382    
383     =head2 _input_name
384    
385     Return C<name> value if HASH or arg if scalar
386    
387     _input_name($input)
388    
389     =cut
390    
391     sub _input_name {
392     my $input = shift || return;
393     if (ref($input) eq 'HASH') {
394     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
395     return $input->{name};
396     } else {
397     return $input;
398     }
399     }
400    
401    
402 dpavlin 686 =head1 AUTHOR
403    
404     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
405    
406     =head1 COPYRIGHT & LICENSE
407    
408     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
409    
410     This program is free software; you can redistribute it and/or modify it
411     under the same terms as Perl itself.
412    
413     =cut
414    
415     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26