/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 701 - (hide annotations)
Mon Sep 25 12:51:47 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 8728 byte(s)
 r994@llin:  dpavlin | 2006-09-25 14:49:05 +0200
 refactore support for multiple normalize files in each lookup

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 691 use base qw/WebPAC::Common WebPAC::Normalize/;
13 dpavlin 686
14     =head1 NAME
15    
16     WebPAC::Parser - parse perl normalization configuration files and mungle it
17    
18     =head1 VERSION
19    
20 dpavlin 698 Version 0.04
21 dpavlin 686
22     =cut
23    
24 dpavlin 698 our $VERSION = '0.04';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29     to produce lookups and normalization.
30    
31     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32     reading about LISP. It might be a bit over-the board, but at least it removed
33     separate configuration files for lookups.
34    
35     This is experimental code, but it replaces all older formats which where,
36     at one point in time, available in WebPAC.
37    
38 dpavlin 686 FIXME
39    
40     =head1 FUNCTIONS
41    
42     =head2 new
43    
44     Create new parser object.
45    
46     my $parser = new WebPAC::Parser(
47     config => new WebPAC::Config(),
48     base_path => '/optional/path/to/conf',
49     );
50    
51     =cut
52    
53     sub new {
54     my $class = shift;
55     my $self = {@_};
56     bless($self, $class);
57    
58     my $log = $self->_get_logger();
59    
60     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61    
62     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63    
64 dpavlin 691 $self->read_sources;
65 dpavlin 686
66 dpavlin 691 $self ? return $self : return undef;
67     }
68    
69     =head2 read_sources
70    
71     my $source_files = $parser->read_sources;
72    
73     Called by L</new>.
74    
75     =cut
76    
77     sub read_sources {
78     my $self = shift;
79    
80     my $log = $self->_get_logger();
81    
82     my $nr = 0;
83    
84 dpavlin 701 my @lookups;
85    
86 dpavlin 691 $self->{config}->iterate_inputs( sub {
87     my ($input, $database) = @_;
88    
89 dpavlin 699 $log->debug("database: $database input = ", dump($input));
90    
91 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
92 dpavlin 691
93 dpavlin 698 my $path = $normalize->{path};
94     return unless($path);
95     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
96 dpavlin 691
97 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
98 dpavlin 691
99 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
100 dpavlin 691
101 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
102 dpavlin 691
103 dpavlin 698 $log->debug("$database/$input_name: adding $path");
104    
105 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
106 dpavlin 698
107 dpavlin 701 push @lookups, sub {
108     $self->parse_lookups( $database, $input_name, $full, $s );
109     };
110 dpavlin 698
111     $nr++;
112     }
113 dpavlin 686 } );
114    
115 dpavlin 691 $log->debug("found $nr source files");
116 dpavlin 686
117 dpavlin 701 # parse all lookups
118     $_->() foreach (@lookups);
119    
120 dpavlin 691 return $nr;
121 dpavlin 686 }
122    
123 dpavlin 692 =head2 parse_lookups
124 dpavlin 686
125 dpavlin 701 $parser->parse_lookups($database,$input,$path,$source);
126 dpavlin 691
127 dpavlin 701 Called for each normalize source in each input by L</new>
128 dpavlin 698
129     It will report invalid databases and inputs in error log after parsing.
130    
131 dpavlin 686 =cut
132    
133 dpavlin 691 sub parse_lookups {
134 dpavlin 686 my $self = shift;
135 dpavlin 701 my ($database, $input, $path, $source) = @_;
136 dpavlin 686
137 dpavlin 698 $input = _input_name($input);
138    
139 dpavlin 686 my $log = $self->_get_logger();
140    
141 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
142     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
143 dpavlin 686
144 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
145 dpavlin 686
146 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
147 dpavlin 691
148     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
149    
150 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
151     #$Document->prune('PPI::Token::Operator');
152    
153     # Find all the named subroutines
154    
155 dpavlin 691 $self->{_lookup_errors} = ();
156 dpavlin 686
157 dpavlin 691 sub _lookup_error {
158     my $self = shift;
159     my $msg = shift;
160     $self->_get_logger->logconfess("error without message?") unless ($msg);
161     push @{ $self->{_lookup_errors} }, $msg;
162 dpavlin 690 return '';
163     }
164    
165 dpavlin 686 $Document->find( sub {
166     my ($Document,$Element) = @_;
167    
168     $Element->isa('PPI::Token::Word') or return '';
169     $Element->content eq 'lookup' or return '';
170    
171 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
172 dpavlin 686
173     my $args = $Element->snext_sibling;
174    
175     my @e = $args->child(0)->elements;
176 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
177 dpavlin 686
178 dpavlin 687 if ($log->is_debug) {
179     my $report = "found " . scalar @e . " elements:\n";
180 dpavlin 686
181 dpavlin 687 foreach my $i ( 0 .. $#e ) {
182     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
183     }
184    
185     $log->debug($report);
186 dpavlin 686 }
187    
188     my $key_element = $e[8]->clone;
189    
190 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
191 dpavlin 686
192 dpavlin 687 $log->debug("key part: ", $key_element);
193 dpavlin 686
194     my @key;
195    
196     $key_element->find( sub {
197     my $e = $_[1] || die "no element?";
198     $e->isa('PPI::Token::Word') or return '';
199     $e->content eq 'rec' or return '';
200    
201     my $kf = $e->snext_sibling;
202    
203 dpavlin 687 $log->debug("key fragment = $kf");
204 dpavlin 686
205     push @key, eval $kf;
206 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
207 dpavlin 686
208     return 1;
209     });
210    
211 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
212 dpavlin 686
213 dpavlin 687 $log->debug("key = $key");
214 dpavlin 686
215     my $create = '
216     $coderef = ' . $e[7] . $e[8] . ';
217     foreach my $v ($coderef->()) {
218     next unless (defined($v) && $v ne \'\');
219     push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
220     }
221     ';
222    
223 dpavlin 687 $log->debug("create: $create");
224 dpavlin 686
225 dpavlin 691 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
226     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
227 dpavlin 689
228 dpavlin 698 # save code to create this lookup
229     $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) } .= $create;
230 dpavlin 686
231 dpavlin 698
232     if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } )) {
233     my $dep_key = $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) };
234     $log->warn("dependency of $database/$input on $e[3]/$e[5] allready recorded as $dep_key, now changed to $key") if ($dep_key ne $key);
235     }
236    
237     # save this dependency
238     $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } .= $key;
239    
240 dpavlin 686 if ($#e < 10) {
241     $e[8]->insert_after( $e[8]->clone );
242     $e[8]->insert_after( $e[7]->clone );
243     $e[8]->insert_after( $e[6]->clone );
244     }
245    
246     $e[7]->remove;
247     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
248     $e[8]->remove;
249    
250    
251 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
252 dpavlin 686 });
253    
254 dpavlin 693 my $normalize_source = $Document->serialize;
255 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
256 dpavlin 693 $log->debug("normalize: $normalize_source");
257 dpavlin 686
258 dpavlin 693 $self->{_normalize_source}->{$database}->{$input} = $normalize_source;
259 dpavlin 692
260 dpavlin 687 if ($self->{debug}) {
261     my $Dumper = PPI::Dumper->new( $Document );
262     $Dumper->print;
263     }
264 dpavlin 686
265 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
266 dpavlin 690
267 dpavlin 687 return 1;
268 dpavlin 686 }
269    
270 dpavlin 692
271 dpavlin 698 =head2 lookup_create_rules
272 dpavlin 692
273 dpavlin 698 my $source = $parser->lookup_create_rules($database, $input);
274    
275 dpavlin 692 =cut
276    
277 dpavlin 698 sub lookup_create_rules {
278 dpavlin 692 my $self = shift;
279 dpavlin 698 my ($database,$input) = @_;
280     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
281 dpavlin 692 }
282    
283 dpavlin 689 =head2 valid_database
284    
285     my $ok = $parse->valid_database('key');
286    
287     =cut
288    
289     sub valid_database {
290     my $self = shift;
291    
292     my $database = shift || return;
293    
294 dpavlin 698 return defined($self->{valid_inputs}->{ _q($database) });
295 dpavlin 689 }
296    
297     =head2 valid_database_input
298    
299     my $ok = $parse->valid_database('database_key','input_name');
300    
301     =cut
302    
303     sub valid_database_input {
304     my $self = shift;
305    
306     my ($database,$input) = @_;
307 dpavlin 698 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
308     }
309 dpavlin 689
310 dpavlin 698 =head2 depends
311    
312     Return all databases and inputs on which specified one depends
313    
314     $depends_on = $parser->depends('database','input');
315    
316     =cut
317    
318     sub depends {
319     my $self = shift;
320     my ($database,$input) = @_;
321     $self->_get_logger->debug("depends($database,$input)");
322 dpavlin 699 return unless (
323     defined( $self->{depends}->{ _q($database) } ) &&
324     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
325     );
326 dpavlin 698 return $self->{depends}->{ _q($database) }->{ _q($input) };
327 dpavlin 689 }
328    
329 dpavlin 698 =head1 PRIVATE
330    
331     =head2 _q
332    
333     Strip single or double quotes around value
334    
335     _q(qq/'foo'/) -> foo
336    
337     =cut
338    
339     sub _q {
340     my $v = shift || return;
341     $v =~ s/^['"]*//g;
342     $v =~ s/['"]*$//g;
343     return $v;
344     }
345    
346     =head2 _input_name
347    
348     Return C<name> value if HASH or arg if scalar
349    
350     _input_name($input)
351    
352     =cut
353    
354     sub _input_name {
355     my $input = shift || return;
356     if (ref($input) eq 'HASH') {
357     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
358     return $input->{name};
359     } else {
360     return $input;
361     }
362     }
363    
364    
365 dpavlin 686 =head1 AUTHOR
366    
367     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
368    
369     =head1 COPYRIGHT & LICENSE
370    
371     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
372    
373     This program is free software; you can redistribute it and/or modify it
374     under the same terms as Perl itself.
375    
376     =cut
377    
378     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26