/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 703 - (hide annotations)
Mon Sep 25 13:24:09 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 8627 byte(s)
 r1000@llin:  dpavlin | 2006-09-25 15:16:49 +0200
 support multiple lookups on same input file
 move _read_sources and _parse_lookups to private functions

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 691 use base qw/WebPAC::Common WebPAC::Normalize/;
13 dpavlin 686
14     =head1 NAME
15    
16     WebPAC::Parser - parse perl normalization configuration files and mungle it
17    
18     =head1 VERSION
19    
20 dpavlin 703 Version 0.05
21 dpavlin 686
22     =cut
23    
24 dpavlin 703 our $VERSION = '0.05';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29     to produce lookups and normalization.
30    
31     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32     reading about LISP. It might be a bit over-the board, but at least it removed
33     separate configuration files for lookups.
34    
35     This is experimental code, but it replaces all older formats which where,
36     at one point in time, available in WebPAC.
37    
38 dpavlin 686 FIXME
39    
40     =head1 FUNCTIONS
41    
42     =head2 new
43    
44     Create new parser object.
45    
46     my $parser = new WebPAC::Parser(
47     config => new WebPAC::Config(),
48     base_path => '/optional/path/to/conf',
49     );
50    
51     =cut
52    
53     sub new {
54     my $class = shift;
55     my $self = {@_};
56     bless($self, $class);
57    
58     my $log = $self->_get_logger();
59    
60     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61    
62     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63    
64 dpavlin 703 $self->_read_sources;
65 dpavlin 686
66 dpavlin 691 $self ? return $self : return undef;
67     }
68    
69 dpavlin 703 =head2 lookup_create_rules
70 dpavlin 691
71 dpavlin 703 my $source = $parser->lookup_create_rules($database, $input);
72 dpavlin 691
73 dpavlin 703 =cut
74    
75     sub lookup_create_rules {
76     my $self = shift;
77     my ($database,$input) = @_;
78     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
79     }
80    
81     =head2 valid_database
82    
83     my $ok = $parse->valid_database('key');
84    
85     =cut
86    
87     sub valid_database {
88     my $self = shift;
89    
90     my $database = shift || return;
91    
92     return defined($self->{valid_inputs}->{ _q($database) });
93     }
94    
95     =head2 valid_database_input
96    
97     my $ok = $parse->valid_database('database_key','input_name');
98    
99     =cut
100    
101     sub valid_database_input {
102     my $self = shift;
103    
104     my ($database,$input) = @_;
105     return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
106     }
107    
108     =head2 depends
109    
110     Return all databases and inputs on which specified one depends
111    
112     $depends_on = $parser->depends('database','input');
113    
114     =cut
115    
116     sub depends {
117     my $self = shift;
118     my ($database,$input) = @_;
119     $self->_get_logger->debug("depends($database,$input)");
120     return unless (
121     defined( $self->{depends}->{ _q($database) } ) &&
122     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
123     );
124     return $self->{depends}->{ _q($database) }->{ _q($input) };
125     }
126    
127     =head1 PRIVATE
128    
129     =head2 _read_sources
130    
131     my $source_files = $parser->_read_sources;
132    
133 dpavlin 691 Called by L</new>.
134    
135     =cut
136    
137 dpavlin 703 sub _read_sources {
138 dpavlin 691 my $self = shift;
139    
140     my $log = $self->_get_logger();
141    
142     my $nr = 0;
143    
144 dpavlin 701 my @lookups;
145    
146 dpavlin 691 $self->{config}->iterate_inputs( sub {
147     my ($input, $database) = @_;
148    
149 dpavlin 699 $log->debug("database: $database input = ", dump($input));
150    
151 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
152 dpavlin 691
153 dpavlin 698 my $path = $normalize->{path};
154     return unless($path);
155     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
156 dpavlin 691
157 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
158 dpavlin 691
159 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
160 dpavlin 691
161 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
162 dpavlin 691
163 dpavlin 698 $log->debug("$database/$input_name: adding $path");
164    
165 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
166 dpavlin 698
167 dpavlin 701 push @lookups, sub {
168 dpavlin 703 $self->_parse_lookups( $database, $input_name, $full, $s );
169 dpavlin 701 };
170 dpavlin 698
171     $nr++;
172     }
173 dpavlin 686 } );
174    
175 dpavlin 691 $log->debug("found $nr source files");
176 dpavlin 686
177 dpavlin 701 # parse all lookups
178     $_->() foreach (@lookups);
179    
180 dpavlin 691 return $nr;
181 dpavlin 686 }
182    
183 dpavlin 703 =head2 _parse_lookups
184 dpavlin 686
185 dpavlin 703 $parser->_parse_lookups($database,$input,$path,$source);
186 dpavlin 691
187 dpavlin 703 Called for each normalize source (rules) in each input by L</read_sources>
188 dpavlin 698
189     It will report invalid databases and inputs in error log after parsing.
190    
191 dpavlin 686 =cut
192    
193 dpavlin 703 sub _parse_lookups {
194 dpavlin 686 my $self = shift;
195 dpavlin 701 my ($database, $input, $path, $source) = @_;
196 dpavlin 686
197 dpavlin 698 $input = _input_name($input);
198    
199 dpavlin 686 my $log = $self->_get_logger();
200    
201 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
202     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
203 dpavlin 686
204 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
205 dpavlin 686
206 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
207 dpavlin 691
208     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
209    
210 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
211     #$Document->prune('PPI::Token::Operator');
212    
213     # Find all the named subroutines
214    
215 dpavlin 691 $self->{_lookup_errors} = ();
216 dpavlin 686
217 dpavlin 691 sub _lookup_error {
218     my $self = shift;
219     my $msg = shift;
220     $self->_get_logger->logconfess("error without message?") unless ($msg);
221     push @{ $self->{_lookup_errors} }, $msg;
222 dpavlin 690 return '';
223     }
224    
225 dpavlin 686 $Document->find( sub {
226     my ($Document,$Element) = @_;
227    
228     $Element->isa('PPI::Token::Word') or return '';
229     $Element->content eq 'lookup' or return '';
230    
231 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
232 dpavlin 686
233     my $args = $Element->snext_sibling;
234    
235     my @e = $args->child(0)->elements;
236 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
237 dpavlin 686
238 dpavlin 687 if ($log->is_debug) {
239     my $report = "found " . scalar @e . " elements:\n";
240 dpavlin 686
241 dpavlin 687 foreach my $i ( 0 .. $#e ) {
242     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
243     }
244    
245     $log->debug($report);
246 dpavlin 686 }
247    
248     my $key_element = $e[8]->clone;
249    
250 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
251 dpavlin 686
252 dpavlin 687 $log->debug("key part: ", $key_element);
253 dpavlin 686
254     my @key;
255    
256     $key_element->find( sub {
257     my $e = $_[1] || die "no element?";
258     $e->isa('PPI::Token::Word') or return '';
259     $e->content eq 'rec' or return '';
260    
261     my $kf = $e->snext_sibling;
262    
263 dpavlin 687 $log->debug("key fragment = $kf");
264 dpavlin 686
265     push @key, eval $kf;
266 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
267 dpavlin 686
268     return 1;
269     });
270    
271 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
272 dpavlin 686
273 dpavlin 687 $log->debug("key = $key");
274 dpavlin 686
275     my $create = '
276     $coderef = ' . $e[7] . $e[8] . ';
277     foreach my $v ($coderef->()) {
278     next unless (defined($v) && $v ne \'\');
279     push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
280     }
281     ';
282    
283 dpavlin 687 $log->debug("create: $create");
284 dpavlin 686
285 dpavlin 691 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
286     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
287 dpavlin 689
288 dpavlin 698 # save code to create this lookup
289     $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) } .= $create;
290 dpavlin 686
291 dpavlin 698
292 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
293     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
294 dpavlin 698 }
295    
296     # save this dependency
297 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
298 dpavlin 698
299 dpavlin 686 if ($#e < 10) {
300     $e[8]->insert_after( $e[8]->clone );
301     $e[8]->insert_after( $e[7]->clone );
302     $e[8]->insert_after( $e[6]->clone );
303     }
304    
305     $e[7]->remove;
306     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
307     $e[8]->remove;
308    
309    
310 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
311 dpavlin 686 });
312    
313 dpavlin 693 my $normalize_source = $Document->serialize;
314 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
315 dpavlin 693 $log->debug("normalize: $normalize_source");
316 dpavlin 686
317 dpavlin 693 $self->{_normalize_source}->{$database}->{$input} = $normalize_source;
318 dpavlin 692
319 dpavlin 687 if ($self->{debug}) {
320     my $Dumper = PPI::Dumper->new( $Document );
321     $Dumper->print;
322     }
323 dpavlin 686
324 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
325 dpavlin 690
326 dpavlin 687 return 1;
327 dpavlin 686 }
328    
329 dpavlin 692
330 dpavlin 698 =head2 _q
331    
332     Strip single or double quotes around value
333    
334     _q(qq/'foo'/) -> foo
335    
336     =cut
337    
338     sub _q {
339     my $v = shift || return;
340     $v =~ s/^['"]*//g;
341     $v =~ s/['"]*$//g;
342     return $v;
343     }
344    
345     =head2 _input_name
346    
347     Return C<name> value if HASH or arg if scalar
348    
349     _input_name($input)
350    
351     =cut
352    
353     sub _input_name {
354     my $input = shift || return;
355     if (ref($input) eq 'HASH') {
356     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
357     return $input->{name};
358     } else {
359     return $input;
360     }
361     }
362    
363    
364 dpavlin 686 =head1 AUTHOR
365    
366     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
367    
368     =head1 COPYRIGHT & LICENSE
369    
370     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
371    
372     This program is free software; you can redistribute it and/or modify it
373     under the same terms as Perl itself.
374    
375     =cut
376    
377     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26