/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 705 - (hide annotations)
Mon Sep 25 13:46:36 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 8906 byte(s)
 r1004@llin:  dpavlin | 2006-09-25 15:44:23 +0200
 test lookup_create_rules

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 691 use base qw/WebPAC::Common WebPAC::Normalize/;
13 dpavlin 686
14     =head1 NAME
15    
16     WebPAC::Parser - parse perl normalization configuration files and mungle it
17    
18     =head1 VERSION
19    
20 dpavlin 703 Version 0.05
21 dpavlin 686
22     =cut
23    
24 dpavlin 703 our $VERSION = '0.05';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29     to produce lookups and normalization.
30    
31     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32     reading about LISP. It might be a bit over-the board, but at least it removed
33     separate configuration files for lookups.
34    
35     This is experimental code, but it replaces all older formats which where,
36     at one point in time, available in WebPAC.
37    
38 dpavlin 686 FIXME
39    
40     =head1 FUNCTIONS
41    
42     =head2 new
43    
44     Create new parser object.
45    
46     my $parser = new WebPAC::Parser(
47     config => new WebPAC::Config(),
48     base_path => '/optional/path/to/conf',
49     );
50    
51     =cut
52    
53     sub new {
54     my $class = shift;
55     my $self = {@_};
56     bless($self, $class);
57    
58     my $log = $self->_get_logger();
59    
60     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61    
62     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63    
64 dpavlin 703 $self->_read_sources;
65 dpavlin 686
66 dpavlin 691 $self ? return $self : return undef;
67     }
68    
69 dpavlin 703 =head2 valid_database
70    
71     my $ok = $parse->valid_database('key');
72    
73     =cut
74    
75     sub valid_database {
76     my $self = shift;
77    
78     my $database = shift || return;
79    
80     return defined($self->{valid_inputs}->{ _q($database) });
81     }
82    
83     =head2 valid_database_input
84    
85     my $ok = $parse->valid_database('database_key','input_name');
86    
87     =cut
88    
89     sub valid_database_input {
90     my $self = shift;
91     my ($database,$input) = @_;
92 dpavlin 705 $input = _input_name($input);
93 dpavlin 703 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
94     }
95    
96     =head2 depends
97    
98     Return all databases and inputs on which specified one depends
99    
100     $depends_on = $parser->depends('database','input');
101    
102     =cut
103    
104     sub depends {
105     my $self = shift;
106     my ($database,$input) = @_;
107 dpavlin 705 $input = _input_name($input);
108 dpavlin 703 $self->_get_logger->debug("depends($database,$input)");
109     return unless (
110     defined( $self->{depends}->{ _q($database) } ) &&
111     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
112     );
113     return $self->{depends}->{ _q($database) }->{ _q($input) };
114     }
115    
116 dpavlin 705 =head2 lookup_create_rules
117    
118     my $source = $parser->lookup_create_rules($database, $input);
119    
120     =cut
121    
122     sub lookup_create_rules {
123     my $self = shift;
124     my ($database,$input) = @_;
125     $input = _input_name($input);
126     return unless (
127     defined( $self->{_lookup_create}->{ _q($database) } ) &&
128     defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
129     );
130     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
131     }
132    
133 dpavlin 703 =head1 PRIVATE
134    
135     =head2 _read_sources
136    
137     my $source_files = $parser->_read_sources;
138    
139 dpavlin 691 Called by L</new>.
140    
141     =cut
142    
143 dpavlin 703 sub _read_sources {
144 dpavlin 691 my $self = shift;
145    
146     my $log = $self->_get_logger();
147    
148     my $nr = 0;
149    
150 dpavlin 701 my @lookups;
151    
152 dpavlin 691 $self->{config}->iterate_inputs( sub {
153     my ($input, $database) = @_;
154    
155 dpavlin 699 $log->debug("database: $database input = ", dump($input));
156    
157 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
158 dpavlin 691
159 dpavlin 698 my $path = $normalize->{path};
160     return unless($path);
161     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
162 dpavlin 691
163 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
164 dpavlin 691
165 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
166 dpavlin 691
167 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
168 dpavlin 691
169 dpavlin 698 $log->debug("$database/$input_name: adding $path");
170    
171 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
172 dpavlin 698
173 dpavlin 701 push @lookups, sub {
174 dpavlin 703 $self->_parse_lookups( $database, $input_name, $full, $s );
175 dpavlin 701 };
176 dpavlin 698
177     $nr++;
178     }
179 dpavlin 686 } );
180    
181 dpavlin 691 $log->debug("found $nr source files");
182 dpavlin 686
183 dpavlin 701 # parse all lookups
184     $_->() foreach (@lookups);
185    
186 dpavlin 691 return $nr;
187 dpavlin 686 }
188    
189 dpavlin 703 =head2 _parse_lookups
190 dpavlin 686
191 dpavlin 703 $parser->_parse_lookups($database,$input,$path,$source);
192 dpavlin 691
193 dpavlin 703 Called for each normalize source (rules) in each input by L</read_sources>
194 dpavlin 698
195     It will report invalid databases and inputs in error log after parsing.
196    
197 dpavlin 686 =cut
198    
199 dpavlin 703 sub _parse_lookups {
200 dpavlin 686 my $self = shift;
201 dpavlin 701 my ($database, $input, $path, $source) = @_;
202 dpavlin 686
203 dpavlin 698 $input = _input_name($input);
204    
205 dpavlin 686 my $log = $self->_get_logger();
206    
207 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
208     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
209 dpavlin 686
210 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
211 dpavlin 686
212 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
213 dpavlin 691
214     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
215    
216 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
217     #$Document->prune('PPI::Token::Operator');
218    
219     # Find all the named subroutines
220    
221 dpavlin 691 $self->{_lookup_errors} = ();
222 dpavlin 686
223 dpavlin 691 sub _lookup_error {
224     my $self = shift;
225     my $msg = shift;
226     $self->_get_logger->logconfess("error without message?") unless ($msg);
227     push @{ $self->{_lookup_errors} }, $msg;
228 dpavlin 690 return '';
229     }
230    
231 dpavlin 686 $Document->find( sub {
232     my ($Document,$Element) = @_;
233    
234     $Element->isa('PPI::Token::Word') or return '';
235     $Element->content eq 'lookup' or return '';
236    
237 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
238 dpavlin 686
239     my $args = $Element->snext_sibling;
240    
241     my @e = $args->child(0)->elements;
242 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
243 dpavlin 686
244 dpavlin 687 if ($log->is_debug) {
245     my $report = "found " . scalar @e . " elements:\n";
246 dpavlin 686
247 dpavlin 687 foreach my $i ( 0 .. $#e ) {
248     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
249     }
250    
251     $log->debug($report);
252 dpavlin 686 }
253    
254     my $key_element = $e[8]->clone;
255    
256 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
257 dpavlin 686
258 dpavlin 687 $log->debug("key part: ", $key_element);
259 dpavlin 686
260     my @key;
261    
262     $key_element->find( sub {
263     my $e = $_[1] || die "no element?";
264     $e->isa('PPI::Token::Word') or return '';
265     $e->content eq 'rec' or return '';
266    
267     my $kf = $e->snext_sibling;
268    
269 dpavlin 687 $log->debug("key fragment = $kf");
270 dpavlin 686
271     push @key, eval $kf;
272 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
273 dpavlin 686
274     return 1;
275     });
276    
277 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
278 dpavlin 686
279 dpavlin 687 $log->debug("key = $key");
280 dpavlin 686
281     my $create = '
282     $coderef = ' . $e[7] . $e[8] . ';
283     foreach my $v ($coderef->()) {
284     next unless (defined($v) && $v ne \'\');
285     push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
286     }
287     ';
288    
289 dpavlin 687 $log->debug("create: $create");
290 dpavlin 686
291 dpavlin 691 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
292     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
293 dpavlin 689
294 dpavlin 698 # save code to create this lookup
295 dpavlin 705 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) } .= "# lookup for $e[3]/$e[5]/$key\n\n$create";
296 dpavlin 686
297 dpavlin 698
298 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
299     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
300 dpavlin 698 }
301    
302     # save this dependency
303 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
304 dpavlin 698
305 dpavlin 686 if ($#e < 10) {
306     $e[8]->insert_after( $e[8]->clone );
307     $e[8]->insert_after( $e[7]->clone );
308     $e[8]->insert_after( $e[6]->clone );
309     }
310    
311     $e[7]->remove;
312     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
313     $e[8]->remove;
314    
315    
316 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
317 dpavlin 686 });
318    
319 dpavlin 693 my $normalize_source = $Document->serialize;
320 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
321 dpavlin 693 $log->debug("normalize: $normalize_source");
322 dpavlin 686
323 dpavlin 693 $self->{_normalize_source}->{$database}->{$input} = $normalize_source;
324 dpavlin 692
325 dpavlin 687 if ($self->{debug}) {
326     my $Dumper = PPI::Dumper->new( $Document );
327     $Dumper->print;
328     }
329 dpavlin 686
330 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
331 dpavlin 690
332 dpavlin 687 return 1;
333 dpavlin 686 }
334    
335 dpavlin 692
336 dpavlin 698 =head2 _q
337    
338     Strip single or double quotes around value
339    
340     _q(qq/'foo'/) -> foo
341    
342     =cut
343    
344     sub _q {
345     my $v = shift || return;
346     $v =~ s/^['"]*//g;
347     $v =~ s/['"]*$//g;
348     return $v;
349     }
350    
351     =head2 _input_name
352    
353     Return C<name> value if HASH or arg if scalar
354    
355     _input_name($input)
356    
357     =cut
358    
359     sub _input_name {
360     my $input = shift || return;
361     if (ref($input) eq 'HASH') {
362     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
363     return $input->{name};
364     } else {
365     return $input;
366     }
367     }
368    
369    
370 dpavlin 686 =head1 AUTHOR
371    
372     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
373    
374     =head1 COPYRIGHT & LICENSE
375    
376     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
377    
378     This program is free software; you can redistribute it and/or modify it
379     under the same terms as Perl itself.
380    
381     =cut
382    
383     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26