/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 708 - (hide annotations)
Mon Sep 25 16:07:08 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 9218 byte(s)
 r1010@llin:  dpavlin | 2006-09-25 17:42:39 +0200
 typo

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 706 use base qw/WebPAC::Common/;
13 dpavlin 686
14     =head1 NAME
15    
16     WebPAC::Parser - parse perl normalization configuration files and mungle it
17    
18     =head1 VERSION
19    
20 dpavlin 703 Version 0.05
21 dpavlin 686
22     =cut
23    
24 dpavlin 703 our $VERSION = '0.05';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29     to produce lookups and normalization.
30    
31     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32     reading about LISP. It might be a bit over-the board, but at least it removed
33     separate configuration files for lookups.
34    
35     This is experimental code, but it replaces all older formats which where,
36     at one point in time, available in WebPAC.
37    
38 dpavlin 686 FIXME
39    
40     =head1 FUNCTIONS
41    
42     =head2 new
43    
44     Create new parser object.
45    
46     my $parser = new WebPAC::Parser(
47     config => new WebPAC::Config(),
48     base_path => '/optional/path/to/conf',
49     );
50    
51     =cut
52    
53     sub new {
54     my $class = shift;
55     my $self = {@_};
56     bless($self, $class);
57    
58     my $log = $self->_get_logger();
59    
60     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61    
62     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63    
64 dpavlin 703 $self->_read_sources;
65 dpavlin 686
66 dpavlin 691 $self ? return $self : return undef;
67     }
68    
69 dpavlin 703 =head2 valid_database
70    
71     my $ok = $parse->valid_database('key');
72    
73     =cut
74    
75     sub valid_database {
76     my $self = shift;
77    
78     my $database = shift || return;
79    
80     return defined($self->{valid_inputs}->{ _q($database) });
81     }
82    
83     =head2 valid_database_input
84    
85     my $ok = $parse->valid_database('database_key','input_name');
86    
87     =cut
88    
89     sub valid_database_input {
90     my $self = shift;
91     my ($database,$input) = @_;
92 dpavlin 705 $input = _input_name($input);
93 dpavlin 703 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
94     }
95    
96     =head2 depends
97    
98     Return all databases and inputs on which specified one depends
99    
100     $depends_on = $parser->depends('database','input');
101    
102     =cut
103    
104     sub depends {
105     my $self = shift;
106     my ($database,$input) = @_;
107 dpavlin 705 $input = _input_name($input);
108 dpavlin 703 $self->_get_logger->debug("depends($database,$input)");
109     return unless (
110     defined( $self->{depends}->{ _q($database) } ) &&
111     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
112     );
113     return $self->{depends}->{ _q($database) }->{ _q($input) };
114     }
115    
116 dpavlin 706 =head2 have_lookup_create
117    
118     my @keys = $parser->have_lookup_create($database, $input);
119    
120     =cut
121    
122     sub have_lookup_create {
123     my $self = shift;
124     my ($database,$input) = @_;
125     $input = _input_name($input);
126     return unless (
127     defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
128     defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
129     );
130     return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
131     }
132    
133    
134 dpavlin 705 =head2 lookup_create_rules
135    
136     my $source = $parser->lookup_create_rules($database, $input);
137    
138     =cut
139    
140     sub lookup_create_rules {
141     my $self = shift;
142     my ($database,$input) = @_;
143     $input = _input_name($input);
144     return unless (
145     defined( $self->{_lookup_create}->{ _q($database) } ) &&
146     defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
147     );
148     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
149     }
150    
151 dpavlin 703 =head1 PRIVATE
152    
153     =head2 _read_sources
154    
155     my $source_files = $parser->_read_sources;
156    
157 dpavlin 691 Called by L</new>.
158    
159     =cut
160    
161 dpavlin 703 sub _read_sources {
162 dpavlin 691 my $self = shift;
163    
164     my $log = $self->_get_logger();
165    
166     my $nr = 0;
167    
168 dpavlin 701 my @lookups;
169    
170 dpavlin 691 $self->{config}->iterate_inputs( sub {
171     my ($input, $database) = @_;
172    
173 dpavlin 699 $log->debug("database: $database input = ", dump($input));
174    
175 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
176 dpavlin 691
177 dpavlin 698 my $path = $normalize->{path};
178     return unless($path);
179     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
180 dpavlin 691
181 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
182 dpavlin 691
183 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
184 dpavlin 691
185 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
186 dpavlin 691
187 dpavlin 698 $log->debug("$database/$input_name: adding $path");
188    
189 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
190 dpavlin 698
191 dpavlin 701 push @lookups, sub {
192 dpavlin 703 $self->_parse_lookups( $database, $input_name, $full, $s );
193 dpavlin 701 };
194 dpavlin 698
195     $nr++;
196     }
197 dpavlin 686 } );
198    
199 dpavlin 691 $log->debug("found $nr source files");
200 dpavlin 686
201 dpavlin 701 # parse all lookups
202     $_->() foreach (@lookups);
203    
204 dpavlin 691 return $nr;
205 dpavlin 686 }
206    
207 dpavlin 703 =head2 _parse_lookups
208 dpavlin 686
209 dpavlin 703 $parser->_parse_lookups($database,$input,$path,$source);
210 dpavlin 691
211 dpavlin 708 Called for each normalize source (rules) in each input by L</_read_sources>
212 dpavlin 698
213     It will report invalid databases and inputs in error log after parsing.
214    
215 dpavlin 686 =cut
216    
217 dpavlin 703 sub _parse_lookups {
218 dpavlin 686 my $self = shift;
219 dpavlin 701 my ($database, $input, $path, $source) = @_;
220 dpavlin 686
221 dpavlin 698 $input = _input_name($input);
222    
223 dpavlin 686 my $log = $self->_get_logger();
224    
225 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
226     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
227 dpavlin 686
228 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
229 dpavlin 686
230 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
231 dpavlin 691
232     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
233    
234 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
235     #$Document->prune('PPI::Token::Operator');
236    
237     # Find all the named subroutines
238    
239 dpavlin 691 $self->{_lookup_errors} = ();
240 dpavlin 686
241 dpavlin 691 sub _lookup_error {
242     my $self = shift;
243     my $msg = shift;
244     $self->_get_logger->logconfess("error without message?") unless ($msg);
245     push @{ $self->{_lookup_errors} }, $msg;
246 dpavlin 690 return '';
247     }
248    
249 dpavlin 686 $Document->find( sub {
250     my ($Document,$Element) = @_;
251    
252     $Element->isa('PPI::Token::Word') or return '';
253     $Element->content eq 'lookup' or return '';
254    
255 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
256 dpavlin 686
257     my $args = $Element->snext_sibling;
258    
259     my @e = $args->child(0)->elements;
260 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
261 dpavlin 686
262 dpavlin 687 if ($log->is_debug) {
263     my $report = "found " . scalar @e . " elements:\n";
264 dpavlin 686
265 dpavlin 687 foreach my $i ( 0 .. $#e ) {
266     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
267     }
268    
269     $log->debug($report);
270 dpavlin 686 }
271    
272     my $key_element = $e[8]->clone;
273    
274 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
275 dpavlin 686
276 dpavlin 687 $log->debug("key part: ", $key_element);
277 dpavlin 686
278     my @key;
279    
280     $key_element->find( sub {
281     my $e = $_[1] || die "no element?";
282     $e->isa('PPI::Token::Word') or return '';
283     $e->content eq 'rec' or return '';
284    
285     my $kf = $e->snext_sibling;
286    
287 dpavlin 687 $log->debug("key fragment = $kf");
288 dpavlin 686
289     push @key, eval $kf;
290 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
291 dpavlin 686
292     return 1;
293     });
294    
295 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
296 dpavlin 686
297 dpavlin 687 $log->debug("key = $key");
298 dpavlin 686
299 dpavlin 707 my $create = "save_into_lookup('$key', $e[7] $e[8] );\n";
300 dpavlin 686
301 dpavlin 687 $log->debug("create: $create");
302 dpavlin 686
303 dpavlin 691 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
304     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
305 dpavlin 689
306 dpavlin 698 # save code to create this lookup
307 dpavlin 706 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
308     $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
309 dpavlin 686
310 dpavlin 698
311 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
312     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
313 dpavlin 698 }
314    
315     # save this dependency
316 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
317 dpavlin 698
318 dpavlin 686 if ($#e < 10) {
319     $e[8]->insert_after( $e[8]->clone );
320     $e[8]->insert_after( $e[7]->clone );
321     $e[8]->insert_after( $e[6]->clone );
322     }
323    
324     $e[7]->remove;
325     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
326     $e[8]->remove;
327    
328    
329 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
330 dpavlin 686 });
331    
332 dpavlin 693 my $normalize_source = $Document->serialize;
333 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
334 dpavlin 693 $log->debug("normalize: $normalize_source");
335 dpavlin 686
336 dpavlin 693 $self->{_normalize_source}->{$database}->{$input} = $normalize_source;
337 dpavlin 692
338 dpavlin 687 if ($self->{debug}) {
339     my $Dumper = PPI::Dumper->new( $Document );
340     $Dumper->print;
341     }
342 dpavlin 686
343 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
344 dpavlin 690
345 dpavlin 687 return 1;
346 dpavlin 686 }
347    
348 dpavlin 692
349 dpavlin 698 =head2 _q
350    
351     Strip single or double quotes around value
352    
353     _q(qq/'foo'/) -> foo
354    
355     =cut
356    
357     sub _q {
358     my $v = shift || return;
359     $v =~ s/^['"]*//g;
360     $v =~ s/['"]*$//g;
361     return $v;
362     }
363    
364     =head2 _input_name
365    
366     Return C<name> value if HASH or arg if scalar
367    
368     _input_name($input)
369    
370     =cut
371    
372     sub _input_name {
373     my $input = shift || return;
374     if (ref($input) eq 'HASH') {
375     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
376     return $input->{name};
377     } else {
378     return $input;
379     }
380     }
381    
382    
383 dpavlin 686 =head1 AUTHOR
384    
385     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
386    
387     =head1 COPYRIGHT & LICENSE
388    
389     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
390    
391     This program is free software; you can redistribute it and/or modify it
392     under the same terms as Perl itself.
393    
394     =cut
395    
396     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26