/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 699 - (hide annotations)
Mon Sep 25 12:51:17 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 9241 byte(s)
 r992@llin:  dpavlin | 2006-09-25 13:48:56 +0200
 tweaks

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 691 use base qw/WebPAC::Common WebPAC::Normalize/;
13 dpavlin 686
14     =head1 NAME
15    
16     WebPAC::Parser - parse perl normalization configuration files and mungle it
17    
18     =head1 VERSION
19    
20 dpavlin 698 Version 0.04
21 dpavlin 686
22     =cut
23    
24 dpavlin 698 our $VERSION = '0.04';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29     to produce lookups and normalization.
30    
31     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32     reading about LISP. It might be a bit over-the board, but at least it removed
33     separate configuration files for lookups.
34    
35     This is experimental code, but it replaces all older formats which where,
36     at one point in time, available in WebPAC.
37    
38 dpavlin 686 FIXME
39    
40     =head1 FUNCTIONS
41    
42     =head2 new
43    
44     Create new parser object.
45    
46     my $parser = new WebPAC::Parser(
47     config => new WebPAC::Config(),
48     base_path => '/optional/path/to/conf',
49     );
50    
51     =cut
52    
53     sub new {
54     my $class = shift;
55     my $self = {@_};
56     bless($self, $class);
57    
58     my $log = $self->_get_logger();
59    
60     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61    
62     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63    
64 dpavlin 691 $self->read_sources;
65 dpavlin 686
66     $self->{config}->iterate_inputs( sub {
67 dpavlin 689 my ($input, $database) = @_;
68 dpavlin 698 return unless $self->valid_database_input($database, _input_name($input));
69     $self->parse_lookups($database, _input_name($input));
70 dpavlin 691 } );
71    
72     $self ? return $self : return undef;
73     }
74    
75     =head2 read_sources
76    
77     my $source_files = $parser->read_sources;
78    
79     Called by L</new>.
80    
81     =cut
82    
83 dpavlin 698 sub _input_name($);
84    
85 dpavlin 691 sub read_sources {
86     my $self = shift;
87    
88     my $log = $self->_get_logger();
89    
90     my $nr = 0;
91    
92     $self->{config}->iterate_inputs( sub {
93     my ($input, $database) = @_;
94    
95 dpavlin 699 $log->debug("database: $database input = ", dump($input));
96    
97 dpavlin 698 my @normalize;
98 dpavlin 691
99 dpavlin 698 if (ref($input->{normalize}) eq 'ARRAY') {
100     @normalize = @{ $input->{normalize} };
101     } else {
102     @normalize = ( $input->{normalize} );
103     }
104 dpavlin 691
105 dpavlin 699 $log->warn("normalize = ",dump(@normalize));
106    
107 dpavlin 698 foreach my $normalize (@normalize) {
108 dpavlin 691
109 dpavlin 698 my $path = $normalize->{path};
110     return unless($path);
111     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
112 dpavlin 691
113 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
114 dpavlin 691
115 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
116 dpavlin 691
117 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
118 dpavlin 691
119 dpavlin 698 $log->debug("$database/$input_name: adding $path");
120    
121     $self->{valid_inputs}->{$database}->{$input_name} = {
122     source => $s,
123     path => $full,
124     usage => 0,
125     } unless defined($self->{valid_inputs}->{$database}->{$input_name});
126    
127     $self->{valid_inputs}->{$database}->{$input_name}->{usage}++;
128    
129     $nr++;
130     }
131 dpavlin 686 } );
132    
133 dpavlin 691 $log->debug("found $nr source files");
134 dpavlin 686
135 dpavlin 691 return $nr;
136 dpavlin 686 }
137    
138 dpavlin 692 =head2 parse_lookups
139 dpavlin 686
140 dpavlin 691 $parser->parse_lookups($database,$input);
141    
142 dpavlin 698 Called for each input by L</new>
143    
144     It will report invalid databases and inputs in error log after parsing.
145    
146 dpavlin 686 =cut
147    
148 dpavlin 691 sub parse_lookups {
149 dpavlin 686 my $self = shift;
150 dpavlin 691 my ($database, $input) = @_;
151 dpavlin 686
152 dpavlin 698 $input = _input_name($input);
153    
154 dpavlin 686 my $log = $self->_get_logger();
155    
156 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
157     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
158 dpavlin 686
159 dpavlin 691 my $source = $self->{valid_inputs}->{$database}->{$input}->{source};
160     my $path = $self->{valid_inputs}->{$database}->{$input}->{path};
161 dpavlin 689
162 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
163 dpavlin 686
164 dpavlin 692 $log->info("parsing lookups for $database/$input from $path");
165 dpavlin 691
166     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
167    
168 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
169     #$Document->prune('PPI::Token::Operator');
170    
171     # Find all the named subroutines
172    
173 dpavlin 691 $self->{_lookup_errors} = ();
174 dpavlin 686
175 dpavlin 691 sub _lookup_error {
176     my $self = shift;
177     my $msg = shift;
178     $self->_get_logger->logconfess("error without message?") unless ($msg);
179     push @{ $self->{_lookup_errors} }, $msg;
180 dpavlin 690 return '';
181     }
182    
183 dpavlin 686 $Document->find( sub {
184     my ($Document,$Element) = @_;
185    
186     $Element->isa('PPI::Token::Word') or return '';
187     $Element->content eq 'lookup' or return '';
188    
189 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
190 dpavlin 686
191     my $args = $Element->snext_sibling;
192    
193     my @e = $args->child(0)->elements;
194 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
195 dpavlin 686
196 dpavlin 687 if ($log->is_debug) {
197     my $report = "found " . scalar @e . " elements:\n";
198 dpavlin 686
199 dpavlin 687 foreach my $i ( 0 .. $#e ) {
200     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
201     }
202    
203     $log->debug($report);
204 dpavlin 686 }
205    
206     my $key_element = $e[8]->clone;
207    
208 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
209 dpavlin 686
210 dpavlin 687 $log->debug("key part: ", $key_element);
211 dpavlin 686
212     my @key;
213    
214     $key_element->find( sub {
215     my $e = $_[1] || die "no element?";
216     $e->isa('PPI::Token::Word') or return '';
217     $e->content eq 'rec' or return '';
218    
219     my $kf = $e->snext_sibling;
220    
221 dpavlin 687 $log->debug("key fragment = $kf");
222 dpavlin 686
223     push @key, eval $kf;
224 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
225 dpavlin 686
226     return 1;
227     });
228    
229 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
230 dpavlin 686
231 dpavlin 687 $log->debug("key = $key");
232 dpavlin 686
233     my $create = '
234     $coderef = ' . $e[7] . $e[8] . ';
235     foreach my $v ($coderef->()) {
236     next unless (defined($v) && $v ne \'\');
237     push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
238     }
239     ';
240    
241 dpavlin 687 $log->debug("create: $create");
242 dpavlin 686
243 dpavlin 691 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
244     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
245 dpavlin 689
246 dpavlin 698 # save code to create this lookup
247     $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) } .= $create;
248 dpavlin 686
249 dpavlin 698
250     if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } )) {
251     my $dep_key = $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) };
252     $log->warn("dependency of $database/$input on $e[3]/$e[5] allready recorded as $dep_key, now changed to $key") if ($dep_key ne $key);
253     }
254    
255     # save this dependency
256     $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } .= $key;
257    
258 dpavlin 686 if ($#e < 10) {
259     $e[8]->insert_after( $e[8]->clone );
260     $e[8]->insert_after( $e[7]->clone );
261     $e[8]->insert_after( $e[6]->clone );
262     }
263    
264     $e[7]->remove;
265     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
266     $e[8]->remove;
267    
268    
269 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
270 dpavlin 686 });
271    
272 dpavlin 693 my $normalize_source = $Document->serialize;
273 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
274 dpavlin 693 $log->debug("normalize: $normalize_source");
275 dpavlin 686
276 dpavlin 693 $self->{_normalize_source}->{$database}->{$input} = $normalize_source;
277 dpavlin 692
278 dpavlin 687 if ($self->{debug}) {
279     my $Dumper = PPI::Dumper->new( $Document );
280     $Dumper->print;
281     }
282 dpavlin 686
283 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
284 dpavlin 690
285 dpavlin 687 return 1;
286 dpavlin 686 }
287    
288 dpavlin 692
289 dpavlin 698 =head2 lookup_create_rules
290 dpavlin 692
291 dpavlin 698 my $source = $parser->lookup_create_rules($database, $input);
292    
293 dpavlin 692 =cut
294    
295 dpavlin 698 sub lookup_create_rules {
296 dpavlin 692 my $self = shift;
297 dpavlin 698 my ($database,$input) = @_;
298     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
299 dpavlin 692 }
300    
301 dpavlin 689 =head2 valid_database
302    
303     my $ok = $parse->valid_database('key');
304    
305     =cut
306    
307     sub valid_database {
308     my $self = shift;
309    
310     my $database = shift || return;
311    
312 dpavlin 698 return defined($self->{valid_inputs}->{ _q($database) });
313 dpavlin 689 }
314    
315     =head2 valid_database_input
316    
317     my $ok = $parse->valid_database('database_key','input_name');
318    
319     =cut
320    
321     sub valid_database_input {
322     my $self = shift;
323    
324     my ($database,$input) = @_;
325 dpavlin 698 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
326     }
327 dpavlin 689
328 dpavlin 698 =head2 depends
329    
330     Return all databases and inputs on which specified one depends
331    
332     $depends_on = $parser->depends('database','input');
333    
334     =cut
335    
336     sub depends {
337     my $self = shift;
338     my ($database,$input) = @_;
339     $self->_get_logger->debug("depends($database,$input)");
340 dpavlin 699 return unless (
341     defined( $self->{depends}->{ _q($database) } ) &&
342     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
343     );
344 dpavlin 698 return $self->{depends}->{ _q($database) }->{ _q($input) };
345 dpavlin 689 }
346    
347 dpavlin 698 =head1 PRIVATE
348    
349     =head2 _q
350    
351     Strip single or double quotes around value
352    
353     _q(qq/'foo'/) -> foo
354    
355     =cut
356    
357     sub _q {
358     my $v = shift || return;
359     $v =~ s/^['"]*//g;
360     $v =~ s/['"]*$//g;
361     return $v;
362     }
363    
364     =head2 _input_name
365    
366     Return C<name> value if HASH or arg if scalar
367    
368     _input_name($input)
369    
370     =cut
371    
372     sub _input_name {
373     my $input = shift || return;
374     if (ref($input) eq 'HASH') {
375     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
376     return $input->{name};
377     } else {
378     return $input;
379     }
380     }
381    
382    
383 dpavlin 686 =head1 AUTHOR
384    
385     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
386    
387     =head1 COPYRIGHT & LICENSE
388    
389     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
390    
391     This program is free software; you can redistribute it and/or modify it
392     under the same terms as Perl itself.
393    
394     =cut
395    
396     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26