/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 698 - (hide annotations)
Mon Sep 25 11:14:53 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 9074 byte(s)
 r990@llin:  dpavlin | 2006-09-25 13:12:42 +0200
 new depends method to track dependencies, input in most places can be input name or
 hash with key 'name' which will be used as input (for exaple, from configuration file),
 database and input names will have correctly stripped quotes,
 begin removal of old lookup support

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 691 use base qw/WebPAC::Common WebPAC::Normalize/;
13 dpavlin 686
14     =head1 NAME
15    
16     WebPAC::Parser - parse perl normalization configuration files and mungle it
17    
18     =head1 VERSION
19    
20 dpavlin 698 Version 0.04
21 dpavlin 686
22     =cut
23    
24 dpavlin 698 our $VERSION = '0.04';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29     to produce lookups and normalization.
30    
31     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32     reading about LISP. It might be a bit over-the board, but at least it removed
33     separate configuration files for lookups.
34    
35     This is experimental code, but it replaces all older formats which where,
36     at one point in time, available in WebPAC.
37    
38 dpavlin 686 FIXME
39    
40     =head1 FUNCTIONS
41    
42     =head2 new
43    
44     Create new parser object.
45    
46     my $parser = new WebPAC::Parser(
47     config => new WebPAC::Config(),
48     base_path => '/optional/path/to/conf',
49     );
50    
51     =cut
52    
53     sub new {
54     my $class = shift;
55     my $self = {@_};
56     bless($self, $class);
57    
58     my $log = $self->_get_logger();
59    
60     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61    
62     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63    
64 dpavlin 691 $self->read_sources;
65 dpavlin 686
66     $self->{config}->iterate_inputs( sub {
67 dpavlin 689 my ($input, $database) = @_;
68 dpavlin 698 return unless $self->valid_database_input($database, _input_name($input));
69     $self->parse_lookups($database, _input_name($input));
70 dpavlin 691 } );
71    
72     $self ? return $self : return undef;
73     }
74    
75     =head2 read_sources
76    
77     my $source_files = $parser->read_sources;
78    
79     Called by L</new>.
80    
81     =cut
82    
83 dpavlin 698 sub _input_name($);
84    
85 dpavlin 691 sub read_sources {
86     my $self = shift;
87    
88     my $log = $self->_get_logger();
89    
90     my $nr = 0;
91    
92     $self->{config}->iterate_inputs( sub {
93     my ($input, $database) = @_;
94    
95 dpavlin 698 my @normalize;
96 dpavlin 691
97 dpavlin 698 if (ref($input->{normalize}) eq 'ARRAY') {
98     @normalize = @{ $input->{normalize} };
99     } else {
100     @normalize = ( $input->{normalize} );
101     }
102 dpavlin 691
103 dpavlin 698 foreach my $normalize (@normalize) {
104 dpavlin 691
105 dpavlin 698 my $path = $normalize->{path};
106     return unless($path);
107     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
108 dpavlin 691
109 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
110 dpavlin 691
111 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
112 dpavlin 691
113 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
114 dpavlin 691
115 dpavlin 698 $log->debug("$database/$input_name: adding $path");
116    
117     $self->{valid_inputs}->{$database}->{$input_name} = {
118     source => $s,
119     path => $full,
120     usage => 0,
121     } unless defined($self->{valid_inputs}->{$database}->{$input_name});
122    
123     $self->{valid_inputs}->{$database}->{$input_name}->{usage}++;
124    
125     $nr++;
126     }
127 dpavlin 686 } );
128    
129 dpavlin 691 $log->debug("found $nr source files");
130 dpavlin 686
131 dpavlin 691 return $nr;
132 dpavlin 686 }
133    
134 dpavlin 692 =head2 parse_lookups
135 dpavlin 686
136 dpavlin 691 $parser->parse_lookups($database,$input);
137    
138 dpavlin 698 Called for each input by L</new>
139    
140     It will report invalid databases and inputs in error log after parsing.
141    
142 dpavlin 686 =cut
143    
144 dpavlin 691 sub parse_lookups {
145 dpavlin 686 my $self = shift;
146 dpavlin 691 my ($database, $input) = @_;
147 dpavlin 686
148 dpavlin 698 $input = _input_name($input);
149    
150 dpavlin 686 my $log = $self->_get_logger();
151    
152 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
153     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
154 dpavlin 686
155 dpavlin 691 my $source = $self->{valid_inputs}->{$database}->{$input}->{source};
156     my $path = $self->{valid_inputs}->{$database}->{$input}->{path};
157 dpavlin 689
158 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
159 dpavlin 686
160 dpavlin 692 $log->info("parsing lookups for $database/$input from $path");
161 dpavlin 691
162     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
163    
164 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
165     #$Document->prune('PPI::Token::Operator');
166    
167     # Find all the named subroutines
168    
169 dpavlin 691 $self->{_lookup_errors} = ();
170 dpavlin 686
171 dpavlin 691 sub _lookup_error {
172     my $self = shift;
173     my $msg = shift;
174     $self->_get_logger->logconfess("error without message?") unless ($msg);
175     push @{ $self->{_lookup_errors} }, $msg;
176 dpavlin 690 return '';
177     }
178    
179 dpavlin 686 $Document->find( sub {
180     my ($Document,$Element) = @_;
181    
182     $Element->isa('PPI::Token::Word') or return '';
183     $Element->content eq 'lookup' or return '';
184    
185 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
186 dpavlin 686
187     my $args = $Element->snext_sibling;
188    
189     my @e = $args->child(0)->elements;
190 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
191 dpavlin 686
192 dpavlin 687 if ($log->is_debug) {
193     my $report = "found " . scalar @e . " elements:\n";
194 dpavlin 686
195 dpavlin 687 foreach my $i ( 0 .. $#e ) {
196     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
197     }
198    
199     $log->debug($report);
200 dpavlin 686 }
201    
202     my $key_element = $e[8]->clone;
203    
204 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
205 dpavlin 686
206 dpavlin 687 $log->debug("key part: ", $key_element);
207 dpavlin 686
208     my @key;
209    
210     $key_element->find( sub {
211     my $e = $_[1] || die "no element?";
212     $e->isa('PPI::Token::Word') or return '';
213     $e->content eq 'rec' or return '';
214    
215     my $kf = $e->snext_sibling;
216    
217 dpavlin 687 $log->debug("key fragment = $kf");
218 dpavlin 686
219     push @key, eval $kf;
220 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
221 dpavlin 686
222     return 1;
223     });
224    
225 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
226 dpavlin 686
227 dpavlin 687 $log->debug("key = $key");
228 dpavlin 686
229     my $create = '
230     $coderef = ' . $e[7] . $e[8] . ';
231     foreach my $v ($coderef->()) {
232     next unless (defined($v) && $v ne \'\');
233     push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
234     }
235     ';
236    
237 dpavlin 687 $log->debug("create: $create");
238 dpavlin 686
239 dpavlin 691 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
240     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
241 dpavlin 689
242 dpavlin 698 # save code to create this lookup
243     $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) } .= $create;
244 dpavlin 686
245 dpavlin 698
246     if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } )) {
247     my $dep_key = $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) };
248     $log->warn("dependency of $database/$input on $e[3]/$e[5] allready recorded as $dep_key, now changed to $key") if ($dep_key ne $key);
249     }
250    
251     # save this dependency
252     $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } .= $key;
253    
254 dpavlin 686 if ($#e < 10) {
255     $e[8]->insert_after( $e[8]->clone );
256     $e[8]->insert_after( $e[7]->clone );
257     $e[8]->insert_after( $e[6]->clone );
258     }
259    
260     $e[7]->remove;
261     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
262     $e[8]->remove;
263    
264    
265 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
266 dpavlin 686 });
267    
268 dpavlin 693 my $normalize_source = $Document->serialize;
269 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
270 dpavlin 693 $log->debug("normalize: $normalize_source");
271 dpavlin 686
272 dpavlin 693 $self->{_normalize_source}->{$database}->{$input} = $normalize_source;
273 dpavlin 692
274 dpavlin 687 if ($self->{debug}) {
275     my $Dumper = PPI::Dumper->new( $Document );
276     $Dumper->print;
277     }
278 dpavlin 686
279 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
280 dpavlin 690
281 dpavlin 687 return 1;
282 dpavlin 686 }
283    
284 dpavlin 692
285 dpavlin 698 =head2 lookup_create_rules
286 dpavlin 692
287 dpavlin 698 my $source = $parser->lookup_create_rules($database, $input);
288    
289 dpavlin 692 =cut
290    
291 dpavlin 698 sub lookup_create_rules {
292 dpavlin 692 my $self = shift;
293 dpavlin 698 my ($database,$input) = @_;
294     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
295 dpavlin 692 }
296    
297 dpavlin 689 =head2 valid_database
298    
299     my $ok = $parse->valid_database('key');
300    
301     =cut
302    
303     sub valid_database {
304     my $self = shift;
305    
306     my $database = shift || return;
307    
308 dpavlin 698 return defined($self->{valid_inputs}->{ _q($database) });
309 dpavlin 689 }
310    
311     =head2 valid_database_input
312    
313     my $ok = $parse->valid_database('database_key','input_name');
314    
315     =cut
316    
317     sub valid_database_input {
318     my $self = shift;
319    
320     my ($database,$input) = @_;
321 dpavlin 698 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
322     }
323 dpavlin 689
324 dpavlin 698 =head2 depends
325    
326     Return all databases and inputs on which specified one depends
327    
328     $depends_on = $parser->depends('database','input');
329    
330     =cut
331    
332     sub depends {
333     my $self = shift;
334     my ($database,$input) = @_;
335     $self->_get_logger->debug("depends($database,$input)");
336     return unless defined( $self->{depends}->{ _q($database) }->{ _q($input) } );
337     return $self->{depends}->{ _q($database) }->{ _q($input) };
338 dpavlin 689 }
339    
340 dpavlin 698 =head1 PRIVATE
341    
342     =head2 _q
343    
344     Strip single or double quotes around value
345    
346     _q(qq/'foo'/) -> foo
347    
348     =cut
349    
350     sub _q {
351     my $v = shift || return;
352     $v =~ s/^['"]*//g;
353     $v =~ s/['"]*$//g;
354     return $v;
355     }
356    
357     =head2 _input_name
358    
359     Return C<name> value if HASH or arg if scalar
360    
361     _input_name($input)
362    
363     =cut
364    
365     sub _input_name {
366     my $input = shift || return;
367     if (ref($input) eq 'HASH') {
368     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
369     return $input->{name};
370     } else {
371     return $input;
372     }
373     }
374    
375    
376 dpavlin 686 =head1 AUTHOR
377    
378     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
379    
380     =head1 COPYRIGHT & LICENSE
381    
382     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
383    
384     This program is free software; you can redistribute it and/or modify it
385     under the same terms as Perl itself.
386    
387     =cut
388    
389     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26