/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 724 - (show annotations)
Fri Sep 29 18:55:31 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 9706 byte(s)
 r1038@llin:  dpavlin | 2006-09-29 14:27:45 +0200
 remove comments from normalize rules when parsing

1 package WebPAC::Parser;
2
3 use warnings;
4 use strict;
5
6
7 use PPI;
8 use PPI::Dumper;
9 use Data::Dump qw/dump/;
10 use File::Slurp;
11
12 use base qw/WebPAC::Common/;
13
14 =head1 NAME
15
16 WebPAC::Parser - parse perl normalization configuration files and mungle it
17
18 =head1 VERSION
19
20 Version 0.06
21
22 =cut
23
24 our $VERSION = '0.06';
25
26 =head1 SYNOPSIS
27
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization.
30
31 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32 reading about LISP. It might be a bit over-the board, but at least it removed
33 separate configuration files for lookups.
34
35 This is experimental code, but it replaces all older formats which where,
36 at one point in time, available in WebPAC.
37
38 FIXME
39
40 =head1 FUNCTIONS
41
42 =head2 new
43
44 Create new parser object.
45
46 my $parser = new WebPAC::Parser(
47 config => new WebPAC::Config(),
48 base_path => '/optional/path/to/conf',
49 );
50
51 =cut
52
53 sub new {
54 my $class = shift;
55 my $self = {@_};
56 bless($self, $class);
57
58 my $log = $self->_get_logger();
59
60 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61
62 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63
64 $self->_read_sources;
65
66 $self ? return $self : return undef;
67 }
68
69 =head2 valid_database
70
71 my $ok = $parse->valid_database('key');
72
73 =cut
74
75 sub valid_database {
76 my $self = shift;
77
78 my $database = shift || return;
79
80 return defined($self->{valid_inputs}->{ _q($database) });
81 }
82
83 =head2 valid_database_input
84
85 my $ok = $parse->valid_database('database_key','input_name');
86
87 =cut
88
89 sub valid_database_input {
90 my $self = shift;
91 my ($database,$input) = @_;
92 $input = _input_name($input);
93 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
94 }
95
96 =head2 depends
97
98 Return all databases and inputs on which specified one depends
99
100 $depends_on = $parser->depends('database','input');
101
102 =cut
103
104 sub depends {
105 my $self = shift;
106 my ($database,$input) = @_;
107 $input = _input_name($input);
108 $self->_get_logger->debug("depends($database,$input)");
109 return unless (
110 defined( $self->{depends}->{ _q($database) } ) &&
111 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
112 );
113 return $self->{depends}->{ _q($database) }->{ _q($input) };
114 }
115
116 =head2 have_lookup_create
117
118 my @keys = $parser->have_lookup_create($database, $input);
119
120 =cut
121
122 sub have_lookup_create {
123 my $self = shift;
124 my ($database,$input) = @_;
125 $input = _input_name($input);
126 return unless (
127 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
128 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
129 );
130 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
131 }
132
133
134 =head2 lookup_create_rules
135
136 my $source = $parser->lookup_create_rules($database, $input);
137
138 =cut
139
140 sub lookup_create_rules {
141 my $self = shift;
142 my ($database,$input) = @_;
143 $input = _input_name($input);
144 return unless (
145 defined( $self->{_lookup_create}->{ _q($database) } ) &&
146 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
147 );
148 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
149 }
150
151 =head2 normalize_rules
152
153 my $source = $parser->normalize_rules($database, $input);
154
155 =cut
156
157 sub normalize_rules {
158 my $self = shift;
159 my ($database,$input) = @_;
160 $input = _input_name($input);
161 return unless (
162 defined( $self->{_normalize_source}->{ _q($database) } ) &&
163 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
164 );
165 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
166 }
167
168 =head1 PRIVATE
169
170 =head2 _read_sources
171
172 my $source_files = $parser->_read_sources;
173
174 Called by L</new>.
175
176 =cut
177
178 sub _read_sources {
179 my $self = shift;
180
181 my $log = $self->_get_logger();
182
183 my $nr = 0;
184
185 my @lookups;
186
187 $self->{config}->iterate_inputs( sub {
188 my ($input, $database) = @_;
189
190 $log->debug("database: $database input = ", dump($input));
191
192 foreach my $normalize (@{ $input->{normalize} }) {
193
194 my $path = $normalize->{path};
195 return unless($path);
196 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
197
198 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
199
200 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
201
202 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
203
204 $log->debug("$database/$input_name: adding $path");
205
206 $self->{valid_inputs}->{$database}->{$input_name}++;
207
208 push @lookups, sub {
209 $self->_parse_lookups( $database, $input_name, $full, $s );
210 };
211
212 $nr++;
213 }
214 } );
215
216 $log->debug("found $nr source files");
217
218 # parse all lookups
219 $_->() foreach (@lookups);
220
221 return $nr;
222 }
223
224 =head2 _parse_lookups
225
226 $parser->_parse_lookups($database,$input,$path,$source);
227
228 Called for each normalize source (rules) in each input by L</_read_sources>
229
230 It will report invalid databases and inputs in error log after parsing.
231
232 =cut
233
234 sub _parse_lookups {
235 my $self = shift;
236 my ($database, $input, $path, $source) = @_;
237
238 $input = _input_name($input);
239
240 my $log = $self->_get_logger();
241
242 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
243 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
244
245 $log->logdie("no source found for database $database input $input path $path") unless ($source);
246
247 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
248
249 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
250
251 $Document->prune('PPI::Token::Whitespace');
252 $Document->prune('PPI::Token::Comment');
253 #$Document->prune('PPI::Token::Operator');
254
255 # Find all the named subroutines
256
257 $self->{_lookup_errors} = ();
258
259 sub _lookup_error {
260 my $self = shift;
261 my $msg = shift;
262 $self->_get_logger->logconfess("error without message?") unless ($msg);
263 push @{ $self->{_lookup_errors} }, $msg;
264 return '';
265 }
266
267 $Document->find( sub {
268 my ($Document,$Element) = @_;
269
270 $Element->isa('PPI::Token::Word') or return '';
271 $Element->content eq 'lookup' or return '';
272
273 $log->debug("expansion: ", $Element->snext_sibling);
274
275 my $args = $Element->snext_sibling;
276
277 my @e = $args->child(0)->elements;
278 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
279
280 if ($log->is_debug) {
281 my $report = "found " . scalar @e . " elements:\n";
282
283 foreach my $i ( 0 .. $#e ) {
284 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
285 }
286
287 $log->debug($report);
288 }
289
290 my $key_element = $e[8]->clone;
291
292 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
293
294 $log->debug("key part: ", $key_element);
295
296 my @key;
297
298 $key_element->find( sub {
299 my $e = $_[1] || die "no element?";
300 $e->isa('PPI::Token::Word') or return '';
301 $e->content eq 'rec' or return '';
302
303 my $kf = $e->snext_sibling;
304
305 $log->debug("key fragment = $kf");
306
307 push @key, eval $kf;
308 $log->logdie("can't eval { $kf }: $@") if ($@);
309
310 return 1;
311 });
312
313 my $key = join('-', @key ) || $log->logdie("no key found!");
314
315 $log->debug("key = $key");
316
317 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
318 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
319
320 my $create = qq{
321 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
322 };
323
324 $log->debug("create: $create");
325
326 # save code to create this lookup
327 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
328 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
329
330
331 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
332 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
333 }
334
335 # save this dependency
336 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
337
338 if ($#e < 10) {
339 $e[8]->insert_after( $e[8]->clone );
340 $e[8]->insert_after( $e[7]->clone );
341 $e[8]->insert_after( $e[6]->clone );
342 }
343
344 $e[7]->remove;
345 $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
346 $e[8]->remove;
347
348
349 $log->debug(">>> ", $Element->snext_sibling);
350 });
351
352 my $normalize_source = $Document->serialize;
353 $log->debug("create: ", dump($self->{_lookup_create}) );
354 $log->debug("normalize: $normalize_source");
355
356 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
357
358 if ($self->{debug}) {
359 my $Dumper = PPI::Dumper->new( $Document );
360 $Dumper->print;
361 }
362
363 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
364
365 return 1;
366 }
367
368
369 =head2 _q
370
371 Strip single or double quotes around value
372
373 _q(qq/'foo'/) -> foo
374
375 =cut
376
377 sub _q {
378 my $v = shift || return;
379 $v =~ s/^['"]*//g;
380 $v =~ s/['"]*$//g;
381 return $v;
382 }
383
384 =head2 _input_name
385
386 Return C<name> value if HASH or arg if scalar
387
388 _input_name($input)
389
390 =cut
391
392 sub _input_name {
393 my $input = shift || return;
394 if (ref($input) eq 'HASH') {
395 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
396 return $input->{name};
397 } else {
398 return $input;
399 }
400 }
401
402
403 =head1 AUTHOR
404
405 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
406
407 =head1 COPYRIGHT & LICENSE
408
409 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
410
411 This program is free software; you can redistribute it and/or modify it
412 under the same terms as Perl itself.
413
414 =cut
415
416 1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26