/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 703 - (show annotations)
Mon Sep 25 13:24:09 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 8627 byte(s)
 r1000@llin:  dpavlin | 2006-09-25 15:16:49 +0200
 support multiple lookups on same input file
 move _read_sources and _parse_lookups to private functions

1 package WebPAC::Parser;
2
3 use warnings;
4 use strict;
5
6
7 use PPI;
8 use PPI::Dumper;
9 use Data::Dump qw/dump/;
10 use File::Slurp;
11
12 use base qw/WebPAC::Common WebPAC::Normalize/;
13
14 =head1 NAME
15
16 WebPAC::Parser - parse perl normalization configuration files and mungle it
17
18 =head1 VERSION
19
20 Version 0.05
21
22 =cut
23
24 our $VERSION = '0.05';
25
26 =head1 SYNOPSIS
27
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization.
30
31 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
32 reading about LISP. It might be a bit over-the board, but at least it removed
33 separate configuration files for lookups.
34
35 This is experimental code, but it replaces all older formats which where,
36 at one point in time, available in WebPAC.
37
38 FIXME
39
40 =head1 FUNCTIONS
41
42 =head2 new
43
44 Create new parser object.
45
46 my $parser = new WebPAC::Parser(
47 config => new WebPAC::Config(),
48 base_path => '/optional/path/to/conf',
49 );
50
51 =cut
52
53 sub new {
54 my $class = shift;
55 my $self = {@_};
56 bless($self, $class);
57
58 my $log = $self->_get_logger();
59
60 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
61
62 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
63
64 $self->_read_sources;
65
66 $self ? return $self : return undef;
67 }
68
69 =head2 lookup_create_rules
70
71 my $source = $parser->lookup_create_rules($database, $input);
72
73 =cut
74
75 sub lookup_create_rules {
76 my $self = shift;
77 my ($database,$input) = @_;
78 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
79 }
80
81 =head2 valid_database
82
83 my $ok = $parse->valid_database('key');
84
85 =cut
86
87 sub valid_database {
88 my $self = shift;
89
90 my $database = shift || return;
91
92 return defined($self->{valid_inputs}->{ _q($database) });
93 }
94
95 =head2 valid_database_input
96
97 my $ok = $parse->valid_database('database_key','input_name');
98
99 =cut
100
101 sub valid_database_input {
102 my $self = shift;
103
104 my ($database,$input) = @_;
105 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
106 }
107
108 =head2 depends
109
110 Return all databases and inputs on which specified one depends
111
112 $depends_on = $parser->depends('database','input');
113
114 =cut
115
116 sub depends {
117 my $self = shift;
118 my ($database,$input) = @_;
119 $self->_get_logger->debug("depends($database,$input)");
120 return unless (
121 defined( $self->{depends}->{ _q($database) } ) &&
122 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
123 );
124 return $self->{depends}->{ _q($database) }->{ _q($input) };
125 }
126
127 =head1 PRIVATE
128
129 =head2 _read_sources
130
131 my $source_files = $parser->_read_sources;
132
133 Called by L</new>.
134
135 =cut
136
137 sub _read_sources {
138 my $self = shift;
139
140 my $log = $self->_get_logger();
141
142 my $nr = 0;
143
144 my @lookups;
145
146 $self->{config}->iterate_inputs( sub {
147 my ($input, $database) = @_;
148
149 $log->debug("database: $database input = ", dump($input));
150
151 foreach my $normalize (@{ $input->{normalize} }) {
152
153 my $path = $normalize->{path};
154 return unless($path);
155 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
156
157 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
158
159 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
160
161 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
162
163 $log->debug("$database/$input_name: adding $path");
164
165 $self->{valid_inputs}->{$database}->{$input_name}++;
166
167 push @lookups, sub {
168 $self->_parse_lookups( $database, $input_name, $full, $s );
169 };
170
171 $nr++;
172 }
173 } );
174
175 $log->debug("found $nr source files");
176
177 # parse all lookups
178 $_->() foreach (@lookups);
179
180 return $nr;
181 }
182
183 =head2 _parse_lookups
184
185 $parser->_parse_lookups($database,$input,$path,$source);
186
187 Called for each normalize source (rules) in each input by L</read_sources>
188
189 It will report invalid databases and inputs in error log after parsing.
190
191 =cut
192
193 sub _parse_lookups {
194 my $self = shift;
195 my ($database, $input, $path, $source) = @_;
196
197 $input = _input_name($input);
198
199 my $log = $self->_get_logger();
200
201 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
202 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
203
204 $log->logdie("no source found for database $database input $input path $path") unless ($source);
205
206 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
207
208 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
209
210 $Document->prune('PPI::Token::Whitespace');
211 #$Document->prune('PPI::Token::Operator');
212
213 # Find all the named subroutines
214
215 $self->{_lookup_errors} = ();
216
217 sub _lookup_error {
218 my $self = shift;
219 my $msg = shift;
220 $self->_get_logger->logconfess("error without message?") unless ($msg);
221 push @{ $self->{_lookup_errors} }, $msg;
222 return '';
223 }
224
225 $Document->find( sub {
226 my ($Document,$Element) = @_;
227
228 $Element->isa('PPI::Token::Word') or return '';
229 $Element->content eq 'lookup' or return '';
230
231 $log->debug("expansion: ", $Element->snext_sibling);
232
233 my $args = $Element->snext_sibling;
234
235 my @e = $args->child(0)->elements;
236 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
237
238 if ($log->is_debug) {
239 my $report = "found " . scalar @e . " elements:\n";
240
241 foreach my $i ( 0 .. $#e ) {
242 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
243 }
244
245 $log->debug($report);
246 }
247
248 my $key_element = $e[8]->clone;
249
250 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
251
252 $log->debug("key part: ", $key_element);
253
254 my @key;
255
256 $key_element->find( sub {
257 my $e = $_[1] || die "no element?";
258 $e->isa('PPI::Token::Word') or return '';
259 $e->content eq 'rec' or return '';
260
261 my $kf = $e->snext_sibling;
262
263 $log->debug("key fragment = $kf");
264
265 push @key, eval $kf;
266 $log->logdie("can't eval { $kf }: $@") if ($@);
267
268 return 1;
269 });
270
271 my $key = join('-', @key ) || $log->logdie("no key found!");
272
273 $log->debug("key = $key");
274
275 my $create = '
276 $coderef = ' . $e[7] . $e[8] . ';
277 foreach my $v ($coderef->()) {
278 next unless (defined($v) && $v ne \'\');
279 push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
280 }
281 ';
282
283 $log->debug("create: $create");
284
285 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
286 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
287
288 # save code to create this lookup
289 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) } .= $create;
290
291
292 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
293 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
294 }
295
296 # save this dependency
297 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
298
299 if ($#e < 10) {
300 $e[8]->insert_after( $e[8]->clone );
301 $e[8]->insert_after( $e[7]->clone );
302 $e[8]->insert_after( $e[6]->clone );
303 }
304
305 $e[7]->remove;
306 $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
307 $e[8]->remove;
308
309
310 $log->debug(">>> ", $Element->snext_sibling);
311 });
312
313 my $normalize_source = $Document->serialize;
314 $log->debug("create: ", dump($self->{_lookup_create}) );
315 $log->debug("normalize: $normalize_source");
316
317 $self->{_normalize_source}->{$database}->{$input} = $normalize_source;
318
319 if ($self->{debug}) {
320 my $Dumper = PPI::Dumper->new( $Document );
321 $Dumper->print;
322 }
323
324 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
325
326 return 1;
327 }
328
329
330 =head2 _q
331
332 Strip single or double quotes around value
333
334 _q(qq/'foo'/) -> foo
335
336 =cut
337
338 sub _q {
339 my $v = shift || return;
340 $v =~ s/^['"]*//g;
341 $v =~ s/['"]*$//g;
342 return $v;
343 }
344
345 =head2 _input_name
346
347 Return C<name> value if HASH or arg if scalar
348
349 _input_name($input)
350
351 =cut
352
353 sub _input_name {
354 my $input = shift || return;
355 if (ref($input) eq 'HASH') {
356 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
357 return $input->{name};
358 } else {
359 return $input;
360 }
361 }
362
363
364 =head1 AUTHOR
365
366 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
367
368 =head1 COPYRIGHT & LICENSE
369
370 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
371
372 This program is free software; you can redistribute it and/or modify it
373 under the same terms as Perl itself.
374
375 =cut
376
377 1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26