/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1277 - (show annotations)
Wed Aug 19 16:24:43 2009 UTC (14 years, 8 months ago) by dpavlin
File size: 12018 byte(s)
insert space before if
1 package WebPAC::Parser;
2
3 use warnings;
4 use strict;
5
6
7 use PPI;
8 use PPI::Dumper;
9 use Data::Dump qw/dump/;
10 use File::Slurp;
11
12 use base qw/WebPAC::Common/;
13
14 =head1 NAME
15
16 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17
18 =head1 VERSION
19
20 Version 0.08
21
22 =cut
23
24 our $VERSION = '0.08';
25
26 =head1 SYNOPSIS
27
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization. It will also parse other parts of
30 source to produce some of DWIM (I<Do What I Mean>) magic
31 (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32 rules in normalisation).
33
34 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35 reading about LISP. It might be a bit over-the board, but at least it removed
36 separate configuration files for lookups.
37
38 This is experimental code, but it replaces all older formats which where,
39 at one point in time, available in WebPAC.
40
41 FIXME
42
43 =head1 FUNCTIONS
44
45 =head2 new
46
47 Create new parser object.
48
49 my $parser = new WebPAC::Parser(
50 config => new WebPAC::Config(),
51 base_path => '/optional/path/to/conf',
52 only_database => $only
53 );
54
55 =cut
56
57 sub new {
58 my $class = shift;
59 my $self = {@_};
60 bless($self, $class);
61
62 my $log = $self->_get_logger();
63
64 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
65
66 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
67
68 $self->_read_sources;
69
70 $self ? return $self : return undef;
71 }
72
73 =head2 valid_database
74
75 my $ok = $parse->valid_database('key');
76
77 =cut
78
79 sub valid_database {
80 my $self = shift;
81
82 my $database = shift || return;
83
84 return defined($self->{valid_inputs}->{ _q($database) });
85 }
86
87 =head2 valid_database_input
88
89 my $ok = $parse->valid_database('database_key','input_name');
90
91 =cut
92
93 sub valid_database_input {
94 my $self = shift;
95 my ($database,$input) = @_;
96 $input = _input_name($input);
97 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
98 }
99
100 =head2 depends
101
102 Return all databases and inputs on which specified one depends
103
104 $depends_on = $parser->depends('database','input');
105
106 =cut
107
108 sub depends {
109 my $self = shift;
110 my ($database,$input) = @_;
111 $input = _input_name($input);
112 $self->_get_logger->debug("depends($database,$input)");
113 return unless (
114 defined( $self->{depends}->{ _q($database) } ) &&
115 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
116 );
117 return $self->{depends}->{ _q($database) }->{ _q($input) };
118 }
119
120 =head2 have_lookup_create
121
122 my @keys = $parser->have_lookup_create($database, $input);
123
124 =cut
125
126 sub have_lookup_create {
127 my $self = shift;
128 my ($database,$input) = @_;
129 $input = _input_name($input);
130 return unless (
131 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
132 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
133 );
134 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
135 }
136
137
138 =head2 lookup_create_rules
139
140 my $source = $parser->lookup_create_rules($database, $input);
141
142 =cut
143
144 sub lookup_create_rules {
145 my $self = shift;
146 my ($database,$input) = @_;
147 $input = _input_name($input);
148 return unless (
149 defined( $self->{_lookup_create}->{ _q($database) } ) &&
150 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
151 );
152 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
153 }
154
155 =head2 normalize_rules
156
157 my $source = $parser->normalize_rules($database, $input);
158
159 =cut
160
161 sub normalize_rules {
162 my $self = shift;
163 my ($database,$input) = @_;
164 $input = _input_name($input);
165 return unless (
166 defined( $self->{_normalize_source}->{ _q($database) } ) &&
167 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
168 );
169 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
170 }
171
172
173 =head2 have_rules
174
175 my $do_marc = $parser->have_rules('marc', $database, $input);
176 my $do_index = $parser->have_rules('search', $database);
177
178 This function will return hash containing count of all found C<marc_*> or
179 C<search> directives. Input name is optional.
180
181 =cut
182
183 sub have_rules {
184 my $self = shift;
185
186 my $log = $self->_get_logger();
187 my $type = shift @_ || $log->logconfess("need at least type");
188 my $database = shift @_ || $log->logconfess("database is required");
189 my $input = shift @_;
190
191 $input = _input_name($input);
192
193
194 return unless defined( $self->{_have_rules}->{ _q($database) } );
195
196 my $database_rules = $self->{_have_rules}->{ _q($database ) };
197
198 if (defined($input)) {
199
200 return unless (
201 defined( $database_rules->{ _q($input) } ) &&
202 defined( $database_rules->{ _q($input) }->{ $type } )
203 );
204
205 return $database_rules->{ _q($input) }->{ $type };
206 }
207
208 my $usage;
209
210 foreach my $i (keys %{ $database_rules }) {
211 next unless defined( $database_rules->{$i}->{$type} );
212
213 foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
214 $usage->{ $t } += $database_rules->{ $i }->{ $t };
215 }
216 }
217
218 return $usage;
219
220 }
221
222
223 =head1 PRIVATE
224
225 =head2 _read_sources
226
227 my $source_files = $parser->_read_sources;
228
229 Called by L</new>.
230
231 =cut
232
233 sub _read_sources {
234 my $self = shift;
235
236 my $log = $self->_get_logger();
237
238 my $nr = 0;
239
240 my @sources;
241
242 my $lookup_src_cache;
243
244 my $only_database = $self->{only_database};
245 my $only_input = $self->{only_input};
246
247 $self->{config}->iterate_inputs( sub {
248 my ($input, $database) = @_;
249
250 return if ( $only_database && $database !~ m/$only_database/i );
251 return if ( $only_input && $input->{name} !~ m/$only_input/i );
252
253 $log->debug("database: $database input = ", dump($input));
254
255 foreach my $normalize (@{ $input->{normalize} }) {
256
257 my $path = $normalize->{path};
258 return unless($path);
259 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
260
261 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
262
263 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
264
265 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
266
267 $log->debug("$database/$input_name: adding $path");
268
269 $self->{valid_inputs}->{$database}->{$input_name}++;
270
271 push @sources, sub {
272 #warn "### $database $input_name, $full ###\n";
273 $self->_parse_source( $database, $input_name, $full, $s );
274 };
275
276 $nr++;
277 }
278 } );
279
280 $log->debug("found $nr source files");
281
282 # parse all sources
283 $_->() foreach (@sources);
284
285 return $nr;
286 }
287
288 =head2 _parse_source
289
290 $parser->_parse_source($database,$input,$path,$source);
291
292 Called for each normalize source (rules) in each input by L</_read_sources>
293
294 It will report invalid databases and inputs in error log after parsing.
295
296 =cut
297
298 sub _parse_source {
299 my $self = shift;
300 my ($database, $input, $path, $source) = @_;
301
302 $input = _input_name($input);
303
304 my $log = $self->_get_logger();
305
306 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
307 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
308
309 $log->logdie("no source found for database $database input $input path $path") unless ($source);
310
311 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
312
313 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
314
315 $Document->prune('PPI::Token::Whitespace');
316 $Document->prune('PPI::Token::Comment');
317 #$Document->prune('PPI::Token::Operator');
318
319 # Find all the named subroutines
320
321 $self->{_lookup_errors} = ();
322
323 sub _lookup_error {
324 my $self = shift;
325 my $msg = shift;
326 $self->_get_logger->logconfess("error without message?") unless ($msg);
327 push @{ $self->{_lookup_errors} }, $msg;
328 return '';
329 }
330
331 $Document->find( sub {
332 my ($Document,$Element) = @_;
333
334 $Element->isa('PPI::Token::Word') or return '';
335
336 if ( $Element->content =~ m{^(sub|if)$} ) {
337 # repair demage done by prune of whitespace
338 $Element->insert_after( PPI::Token::Whitespace->new(' ') );
339 return '';
340 } elsif ( $Element->content eq 'my' ) {
341 $Element->insert_before( PPI::Token::Whitespace->new(' ') );
342 }
343
344 $Element->content eq 'lookup' or return '';
345
346 $log->debug("expansion: ", $Element->snext_sibling);
347
348 my $args = $Element->snext_sibling;
349
350 my @e = $args->child(0)->elements;
351 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
352
353 if ($log->is_debug) {
354 my $report = "found " . scalar @e . " elements:\n";
355
356 foreach my $i ( 0 .. $#e ) {
357 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
358 }
359
360 $log->debug($report);
361 }
362
363 my $key_element = $e[8]->clone;
364
365 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
366
367 $log->debug("key part: ", $key_element);
368
369 my @key;
370
371 $key_element->find( sub {
372 my $e = $_[1] || die "no element?";
373 $e->isa('PPI::Token::Word') or return '';
374 $e->content eq 'rec' or return '';
375
376 my $kf = $e->snext_sibling;
377
378 $log->debug("key fragment = $kf");
379
380 push @key, eval $kf;
381 $log->logdie("can't eval { $kf }: $@") if ($@);
382
383 return 1;
384 });
385
386 my $key = join('-', @key ) || $log->logdie("no key found!");
387
388 $log->debug("key = $key");
389
390 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
391 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
392
393 my $create = qq{
394 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
395 };
396
397 $log->debug("create: $create");
398
399 # save code to create this lookup
400 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
401 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
402
403
404 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
405 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
406 }
407
408 # save this dependency
409 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
410
411 if ($#e < 10) {
412 $e[8]->insert_after( $e[8]->clone );
413 $e[8]->insert_after( $e[7]->clone );
414 $e[8]->insert_after( $e[6]->clone );
415 }
416
417 $e[7]->remove;
418 $e[8]->insert_before( PPI::Token::Quote::Single->new( "'$key'" ) );
419 $e[8]->remove;
420
421
422 $log->debug(">>> ", $Element->snext_sibling);
423 });
424
425 my $normalize_source = $Document->serialize;
426 $log->debug("create: ", dump($self->{_lookup_create}) );
427 $log->debug("normalize: $normalize_source");
428
429 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
430
431 if ($self->{debug}) {
432 my $Dumper = PPI::Dumper->new( $Document );
433 $Dumper->print;
434 }
435
436 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
437
438 $Document->find( sub {
439 my ($Document,$Element) = @_;
440
441 $Element->isa('PPI::Token::Word') or return '';
442 if ($Element->content =~ m/^(marc|search)/) {
443 my $what = $1;
444 $log->debug("found $what rules in $database/$input");
445 $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
446 } else {
447 return '';
448 }
449 });
450
451 return 1;
452 }
453
454
455 =head2 _q
456
457 Strip single or double quotes around value
458
459 _q(qq/'foo'/) -> foo
460
461 =cut
462
463 sub _q {
464 my $v = shift || return;
465 $v =~ s/^['"]*//g;
466 $v =~ s/['"]*$//g;
467 return $v;
468 }
469
470 =head2 _input_name
471
472 Return C<name> value if HASH or arg if scalar
473
474 _input_name($input)
475
476 =cut
477
478 sub _input_name {
479 my $input = shift || return;
480 if (ref($input) eq 'HASH') {
481 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
482 return $input->{name};
483 } else {
484 return $input;
485 }
486 }
487
488
489 =head1 AUTHOR
490
491 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
492
493 =head1 COPYRIGHT & LICENSE
494
495 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
496
497 This program is free software; you can redistribute it and/or modify it
498 under the same terms as Perl itself.
499
500 =cut
501
502 1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26