/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1265 - (hide annotations)
Sun Aug 9 20:10:21 2009 UTC (14 years, 8 months ago) by dpavlin
File size: 12010 byte(s)
fix foreach my combination
1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 706 use base qw/WebPAC::Common/;
13 dpavlin 686
14     =head1 NAME
15    
16 dpavlin 737 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17 dpavlin 686
18     =head1 VERSION
19    
20 dpavlin 755 Version 0.08
21 dpavlin 686
22     =cut
23    
24 dpavlin 755 our $VERSION = '0.08';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 dpavlin 737 to produce lookups and normalization. It will also parse other parts of
30     source to produce some of DWIM (I<Do What I Mean>) magic
31     (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32     rules in normalisation).
33 dpavlin 698
34     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35     reading about LISP. It might be a bit over-the board, but at least it removed
36     separate configuration files for lookups.
37    
38     This is experimental code, but it replaces all older formats which where,
39     at one point in time, available in WebPAC.
40    
41 dpavlin 686 FIXME
42    
43     =head1 FUNCTIONS
44    
45     =head2 new
46    
47     Create new parser object.
48    
49     my $parser = new WebPAC::Parser(
50     config => new WebPAC::Config(),
51     base_path => '/optional/path/to/conf',
52 dpavlin 976 only_database => $only
53 dpavlin 686 );
54    
55     =cut
56    
57     sub new {
58     my $class = shift;
59     my $self = {@_};
60     bless($self, $class);
61    
62     my $log = $self->_get_logger();
63    
64     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
65    
66     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
67    
68 dpavlin 703 $self->_read_sources;
69 dpavlin 686
70 dpavlin 691 $self ? return $self : return undef;
71     }
72    
73 dpavlin 703 =head2 valid_database
74    
75     my $ok = $parse->valid_database('key');
76    
77     =cut
78    
79     sub valid_database {
80     my $self = shift;
81    
82     my $database = shift || return;
83    
84     return defined($self->{valid_inputs}->{ _q($database) });
85     }
86    
87     =head2 valid_database_input
88    
89     my $ok = $parse->valid_database('database_key','input_name');
90    
91     =cut
92    
93     sub valid_database_input {
94     my $self = shift;
95     my ($database,$input) = @_;
96 dpavlin 705 $input = _input_name($input);
97 dpavlin 703 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
98     }
99    
100     =head2 depends
101    
102     Return all databases and inputs on which specified one depends
103    
104     $depends_on = $parser->depends('database','input');
105    
106     =cut
107    
108     sub depends {
109     my $self = shift;
110     my ($database,$input) = @_;
111 dpavlin 705 $input = _input_name($input);
112 dpavlin 703 $self->_get_logger->debug("depends($database,$input)");
113     return unless (
114     defined( $self->{depends}->{ _q($database) } ) &&
115     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
116     );
117     return $self->{depends}->{ _q($database) }->{ _q($input) };
118     }
119    
120 dpavlin 706 =head2 have_lookup_create
121    
122     my @keys = $parser->have_lookup_create($database, $input);
123    
124     =cut
125    
126     sub have_lookup_create {
127     my $self = shift;
128     my ($database,$input) = @_;
129     $input = _input_name($input);
130     return unless (
131     defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
132     defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
133     );
134     return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
135     }
136    
137    
138 dpavlin 705 =head2 lookup_create_rules
139    
140     my $source = $parser->lookup_create_rules($database, $input);
141    
142     =cut
143    
144     sub lookup_create_rules {
145     my $self = shift;
146     my ($database,$input) = @_;
147     $input = _input_name($input);
148     return unless (
149     defined( $self->{_lookup_create}->{ _q($database) } ) &&
150     defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
151     );
152     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
153     }
154    
155 dpavlin 717 =head2 normalize_rules
156    
157     my $source = $parser->normalize_rules($database, $input);
158    
159     =cut
160    
161     sub normalize_rules {
162     my $self = shift;
163     my ($database,$input) = @_;
164     $input = _input_name($input);
165     return unless (
166     defined( $self->{_normalize_source}->{ _q($database) } ) &&
167     defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
168     );
169     return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
170     }
171    
172 dpavlin 737
173 dpavlin 755 =head2 have_rules
174 dpavlin 737
175 dpavlin 755 my $do_marc = $parser->have_rules('marc', $database, $input);
176     my $do_index = $parser->have_rules('search', $database);
177 dpavlin 737
178 dpavlin 755 This function will return hash containing count of all found C<marc_*> or
179     C<search> directives. Input name is optional.
180 dpavlin 737
181     =cut
182    
183 dpavlin 755 sub have_rules {
184 dpavlin 737 my $self = shift;
185 dpavlin 755
186     my $log = $self->_get_logger();
187     my $type = shift @_ || $log->logconfess("need at least type");
188     my $database = shift @_ || $log->logconfess("database is required");
189     my $input = shift @_;
190    
191 dpavlin 737 $input = _input_name($input);
192 dpavlin 755
193    
194     return unless defined( $self->{_have_rules}->{ _q($database) } );
195    
196     my $database_rules = $self->{_have_rules}->{ _q($database ) };
197    
198     if (defined($input)) {
199    
200     return unless (
201     defined( $database_rules->{ _q($input) } ) &&
202     defined( $database_rules->{ _q($input) }->{ $type } )
203     );
204    
205     return $database_rules->{ _q($input) }->{ $type };
206     }
207    
208     my $usage;
209    
210     foreach my $i (keys %{ $database_rules }) {
211     next unless defined( $database_rules->{$i}->{$type} );
212    
213     foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
214     $usage->{ $t } += $database_rules->{ $i }->{ $t };
215     }
216     }
217    
218     return $usage;
219    
220 dpavlin 737 }
221    
222    
223 dpavlin 703 =head1 PRIVATE
224    
225     =head2 _read_sources
226    
227     my $source_files = $parser->_read_sources;
228    
229 dpavlin 691 Called by L</new>.
230    
231     =cut
232    
233 dpavlin 703 sub _read_sources {
234 dpavlin 691 my $self = shift;
235    
236     my $log = $self->_get_logger();
237    
238     my $nr = 0;
239    
240 dpavlin 737 my @sources;
241 dpavlin 701
242 dpavlin 800 my $lookup_src_cache;
243    
244 dpavlin 976 my $only_database = $self->{only_database};
245     my $only_input = $self->{only_input};
246    
247 dpavlin 691 $self->{config}->iterate_inputs( sub {
248     my ($input, $database) = @_;
249    
250 dpavlin 976 return if ( $only_database && $database !~ m/$only_database/i );
251     return if ( $only_input && $input->{name} !~ m/$only_input/i );
252    
253 dpavlin 699 $log->debug("database: $database input = ", dump($input));
254    
255 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
256 dpavlin 691
257 dpavlin 698 my $path = $normalize->{path};
258     return unless($path);
259     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
260 dpavlin 691
261 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
262 dpavlin 691
263 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
264 dpavlin 691
265 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
266 dpavlin 691
267 dpavlin 698 $log->debug("$database/$input_name: adding $path");
268    
269 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
270 dpavlin 698
271 dpavlin 737 push @sources, sub {
272 dpavlin 826 #warn "### $database $input_name, $full ###\n";
273 dpavlin 737 $self->_parse_source( $database, $input_name, $full, $s );
274 dpavlin 701 };
275 dpavlin 698
276     $nr++;
277     }
278 dpavlin 686 } );
279    
280 dpavlin 691 $log->debug("found $nr source files");
281 dpavlin 686
282 dpavlin 737 # parse all sources
283     $_->() foreach (@sources);
284 dpavlin 701
285 dpavlin 691 return $nr;
286 dpavlin 686 }
287    
288 dpavlin 737 =head2 _parse_source
289 dpavlin 686
290 dpavlin 737 $parser->_parse_source($database,$input,$path,$source);
291 dpavlin 691
292 dpavlin 708 Called for each normalize source (rules) in each input by L</_read_sources>
293 dpavlin 698
294     It will report invalid databases and inputs in error log after parsing.
295    
296 dpavlin 686 =cut
297    
298 dpavlin 737 sub _parse_source {
299 dpavlin 686 my $self = shift;
300 dpavlin 701 my ($database, $input, $path, $source) = @_;
301 dpavlin 686
302 dpavlin 698 $input = _input_name($input);
303    
304 dpavlin 686 my $log = $self->_get_logger();
305    
306 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
307     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
308 dpavlin 686
309 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
310 dpavlin 686
311 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
312 dpavlin 691
313     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
314    
315 dpavlin 1259 $Document->prune('PPI::Token::Whitespace');
316 dpavlin 724 $Document->prune('PPI::Token::Comment');
317 dpavlin 686 #$Document->prune('PPI::Token::Operator');
318    
319     # Find all the named subroutines
320    
321 dpavlin 691 $self->{_lookup_errors} = ();
322 dpavlin 686
323 dpavlin 691 sub _lookup_error {
324     my $self = shift;
325     my $msg = shift;
326     $self->_get_logger->logconfess("error without message?") unless ($msg);
327     push @{ $self->{_lookup_errors} }, $msg;
328 dpavlin 690 return '';
329     }
330    
331 dpavlin 686 $Document->find( sub {
332     my ($Document,$Element) = @_;
333    
334     $Element->isa('PPI::Token::Word') or return '';
335 dpavlin 1061
336     if ( $Element->content eq 'sub' ) {
337     # repair demage done by prune of whitespace
338     $Element->insert_after( PPI::Token::Whitespace->new(' ') );
339     return '';
340 dpavlin 1265 } elsif ( $Element->content eq 'my' ) {
341     $Element->insert_before( PPI::Token::Whitespace->new(' ') );
342 dpavlin 1061 }
343    
344 dpavlin 686 $Element->content eq 'lookup' or return '';
345    
346 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
347 dpavlin 686
348     my $args = $Element->snext_sibling;
349    
350     my @e = $args->child(0)->elements;
351 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
352 dpavlin 686
353 dpavlin 687 if ($log->is_debug) {
354     my $report = "found " . scalar @e . " elements:\n";
355 dpavlin 686
356 dpavlin 687 foreach my $i ( 0 .. $#e ) {
357     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
358     }
359    
360     $log->debug($report);
361 dpavlin 686 }
362    
363     my $key_element = $e[8]->clone;
364    
365 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
366 dpavlin 686
367 dpavlin 687 $log->debug("key part: ", $key_element);
368 dpavlin 686
369     my @key;
370    
371     $key_element->find( sub {
372     my $e = $_[1] || die "no element?";
373     $e->isa('PPI::Token::Word') or return '';
374     $e->content eq 'rec' or return '';
375    
376     my $kf = $e->snext_sibling;
377    
378 dpavlin 687 $log->debug("key fragment = $kf");
379 dpavlin 686
380     push @key, eval $kf;
381 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
382 dpavlin 686
383     return 1;
384     });
385    
386 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
387 dpavlin 686
388 dpavlin 687 $log->debug("key = $key");
389 dpavlin 686
390 dpavlin 712 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
391     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
392 dpavlin 686
393 dpavlin 712 my $create = qq{
394     save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
395     };
396    
397 dpavlin 687 $log->debug("create: $create");
398 dpavlin 686
399 dpavlin 698 # save code to create this lookup
400 dpavlin 706 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
401     $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
402 dpavlin 686
403 dpavlin 698
404 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
405     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
406 dpavlin 698 }
407    
408     # save this dependency
409 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
410 dpavlin 698
411 dpavlin 686 if ($#e < 10) {
412     $e[8]->insert_after( $e[8]->clone );
413     $e[8]->insert_after( $e[7]->clone );
414     $e[8]->insert_after( $e[6]->clone );
415     }
416    
417     $e[7]->remove;
418 dpavlin 1061 $e[8]->insert_before( PPI::Token::Quote::Single->new( "'$key'" ) );
419 dpavlin 686 $e[8]->remove;
420    
421    
422 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
423 dpavlin 686 });
424    
425 dpavlin 693 my $normalize_source = $Document->serialize;
426 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
427 dpavlin 693 $log->debug("normalize: $normalize_source");
428 dpavlin 686
429 dpavlin 720 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
430 dpavlin 692
431 dpavlin 687 if ($self->{debug}) {
432     my $Dumper = PPI::Dumper->new( $Document );
433     $Dumper->print;
434     }
435 dpavlin 686
436 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
437 dpavlin 690
438 dpavlin 737 $Document->find( sub {
439     my ($Document,$Element) = @_;
440    
441     $Element->isa('PPI::Token::Word') or return '';
442 dpavlin 755 if ($Element->content =~ m/^(marc|search)/) {
443     my $what = $1;
444     $log->debug("found $what rules in $database/$input");
445     $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
446     } else {
447     return '';
448     }
449 dpavlin 737 });
450    
451 dpavlin 687 return 1;
452 dpavlin 686 }
453    
454 dpavlin 692
455 dpavlin 698 =head2 _q
456    
457     Strip single or double quotes around value
458    
459     _q(qq/'foo'/) -> foo
460    
461     =cut
462    
463     sub _q {
464     my $v = shift || return;
465     $v =~ s/^['"]*//g;
466     $v =~ s/['"]*$//g;
467     return $v;
468     }
469    
470     =head2 _input_name
471    
472     Return C<name> value if HASH or arg if scalar
473    
474     _input_name($input)
475    
476     =cut
477    
478     sub _input_name {
479     my $input = shift || return;
480     if (ref($input) eq 'HASH') {
481     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
482     return $input->{name};
483     } else {
484     return $input;
485     }
486     }
487    
488    
489 dpavlin 686 =head1 AUTHOR
490    
491     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
492    
493     =head1 COPYRIGHT & LICENSE
494    
495     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
496    
497     This program is free software; you can redistribute it and/or modify it
498     under the same terms as Perl itself.
499    
500     =cut
501    
502     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26