/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1204 - (hide annotations)
Fri May 29 19:38:09 2009 UTC (14 years, 11 months ago) by dpavlin
File size: 11903 byte(s)
 r1894@llin:  dpavlin | 2009-05-29 21:38:05 +0200
 don't remove whitespaces so we can have more comprex perl
 in normalize istread of mungling it becaise of removed spaces

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 706 use base qw/WebPAC::Common/;
13 dpavlin 686
14     =head1 NAME
15    
16 dpavlin 737 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17 dpavlin 686
18     =head1 VERSION
19    
20 dpavlin 755 Version 0.08
21 dpavlin 686
22     =cut
23    
24 dpavlin 755 our $VERSION = '0.08';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 dpavlin 737 to produce lookups and normalization. It will also parse other parts of
30     source to produce some of DWIM (I<Do What I Mean>) magic
31     (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32     rules in normalisation).
33 dpavlin 698
34     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35     reading about LISP. It might be a bit over-the board, but at least it removed
36     separate configuration files for lookups.
37    
38     This is experimental code, but it replaces all older formats which where,
39     at one point in time, available in WebPAC.
40    
41 dpavlin 686 FIXME
42    
43     =head1 FUNCTIONS
44    
45     =head2 new
46    
47     Create new parser object.
48    
49     my $parser = new WebPAC::Parser(
50     config => new WebPAC::Config(),
51     base_path => '/optional/path/to/conf',
52 dpavlin 976 only_database => $only
53 dpavlin 686 );
54    
55     =cut
56    
57     sub new {
58     my $class = shift;
59     my $self = {@_};
60     bless($self, $class);
61    
62     my $log = $self->_get_logger();
63    
64     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
65    
66     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
67    
68 dpavlin 703 $self->_read_sources;
69 dpavlin 686
70 dpavlin 691 $self ? return $self : return undef;
71     }
72    
73 dpavlin 703 =head2 valid_database
74    
75     my $ok = $parse->valid_database('key');
76    
77     =cut
78    
79     sub valid_database {
80     my $self = shift;
81    
82     my $database = shift || return;
83    
84     return defined($self->{valid_inputs}->{ _q($database) });
85     }
86    
87     =head2 valid_database_input
88    
89     my $ok = $parse->valid_database('database_key','input_name');
90    
91     =cut
92    
93     sub valid_database_input {
94     my $self = shift;
95     my ($database,$input) = @_;
96 dpavlin 705 $input = _input_name($input);
97 dpavlin 703 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
98     }
99    
100     =head2 depends
101    
102     Return all databases and inputs on which specified one depends
103    
104     $depends_on = $parser->depends('database','input');
105    
106     =cut
107    
108     sub depends {
109     my $self = shift;
110     my ($database,$input) = @_;
111 dpavlin 705 $input = _input_name($input);
112 dpavlin 703 $self->_get_logger->debug("depends($database,$input)");
113     return unless (
114     defined( $self->{depends}->{ _q($database) } ) &&
115     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
116     );
117     return $self->{depends}->{ _q($database) }->{ _q($input) };
118     }
119    
120 dpavlin 706 =head2 have_lookup_create
121    
122     my @keys = $parser->have_lookup_create($database, $input);
123    
124     =cut
125    
126     sub have_lookup_create {
127     my $self = shift;
128     my ($database,$input) = @_;
129     $input = _input_name($input);
130     return unless (
131     defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
132     defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
133     );
134     return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
135     }
136    
137    
138 dpavlin 705 =head2 lookup_create_rules
139    
140     my $source = $parser->lookup_create_rules($database, $input);
141    
142     =cut
143    
144     sub lookup_create_rules {
145     my $self = shift;
146     my ($database,$input) = @_;
147     $input = _input_name($input);
148     return unless (
149     defined( $self->{_lookup_create}->{ _q($database) } ) &&
150     defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
151     );
152     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
153     }
154    
155 dpavlin 717 =head2 normalize_rules
156    
157     my $source = $parser->normalize_rules($database, $input);
158    
159     =cut
160    
161     sub normalize_rules {
162     my $self = shift;
163     my ($database,$input) = @_;
164     $input = _input_name($input);
165     return unless (
166     defined( $self->{_normalize_source}->{ _q($database) } ) &&
167     defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
168     );
169     return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
170     }
171    
172 dpavlin 737
173 dpavlin 755 =head2 have_rules
174 dpavlin 737
175 dpavlin 755 my $do_marc = $parser->have_rules('marc', $database, $input);
176     my $do_index = $parser->have_rules('search', $database);
177 dpavlin 737
178 dpavlin 755 This function will return hash containing count of all found C<marc_*> or
179     C<search> directives. Input name is optional.
180 dpavlin 737
181     =cut
182    
183 dpavlin 755 sub have_rules {
184 dpavlin 737 my $self = shift;
185 dpavlin 755
186     my $log = $self->_get_logger();
187     my $type = shift @_ || $log->logconfess("need at least type");
188     my $database = shift @_ || $log->logconfess("database is required");
189     my $input = shift @_;
190    
191 dpavlin 737 $input = _input_name($input);
192 dpavlin 755
193    
194     return unless defined( $self->{_have_rules}->{ _q($database) } );
195    
196     my $database_rules = $self->{_have_rules}->{ _q($database ) };
197    
198     if (defined($input)) {
199    
200     return unless (
201     defined( $database_rules->{ _q($input) } ) &&
202     defined( $database_rules->{ _q($input) }->{ $type } )
203     );
204    
205     return $database_rules->{ _q($input) }->{ $type };
206     }
207    
208     my $usage;
209    
210     foreach my $i (keys %{ $database_rules }) {
211     next unless defined( $database_rules->{$i}->{$type} );
212    
213     foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
214     $usage->{ $t } += $database_rules->{ $i }->{ $t };
215     }
216     }
217    
218     return $usage;
219    
220 dpavlin 737 }
221    
222    
223 dpavlin 703 =head1 PRIVATE
224    
225     =head2 _read_sources
226    
227     my $source_files = $parser->_read_sources;
228    
229 dpavlin 691 Called by L</new>.
230    
231     =cut
232    
233 dpavlin 703 sub _read_sources {
234 dpavlin 691 my $self = shift;
235    
236     my $log = $self->_get_logger();
237    
238     my $nr = 0;
239    
240 dpavlin 737 my @sources;
241 dpavlin 701
242 dpavlin 800 my $lookup_src_cache;
243    
244 dpavlin 976 my $only_database = $self->{only_database};
245     my $only_input = $self->{only_input};
246    
247 dpavlin 691 $self->{config}->iterate_inputs( sub {
248     my ($input, $database) = @_;
249    
250 dpavlin 976 return if ( $only_database && $database !~ m/$only_database/i );
251     return if ( $only_input && $input->{name} !~ m/$only_input/i );
252    
253 dpavlin 699 $log->debug("database: $database input = ", dump($input));
254    
255 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
256 dpavlin 691
257 dpavlin 698 my $path = $normalize->{path};
258     return unless($path);
259     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
260 dpavlin 691
261 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
262 dpavlin 691
263 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
264 dpavlin 691
265 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
266 dpavlin 691
267 dpavlin 698 $log->debug("$database/$input_name: adding $path");
268    
269 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
270 dpavlin 698
271 dpavlin 737 push @sources, sub {
272 dpavlin 826 #warn "### $database $input_name, $full ###\n";
273 dpavlin 737 $self->_parse_source( $database, $input_name, $full, $s );
274 dpavlin 701 };
275 dpavlin 698
276     $nr++;
277     }
278 dpavlin 686 } );
279    
280 dpavlin 691 $log->debug("found $nr source files");
281 dpavlin 686
282 dpavlin 737 # parse all sources
283     $_->() foreach (@sources);
284 dpavlin 701
285 dpavlin 691 return $nr;
286 dpavlin 686 }
287    
288 dpavlin 737 =head2 _parse_source
289 dpavlin 686
290 dpavlin 737 $parser->_parse_source($database,$input,$path,$source);
291 dpavlin 691
292 dpavlin 708 Called for each normalize source (rules) in each input by L</_read_sources>
293 dpavlin 698
294     It will report invalid databases and inputs in error log after parsing.
295    
296 dpavlin 686 =cut
297    
298 dpavlin 737 sub _parse_source {
299 dpavlin 686 my $self = shift;
300 dpavlin 701 my ($database, $input, $path, $source) = @_;
301 dpavlin 686
302 dpavlin 698 $input = _input_name($input);
303    
304 dpavlin 686 my $log = $self->_get_logger();
305    
306 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
307     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
308 dpavlin 686
309 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
310 dpavlin 686
311 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
312 dpavlin 691
313     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
314    
315 dpavlin 1204 #$Document->prune('PPI::Token::Whitespace');
316 dpavlin 724 $Document->prune('PPI::Token::Comment');
317 dpavlin 686 #$Document->prune('PPI::Token::Operator');
318    
319     # Find all the named subroutines
320    
321 dpavlin 691 $self->{_lookup_errors} = ();
322 dpavlin 686
323 dpavlin 691 sub _lookup_error {
324     my $self = shift;
325     my $msg = shift;
326     $self->_get_logger->logconfess("error without message?") unless ($msg);
327     push @{ $self->{_lookup_errors} }, $msg;
328 dpavlin 690 return '';
329     }
330    
331 dpavlin 686 $Document->find( sub {
332     my ($Document,$Element) = @_;
333    
334     $Element->isa('PPI::Token::Word') or return '';
335 dpavlin 1061
336     if ( $Element->content eq 'sub' ) {
337     # repair demage done by prune of whitespace
338     $Element->insert_after( PPI::Token::Whitespace->new(' ') );
339     return '';
340     }
341    
342 dpavlin 686 $Element->content eq 'lookup' or return '';
343    
344 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
345 dpavlin 686
346     my $args = $Element->snext_sibling;
347    
348     my @e = $args->child(0)->elements;
349 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
350 dpavlin 686
351 dpavlin 687 if ($log->is_debug) {
352     my $report = "found " . scalar @e . " elements:\n";
353 dpavlin 686
354 dpavlin 687 foreach my $i ( 0 .. $#e ) {
355     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
356     }
357    
358     $log->debug($report);
359 dpavlin 686 }
360    
361     my $key_element = $e[8]->clone;
362    
363 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
364 dpavlin 686
365 dpavlin 687 $log->debug("key part: ", $key_element);
366 dpavlin 686
367     my @key;
368    
369     $key_element->find( sub {
370     my $e = $_[1] || die "no element?";
371     $e->isa('PPI::Token::Word') or return '';
372     $e->content eq 'rec' or return '';
373    
374     my $kf = $e->snext_sibling;
375    
376 dpavlin 687 $log->debug("key fragment = $kf");
377 dpavlin 686
378     push @key, eval $kf;
379 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
380 dpavlin 686
381     return 1;
382     });
383    
384 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
385 dpavlin 686
386 dpavlin 687 $log->debug("key = $key");
387 dpavlin 686
388 dpavlin 712 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
389     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
390 dpavlin 686
391 dpavlin 712 my $create = qq{
392     save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
393     };
394    
395 dpavlin 687 $log->debug("create: $create");
396 dpavlin 686
397 dpavlin 698 # save code to create this lookup
398 dpavlin 706 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
399     $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
400 dpavlin 686
401 dpavlin 698
402 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
403     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
404 dpavlin 698 }
405    
406     # save this dependency
407 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
408 dpavlin 698
409 dpavlin 686 if ($#e < 10) {
410     $e[8]->insert_after( $e[8]->clone );
411     $e[8]->insert_after( $e[7]->clone );
412     $e[8]->insert_after( $e[6]->clone );
413     }
414    
415     $e[7]->remove;
416 dpavlin 1061 $e[8]->insert_before( PPI::Token::Quote::Single->new( "'$key'" ) );
417 dpavlin 686 $e[8]->remove;
418    
419    
420 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
421 dpavlin 686 });
422    
423 dpavlin 693 my $normalize_source = $Document->serialize;
424 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
425 dpavlin 693 $log->debug("normalize: $normalize_source");
426 dpavlin 686
427 dpavlin 720 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
428 dpavlin 692
429 dpavlin 687 if ($self->{debug}) {
430     my $Dumper = PPI::Dumper->new( $Document );
431     $Dumper->print;
432     }
433 dpavlin 686
434 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
435 dpavlin 690
436 dpavlin 737 $Document->find( sub {
437     my ($Document,$Element) = @_;
438    
439     $Element->isa('PPI::Token::Word') or return '';
440 dpavlin 755 if ($Element->content =~ m/^(marc|search)/) {
441     my $what = $1;
442     $log->debug("found $what rules in $database/$input");
443     $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
444     } else {
445     return '';
446     }
447 dpavlin 737 });
448    
449 dpavlin 687 return 1;
450 dpavlin 686 }
451    
452 dpavlin 692
453 dpavlin 698 =head2 _q
454    
455     Strip single or double quotes around value
456    
457     _q(qq/'foo'/) -> foo
458    
459     =cut
460    
461     sub _q {
462     my $v = shift || return;
463     $v =~ s/^['"]*//g;
464     $v =~ s/['"]*$//g;
465     return $v;
466     }
467    
468     =head2 _input_name
469    
470     Return C<name> value if HASH or arg if scalar
471    
472     _input_name($input)
473    
474     =cut
475    
476     sub _input_name {
477     my $input = shift || return;
478     if (ref($input) eq 'HASH') {
479     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
480     return $input->{name};
481     } else {
482     return $input;
483     }
484     }
485    
486    
487 dpavlin 686 =head1 AUTHOR
488    
489     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
490    
491     =head1 COPYRIGHT & LICENSE
492    
493     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
494    
495     This program is free software; you can redistribute it and/or modify it
496     under the same terms as Perl itself.
497    
498     =cut
499    
500     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26