/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 826 - (hide annotations)
Sun May 20 16:19:13 2007 UTC (16 years, 11 months ago) by dpavlin
File size: 11485 byte(s)
 r1211@llin:  dpavlin | 2007-05-20 14:48:37 +0200
 Removed debugging output

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 706 use base qw/WebPAC::Common/;
13 dpavlin 686
14     =head1 NAME
15    
16 dpavlin 737 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17 dpavlin 686
18     =head1 VERSION
19    
20 dpavlin 755 Version 0.08
21 dpavlin 686
22     =cut
23    
24 dpavlin 755 our $VERSION = '0.08';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 dpavlin 737 to produce lookups and normalization. It will also parse other parts of
30     source to produce some of DWIM (I<Do What I Mean>) magic
31     (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32     rules in normalisation).
33 dpavlin 698
34     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35     reading about LISP. It might be a bit over-the board, but at least it removed
36     separate configuration files for lookups.
37    
38     This is experimental code, but it replaces all older formats which where,
39     at one point in time, available in WebPAC.
40    
41 dpavlin 686 FIXME
42    
43     =head1 FUNCTIONS
44    
45     =head2 new
46    
47     Create new parser object.
48    
49     my $parser = new WebPAC::Parser(
50     config => new WebPAC::Config(),
51     base_path => '/optional/path/to/conf',
52     );
53    
54     =cut
55    
56     sub new {
57     my $class = shift;
58     my $self = {@_};
59     bless($self, $class);
60    
61     my $log = $self->_get_logger();
62    
63     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
64    
65     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
66    
67 dpavlin 703 $self->_read_sources;
68 dpavlin 686
69 dpavlin 691 $self ? return $self : return undef;
70     }
71    
72 dpavlin 703 =head2 valid_database
73    
74     my $ok = $parse->valid_database('key');
75    
76     =cut
77    
78     sub valid_database {
79     my $self = shift;
80    
81     my $database = shift || return;
82    
83     return defined($self->{valid_inputs}->{ _q($database) });
84     }
85    
86     =head2 valid_database_input
87    
88     my $ok = $parse->valid_database('database_key','input_name');
89    
90     =cut
91    
92     sub valid_database_input {
93     my $self = shift;
94     my ($database,$input) = @_;
95 dpavlin 705 $input = _input_name($input);
96 dpavlin 703 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
97     }
98    
99     =head2 depends
100    
101     Return all databases and inputs on which specified one depends
102    
103     $depends_on = $parser->depends('database','input');
104    
105     =cut
106    
107     sub depends {
108     my $self = shift;
109     my ($database,$input) = @_;
110 dpavlin 705 $input = _input_name($input);
111 dpavlin 703 $self->_get_logger->debug("depends($database,$input)");
112     return unless (
113     defined( $self->{depends}->{ _q($database) } ) &&
114     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
115     );
116     return $self->{depends}->{ _q($database) }->{ _q($input) };
117     }
118    
119 dpavlin 706 =head2 have_lookup_create
120    
121     my @keys = $parser->have_lookup_create($database, $input);
122    
123     =cut
124    
125     sub have_lookup_create {
126     my $self = shift;
127     my ($database,$input) = @_;
128     $input = _input_name($input);
129     return unless (
130     defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
131     defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
132     );
133     return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
134     }
135    
136    
137 dpavlin 705 =head2 lookup_create_rules
138    
139     my $source = $parser->lookup_create_rules($database, $input);
140    
141     =cut
142    
143     sub lookup_create_rules {
144     my $self = shift;
145     my ($database,$input) = @_;
146     $input = _input_name($input);
147     return unless (
148     defined( $self->{_lookup_create}->{ _q($database) } ) &&
149     defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
150     );
151     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
152     }
153    
154 dpavlin 717 =head2 normalize_rules
155    
156     my $source = $parser->normalize_rules($database, $input);
157    
158     =cut
159    
160     sub normalize_rules {
161     my $self = shift;
162     my ($database,$input) = @_;
163     $input = _input_name($input);
164     return unless (
165     defined( $self->{_normalize_source}->{ _q($database) } ) &&
166     defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
167     );
168     return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
169     }
170    
171 dpavlin 737
172 dpavlin 755 =head2 have_rules
173 dpavlin 737
174 dpavlin 755 my $do_marc = $parser->have_rules('marc', $database, $input);
175     my $do_index = $parser->have_rules('search', $database);
176 dpavlin 737
177 dpavlin 755 This function will return hash containing count of all found C<marc_*> or
178     C<search> directives. Input name is optional.
179 dpavlin 737
180     =cut
181    
182 dpavlin 755 sub have_rules {
183 dpavlin 737 my $self = shift;
184 dpavlin 755
185     my $log = $self->_get_logger();
186     my $type = shift @_ || $log->logconfess("need at least type");
187     my $database = shift @_ || $log->logconfess("database is required");
188     my $input = shift @_;
189    
190 dpavlin 737 $input = _input_name($input);
191 dpavlin 755
192    
193     return unless defined( $self->{_have_rules}->{ _q($database) } );
194    
195     my $database_rules = $self->{_have_rules}->{ _q($database ) };
196    
197     if (defined($input)) {
198    
199     return unless (
200     defined( $database_rules->{ _q($input) } ) &&
201     defined( $database_rules->{ _q($input) }->{ $type } )
202     );
203    
204     return $database_rules->{ _q($input) }->{ $type };
205     }
206    
207     my $usage;
208    
209     foreach my $i (keys %{ $database_rules }) {
210     next unless defined( $database_rules->{$i}->{$type} );
211    
212     foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
213     $usage->{ $t } += $database_rules->{ $i }->{ $t };
214     }
215     }
216    
217     return $usage;
218    
219 dpavlin 737 }
220    
221    
222 dpavlin 703 =head1 PRIVATE
223    
224     =head2 _read_sources
225    
226     my $source_files = $parser->_read_sources;
227    
228 dpavlin 691 Called by L</new>.
229    
230     =cut
231    
232 dpavlin 703 sub _read_sources {
233 dpavlin 691 my $self = shift;
234    
235     my $log = $self->_get_logger();
236    
237     my $nr = 0;
238    
239 dpavlin 737 my @sources;
240 dpavlin 701
241 dpavlin 800 my $lookup_src_cache;
242    
243 dpavlin 691 $self->{config}->iterate_inputs( sub {
244     my ($input, $database) = @_;
245    
246 dpavlin 699 $log->debug("database: $database input = ", dump($input));
247    
248 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
249 dpavlin 691
250 dpavlin 698 my $path = $normalize->{path};
251     return unless($path);
252     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
253 dpavlin 691
254 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
255 dpavlin 691
256 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
257 dpavlin 691
258 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
259 dpavlin 691
260 dpavlin 698 $log->debug("$database/$input_name: adding $path");
261    
262 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
263 dpavlin 698
264 dpavlin 737 push @sources, sub {
265 dpavlin 826 #warn "### $database $input_name, $full ###\n";
266 dpavlin 737 $self->_parse_source( $database, $input_name, $full, $s );
267 dpavlin 701 };
268 dpavlin 698
269     $nr++;
270     }
271 dpavlin 686 } );
272    
273 dpavlin 691 $log->debug("found $nr source files");
274 dpavlin 686
275 dpavlin 737 # parse all sources
276     $_->() foreach (@sources);
277 dpavlin 701
278 dpavlin 691 return $nr;
279 dpavlin 686 }
280    
281 dpavlin 737 =head2 _parse_source
282 dpavlin 686
283 dpavlin 737 $parser->_parse_source($database,$input,$path,$source);
284 dpavlin 691
285 dpavlin 708 Called for each normalize source (rules) in each input by L</_read_sources>
286 dpavlin 698
287     It will report invalid databases and inputs in error log after parsing.
288    
289 dpavlin 686 =cut
290    
291 dpavlin 737 sub _parse_source {
292 dpavlin 686 my $self = shift;
293 dpavlin 701 my ($database, $input, $path, $source) = @_;
294 dpavlin 686
295 dpavlin 698 $input = _input_name($input);
296    
297 dpavlin 686 my $log = $self->_get_logger();
298    
299 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
300     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
301 dpavlin 686
302 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
303 dpavlin 686
304 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
305 dpavlin 691
306     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
307    
308 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
309 dpavlin 724 $Document->prune('PPI::Token::Comment');
310 dpavlin 686 #$Document->prune('PPI::Token::Operator');
311    
312     # Find all the named subroutines
313    
314 dpavlin 691 $self->{_lookup_errors} = ();
315 dpavlin 686
316 dpavlin 691 sub _lookup_error {
317     my $self = shift;
318     my $msg = shift;
319     $self->_get_logger->logconfess("error without message?") unless ($msg);
320     push @{ $self->{_lookup_errors} }, $msg;
321 dpavlin 690 return '';
322     }
323    
324 dpavlin 686 $Document->find( sub {
325     my ($Document,$Element) = @_;
326    
327     $Element->isa('PPI::Token::Word') or return '';
328     $Element->content eq 'lookup' or return '';
329    
330 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
331 dpavlin 686
332     my $args = $Element->snext_sibling;
333    
334     my @e = $args->child(0)->elements;
335 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
336 dpavlin 686
337 dpavlin 687 if ($log->is_debug) {
338     my $report = "found " . scalar @e . " elements:\n";
339 dpavlin 686
340 dpavlin 687 foreach my $i ( 0 .. $#e ) {
341     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
342     }
343    
344     $log->debug($report);
345 dpavlin 686 }
346    
347     my $key_element = $e[8]->clone;
348    
349 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
350 dpavlin 686
351 dpavlin 687 $log->debug("key part: ", $key_element);
352 dpavlin 686
353     my @key;
354    
355     $key_element->find( sub {
356     my $e = $_[1] || die "no element?";
357     $e->isa('PPI::Token::Word') or return '';
358     $e->content eq 'rec' or return '';
359    
360     my $kf = $e->snext_sibling;
361    
362 dpavlin 687 $log->debug("key fragment = $kf");
363 dpavlin 686
364     push @key, eval $kf;
365 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
366 dpavlin 686
367     return 1;
368     });
369    
370 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
371 dpavlin 686
372 dpavlin 687 $log->debug("key = $key");
373 dpavlin 686
374 dpavlin 712 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
375     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
376 dpavlin 686
377 dpavlin 712 my $create = qq{
378     save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
379     };
380    
381 dpavlin 687 $log->debug("create: $create");
382 dpavlin 686
383 dpavlin 698 # save code to create this lookup
384 dpavlin 706 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
385     $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
386 dpavlin 686
387 dpavlin 698
388 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
389     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
390 dpavlin 698 }
391    
392     # save this dependency
393 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
394 dpavlin 698
395 dpavlin 686 if ($#e < 10) {
396     $e[8]->insert_after( $e[8]->clone );
397     $e[8]->insert_after( $e[7]->clone );
398     $e[8]->insert_after( $e[6]->clone );
399     }
400    
401     $e[7]->remove;
402     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
403     $e[8]->remove;
404    
405    
406 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
407 dpavlin 686 });
408    
409 dpavlin 693 my $normalize_source = $Document->serialize;
410 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
411 dpavlin 693 $log->debug("normalize: $normalize_source");
412 dpavlin 686
413 dpavlin 720 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
414 dpavlin 692
415 dpavlin 687 if ($self->{debug}) {
416     my $Dumper = PPI::Dumper->new( $Document );
417     $Dumper->print;
418     }
419 dpavlin 686
420 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
421 dpavlin 690
422 dpavlin 737 $Document->find( sub {
423     my ($Document,$Element) = @_;
424    
425     $Element->isa('PPI::Token::Word') or return '';
426 dpavlin 755 if ($Element->content =~ m/^(marc|search)/) {
427     my $what = $1;
428     $log->debug("found $what rules in $database/$input");
429     $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
430     } else {
431     return '';
432     }
433 dpavlin 737 });
434    
435 dpavlin 687 return 1;
436 dpavlin 686 }
437    
438 dpavlin 692
439 dpavlin 698 =head2 _q
440    
441     Strip single or double quotes around value
442    
443     _q(qq/'foo'/) -> foo
444    
445     =cut
446    
447     sub _q {
448     my $v = shift || return;
449     $v =~ s/^['"]*//g;
450     $v =~ s/['"]*$//g;
451     return $v;
452     }
453    
454     =head2 _input_name
455    
456     Return C<name> value if HASH or arg if scalar
457    
458     _input_name($input)
459    
460     =cut
461    
462     sub _input_name {
463     my $input = shift || return;
464     if (ref($input) eq 'HASH') {
465     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
466     return $input->{name};
467     } else {
468     return $input;
469     }
470     }
471    
472    
473 dpavlin 686 =head1 AUTHOR
474    
475     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
476    
477     =head1 COPYRIGHT & LICENSE
478    
479     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
480    
481     This program is free software; you can redistribute it and/or modify it
482     under the same terms as Perl itself.
483    
484     =cut
485    
486     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26