/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 755 - (hide annotations)
Sun Oct 8 20:28:17 2006 UTC (17 years, 6 months ago) by dpavlin
File size: 11409 byte(s)
 r1097@llin:  dpavlin | 2006-10-08 22:24:54 +0200
 replaced generate_marc with universal have_rules [0.08]

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 706 use base qw/WebPAC::Common/;
13 dpavlin 686
14     =head1 NAME
15    
16 dpavlin 737 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17 dpavlin 686
18     =head1 VERSION
19    
20 dpavlin 755 Version 0.08
21 dpavlin 686
22     =cut
23    
24 dpavlin 755 our $VERSION = '0.08';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 dpavlin 737 to produce lookups and normalization. It will also parse other parts of
30     source to produce some of DWIM (I<Do What I Mean>) magic
31     (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32     rules in normalisation).
33 dpavlin 698
34     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35     reading about LISP. It might be a bit over-the board, but at least it removed
36     separate configuration files for lookups.
37    
38     This is experimental code, but it replaces all older formats which where,
39     at one point in time, available in WebPAC.
40    
41 dpavlin 686 FIXME
42    
43     =head1 FUNCTIONS
44    
45     =head2 new
46    
47     Create new parser object.
48    
49     my $parser = new WebPAC::Parser(
50     config => new WebPAC::Config(),
51     base_path => '/optional/path/to/conf',
52     );
53    
54     =cut
55    
56     sub new {
57     my $class = shift;
58     my $self = {@_};
59     bless($self, $class);
60    
61     my $log = $self->_get_logger();
62    
63     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
64    
65     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
66    
67 dpavlin 703 $self->_read_sources;
68 dpavlin 686
69 dpavlin 691 $self ? return $self : return undef;
70     }
71    
72 dpavlin 703 =head2 valid_database
73    
74     my $ok = $parse->valid_database('key');
75    
76     =cut
77    
78     sub valid_database {
79     my $self = shift;
80    
81     my $database = shift || return;
82    
83     return defined($self->{valid_inputs}->{ _q($database) });
84     }
85    
86     =head2 valid_database_input
87    
88     my $ok = $parse->valid_database('database_key','input_name');
89    
90     =cut
91    
92     sub valid_database_input {
93     my $self = shift;
94     my ($database,$input) = @_;
95 dpavlin 705 $input = _input_name($input);
96 dpavlin 703 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
97     }
98    
99     =head2 depends
100    
101     Return all databases and inputs on which specified one depends
102    
103     $depends_on = $parser->depends('database','input');
104    
105     =cut
106    
107     sub depends {
108     my $self = shift;
109     my ($database,$input) = @_;
110 dpavlin 705 $input = _input_name($input);
111 dpavlin 703 $self->_get_logger->debug("depends($database,$input)");
112     return unless (
113     defined( $self->{depends}->{ _q($database) } ) &&
114     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
115     );
116     return $self->{depends}->{ _q($database) }->{ _q($input) };
117     }
118    
119 dpavlin 706 =head2 have_lookup_create
120    
121     my @keys = $parser->have_lookup_create($database, $input);
122    
123     =cut
124    
125     sub have_lookup_create {
126     my $self = shift;
127     my ($database,$input) = @_;
128     $input = _input_name($input);
129     return unless (
130     defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
131     defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
132     );
133     return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
134     }
135    
136    
137 dpavlin 705 =head2 lookup_create_rules
138    
139     my $source = $parser->lookup_create_rules($database, $input);
140    
141     =cut
142    
143     sub lookup_create_rules {
144     my $self = shift;
145     my ($database,$input) = @_;
146     $input = _input_name($input);
147     return unless (
148     defined( $self->{_lookup_create}->{ _q($database) } ) &&
149     defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
150     );
151     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
152     }
153    
154 dpavlin 717 =head2 normalize_rules
155    
156     my $source = $parser->normalize_rules($database, $input);
157    
158     =cut
159    
160     sub normalize_rules {
161     my $self = shift;
162     my ($database,$input) = @_;
163     $input = _input_name($input);
164     return unless (
165     defined( $self->{_normalize_source}->{ _q($database) } ) &&
166     defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
167     );
168     return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
169     }
170    
171 dpavlin 737
172 dpavlin 755 =head2 have_rules
173 dpavlin 737
174 dpavlin 755 my $do_marc = $parser->have_rules('marc', $database, $input);
175     my $do_index = $parser->have_rules('search', $database);
176 dpavlin 737
177 dpavlin 755 This function will return hash containing count of all found C<marc_*> or
178     C<search> directives. Input name is optional.
179 dpavlin 737
180     =cut
181    
182 dpavlin 755 sub have_rules {
183 dpavlin 737 my $self = shift;
184 dpavlin 755
185     my $log = $self->_get_logger();
186     my $type = shift @_ || $log->logconfess("need at least type");
187     my $database = shift @_ || $log->logconfess("database is required");
188     my $input = shift @_;
189    
190 dpavlin 737 $input = _input_name($input);
191 dpavlin 755
192    
193     return unless defined( $self->{_have_rules}->{ _q($database) } );
194    
195     my $database_rules = $self->{_have_rules}->{ _q($database ) };
196    
197     if (defined($input)) {
198    
199     return unless (
200     defined( $database_rules->{ _q($input) } ) &&
201     defined( $database_rules->{ _q($input) }->{ $type } )
202     );
203    
204     return $database_rules->{ _q($input) }->{ $type };
205     }
206    
207     my $usage;
208    
209     foreach my $i (keys %{ $database_rules }) {
210     next unless defined( $database_rules->{$i}->{$type} );
211    
212     foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
213     $usage->{ $t } += $database_rules->{ $i }->{ $t };
214     }
215     }
216    
217     return $usage;
218    
219 dpavlin 737 }
220    
221    
222 dpavlin 703 =head1 PRIVATE
223    
224     =head2 _read_sources
225    
226     my $source_files = $parser->_read_sources;
227    
228 dpavlin 691 Called by L</new>.
229    
230     =cut
231    
232 dpavlin 703 sub _read_sources {
233 dpavlin 691 my $self = shift;
234    
235     my $log = $self->_get_logger();
236    
237     my $nr = 0;
238    
239 dpavlin 737 my @sources;
240 dpavlin 701
241 dpavlin 691 $self->{config}->iterate_inputs( sub {
242     my ($input, $database) = @_;
243    
244 dpavlin 699 $log->debug("database: $database input = ", dump($input));
245    
246 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
247 dpavlin 691
248 dpavlin 698 my $path = $normalize->{path};
249     return unless($path);
250     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
251 dpavlin 691
252 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
253 dpavlin 691
254 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
255 dpavlin 691
256 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
257 dpavlin 691
258 dpavlin 698 $log->debug("$database/$input_name: adding $path");
259    
260 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
261 dpavlin 698
262 dpavlin 737 push @sources, sub {
263     $self->_parse_source( $database, $input_name, $full, $s );
264 dpavlin 701 };
265 dpavlin 698
266     $nr++;
267     }
268 dpavlin 686 } );
269    
270 dpavlin 691 $log->debug("found $nr source files");
271 dpavlin 686
272 dpavlin 737 # parse all sources
273     $_->() foreach (@sources);
274 dpavlin 701
275 dpavlin 691 return $nr;
276 dpavlin 686 }
277    
278 dpavlin 737 =head2 _parse_source
279 dpavlin 686
280 dpavlin 737 $parser->_parse_source($database,$input,$path,$source);
281 dpavlin 691
282 dpavlin 708 Called for each normalize source (rules) in each input by L</_read_sources>
283 dpavlin 698
284     It will report invalid databases and inputs in error log after parsing.
285    
286 dpavlin 686 =cut
287    
288 dpavlin 737 sub _parse_source {
289 dpavlin 686 my $self = shift;
290 dpavlin 701 my ($database, $input, $path, $source) = @_;
291 dpavlin 686
292 dpavlin 698 $input = _input_name($input);
293    
294 dpavlin 686 my $log = $self->_get_logger();
295    
296 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
297     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
298 dpavlin 686
299 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
300 dpavlin 686
301 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
302 dpavlin 691
303     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
304    
305 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
306 dpavlin 724 $Document->prune('PPI::Token::Comment');
307 dpavlin 686 #$Document->prune('PPI::Token::Operator');
308    
309     # Find all the named subroutines
310    
311 dpavlin 691 $self->{_lookup_errors} = ();
312 dpavlin 686
313 dpavlin 691 sub _lookup_error {
314     my $self = shift;
315     my $msg = shift;
316     $self->_get_logger->logconfess("error without message?") unless ($msg);
317     push @{ $self->{_lookup_errors} }, $msg;
318 dpavlin 690 return '';
319     }
320    
321 dpavlin 686 $Document->find( sub {
322     my ($Document,$Element) = @_;
323    
324     $Element->isa('PPI::Token::Word') or return '';
325     $Element->content eq 'lookup' or return '';
326    
327 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
328 dpavlin 686
329     my $args = $Element->snext_sibling;
330    
331     my @e = $args->child(0)->elements;
332 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
333 dpavlin 686
334 dpavlin 687 if ($log->is_debug) {
335     my $report = "found " . scalar @e . " elements:\n";
336 dpavlin 686
337 dpavlin 687 foreach my $i ( 0 .. $#e ) {
338     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
339     }
340    
341     $log->debug($report);
342 dpavlin 686 }
343    
344     my $key_element = $e[8]->clone;
345    
346 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
347 dpavlin 686
348 dpavlin 687 $log->debug("key part: ", $key_element);
349 dpavlin 686
350     my @key;
351    
352     $key_element->find( sub {
353     my $e = $_[1] || die "no element?";
354     $e->isa('PPI::Token::Word') or return '';
355     $e->content eq 'rec' or return '';
356    
357     my $kf = $e->snext_sibling;
358    
359 dpavlin 687 $log->debug("key fragment = $kf");
360 dpavlin 686
361     push @key, eval $kf;
362 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
363 dpavlin 686
364     return 1;
365     });
366    
367 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
368 dpavlin 686
369 dpavlin 687 $log->debug("key = $key");
370 dpavlin 686
371 dpavlin 712 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
372     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
373 dpavlin 686
374 dpavlin 712 my $create = qq{
375     save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
376     };
377    
378 dpavlin 687 $log->debug("create: $create");
379 dpavlin 686
380 dpavlin 698 # save code to create this lookup
381 dpavlin 706 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
382     $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
383 dpavlin 686
384 dpavlin 698
385 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
386     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
387 dpavlin 698 }
388    
389     # save this dependency
390 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
391 dpavlin 698
392 dpavlin 686 if ($#e < 10) {
393     $e[8]->insert_after( $e[8]->clone );
394     $e[8]->insert_after( $e[7]->clone );
395     $e[8]->insert_after( $e[6]->clone );
396     }
397    
398     $e[7]->remove;
399     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
400     $e[8]->remove;
401    
402    
403 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
404 dpavlin 686 });
405    
406 dpavlin 693 my $normalize_source = $Document->serialize;
407 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
408 dpavlin 693 $log->debug("normalize: $normalize_source");
409 dpavlin 686
410 dpavlin 720 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
411 dpavlin 692
412 dpavlin 687 if ($self->{debug}) {
413     my $Dumper = PPI::Dumper->new( $Document );
414     $Dumper->print;
415     }
416 dpavlin 686
417 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
418 dpavlin 690
419 dpavlin 737 $Document->find( sub {
420     my ($Document,$Element) = @_;
421    
422     $Element->isa('PPI::Token::Word') or return '';
423 dpavlin 755 if ($Element->content =~ m/^(marc|search)/) {
424     my $what = $1;
425     $log->debug("found $what rules in $database/$input");
426     $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
427     } else {
428     return '';
429     }
430 dpavlin 737 });
431    
432 dpavlin 687 return 1;
433 dpavlin 686 }
434    
435 dpavlin 692
436 dpavlin 698 =head2 _q
437    
438     Strip single or double quotes around value
439    
440     _q(qq/'foo'/) -> foo
441    
442     =cut
443    
444     sub _q {
445     my $v = shift || return;
446     $v =~ s/^['"]*//g;
447     $v =~ s/['"]*$//g;
448     return $v;
449     }
450    
451     =head2 _input_name
452    
453     Return C<name> value if HASH or arg if scalar
454    
455     _input_name($input)
456    
457     =cut
458    
459     sub _input_name {
460     my $input = shift || return;
461     if (ref($input) eq 'HASH') {
462     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
463     return $input->{name};
464     } else {
465     return $input;
466     }
467     }
468    
469    
470 dpavlin 686 =head1 AUTHOR
471    
472     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
473    
474     =head1 COPYRIGHT & LICENSE
475    
476     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
477    
478     This program is free software; you can redistribute it and/or modify it
479     under the same terms as Perl itself.
480    
481     =cut
482    
483     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26