/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 737 - (hide annotations)
Thu Oct 5 14:38:45 2006 UTC (17 years, 6 months ago) by dpavlin
File size: 10716 byte(s)
 r1067@llin:  dpavlin | 2006-10-05 16:35:45 +0200
 added generate_marc to find out if normalization rules have marc* directives

1 dpavlin 686 package WebPAC::Parser;
2    
3     use warnings;
4     use strict;
5    
6    
7     use PPI;
8     use PPI::Dumper;
9     use Data::Dump qw/dump/;
10     use File::Slurp;
11    
12 dpavlin 706 use base qw/WebPAC::Common/;
13 dpavlin 686
14     =head1 NAME
15    
16 dpavlin 737 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17 dpavlin 686
18     =head1 VERSION
19    
20 dpavlin 737 Version 0.07
21 dpavlin 686
22     =cut
23    
24 dpavlin 737 our $VERSION = '0.07';
25 dpavlin 686
26     =head1 SYNOPSIS
27    
28 dpavlin 698 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 dpavlin 737 to produce lookups and normalization. It will also parse other parts of
30     source to produce some of DWIM (I<Do What I Mean>) magic
31     (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32     rules in normalisation).
33 dpavlin 698
34     It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35     reading about LISP. It might be a bit over-the board, but at least it removed
36     separate configuration files for lookups.
37    
38     This is experimental code, but it replaces all older formats which where,
39     at one point in time, available in WebPAC.
40    
41 dpavlin 686 FIXME
42    
43     =head1 FUNCTIONS
44    
45     =head2 new
46    
47     Create new parser object.
48    
49     my $parser = new WebPAC::Parser(
50     config => new WebPAC::Config(),
51     base_path => '/optional/path/to/conf',
52     );
53    
54     =cut
55    
56     sub new {
57     my $class = shift;
58     my $self = {@_};
59     bless($self, $class);
60    
61     my $log = $self->_get_logger();
62    
63     $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
64    
65     $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
66    
67 dpavlin 703 $self->_read_sources;
68 dpavlin 686
69 dpavlin 691 $self ? return $self : return undef;
70     }
71    
72 dpavlin 703 =head2 valid_database
73    
74     my $ok = $parse->valid_database('key');
75    
76     =cut
77    
78     sub valid_database {
79     my $self = shift;
80    
81     my $database = shift || return;
82    
83     return defined($self->{valid_inputs}->{ _q($database) });
84     }
85    
86     =head2 valid_database_input
87    
88     my $ok = $parse->valid_database('database_key','input_name');
89    
90     =cut
91    
92     sub valid_database_input {
93     my $self = shift;
94     my ($database,$input) = @_;
95 dpavlin 705 $input = _input_name($input);
96 dpavlin 703 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
97     }
98    
99     =head2 depends
100    
101     Return all databases and inputs on which specified one depends
102    
103     $depends_on = $parser->depends('database','input');
104    
105     =cut
106    
107     sub depends {
108     my $self = shift;
109     my ($database,$input) = @_;
110 dpavlin 705 $input = _input_name($input);
111 dpavlin 703 $self->_get_logger->debug("depends($database,$input)");
112     return unless (
113     defined( $self->{depends}->{ _q($database) } ) &&
114     defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
115     );
116     return $self->{depends}->{ _q($database) }->{ _q($input) };
117     }
118    
119 dpavlin 706 =head2 have_lookup_create
120    
121     my @keys = $parser->have_lookup_create($database, $input);
122    
123     =cut
124    
125     sub have_lookup_create {
126     my $self = shift;
127     my ($database,$input) = @_;
128     $input = _input_name($input);
129     return unless (
130     defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
131     defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
132     );
133     return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
134     }
135    
136    
137 dpavlin 705 =head2 lookup_create_rules
138    
139     my $source = $parser->lookup_create_rules($database, $input);
140    
141     =cut
142    
143     sub lookup_create_rules {
144     my $self = shift;
145     my ($database,$input) = @_;
146     $input = _input_name($input);
147     return unless (
148     defined( $self->{_lookup_create}->{ _q($database) } ) &&
149     defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
150     );
151     return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
152     }
153    
154 dpavlin 717 =head2 normalize_rules
155    
156     my $source = $parser->normalize_rules($database, $input);
157    
158     =cut
159    
160     sub normalize_rules {
161     my $self = shift;
162     my ($database,$input) = @_;
163     $input = _input_name($input);
164     return unless (
165     defined( $self->{_normalize_source}->{ _q($database) } ) &&
166     defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
167     );
168     return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
169     }
170    
171 dpavlin 737
172     =head2 generate_marc
173    
174     my $do_marc = $parser->generate_marc($database, $input);
175    
176     This function will return hash containing count of all found C<marc_*> directives.
177    
178     =cut
179    
180     sub generate_marc {
181     my $self = shift;
182     my ($database,$input) = @_;
183     $input = _input_name($input);
184     return unless (
185     defined( $self->{_generate_marc}->{ _q($database) } ) &&
186     defined( $self->{_generate_marc}->{ _q($database) }->{ _q($input) } )
187     );
188     return $self->{_generate_marc}->{ _q($database) }->{ _q($input) };
189     }
190    
191    
192 dpavlin 703 =head1 PRIVATE
193    
194     =head2 _read_sources
195    
196     my $source_files = $parser->_read_sources;
197    
198 dpavlin 691 Called by L</new>.
199    
200     =cut
201    
202 dpavlin 703 sub _read_sources {
203 dpavlin 691 my $self = shift;
204    
205     my $log = $self->_get_logger();
206    
207     my $nr = 0;
208    
209 dpavlin 737 my @sources;
210 dpavlin 701
211 dpavlin 691 $self->{config}->iterate_inputs( sub {
212     my ($input, $database) = @_;
213    
214 dpavlin 699 $log->debug("database: $database input = ", dump($input));
215    
216 dpavlin 701 foreach my $normalize (@{ $input->{normalize} }) {
217 dpavlin 691
218 dpavlin 698 my $path = $normalize->{path};
219     return unless($path);
220     my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
221 dpavlin 691
222 dpavlin 698 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
223 dpavlin 691
224 dpavlin 698 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
225 dpavlin 691
226 dpavlin 698 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
227 dpavlin 691
228 dpavlin 698 $log->debug("$database/$input_name: adding $path");
229    
230 dpavlin 701 $self->{valid_inputs}->{$database}->{$input_name}++;
231 dpavlin 698
232 dpavlin 737 push @sources, sub {
233     $self->_parse_source( $database, $input_name, $full, $s );
234 dpavlin 701 };
235 dpavlin 698
236     $nr++;
237     }
238 dpavlin 686 } );
239    
240 dpavlin 691 $log->debug("found $nr source files");
241 dpavlin 686
242 dpavlin 737 # parse all sources
243     $_->() foreach (@sources);
244 dpavlin 701
245 dpavlin 691 return $nr;
246 dpavlin 686 }
247    
248 dpavlin 737 =head2 _parse_source
249 dpavlin 686
250 dpavlin 737 $parser->_parse_source($database,$input,$path,$source);
251 dpavlin 691
252 dpavlin 708 Called for each normalize source (rules) in each input by L</_read_sources>
253 dpavlin 698
254     It will report invalid databases and inputs in error log after parsing.
255    
256 dpavlin 686 =cut
257    
258 dpavlin 737 sub _parse_source {
259 dpavlin 686 my $self = shift;
260 dpavlin 701 my ($database, $input, $path, $source) = @_;
261 dpavlin 686
262 dpavlin 698 $input = _input_name($input);
263    
264 dpavlin 686 my $log = $self->_get_logger();
265    
266 dpavlin 691 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
267     $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
268 dpavlin 686
269 dpavlin 691 $log->logdie("no source found for database $database input $input path $path") unless ($source);
270 dpavlin 686
271 dpavlin 701 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
272 dpavlin 691
273     my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
274    
275 dpavlin 686 $Document->prune('PPI::Token::Whitespace');
276 dpavlin 724 $Document->prune('PPI::Token::Comment');
277 dpavlin 686 #$Document->prune('PPI::Token::Operator');
278    
279     # Find all the named subroutines
280    
281 dpavlin 691 $self->{_lookup_errors} = ();
282 dpavlin 686
283 dpavlin 691 sub _lookup_error {
284     my $self = shift;
285     my $msg = shift;
286     $self->_get_logger->logconfess("error without message?") unless ($msg);
287     push @{ $self->{_lookup_errors} }, $msg;
288 dpavlin 690 return '';
289     }
290    
291 dpavlin 686 $Document->find( sub {
292     my ($Document,$Element) = @_;
293    
294     $Element->isa('PPI::Token::Word') or return '';
295     $Element->content eq 'lookup' or return '';
296    
297 dpavlin 687 $log->debug("expansion: ", $Element->snext_sibling);
298 dpavlin 686
299     my $args = $Element->snext_sibling;
300    
301     my @e = $args->child(0)->elements;
302 dpavlin 687 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
303 dpavlin 686
304 dpavlin 687 if ($log->is_debug) {
305     my $report = "found " . scalar @e . " elements:\n";
306 dpavlin 686
307 dpavlin 687 foreach my $i ( 0 .. $#e ) {
308     $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
309     }
310    
311     $log->debug($report);
312 dpavlin 686 }
313    
314     my $key_element = $e[8]->clone;
315    
316 dpavlin 687 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
317 dpavlin 686
318 dpavlin 687 $log->debug("key part: ", $key_element);
319 dpavlin 686
320     my @key;
321    
322     $key_element->find( sub {
323     my $e = $_[1] || die "no element?";
324     $e->isa('PPI::Token::Word') or return '';
325     $e->content eq 'rec' or return '';
326    
327     my $kf = $e->snext_sibling;
328    
329 dpavlin 687 $log->debug("key fragment = $kf");
330 dpavlin 686
331     push @key, eval $kf;
332 dpavlin 687 $log->logdie("can't eval { $kf }: $@") if ($@);
333 dpavlin 686
334     return 1;
335     });
336    
337 dpavlin 687 my $key = join('-', @key ) || $log->logdie("no key found!");
338 dpavlin 686
339 dpavlin 687 $log->debug("key = $key");
340 dpavlin 686
341 dpavlin 712 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
342     return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
343 dpavlin 686
344 dpavlin 712 my $create = qq{
345     save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
346     };
347    
348 dpavlin 687 $log->debug("create: $create");
349 dpavlin 686
350 dpavlin 698 # save code to create this lookup
351 dpavlin 706 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
352     $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
353 dpavlin 686
354 dpavlin 698
355 dpavlin 702 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
356     $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
357 dpavlin 698 }
358    
359     # save this dependency
360 dpavlin 702 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
361 dpavlin 698
362 dpavlin 686 if ($#e < 10) {
363     $e[8]->insert_after( $e[8]->clone );
364     $e[8]->insert_after( $e[7]->clone );
365     $e[8]->insert_after( $e[6]->clone );
366     }
367    
368     $e[7]->remove;
369     $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
370     $e[8]->remove;
371    
372    
373 dpavlin 687 $log->debug(">>> ", $Element->snext_sibling);
374 dpavlin 686 });
375    
376 dpavlin 693 my $normalize_source = $Document->serialize;
377 dpavlin 692 $log->debug("create: ", dump($self->{_lookup_create}) );
378 dpavlin 693 $log->debug("normalize: $normalize_source");
379 dpavlin 686
380 dpavlin 720 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
381 dpavlin 692
382 dpavlin 687 if ($self->{debug}) {
383     my $Dumper = PPI::Dumper->new( $Document );
384     $Dumper->print;
385     }
386 dpavlin 686
387 dpavlin 692 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
388 dpavlin 690
389 dpavlin 737 $Document->find( sub {
390     my ($Document,$Element) = @_;
391    
392     $Element->isa('PPI::Token::Word') or return '';
393     $Element->content =~ m/^marc/ or return '';
394    
395     $log->debug("found marc output generation for $database/$input");
396     $self->{_generate_marc}->{ $database }->{ $input }->{ $Element->content }++;
397     });
398    
399 dpavlin 687 return 1;
400 dpavlin 686 }
401    
402 dpavlin 692
403 dpavlin 698 =head2 _q
404    
405     Strip single or double quotes around value
406    
407     _q(qq/'foo'/) -> foo
408    
409     =cut
410    
411     sub _q {
412     my $v = shift || return;
413     $v =~ s/^['"]*//g;
414     $v =~ s/['"]*$//g;
415     return $v;
416     }
417    
418     =head2 _input_name
419    
420     Return C<name> value if HASH or arg if scalar
421    
422     _input_name($input)
423    
424     =cut
425    
426     sub _input_name {
427     my $input = shift || return;
428     if (ref($input) eq 'HASH') {
429     die "can't find 'name' value in ", dump($input) unless defined($input->{name});
430     return $input->{name};
431     } else {
432     return $input;
433     }
434     }
435    
436    
437 dpavlin 686 =head1 AUTHOR
438    
439     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
440    
441     =head1 COPYRIGHT & LICENSE
442    
443     Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
444    
445     This program is free software; you can redistribute it and/or modify it
446     under the same terms as Perl itself.
447    
448     =cut
449    
450     1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26