/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 976 - (show annotations)
Sat Nov 3 12:30:43 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 11729 byte(s)
if --only filter is used, we will parse just normalization files for this database.

1 package WebPAC::Parser;
2
3 use warnings;
4 use strict;
5
6
7 use PPI;
8 use PPI::Dumper;
9 use Data::Dump qw/dump/;
10 use File::Slurp;
11
12 use base qw/WebPAC::Common/;
13
14 =head1 NAME
15
16 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17
18 =head1 VERSION
19
20 Version 0.08
21
22 =cut
23
24 our $VERSION = '0.08';
25
26 =head1 SYNOPSIS
27
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization. It will also parse other parts of
30 source to produce some of DWIM (I<Do What I Mean>) magic
31 (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32 rules in normalisation).
33
34 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35 reading about LISP. It might be a bit over-the board, but at least it removed
36 separate configuration files for lookups.
37
38 This is experimental code, but it replaces all older formats which where,
39 at one point in time, available in WebPAC.
40
41 FIXME
42
43 =head1 FUNCTIONS
44
45 =head2 new
46
47 Create new parser object.
48
49 my $parser = new WebPAC::Parser(
50 config => new WebPAC::Config(),
51 base_path => '/optional/path/to/conf',
52 only_database => $only
53 );
54
55 =cut
56
57 sub new {
58 my $class = shift;
59 my $self = {@_};
60 bless($self, $class);
61
62 my $log = $self->_get_logger();
63
64 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
65
66 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
67
68 $self->_read_sources;
69
70 $self ? return $self : return undef;
71 }
72
73 =head2 valid_database
74
75 my $ok = $parse->valid_database('key');
76
77 =cut
78
79 sub valid_database {
80 my $self = shift;
81
82 my $database = shift || return;
83
84 return defined($self->{valid_inputs}->{ _q($database) });
85 }
86
87 =head2 valid_database_input
88
89 my $ok = $parse->valid_database('database_key','input_name');
90
91 =cut
92
93 sub valid_database_input {
94 my $self = shift;
95 my ($database,$input) = @_;
96 $input = _input_name($input);
97 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
98 }
99
100 =head2 depends
101
102 Return all databases and inputs on which specified one depends
103
104 $depends_on = $parser->depends('database','input');
105
106 =cut
107
108 sub depends {
109 my $self = shift;
110 my ($database,$input) = @_;
111 $input = _input_name($input);
112 $self->_get_logger->debug("depends($database,$input)");
113 return unless (
114 defined( $self->{depends}->{ _q($database) } ) &&
115 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
116 );
117 return $self->{depends}->{ _q($database) }->{ _q($input) };
118 }
119
120 =head2 have_lookup_create
121
122 my @keys = $parser->have_lookup_create($database, $input);
123
124 =cut
125
126 sub have_lookup_create {
127 my $self = shift;
128 my ($database,$input) = @_;
129 $input = _input_name($input);
130 return unless (
131 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
132 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
133 );
134 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
135 }
136
137
138 =head2 lookup_create_rules
139
140 my $source = $parser->lookup_create_rules($database, $input);
141
142 =cut
143
144 sub lookup_create_rules {
145 my $self = shift;
146 my ($database,$input) = @_;
147 $input = _input_name($input);
148 return unless (
149 defined( $self->{_lookup_create}->{ _q($database) } ) &&
150 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
151 );
152 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
153 }
154
155 =head2 normalize_rules
156
157 my $source = $parser->normalize_rules($database, $input);
158
159 =cut
160
161 sub normalize_rules {
162 my $self = shift;
163 my ($database,$input) = @_;
164 $input = _input_name($input);
165 return unless (
166 defined( $self->{_normalize_source}->{ _q($database) } ) &&
167 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
168 );
169 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
170 }
171
172
173 =head2 have_rules
174
175 my $do_marc = $parser->have_rules('marc', $database, $input);
176 my $do_index = $parser->have_rules('search', $database);
177
178 This function will return hash containing count of all found C<marc_*> or
179 C<search> directives. Input name is optional.
180
181 =cut
182
183 sub have_rules {
184 my $self = shift;
185
186 my $log = $self->_get_logger();
187 my $type = shift @_ || $log->logconfess("need at least type");
188 my $database = shift @_ || $log->logconfess("database is required");
189 my $input = shift @_;
190
191 $input = _input_name($input);
192
193
194 return unless defined( $self->{_have_rules}->{ _q($database) } );
195
196 my $database_rules = $self->{_have_rules}->{ _q($database ) };
197
198 if (defined($input)) {
199
200 return unless (
201 defined( $database_rules->{ _q($input) } ) &&
202 defined( $database_rules->{ _q($input) }->{ $type } )
203 );
204
205 return $database_rules->{ _q($input) }->{ $type };
206 }
207
208 my $usage;
209
210 foreach my $i (keys %{ $database_rules }) {
211 next unless defined( $database_rules->{$i}->{$type} );
212
213 foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
214 $usage->{ $t } += $database_rules->{ $i }->{ $t };
215 }
216 }
217
218 return $usage;
219
220 }
221
222
223 =head1 PRIVATE
224
225 =head2 _read_sources
226
227 my $source_files = $parser->_read_sources;
228
229 Called by L</new>.
230
231 =cut
232
233 sub _read_sources {
234 my $self = shift;
235
236 my $log = $self->_get_logger();
237
238 my $nr = 0;
239
240 my @sources;
241
242 my $lookup_src_cache;
243
244 my $only_database = $self->{only_database};
245 my $only_input = $self->{only_input};
246
247 $self->{config}->iterate_inputs( sub {
248 my ($input, $database) = @_;
249
250 return if ( $only_database && $database !~ m/$only_database/i );
251 return if ( $only_input && $input->{name} !~ m/$only_input/i );
252
253 $log->debug("database: $database input = ", dump($input));
254
255 foreach my $normalize (@{ $input->{normalize} }) {
256
257 my $path = $normalize->{path};
258 return unless($path);
259 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
260
261 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
262
263 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
264
265 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
266
267 $log->debug("$database/$input_name: adding $path");
268
269 $self->{valid_inputs}->{$database}->{$input_name}++;
270
271 push @sources, sub {
272 #warn "### $database $input_name, $full ###\n";
273 $self->_parse_source( $database, $input_name, $full, $s );
274 };
275
276 $nr++;
277 }
278 } );
279
280 $log->debug("found $nr source files");
281
282 # parse all sources
283 $_->() foreach (@sources);
284
285 return $nr;
286 }
287
288 =head2 _parse_source
289
290 $parser->_parse_source($database,$input,$path,$source);
291
292 Called for each normalize source (rules) in each input by L</_read_sources>
293
294 It will report invalid databases and inputs in error log after parsing.
295
296 =cut
297
298 sub _parse_source {
299 my $self = shift;
300 my ($database, $input, $path, $source) = @_;
301
302 $input = _input_name($input);
303
304 my $log = $self->_get_logger();
305
306 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
307 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
308
309 $log->logdie("no source found for database $database input $input path $path") unless ($source);
310
311 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
312
313 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
314
315 #$Document->prune('PPI::Token::Whitespace');
316 $Document->prune('PPI::Token::Comment');
317 #$Document->prune('PPI::Token::Operator');
318
319 # Find all the named subroutines
320
321 $self->{_lookup_errors} = ();
322
323 sub _lookup_error {
324 my $self = shift;
325 my $msg = shift;
326 $self->_get_logger->logconfess("error without message?") unless ($msg);
327 push @{ $self->{_lookup_errors} }, $msg;
328 return '';
329 }
330
331 $Document->find( sub {
332 my ($Document,$Element) = @_;
333
334 $Element->isa('PPI::Token::Word') or return '';
335 $Element->content eq 'lookup' or return '';
336
337 $log->debug("expansion: ", $Element->snext_sibling);
338
339 my $args = $Element->snext_sibling;
340
341 my @e = $args->child(0)->elements;
342 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
343
344 if ($log->is_debug) {
345 my $report = "found " . scalar @e . " elements:\n";
346
347 foreach my $i ( 0 .. $#e ) {
348 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
349 }
350
351 $log->debug($report);
352 }
353
354 my $key_element = $e[8]->clone;
355
356 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
357
358 $log->debug("key part: ", $key_element);
359
360 my @key;
361
362 $key_element->find( sub {
363 my $e = $_[1] || die "no element?";
364 $e->isa('PPI::Token::Word') or return '';
365 $e->content eq 'rec' or return '';
366
367 my $kf = $e->snext_sibling;
368
369 $log->debug("key fragment = $kf");
370
371 push @key, eval $kf;
372 $log->logdie("can't eval { $kf }: $@") if ($@);
373
374 return 1;
375 });
376
377 my $key = join('-', @key ) || $log->logdie("no key found!");
378
379 $log->debug("key = $key");
380
381 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
382 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
383
384 my $create = qq{
385 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
386 };
387
388 $log->debug("create: $create");
389
390 # save code to create this lookup
391 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
392 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
393
394
395 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
396 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
397 }
398
399 # save this dependency
400 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
401
402 if ($#e < 10) {
403 $e[8]->insert_after( $e[8]->clone );
404 $e[8]->insert_after( $e[7]->clone );
405 $e[8]->insert_after( $e[6]->clone );
406 }
407
408 $e[7]->remove;
409 $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
410 $e[8]->remove;
411
412
413 $log->debug(">>> ", $Element->snext_sibling);
414 });
415
416 my $normalize_source = $Document->serialize;
417 $log->debug("create: ", dump($self->{_lookup_create}) );
418 $log->debug("normalize: $normalize_source");
419
420 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
421
422 if ($self->{debug}) {
423 my $Dumper = PPI::Dumper->new( $Document );
424 $Dumper->print;
425 }
426
427 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
428
429 $Document->find( sub {
430 my ($Document,$Element) = @_;
431
432 $Element->isa('PPI::Token::Word') or return '';
433 if ($Element->content =~ m/^(marc|search)/) {
434 my $what = $1;
435 $log->debug("found $what rules in $database/$input");
436 $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
437 } else {
438 return '';
439 }
440 });
441
442 return 1;
443 }
444
445
446 =head2 _q
447
448 Strip single or double quotes around value
449
450 _q(qq/'foo'/) -> foo
451
452 =cut
453
454 sub _q {
455 my $v = shift || return;
456 $v =~ s/^['"]*//g;
457 $v =~ s/['"]*$//g;
458 return $v;
459 }
460
461 =head2 _input_name
462
463 Return C<name> value if HASH or arg if scalar
464
465 _input_name($input)
466
467 =cut
468
469 sub _input_name {
470 my $input = shift || return;
471 if (ref($input) eq 'HASH') {
472 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
473 return $input->{name};
474 } else {
475 return $input;
476 }
477 }
478
479
480 =head1 AUTHOR
481
482 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
483
484 =head1 COPYRIGHT & LICENSE
485
486 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
487
488 This program is free software; you can redistribute it and/or modify it
489 under the same terms as Perl itself.
490
491 =cut
492
493 1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26