/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1259 - (show annotations)
Tue Jul 28 13:55:44 2009 UTC (14 years, 9 months ago) by dpavlin
File size: 11902 byte(s)
prune whitespaces (reverting r1204) because our parser
depends on it

1 package WebPAC::Parser;
2
3 use warnings;
4 use strict;
5
6
7 use PPI;
8 use PPI::Dumper;
9 use Data::Dump qw/dump/;
10 use File::Slurp;
11
12 use base qw/WebPAC::Common/;
13
14 =head1 NAME
15
16 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17
18 =head1 VERSION
19
20 Version 0.08
21
22 =cut
23
24 our $VERSION = '0.08';
25
26 =head1 SYNOPSIS
27
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization. It will also parse other parts of
30 source to produce some of DWIM (I<Do What I Mean>) magic
31 (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32 rules in normalisation).
33
34 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35 reading about LISP. It might be a bit over-the board, but at least it removed
36 separate configuration files for lookups.
37
38 This is experimental code, but it replaces all older formats which where,
39 at one point in time, available in WebPAC.
40
41 FIXME
42
43 =head1 FUNCTIONS
44
45 =head2 new
46
47 Create new parser object.
48
49 my $parser = new WebPAC::Parser(
50 config => new WebPAC::Config(),
51 base_path => '/optional/path/to/conf',
52 only_database => $only
53 );
54
55 =cut
56
57 sub new {
58 my $class = shift;
59 my $self = {@_};
60 bless($self, $class);
61
62 my $log = $self->_get_logger();
63
64 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
65
66 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
67
68 $self->_read_sources;
69
70 $self ? return $self : return undef;
71 }
72
73 =head2 valid_database
74
75 my $ok = $parse->valid_database('key');
76
77 =cut
78
79 sub valid_database {
80 my $self = shift;
81
82 my $database = shift || return;
83
84 return defined($self->{valid_inputs}->{ _q($database) });
85 }
86
87 =head2 valid_database_input
88
89 my $ok = $parse->valid_database('database_key','input_name');
90
91 =cut
92
93 sub valid_database_input {
94 my $self = shift;
95 my ($database,$input) = @_;
96 $input = _input_name($input);
97 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
98 }
99
100 =head2 depends
101
102 Return all databases and inputs on which specified one depends
103
104 $depends_on = $parser->depends('database','input');
105
106 =cut
107
108 sub depends {
109 my $self = shift;
110 my ($database,$input) = @_;
111 $input = _input_name($input);
112 $self->_get_logger->debug("depends($database,$input)");
113 return unless (
114 defined( $self->{depends}->{ _q($database) } ) &&
115 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
116 );
117 return $self->{depends}->{ _q($database) }->{ _q($input) };
118 }
119
120 =head2 have_lookup_create
121
122 my @keys = $parser->have_lookup_create($database, $input);
123
124 =cut
125
126 sub have_lookup_create {
127 my $self = shift;
128 my ($database,$input) = @_;
129 $input = _input_name($input);
130 return unless (
131 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
132 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
133 );
134 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
135 }
136
137
138 =head2 lookup_create_rules
139
140 my $source = $parser->lookup_create_rules($database, $input);
141
142 =cut
143
144 sub lookup_create_rules {
145 my $self = shift;
146 my ($database,$input) = @_;
147 $input = _input_name($input);
148 return unless (
149 defined( $self->{_lookup_create}->{ _q($database) } ) &&
150 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
151 );
152 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
153 }
154
155 =head2 normalize_rules
156
157 my $source = $parser->normalize_rules($database, $input);
158
159 =cut
160
161 sub normalize_rules {
162 my $self = shift;
163 my ($database,$input) = @_;
164 $input = _input_name($input);
165 return unless (
166 defined( $self->{_normalize_source}->{ _q($database) } ) &&
167 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
168 );
169 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
170 }
171
172
173 =head2 have_rules
174
175 my $do_marc = $parser->have_rules('marc', $database, $input);
176 my $do_index = $parser->have_rules('search', $database);
177
178 This function will return hash containing count of all found C<marc_*> or
179 C<search> directives. Input name is optional.
180
181 =cut
182
183 sub have_rules {
184 my $self = shift;
185
186 my $log = $self->_get_logger();
187 my $type = shift @_ || $log->logconfess("need at least type");
188 my $database = shift @_ || $log->logconfess("database is required");
189 my $input = shift @_;
190
191 $input = _input_name($input);
192
193
194 return unless defined( $self->{_have_rules}->{ _q($database) } );
195
196 my $database_rules = $self->{_have_rules}->{ _q($database ) };
197
198 if (defined($input)) {
199
200 return unless (
201 defined( $database_rules->{ _q($input) } ) &&
202 defined( $database_rules->{ _q($input) }->{ $type } )
203 );
204
205 return $database_rules->{ _q($input) }->{ $type };
206 }
207
208 my $usage;
209
210 foreach my $i (keys %{ $database_rules }) {
211 next unless defined( $database_rules->{$i}->{$type} );
212
213 foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
214 $usage->{ $t } += $database_rules->{ $i }->{ $t };
215 }
216 }
217
218 return $usage;
219
220 }
221
222
223 =head1 PRIVATE
224
225 =head2 _read_sources
226
227 my $source_files = $parser->_read_sources;
228
229 Called by L</new>.
230
231 =cut
232
233 sub _read_sources {
234 my $self = shift;
235
236 my $log = $self->_get_logger();
237
238 my $nr = 0;
239
240 my @sources;
241
242 my $lookup_src_cache;
243
244 my $only_database = $self->{only_database};
245 my $only_input = $self->{only_input};
246
247 $self->{config}->iterate_inputs( sub {
248 my ($input, $database) = @_;
249
250 return if ( $only_database && $database !~ m/$only_database/i );
251 return if ( $only_input && $input->{name} !~ m/$only_input/i );
252
253 $log->debug("database: $database input = ", dump($input));
254
255 foreach my $normalize (@{ $input->{normalize} }) {
256
257 my $path = $normalize->{path};
258 return unless($path);
259 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
260
261 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
262
263 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
264
265 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
266
267 $log->debug("$database/$input_name: adding $path");
268
269 $self->{valid_inputs}->{$database}->{$input_name}++;
270
271 push @sources, sub {
272 #warn "### $database $input_name, $full ###\n";
273 $self->_parse_source( $database, $input_name, $full, $s );
274 };
275
276 $nr++;
277 }
278 } );
279
280 $log->debug("found $nr source files");
281
282 # parse all sources
283 $_->() foreach (@sources);
284
285 return $nr;
286 }
287
288 =head2 _parse_source
289
290 $parser->_parse_source($database,$input,$path,$source);
291
292 Called for each normalize source (rules) in each input by L</_read_sources>
293
294 It will report invalid databases and inputs in error log after parsing.
295
296 =cut
297
298 sub _parse_source {
299 my $self = shift;
300 my ($database, $input, $path, $source) = @_;
301
302 $input = _input_name($input);
303
304 my $log = $self->_get_logger();
305
306 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
307 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
308
309 $log->logdie("no source found for database $database input $input path $path") unless ($source);
310
311 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
312
313 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
314
315 $Document->prune('PPI::Token::Whitespace');
316 $Document->prune('PPI::Token::Comment');
317 #$Document->prune('PPI::Token::Operator');
318
319 # Find all the named subroutines
320
321 $self->{_lookup_errors} = ();
322
323 sub _lookup_error {
324 my $self = shift;
325 my $msg = shift;
326 $self->_get_logger->logconfess("error without message?") unless ($msg);
327 push @{ $self->{_lookup_errors} }, $msg;
328 return '';
329 }
330
331 $Document->find( sub {
332 my ($Document,$Element) = @_;
333
334 $Element->isa('PPI::Token::Word') or return '';
335
336 if ( $Element->content eq 'sub' ) {
337 # repair demage done by prune of whitespace
338 $Element->insert_after( PPI::Token::Whitespace->new(' ') );
339 return '';
340 }
341
342 $Element->content eq 'lookup' or return '';
343
344 $log->debug("expansion: ", $Element->snext_sibling);
345
346 my $args = $Element->snext_sibling;
347
348 my @e = $args->child(0)->elements;
349 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
350
351 if ($log->is_debug) {
352 my $report = "found " . scalar @e . " elements:\n";
353
354 foreach my $i ( 0 .. $#e ) {
355 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
356 }
357
358 $log->debug($report);
359 }
360
361 my $key_element = $e[8]->clone;
362
363 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
364
365 $log->debug("key part: ", $key_element);
366
367 my @key;
368
369 $key_element->find( sub {
370 my $e = $_[1] || die "no element?";
371 $e->isa('PPI::Token::Word') or return '';
372 $e->content eq 'rec' or return '';
373
374 my $kf = $e->snext_sibling;
375
376 $log->debug("key fragment = $kf");
377
378 push @key, eval $kf;
379 $log->logdie("can't eval { $kf }: $@") if ($@);
380
381 return 1;
382 });
383
384 my $key = join('-', @key ) || $log->logdie("no key found!");
385
386 $log->debug("key = $key");
387
388 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
389 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
390
391 my $create = qq{
392 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
393 };
394
395 $log->debug("create: $create");
396
397 # save code to create this lookup
398 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
399 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
400
401
402 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
403 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
404 }
405
406 # save this dependency
407 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
408
409 if ($#e < 10) {
410 $e[8]->insert_after( $e[8]->clone );
411 $e[8]->insert_after( $e[7]->clone );
412 $e[8]->insert_after( $e[6]->clone );
413 }
414
415 $e[7]->remove;
416 $e[8]->insert_before( PPI::Token::Quote::Single->new( "'$key'" ) );
417 $e[8]->remove;
418
419
420 $log->debug(">>> ", $Element->snext_sibling);
421 });
422
423 my $normalize_source = $Document->serialize;
424 $log->debug("create: ", dump($self->{_lookup_create}) );
425 $log->debug("normalize: $normalize_source");
426
427 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
428
429 if ($self->{debug}) {
430 my $Dumper = PPI::Dumper->new( $Document );
431 $Dumper->print;
432 }
433
434 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
435
436 $Document->find( sub {
437 my ($Document,$Element) = @_;
438
439 $Element->isa('PPI::Token::Word') or return '';
440 if ($Element->content =~ m/^(marc|search)/) {
441 my $what = $1;
442 $log->debug("found $what rules in $database/$input");
443 $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
444 } else {
445 return '';
446 }
447 });
448
449 return 1;
450 }
451
452
453 =head2 _q
454
455 Strip single or double quotes around value
456
457 _q(qq/'foo'/) -> foo
458
459 =cut
460
461 sub _q {
462 my $v = shift || return;
463 $v =~ s/^['"]*//g;
464 $v =~ s/['"]*$//g;
465 return $v;
466 }
467
468 =head2 _input_name
469
470 Return C<name> value if HASH or arg if scalar
471
472 _input_name($input)
473
474 =cut
475
476 sub _input_name {
477 my $input = shift || return;
478 if (ref($input) eq 'HASH') {
479 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
480 return $input->{name};
481 } else {
482 return $input;
483 }
484 }
485
486
487 =head1 AUTHOR
488
489 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
490
491 =head1 COPYRIGHT & LICENSE
492
493 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
494
495 This program is free software; you can redistribute it and/or modify it
496 under the same terms as Perl itself.
497
498 =cut
499
500 1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26