/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 826 - (show annotations)
Sun May 20 16:19:13 2007 UTC (16 years, 11 months ago) by dpavlin
File size: 11485 byte(s)
 r1211@llin:  dpavlin | 2007-05-20 14:48:37 +0200
 Removed debugging output

1 package WebPAC::Parser;
2
3 use warnings;
4 use strict;
5
6
7 use PPI;
8 use PPI::Dumper;
9 use Data::Dump qw/dump/;
10 use File::Slurp;
11
12 use base qw/WebPAC::Common/;
13
14 =head1 NAME
15
16 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17
18 =head1 VERSION
19
20 Version 0.08
21
22 =cut
23
24 our $VERSION = '0.08';
25
26 =head1 SYNOPSIS
27
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization. It will also parse other parts of
30 source to produce some of DWIM (I<Do What I Mean>) magic
31 (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32 rules in normalisation).
33
34 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35 reading about LISP. It might be a bit over-the board, but at least it removed
36 separate configuration files for lookups.
37
38 This is experimental code, but it replaces all older formats which where,
39 at one point in time, available in WebPAC.
40
41 FIXME
42
43 =head1 FUNCTIONS
44
45 =head2 new
46
47 Create new parser object.
48
49 my $parser = new WebPAC::Parser(
50 config => new WebPAC::Config(),
51 base_path => '/optional/path/to/conf',
52 );
53
54 =cut
55
56 sub new {
57 my $class = shift;
58 my $self = {@_};
59 bless($self, $class);
60
61 my $log = $self->_get_logger();
62
63 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
64
65 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
66
67 $self->_read_sources;
68
69 $self ? return $self : return undef;
70 }
71
72 =head2 valid_database
73
74 my $ok = $parse->valid_database('key');
75
76 =cut
77
78 sub valid_database {
79 my $self = shift;
80
81 my $database = shift || return;
82
83 return defined($self->{valid_inputs}->{ _q($database) });
84 }
85
86 =head2 valid_database_input
87
88 my $ok = $parse->valid_database('database_key','input_name');
89
90 =cut
91
92 sub valid_database_input {
93 my $self = shift;
94 my ($database,$input) = @_;
95 $input = _input_name($input);
96 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
97 }
98
99 =head2 depends
100
101 Return all databases and inputs on which specified one depends
102
103 $depends_on = $parser->depends('database','input');
104
105 =cut
106
107 sub depends {
108 my $self = shift;
109 my ($database,$input) = @_;
110 $input = _input_name($input);
111 $self->_get_logger->debug("depends($database,$input)");
112 return unless (
113 defined( $self->{depends}->{ _q($database) } ) &&
114 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
115 );
116 return $self->{depends}->{ _q($database) }->{ _q($input) };
117 }
118
119 =head2 have_lookup_create
120
121 my @keys = $parser->have_lookup_create($database, $input);
122
123 =cut
124
125 sub have_lookup_create {
126 my $self = shift;
127 my ($database,$input) = @_;
128 $input = _input_name($input);
129 return unless (
130 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
131 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
132 );
133 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
134 }
135
136
137 =head2 lookup_create_rules
138
139 my $source = $parser->lookup_create_rules($database, $input);
140
141 =cut
142
143 sub lookup_create_rules {
144 my $self = shift;
145 my ($database,$input) = @_;
146 $input = _input_name($input);
147 return unless (
148 defined( $self->{_lookup_create}->{ _q($database) } ) &&
149 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
150 );
151 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
152 }
153
154 =head2 normalize_rules
155
156 my $source = $parser->normalize_rules($database, $input);
157
158 =cut
159
160 sub normalize_rules {
161 my $self = shift;
162 my ($database,$input) = @_;
163 $input = _input_name($input);
164 return unless (
165 defined( $self->{_normalize_source}->{ _q($database) } ) &&
166 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
167 );
168 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
169 }
170
171
172 =head2 have_rules
173
174 my $do_marc = $parser->have_rules('marc', $database, $input);
175 my $do_index = $parser->have_rules('search', $database);
176
177 This function will return hash containing count of all found C<marc_*> or
178 C<search> directives. Input name is optional.
179
180 =cut
181
182 sub have_rules {
183 my $self = shift;
184
185 my $log = $self->_get_logger();
186 my $type = shift @_ || $log->logconfess("need at least type");
187 my $database = shift @_ || $log->logconfess("database is required");
188 my $input = shift @_;
189
190 $input = _input_name($input);
191
192
193 return unless defined( $self->{_have_rules}->{ _q($database) } );
194
195 my $database_rules = $self->{_have_rules}->{ _q($database ) };
196
197 if (defined($input)) {
198
199 return unless (
200 defined( $database_rules->{ _q($input) } ) &&
201 defined( $database_rules->{ _q($input) }->{ $type } )
202 );
203
204 return $database_rules->{ _q($input) }->{ $type };
205 }
206
207 my $usage;
208
209 foreach my $i (keys %{ $database_rules }) {
210 next unless defined( $database_rules->{$i}->{$type} );
211
212 foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
213 $usage->{ $t } += $database_rules->{ $i }->{ $t };
214 }
215 }
216
217 return $usage;
218
219 }
220
221
222 =head1 PRIVATE
223
224 =head2 _read_sources
225
226 my $source_files = $parser->_read_sources;
227
228 Called by L</new>.
229
230 =cut
231
232 sub _read_sources {
233 my $self = shift;
234
235 my $log = $self->_get_logger();
236
237 my $nr = 0;
238
239 my @sources;
240
241 my $lookup_src_cache;
242
243 $self->{config}->iterate_inputs( sub {
244 my ($input, $database) = @_;
245
246 $log->debug("database: $database input = ", dump($input));
247
248 foreach my $normalize (@{ $input->{normalize} }) {
249
250 my $path = $normalize->{path};
251 return unless($path);
252 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
253
254 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
255
256 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
257
258 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
259
260 $log->debug("$database/$input_name: adding $path");
261
262 $self->{valid_inputs}->{$database}->{$input_name}++;
263
264 push @sources, sub {
265 #warn "### $database $input_name, $full ###\n";
266 $self->_parse_source( $database, $input_name, $full, $s );
267 };
268
269 $nr++;
270 }
271 } );
272
273 $log->debug("found $nr source files");
274
275 # parse all sources
276 $_->() foreach (@sources);
277
278 return $nr;
279 }
280
281 =head2 _parse_source
282
283 $parser->_parse_source($database,$input,$path,$source);
284
285 Called for each normalize source (rules) in each input by L</_read_sources>
286
287 It will report invalid databases and inputs in error log after parsing.
288
289 =cut
290
291 sub _parse_source {
292 my $self = shift;
293 my ($database, $input, $path, $source) = @_;
294
295 $input = _input_name($input);
296
297 my $log = $self->_get_logger();
298
299 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
300 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
301
302 $log->logdie("no source found for database $database input $input path $path") unless ($source);
303
304 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
305
306 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
307
308 $Document->prune('PPI::Token::Whitespace');
309 $Document->prune('PPI::Token::Comment');
310 #$Document->prune('PPI::Token::Operator');
311
312 # Find all the named subroutines
313
314 $self->{_lookup_errors} = ();
315
316 sub _lookup_error {
317 my $self = shift;
318 my $msg = shift;
319 $self->_get_logger->logconfess("error without message?") unless ($msg);
320 push @{ $self->{_lookup_errors} }, $msg;
321 return '';
322 }
323
324 $Document->find( sub {
325 my ($Document,$Element) = @_;
326
327 $Element->isa('PPI::Token::Word') or return '';
328 $Element->content eq 'lookup' or return '';
329
330 $log->debug("expansion: ", $Element->snext_sibling);
331
332 my $args = $Element->snext_sibling;
333
334 my @e = $args->child(0)->elements;
335 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
336
337 if ($log->is_debug) {
338 my $report = "found " . scalar @e . " elements:\n";
339
340 foreach my $i ( 0 .. $#e ) {
341 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
342 }
343
344 $log->debug($report);
345 }
346
347 my $key_element = $e[8]->clone;
348
349 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
350
351 $log->debug("key part: ", $key_element);
352
353 my @key;
354
355 $key_element->find( sub {
356 my $e = $_[1] || die "no element?";
357 $e->isa('PPI::Token::Word') or return '';
358 $e->content eq 'rec' or return '';
359
360 my $kf = $e->snext_sibling;
361
362 $log->debug("key fragment = $kf");
363
364 push @key, eval $kf;
365 $log->logdie("can't eval { $kf }: $@") if ($@);
366
367 return 1;
368 });
369
370 my $key = join('-', @key ) || $log->logdie("no key found!");
371
372 $log->debug("key = $key");
373
374 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
375 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
376
377 my $create = qq{
378 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
379 };
380
381 $log->debug("create: $create");
382
383 # save code to create this lookup
384 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
385 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
386
387
388 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
389 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
390 }
391
392 # save this dependency
393 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
394
395 if ($#e < 10) {
396 $e[8]->insert_after( $e[8]->clone );
397 $e[8]->insert_after( $e[7]->clone );
398 $e[8]->insert_after( $e[6]->clone );
399 }
400
401 $e[7]->remove;
402 $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
403 $e[8]->remove;
404
405
406 $log->debug(">>> ", $Element->snext_sibling);
407 });
408
409 my $normalize_source = $Document->serialize;
410 $log->debug("create: ", dump($self->{_lookup_create}) );
411 $log->debug("normalize: $normalize_source");
412
413 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
414
415 if ($self->{debug}) {
416 my $Dumper = PPI::Dumper->new( $Document );
417 $Dumper->print;
418 }
419
420 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
421
422 $Document->find( sub {
423 my ($Document,$Element) = @_;
424
425 $Element->isa('PPI::Token::Word') or return '';
426 if ($Element->content =~ m/^(marc|search)/) {
427 my $what = $1;
428 $log->debug("found $what rules in $database/$input");
429 $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
430 } else {
431 return '';
432 }
433 });
434
435 return 1;
436 }
437
438
439 =head2 _q
440
441 Strip single or double quotes around value
442
443 _q(qq/'foo'/) -> foo
444
445 =cut
446
447 sub _q {
448 my $v = shift || return;
449 $v =~ s/^['"]*//g;
450 $v =~ s/['"]*$//g;
451 return $v;
452 }
453
454 =head2 _input_name
455
456 Return C<name> value if HASH or arg if scalar
457
458 _input_name($input)
459
460 =cut
461
462 sub _input_name {
463 my $input = shift || return;
464 if (ref($input) eq 'HASH') {
465 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
466 return $input->{name};
467 } else {
468 return $input;
469 }
470 }
471
472
473 =head1 AUTHOR
474
475 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
476
477 =head1 COPYRIGHT & LICENSE
478
479 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
480
481 This program is free software; you can redistribute it and/or modify it
482 under the same terms as Perl itself.
483
484 =cut
485
486 1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26