/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 755 - (show annotations)
Sun Oct 8 20:28:17 2006 UTC (17 years, 6 months ago) by dpavlin
File size: 11409 byte(s)
 r1097@llin:  dpavlin | 2006-10-08 22:24:54 +0200
 replaced generate_marc with universal have_rules [0.08]

1 package WebPAC::Parser;
2
3 use warnings;
4 use strict;
5
6
7 use PPI;
8 use PPI::Dumper;
9 use Data::Dump qw/dump/;
10 use File::Slurp;
11
12 use base qw/WebPAC::Common/;
13
14 =head1 NAME
15
16 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17
18 =head1 VERSION
19
20 Version 0.08
21
22 =cut
23
24 our $VERSION = '0.08';
25
26 =head1 SYNOPSIS
27
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization. It will also parse other parts of
30 source to produce some of DWIM (I<Do What I Mean>) magic
31 (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32 rules in normalisation).
33
34 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35 reading about LISP. It might be a bit over-the board, but at least it removed
36 separate configuration files for lookups.
37
38 This is experimental code, but it replaces all older formats which where,
39 at one point in time, available in WebPAC.
40
41 FIXME
42
43 =head1 FUNCTIONS
44
45 =head2 new
46
47 Create new parser object.
48
49 my $parser = new WebPAC::Parser(
50 config => new WebPAC::Config(),
51 base_path => '/optional/path/to/conf',
52 );
53
54 =cut
55
56 sub new {
57 my $class = shift;
58 my $self = {@_};
59 bless($self, $class);
60
61 my $log = $self->_get_logger();
62
63 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
64
65 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
66
67 $self->_read_sources;
68
69 $self ? return $self : return undef;
70 }
71
72 =head2 valid_database
73
74 my $ok = $parse->valid_database('key');
75
76 =cut
77
78 sub valid_database {
79 my $self = shift;
80
81 my $database = shift || return;
82
83 return defined($self->{valid_inputs}->{ _q($database) });
84 }
85
86 =head2 valid_database_input
87
88 my $ok = $parse->valid_database('database_key','input_name');
89
90 =cut
91
92 sub valid_database_input {
93 my $self = shift;
94 my ($database,$input) = @_;
95 $input = _input_name($input);
96 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
97 }
98
99 =head2 depends
100
101 Return all databases and inputs on which specified one depends
102
103 $depends_on = $parser->depends('database','input');
104
105 =cut
106
107 sub depends {
108 my $self = shift;
109 my ($database,$input) = @_;
110 $input = _input_name($input);
111 $self->_get_logger->debug("depends($database,$input)");
112 return unless (
113 defined( $self->{depends}->{ _q($database) } ) &&
114 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
115 );
116 return $self->{depends}->{ _q($database) }->{ _q($input) };
117 }
118
119 =head2 have_lookup_create
120
121 my @keys = $parser->have_lookup_create($database, $input);
122
123 =cut
124
125 sub have_lookup_create {
126 my $self = shift;
127 my ($database,$input) = @_;
128 $input = _input_name($input);
129 return unless (
130 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
131 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
132 );
133 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
134 }
135
136
137 =head2 lookup_create_rules
138
139 my $source = $parser->lookup_create_rules($database, $input);
140
141 =cut
142
143 sub lookup_create_rules {
144 my $self = shift;
145 my ($database,$input) = @_;
146 $input = _input_name($input);
147 return unless (
148 defined( $self->{_lookup_create}->{ _q($database) } ) &&
149 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
150 );
151 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
152 }
153
154 =head2 normalize_rules
155
156 my $source = $parser->normalize_rules($database, $input);
157
158 =cut
159
160 sub normalize_rules {
161 my $self = shift;
162 my ($database,$input) = @_;
163 $input = _input_name($input);
164 return unless (
165 defined( $self->{_normalize_source}->{ _q($database) } ) &&
166 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
167 );
168 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
169 }
170
171
172 =head2 have_rules
173
174 my $do_marc = $parser->have_rules('marc', $database, $input);
175 my $do_index = $parser->have_rules('search', $database);
176
177 This function will return hash containing count of all found C<marc_*> or
178 C<search> directives. Input name is optional.
179
180 =cut
181
182 sub have_rules {
183 my $self = shift;
184
185 my $log = $self->_get_logger();
186 my $type = shift @_ || $log->logconfess("need at least type");
187 my $database = shift @_ || $log->logconfess("database is required");
188 my $input = shift @_;
189
190 $input = _input_name($input);
191
192
193 return unless defined( $self->{_have_rules}->{ _q($database) } );
194
195 my $database_rules = $self->{_have_rules}->{ _q($database ) };
196
197 if (defined($input)) {
198
199 return unless (
200 defined( $database_rules->{ _q($input) } ) &&
201 defined( $database_rules->{ _q($input) }->{ $type } )
202 );
203
204 return $database_rules->{ _q($input) }->{ $type };
205 }
206
207 my $usage;
208
209 foreach my $i (keys %{ $database_rules }) {
210 next unless defined( $database_rules->{$i}->{$type} );
211
212 foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
213 $usage->{ $t } += $database_rules->{ $i }->{ $t };
214 }
215 }
216
217 return $usage;
218
219 }
220
221
222 =head1 PRIVATE
223
224 =head2 _read_sources
225
226 my $source_files = $parser->_read_sources;
227
228 Called by L</new>.
229
230 =cut
231
232 sub _read_sources {
233 my $self = shift;
234
235 my $log = $self->_get_logger();
236
237 my $nr = 0;
238
239 my @sources;
240
241 $self->{config}->iterate_inputs( sub {
242 my ($input, $database) = @_;
243
244 $log->debug("database: $database input = ", dump($input));
245
246 foreach my $normalize (@{ $input->{normalize} }) {
247
248 my $path = $normalize->{path};
249 return unless($path);
250 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
251
252 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
253
254 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
255
256 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
257
258 $log->debug("$database/$input_name: adding $path");
259
260 $self->{valid_inputs}->{$database}->{$input_name}++;
261
262 push @sources, sub {
263 $self->_parse_source( $database, $input_name, $full, $s );
264 };
265
266 $nr++;
267 }
268 } );
269
270 $log->debug("found $nr source files");
271
272 # parse all sources
273 $_->() foreach (@sources);
274
275 return $nr;
276 }
277
278 =head2 _parse_source
279
280 $parser->_parse_source($database,$input,$path,$source);
281
282 Called for each normalize source (rules) in each input by L</_read_sources>
283
284 It will report invalid databases and inputs in error log after parsing.
285
286 =cut
287
288 sub _parse_source {
289 my $self = shift;
290 my ($database, $input, $path, $source) = @_;
291
292 $input = _input_name($input);
293
294 my $log = $self->_get_logger();
295
296 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
297 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
298
299 $log->logdie("no source found for database $database input $input path $path") unless ($source);
300
301 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
302
303 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
304
305 $Document->prune('PPI::Token::Whitespace');
306 $Document->prune('PPI::Token::Comment');
307 #$Document->prune('PPI::Token::Operator');
308
309 # Find all the named subroutines
310
311 $self->{_lookup_errors} = ();
312
313 sub _lookup_error {
314 my $self = shift;
315 my $msg = shift;
316 $self->_get_logger->logconfess("error without message?") unless ($msg);
317 push @{ $self->{_lookup_errors} }, $msg;
318 return '';
319 }
320
321 $Document->find( sub {
322 my ($Document,$Element) = @_;
323
324 $Element->isa('PPI::Token::Word') or return '';
325 $Element->content eq 'lookup' or return '';
326
327 $log->debug("expansion: ", $Element->snext_sibling);
328
329 my $args = $Element->snext_sibling;
330
331 my @e = $args->child(0)->elements;
332 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
333
334 if ($log->is_debug) {
335 my $report = "found " . scalar @e . " elements:\n";
336
337 foreach my $i ( 0 .. $#e ) {
338 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
339 }
340
341 $log->debug($report);
342 }
343
344 my $key_element = $e[8]->clone;
345
346 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
347
348 $log->debug("key part: ", $key_element);
349
350 my @key;
351
352 $key_element->find( sub {
353 my $e = $_[1] || die "no element?";
354 $e->isa('PPI::Token::Word') or return '';
355 $e->content eq 'rec' or return '';
356
357 my $kf = $e->snext_sibling;
358
359 $log->debug("key fragment = $kf");
360
361 push @key, eval $kf;
362 $log->logdie("can't eval { $kf }: $@") if ($@);
363
364 return 1;
365 });
366
367 my $key = join('-', @key ) || $log->logdie("no key found!");
368
369 $log->debug("key = $key");
370
371 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
372 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
373
374 my $create = qq{
375 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
376 };
377
378 $log->debug("create: $create");
379
380 # save code to create this lookup
381 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
382 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
383
384
385 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
386 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
387 }
388
389 # save this dependency
390 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
391
392 if ($#e < 10) {
393 $e[8]->insert_after( $e[8]->clone );
394 $e[8]->insert_after( $e[7]->clone );
395 $e[8]->insert_after( $e[6]->clone );
396 }
397
398 $e[7]->remove;
399 $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
400 $e[8]->remove;
401
402
403 $log->debug(">>> ", $Element->snext_sibling);
404 });
405
406 my $normalize_source = $Document->serialize;
407 $log->debug("create: ", dump($self->{_lookup_create}) );
408 $log->debug("normalize: $normalize_source");
409
410 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
411
412 if ($self->{debug}) {
413 my $Dumper = PPI::Dumper->new( $Document );
414 $Dumper->print;
415 }
416
417 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
418
419 $Document->find( sub {
420 my ($Document,$Element) = @_;
421
422 $Element->isa('PPI::Token::Word') or return '';
423 if ($Element->content =~ m/^(marc|search)/) {
424 my $what = $1;
425 $log->debug("found $what rules in $database/$input");
426 $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
427 } else {
428 return '';
429 }
430 });
431
432 return 1;
433 }
434
435
436 =head2 _q
437
438 Strip single or double quotes around value
439
440 _q(qq/'foo'/) -> foo
441
442 =cut
443
444 sub _q {
445 my $v = shift || return;
446 $v =~ s/^['"]*//g;
447 $v =~ s/['"]*$//g;
448 return $v;
449 }
450
451 =head2 _input_name
452
453 Return C<name> value if HASH or arg if scalar
454
455 _input_name($input)
456
457 =cut
458
459 sub _input_name {
460 my $input = shift || return;
461 if (ref($input) eq 'HASH') {
462 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
463 return $input->{name};
464 } else {
465 return $input;
466 }
467 }
468
469
470 =head1 AUTHOR
471
472 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
473
474 =head1 COPYRIGHT & LICENSE
475
476 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
477
478 This program is free software; you can redistribute it and/or modify it
479 under the same terms as Perl itself.
480
481 =cut
482
483 1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26