/[webpac2]/trunk/lib/WebPAC/Parser.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 737 - (show annotations)
Thu Oct 5 14:38:45 2006 UTC (17 years, 6 months ago) by dpavlin
File size: 10716 byte(s)
 r1067@llin:  dpavlin | 2006-10-05 16:35:45 +0200
 added generate_marc to find out if normalization rules have marc* directives

1 package WebPAC::Parser;
2
3 use warnings;
4 use strict;
5
6
7 use PPI;
8 use PPI::Dumper;
9 use Data::Dump qw/dump/;
10 use File::Slurp;
11
12 use base qw/WebPAC::Common/;
13
14 =head1 NAME
15
16 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
17
18 =head1 VERSION
19
20 Version 0.07
21
22 =cut
23
24 our $VERSION = '0.07';
25
26 =head1 SYNOPSIS
27
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization. It will also parse other parts of
30 source to produce some of DWIM (I<Do What I Mean>) magic
31 (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32 rules in normalisation).
33
34 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35 reading about LISP. It might be a bit over-the board, but at least it removed
36 separate configuration files for lookups.
37
38 This is experimental code, but it replaces all older formats which where,
39 at one point in time, available in WebPAC.
40
41 FIXME
42
43 =head1 FUNCTIONS
44
45 =head2 new
46
47 Create new parser object.
48
49 my $parser = new WebPAC::Parser(
50 config => new WebPAC::Config(),
51 base_path => '/optional/path/to/conf',
52 );
53
54 =cut
55
56 sub new {
57 my $class = shift;
58 my $self = {@_};
59 bless($self, $class);
60
61 my $log = $self->_get_logger();
62
63 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
64
65 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
66
67 $self->_read_sources;
68
69 $self ? return $self : return undef;
70 }
71
72 =head2 valid_database
73
74 my $ok = $parse->valid_database('key');
75
76 =cut
77
78 sub valid_database {
79 my $self = shift;
80
81 my $database = shift || return;
82
83 return defined($self->{valid_inputs}->{ _q($database) });
84 }
85
86 =head2 valid_database_input
87
88 my $ok = $parse->valid_database('database_key','input_name');
89
90 =cut
91
92 sub valid_database_input {
93 my $self = shift;
94 my ($database,$input) = @_;
95 $input = _input_name($input);
96 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
97 }
98
99 =head2 depends
100
101 Return all databases and inputs on which specified one depends
102
103 $depends_on = $parser->depends('database','input');
104
105 =cut
106
107 sub depends {
108 my $self = shift;
109 my ($database,$input) = @_;
110 $input = _input_name($input);
111 $self->_get_logger->debug("depends($database,$input)");
112 return unless (
113 defined( $self->{depends}->{ _q($database) } ) &&
114 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
115 );
116 return $self->{depends}->{ _q($database) }->{ _q($input) };
117 }
118
119 =head2 have_lookup_create
120
121 my @keys = $parser->have_lookup_create($database, $input);
122
123 =cut
124
125 sub have_lookup_create {
126 my $self = shift;
127 my ($database,$input) = @_;
128 $input = _input_name($input);
129 return unless (
130 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
131 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
132 );
133 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
134 }
135
136
137 =head2 lookup_create_rules
138
139 my $source = $parser->lookup_create_rules($database, $input);
140
141 =cut
142
143 sub lookup_create_rules {
144 my $self = shift;
145 my ($database,$input) = @_;
146 $input = _input_name($input);
147 return unless (
148 defined( $self->{_lookup_create}->{ _q($database) } ) &&
149 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
150 );
151 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
152 }
153
154 =head2 normalize_rules
155
156 my $source = $parser->normalize_rules($database, $input);
157
158 =cut
159
160 sub normalize_rules {
161 my $self = shift;
162 my ($database,$input) = @_;
163 $input = _input_name($input);
164 return unless (
165 defined( $self->{_normalize_source}->{ _q($database) } ) &&
166 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
167 );
168 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
169 }
170
171
172 =head2 generate_marc
173
174 my $do_marc = $parser->generate_marc($database, $input);
175
176 This function will return hash containing count of all found C<marc_*> directives.
177
178 =cut
179
180 sub generate_marc {
181 my $self = shift;
182 my ($database,$input) = @_;
183 $input = _input_name($input);
184 return unless (
185 defined( $self->{_generate_marc}->{ _q($database) } ) &&
186 defined( $self->{_generate_marc}->{ _q($database) }->{ _q($input) } )
187 );
188 return $self->{_generate_marc}->{ _q($database) }->{ _q($input) };
189 }
190
191
192 =head1 PRIVATE
193
194 =head2 _read_sources
195
196 my $source_files = $parser->_read_sources;
197
198 Called by L</new>.
199
200 =cut
201
202 sub _read_sources {
203 my $self = shift;
204
205 my $log = $self->_get_logger();
206
207 my $nr = 0;
208
209 my @sources;
210
211 $self->{config}->iterate_inputs( sub {
212 my ($input, $database) = @_;
213
214 $log->debug("database: $database input = ", dump($input));
215
216 foreach my $normalize (@{ $input->{normalize} }) {
217
218 my $path = $normalize->{path};
219 return unless($path);
220 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
221
222 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
223
224 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
225
226 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
227
228 $log->debug("$database/$input_name: adding $path");
229
230 $self->{valid_inputs}->{$database}->{$input_name}++;
231
232 push @sources, sub {
233 $self->_parse_source( $database, $input_name, $full, $s );
234 };
235
236 $nr++;
237 }
238 } );
239
240 $log->debug("found $nr source files");
241
242 # parse all sources
243 $_->() foreach (@sources);
244
245 return $nr;
246 }
247
248 =head2 _parse_source
249
250 $parser->_parse_source($database,$input,$path,$source);
251
252 Called for each normalize source (rules) in each input by L</_read_sources>
253
254 It will report invalid databases and inputs in error log after parsing.
255
256 =cut
257
258 sub _parse_source {
259 my $self = shift;
260 my ($database, $input, $path, $source) = @_;
261
262 $input = _input_name($input);
263
264 my $log = $self->_get_logger();
265
266 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
267 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
268
269 $log->logdie("no source found for database $database input $input path $path") unless ($source);
270
271 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
272
273 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
274
275 $Document->prune('PPI::Token::Whitespace');
276 $Document->prune('PPI::Token::Comment');
277 #$Document->prune('PPI::Token::Operator');
278
279 # Find all the named subroutines
280
281 $self->{_lookup_errors} = ();
282
283 sub _lookup_error {
284 my $self = shift;
285 my $msg = shift;
286 $self->_get_logger->logconfess("error without message?") unless ($msg);
287 push @{ $self->{_lookup_errors} }, $msg;
288 return '';
289 }
290
291 $Document->find( sub {
292 my ($Document,$Element) = @_;
293
294 $Element->isa('PPI::Token::Word') or return '';
295 $Element->content eq 'lookup' or return '';
296
297 $log->debug("expansion: ", $Element->snext_sibling);
298
299 my $args = $Element->snext_sibling;
300
301 my @e = $args->child(0)->elements;
302 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
303
304 if ($log->is_debug) {
305 my $report = "found " . scalar @e . " elements:\n";
306
307 foreach my $i ( 0 .. $#e ) {
308 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
309 }
310
311 $log->debug($report);
312 }
313
314 my $key_element = $e[8]->clone;
315
316 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
317
318 $log->debug("key part: ", $key_element);
319
320 my @key;
321
322 $key_element->find( sub {
323 my $e = $_[1] || die "no element?";
324 $e->isa('PPI::Token::Word') or return '';
325 $e->content eq 'rec' or return '';
326
327 my $kf = $e->snext_sibling;
328
329 $log->debug("key fragment = $kf");
330
331 push @key, eval $kf;
332 $log->logdie("can't eval { $kf }: $@") if ($@);
333
334 return 1;
335 });
336
337 my $key = join('-', @key ) || $log->logdie("no key found!");
338
339 $log->debug("key = $key");
340
341 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
342 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
343
344 my $create = qq{
345 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
346 };
347
348 $log->debug("create: $create");
349
350 # save code to create this lookup
351 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
352 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
353
354
355 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
356 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
357 }
358
359 # save this dependency
360 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
361
362 if ($#e < 10) {
363 $e[8]->insert_after( $e[8]->clone );
364 $e[8]->insert_after( $e[7]->clone );
365 $e[8]->insert_after( $e[6]->clone );
366 }
367
368 $e[7]->remove;
369 $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
370 $e[8]->remove;
371
372
373 $log->debug(">>> ", $Element->snext_sibling);
374 });
375
376 my $normalize_source = $Document->serialize;
377 $log->debug("create: ", dump($self->{_lookup_create}) );
378 $log->debug("normalize: $normalize_source");
379
380 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
381
382 if ($self->{debug}) {
383 my $Dumper = PPI::Dumper->new( $Document );
384 $Dumper->print;
385 }
386
387 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
388
389 $Document->find( sub {
390 my ($Document,$Element) = @_;
391
392 $Element->isa('PPI::Token::Word') or return '';
393 $Element->content =~ m/^marc/ or return '';
394
395 $log->debug("found marc output generation for $database/$input");
396 $self->{_generate_marc}->{ $database }->{ $input }->{ $Element->content }++;
397 });
398
399 return 1;
400 }
401
402
403 =head2 _q
404
405 Strip single or double quotes around value
406
407 _q(qq/'foo'/) -> foo
408
409 =cut
410
411 sub _q {
412 my $v = shift || return;
413 $v =~ s/^['"]*//g;
414 $v =~ s/['"]*$//g;
415 return $v;
416 }
417
418 =head2 _input_name
419
420 Return C<name> value if HASH or arg if scalar
421
422 _input_name($input)
423
424 =cut
425
426 sub _input_name {
427 my $input = shift || return;
428 if (ref($input) eq 'HASH') {
429 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
430 return $input->{name};
431 } else {
432 return $input;
433 }
434 }
435
436
437 =head1 AUTHOR
438
439 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
440
441 =head1 COPYRIGHT & LICENSE
442
443 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
444
445 This program is free software; you can redistribute it and/or modify it
446 under the same terms as Perl itself.
447
448 =cut
449
450 1; # End of WebPAC::Parser

  ViewVC Help
Powered by ViewVC 1.1.26