--- trunk/lib/WebPAC/Parser.pm	2006/09/24 17:25:04	686
+++ trunk/lib/WebPAC/Parser.pm	2007/11/03 12:30:43	976
@@ -3,28 +3,41 @@
 use warnings;
 use strict;
 
-use base qw/WebPAC::Common WebPAC::Normalize/;
 
 use PPI;
 use PPI::Dumper;
 use Data::Dump qw/dump/;
 use File::Slurp;
 
+use base qw/WebPAC::Common/;
 
 =head1 NAME
 
-WebPAC::Parser - parse perl normalization configuration files and mungle it
+WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
 
 =head1 VERSION
 
-Version 0.01
+Version 0.08
 
 =cut
 
-our $VERSION = '0.01';
+our $VERSION = '0.08';
 
 =head1 SYNOPSIS
 
+This module will parse L<WebPAC::Normalize/lookup> directives and generate source
+to produce lookups and normalization. It will also parse other parts of
+source to produce some of DWIM (I<Do What I Mean>) magic
+(like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
+rules in normalisation).
+
+It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
+reading about LISP. It might be a bit over-the board, but at least it removed
+separate configuration files for lookups.
+
+This is experimental code, but it replaces all older formats which where,
+at one point in time, available in WebPAC.
+
 FIXME
 
 =head1 FUNCTIONS
@@ -36,6 +49,7 @@
   my $parser = new WebPAC::Parser(
   	config => new WebPAC::Config(),
 	base_path => '/optional/path/to/conf',
+	only_database => $only
   );
 
 =cut
@@ -51,44 +65,268 @@
 
 	$log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
 
-	my $source;
+	$self->_read_sources;
+
+	$self ? return $self : return undef;
+}
+
+=head2 valid_database
+
+  my $ok = $parse->valid_database('key');
+
+=cut
+
+sub valid_database {
+	my $self = shift;
+
+	my $database = shift || return;
+
+	return defined($self->{valid_inputs}->{ _q($database) });
+}
+
+=head2 valid_database_input
+
+  my $ok = $parse->valid_database('database_key','input_name');
+
+=cut
+
+sub valid_database_input {
+	my $self = shift;
+	my ($database,$input) = @_;
+	$input = _input_name($input);
+	return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
+}
+
+=head2 depends
+
+Return all databases and inputs on which specified one depends
+
+  $depends_on = $parser->depends('database','input');
+
+=cut
+
+sub depends {
+	my $self = shift;
+	my ($database,$input) = @_;
+	$input = _input_name($input);
+	$self->_get_logger->debug("depends($database,$input)");
+	return unless (
+		defined( $self->{depends}->{ _q($database) } ) &&
+		defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
+	);
+	return $self->{depends}->{ _q($database) }->{ _q($input) };
+}
+
+=head2 have_lookup_create
+
+  my @keys = $parser->have_lookup_create($database, $input);
+
+=cut
+
+sub have_lookup_create {
+	my $self = shift;
+	my ($database,$input) = @_;
+	$input = _input_name($input);
+	return unless (
+		defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
+		defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
+	);
+	return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
+}
+
+
+=head2 lookup_create_rules
+
+  my $source = $parser->lookup_create_rules($database, $input);
+
+=cut
+
+sub lookup_create_rules {
+	my $self = shift;
+	my ($database,$input) = @_;
+	$input = _input_name($input);
+	return unless (
+		defined( $self->{_lookup_create}->{ _q($database) } ) &&
+		defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
+	);
+	return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
+}
+
+=head2 normalize_rules
+
+  my $source = $parser->normalize_rules($database, $input);
+
+=cut
+
+sub normalize_rules {
+	my $self = shift;
+	my ($database,$input) = @_;
+	$input = _input_name($input);
+	return unless (
+		defined( $self->{_normalize_source}->{ _q($database) } ) &&
+		defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
+	);
+	return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
+}
+
+
+=head2 have_rules
+
+  my $do_marc = $parser->have_rules('marc', $database, $input);
+  my $do_index = $parser->have_rules('search', $database);
+
+This function will return hash containing count of all found C<marc_*> or
+C<search> directives. Input name is optional.
+
+=cut
+
+sub have_rules {
+	my $self = shift;
+
+	my $log = $self->_get_logger();
+	my $type = shift @_ || $log->logconfess("need at least type");
+	my $database = shift @_ || $log->logconfess("database is required");
+	my $input = shift @_;
+
+	$input = _input_name($input);
+
+
+	return unless defined( $self->{_have_rules}->{ _q($database) } );
+
+	my $database_rules = $self->{_have_rules}->{ _q($database ) };
+
+	if (defined($input)) {
+
+		return unless (
+			defined( $database_rules->{ _q($input) } ) &&
+			defined( $database_rules->{ _q($input) }->{ $type } )
+		);
+
+		return $database_rules->{ _q($input) }->{ $type };
+	}
+
+	my $usage;
+
+	foreach my $i (keys %{ $database_rules }) {
+		next unless defined( $database_rules->{$i}->{$type} );
+
+		foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
+			$usage->{ $t } += $database_rules->{ $i }->{ $t };
+		}
+	}
+
+	return $usage;
+
+}
+
+
+=head1 PRIVATE
+
+=head2 _read_sources
+
+  my $source_files = $parser->_read_sources;
+
+Called by L</new>.
+
+=cut
+
+sub _read_sources {
+	my $self = shift;
+
+	my $log = $self->_get_logger();
+
+	my $nr = 0;
+
+	my @sources;
+
+	my $lookup_src_cache;
+
+	my $only_database = $self->{only_database};
+	my $only_input = $self->{only_input};
 
 	$self->{config}->iterate_inputs( sub {
-		my $input = shift;
-		my $path = $input->{normalize}->{path} || return;
-		my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
-		$log->logdie("normalization input $full doesn't exist") unless (-e $full);
-		my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
-		$log->debug("adding $path to parser [",length($s)," bytes]");
-		$source .= $s;
+		my ($input, $database) = @_;
+
+		return if ( $only_database && $database !~ m/$only_database/i );
+		return if ( $only_input && $input->{name} !~ m/$only_input/i );
+
+		$log->debug("database: $database input = ", dump($input));
+
+		foreach my $normalize (@{ $input->{normalize} }) {
+
+			my $path = $normalize->{path};
+			return unless($path);
+			my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
+
+			$log->logdie("normalization input $full doesn't exist") unless (-e $full);
+
+			my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
+
+			my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
+
+			$log->debug("$database/$input_name: adding $path");
+
+			$self->{valid_inputs}->{$database}->{$input_name}++;
+
+			push @sources, sub {
+				#warn "### $database $input_name, $full ###\n";
+				$self->_parse_source( $database, $input_name, $full, $s );
+			};
+
+			$nr++;
+		}
 	} );
 
-	$log->debug("collected ", length($source), " bytes of source");
+	$log->debug("found $nr source files");
 
-	$self->{source} = $source;
+	# parse all sources
+	$_->() foreach (@sources);
 
-	$self ? return $self : return undef;
+	return $nr;
 }
 
-=head2 parse
+=head2 _parse_source
+
+  $parser->_parse_source($database,$input,$path,$source);
+
+Called for each normalize source (rules) in each input by L</_read_sources>
+
+It will report invalid databases and inputs in error log after parsing.
 
 =cut
 
-sub parse {
+sub _parse_source {
 	my $self = shift;
+	my ($database, $input, $path, $source) = @_;
+
+	$input = _input_name($input);
 
 	my $log = $self->_get_logger();
 
-	$log->logdie('no source found in object') unless ($self->{source});
+	$log->logdie("invalid database $database" ) unless $self->valid_database( $database );
+	$log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
 
-	my $Document = PPI::Document->new( \$self->{source} ) || $log->logdie("can't parse source:\n", $self->{source});
+	$log->logdie("no source found for database $database input $input path $path") unless ($source);
 
-	$Document->prune('PPI::Token::Whitespace');
+	$log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
+
+	my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
+
+	#$Document->prune('PPI::Token::Whitespace');
+	$Document->prune('PPI::Token::Comment');
 	#$Document->prune('PPI::Token::Operator');
 
 	# Find all the named subroutines
 
-	my $eval_create;
+	$self->{_lookup_errors} = ();
+
+	sub _lookup_error {
+		my $self = shift;
+		my $msg = shift;
+		$self->_get_logger->logconfess("error without message?") unless ($msg);
+		push @{ $self->{_lookup_errors} }, $msg;
+		return '';
+	}
 
 	$Document->find( sub {
 			my ($Document,$Element) = @_;
@@ -96,24 +334,28 @@
 			$Element->isa('PPI::Token::Word') or return '';
 			$Element->content eq 'lookup' or return '';
 
-			print "#*** expansion: ", $Element->snext_sibling,$/;
+			$log->debug("expansion: ", $Element->snext_sibling);
 
 			my $args = $Element->snext_sibling;
 		
 			my @e = $args->child(0)->elements;
-			print "hum, expect at least 8 elements, got ", scalar @e, " in $args\n" if ($#e < 8);
+			$log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
 
-			print "# found ", scalar @e, " elements:\n";
+			if ($log->is_debug) {
+				my $report = "found " . scalar @e . " elements:\n";
+
+				foreach my $i ( 0 .. $#e ) {
+					$report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
+				}
 
-			foreach my $i ( 0 .. $#e ) {
-				printf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
+				$log->debug($report);
 			}
 
 			my $key_element = $e[8]->clone;
 
-			die "key element must be PPI::Structure::Block" unless $key_element->isa('PPI::Structure::Block');
+			$log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
 
-			print "## key part: ", $key_element, $/;
+			$log->debug("key part: ", $key_element);
 
 			my @key;
 
@@ -124,30 +366,38 @@
 
 				my $kf = $e->snext_sibling;
 
-				print "## key fragment = $kf\n";
+				$log->debug("key fragment = $kf");
 
 				push @key, eval $kf;
-				print "ERROR: can't eval { $kf }: $@" if ($@);
+				$log->logdie("can't eval { $kf }: $@") if ($@);
 
 				return 1;
 			});
 
-			my $key = join('-', @key ) || print "ERROR: no key found!";
+			my $key = join('-', @key ) || $log->logdie("no key found!");
 
-			print "key = $key\n";
+			$log->debug("key = $key");
 
-			my $create = ' 
-				$coderef = ' . $e[7] . $e[8] . ';
-				foreach my $v ($coderef->()) {
-					next unless (defined($v) && $v ne \'\');
-					push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
-				}
-			';
+			return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
+			return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
+
+			my $create = qq{
+				save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
+			};
 
-			print "create: $create\n";
+			$log->debug("create: $create");
 
-			$create =~ s/\s+/ /gs;
-			$eval_create->{ $e[3] }->{ $e[5] } .= $create;
+			# save code to create this lookup
+			$self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
+			$self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
+
+
+			if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
+				$log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
+			}
+
+			# save this dependency
+			$self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
 
 			if ($#e < 10) {
 				$e[8]->insert_after( $e[8]->clone );
@@ -160,18 +410,73 @@
 			$e[8]->remove;
 
 
-			print "# >>> ", $Element->snext_sibling, "\n";
+			$log->debug(">>> ", $Element->snext_sibling);
+	});
+
+	my $normalize_source = $Document->serialize;
+	$log->debug("create: ", dump($self->{_lookup_create}) );
+	$log->debug("normalize: $normalize_source");
+
+	$self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
+
+	if ($self->{debug}) {
+		my $Dumper = PPI::Dumper->new( $Document );
+		$Dumper->print;
+	}
+
+	$log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
+
+	$Document->find( sub {
+			my ($Document,$Element) = @_;
+
+			$Element->isa('PPI::Token::Word') or return '';
+			if ($Element->content =~ m/^(marc|search)/) {
+				my $what = $1;
+				$log->debug("found $what rules in $database/$input");
+				$self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
+			} else {
+				return '';
+			}
 	});
 
-	print "-----\ncreate: ", dump($eval_create), "\n";
-	print "-----\nlookup: ", $Document->serialize, "\n";
-	print "-----\n";
+	return 1;
+}
+
 
-	my $Dumper = PPI::Dumper->new( $Document );
-	$Dumper->print;
+=head2 _q
+
+Strip single or double quotes around value
+
+  _q(qq/'foo'/) -> foo
+
+=cut
 
+sub _q {
+	my $v = shift || return;
+	$v =~ s/^['"]*//g;
+	$v =~ s/['"]*$//g;
+	return $v;
 }
 
+=head2 _input_name
+
+Return C<name> value if HASH or arg if scalar
+
+  _input_name($input)
+
+=cut
+
+sub _input_name {
+	my $input = shift || return;
+	if (ref($input) eq 'HASH') {
+		die "can't find 'name' value in ", dump($input) unless defined($input->{name});
+		return $input->{name};
+	} else {
+		return $input;
+	}
+}
+
+
 =head1 AUTHOR
 
 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>