# Post-patch text of trunk/lib/WebPAC/Output/Estraier.pm, taken from the
# r73 -> r74 diff (2005/11/20 20:13:39).  Only the two diff hunks were
# visible: the first two lines of the file, and the lines between
# "=head1 VERSION" and "=head1 SYNOPSIS", lie outside the hunks and are
# not reproduced here.

use warnings;
use strict;

use base qw/WebPAC::Common/;

use HyperEstraier;
use Text::Iconv;
use Data::Dumper;

=head1 NAME

WebPAC::Output::Estraier - Create Hyper Estraier full text index

=head1 VERSION

=for comment
The body of this section is outside the visible diff hunks.

=head1 SYNOPSIS

Create full text index using Hyper Estraier from data with a given
C<type> (see L</add>).

=head1 FUNCTIONS

=head2 new

Connect to Hyper Estraier index using HTTP

 my $est = WebPAC::Output::Estraier->new(
 	url => 'http://localhost:1978/node/webpac2',
 	user => 'admin',
 	passwd => 'admin',
 	database => 'demo',
 	encoding => 'iso-8859-2',
 );

Options are:

=over 4

=item url

URI to Hyper Estraier node (required)

=item user

Hyper Estraier user with sufficient rights (required)

=item passwd

password for user (required)

=item database

name of database from which data comes

=item encoding

character encoding of the data passed to L</add> if it's different than
C<ISO-8859-2> (and it probably is). This encoding will be converted to
C<UTF-8> for Hyper Estraier.

=back

Name of database will be used to form URI of documents in index.

Dies (through the logger) if C<url>, C<user> or C<passwd> is missing or
if no converter from C<encoding> to C<UTF-8> can be created.

=cut

sub new {
    my $class = shift;
    my $self = {@_};
    bless($self, $class);

    my $log = $self->_get_logger;

    foreach my $p (qw/url user passwd/) {
        $log->logdie("need $p") unless ($self->{$p});
    }

    $log->info("opening Hyper Estraier index $self->{'url'}");

    $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
    $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});

    my $encoding = $self->{'encoding'} || 'ISO-8859-2';
    $log->info("using encoding $encoding");

    # Keep the converter in the object so add() can use it, and honour
    # the requested source encoding instead of a hard-coded one.
    $self->{'iconv'} = Text::Iconv->new($encoding, 'UTF-8')
        or $log->logdie("can't create converter from $encoding to UTF-8");

    return $self;
}

=head2 add

Adds one entry to database.

 $est->add(
 	id => 42,
 	ds => $ds,
 	type => 'display',
 	text => 'optional text from which snippet is created',
 );

This function will create entries in index using following URI format:

 file:///type/database/id

where C<database> is the name given to L</new>.

Each tag in C<ds> with specified C<type> will create one
attribute and corresponding hidden text (used for search). All values,
and the optional C<text>, are converted to C<UTF-8> before they are
stored.

Returns true when the document was stored, and nothing when no tag in
C<ds> has values of the requested C<type>. Dies (through the logger) on
missing arguments, on conversion failure, or when the document can't be
added to the index.

=cut

sub add {
    my $self = shift;

    my $args = {@_};

    my $log = $self->_get_logger;

    my $database = $self->{'database'} || $log->logconfess('no database in $self');
    $log->logconfess('need db in object') unless ($self->{'db'});

    foreach my $p (qw/id ds type/) {
        $log->logdie("need $p") unless ($args->{$p});
    }

    my $type = $args->{'type'};
    my $mfn  = $args->{'id'};
    my $ds   = $args->{'ds'};

    my $uri = "file:///$type/$database/$mfn";
    $log->debug("creating $uri");

    my $doc = HyperEstraier::Document->new;
    $doc->add_attr('@uri', $uri);

    # code refs are only evaluated when debug logging is enabled
    $log->debug("ds = ", sub { Dumper($ds) } );

    # filter all tags which have type defined; the ref() guard keeps the
    # lookup from autovivifying entries in the caller's structure and
    # from dying on entries which are not hashes
    my @tags = grep {
        ref( $ds->{$_} ) eq 'HASH' && defined( $ds->{$_}->{$type} )
    } keys %{ $ds };

    $log->debug("tags = ", join(",", @tags));

    return unless (@tags);

    foreach my $tag (@tags) {

        my $vals = join(" ", @{ $ds->{$tag}->{$type} });

        # length, not truth: a value of "0" is still a value
        $log->logconfess("no values for $tag/$type") unless (length $vals);

        $vals = $self->_to_utf8($vals);

        $doc->add_attr($tag, $vals);
        $doc->add_hidden_text($vals);
    }

    my $text = $args->{'text'};
    $doc->add_text( $self->_to_utf8($text) ) if ( $text );

    $log->debug("adding ", sub { $doc->dump_draft } );
    $self->{'db'}->put_doc($doc)
        or $log->logdie("can't add document $uri to index");

    return 1;
}

# Convert $text from the encoding given to new() into UTF-8 using the
# converter stored by new(). Dies (through the logger) when the object
# has no converter or when the conversion fails.
sub _to_utf8 {
    my ($self, $text) = @_;

    my $log = $self->_get_logger;

    my $iconv = $self->{'iconv'} || $log->logconfess('need iconv in object');

    my $utf8 = $iconv->convert($text);
    $log->logdie("can't convert '$text' to UTF-8") unless (defined $utf8);

    return $utf8;
}

=head1 AUTHOR

Dobrica Pavlinusic

=head1 COPYRIGHT & LICENSE