--- trunk/lib/WebPAC/Output/Estraier.pm 2005/06/25 20:23:23 1 +++ trunk/lib/WebPAC/Output/Estraier.pm 2005/11/23 00:14:05 113 @@ -3,9 +3,15 @@ use warnings; use strict; +use base qw/WebPAC::Common/; + +use HyperEstraier; +use Text::Iconv; +use Data::Dumper; + =head1 NAME -WebPAC::Output::Estraier - The great new WebPAC::Output::Estraier! +WebPAC::Output::Estraier - Create Hyper Estraier full text index =head1 VERSION @@ -17,49 +23,165 @@ =head1 SYNOPSIS -Quick summary of what the module does. +Create full text index using Hyper Estraier index from data with +type C. -Perhaps a little code snippet. +=head1 FUNCTIONS - use WebPAC::Output::Estraier; +=head2 new - my $foo = WebPAC::Output::Estraier->new(); - ... +Connect to Hyper Estraier index using HTTP -=head1 EXPORT + my $est = new WebPAC::Output::Estraier( + url => 'http://localhost:1978/node/webpac2', + user => 'admin', + passwd => 'admin', + database => 'demo', + encoding => 'iso-8859-2', + ); -A list of functions that can be exported. You can delete this section -if you don't export anything, such as for a purely object-oriented module. +Options are: -=head1 FUNCTIONS +=over 4 + +=item url + +URI to C node + +=item user + +C user with sufficient rights + +=item passwd + +password for user + +=item database + +name of database from which data comes + +=item encoding -=head2 function1 +character encoding of C if it's different than C +(and it probably is). This encoding will be converted to C<UTF-8> for +Hyper Estraier. + +=back + +Name of database will be used to form URI of documents in index. 
=cut

# Constructor.
#
# Arguments (named, all stored verbatim in the object hash):
#   url      - required; passed to HyperEstraier::Node->new
#   user     - required; passed to set_auth
#   passwd   - required; passed to set_auth
#   database - not checked here, but add() refuses to work without it
#   encoding - optional source encoding, defaults to 'ISO-8859-2'
#
# Returns the blessed object. Dies (through the logger's logdie) when a
# required argument is missing or the encoding converter can't be created.
sub new {
    my $class = shift;
    my $self = {@_};
    bless($self, $class);

    my $log = $self->_get_logger;

    foreach my $p (qw/url user passwd/) {
        $log->logdie("need $p") unless ($self->{$p});
    }

    $log->info("opening Hyper Estraier index $self->{'url'}");

    $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
    $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});

    my $encoding = $self->{'encoding'} || 'ISO-8859-2';
    $log->info("using encoding $encoding");

    # Direct method call instead of indirect object syntax
    # ("new Text::Iconv ..."), which is parsed ambiguously.
    # NOTE(review): Text::Iconv->new appears to throw on an unsupported
    # conversion instead of returning false -- confirm against its docs.
    # The eval makes sure both failure modes end up in our own logdie.
    my $iconv = eval { Text::Iconv->new($encoding, 'UTF-8') };
    $log->error("Text::Iconv->new failed: $@") if (! $iconv && $@);
    $log->logdie("can't create conversion from $encoding to UTF-8") unless ($iconv);
    $self->{'iconv'} = $iconv;

    # $self is a blessed reference and therefore always true; the former
    # "$self ? return $self : return undef" could never take the undef arm.
    return $self;
}


=head2 add

Adds one entry to database.

 $est->add(
    id => 42,
    ds => $ds,
    type => 'display',
    url_prefix => 'database name',
    text => 'optional text from which snippet is created',
 );

This function will create entries in index using following URI format:

 C<file:///type/database/id>

Each tag in C<ds> with specified C<type> will create one
attribute and corresponding hidden text (used for search).

Note: C<url_prefix> is currently not consulted by C<add>; the database
part of the URI is taken from the C<database> option given to C<new>.

=cut

# Build one Hyper Estraier document from a data structure and store it
# through the node object created in new().
#
# Arguments (named):
#   id   - required; last component of the document URI
#   ds   - required; hash ref of tag => { type => [ values ] }
#   type - required; selects which per-tag value list is indexed and
#          forms the first component of the document URI
#   text - optional; added as visible document text
#
# Returns 1 after the document was stored, or nothing when no tag in ds
# carries values for the requested type (in that case nothing is stored,
# even if text was given).
# Dies through the logger on missing arguments, failed encoding
# conversion, or when put_doc reports failure.
sub add {
    my $self = shift;

    my $args = {@_};

    my $log = $self->_get_logger;

    my $database = $self->{'database'} || $log->logconfess('no database in $self');
    $log->logconfess('need db in object') unless ($self->{'db'});

    foreach my $p (qw/id ds type/) {
        # defined + length rather than plain truth, so that id => 0 is accepted
        $log->logdie("need $p")
            unless (defined $args->{$p} && length $args->{$p});
    }

    my $type = $args->{'type'};
    my $mfn = $args->{'id'};

    # Convert to UTF-8 or die. The result is kept in a separate variable
    # so the error message still shows the original input; the previous
    # "$x = convert($x) or logdie(... $x ...)" had already overwritten $x
    # with undef by the time the message was built. Testing definedness
    # (not truth) keeps a legitimate "0" from being reported as a failure.
    my $to_utf8 = sub {
        my $in = shift;
        my $out = $self->{'iconv'}->convert( $in );
        $log->logdie("can't convert '$in' to UTF-8") unless (defined $out);
        return $out;
    };

    my $uri = "file:///$type/$database/$mfn";
    $log->debug("creating $uri");

    my $doc = HyperEstraier::Document->new;
    $doc->add_attr('@uri', $to_utf8->($uri) );

    $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );

    # filter all tags which have type defined
    my @tags = grep {
        ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
    } keys %{ $args->{'ds'} };

    $log->debug("tags = ", join(",", @tags));

    return unless (@tags);

    foreach my $tag (@tags) {

        my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });

        # join() never returns undef; only an empty string means "no values"
        $log->logconfess("no values for $tag/$type") unless (length $vals);

        $vals = $to_utf8->( $vals );

        $doc->add_attr( $tag, $vals );
        $doc->add_hidden_text( $vals );
    }

    my $text = $args->{'text'};
    if ( defined $text && length $text ) {
        $doc->add_text( $to_utf8->( $text ) );
    }

    $log->debug("adding ", sub { $doc->dump_draft } );
    $self->{'db'}->put_doc($doc) || $log->logdie("can't add document $uri to index");

    return 1;
}

=head1 AUTHOR

Dobrica Pavlinusic, C<< >>

=head1 COPYRIGHT & LICENSE