/[webpac2]/trunk/lib/WebPAC/Output/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Output/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 85 - (hide annotations)
Tue Nov 22 08:27:53 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 3952 byte(s)
fixed logdie

1 dpavlin 1 package WebPAC::Output::Estraier;
2    
3     use warnings;
4     use strict;
5    
6 dpavlin 74 use base qw/WebPAC::Common/;
7    
8     use HyperEstraier;
9     use Text::Iconv;
10     use Data::Dumper;
11    
12 dpavlin 1 =head1 NAME
13    
14 dpavlin 74 WebPAC::Output::Estraier - Create Hyper Estraier full text index
15 dpavlin 1
16     =head1 VERSION
17    
18     Version 0.01
19    
20     =cut
21    
22     our $VERSION = '0.01';
23    
24     =head1 SYNOPSIS
25    
26 dpavlin 74 Create full text index using Hyper Estraier index from data with
27     type C<search>.
28 dpavlin 1
29 dpavlin 74 =head1 FUNCTIONS
30 dpavlin 1
31 dpavlin 74 =head2 new
32 dpavlin 1
33 dpavlin 74 Connect to Hyper Estraier index using HTTP
34 dpavlin 1
35 dpavlin 74 my $est = new WebPAC::Output::Estraier(
36     url => 'http://localhost:1978/node/webpac2',
37     user => 'admin',
38     passwd => 'admin',
39     database => 'demo',
40     encoding => 'iso-8859-2',
41     );
42 dpavlin 1
43 dpavlin 74 Options are:
44 dpavlin 1
45 dpavlin 74 =over 4
46 dpavlin 1
47 dpavlin 74 =item url
48 dpavlin 1
49 dpavlin 74 URI to C<estmaster> node
50    
51     =item user
52    
53     C<estmaster> user with sufficient rights
54    
55     =item passwd
56    
57     password for user
58    
59     =item database
60    
61     name of database from which data comes
62    
63     =item encoding
64    
65     character encoding of C<data_structure> if it's different from C<ISO-8859-2>
66     (and it probably is). This encoding will be converted to C<UTF-8> for
67     Hyper Estraier.
68    
69     =back
70    
71     Name of database will be used to form URI of documents in index.
72    
73 dpavlin 1 =cut
74    
# Constructor: connect to a Hyper Estraier node over HTTP and prepare the
# character-set converter used for data sent to the index.
#
# Named arguments (all of them are stored verbatim in the object hash):
#   url, user, passwd - required; logdie() is called if any is missing
#   database          - not checked here, but add() refuses to work without it
#   encoding          - source encoding, defaults to ISO-8859-2
#
# Returns the blessed object. Every failure is reported through the
# logger's logdie().
sub new {
    my $class = shift;
    my $self  = {@_};
    bless($self, $class);

    my $log = $self->_get_logger;

    foreach my $p (qw/url user passwd/) {
        $log->logdie("need $p") unless ($self->{$p});
    }

    $log->info("opening Hyper Estraier index $self->{'url'}");

    $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
    $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});

    my $encoding = $self->{'encoding'} || 'ISO-8859-2';
    $log->info("using encoding $encoding");

    # Text::Iconv->new may croak instead of returning false when the
    # conversion is unsupported, so trap the exception: that way the
    # failure is always routed through the logger, whichever way the
    # constructor signals it.
    my $iconv = eval { Text::Iconv->new($encoding, 'UTF-8') };
    unless ($iconv) {
        my $why = $@ ? ": $@" : '';
        $log->logdie("can't create conversion from $encoding to UTF-8$why");
    }
    $self->{'iconv'} = $iconv;

    return $self;
}
99    
100 dpavlin 75
101 dpavlin 74 =head2 add
102 dpavlin 1
103 dpavlin 74 Adds one entry to database.
104    
105     $est->add(
106     id => 42,
107     ds => $ds,
108     type => 'display',
109     url_prefix => 'database name',
110     text => 'optional text from which snippet is created',
111     );
112    
113     This function will create entries in index using following URI format:
114    
115     C<file:///type/database/id>
116    
117     Each tag in C<data_structure> with specified C<type> will create one
118     attribute and corresponding hidden text (used for search).
119    
120 dpavlin 1 =cut
121    
# Add one record to the Hyper Estraier index.
#
# Named arguments:
#   id   - record identifier, last component of the document URI
#          (0 is accepted)
#   ds   - data structure: hash ref of tag => { type => [ values ] }
#   type - which type inside each tag of ds to index
#   text - optional body text, added as visible document text
#
# The document URI is "file:///$type/$database/$id", where $database is
# the value given to the constructor. Every tag in ds that has the
# requested type becomes one attribute (values joined with a space) and
# is also added as hidden text.
#
# Returns 1 after the document has been put into the index, or an empty
# return (nothing is stored) when no tag in ds carries the requested type.
# Missing arguments and failed conversions are fatal via the logger.
sub add {
    my $self = shift;

    my $args = {@_};

    my $log = $self->_get_logger;

    my $database = $self->{'database'} || $log->logconfess('no database in $self');
    $log->logconfess('need db in object') unless ($self->{'db'});

    foreach my $p (qw/id ds type/) {
        my $given = $args->{$p};
        # defined + non-empty instead of plain truth, so that id => 0 is valid
        $log->logdie("need $p") unless (defined $given && length $given);
    }

    # Convert a string to UTF-8. The result goes into a separate variable so
    # that the error message still shows the input that failed, and it is
    # tested with defined() so that a converted "0" is not taken for a failure.
    my $to_utf8 = sub {
        my $in  = shift;
        my $out = $self->{'iconv'}->convert($in);
        $log->logdie("can't convert '$in' to UTF-8") unless (defined $out);
        return $out;
    };

    my $type = $args->{'type'};
    my $mfn  = $args->{'id'};

    my $uri = "file:///$type/$database/$mfn";
    $log->debug("creating $uri");

    my $doc = HyperEstraier::Document->new;
    $doc->add_attr('@uri', $to_utf8->($uri));

    $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );

    # filter all tags which have type defined
    my @tags = grep {
        defined( $args->{'ds'}->{$_}->{$type} )
    } keys %{ $args->{'ds'} };

    $log->debug("tags = ", join(",", @tags));

    return unless (@tags);

    foreach my $tag (@tags) {

        my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });

        # length, not truth: a field whose only value is "0" is still a value
        $log->logconfess("no values for $tag/$type") unless (length $vals);

        $vals = $to_utf8->($vals);

        $doc->add_attr( $tag, $vals );
        $doc->add_hidden_text( $vals );
    }

    my $text = $args->{'text'};
    if ( defined $text && length $text ) {
        $doc->add_text( $to_utf8->($text) );
    }

    $log->debug("adding ", sub { $doc->dump_draft } );
    $self->{'db'}->put_doc($doc) || $log->logdie("can't add document $uri to index");

    return 1;
}
181    
182     =head1 AUTHOR
183    
184     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
185    
186     =head1 COPYRIGHT & LICENSE
187    
188     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
189    
190     This program is free software; you can redistribute it and/or modify it
191     under the same terms as Perl itself.
192    
193     =cut
194    
195     1; # End of WebPAC::Output::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26