/[webpac2]/trunk/lib/WebPAC/Output/EstraierNative.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Output/EstraierNative.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 627 - (hide annotations)
Tue Sep 5 15:14:14 2006 UTC (17 years, 8 months ago) by dpavlin
File size: 5264 byte(s)
 r884@llin:  dpavlin | 2006-09-05 17:13:36 +0200
 added preliminary support for perl native Hyper Estraier bindings

1 dpavlin 627 package WebPAC::Output::EstraierNative;
2    
3     use warnings;
4     use strict;
5    
6     use base qw/WebPAC::Common/;
7    
8     use Estraier;
9     use Encode qw/from_to/;
10     use Data::Dumper;
11     use LWP;
12     use URI::Escape;
13     use List::Util qw/first/;
14    
15     $Estraier::DEBUG = 1;
16    
17    
18     =head1 NAME
19    
20     WebPAC::Output::EstraierNative - Create Hyper Estraier full text index using native bindings
21    
22     =head1 VERSION
23    
24     Version 0.1
25    
26     =cut
27    
28     our $VERSION = '0.1';
29    
30     =head1 SYNOPSIS
31    
32     Create full text index using Hyper Estraier index from data with
33     type C<search>.
34    
35     =head1 FUNCTIONS
36    
37     =head2 new
38    
39     Connect to Hyper Estraier index using HTTP
40    
41     my $est = new WebPAC::Output::Estraier(
42     path => 'casket/',
43     database => 'demo',
44     label => 'node label',
45     encoding => 'iso-8859-2',
46     clean => 1,
47     );
48    
49     Options are:
50    
51     =over 4
52    
53     =item path
54    
55     full or relative path to Hyper Estraier database
56    
57     =item database
58    
59     name of database from which data comes
60    
61     =item label
62    
63     label for node (optional)
64    
65     =item encoding
66    
67     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
68     (and it probably is). This encoding will be converted to C<UTF-8> for
69     Hyper Estraier.
70    
71     =back
72    
73     Name of database will be used to form URI of documents in index.
74    
75     =cut
76    
77     sub new {
78     my $class = shift;
79     my $self = {@_};
80     bless($self, $class);
81    
82     my $log = $self->_get_logger;
83    
84     #$log->debug("self: ", sub { Dumper($self) });
85    
86     foreach my $p (qw/path database/) {
87     $log->logdie("need $p") unless ($self->{$p});
88     }
89    
90     $self->{encoding} ||= 'ISO-8859-2';
91    
92     $self->{label} ||= "WebPAC $self->{database}";
93    
94     my $path = 'casket';
95    
96     my $db = new Database();
97     unless($db->open($path, Database::DBWRITER | Database::DBCREAT)) {
98     $log->logdie("can't open $path: ", $db->err_msg($db->error()) );
99     }
100    
101     $self->{db} = $db;
102    
103     $log->info("using ", $self->{clean} ? "new " : "", "index $self->{path} '$self->{label}' with encoding $self->{encoding}");
104    
105     $self ? return $self : return undef;
106     }
107    
108    
109     =head2 add
110    
111     Adds one entry to database.
112    
113     $est->add(
114     id => 42,
115     ds => $ds,
116     type => 'display',
117     text => 'optional text from which snippet is created',
118     );
119    
120     This function will create entries in index using following URI format:
121    
122     C<file:///type/database%20name/000>
123    
124     Each tag in C<data_structure> with specified C<type> will create one
125     attribute and corresponding hidden text (used for search).
126    
127     =cut
128    
129     sub add {
130     my $self = shift;
131    
132     my $args = {@_};
133    
134     my $log = $self->_get_logger;
135    
136     my $database = $self->{'database'} || $log->logconfess('no database in $self');
137     $log->logconfess('need db in object') unless ($self->{'db'});
138    
139     foreach my $p (qw/id ds type/) {
140     $log->logdie("need $p") unless ($args->{$p});
141     }
142    
143     my $type = $args->{'type'};
144     my $id = $args->{'id'};
145    
146     my $uri = "file:///$type/$database/$id";
147     $log->debug("creating $uri");
148    
149     my $doc = new Document();
150     $doc->add_attr('@uri', $self->convert($uri) );
151    
152     # store type and database name
153     $doc->add_attr('_database', $database );
154     $doc->add_attr('_type', $type );
155    
156     $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
157    
158     # filter all tags which have type defined
159     my @tags = grep {
160     ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
161     } keys %{ $args->{'ds'} };
162    
163     $log->debug("tags = ", join(",", @tags));
164    
165     return unless (@tags);
166    
167     foreach my $tag (@tags) {
168    
169     my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
170    
171     next if (! $vals);
172    
173     $vals = join(" ") if (ref($vals) eq 'ARRAY');
174    
175     $vals = $self->convert( $vals ) or
176     $log->logdie("can't convert '$vals' to UTF-8");
177    
178     $doc->add_attr( $tag, $vals );
179     $doc->add_hidden_text( $vals );
180     }
181    
182     my $text = $args->{'text'};
183     if ( $text ) {
184     $text = $self->convert( $text ) or
185     $log->logdie("can't convert '$text' to UTF-8");
186     $doc->add_text( $text );
187     }
188    
189     $log->debug("adding ", sub { $doc->dump_draft } );
190     $self->{'db'}->put_doc($doc, Database::PDCLEAN) || $log->warn("can't add document $uri with draft " . $doc->dump_draft . " to node " . $self->{path} . " status: " . $self->{db}->status());
191    
192     return 1;
193     }
194    
195     =head2 add_link
196    
197     $est->add_link(
198     from => 'ps',
199     to => 'webpac2',
200     credit => 10000,
201     );
202    
203     =cut
204    
205     sub add_link {
206     my $self = shift;
207    
208     my $args = {@_};
209     my $log = $self->_get_logger;
210    
211     $log->warn("add_link is not implemented");
212     return;
213    
214     foreach my $p (qw/from to credit/) {
215     $log->logdie("need $p") unless ($args->{$p});
216     }
217    
218     my $node = first { $_->{name} eq $args->{to} } $self->master( action => 'nodelist' );
219    
220     if (! $node) {
221     $log->warn("can't find node $args->{to}, skipping link creaton");
222     return;
223     }
224    
225     my $label = $node->{label};
226    
227     if (! $label) {
228     $log->warn("can't find label for $args->{to}, skipping link creaton");
229     return;
230     }
231    
232     $log->debug("using label $label for $args->{to}");
233    
234     return $self->{db}->set_link(
235     $self->{masterurl} . '/node/' . $args->{to},
236     $label,
237     $args->{credit},
238     );
239     }
240    
241    
242     =head2 convert
243    
244     my $utf8_string = $self->convert('string in codepage');
245    
246     =cut
247    
248     sub convert {
249     my $self = shift;
250    
251     my $text = shift || return;
252     from_to($text, $self->{encoding}, 'UTF-8');
253     return $text;
254     }
255    
256     =head1 AUTHOR
257    
258     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
259    
260     =head1 COPYRIGHT & LICENSE
261    
262     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
263    
264     This program is free software; you can redistribute it and/or modify it
265     under the same terms as Perl itself.
266    
267     =cut
268    
269     1; # End of WebPAC::Output::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26