/[webpac2]/trunk/lib/WebPAC/Output/EstraierNative.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Output/EstraierNative.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 630 - (hide annotations)
Wed Sep 6 14:25:05 2006 UTC (17 years, 8 months ago) by dpavlin
File size: 5343 byte(s)
 r886@llin:  dpavlin | 2006-09-05 22:51:30 +0200
 added debug log for tags

1 dpavlin 627 package WebPAC::Output::EstraierNative;
2    
3     use warnings;
4     use strict;
5    
6     use base qw/WebPAC::Common/;
7    
8     use Estraier;
9     use Encode qw/from_to/;
10     use Data::Dumper;
11     use LWP;
12     use URI::Escape;
13     use List::Util qw/first/;
14    
15     $Estraier::DEBUG = 1;
16    
17    
18     =head1 NAME
19    
20     WebPAC::Output::EstraierNative - Create Hyper Estraier full text index using native bindings
21    
22     =head1 VERSION
23    
24     Version 0.1
25    
26     =cut
27    
28     our $VERSION = '0.1';
29    
30     =head1 SYNOPSIS
31    
32     Create full text index using Hyper Estraier index from data with
33     type C<search>.
34    
35     =head1 FUNCTIONS
36    
37     =head2 new
38    
39     Connect to Hyper Estraier index using HTTP
40    
41     my $est = new WebPAC::Output::Estraier(
42     path => 'casket/',
43     database => 'demo',
44     label => 'node label',
45     encoding => 'iso-8859-2',
46     clean => 1,
47     );
48    
49     Options are:
50    
51     =over 4
52    
53     =item path
54    
55     full or relative path to Hyper Estraier database
56    
57     =item database
58    
59     name of database from which data comes
60    
61     =item label
62    
63     label for node (optional)
64    
65     =item encoding
66    
67     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
68     (and it probably is). This encoding will be converted to C<UTF-8> for
69     Hyper Estraier.
70    
71     =back
72    
73     Name of database will be used to form URI of documents in index.
74    
75     =cut
76    
77     sub new {
78     my $class = shift;
79     my $self = {@_};
80     bless($self, $class);
81    
82     my $log = $self->_get_logger;
83    
84     #$log->debug("self: ", sub { Dumper($self) });
85    
86     foreach my $p (qw/path database/) {
87     $log->logdie("need $p") unless ($self->{$p});
88     }
89    
90     $self->{encoding} ||= 'ISO-8859-2';
91    
92     $self->{label} ||= "WebPAC $self->{database}";
93    
94     my $path = 'casket';
95    
96     my $db = new Database();
97     unless($db->open($path, Database::DBWRITER | Database::DBCREAT)) {
98     $log->logdie("can't open $path: ", $db->err_msg($db->error()) );
99     }
100    
101     $self->{db} = $db;
102    
103     $log->info("using ", $self->{clean} ? "new " : "", "index $self->{path} '$self->{label}' with encoding $self->{encoding}");
104    
105     $self ? return $self : return undef;
106     }
107    
108    
109     =head2 add
110    
111     Adds one entry to database.
112    
113     $est->add(
114     id => 42,
115     ds => $ds,
116     type => 'display',
117     text => 'optional text from which snippet is created',
118     );
119    
120     This function will create entries in index using following URI format:
121    
122     C<file:///type/database%20name/000>
123    
124     Each tag in C<data_structure> with specified C<type> will create one
125     attribute and corresponding hidden text (used for search).
126    
127     =cut
128    
129     sub add {
130     my $self = shift;
131    
132     my $args = {@_};
133    
134     my $log = $self->_get_logger;
135    
136     my $database = $self->{'database'} || $log->logconfess('no database in $self');
137     $log->logconfess('need db in object') unless ($self->{'db'});
138    
139     foreach my $p (qw/id ds type/) {
140     $log->logdie("need $p") unless ($args->{$p});
141     }
142    
143     my $type = $args->{'type'};
144     my $id = $args->{'id'};
145    
146     my $uri = "file:///$type/$database/$id";
147     $log->debug("creating $uri");
148    
149     my $doc = new Document();
150     $doc->add_attr('@uri', $self->convert($uri) );
151    
152     # store type and database name
153     $doc->add_attr('_database', $database );
154     $doc->add_attr('_type', $type );
155    
156     $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
157    
158     # filter all tags which have type defined
159     my @tags = grep {
160     ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
161     } keys %{ $args->{'ds'} };
162    
163     $log->debug("tags = ", join(",", @tags));
164    
165     return unless (@tags);
166    
167     foreach my $tag (@tags) {
168    
169 dpavlin 630 $log->debug("$tag :: $type == ",Dumper( $args->{'ds'}->{$tag}->{$type} ) );
170    
171 dpavlin 627 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
172    
173     next if (! $vals);
174    
175     $vals = join(" ") if (ref($vals) eq 'ARRAY');
176    
177     $vals = $self->convert( $vals ) or
178     $log->logdie("can't convert '$vals' to UTF-8");
179    
180     $doc->add_attr( $tag, $vals );
181     $doc->add_hidden_text( $vals );
182     }
183    
184     my $text = $args->{'text'};
185     if ( $text ) {
186     $text = $self->convert( $text ) or
187     $log->logdie("can't convert '$text' to UTF-8");
188     $doc->add_text( $text );
189     }
190    
191     $log->debug("adding ", sub { $doc->dump_draft } );
192     $self->{'db'}->put_doc($doc, Database::PDCLEAN) || $log->warn("can't add document $uri with draft " . $doc->dump_draft . " to node " . $self->{path} . " status: " . $self->{db}->status());
193    
194     return 1;
195     }
196    
197     =head2 add_link
198    
199     $est->add_link(
200     from => 'ps',
201     to => 'webpac2',
202     credit => 10000,
203     );
204    
205     =cut
206    
207     sub add_link {
208     my $self = shift;
209    
210     my $args = {@_};
211     my $log = $self->_get_logger;
212    
213     $log->warn("add_link is not implemented");
214     return;
215    
216     foreach my $p (qw/from to credit/) {
217     $log->logdie("need $p") unless ($args->{$p});
218     }
219    
220     my $node = first { $_->{name} eq $args->{to} } $self->master( action => 'nodelist' );
221    
222     if (! $node) {
223     $log->warn("can't find node $args->{to}, skipping link creaton");
224     return;
225     }
226    
227     my $label = $node->{label};
228    
229     if (! $label) {
230     $log->warn("can't find label for $args->{to}, skipping link creaton");
231     return;
232     }
233    
234     $log->debug("using label $label for $args->{to}");
235    
236     return $self->{db}->set_link(
237     $self->{masterurl} . '/node/' . $args->{to},
238     $label,
239     $args->{credit},
240     );
241     }
242    
243    
244     =head2 convert
245    
246     my $utf8_string = $self->convert('string in codepage');
247    
248     =cut
249    
250     sub convert {
251     my $self = shift;
252    
253     my $text = shift || return;
254     from_to($text, $self->{encoding}, 'UTF-8');
255     return $text;
256     }
257    
258     =head1 AUTHOR
259    
260     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
261    
262     =head1 COPYRIGHT & LICENSE
263    
264     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
265    
266     This program is free software; you can redistribute it and/or modify it
267     under the same terms as Perl itself.
268    
269     =cut
270    
271     1; # End of WebPAC::Output::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26