/[webpac2]/trunk/lib/WebPAC/Output/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Output/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1 by dpavlin, Sat Jun 25 20:23:23 2005 UTC revision 503 by dpavlin, Sun May 14 22:23:28 2006 UTC
# Line 3  package WebPAC::Output::Estraier; Line 3  package WebPAC::Output::Estraier;
3  use warnings;  use warnings;
4  use strict;  use strict;
5    
6    use base qw/WebPAC::Common/;
7    
8    use Search::Estraier 0.06;
9    use Encode qw/from_to/;
10    use Data::Dumper;
11    use LWP;
12    use URI::Escape;
13    use List::Util qw/first/;
14    
15  =head1 NAME  =head1 NAME
16    
17  WebPAC::Output::Estraier - The great new WebPAC::Output::Estraier!  WebPAC::Output::Estraier - Create Hyper Estraier full text index
18    
19  =head1 VERSION  =head1 VERSION
20    
21  Version 0.01  Version 0.11
22    
23  =cut  =cut
24    
25  our $VERSION = '0.01';  our $VERSION = '0.11';
26    
27  =head1 SYNOPSIS  =head1 SYNOPSIS
28    
29  Quick summary of what the module does.  Create full text index using Hyper Estraier index from data with
30    type C<search>.
31    
32  Perhaps a little code snippet.  =head1 FUNCTIONS
33    
34      use WebPAC::Output::Estraier;  =head2 new
35    
36      my $foo = WebPAC::Output::Estraier->new();  Connect to Hyper Estraier index using HTTP
     ...  
37    
38  =head1 EXPORT   my $est = new WebPAC::Output::Estraier(
39            masterurl => 'http://localhost:1978/',
40            user => 'admin',
41            passwd => 'admin',
42            database => 'demo',
43            label => 'node label',
44            encoding => 'iso-8859-2',
45            clean => 1,
46     );
47    
48  A list of functions that can be exported.  You can delete this section  Options are:
 if you don't export anything, such as for a purely object-oriented module.  
49    
50  =head1 FUNCTIONS  =over 4
51    
52    =item masterurl
53    
54    URI to C<estmaster> node
55    
56    =item user
57    
58    C<estmaster> user with sufficient rights
59    
60    =item passwd
61    
62    password for user
63    
64  =head2 function1  =item database
65    
66    name of database from which data comes
67    
68    =item label
69    
70    label for node (optional)
71    
72    =item encoding
73    
74    character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
75    (and it probably is). This encoding will be converted to C<UTF-8> for
76    Hyper Estraier.
77    
78    =back
79    
80    Name of database will be used to form URI of documents in index.
81    
82  =cut  =cut
83    
84  sub function1 {  sub new {
85            my $class = shift;
86            my $self = {@_};
87            bless($self, $class);
88    
89            my $log = $self->_get_logger;
90    
91            #$log->debug("self: ", sub { Dumper($self) });
92    
93            foreach my $p (qw/masterurl user passwd database/) {
94                    $log->logdie("need $p") unless ($self->{$p});
95            }
96    
97            $self->{encoding} ||= 'ISO-8859-2';
98    
99            my $url = $self->{masterurl} . '/node/' . $self->{database};
100            $self->{url} = $url;
101    
102            $self->{db} = Search::Estraier::Node->new(
103                    url => $url,
104                    user => $self->{user},
105                    passwd => $self->{passwd},
106                    debug => $self->{debug},
107                    create => 1,
108                    label => "WebPAC $self->{database}",
109            );
110    
111            $log->info("using ", $self->{clean} ? "new " : "", "index $self->{url} with encoding $self->{encoding}");
112    
113            if ($self->{clean}) {
114                    $log->debug("clean $self->{database}");
115                    $self->master( action => 'nodeclr', name => $self->{database} );
116            } else {
117                    $log->debug("opening index $self->{url}");
118            }
119    
120            $self ? return $self : return undef;
121  }  }
122    
123  =head2 function2  
124    =head2 add
125    
126    Adds one entry to database.
127    
128      $est->add(
129            id => 42,
130            ds => $ds,
131            type => 'display',
132            text => 'optional text from which snippet is created',
133      );
134    
135    This function will create  entries in index using following URI format:
136    
137      C<file:///type/database%20name/000>
138    
139    Each tag in C<data_structure> with specified C<type> will create one
140    attribute and corresponding hidden text (used for search).
141    
142  =cut  =cut
143    
144  sub function2 {  sub add {
145            my $self = shift;
146    
147            my $args = {@_};
148    
149            my $log = $self->_get_logger;
150    
151            my $database = $self->{'database'} || $log->logconfess('no database in $self');
152            $log->logconfess('need db in object') unless ($self->{'db'});
153    
154            foreach my $p (qw/id ds type/) {
155                    $log->logdie("need $p") unless ($args->{$p});
156            }
157    
158            my $type = $args->{'type'};
159            my $id = $args->{'id'};
160    
161            my $uri = "file:///$type/$database/$id";
162            $log->debug("creating $uri");
163    
164            my $doc = Search::Estraier::Document->new;
165            $doc->add_attr('@uri', $self->convert($uri) );
166    
167            $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
168    
169            # filter all tags which have type defined
170            my @tags = grep {
171                    ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
172            } keys %{ $args->{'ds'} };
173    
174            $log->debug("tags = ", join(",", @tags));
175    
176            return unless (@tags);
177    
178            foreach my $tag (@tags) {
179    
180                    my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
181    
182                    next if (! $vals);
183    
184                    $vals = $self->convert( $vals ) or
185                            $log->logdie("can't convert '$vals' to UTF-8");
186    
187                    $doc->add_attr( $tag, $vals );
188                    $doc->add_hidden_text( $vals );
189            }
190    
191            my $text = $args->{'text'};
192            if ( $text ) {
193                    $text = $self->convert( $text ) or
194                            $log->logdie("can't convert '$text' to UTF-8");
195                    $doc->add_text( $text );
196            }
197    
198            $log->debug("adding ", sub { $doc->dump_draft } );
199            $self->{'db'}->put_doc($doc) || $log->warn("can't add document $uri with draft " . $doc->dump_draft . " to node " . $self->{url} . " status: " . $self->{db}->status());
200    
201            return 1;
202  }  }
203    
204  =head1 AUTHOR  =head2 add_link
205    
206  Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>    $est->add_link(
207            from => 'ps',
208            to => 'webpac2',
209            credit => 10000,
210      );
211    
212    =cut
213    
214    sub add_link {
215            my $self = shift;
216    
217            my $args = {@_};
218            my $log = $self->_get_logger;
219    
220            my $node = first { $_->{name} eq $args->{to} } $self->master( action => 'nodelist' );
221    
222            if (! $node) {
223                    $log->warn("can't find node $args->{to}, skipping link creaton");
224                    return;
225            }
226    
227            my $label = $node->{label};
228    
229            if (! $label) {
230                    $log->warn("can't find label for $args->{to}, skipping link creaton");
231                    return;
232            }
233    
234            $log->debug("using label $label for $args->{to}");
235    
236            return $self->{db}->set_link(
237                    $self->{masterurl} . '/node/' . $args->{to},
238                    $label,
239                    $args->{credit},
240            );
241    }
242    
243    
244    =head2 master
245    
246    Issue administrative commands to C<estmaster> process. See documentation for
247    C<master> in L<Search::Estraier>::Node.
248    
249      $self->master(
250            action => 'nodeclr',
251            name => 'foobar',
252      );
253    
254    =cut
255    
256    sub master {
257            my $self = shift;
258            $self->{db}->master( @_ );
259    }
260    
 =head1 BUGS  
261    
262  Please report any bugs or feature requests to  =head2 convert
 C<bug-webpac-output-estraier@rt.cpan.org>, or through the web interface at  
 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WebPAC>.  
 I will be notified, and then you'll automatically be notified of progress on  
 your bug as I make changes.  
263    
264  =head1 ACKNOWLEDGEMENTS   my $utf8_string = $self->convert('string in codepage');
265    
266    =cut
267    
268    sub convert {
269            my $self = shift;
270    
271            my $text = shift || return;
272            from_to($text, $self->{encoding}, 'UTF-8');
273            return $text;
274    }
275    
276    =head1 AUTHOR
277    
278    Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
279    
280  =head1 COPYRIGHT & LICENSE  =head1 COPYRIGHT & LICENSE
281    

Legend:
Removed from v.1  
changed lines
  Added in v.503

  ViewVC Help
Powered by ViewVC 1.1.26