/[webpac2]/trunk/lib/WebPAC/Output/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Output/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 74 by dpavlin, Sun Nov 20 20:13:39 2005 UTC revision 1148 by dpavlin, Fri Apr 24 09:37:08 2009 UTC
# Line 5  use strict; Line 5  use strict;
5    
6  use base qw/WebPAC::Common/;  use base qw/WebPAC::Common/;
7    
8  use HyperEstraier;  use Search::Estraier 0.06;
9  use Text::Iconv;  use Encode qw/from_to/;
10  use Data::Dumper;  use Data::Dump qw/dump/;
11    use LWP;
12    use URI::Escape;
13    use List::Util qw/first/;
14    use File::Path qw/mkpath/;
15    use YAML;
16    
17  =head1 NAME  =head1 NAME
18    
# Line 15  WebPAC::Output::Estraier - Create Hyper Line 20  WebPAC::Output::Estraier - Create Hyper
20    
21  =head1 VERSION  =head1 VERSION
22    
23  Version 0.01  Version 0.12
24    
25  =cut  =cut
26    
27  our $VERSION = '0.01';  our $VERSION = '0.12';
28    
29  =head1 SYNOPSIS  =head1 SYNOPSIS
30    
# Line 33  type C<search>. Line 38  type C<search>.
38  Connect to Hyper Estraier index using HTTP  Connect to Hyper Estraier index using HTTP
39    
40   my $est = new WebPAC::Output::Estraier(   my $est = new WebPAC::Output::Estraier(
41          url => 'http://localhost:1978/node/webpac2',          masterurl => 'http://localhost:1978/',
42          user => 'admin',          user => 'admin',
43          passwd => 'admin',          passwd => 'admin',
44          database => 'demo',          database => 'demo',
45            label => 'node label',
46          encoding => 'iso-8859-2',          encoding => 'iso-8859-2',
47            clean => 1,
48   );   );
49    
50  Options are:  Options are:
51    
52  =over 4  =over 4
53    
54  =item url  =item masterurl
55    
56  URI to C<estmaster> node  URI to C<estmaster> node
57    
# Line 60  password for user Line 67  password for user
67    
68  name of database from which data comes  name of database from which data comes
69    
70    =item label
71    
72    label for node (optional)
73    
74  =item encoding  =item encoding
75    
76  character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>  character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
# Line 74  Name of database will be used to form UR Line 85  Name of database will be used to form UR
85    
86  sub new {  sub new {
87          my $class = shift;          my $class = shift;
88          my $self = {@_};          my $self = {@_};
89          bless($self, $class);          bless($self, $class);
90    
91          my $log = $self->_get_logger;          my $log = $self->_get_logger;
92    
93          foreach my $p (qw/url user passwd/) {          #$log->debug("self: ", sub { dump($self) });
94    
95            foreach my $p (qw/masterurl user passwd database/) {
96                  $log->logdie("need $p") unless ($self->{$p});                  $log->logdie("need $p") unless ($self->{$p});
97          }          }
98    
99          $log->info("opening Hyper Estraier index $self->{'url'}");          $self->{encoding} ||= 'ISO-8859-2';
100    
101            my $url = $self->{masterurl} . '/node/' . $self->{database};
102            $self->{url} = $url;
103    
104          $self->{'db'} = HyperEstraier::Node->new($self->{'url'});          $self->{label} ||= "WebPAC $self->{database}";
         $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});  
105    
106          my $encoding = $self->{'encoding'} || 'ISO-8859-2';          $self->{db} = Search::Estraier::Node->new(
107          $log->info("using encoding $encoding");                  url => $url,
108                    user => $self->{user},
109                    passwd => $self->{passwd},
110                    debug => $self->{debug},
111                    create => 1,
112                    label => $self->convert( $self->{label} ),
113            );
114    
115            $log->info("using ", $self->{clean} ? "new " : "", "index $self->{url} '$self->{label}' with encoding $self->{encoding}");
116    
117            if ($self->{clean}) {
118                    $log->debug("clean $self->{database}");
119                    $self->master( action => 'nodeclr', name => $self->{database} );
120            } else {
121                    $log->debug("opening index $self->{url}");
122            }
123    
124          my $iconv = new Text::Iconv('iso-8859-2', 'utf-8');          $self->{stats} = {};
125    
126          $self ? return $self : return undef;          $self ? return $self : return undef;
127  }  }
128    
129    
130  =head2 add  =head2 add
131    
132  Adds one entry to database.  Adds one entry to database.
# Line 104  Adds one entry to database. Line 135  Adds one entry to database.
135          id => 42,          id => 42,
136          ds => $ds,          ds => $ds,
137          type => 'display',          type => 'display',
         url_prefix => 'database name',  
138          text => 'optional text from which snippet is created',          text => 'optional text from which snippet is created',
139    );    );
140    
141  This function will create  entries in index using following URI format:  This function will create  entries in index using following URI format:
142    
143    C<file:///database%20name/000>    C<file:///type/database%20name/000>
144    
145  Each tag in C<data_structure> with specified C<type> will create one  Each tag in C<data_structure> with specified C<type> will create one
146  attribute and corresponding hidden text (used for search).  attribute and corresponding hidden text (used for search).
# Line 120  attribute and corresponding hidden text Line 150  attribute and corresponding hidden text
150  sub add {  sub add {
151          my $self = shift;          my $self = shift;
152    
153          my $args = {@_};          my $args = {@_};
154    
155          my $log = $self->_get_logger;          my $log = $self->_get_logger;
156    
# Line 132  sub add { Line 162  sub add {
162          }          }
163    
164          my $type = $args->{'type'};          my $type = $args->{'type'};
165          my $mfn = $args->{'id'};          my $id = $args->{'id'};
166    
167          my $uri = "file:///$type/$database/$mfn";          my $uri = "file:///$type/$database/$id";
168          $log->debug("creating $uri");          $log->debug("creating $uri");
169    
170          my $doc = HyperEstraier::Document->new;          my $doc = Search::Estraier::Document->new;
171          $doc->add_attr('@uri', $uri);          $doc->add_attr('@uri', $self->convert($uri) );
172    
173          $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );          $log->debug("ds = ", sub { dump($args->{'ds'}) } );
174    
175          # filter all tags which have type defined          # filter all tags which have type defined
176          my @tags = grep {          my @tags = grep {
177                  defined( $args->{'ds'}->{$_}->{$type} )                  ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
178          } keys %{ $args->{'ds'} };          } keys %{ $args->{'ds'} };
179    
180          $log->debug("tags = ", join(",", @tags));          $log->debug("tags = ", join(",", @tags));
# Line 155  sub add { Line 185  sub add {
185    
186                  my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });                  my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
187    
188                  $log->logconfess("no values for $tag/$type") unless ($vals);                  next if (! $vals);
189    
190                    $vals = $self->convert( $vals ) or
191                            $log->logdie("can't convert '$vals' to UTF-8");
192    
193                    $doc->add_attr( $tag, $vals );
194                    $doc->add_hidden_text( $vals );
195    
196                  $doc->add_attr($tag, $vals);                  $self->{stats}->{attr}->{$tag}++;
                 $doc->add_hidden_text($vals);  
197          }          }
198    
199          my $text = $args->{'text'};          my $text = $args->{'text'};
200          $doc->add_text( $text ) if ( $text );          if ( $text ) {
201                    $text = $self->convert( $text ) or
202                            $log->logdie("can't convert '$text' to UTF-8");
203                    $doc->add_text( $text );
204            }
205    
206          $log->debug("adding ", sub { $doc->dump_draft } );          $log->debug("adding ", sub { $doc->dump_draft } );
207          $self->{'db'}->put_doc($doc) || $log->die("can't add document $uri to index");          $self->{'db'}->put_doc($doc) || $log->warn("can't add document $uri with draft " . $doc->dump_draft . " to node " . $self->{url} . " status: " . $self->{db}->status());
208    
209          return 1;          return 1;
210  }  }
211    
212    =head2 add_link
213    
214      $est->add_link(
215            from => 'ps',
216            to => 'webpac2',
217            credit => 10000,
218      );
219    
220    =cut
221    
222    sub add_link {
223            my $self = shift;
224    
225            my $args = {@_};
226            my $log = $self->_get_logger;
227    
228            foreach my $p (qw/from to credit/) {
229                    $log->logdie("need $p") unless ($args->{$p});
230            }
231    
232            my $node = first { $_->{name} eq $args->{to} } $self->master( action => 'nodelist' );
233    
234            if (! $node) {
235                    $log->warn("can't find node $args->{to}, skipping link creaton");
236                    return;
237            }
238    
239            my $label = $node->{label};
240    
241            if (! $label) {
242                    $log->warn("can't find label for $args->{to}, skipping link creaton");
243                    return;
244            }
245    
246            $log->debug("using label $label for $args->{to}");
247    
248            return $self->{db}->set_link(
249                    $self->{masterurl} . '/node/' . $args->{to},
250                    $label,
251                    $args->{credit},
252            );
253    }
254    
255    
256    =head2 master
257    
258    Issue administrative commands to C<estmaster> process. See documentation for
259    C<master> in L<Search::Estraier>::Node.
260    
261      $self->master(
262            action => 'nodeclr',
263            name => 'foobar',
264      );
265    
266    =cut
267    
268    sub master {
269            my $self = shift;
270            $self->{db}->master( @_ );
271    }
272    
273    
274    =head2 convert
275    
276     my $utf8_string = $self->convert('string in codepage');
277    
278    =cut
279    
280    sub convert {
281            my $self = shift;
282    
283            my $text = shift || return;
284            from_to($text, $self->{encoding}, 'UTF-8');
285            return $text;
286    }
287    
288    =head2 finish
289    
290    Dump attributes used on disk
291    
292    =cut
293    
294    sub finish {
295            my $self = shift;
296            my $log = $self->_get_logger();
297    
298            my $path = 'var/estraier';
299    
300            mkpath $path unless -e $path;
301    
302            $path .= '/' . $self->{database} . '.yaml';
303            YAML::DumpFile( $path, $self->{stats} );
304            $log->info("created  $path ", -s $path, " bytes");
305            $log->debug( dump( $self->{stats} ) );
306    }
307    
308  =head1 AUTHOR  =head1 AUTHOR
309    
310  Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>  Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>

Legend:
Removed from v.74  
changed lines
  Added in v.1148

  ViewVC Help
Powered by ViewVC 1.1.26