/[webpac2]/trunk/lib/WebPAC/Output/KinoSearch.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Output/KinoSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 924 - (hide annotations)
Wed Oct 31 00:26:45 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 3206 byte(s)
 r1394@llin:  dpavlin | 2007-10-31 01:26:46 +0100
 add new (exported by default) function force_array
 used all over the place

1 dpavlin 431 package WebPAC::Output::KinoSearch;
2    
3     use warnings;
4     use strict;
5    
6 dpavlin 914 use base qw/WebPAC::Common WebPAC::Output Class::Accessor/;
# Generate Class::Accessor get/set methods: the four constructor options
# (path, database, encoding, clean) plus "index", which init() fills with
# the opened KinoSearch::Simple object.
7     __PACKAGE__->mk_accessors(qw(
8     path
9     database
10     encoding
11     clean
12 dpavlin 431
13 dpavlin 914 index
14     ));
15    
16     use KinoSearch::Simple;
17     use File::Path;
18 dpavlin 431 use Encode qw/from_to/;
19 dpavlin 887 use Data::Dump qw/dump/;
20 dpavlin 536 use Storable;
21 dpavlin 431
22     =head1 NAME
23    
24     WebPAC::Output::KinoSearch - Create KinoSearch full text index
25    
26     =head1 VERSION
27    
28 dpavlin 919 Version 0.05
29 dpavlin 431
30     =cut
31    
32 dpavlin 919 our $VERSION = '0.05';
33 dpavlin 431
34     =head1 SYNOPSIS
35    
36     Create full text index using KinoSearch index from data with
37     type C<search>.
38    
39     =head1 FUNCTIONS
40    
41     =head2 new
42    
43     Open KinoSearch index
44    
45 dpavlin 917 my $out = new WebPAC::Output::KinoSearch({
46 dpavlin 914 path => '/path/to/invindex',
47 dpavlin 431 database => 'demo',
48     encoding => 'iso-8859-2',
49     clean => 1,
50 dpavlin 914 });
51 dpavlin 431
52     Options are:
53    
54     =over 4
55    
56 dpavlin 914 =item path
57 dpavlin 431
58     path to KinoSearch index to use
59    
60     =item database
61    
62     name of database from which data comes
63    
64     =item encoding
65    
66     character encoding of C<data_structure> if it's different from C<ISO-8859-2>
67     (and it probably is). This encoding will be converted to C<UTF-8> for
68     index.
69    
70     =back
71    
72 dpavlin 917 =head2 init
73    
74     $out->init;
75    
76 dpavlin 431 =cut
77    
# init - validate options, prepare the index directory and open the index.
#
# Dies through the logger when the "path" or "database" option is missing,
# when the index directory cannot be created or removed, or when
# KinoSearch::Simple->new returns a false value.
# Defaults "encoding" to ISO-8859-2 when it is not set.
# If "path" does not exist it is created; if it exists and "clean" is true
# it is removed and re-created.
# The opened index (stored under "<path>/<database>") is saved in the
# "index" accessor.
sub init {
    my $self = shift;

    my $log = $self->_get_logger;

    #$log->debug("self: ", sub { dump($self) });

    foreach my $p (qw/path database/) {
        $log->logdie("need $p") unless ($self->$p);
    }

    # $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');

    $self->encoding( 'ISO-8859-2' ) unless $self->encoding;

    my $dir = $self->path;

    # NOTE: the calls below use explicit parentheses on purpose. Written as
    # "mkpath $dir || logdie(...)" the high-precedence || binds to $dir, so
    # the error handler is never reached. The result is verified on the
    # filesystem rather than through the return value, because File::Path's
    # legacy functions return counts of created/removed entries.
    my $create_dir = sub {
        eval { mkpath( $dir ) };
        $log->logdie("can't create ", $dir, ": ", $@ || $!) unless -d $dir;
    };

    if ( ! -e $dir ) {
        $create_dir->();
        $log->info("created ", $dir);
    } elsif ( $self->clean ) {
        $log->info("removing existing ", $dir);
        rmtree( $dir );
        $log->logdie("can't remove ", $dir, ": $!") if -e $dir;
        $create_dir->();
    }

    my $path = $dir . '/' . $self->database;

    $log->info("using index $path with encoding ", $self->encoding);

    my $index = KinoSearch::Simple->new(
        path     => $path,
        language => 'en',
    );

    $log->logdie("can't open $path: $!") unless $index;

    $self->index( $index );

}
116    
117    
118     =head2 add
119    
120 dpavlin 914 Adds one entry
121 dpavlin 431
122 dpavlin 917 $out->add( 42, $ds );
123 dpavlin 431
124     =cut
125    
# add - add one record to the index.
#
#   $out->add( $id, $ds );
#
# $id - record identifier (must be defined, dies through the logger otherwise)
# $ds - data structure for the record (must be true)
#
# The record is converted with ds_to_hash( $ds, 'search' ); when that
# returns a false value nothing is indexed and add returns early.
# "database" and "id" keys are filled in unless the hash already holds true
# values for them, and array-valued fields are flattened to a single string
# joined with ' <*> ' before being handed to the index.
# Returns 1 after the document has been added.
sub add {
    my $self = shift;

    my ( $id, $ds ) = @_;

    my $log = $self->_get_logger;
    $log->logdie("need id") unless defined $id;
    $log->logdie("need ds") unless $ds;

    # dump() is wrapped in a sub so the (potentially large) structure is
    # only serialized when debug logging is actually enabled -- same
    # convention as the debug call further down.
    $log->debug("id: $id ds = ", sub { dump($ds) });

    my $hash = $self->ds_to_hash( $ds, 'search' ) || return;

    $hash->{database} ||= $self->database;
    $hash->{id}       ||= $id;

    # flatten multi-valued fields into one delimited string per field
    foreach my $f ( keys %$hash ) {
        if ( ref($hash->{$f}) eq 'ARRAY' ) {
            $hash->{$f} = join(' <*> ', @{ $hash->{$f} });
        }
    }

    $log->debug("add( $id, ", sub { dump($ds) }," ) => ", sub { dump( $hash ) });

    $self->index->add_doc( $hash );

    # running total reported by finish()
    $self->{count}++;

    return 1;
}
156    
157 dpavlin 434 =head2 finish
158 dpavlin 431
159 dpavlin 434 Finish indexing: logs the number of indexed records
160    
161 dpavlin 917 $out->finish;
162 dpavlin 434
163     =cut
164    
# finish - report how many records were added through add().
#
#   $out->finish;
#
# Only writes an info-level log line; the count defaults to 0 so that a run
# which indexed nothing does not log an undefined value.
sub finish {
    my $self = shift;

    my $log = $self->_get_logger();

    $log->info("indexed ", $self->{count} || 0, " records");

}
173    
174 dpavlin 431 =head1 AUTHOR
175    
176     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
177    
178     =head1 COPYRIGHT & LICENSE
179    
180 dpavlin 914 Copyright 2005-2007 Dobrica Pavlinusic, All Rights Reserved.
181 dpavlin 431
182     This program is free software; you can redistribute it and/or modify it
183     under the same terms as Perl itself.
184    
185     =cut
186    
187     1; # End of WebPAC::Output::KinoSearch

  ViewVC Help
Powered by ViewVC 1.1.26