/[webpac2]/trunk/lib/WebPAC/Output/KinoSearch.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Output/KinoSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 914 - (show annotations)
Tue Oct 30 20:11:04 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 2897 byte(s)
 r1373@llin:  dpavlin | 2007-10-30 21:11:01 +0100
 use KinoSearch::Simple and convert to new Output API

1 package WebPAC::Output::KinoSearch;
2
3 use warnings;
4 use strict;
5
6 use base qw/WebPAC::Common WebPAC::Output Class::Accessor/;
7 __PACKAGE__->mk_accessors(qw(
8 path
9 database
10 encoding
11 clean
12
13 index
14 ));
15
16 use KinoSearch::Simple;
17 use File::Path;
18 use Encode qw/from_to/;
19 use Data::Dump qw/dump/;
20 use Storable;
21
22 =head1 NAME
23
24 WebPAC::Output::KinoSearch - Create KinoSearch full text index
25
26 =head1 VERSION
27
28 Version 0.04
29
30 =cut
31
32 our $VERSION = '0.04';
33
34 =head1 SYNOPSIS
35
36 Create full text index using KinoSearch index from data with
37 type C<search>.
38
39 =head1 FUNCTIONS
40
41 =head2 new
42
43 Open KinoSearch index
44
45 my $est = new WebPAC::Output::KinoSearch({
46 path => '/path/to/invindex',
47 database => 'demo',
48 encoding => 'iso-8859-2',
49 clean => 1,
50 });
51
52 Options are:
53
54 =over 4
55
56 =item path
57
58 path to KinoSearch index to use
59
60 =item database
61
62 name of database from which data comes
63
64 =item encoding
65
66 character encoding of C<data_structure> if it's different than C<ISO-8859-2>
67 (and it probably is). This encoding will be converted to C<UTF-8> for
68 index.
69
70 =back
71
72 =cut
73
# Validate the configuration, make sure the index directory exists and open
# the KinoSearch index for the configured database.
#
# Reads the accessors:
#   path     - base directory for indexes (required)
#   database - database name, used as sub-directory of path (required)
#   encoding - input encoding, set to 'ISO-8859-2' when not given
#
# Dies through the logger (logdie) when a required option is missing, when
# the base directory cannot be created or when the index cannot be opened.
# On success the opened index is stored in the C<index> accessor.
sub init {
	my $self = shift;

	my $log = $self->_get_logger;

	foreach my $p (qw/path database/) {
		$log->logdie("need $p") unless ($self->$p);
	}

	$self->encoding( 'ISO-8859-2' ) unless $self->encoding;

	if ( ! -e $self->path ) {
		# mkpath croaks on failure, so trap that and report it through
		# the logger.  (Writing "mkpath $path || logdie(...)" binds the ||
		# to the path argument, which means logdie can never be reached.)
		my $created = eval { mkpath( $self->path ); 1 };
		$log->logdie("can't create ", $self->path, ": ", $@ || $!)
			unless $created && -d $self->path;
		$log->info("created ", $self->path);
	}

	my $path = $self->path . '/' . $self->database;

	$log->info("using index $path with encoding ", $self->encoding);

	my $index = KinoSearch::Simple->new(
		path => $path,
		language => 'en',
	);

	$log->logdie("can't open $path: $!") unless $index;

	$self->index( $index );

}
108
109
110 =head2 add
111
112 Adds one entry
113
114 $est->add( 42, $ds );
115
116 =cut
117
# Add one record to the index.
#
#   $id - record identifier; must be defined (0 is accepted)
#   $ds - data structure of the record; must be a true value
#
# Dies through the logger when either argument is missing.  Returns nothing
# when ds_to_hash() produces no data of type 'search' for this record,
# otherwise hands the resulting hash to the index and returns 1.
sub add {
	my $self = shift;

	my ( $id, $ds ) = @_;

	my $log = $self->_get_logger;
	$log->logdie("need id") unless defined $id;
	$log->logdie("need ds") unless $ds;

	# dump() is wrapped in a sub so the (potentially large) structure is
	# only serialized when debug logging is actually enabled
	$log->debug("id: $id ds = ", sub { dump($ds) });

	my $hash = $self->ds_to_hash( $ds, 'search' ) || return;

	# diagnostic output goes through the logger at debug level instead of
	# an unconditional warn, which flooded STDERR with one dump per record
	$log->debug("add( $id, ", sub { dump($ds) }, " ) => ", sub { dump($hash) });

	$self->index->add_doc( $hash );

	return 1;
}
137
138 =head2 finish
139
140 Close index
141
142 $index->finish;
143
144 =cut
145
# Close the index.  This implementation performs no explicit work on the
# index itself; it only emits an informational "dummy finish" log message
# and returns whatever the logger's info() call returns.
sub finish {
	my ($self) = @_;

	return $self->_get_logger()->info("dummy finish");
}
154
155 =head2 convert
156
157 my $utf8_string = $self->convert('string in codepage');
158
159 =cut
160
# Convert a string from the configured input encoding to UTF-8.
#
#   $text - octets in the encoding stored in $self->{encoding}
#
# Returns the converted string, or nothing when $text is undefined or
# empty.  The string "0" is valid input and is converted like any other
# value (a plain truth test would wrongly discard it).
sub convert {
	my $self = shift;

	my $text = shift;
	return unless defined $text && length $text;

	# fall back to the documented default if no encoding has been set,
	# since from_to() cannot work with an undefined source encoding
	my $encoding = $self->{encoding} || 'ISO-8859-2';

	# from_to() converts $text in place
	from_to($text, $encoding, 'UTF-8');
	return $text;
}
168
169 =head1 AUTHOR
170
171 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
172
173 =head1 COPYRIGHT & LICENSE
174
175 Copyright 2005-2007 Dobrica Pavlinusic, All Rights Reserved.
176
177 This program is free software; you can redistribute it and/or modify it
178 under the same terms as Perl itself.
179
180 =cut
181
182 1; # End of WebPAC::Output::KinoSearch

  ViewVC Help
Powered by ViewVC 1.1.26