/[webpac2]/trunk/lib/WebPAC/Output/SWISH.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Output/SWISH.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1145 - (show annotations)
Thu Apr 23 11:14:24 2009 UTC (15 years ago) by dpavlin
File size: 4044 byte(s)
 r1798@llin:  dpavlin | 2009-04-23 13:14:23 +0200
 start support for swish-e using WebPAC::Output::SWISH

1 package WebPAC::Output::SWISH;
2
3 use warnings;
4 use strict;
5
6 use base qw/WebPAC::Common/;
7
8 use File::Path qw/make_path/;
9 use Data::Dump qw/dump/;
10 use YAML;
11
12 =head1 NAME
13
14 WebPAC::Output::SWISH - Create swish-e full text index
15
16 =cut
17
18 our $VERSION = '0.01';
19
20 =head1 SYNOPSIS
21
22 Create full text index using swish-e indexer from data with
23 type C<search>.
24
25 =head1 FUNCTIONS
26
27 =head2 new
28
29 my $out = new WebPAC::Output::SWISH(
30 database => 'demo',
31 );
32
33 Options are:
34
35 =over 4
36
37 =item database
38
39 name of database from which data comes
40
41 =back
42
43 Name of database will be used to form URI of documents in index.
44
45 =cut
46
47 our $dir = 'var/swish';
48
49 sub new {
50 my $class = shift;
51 my $self = {@_};
52 bless($self, $class);
53
54 my $log = $self->_get_logger;
55
56 #$log->debug("self: ", sub { dump($self) });
57
58 my $database = $self->{database} || $log->logdie("need database");
59
60 my $path = "$dir/$database.conf";
61
62 open(my $conf, '>', $path) || die "can't open $path: $!";
63
64 print $conf <<"DEFAULT_SWISH_CONF";
65 # swish-e config file for $database
66
67 IndexDir stdin
68
69 # input file definition
70 DefaultContents XML*
71
72 # indexed metatags
73 MetaNames xml swishdocpath
74
75
76 #XMLClassAttributes type
77 UndefinedMetaTags auto
78 UndefinedXMLAttributes auto
79
80 IndexFile $dir/$database
81
82 # Croatian ISO-8859-2 characters to unaccented equivalents
83 TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz
84
85
86 # disable output
87 ParserWarnLevel 0
88 IndexReport 1
89
90 DEFAULT_SWISH_CONF
91
92 close($conf) || die "can't write config $path: $!";
93
94 $self->{_swish_index_path} = "$dir/$database";
95
96 my $swish = "swish-e -S prog -c $path";
97 open( $self->{_swish_fh}, '-|', $swish ) || die "can't open pipe to $swish: $!";
98
99 $self->{stats} = {};
100
101 $self ? return $self : return undef;
102 }
103
104 =head2
105
106 my $path = $out->index_path;
107
108 =cut
109
110 sub index_path { $_[0]->{_swish_index_path} };
111
112 =head2 add
113
114 Adds one entry to database.
115
116 $out->add(
117 id => 42,
118 ds => $ds,
119 type => 'display',
120 text => 'optional text from which snippet is created',
121 );
122
123 This function will create entries in index using following URI format:
124
125 C<file:///type/database%20name/000>
126
127 Each tag in C<data_structure> with specified C<type> will create one
128 attribute and corresponding hidden text (used for search).
129
130 =cut
131
132 my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
133 my $escape_re = join '|' => keys %escape;
134
135 sub add {
136 my $self = shift;
137
138 my $args = {@_};
139
140 my $log = $self->_get_logger;
141
142 my $database = $self->{'database'} || $log->logconfess('no database in $self');
143 $log->logconfess('need db in object') unless ($self->{'db'});
144
145 foreach my $p (qw/id ds type/) {
146 $log->logdie("need $p") unless ($args->{$p});
147 }
148
149 my $type = $args->{'type'};
150 my $id = $args->{'id'};
151
152 my $uri = "$database/$id";
153 $log->debug("creating $uri");
154
155 # filter all tags which have type defined
156 my @tags = grep {
157 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
158 } keys %{ $args->{'ds'} };
159
160 $log->debug("tags = ", join(",", @tags));
161
162 return unless (@tags);
163
164 my $xml = qq{<xml>};
165
166 foreach my $tag (@tags) {
167
168 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
169
170 next if ! $vals;
171
172 $vals =~ s/($escape_re)/$escape{$1}/gs;
173 $xml .= qq{<$tag><![CDATA[$vals]]></$tag>};
174
175 $self->{stats}->{attr}->{$tag}++;
176 }
177
178 $xml .= qq{</xml>\n};
179
180 my $len = length($xml);
181
182 my $fh = $self->{_swish_fh};
183
184 print $fh "Path-Name: $uri\nContent-Length: $len\nDocument-Type: XML\n\n$xml";
185 # || die "can't add $uri: $@\n$xml\n";
186
187 return 1;
188 }
189
190 =head2 finish
191
192 Dump attributes used on disk
193
194 =cut
195
196 sub finish {
197 my $self = shift;
198 my $log = $self->_get_logger();
199
200 my $path = $dir . '/' . $self->{database} . '.yaml';
201 YAML::DumpFile( $path, $self->{stats} );
202 $log->info("created $path ", -s $path, " bytes");
203 $log->debug( dump( $self->{stats} ) );
204 }
205
206 =head1 AUTHOR
207
208 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
209
210 =head1 COPYRIGHT & LICENSE
211
212 Copyright 2004-2009 Dobrica Pavlinusic, All Rights Reserved.
213
214 This program is free software; you can redistribute it and/or modify it
215 under the same terms as Perl itself.
216
217 =cut
218
219 1;

  ViewVC Help
Powered by ViewVC 1.1.26