/[webpac2]/trunk/lib/WebPAC/Output/SWISH.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Output/SWISH.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1145 - (hide annotations)
Thu Apr 23 11:14:24 2009 UTC (15 years ago) by dpavlin
File size: 4044 byte(s)
 r1798@llin:  dpavlin | 2009-04-23 13:14:23 +0200
 start support for swish-e using WebPAC::Output::SWISH

1 dpavlin 1145 package WebPAC::Output::SWISH;
2    
3     use warnings;
4     use strict;
5    
6     use base qw/WebPAC::Common/;
7    
8     use File::Path qw/make_path/;
9     use Data::Dump qw/dump/;
10     use YAML;
11    
12     =head1 NAME
13    
14     WebPAC::Output::SWISH - Create swish-e full text index
15    
16     =cut
17    
18     our $VERSION = '0.01';
19    
20     =head1 SYNOPSIS
21    
22     Create full text index using swish-e indexer from data with
23     type C<search>.
24    
25     =head1 FUNCTIONS
26    
27     =head2 new
28    
29     my $out = new WebPAC::Output::SWISH(
30     database => 'demo',
31     );
32    
33     Options are:
34    
35     =over 4
36    
37     =item database
38    
39     name of database from which data comes
40    
41     =back
42    
43     Name of database will be used to form URI of documents in index.
44    
45     =cut
46    
47     our $dir = 'var/swish';
48    
49     sub new {
50     my $class = shift;
51     my $self = {@_};
52     bless($self, $class);
53    
54     my $log = $self->_get_logger;
55    
56     #$log->debug("self: ", sub { dump($self) });
57    
58     my $database = $self->{database} || $log->logdie("need database");
59    
60     my $path = "$dir/$database.conf";
61    
62     open(my $conf, '>', $path) || die "can't open $path: $!";
63    
64     print $conf <<"DEFAULT_SWISH_CONF";
65     # swish-e config file for $database
66    
67     IndexDir stdin
68    
69     # input file definition
70     DefaultContents XML*
71    
72     # indexed metatags
73     MetaNames xml swishdocpath
74    
75    
76     #XMLClassAttributes type
77     UndefinedMetaTags auto
78     UndefinedXMLAttributes auto
79    
80     IndexFile $dir/$database
81    
82     # Croatian ISO-8859-2 characters to unaccented equivalents
83     TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz
84    
85    
86     # disable output
87     ParserWarnLevel 0
88     IndexReport 1
89    
90     DEFAULT_SWISH_CONF
91    
92     close($conf) || die "can't write config $path: $!";
93    
94     $self->{_swish_index_path} = "$dir/$database";
95    
96     my $swish = "swish-e -S prog -c $path";
97     open( $self->{_swish_fh}, '-|', $swish ) || die "can't open pipe to $swish: $!";
98    
99     $self->{stats} = {};
100    
101     $self ? return $self : return undef;
102     }
103    
104     =head2
105    
106     my $path = $out->index_path;
107    
108     =cut
109    
110     sub index_path { $_[0]->{_swish_index_path} };
111    
112     =head2 add
113    
114     Adds one entry to database.
115    
116     $out->add(
117     id => 42,
118     ds => $ds,
119     type => 'display',
120     text => 'optional text from which snippet is created',
121     );
122    
123     This function will create entries in index using following URI format:
124    
125     C<file:///type/database%20name/000>
126    
127     Each tag in C<data_structure> with specified C<type> will create one
128     attribute and corresponding hidden text (used for search).
129    
130     =cut
131    
132     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
133     my $escape_re = join '|' => keys %escape;
134    
135     sub add {
136     my $self = shift;
137    
138     my $args = {@_};
139    
140     my $log = $self->_get_logger;
141    
142     my $database = $self->{'database'} || $log->logconfess('no database in $self');
143     $log->logconfess('need db in object') unless ($self->{'db'});
144    
145     foreach my $p (qw/id ds type/) {
146     $log->logdie("need $p") unless ($args->{$p});
147     }
148    
149     my $type = $args->{'type'};
150     my $id = $args->{'id'};
151    
152     my $uri = "$database/$id";
153     $log->debug("creating $uri");
154    
155     # filter all tags which have type defined
156     my @tags = grep {
157     ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
158     } keys %{ $args->{'ds'} };
159    
160     $log->debug("tags = ", join(",", @tags));
161    
162     return unless (@tags);
163    
164     my $xml = qq{<xml>};
165    
166     foreach my $tag (@tags) {
167    
168     my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
169    
170     next if ! $vals;
171    
172     $vals =~ s/($escape_re)/$escape{$1}/gs;
173     $xml .= qq{<$tag><![CDATA[$vals]]></$tag>};
174    
175     $self->{stats}->{attr}->{$tag}++;
176     }
177    
178     $xml .= qq{</xml>\n};
179    
180     my $len = length($xml);
181    
182     my $fh = $self->{_swish_fh};
183    
184     print $fh "Path-Name: $uri\nContent-Length: $len\nDocument-Type: XML\n\n$xml";
185     # || die "can't add $uri: $@\n$xml\n";
186    
187     return 1;
188     }
189    
190     =head2 finish
191    
192     Dump attributes used on disk
193    
194     =cut
195    
196     sub finish {
197     my $self = shift;
198     my $log = $self->_get_logger();
199    
200     my $path = $dir . '/' . $self->{database} . '.yaml';
201     YAML::DumpFile( $path, $self->{stats} );
202     $log->info("created $path ", -s $path, " bytes");
203     $log->debug( dump( $self->{stats} ) );
204     }
205    
206     =head1 AUTHOR
207    
208     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
209    
210     =head1 COPYRIGHT & LICENSE
211    
212     Copyright 2004-2009 Dobrica Pavlinusic, All Rights Reserved.
213    
214     This program is free software; you can redistribute it and/or modify it
215     under the same terms as Perl itself.
216    
217     =cut
218    
219     1;

  ViewVC Help
Powered by ViewVC 1.1.26