/[mws]/trunk/lib/MWS/CLucene.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/MWS/CLucene.pm

Parent Directory | Revision Log


Revision 57 - (show annotations)
Sat Feb 12 02:52:18 2005 UTC (19 years, 2 months ago) by dpavlin
File size: 3154 byte(s)
added non-working code for CLucene

1 #!/usr/bin/perl -w
2
3 package MWS::Clucene;
4 use strict;
5 use warnings;
6
7 use MWS::Indexer;
8 our @ISA=qw(MWS::Indexer);
9
10 our $VERSION = '0.01';
11
BEGIN {
    # XXX: hard-coding a build directory under $HOME is an admitted
    # workaround; it is needed because FulltextSearch::CLucene still
    # lacks a real Makefile.PL and therefore can't be installed properly.

    # Avoid an "uninitialized value" warning when HOME is not set.
    my $home = defined $ENV{'HOME'} ? $ENV{'HOME'} : '';
    my $dir = $home.'/clucene.output';

    # Put our directory first, but keep whatever the caller already had
    # in LD_LIBRARY_PATH instead of throwing it away.
    # NOTE(review): many dynamic loaders read LD_LIBRARY_PATH only at
    # process start, so this may affect child processes only -- confirm.
    my $old_path = $ENV{'LD_LIBRARY_PATH'};
    $ENV{'LD_LIBRARY_PATH'} = (defined $old_path && length $old_path)
        ? "$dir/perl:$old_path"
        : "$dir/perl";

    # Make "use FulltextSearch::CLucene" below find the module.
    unshift @INC, "$dir/perl";
}
20
21 use FulltextSearch::CLucene;
22 use Carp;
23 use Data::Dumper;
24
25 =head1 NAME
26
27 MWS::Clucene - index your data using CLucene
28
29 =head1 DESCRIPTION
30
31 Simple indexer implementation which uses CLucene, the C++ port of Lucene.
32
33 =head1 METHODS
34
35 =head2 open_index
36
37 $self->open_index;
38
39 =cut
40
# Open the CLucene index and remember the handle in $self->{index}.
#
# The index is opened in $self->{index_dir}; if that is not set, the
# historical default "./index" is used.
#
# Returns the index handle.  Croaks if no handle could be created.
sub open_index {
    my $self = shift;

    # The path used to be hard-coded to "./index" while the error message
    # reported $self->{index_dir}; use one value for both.
    my $index_dir = (defined $self->{index_dir} && length $self->{index_dir})
        ? $self->{index_dir}
        : './index';

    $self->{index} = FulltextSearch::CLucene->new( path => $index_dir )
        || croak "can't open index '$index_dir': $!";

    return $self->{index};
}
49
50 =head2 search_index
51
52 my @results = $self->search_index('message:funny');
53
54 Date limits are, well, kludged and sort isn't supported!
55
56 =cut
57
# Run a query against the index, opening the index first if needed.
#
# Parameters:
#   query string (required; croaks if it is missing or false)
#
# A "sort:field direction" clause is removed from the query because
# sorting is not supported.  A date:YYYY, date:YYYY-MM or date:YYYY-MM-DD
# clause (optionally quoted) is removed as well and turned into a
# from/to range which is passed to search_during().
#
# Returns whatever the index's search() or search_during() returns.
sub search_index {
    my $self = shift;

    my $s = shift || croak "search_index needs query";

    $self->open_index if (! $self->{index});

    # kill sort:something [asc|desc]
    $s =~ s/sort:\w+\s+\w+//;

    # Month and day are optional, but each may appear only once.  With
    # "*" on the groups, the month group swallowed "-MM-DD" completely
    # and a full date was treated as a whole month.
    if ($s =~ s/date:"*(\d{4})(?:-(\d{2})(?:-(\d{2}))?)?"*//) {
        my ($y,$m,$d) = ($1,$2,$3);

        my ($df,$dt);   # date from, to

        if ($y && $m && $d) {
            # single day: range starts and ends on the same date
            $df = $self->fmtdate($y,$m,$d);
            $dt = $df;
        } elsif ($y && $m) {
            # whole month; day 31 is used as the end for every month
            $df = $self->fmtdate($y,$m)."-01";
            $dt = $self->fmtdate($y,$m)."-31";
        } elsif ($y) {
            # whole year
            $df = $self->fmtdate($y)."-01-01";
            $dt = $self->fmtdate($y)."-12-31";
        }
        print STDERR "Plucene query $s [from $df to $dt]\n" if ($self->{debug});
        return $self->{index}->search_during($s, $df => $dt);
    } else {
        print STDERR "Plucene query $s\n" if ($self->{debug});
        return $self->{index}->search($s);
    }
}
92
93 =head2 add_index
94
95 $self->add_index($mbox_id, $document);
96
97 This method will try to fake "message" field which is used to search whole
98 message (including headers) with body and subject. Duplicating content is
99 wrong, but using Plucene::Simple has its limitations.
100
101 It will also remove time from date to make Plucene happy.
102
103 =cut
104
# Add one document to the index, opening the index first if needed.
#
# Parameters:
#   mbox_id  - required; only used in the debug message
#   document - required hash reference of field => value pairs
#
# The document hash is modified in place: a "message" field is built from
# body and subject, and the time part is stripped from "date".
#
# Returns without adding anything when the index already exists on disk
# and a search for the document id finds something; otherwise returns the
# result of insert_document.  Croaks when an argument is missing.
sub add_index {
    my $self = shift;

    my $mbox_id = shift || croak "add_index needs mbox_id";
    my $document = shift || croak "add_index needs document";

    $self->open_index if (! $self->{index});

    # Same fallback as the path historically opened by open_index.
    my $index_dir = (defined $self->{index_dir} && length $self->{index_dir})
        ? $self->{index_dir}
        : './index';

    # Does an index with this document already exist?  The file name was
    # misspelled as "segmets", so this check could never run.
    if (-f $index_dir."/segments") {
        return if ($self->{index}->search( field => "id:", query => $document->{id} ));
    }

    print STDERR "add_index($mbox_id)\n" if ($self->{debug});

    $document->{message} = ($document->{body} || '') . ($document->{subject} || '');

    # Keep the date and drop everything after it (the time).  The old
    # substitution had no capture and erased the date together with the time.
    $document->{date} =~ s/(\d{4}-\d+-\d+).*/$1/ if (defined $document->{date});

    $self->{index}->new_document;
    foreach my $f (keys %{$document}) {
        if (lc($f) eq 'date') {
            $self->{index}->add_date( field => $f, value => $document->{$f});
        } else {
            $self->{index}->add_field( field => $f, value => $document->{$f});
        }
    }

    $self->{index}->insert_document;
}
135
136 =head2 close_index
137
138 $self->close_index;
139
140 This will also optimize Plucene index file.
141
142 =cut
143
# Close the index if it is open.
#
# Returns the result of the handle's close method, or nothing when no
# index was open.
sub close_index {
    my $self = shift;

    # Closing an index which was never opened used to die with a
    # "can't call method on undefined value" error.
    return unless $self->{index};

    # Forget the handle so that search_index and add_index reopen the
    # index instead of using a closed one.
    my $index = delete $self->{index};

    return $index->close;
}
149
150 1;

  ViewVC Help
Powered by ViewVC 1.1.26