/[mws]/trunk/lib/MWS/CLucene.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/MWS/CLucene.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 57 - (hide annotations)
Sat Feb 12 02:52:18 2005 UTC (19 years, 2 months ago) by dpavlin
File size: 3154 byte(s)
added non-working code for CLucene

1 dpavlin 57 #!/usr/bin/perl -w
2    
3     package MWS::Clucene;
4     use strict;
5     use warnings;
6    
7     use MWS::Indexer;
8     our @ISA=qw(MWS::Indexer);
9    
10     our $VERSION = '0.01';
11    
BEGIN {
    # This is so utterly wrong that I won't even start to talk about it.
    # But FulltextSearch::CLucene still lacks a real Makefile.PL, so it is
    # loaded straight from the build output under $HOME/clucene.output/perl.
    #
    # Without HOME there is no sensible location to point at, so nothing is
    # touched (the old code produced the bogus path "/clucene.output/perl"
    # plus an "uninitialized value" warning in that case).
    if (defined $ENV{'HOME'}) {
        my $dir = $ENV{'HOME'}.'/clucene.output';
        my $lib = "$dir/perl";

        # Prepend instead of overwriting, so library paths which are already
        # configured in the environment keep working.
        # NOTE(review): changing LD_LIBRARY_PATH from inside the running
        # process may be too late for the system dynamic linker -- confirm
        # that the XS part of FulltextSearch::CLucene really loads this way.
        my $old = $ENV{'LD_LIBRARY_PATH'};
        $ENV{'LD_LIBRARY_PATH'} =
            (defined $old && length $old) ? "$lib:$old" : $lib;

        # Same effect as "use lib", but with a path computed at compile time.
        unshift @INC, $lib;
    }
}
20    
21     use FulltextSearch::CLucene;
22     use Carp;
23     use Data::Dumper;
24    
25     =head1 NAME
26    
27     MWS::Clucene - index your data using CLucene
28    
29     =head1 DESCRIPTION
30    
31     Simple implementation to use Lucene port to C++
32    
33     =head1 METHODS
34    
35     =head2 open_index
36    
37     $self->open_index;
38    
39     =cut
40    
# Open the CLucene index and cache the handle in $self->{index}.
#
# The index location is taken from $self->{index_dir} (the same key which
# add_index and the error message below use); when it is not set, the
# historical default "./index" is used.
#
# Returns the index handle. Croaks if no handle could be created.
sub open_index {
    my $self = shift;

    my $path = defined $self->{index_dir} ? $self->{index_dir} : './index';

    $self->{index} = FulltextSearch::CLucene->new( path => $path )
        || croak "can't open index '$path': $!";

    return $self->{index};
}
49    
50     =head2 search_index
51    
52     my @results = $self->search_index('message:funny');
53    
54     Date limits are, well, kludged and sort isn't supported!
55    
56     =cut
57    
# Run a query against the index, opening the index first if needed.
#
# Arguments:
#   $s - query string (required, croaks when missing). Two pseudo-terms
#        are stripped from it before it is passed to the index:
#          sort:FIELD ORDER      - removed and ignored (sort is unsupported)
#          date:YYYY[-MM[-DD]]   - optionally quoted; turned into a date
#                                  range for search_during
#
# Returns whatever the index handle's search / search_during returns.
sub search_index {
    my $self = shift;

    my $s = shift || croak "search_index needs query";

    $self->open_index if (! $self->{index});

    # kill sort:something [asc|desc]
    $s =~ s/sort:\w+\s+\w+//;

    # Month and day are each optional, so they are quantified with "?".
    # With "*" the month group swallowed "-MM-DD" in one go, which left the
    # day capture empty and put the day into the month capture.
    if ($s =~ s/date:"*(\d{4})(?:-(\d{2}))?(?:-(\d{2}))?"*//) {
        my ($y,$m,$d) = ($1,$2,$3);

        my ($df,$dt);   # date from, to

        if ($y && $m && $d) {
            # exact day: range of a single date
            $df = $self->fmtdate($y,$m,$d);
            $dt = $df;
        } elsif ($y && $m) {
            # whole month; day 31 is used for every month (the kludge
            # mentioned in the POD)
            $df = $self->fmtdate($y,$m)."-01";
            $dt = $self->fmtdate($y,$m)."-31";
        } elsif ($y) {
            # whole year
            $df = $self->fmtdate($y)."-01-01";
            $dt = $self->fmtdate($y)."-12-31";
        }

        print STDERR "Plucene query $s [from $df to $dt]\n" if ($self->{debug});
        return $self->{index}->search_during($s, $df => $dt);
    } else {
        print STDERR "Plucene query $s\n" if ($self->{debug});
        return $self->{index}->search($s);
    }
}
92    
93     =head2 add_index
94    
95     $self->add_index($mbox_id, $document);
96    
97     This method will try to fake "message" field which is used to search whole
98     message (including headers) with body and subject. Duplicating content is
99     wrong, but using Plucene::Simple has its limitations.
100    
101     It will also remove time from date to make Plucene happy.
102    
103     =cut
104    
# Add one document to the index, opening the index first if needed.
#
# Arguments:
#   $mbox_id  - mailbox id (required; only used in the debug message)
#   $document - hash ref of field => value (required). It is modified in
#               place: a "message" field (body followed by subject) is
#               added and the time part is cut off the "date" field.
#
# Returns nothing when the index already exists on disk and a search for
# the document id reports a hit; otherwise returns the result of
# insert_document.
sub add_index {
    my $self = shift;

    my $mbox_id = shift || croak "add_index needs mbox_id";
    my $document = shift || croak "add_index needs document";

    $self->open_index if (! $self->{index});

    # Fall back to "./index" when index_dir is not set, instead of warning
    # about an undefined value and probing "/segments".
    my $index_dir = defined $self->{index_dir} ? $self->{index_dir} : './index';

    # Does an index with this document already exist? The file is called
    # "segments"; the old spelling "segmets" never matched, so this check
    # was silently skipped.
    # NOTE(review): search is called with named arguments here but with a
    # plain query string in search_index -- confirm which form
    # FulltextSearch::CLucene expects.
    if (-f ($index_dir."/segments")) {
        return if ($self->{index}->search( field => "id:", query => $document->{id} ));
    }

    print STDERR "add_index($mbox_id)\n" if ($self->{debug});

    $document->{message} = ($document->{body} || '') . ($document->{subject} || '');

    # Keep the YYYY-MM-DD part and drop everything after it. The previous
    # pattern had no capture and removed the date together with the time.
    $document->{date} =~ s/(\d{4}-\d+-\d+).*/$1/ if (defined $document->{date});

    $self->{index}->new_document;
    foreach my $f (keys %{$document}) {
        if (lc($f) eq 'date') {
            $self->{index}->add_date( field => $f, value => $document->{$f});
        } else {
            $self->{index}->add_field( field => $f, value => $document->{$f});
        }
    }

    $self->{index}->insert_document;
}
135    
136     =head2 close_index
137    
138     $self->close_index;
139    
140     This will also optimize Plucene index file.
141    
142     =cut
143    
# Close the index handle, if one is open.
#
# The cached handle is removed from $self, so the next search_index or
# add_index call opens the index again instead of reusing a closed handle.
#
# Returns the result of the handle's close method, or nothing when no
# index was open.
sub close_index {
    my $self = shift;

    # Never opened or already closed: calling ->close on undef would die.
    return unless ($self->{index});

    my $index = delete $self->{index};

    return $index->close;
}
149    
150     1;

  ViewVC Help
Powered by ViewVC 1.1.26