/[mws]/trunk/lib/MWS/Plucene.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/MWS/Plucene.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 41 - (hide annotations)
Mon May 10 20:26:17 2004 UTC (20 years ago) by dpavlin
Original Path: trunk/MWS/Plucene.pm
File size: 2625 byte(s)
major code re-structuring: separation of indexer code into target-independent
and target-dependent parts, documentation improvements

1 dpavlin 9 #!/usr/bin/perl -w
2    
3 dpavlin 41 package MWS::Plucene;
4 dpavlin 9 use strict;
5 dpavlin 41 use warnings;
6 dpavlin 9
7 dpavlin 41 use MWS::Indexer;
8     our @ISA=qw(MWS::Indexer);
9 dpavlin 9
10 dpavlin 41 our $VERSION = '0.01';
11    
12 dpavlin 9 use Plucene::Simple;
13 dpavlin 41 use Carp;
14     use Data::Dumper;
15 dpavlin 9
16 dpavlin 41 =head1 NAME
17    
18     MWS::Plucene - index your data using Plucene
19    
20     =head1 DESCRIPTION
21    
22     Simple indexer implementation that uses Plucene, the Perl port of Lucene.
23    
24     =head1 METHODS
25    
26     =head2 open_index
27    
28     $self->open_index;
29    
30     =cut
31    
# Open the Plucene index located in $self->{index_dir}.
#
# The handle returned by Plucene::Simple->open is cached in $self->{index}
# and also returned to the caller. Croaks if no handle could be obtained.
sub open_index {
    my ($self) = @_;

    my $handle = Plucene::Simple->open($self->{index_dir});

    # Bail out before touching $self->{index} when nothing usable came back.
    unless ($handle) {
        croak "can't open index '", $self->{index_dir}, "': $!";
    }

    $self->{index} = $handle;

    return $handle;
}
40    
41 dpavlin 41 =head2 search_index
42    
43     my @results = $self->search_index('message:funny');
44    
45     Date limits are, well, kludged and sort isn't supported!
46    
47     =cut
48    
# Run a query against the Plucene index.
#
# Arguments:
#   $s - query string (required; croaks when missing or false).
#
# The query may contain two pseudo-terms that are stripped before the
# query is handed to Plucene:
#   sort:FIELD DIRECTION    - removed and ignored (sorting is unsupported)
#   date:YYYY[-MM[-DD]]     - optionally double-quoted; turned into a
#                             from/to pair passed to search_during
#
# Returns whatever $self->{index}->search or ->search_during returns.
# The index is opened on demand via $self->open_index.
sub search_index {
    my $self = shift;

    my $s = shift || croak "search_index needs query";

    $self->open_index if (! $self->{index});

    # kill sort:something [asc|desc]
    $s =~ s/sort:\w+\s+\w+//;

    # Month and day are each matched at most once ("?" rather than "*").
    # With "*" the month group swallowed "-MM-DD" in one go, leaving the
    # day in the month capture and the day capture undefined.
    if ($s =~ s/date:"*(\d{4})(?:-(\d{2}))?(?:-(\d{2}))?"*//) {
        my ($y, $m, $d) = ($1, $2, $3);

        my ($df, $dt); # date from, to

        if (defined $d) {
            # exact day: range collapses to a single date
            $df = $self->fmtdate($y, $m, $d);
            $dt = $df;
        } elsif (defined $m) {
            # whole month; "-31" is used for every month (known kludge)
            $df = $self->fmtdate($y, $m) . "-01";
            $dt = $self->fmtdate($y, $m) . "-31";
        } else {
            # whole year
            $df = $self->fmtdate($y) . "-01-01";
            $dt = $self->fmtdate($y) . "-12-31";
        }

        print STDERR "Plucene query $s [from $df to $dt]\n" if ($self->{debug});
        return $self->{index}->search_during($s, $df => $dt);
    }

    print STDERR "Plucene query $s\n" if ($self->{debug});
    return $self->{index}->search($s);
}
83    
84 dpavlin 41 =head2 add_index
85    
86     $self->add_index($mbox_id, $document);
87    
88     This method will try to fake a "message" field, which is used to search the whole
89     message (including headers), by combining body and subject. Duplicating content is
90     wrong, but using Plucene::Simple has its limitations.
91    
92     It will also remove time from date to make Plucene happy.
93    
94     =cut
95    
# Add one document to the Plucene index.
#
# Arguments:
#   $mbox_id  - identifier the document is stored under (required)
#   $document - hash reference with the document fields (required)
#
# Croaks when either argument is missing or false. The index is opened on
# demand via $self->open_index.
#
# Side effects on the caller's $document hash:
#   - {message} is set to body and subject joined by a newline
#   - {date}, when defined, is cut down to its leading YYYY-M-D part
#
# Returns early (without adding) when an index already exists on disk and
# a search for "id:<document id>" finds something.
sub add_index {
    my $self = shift;

    my $mbox_id  = shift || croak "add_index needs mbox_id";
    my $document = shift || croak "add_index needs document";

    $self->open_index if (! $self->{index});

    # Does an index with this document already exist? Only ask once the
    # index has a "segments" file, i.e. something was written before.
    # NOTE(review): the lookup uses $document->{id}, not $mbox_id, and the
    # id is passed to the query parser unescaped -- confirm with callers.
    if (-f $self->{index_dir} . "/segments") {
        # force list context so an empty result list is reliably false
        my @existing = $self->{index}->search("id:" . $document->{id});
        return if @existing;
    }

    print STDERR "add_index($mbox_id)\n" if ($self->{debug});

    # Fake a whole-message field; the newline keeps the last word of the
    # body from fusing with the first word of the subject.
    my $body    = $document->{body}    || '';
    my $subject = $document->{subject} || '';
    $document->{message} = join("\n", grep { length } $body, $subject);

    # Keep the date, drop everything after it (the time part).
    $document->{date} =~ s/(\d{4}-\d+-\d+).*/$1/s
        if defined $document->{date};

    $self->{index}->add($mbox_id => $document);
}
117    
118 dpavlin 41 =head2 close_index
119    
120     $self->close_index;
121    
122     This will also optimize Plucene index file.
123    
124     =cut
125    
# Finish work on the index by calling optimize on the cached handle.
#
# Does nothing when no index has been opened (nothing to optimize), so it
# is safe to call unconditionally during cleanup. Otherwise returns
# whatever optimize returns. The handle stays cached in $self->{index}.
sub close_index {
    my $self = shift;

    return unless $self->{index};

    return $self->{index}->optimize;
}
131    
132 dpavlin 9 1;

  ViewVC Help
Powered by ViewVC 1.1.26