/[mws]/trunk/lib/MWS/Plucene.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/MWS/Plucene.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/MWS_plucene.pm revision 12 by dpavlin, Thu May 6 12:40:11 2004 UTC trunk/MWS/Plucene.pm revision 41 by dpavlin, Mon May 10 20:26:17 2004 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2    
3    package MWS::Plucene;
4  use strict;  use strict;
5    use warnings;
6    
7  #  use MWS::Indexer;
8  # simple implementation to use Lucene port to perl  our @ISA=qw(MWS::Indexer);
9  #  
10    our $VERSION = '0.01';
11    
12  use Plucene::Simple;  use Plucene::Simple;
13    use Carp;
14    use Data::Dumper;
15    
16    =head1 NAME
17    
18    MWS::Plucene - index your data using Plucene
19    
20    =head1 DESCRIPTION
21    
22    Simple implementation to use Lucene port to perl
23    
24    =head1 METHODS
25    
26    =head2 open_index
27    
28     $self->open_index;
29    
30    =cut
31    
# Open the Plucene index stored in $self->{index_dir} and cache the handle
# in $self->{index}.
#
# Returns the handle produced by Plucene::Simple->open.
# Croaks if index_dir is not set or if the index can't be opened.
sub open_index {
	my $self = shift;

	# fail early with a clear message instead of handing undef to Plucene
	my $index_dir = $self->{index_dir};
	croak("open_index needs index_dir")
		unless (defined $index_dir && length $index_dir);

	$self->{index} = Plucene::Simple->open($index_dir)
		|| croak("can't open index '$index_dir': $!");

	return $self->{index};
}
40    
41    =head2 search_index
42    
43     my @results = $self->search_index('message:funny');
44    
45    Date limits are, well, kludged and sort isn't supported!
46    
47    =cut
48    
# Run a query against the Plucene index, opening the index first if it
# isn't open yet.
#
#  my @results = $self->search_index('message:funny');
#
# Any "sort:field direction" clause is stripped from the query (sorting is
# not supported). A "date:YYYY", "date:YYYY-MM" or "date:YYYY-MM-DD" clause
# (optionally quoted) is stripped as well and turned into a from/to date
# range which is passed to search_during; the month/year ranges are a
# kludge (every month is assumed to end on the 31st).
#
# Returns whatever the index search / search_during call returns.
# Croaks if no query is given.
sub search_index {
	my $self = shift;

	my $s = shift || croak("search_index needs query");

	$self->open_index if (! $self->{index});

	# kill sort:something [asc|desc]
	$s =~ s/sort:\w+\s+\w+//;

	# Month and day are optional and nested: a greedy "*" on the month
	# group would also swallow "-DD", leaving the day capture undefined.
	if ($s =~ s/date:"*(\d{4})(?:-(\d{2})(?:-(\d{2}))?)?"*//) {
		my ($y,$m,$d) = ($1,$2,$3);

		my ($df,$dt);	# date from, to

		if ($y && $m && $d) {
			# single day: from and to are the same date
			$df = $self->fmtdate($y,$m,$d);
			$dt = $df;
		} elsif ($y && $m) {
			# whole month
			$df = $self->fmtdate($y,$m)."-01";
			$dt = $self->fmtdate($y,$m)."-31";
		} else {
			# whole year (the year always matches when the regex does)
			$df = $self->fmtdate($y)."-01-01";
			$dt = $self->fmtdate($y)."-12-31";
		}
		print STDERR "Plucene query $s [from $df to $dt]\n" if ($self->{debug});
		return $self->{index}->search_during($s, $df => $dt);
	}

	print STDERR "Plucene query $s\n" if ($self->{debug});
	return $self->{index}->search($s);
}
83    
84    =head2 add_index
85    
86     $self->add_index($mbox_id, $document);
87    
88    This method will try to fake "message" field which is used to search whole
89    message (including headers) with body and subject. Duplicating content is
90    wrong, but using Plucene::Simple has its limitations.
91    
92    It will also remove time from date to make Plucene happy.
93    
94    =cut
95    
# Add a document to the Plucene index under the key $mbox_id, opening the
# index first if it isn't open yet.
#
#  $self->add_index($mbox_id, $document);
#
# $document is a hash reference; it is modified in place:
#  - a "message" field is faked by concatenating body and subject, so the
#    whole message can be searched through one field
#  - the "date" field is cut down to its YYYY-MM-DD part (time removed)
#
# If the index already exists on disk and a search for the document's id
# finds something, the document is not added again and nothing is returned.
# Otherwise returns whatever the index add call returns.
# Croaks if mbox_id or document is missing.
sub add_index {
	my $self = shift;

	my $mbox_id = shift || croak("add_index needs mbox_id");
	my $document = shift || croak("add_index needs document");

	$self->open_index if (! $self->{index});

	# does index with document already exist?
	# (only search when the index has a segments file and the document
	# has an id to look up)
	my $segments = $self->{index_dir}."/segments";
	if (-f $segments && defined $document->{id}) {
		my @hits = $self->{index}->search("id:".$document->{id});
		return if (@hits);
	}

	print STDERR "add_index($mbox_id)\n" if ($self->{debug});

	$document->{message} = ($document->{body} || '') . ($document->{subject} || '');

	# keep the date, drop everything after it (the time)
	$document->{date} =~ s/(\d{4}-\d+-\d+).*/$1/ if (defined $document->{date});

	return $self->{index}->add($mbox_id => $document );
}
117    
118    =head2 close_index
119    
120     $self->close_index;
121    
122    This will also optimize Plucene index file.
123    
124    =cut
125    
126  sub close_index {  sub close_index {
127          my $self = shift;          my $self = shift;
128    

Legend:
Removed from v.12  
changed lines
  Added in v.41

  ViewVC Help
Powered by ViewVC 1.1.26