# Patch for trunk/MWS.pm, revision 16 -> 22.
#
# What it changes:
#   * imports Text::Unaccent, Date::Parse and MIME::Base64 (and re-imports
#     POSIX strftime, which the first context line already imports);
#   * constructor reads global/max_results (default 100) and resets counters;
#   * adds normalize_string, reset_counters, add_counter, yyyymmdd, fmtdate,
#     add_counter_calendar and counter;
#   * fetch_message returns an empty value when the message is not found;
#   * search resets the counters and passes all arguments to search_index;
#   * decode handles both Q and B encoded words;
#   * fetch_result_by_id feeds from/to/cc/bcc and the message date into the
#     counters and stores a formatted date plus date_utime.
#
# NOTE(review): line breaks and indentation were reconstructed from a copy in
# which they had been collapsed. Blank context lines were placed so that every
# hunk matches its @@ line counts; their exact position and the tab/space
# indentation are assumptions -- apply with `patch -l` and verify.
#
# NOTE(review), findings on the added code (not altered here):
#   * fmtdate prints to STDERR unconditionally, unlike the other debug prints
#     which are guarded by `$debug == 2`;
#   * `return $qp;` directly after croak in decode is unreachable;
#   * str2time presumably returns undef for an unparseable date, in which case
#     add_counter_calendar croaks ("without argument") -- confirm and guard;
#   * `use POSIX qw(strftime);` is added although it is already present.
#
--- trunk/MWS.pm	2004/05/06 23:06:08	16
+++ trunk/MWS.pm	2004/05/08 01:13:33	22
@@ -11,6 +11,10 @@
 use POSIX qw(strftime);
 use Text::Autoformat;
 use Text::Iconv;
+use Text::Unaccent;
+use Date::Parse;
+use POSIX qw(strftime);
+use MIME::Base64;
 
 #use MWS_plucene;
 use MWS_swish;
@@ -47,10 +51,93 @@
 	$self->{folder} = {};
 
 	$self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
+	$self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
+	$self->reset_counters;
 
 	return $self;
 }
 
+sub normalize_string {
+	my $self = shift;
+
+	my $v = shift || return;
+
+	$v = unac_string('ISO-8859-2', $v);
+	$v = join('',sort split(/\s+/,$v));
+	$v =~ s/\W+//g;
+
+	return $v;
+}
+
+# reset tables for search results
+sub reset_counters {
+	my $self = shift;
+
+	$self->{counter} = {};
+
+#	foreach my $c (qw(thread from to cc bcc lists links att)) {
+#		$self->{counter}->{$c} = {};
+#	}
+
+}
+
+sub add_counter($$) {
+	my $self = shift;
+
+	my ($c,$v) = @_;
+	my $k = $self->normalize_string($v);
+
+	$self->{counter}->{$c}->{$k}->{name} = $v;
+	return $self->{counter}->{$c}->{$k}->{usage}++;
+}
+
+sub yyyymmdd {
+	my $self = shift;
+
+	my $t = shift || time;
+
+	my (undef,undef,undef,$dd,$mm,$yyyy) = localtime($t);
+	$mm++;
+	$yyyy+=1900;
+	return ($yyyy,$mm,$dd);
+}
+
+sub fmtdate {
+	my $self = shift;
+
+	my @out;
+	my @formats = qw(%04d %02d %02d);
+	while (my $v = shift) {
+		my $f = shift @formats;
+		push @out, sprintf($f, $v);
+	}
+
+print STDERR "fmtdate: ",join('|',@out),"\n";
+
+	return (wantarray ? @out : join("-",@out));
+}
+
+sub add_counter_calendar($) {
+	my $self = shift;
+
+	my $t = shift || croak "add_counter_calendar without argument!";
+
+	my ($yyyy,$mm,$dd) = $self->fmtdate($self->yyyymmdd($t));
+
+	return $self->{counter}->{calendar}->{"$yyyy-$mm"}->{$dd}++;
+}
+
+
+sub counter {
+	my $self = shift;
+
+	my $c = shift || return;
+
+	return if (! $self->{counter}->{$c});
+
+	return $self->{counter}->{$c};
+}
+
 sub mbox_name2path {
 	my $self = shift;
 
@@ -106,23 +193,32 @@
 		print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
 	}
 
-	return $self->open_folder($mbox)->find($id) ||
+	my $msg = $self->open_folder($mbox)->find($id);
+	if ($msg) {
+		return $msg;
+	} else {
 		print STDERR "can't find message $id in $mbox. Time to re-index?\n";
+		return;
+	}
 }
 
 
 sub search {
 	my $self = shift;
 
-	my $s = shift || carp "search called without argument!";
+	carp "search called without argument!" if (! @_);
 
-	print STDERR "search_index($s)\n" if ($debug == 2);
-	my @index_ids = $self->search_index($s);
+	$self->reset_counters;
+
+	print STDERR "search(",join(" ",@_),")\n" if ($debug == 2);
+	my @index_ids = $self->search_index(@_);
 
 	$self->{'index_ids'} = \@index_ids;
 
-	my $results = $#index_ids + 1;
-	$self->{'results'} = $results;
+	#my $results = $#index_ids + 1;
+	#$self->{'results'} = $results;
+
+	my $results = $self->{'total_hits'} || ($#index_ids + 1);
 
 	$self->{'curr_result'} = 0;
 
@@ -136,17 +232,30 @@
 
 	my $tmp = shift || return;
 
-	sub decode($$) {
-		my ($cp,$qp) = @_;
+	sub decode($$$) {
+		my ($cp,$enc,$qp) = @_;
+
+		print STDERR "decode($cp,$qp) -> " if ($debug == 2);
+
+		if (uc($enc) eq "Q") {
+			$qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
+			$qp =~ s/_/ /g;
+		} elsif (uc($enc) eq "B") {
+			$qp = decode_base64($qp);
+		} else {
+			croak "unsupported encoding '$enc' in decode_qp\n";
+			return $qp;
+		}
+
+		print STDERR "$qp\n" if ($debug == 2);
+
 		my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
-		print STDERR "decode($cp,$qp) -> " if ($debug == 2);
-		$qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
-		$qp =~ s/_/ /g;
-		print STDERR "$qp\n" if ($debug == 2);
-		return $iconv->convert($qp);
+		return $iconv->convert($qp) || '';
 	}
 
-	$tmp =~ s/=\?([^\?]+)\?Q\?(.+)\?=/decode($1,$2)/ex;
+	$tmp =~ s/=\?([^\?]+)\?([QB])\?(.+?)\?=/decode($1,$2,$3)/ige;
+	$tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
+	#print STDERR "$tmp\n" if ($debug == 2);
 	return $tmp;
 }
 
@@ -157,11 +266,12 @@
 
 	my @arr;
 
+	return if (! $message->$part);
+
 	foreach my $from ($message->$part) {
 		my $tmp = $from->$sub || next;
 
 		$tmp = $self->decode_qp($tmp);
-		$tmp =~ s/^\s*["'](.*)["']\s*$/$1/;
 		push @arr, $tmp;
 	}
 
@@ -181,6 +291,7 @@
 
 		push @arr, $self->fetch_result_by_id($id);
 	}
+
 	return @arr;
 }
 
@@ -221,7 +332,7 @@
 	my $wrap = $self->{wrap_margin};
 	if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
 		$body =~ s/[\r\n]/\n/gs;
-		$body = autoformat($body, {right=>$wrap});
+		$body = autoformat($body, {right=>$wrap, all=>1});
 		$body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
 	}
 
@@ -240,15 +351,24 @@
 
 	print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
 
-	my $message = $self->fetch_message($id) || print STDERR "can't fetch message '$id'";
+	my $message = $self->fetch_message($id) || return;
 
 	$row->{'id'} = $id;
-	@{$row->{'from'}} = $self->unroll($message,'from','phrase');
-	@{$row->{'to'}} = $self->unroll($message,'to','phrase');
-	@{$row->{'cc'}} = $self->unroll($message,'cc','phrase');
+
+	foreach my $p (qw(from to cc bcc)) {
+		foreach my $v ($self->unroll($message,$p,'phrase')) {
+			push @{$row->{$p}},$v;
+			$self->add_counter($p,$v);
+		}
+	}
 	$row->{'subject'} = $self->decode_qp($message->subject);
 	$row->{'body'} = $self->plain_text_body($message);
-	$row->{'date'} = $message->date;
+	my $utime = str2time($message->date);
+
+	$row->{'date_utime'} = $utime;
+
+	$row->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
+	$self->add_counter_calendar($utime);
 
 	# XXX store in cache?
 	$self->{cache}->{$id} = $row;