--- cvs-head/lib/WAIT/InvertedIndex.pm 2000/11/11 16:58:53 22 +++ cvs-head/lib/WAIT/InvertedIndex.pm 2000/11/12 01:23:47 30 @@ -16,7 +16,9 @@ use Fcntl; use WAIT::Filter; use Carp; -use vars qw(%FUNC); +use vars qw(%FUNC $VERSION); + +$VERSION = "1.801"; # others test if we are loaded by checking $VERSION # The dictionary has three different key types: # 'o'.$word @@ -247,6 +249,12 @@ # inverse document frequence gives the score for a term. This sort # order can be exploited for tuning of single term queries. + for my $did (keys %$post) { # sanity check + unless ($self->{db}->{"m". $did}) { + warn "Warning from WAIT: DIVZERO threat from did[$did] post[$post->{$did}]"; + $self->{db}->{"m". $did} = 1; # fails if we have not opened for writing + } + } for my $did (sort { $post->{$b} / $self->{db}->{'m'. $b} <=> $post->{$a} / $self->{db}->{'m'. $a} @@ -274,6 +282,12 @@ grep $occ{$_}++, &{$self->{func}}(@_); + # Be prepared for "Odd number of elements in hash assignment" + local $SIG{__WARN__} = sub { + my $warning = shift; + chomp $warning; + warn "Catching warning[$warning] during delete of key[$key]"; + }; for (keys %occ) {# may reorder posting list my %post = unpack 'w*', $db->{'p'.$_}; delete $post{$key}; @@ -467,7 +481,14 @@ } for (my $i=1; $i<@res; $i+=2) { - $res[$i] /= $self->{db}->{'m'. $res[$i-1]} / $idf; + # $res[$i] /= $self->{db}->{'m'. $res[$i-1]} / $idf; + # above was written badly, allows two DIV_ZERO problems. + my $maxtf = $self->{db}->{"m". $res[$i-1]}; + unless ($maxtf) { + warn "WAIT-Warning: Averting DIVZERO for i[$i] \$res[\$i-1][$res[$i-1]] term[$term]"; + $maxtf = 1; + } + $res[$i] = ($res[$i] / $maxtf) * $idf; } return @res @@ -671,6 +692,7 @@ if ($self->{mode} & O_RDWR) { print STDERR "Flushing $self->{cached} postings\n" if $self->{cached}; while (my($key, $value) = each %{$self->{cache}}) { + $self->{db}->{"p". $key} ||= ""; if ($self->{reorg}) { $self->{db}->{'p'.$key} = $self->sort_postings($self->{db}->{'p'.$key} . $value);