4 |
# Author : Ulrich Pfeifer |
# Author : Ulrich Pfeifer |
5 |
# Created On : Thu Aug 8 13:05:10 1996 |
# Created On : Thu Aug 8 13:05:10 1996 |
6 |
# Last Modified By: Ulrich Pfeifer |
# Last Modified By: Ulrich Pfeifer |
7 |
# Last Modified On: Mon Dec 31 14:30:05 2001 |
# Last Modified On: Sat Apr 20 16:56:29 2002 |
8 |
# Language : CPerl |
# Language : CPerl |
9 |
# |
# |
10 |
# (C) Copyright 1996-2000, Ulrich Pfeifer |
# (C) Copyright 1996-2002, Ulrich Pfeifer |
11 |
# |
# |
12 |
|
|
13 |
package WAIT::InvertedIndex; |
package WAIT::InvertedIndex; |
18 |
use Carp; |
use Carp; |
19 |
use vars qw(%FUNC $VERSION); |
use vars qw(%FUNC $VERSION); |
20 |
|
|
21 |
$VERSION = "1.801"; # others test if we are loaded by checking $VERSION |
$VERSION = "1.900"; # others test if we are loaded by checking $VERSION |
22 |
|
|
23 |
# The dictionary has three different key types: |
# The dictionary has three different key types: |
24 |
# 'o'.$word |
# 'o'.$word |
25 |
# |
# |
26 |
# The document frequency is the number of documents a term occurs |
# The document frequency is the number of documents a term occurs |
27 |
# in. The idea is that a term occuring in a significant part of the |
# in. The idea is that a term occurring in a significant portion of the |
28 |
# documents is not too significant. |
# documents is not too significant. |
29 |
# |
# |
30 |
# 'm'.$word |
# 'm'.$word |
251 |
my $r = ''; |
my $r = ''; |
252 |
|
|
253 |
# Sort posting list by increasing ratio of maximum term frequency (~ |
# Sort posting list by increasing ratio of maximum term frequency (~ |
254 |
# "document length") and term frequency. This rati multipied by the |
# "document length") and term frequency. This ratio multiplied by the |
255 |
# inverse document frequence gives the score for a term. This sort |
# inverse document frequency gives the score for a term. This sort |
256 |
# order can be exploited for tuning of single term queries. |
# order can be exploited for tuning of single term queries. |
257 |
|
|
411 |
|
|
412 |
defined $self->{db} or $self->open; |
defined $self->{db} or $self->open; |
413 |
$self->sync; |
$self->sync; |
414 |
$self->search_raw($query, &{$self->{func}}(@_)); # No call to parse() here |
$self->search_raw($query, &{$self->{func}}(@_)); # No call to parse() there |
415 |
} |
} |
416 |
|
|
417 |
sub parse { |
sub parse { |
613 |
my $full; # Need to process all postings |
my $full; # Need to process all postings |
614 |
my $chop; # Score necessary to enter the ranking list |
my $chop; # Score necessary to enter the ranking list |
615 |
|
|
616 |
if (# We know that wanted is true since we especial cased the |
if (# We know that wanted is true since we special cased the |
617 |
# exhaustive search. |
# exhaustive search. |
618 |
|
|
619 |
$wanted and |
$wanted and |