/[wait]/cvs-head/lib/WAIT/InvertedIndex.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /cvs-head/lib/WAIT/InvertedIndex.pm

Parent Directory | Revision Log | View Patch

revision 79 by laperla, Sun Jan 27 15:27:38 2002 UTC | revision 80 by ulpfr, Sat Apr 20 15:01:38 2002 UTC
# Line 4  Line 4 
4  # Author          : Ulrich Pfeifer  # Author          : Ulrich Pfeifer
5  # Created On      : Thu Aug  8 13:05:10 1996  # Created On      : Thu Aug  8 13:05:10 1996
6  # Last Modified By: Ulrich Pfeifer  # Last Modified By: Ulrich Pfeifer
7  # Last Modified On: Mon Dec 31 14:30:05 2001  # Last Modified On: Sat Apr 20 16:56:29 2002
8  # Language        : CPerl  # Language        : CPerl
9  #  #
10  # (C) Copyright 1996-2000, Ulrich Pfeifer  # (C) Copyright 1996-2002, Ulrich Pfeifer
11  #  #
12    
13  package WAIT::InvertedIndex;  package WAIT::InvertedIndex;
# Line 18  use WAIT::Filter; Line 18  use WAIT::Filter;
18  use Carp;  use Carp;
19  use vars qw(%FUNC $VERSION);  use vars qw(%FUNC $VERSION);
20    
21  $VERSION = "1.801"; # others test if we are loaded by checking $VERSION  $VERSION = "1.900"; # others test if we are loaded by checking $VERSION
22    
23  # The dictionary has three different key types:  # The dictionary has three different key types:
24  #  'o'.$word  #  'o'.$word
25  #  #
26  #     The document frequency is the number of documents a term occurs  #     The document frequency is the number of documents a term occurs
27  #     in. The idea is that a term occuring in a significant part of the  #     in. The idea is that a term occuring in a significant portion of the
28  #     documents is not too significant.  #     documents is not too significant.
29  #  #
30  # 'm'.$word  # 'm'.$word
# Line 251  sub sort_postings { Line 251  sub sort_postings {
251    my $r = '';    my $r = '';
252    
253    # Sort posting list by increasing ratio of maximum term frequency (~    # Sort posting list by increasing ratio of maximum term frequency (~
254    # "document length") and term frequency. This rati multipied by the    # "document length") and term frequency. This ratio multipied by the
255    # inverse document frequence gives the score for a term.  This sort    # inverse document frequence gives the score for a term.  This sort
256    # order can be exploited for tuning of single term queries.    # order can be exploited for tuning of single term queries.
257    
# Line 411  sub search { Line 411  sub search {
411    
412    defined $self->{db} or $self->open;    defined $self->{db} or $self->open;
413    $self->sync;    $self->sync;
414    $self->search_raw($query, &{$self->{func}}(@_)); # No call to parse() here    $self->search_raw($query, &{$self->{func}}(@_)); # No call to parse() there
415  }  }
416    
417  sub parse {  sub parse {
# Line 613  sub search_raw { Line 613  sub search_raw {
613      my $full;                   # Need to process all postings      my $full;                   # Need to process all postings
614      my $chop;                   # Score necessary to enter the ranking list      my $chop;                   # Score necessary to enter the ranking list
615    
616      if (# We know that wanted is true since we especial cased the      if (# We know that wanted is true since we special cased the
617          # exhaustive search.          # exhaustive search.
618    
619          $wanted and          $wanted and

Legend:
Removed from v.79  
changed lines
  Added in v.80

  ViewVC Help
Powered by ViewVC 1.1.26