/[bfilter]/trunk/bfilter.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bfilter.pl

Parent Directory | Revision Log | View Patch

revision 4 by dpavlin, Tue Sep 7 09:16:06 2004 UTC revision 10 by dpavlin, Fri Sep 10 12:16:21 2004 UTC
# Line 7  use locale; Line 7  use locale;
7  # maximum entries  # maximum entries
8  my $max = 0;  my $max = 0;
9  # minimum letters to search by  # minimum letters to search by
10  my $min_len = 2;  my $min_len = 3;
11  # if more than x elements, warn to increase min_len  # if more than x elements, warn to increase min_len
12  my $increase_at = 500;  my $increase_at = 500;
13    
14    # name of generated index
15    my $headlines = 'headlines';
16    
17  my $debug = 1;  my $debug = 1;
18    
19  sub print_file {  sub print_file {
# Line 23  sub print_file { Line 26  sub print_file {
26  }  }
27    
28  print qq{  print qq{
29  var headlines = Array();  var $headlines = new Object();
30  };  };
31    
32  my @part_arr;  my @part_arr;
# Line 35  my $max_elements = 0; Line 38  my $max_elements = 0;
38  while(<STDIN>) {  while(<STDIN>) {
39          chomp;          chomp;
40    
41          # escape single quote          if (!m/\t/ || m/\t$/) {
42          s/'/\\'/g;                  print STDERR "SKIP '$_': no tab\n";
43                    next;
44            }
45    
46            my ($path,$headline) = split(/\t+/,$_,2);
47    
48            if (length($headline) < $min_len) {
49                    print STDERR "SKIP '$_': too short\n";
50                    next;
51            }
52    
53    
54          # split into min_len part and rest          # split into min_len part and rest
55          my ($part,$rest) = ( substr($_,0,$min_len), substr($_,$min_len) );          my ($part,$rest) = ( substr($headline,0,$min_len), substr($headline,$min_len) );
56    
57            # escape special chars
58            $part =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED part '$part'\n";
59            $rest =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED rest '$rest'\n";
60            $headline =~ s/(['\\])/\\$1/g;
61    
62          # make part lowercase          # make part lowercase
63          $part = lc($part);          $part = lc($part);
# Line 48  while(<STDIN>) { Line 66  while(<STDIN>) {
66    
67          # new part?          # new part?
68          if ($part ne $last_part) {          if ($part ne $last_part) {
69                  print STDERR $last_part,"\t",$#part_arr+1,"\n" if ($debug);                  print STDERR $last_part,"\t",$#part_arr+1,"\n" if ($debug && $#part_arr > $increase_at);
70                  $max_elements = $#part_arr if ($#part_arr > $max_elements);                  $max_elements = $#part_arr if ($#part_arr > $max_elements);
71                  print "headlines['$last_part'] = Array(\n ",join(",\n ",@part_arr),");\n" if (@part_arr);                  print "${headlines}['$last_part'] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
72                  $total += $#part_arr;                  $total += $#part_arr;
73                  @part_arr = ();                  @part_arr = ();
74                  $last_part = $part;                  $last_part = $part;
75          }          }
76          push @part_arr, "'$_'";          push @part_arr, "['$path','$headline']";
77    
78          # break out?          # break out?
79          last if ($max && $total > $max);          last if ($max && $total > $max);
80  }  }
81    
82  print "headlines['$last_part'] = Array(\n ",join(",\n ",@part_arr),");\n" if (@part_arr);  print "${headlines}['$last_part'] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
83  print "var min_len = $min_len;\n";  print qq{
84  print "// index elements: $total\n";  
85    ${headlines}.min_len = $min_len;
86    ${headlines}.length = $total;
87    
88    };
89    
90  print STDERR "You have more than $increase_at elements in one array,\nyou whould probably increase min_len to ",$min_len+1," or higher.\n" if ($max_elements > $increase_at);  print STDERR "You have more than $increase_at elements, so you should\nincrease min_len to ",$min_len+1," or higher for performance benefit.\n" if ($max_elements > $increase_at);

Legend:
Removed from v.4  
changed lines
  Added in v.10

  ViewVC Help
Powered by ViewVC 1.1.26