/[bfilter]/trunk/bfilter.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bfilter.pl

Parent Directory | Revision Log | View Patch

revision 10 by dpavlin, Fri Sep 10 12:16:21 2004 UTC revision 26 by dpavlin, Wed Sep 15 16:48:24 2004 UTC
# Line 7  use locale; Line 7  use locale;
7  # maximum entries  # maximum entries
8  my $max = 0;  my $max = 0;
9  # minimum letters to search by  # minimum letters to search by
10  my $min_len = 3;  my $min_len = shift @ARGV || 3;
11  # if more than x elements, warn to increase min_len  # if more than x elements, warn to increase min_len
12  my $increase_at = 500;  my $increase_at = 500;
13    
# Line 35  my $total = 0; Line 35  my $total = 0;
35    
36  my $max_elements = 0;  my $max_elements = 0;
37    
38    sub escape_js {
39            my $t = shift || return 'undef';
40            # escape single quote and backslash
41            $t =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED '$t'\n";
42            # quote string if not number
43            $t = "'$t'" unless ($t =~ m/^\d+$/);
44            return $t;
45    }
46    
47  while(<STDIN>) {  while(<STDIN>) {
48          chomp;          chomp;
49    
# Line 43  while(<STDIN>) { Line 52  while(<STDIN>) {
52                  next;                  next;
53          }          }
54    
55          my ($path,$headline) = split(/\t+/,$_,2);          my @data = split(/\t+/,$_);
56    
57            my $headline = shift @data || die "need at least headline!";
58    
59          if (length($headline) < $min_len) {          if (length($headline) < $min_len) {
60                  print STDERR "SKIP '$_': too short\n";                  print STDERR "SKIP '$_': too short\n";
# Line 54  while(<STDIN>) { Line 65  while(<STDIN>) {
65          # split into min_len part and rest          # split into min_len part and rest
66          my ($part,$rest) = ( substr($headline,0,$min_len), substr($headline,$min_len) );          my ($part,$rest) = ( substr($headline,0,$min_len), substr($headline,$min_len) );
67    
         # escape special chars  
         $part =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED part '$part'\n";  
         $rest =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED rest '$rest'\n";  
         $headline =~ s/(['\\])/\\$1/g;  
   
68          # make part lowercase          # make part lowercase
69          $part = lc($part);          $part = lc($part);
70    
# Line 68  while(<STDIN>) { Line 74  while(<STDIN>) {
74          if ($part ne $last_part) {          if ($part ne $last_part) {
75                  print STDERR $last_part,"\t",$#part_arr+1,"\n" if ($debug && $#part_arr > $increase_at);                  print STDERR $last_part,"\t",$#part_arr+1,"\n" if ($debug && $#part_arr > $increase_at);
76                  $max_elements = $#part_arr if ($#part_arr > $max_elements);                  $max_elements = $#part_arr if ($#part_arr > $max_elements);
77                  print "${headlines}['$last_part'] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);                  print "${headlines}[",escape_js($last_part),"] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
78                  $total += $#part_arr;                  $total += $#part_arr;
79                  @part_arr = ();                  @part_arr = ();
80                  $last_part = $part;                  $last_part = $part;
81          }          }
82          push @part_arr, "['$path','$headline']";          push @part_arr, "[".escape_js($headline).",".join(",",map { escape_js($_) } @data)."]";
83    
84          # break out?          # break out?
85          last if ($max && $total > $max);          last if ($max && $total > $max);
86  }  }
87    
88  print "${headlines}['$last_part'] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);  print "${headlines}[",escape_js($last_part)."] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
89  print qq{  print qq{
90    
91  ${headlines}.min_len = $min_len;  ${headlines}.min_len = $min_len;

Legend:
Removed from v.10  
changed lines
  Added in v.26

  ViewVC Help
Powered by ViewVC 1.1.26