/[bfilter]/trunk/bfilter.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bfilter.pl

Parent Directory | Revision Log | View Patch

revision 9 by dpavlin, Wed Sep 8 17:32:20 2004 UTC revision 35 by dpavlin, Sat Oct 9 21:00:06 2004 UTC
# Line 7  use locale; Line 7  use locale;
7  # maximum entries  # maximum entries
8  my $max = 0;  my $max = 0;
9  # minimum letters to search by  # minimum letters to search by
10  my $min_len = 3;  my $min_len = shift @ARGV;
11    $min_len = 3 unless defined($min_len);
12  # if more than x elements, warn to increase min_len  # if more than x elements, warn to increase min_len
13  my $increase_at = 500;  my $increase_at = 500;
14    
15    # name of generated index
16    my $headlines = 'headlines';
17    
18  my $debug = 1;  my $debug = 1;
19    
20  sub print_file {  sub print_file {
# Line 23  sub print_file { Line 27  sub print_file {
27  }  }
28    
29  print qq{  print qq{
30  var headlines = Array();  var $headlines = new Object();
31  };  };
32    
33  my @part_arr;  my @part_arr;
# Line 32  my $total = 0; Line 36  my $total = 0;
36    
37  my $max_elements = 0;  my $max_elements = 0;
38    
39    sub escape_js {
40            my $t = shift || return 'undef';
41            # escape single quote and backslash
42            $t =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED '$t'\n";
43            # quote string if not number
44            $t = "'$t'" unless ($t =~ m/^\d+$/);
45            return $t;
46    }
47    
48    my @lines;
49    
50  while(<STDIN>) {  while(<STDIN>) {
51          chomp;          chomp;
52    
# Line 40  while(<STDIN>) { Line 55  while(<STDIN>) {
55                  next;                  next;
56          }          }
57    
58          my ($path,$headline) = split(/\t+/,$_,2);          # remove leading spaces (which are ignored if source list was
59            # sorted using locale)
60            s/^\s+//;
61    
62            push @lines, $_;
63    }
64    
65    # spaces will be ignored when sorting using locale. That's why we have
66    # cache of lines with spaces replaced by exclamation mark (!) so that
67    # sort order is strict and not dictionary. For more info, see:
68    # http://archives.postgresql.org/pgsql-sql/2002-04/msg00266.php
69    # http://groups.google.com/groups?selm=handler.82819.D82819.99045085113033.ackdone%40bugs.debian.org&output=gplain
70    
71    my %locale_space_fix;
72    
73    foreach (sort {
74                    unless($locale_space_fix{$a}) {
75                            my $tmp = $a;
76                            $tmp =~ s/ /!/g;
77                            $locale_space_fix{$a} = lc($tmp);
78                    }
79                    unless($locale_space_fix{$b}) {
80                            my $tmp = $b;
81                            $tmp =~ s/ /!/g;
82                            $locale_space_fix{$b} = lc($tmp);
83                    }
84                    $locale_space_fix{$a} cmp $locale_space_fix{$b};
85            } @lines) {
86    
87            my @data = split(/\t+/,$_);
88    
89            my $headline = shift @data || die "need at least headline!";
90    
91          if (length($headline) < $min_len) {          if (length($headline) < $min_len) {
92                  print STDERR "SKIP '$_': too short\n";                  print STDERR "SKIP '$_': too short\n";
# Line 51  while(<STDIN>) { Line 97  while(<STDIN>) {
97          # split into min_len part and rest          # split into min_len part and rest
98          my ($part,$rest) = ( substr($headline,0,$min_len), substr($headline,$min_len) );          my ($part,$rest) = ( substr($headline,0,$min_len), substr($headline,$min_len) );
99    
         # escape special chars  
         $part =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED part '$part'\n";  
         $rest =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED rest '$rest'\n";  
         $headline =~ s/(['\\])/\\$1/g;  
   
100          # make part lowercase          # make part lowercase
101          $part = lc($part);          $part = lc($part);
102    
# Line 65  while(<STDIN>) { Line 106  while(<STDIN>) {
106          if ($part ne $last_part) {          if ($part ne $last_part) {
107                  print STDERR $last_part,"\t",$#part_arr+1,"\n" if ($debug && $#part_arr > $increase_at);                  print STDERR $last_part,"\t",$#part_arr+1,"\n" if ($debug && $#part_arr > $increase_at);
108                  $max_elements = $#part_arr if ($#part_arr > $max_elements);                  $max_elements = $#part_arr if ($#part_arr > $max_elements);
109                  print "headlines['$last_part'] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);                  print "${headlines}[",escape_js($last_part),"] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
110                  $total += $#part_arr;                  $total += $#part_arr;
111                  @part_arr = ();                  @part_arr = ();
112                  $last_part = $part;                  $last_part = $part;
113          }          }
114          push @part_arr, "['$path','$headline']";          push @part_arr, "[".escape_js($headline).",".join(",",map { escape_js($_) } @data)."]";
115    
116          # break out?          # break out?
117          last if ($max && $total > $max);          last if ($max && $total > $max);
118  }  }
119    
120  print "headlines['$last_part'] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);  print "${headlines}[",escape_js($last_part)."] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
121  print qq{  print qq{
122    
123  var min_len = $min_len;  ${headlines}.min_len = $min_len;
124  var html_pre = '<div><a href="../';  ${headlines}.length = $total;
 var html_mid = '">';  
 var html_post = '</a></div>';  
   
 // index elements: $total  
125    
126  };  };
127    

Legend:
Removed from v.9  
changed lines
  Added in v.35

  ViewVC Help
Powered by ViewVC 1.1.26