/[Search-Estraier]/trunk/scripts/est-spider
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/scripts/est-spider

Parent Directory | Revision Log | View Patch

revision 117 by dpavlin, Thu Jan 26 01:53:29 2006 UTC | revision 118 by dpavlin, Sun Apr 16 23:22:54 2006 UTC
# Line 7  use Search::Estraier; Line 7  use Search::Estraier;
7  use Text::Iconv;  use Text::Iconv;
8  #use File::MMagic;  #use File::MMagic;
9  use File::MMagic::XS qw/:compat/;  use File::MMagic::XS qw/:compat/;
10    use Time::HiRes qw/time/;
11    
12  my $collection;         # name which will be inserted  my $collection;         # name which will be inserted
13  my $path_add;           # add additional info in path  my $path_add;           # add additional info in path
# Line 49  if (! -e $dir) { Line 50  if (! -e $dir) {
50  #$basedir =~ s,/[^/]+$,/,;  #$basedir =~ s,/[^/]+$,/,;
51  #require "$basedir/filter.pm";  #require "$basedir/filter.pm";
52    
53    my $docs = 0;
54    my $start_t = time();
55    
56  my $filter;  my $filter;
57  foreach my $f (qw/pdftotext pstotext/) {  foreach my $f (qw/pdftotext pstotext/) {
58          my $w = which($f);          my $w = which($f);
# Line 76  find({ wanted => \&file, Line 80  find({ wanted => \&file,
80          no_chdir => 1,          no_chdir => 1,
81  }, $dir);  }, $dir);
82    
83    my $dur = (time() - $start_t) || 1;
84    printf STDERR "%d documents in %.2fs [%.2f docs/s]\n", $docs, $dur, ($docs / $dur);
85    
86  exit;  exit;
87    
# Line 129  sub dump_contents { Line 135  sub dump_contents {
135          # register the document object to the database          # register the document object to the database
136          $db->put_doc($doc);          $db->put_doc($doc);
137    
138            $docs++;
139    
140  }  }
141    
142  sub filter_to_pages {  sub filter_to_pages {

Legend:
Removed from v.117  
changed lines
  Added in v.118

  ViewVC Help
Powered by ViewVC 1.1.26