--- trunk/scripts/est-spider 2006/04/16 23:22:54 118 +++ trunk/scripts/est-spider 2006/04/17 10:31:11 119 @@ -17,6 +17,7 @@ #$verbose = 1; my $debug = 0; my $force = 0; +my $all = 0; my $result = GetOptions( "collection=s" => \$collection, @@ -25,6 +26,7 @@ "debug!" => \$debug, "exclude=s" => \$exclude, "force!" => \$force, + "all!" => \$all, ); my ($node_url,$dir) = @ARGV; @@ -39,6 +41,7 @@ --verbose --force --debug + --all save placeholders for all files _END_OF_USAGE_ if (! -e $dir) { @@ -70,9 +73,12 @@ select(STDERR); $|=1; select(STDOUT); $|=1; -my $db = new Search::Estraier::Node; -$db->set_url($node_url); -$db->set_auth('admin', 'admin'); +my $db = new Search::Estraier::Node( + url => $node_url, + user => 'admin', + passwd => 'admin', + croak_on_error => 1, +); find({ wanted => \&file, follow => 1, @@ -130,7 +136,7 @@ # boost title $doc->add_hidden_text($title); -# print $doc->dump_draft if ($verbose); + print $doc->dump_draft if ($verbose); # register the document object to the database $db->put_doc($doc); @@ -188,10 +194,11 @@ my $path = $_; my $contents; - return if (! $force && -l $path || $path =~ m#/.svn# || $path =~ m/(~|.bak)$/); + return if (! $force && -l $path || $path =~ m#/.svn# || $path =~ m/(~|.bak|.gif)$/); my $mtime = (stat($path))[9] || -1; - my $mtime_db = $db->get_doc_attr_by_uri("file:///$path", '@mtime') || -2; + my $mtime_db = eval { $db->get_doc_attr_by_uri("file:///$path", '@mtime') }; + $mtime_db ||= -2; if ($mtime == $mtime_db) { print STDERR "# same: $path $mtime\n" if ($verbose); @@ -228,15 +235,16 @@ } else { # return if (! -f $path || ! m/\.(html*|php|pl|txt|info|log|text)$/i); - if (-f $path && $type =~ m/html/ || - ($path !~ m/\.(php|pl|txt|info|log|text)$/io) + if (-f $path && + $type !~ m/html/ && + $path !~ m/\.(php|pl|txt|info|log|text)$/io ) { - dump_contents($db, '', $mtime, $path, -s $path); + dump_contents($db, '', $mtime, $path, -s $path) if ($all); return; } # skip index files - return if (m/index_[a-z]\.html*/i || m/index_symbol\.html*/i); + return if ($path =~ m/index_(?:[a-z]+|symbol)\.html*/i); open(F,"$path") || die "can't open file: $path"; print STDERR "$path ($type)" if ($verbose);