/[Grep]/bin/reindex.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /bin/reindex.pl

Parent Directory | Revision Log


Revision 144 - (show annotations)
Tue May 8 14:11:38 2007 UTC (16 years, 9 months ago) by dpavlin
File MIME type: text/plain
File size: 1353 byte(s)
added --duplicates switch to reindex.pl to keep duplicate items, create new
index every time to prevent duplicate results for same item
#!/usr/bin/perl

# helper script to re-index full text index
#
# Usage: reindex.pl [--duplicates]
#
#   --duplicates   keep items that Text::DeDuper reports as similar
#                  (default: such items are deleted while re-indexing)

use strict;
use warnings;

use lib 'lib';

use Jifty;
use Grep::Search;
use Data::Dump qw/dump/;
use Text::DeDuper;
use Encode;
use Getopt::Long;

my $keep_duplicates = 0;

# The destination must be passed by reference, otherwise Getopt::Long sees
# the current value (0) as just another option spec and the switch is never
# stored. Abort on unknown options: the default mode deletes records, so a
# mistyped switch must not silently fall through to it.
GetOptions(
    'duplicates!' => \$keep_duplicates,
) or die "usage: $0 [--duplicates]\n";

# unbuffered output so per-item progress shows up immediately
$| = 1;

BEGIN { Jifty->new; };

my $system_user = Grep::CurrentUser->superuser;
my $coll = Grep::Model::ItemCollection->new(
    results_are_readable => 1,
    current_user         => $system_user,
);
$coll->unlimit;

print "indexing ", $coll->count, " items ", $keep_duplicates ? "" : "removing duplicates ";

# create => 1: build a new index on every run
my $search  = Grep::Search->new({ create => 1 });
my $deduper = Text::DeDuper->new();

my ( $total, $duplicates ) = ( 0, 0 );

while ( my $i = $coll->next ) {

    my $item_id = $i->id;
    print $item_id;

    if ( ! $keep_duplicates ) {

        # de-duplication works on the UTF-8 encoded bytes of the content
        my $c = encode('utf-8', $i->content);

        # ids are numeric (they are compared with == below), so sort them
        # numerically rather than with the default string comparison
        my @s = sort { $a <=> $b } $deduper->find_similar( $c );

        #warn " similar = ",dump( @s );

        # NOTE(review): every id returned here was registered by an earlier
        # iteration, which also passed that item to $search->add -- confirm
        # that Item->delete removes it from the freshly built index as well.
        foreach my $id ( @s ) {
            next if $id == $item_id;    # keep current
            my $si = Grep::Model::Item->new();
            $si->load( $id ) or die "can't find similar item $id";
            print " -$id-";
            $si->delete;
            $duplicates++;
        }

        # register the current item only after the lookup, so it can never
        # be reported as similar to itself
        $deduper->add_doc( $item_id, $c );
    }

    $search->add( $i, $i->in_feed->owner->id );
    print ' ';
    $total++;
}

print "$total records indexed", $duplicates ? "($duplicates duplicates)" : "", "\n";

$search->finish;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26