/[Grep]/bin/reindex.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /bin/reindex.pl

Parent Directory | Revision Log


Revision 156 - (hide annotations)
Sun Jun 10 19:38:13 2007 UTC (16 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 1430 byte(s)
better output 
#!/usr/bin/perl

# Helper script to re-index the full text index.
#
# Usage: reindex.pl [--duplicates | --noduplicates]
#
#   --duplicates     keep every item, even when Text::DeDuper reports it
#                    as similar to an item seen earlier in this run
#   --noduplicates   (default) delete earlier similar items from the
#                    database and from the search index
#
# Every item in Grep::Model::ItemCollection is walked once and handed to
# Grep::Search->add; progress (item ids, and "-id-" for each removed
# duplicate) is printed to STDOUT, totals go to the Jifty log.

use strict;
use warnings;

use lib 'lib';

use Jifty;
use Grep::Search;
use Data::Dump qw/dump/;
use Text::DeDuper;
use Encode;
use Getopt::Long;

my $keep_duplicates = 0;

# Getopt::Long needs a *reference* as the option destination; passing the
# plain scalar hands it the value 0 and the flag can never be set.
# Abort on unparsable options: the default mode deletes data, so a typo
# in --duplicates must not silently fall through to it.
GetOptions(
    'duplicates!' => \$keep_duplicates,
) or die "usage: $0 [--duplicates]\n";

# unbuffered STDOUT so progress shows up while the loop is running
$|=1;

BEGIN { Jifty->new; };

my $system_user = Grep::CurrentUser->superuser;
my $coll = Grep::Model::ItemCollection->new( results_are_readable => 1, current_user => $system_user );
$coll->unlimit;

Jifty->log->info( "indexing ", $coll->count, " items ", $keep_duplicates ? "" : "removing duplicates " );

my $search = Grep::Search->new({ create => 1 });
my $deduper = Text::DeDuper->new();

my ( $total, $duplicates ) = ( 0, 0 );

# Ids already deleted in this run. They are never removed from $deduper
# here, so find_similar may presumably report them again for a later
# near-duplicate (TODO confirm against Text::DeDuper); without this
# guard the load() below would die on such an id.
my %deleted;

while ( my $i = $coll->next ) {

    print $i->id;

    if ( ! $keep_duplicates ) {

        # the de-duper is fed UTF-8 encoded bytes of the item content
        my $c = encode('utf-8', $i->content);

        my @s = sort $deduper->find_similar( $c );
        if ( @s ) {
            #warn " similar = ",dump( @s );

            foreach my $id ( @s ) {
                next if $id == $i->id;  # keep current
                next if $deleted{$id};  # already removed earlier in this run
                my $si = Grep::Model::Item->new();
                $si->load( $id ) or die "can't find similar item $id";
                print " -$id-";
                $si->delete;
                $deleted{$id} = 1;
                $duplicates++;
                # drop the removed item from the full text index as well
                $search->invindexer->delete_by_term( 'id', $id );
            }
        }

        $deduper->add_doc( $i->id, $c );
    }

    $search->add( $i, $i->in_feed->owner->id );
    print ' ';
    $total++;
}

Jifty->log->info( "$total records indexed", $duplicates ? " ($duplicates duplicates)" : "" );

$search->finish;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26