11 |
use Data::Dump qw/dump/; |
use Data::Dump qw/dump/; |
12 |
use Text::DeDuper; |
use Text::DeDuper; |
13 |
use Encode; |
use Encode; |
14 |
|
use Getopt::Long; |
15 |
|
|
16 |
my $remove_duplicates = 1; |
my $keep_duplicates = 0; |
17 |
|
|
18 |
|
GetOptions( |
19 |
|
'duplicates!' => $keep_duplicates, |
20 |
|
); |
21 |
|
|
22 |
$|=1; |
$|=1; |
23 |
|
|
27 |
my $coll = Grep::Model::ItemCollection->new( results_are_readable => 1, current_user => $system_user ); |
my $coll = Grep::Model::ItemCollection->new( results_are_readable => 1, current_user => $system_user ); |
28 |
$coll->unlimit; |
$coll->unlimit; |
29 |
|
|
30 |
print "indexing ", $coll->count, " items "; |
Jifty->log->info( "indexing ", $coll->count, " items ", $keep_duplicates ? "" : "removing duplicates " ); |
31 |
|
|
32 |
my $search = Grep::Search->new(); |
my $search = Grep::Search->new({ create => 1 }); |
33 |
my $deduper = Text::DeDuper->new(); |
my $deduper = Text::DeDuper->new(); |
34 |
|
|
35 |
my ( $total, $duplicates ) = ( 0, 0 ); |
my ( $total, $duplicates ) = ( 0, 0 ); |
38 |
|
|
39 |
print $i->id; |
print $i->id; |
40 |
|
|
41 |
if ( $remove_duplicates ) { |
if ( ! $keep_duplicates ) { |
42 |
|
|
43 |
my $c = encode('utf-8', $i->content); |
my $c = encode('utf-8', $i->content); |
44 |
|
|
53 |
print " -$id-"; |
print " -$id-"; |
54 |
$si->delete; |
$si->delete; |
55 |
$duplicates++; |
$duplicates++; |
56 |
|
$search->invindexer->delete_by_term( 'id', $id ); |
57 |
} |
} |
58 |
} |
} |
59 |
|
|
65 |
$total++; |
$total++; |
66 |
} |
} |
67 |
|
|
68 |
print "$total records indexed", $remove_duplicates ? " ($duplicates duplicates)" : "", "\n"; |
Jifty->log->info( "$total records indexed", $duplicates ? " ($duplicates duplicates)" : "" ); |
69 |
|
|
70 |
$search->finish; |
$search->finish; |