--- trunk/bin/pgest-index.pl 2006/08/07 14:56:08 66 +++ trunk/bin/pgest-index.pl 2006/08/07 22:53:46 74 @@ -15,23 +15,84 @@ =cut my $c = { - name => 'imenik', - node_url => 'http://localhost:1978/node/imenik', - dbi => 'Pg:dbname=vip', - sql => qq{ - select ime,tel from imenik - }, - pk_col => 'tel', - debug => 1, - user => 'admin', - passwd => 'admin', + debug => 0, }; -GetOptions($c, qw/node_url=s sql=s pk_col=s debug+ user=s passwd=s/); +=head1 SYNOPSIS + + pgest-index.pl --create movies --sql "select id,title,year from movies" + + pgsql-index.pl --drop movies + +Options: + +=over 4 + +=item --create name + +Create index C and create triggers on table with same name + +=item --drop name + +Remove triggers from table C and node with same name + +=item --node-url http://localhost:1978/node/name + +Full URI to node. If it's not specified, it's assumed that you are using +Hyper Estraier on C. + +=item --sql "select col1,col2 from name" + +SQL query which will return names of columns which are included in full-text +index. Have in mind that you can't use aliases (as I) in this SQL +query (or triggers will be created with wrong fields). + +If SQL query isn't specified, default one C<< select * from movies >> will +be created. That will be B if all columns are +not needed for search. + +=item --pk id + +Specify name of primary key column in SQL query. If you allready have primary +key on table, and it consists of simgle column (compaund keys are not supported) +it will be picked up automatically. + +If you specify value which is not unique, you will get just last occurence +of that item in index (which might be what you want). That's because specified +C column will be used for C<@uri> in Hyper Estraier. + +=item --user login + +=item --passwd password + +Username and password to use when connecting to Hyper Estraier. If not specified, +C and C will be used. + +=item --debug + +Dump debugging output. It may be specified multiple times for more verbose +debugging. + +=back + +=cut + +my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n"; + +GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/); + +my $dbname = shift @ARGV || die $usage; + +$c->{dbi} = 'Pg:dbname=' . $dbname; warn "# c: ", Dumper($c) if ($c->{debug}); -my $table = $c->{name} || die "no name?"; +my $table = $c->{create} || $c->{drop} || die $usage; + +$c->{node_url} = 'http://localhost:1978/node/' . $table; + +$c->{user} ||= 'admin'; +$c->{passwd} ||= 'admin'; # create and configure node my $node = new Search::Estraier::Node( @@ -43,14 +104,37 @@ debug => $c->{debug} >= 4 ? 1 : 0, ); +# create DBI connection +my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr; + +# drop existing triggers +sub drop_triggers { + my $table = shift || die "no table?"; + + warn "removing triggers from $table\n"; + + foreach my $t (qw/UPDATE INSERT DELETE/) { + my $lc_t = lc($t); + $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} }); + } +} + +if ($c->{drop}) { + drop_triggers( $table ); + warn "removing node $table\n"; + $node->master( + action => 'nodedel', + name => $table, + ); + exit; +} + +# clear existing node $node->master( action => 'nodeclr', name => $table, ); -# create DBI connection -my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr; - # create PostgreSQL functions $dbh->do(qq{ @@ -66,26 +150,52 @@ }) || die $dbh->errstr(); -# drop existing triggers -foreach my $t (qw/UPDATE INSERT DELETE/) { - my $lc_t = lc($t); - $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} }); +drop_triggers( $table ); + +if (! $c->{pk}) { + + warn "# finding primary key for $table\n" if ($c->{debug}); + + $c->{pk} = $dbh->selectrow_array(qq{ + SELECT + a.attname, t.typname + FROM pg_type t, pg_attribute a + WHERE t.oid = a.atttypid AND attrelid = ( + SELECT indexrelid + FROM pg_class c, pg_index i + WHERE c.relname = '$table' + AND c.oid = i.indrelid + AND indisprimary + AND indnatts = 1 + ) + }); + } +die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk}); + +warn "using primary key $c->{pk}\n"; + $dbh->begin_work; +$c->{sql} ||= "select * from $table"; + my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr(); $sth->execute() || die $sth->errstr(); my @cols = @{ $sth->{NAME} }; +die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols); + warn "# columns: ",join(",", @cols),"\n" if ($c->{debug}); my $total = $sth->rows; my $i = 1; my $t = time(); -my $pk_col = $c->{pk_col} || 'id'; +my $pk = $c->{pk} || 'id'; + +warn "indexing existing ",$sth->rows," rows\n"; while (my $row = $sth->fetchrow_hashref() ) { @@ -94,13 +204,13 @@ # create document my $doc = new Search::Estraier::Document; - if (my $id = $row->{$pk_col}) { + if (my $id = $row->{$pk}) { $doc->add_attr('@uri', $id); } else { - die "can't find pk_col column '$pk_col' in results\n"; + die "can't find pk column '$pk' in results\n"; } - printf "%4d ",$i; + my $log = sprintf "%4d ",$i; while (my ($col,$val) = each %{$row}) { @@ -111,9 +221,9 @@ # add body text to document (make it searchable using full-text index) $doc->add_text($val); - print "R"; + $log .= "R"; } else { - print "."; + $log .= "."; } } @@ -122,7 +232,9 @@ die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) }); - printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) ); + $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) ); + + print STDERR $log; } @@ -137,7 +249,7 @@ CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t} ON ${table} FOR EACH ROW EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}', - '$c->{pk_col}', $cols + '$c->{pk}', $cols ) }; @@ -150,11 +262,19 @@ $dbh->commit; +=head1 SEARCHING + +At end of each run, this script will output example search SQL query on STDOUT. + +You can use it to quickly construct queries for your application. + +=cut + my $col_names = join(', ', @cols); my $col_def = join(', ', map { "$_ text" } @cols); print " -## example SQL search query: +-- example SQL search query: SELECT $col_names FROM pgest( @@ -169,3 +289,18 @@ ) as ($col_def); "; + +__END__ + +=head1 AUTHOR + +Dobrica Pavlinusic + +L + +=head1 LICENSE + +This product is licensed under GNU Public License (GPL) v2 or later. + +=cut +