/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 66 by dpavlin, Mon Aug 7 14:56:08 2006 UTC revision 74 by dpavlin, Mon Aug 7 22:53:46 2006 UTC
# Line 15  pgest-index.pl - create full-text index Line 15  pgest-index.pl - create full-text index
15  =cut  =cut
16    
17  my $c = {  my $c = {
18          name => 'imenik',          debug => 0,
         node_url => 'http://localhost:1978/node/imenik',  
         dbi => 'Pg:dbname=vip',  
         sql => qq{  
                 select ime,tel from imenik  
         },  
         pk_col => 'tel',  
         debug => 1,  
         user => 'admin',  
         passwd => 'admin',  
19  };  };
20    
21  GetOptions($c, qw/node_url=s sql=s pk_col=s debug+ user=s passwd=s/);  =head1 SYNOPSIS
22    
23      pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25      pgsql-index.pl --drop movies
26    
27    Options:
28    
29    =over 4
30    
31    =item --create name
32    
33    Create index C<name> and create triggers on table with same name
34    
35    =item --drop name
36    
37    Remove triggers from table C<name> and node with same name
38    
39    =item --node-url http://localhost:1978/node/name
40    
41    Full URI to node. If it's not specified, it's assumed that you are using
42    Hyper Estraier on C<http://localhost:1978/>.
43    
44    =item --sql "select col1,col2 from name"
45    
46    SQL query which will return names of columns which are included in full-text
47    index. Have in mind that you can't use aliases (as I<something>) in this SQL
48    query (or triggers will be created with wrong fields).
49    
50    If SQL query isn't specified, default one C<< select * from movies >> will
51    be created. That will be B<serious performance hit> if all columns are
52    not needed for search.
53    
54    =item --pk id
55    
56    Specify name of primary key column in SQL query. If you allready have primary
57    key on table, and it consists of simgle column (compaund keys are not supported)
58    it will be picked up automatically.
59    
60    If you specify value which is not unique, you will get just last occurence
61    of that item in index (which might be what you want). That's because specified
62    C<pk> column will be used for C<@uri> in Hyper Estraier.
63    
64    =item --user login
65    
66    =item --passwd password
67    
68    Username and password to use when connecting to Hyper Estraier. If not specified,
69    C<admin> and C<admin> will be used.
70    
71    =item --debug
72    
73    Dump debugging output. It may be specified multiple times for more verbose
74    debugging.
75    
76    =back
77    
78    =cut
79    
80    my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
81    
82    GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
83    
84    my $dbname = shift @ARGV || die $usage;
85    
86    $c->{dbi} = 'Pg:dbname=' . $dbname;
87    
88  warn "# c: ", Dumper($c) if ($c->{debug});  warn "# c: ", Dumper($c) if ($c->{debug});
89    
90  my $table = $c->{name} || die "no name?";  my $table = $c->{create} || $c->{drop} || die $usage;
91    
92    $c->{node_url} = 'http://localhost:1978/node/' . $table;
93    
94    $c->{user} ||= 'admin';
95    $c->{passwd} ||= 'admin';
96    
97  # create and configure node  # create and configure node
98  my $node = new Search::Estraier::Node(  my $node = new Search::Estraier::Node(
# Line 43  my $node = new Search::Estraier::Node( Line 104  my $node = new Search::Estraier::Node(
104          debug => $c->{debug} >= 4 ? 1 : 0,          debug => $c->{debug} >= 4 ? 1 : 0,
105  );  );
106    
107    # create DBI connection
108    my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
109    
110    # drop existing triggers
111    sub drop_triggers {
112            my $table = shift || die "no table?";
113    
114            warn "removing triggers from $table\n";
115    
116            foreach my $t (qw/UPDATE INSERT DELETE/) {
117                    my $lc_t = lc($t);
118                    $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
119            }
120    }
121    
122    if ($c->{drop}) {
123            drop_triggers( $table );
124            warn "removing node $table\n";
125            $node->master(
126                    action => 'nodedel',
127                    name => $table,
128            );
129            exit;
130    }
131    
132    # clear existing node
133  $node->master(  $node->master(
134          action => 'nodeclr',          action => 'nodeclr',
135          name => $table,          name => $table,
136  );  );
137    
 # create DBI connection  
 my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;  
   
138  # create PostgreSQL functions  # create PostgreSQL functions
139  $dbh->do(qq{  $dbh->do(qq{
140    
# Line 66  CREATE OR REPLACE FUNCTION pgest_trigger Line 150  CREATE OR REPLACE FUNCTION pgest_trigger
150  }) || die $dbh->errstr();  }) || die $dbh->errstr();
151    
152    
153  # drop existing triggers  drop_triggers( $table );
154  foreach my $t (qw/UPDATE INSERT DELETE/) {  
155          my $lc_t = lc($t);  if (! $c->{pk}) {
156          $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });  
157            warn "# finding primary key for $table\n" if ($c->{debug});
158    
159            $c->{pk} = $dbh->selectrow_array(qq{
160                    SELECT
161                            a.attname, t.typname
162                    FROM pg_type t, pg_attribute a
163                    WHERE t.oid = a.atttypid AND attrelid = (
164                            SELECT indexrelid
165                            FROM pg_class c, pg_index i
166                            WHERE c.relname = '$table'
167                                    AND c.oid = i.indrelid
168                                    AND indisprimary
169                                    AND indnatts = 1
170                    )
171            });
172    
173  }  }
174    
175    die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
176    
177    warn "using primary key $c->{pk}\n";
178    
179  $dbh->begin_work;  $dbh->begin_work;
180    
181    $c->{sql} ||= "select * from $table";
182    
183  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
184  $sth->execute() || die $sth->errstr();  $sth->execute() || die $sth->errstr();
185    
186  my @cols = @{ $sth->{NAME} };  my @cols = @{ $sth->{NAME} };
187    
188    die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
189    
190  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
191    
192  my $total = $sth->rows;  my $total = $sth->rows;
193  my $i = 1;  my $i = 1;
194    
195  my $t = time();  my $t = time();
196  my $pk_col = $c->{pk_col} || 'id';  my $pk = $c->{pk} || 'id';
197    
198    warn "indexing existing ",$sth->rows," rows\n";
199    
200  while (my $row = $sth->fetchrow_hashref() ) {  while (my $row = $sth->fetchrow_hashref() ) {
201    
# Line 94  while (my $row = $sth->fetchrow_hashref( Line 204  while (my $row = $sth->fetchrow_hashref(
204          # create document          # create document
205          my $doc = new Search::Estraier::Document;          my $doc = new Search::Estraier::Document;
206    
207          if (my $id = $row->{$pk_col}) {          if (my $id = $row->{$pk}) {
208                  $doc->add_attr('@uri', $id);                  $doc->add_attr('@uri', $id);
209          } else {          } else {
210                  die "can't find pk_col column '$pk_col' in results\n";                  die "can't find pk column '$pk' in results\n";
211          }          }
212    
213          printf "%4d ",$i;          my $log = sprintf "%4d ",$i;
214    
215          while (my ($col,$val) = each %{$row}) {          while (my ($col,$val) = each %{$row}) {
216    
# Line 111  while (my $row = $sth->fetchrow_hashref( Line 221  while (my $row = $sth->fetchrow_hashref(
221                          # add body text to document (make it searchable using full-text index)                          # add body text to document (make it searchable using full-text index)
222                          $doc->add_text($val);                          $doc->add_text($val);
223    
224                          print "R";                          $log .= "R";
225                  } else {                  } else {
226                          print ".";                          $log .= ".";
227                  }                  }
228    
229          }          }
# Line 122  while (my $row = $sth->fetchrow_hashref( Line 232  while (my $row = $sth->fetchrow_hashref(
232    
233          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
234    
235          printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );          $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
236    
237            print STDERR $log;
238    
239  }  }
240    
# Line 137  foreach my $t (qw/UPDATE INSERT DELETE/) Line 249  foreach my $t (qw/UPDATE INSERT DELETE/)
249                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
250                          ON ${table} FOR EACH ROW                          ON ${table} FOR EACH ROW
251                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
252                                  '$c->{pk_col}', $cols                                  '$c->{pk}', $cols
253                          )                          )
254    
255          };          };
# Line 150  foreach my $t (qw/UPDATE INSERT DELETE/) Line 262  foreach my $t (qw/UPDATE INSERT DELETE/)
262    
263  $dbh->commit;  $dbh->commit;
264    
265    =head1 SEARCHING
266    
267    At end of each run, this script will output example search SQL query on STDOUT.
268    
269    You can use it to quickly construct queries for your application.
270    
271    =cut
272    
273  my $col_names = join(', ', @cols);  my $col_names = join(', ', @cols);
274  my $col_def = join(', ', map { "$_ text" } @cols);  my $col_def = join(', ', map { "$_ text" } @cols);
275    
276  print "  print "
277  ## example SQL search query:  -- example SQL search query:
278    
279  SELECT $col_names  SELECT $col_names
280  FROM pgest(  FROM pgest(
# Line 169  FROM pgest( Line 289  FROM pgest(
289  ) as ($col_def);  ) as ($col_def);
290    
291  ";  ";
292    
293    __END__
294    
295    =head1 AUTHOR
296    
297    Dobrica Pavlinusic <dpavlin@rot13.org>
298    
299    L<http://www.rot13.org/~dpavlin/>
300    
301    =head1 LICENSE
302    
303    This product is licensed under GNU Public License (GPL) v2 or later.
304    
305    =cut
306    

Legend:
Removed from v.66  
changed lines
  Added in v.74

  ViewVC Help
Powered by ViewVC 1.1.26