/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 63 by dpavlin, Mon Aug 7 14:30:55 2006 UTC revision 78 by dpavlin, Tue Aug 8 11:40:09 2006 UTC
# Line 15  pgest-index.pl - create full-text index Line 15  pgest-index.pl - create full-text index
15  =cut  =cut
16    
17  my $c = {  my $c = {
18          name => 'imenik',          debug => 0,
         node_url => 'http://localhost:1978/node/imenik',  
         dbi => 'Pg:dbname=vip',  
         sql => qq{  
                 select ime,tel from imenik  
         },  
         pk_col => 'tel',  
         db_encoding => 'iso-8859-2',  
         debug => 1,  
         user => 'admin',  
         passwd => 'admin',  
19  };  };
20    
21  GetOptions($c, qw/node_url=s sql=s pk_col=s eb_encoding=s debug+ user=s passwd=s/);  =head1 SYNOPSIS
22    
23      pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25      pgsql-index.pl --drop movies
26    
27    Options:
28    
29    =over 4
30    
31    =item --create name
32    
33    Create index C<name> and create triggers on table with same name
34    
35    =item --drop name
36    
37    Remove triggers from table C<name> and node with same name
38    
39    =item --node-url http://localhost:1978/node/name
40    
41    Full URI to node. If it's not specified, it's assumed that you are using
42    Hyper Estraier on C<http://localhost:1978/>.
43    
44    =item --sql "select col1,col2 from name"
45    
46    SQL query which will return names of columns which are included in full-text
47    index. Have in mind that you can't use aliases (as I<something>) in this SQL
48    query (or triggers will be created with wrong fields).
49    
50    If SQL query isn't specified, default one C<< select * from movies >> will
51    be created. That will be B<serious performance hit> if all columns are
52    not needed for search.
53    
54    =item --pk id
55    
56    Specify name of primary key column in SQL query. If you allready have primary
57    key on table or unique index and it consists of simgle column
58    (compaund keys are not supported) it will be picked up automatically.
59    
60    If you specify value which is not unique, you will get just last occurence
61    of that item in index (which might be what you want). That's because specified
62    C<pk> column will be used for C<@uri> in Hyper Estraier.
63    
64    =item --user login
65    
66    =item --passwd password
67    
68    Username and password to use when connecting to Hyper Estraier. If not specified,
69    C<admin> and C<admin> will be used.
70    
71    =item --debug
72    
73    Dump debugging output. It may be specified multiple times for more verbose
74    debugging.
75    
76    =back
77    
78    =cut
79    
80    my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
81    
82    GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
83    
84    my $dbname = shift @ARGV || die $usage;
85    
86    $c->{dbi} = 'Pg:dbname=' . $dbname;
87    
88  warn "# c: ", Dumper($c) if ($c->{debug});  warn "# c: ", Dumper($c) if ($c->{debug});
89    
90  my $table = $c->{name} || die "no name?";  my $table = $c->{create} || $c->{drop} || die $usage;
91    
92    $c->{node_url} = 'http://localhost:1978/node/' . $table;
93    
94    $c->{user} ||= 'admin';
95    $c->{passwd} ||= 'admin';
96    
97  # create and configure node  # create and configure node
98  my $node = new Search::Estraier::Node(  my $node = new Search::Estraier::Node(
# Line 44  my $node = new Search::Estraier::Node( Line 104  my $node = new Search::Estraier::Node(
104          debug => $c->{debug} >= 4 ? 1 : 0,          debug => $c->{debug} >= 4 ? 1 : 0,
105  );  );
106    
107    # create DBI connection
108    my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
109    
110    # drop existing triggers
111    sub drop_triggers {
112            my $table = shift || die "no table?";
113    
114            my $sth = $dbh->prepare(qq{
115                    SELECT relname,tgname
116                    FROM pg_trigger JOIN pg_class ON relfilenode = tgrelid
117                    WHERE tgname LIKE 'pgest_trigger_%' AND relname = ?
118            }) || $dbh->errstr;
119    
120            $sth->execute( $table ) || $sth->errstr();
121    
122            warn "there are ", $sth->rows, " triggers instead of just 3, dropping all\n" if ($sth->rows != 3);
123    
124            while (my $row = $sth->fetchrow_hashref) {
125                    my $sql = sprintf(qq{ DROP TRIGGER %s ON %s }, $row->{tgname}, $row->{relname} );
126                    #warn "# $sql\n";
127                    $dbh->do( $sql ) || $dbh->errstr;
128            }
129    
130            warn "removed ", $sth->rows, " triggers from $table\n" if ($sth->rows);
131    
132    }
133    
134    if ($c->{drop}) {
135            drop_triggers( $table );
136            warn "removing node $table\n";
137            $node->master(
138                    action => 'nodedel',
139                    name => $table,
140            );
141            exit;
142    }
143    
144    # clear existing node
145  $node->master(  $node->master(
146          action => 'nodeclr',          action => 'nodeclr',
147          name => $table,          name => $table,
148  );  );
149    
 # create DBI connection  
 my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;  
   
150  # create PostgreSQL functions  # create PostgreSQL functions
151  $dbh->do(qq{  $dbh->do(qq{
152    
# Line 67  CREATE OR REPLACE FUNCTION pgest_trigger Line 162  CREATE OR REPLACE FUNCTION pgest_trigger
162  }) || die $dbh->errstr();  }) || die $dbh->errstr();
163    
164    
165  # drop existing triggers  drop_triggers( $table );
166  foreach my $t (qw/UPDATE INSERT DELETE/) {  
167          my $lc_t = lc($t);  if (! $c->{pk}) {
168          $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });  
169            warn "# finding primary key for $table\n" if ($c->{debug});
170    
171            my $index_fmt = qq{
172                    SELECT
173                            a.attname, t.typname
174                    FROM pg_type t, pg_attribute a
175                    WHERE t.oid = a.atttypid AND attrelid = (
176                            SELECT indexrelid
177                            FROM pg_class c, pg_index i
178                            WHERE c.relname = '%s'
179                                    AND c.oid = i.indrelid
180                                    AND %s
181                                    AND indnatts = 1
182                    )
183            };
184    
185            $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') );
186            
187            $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') );
188    
189  }  }
190    
191    die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
192    
193    warn "using primary key $c->{pk}\n";
194    
195  $dbh->begin_work;  $dbh->begin_work;
196    
197    $c->{sql} ||= "select * from $table";
198    
199  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
200  $sth->execute() || die $sth->errstr();  $sth->execute() || die $sth->errstr;
201    
202  my @cols = @{ $sth->{NAME} };  my @cols = @{ $sth->{NAME} };
203    
204    die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
205    
206  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
207    
208  my $total = $sth->rows;  my $total = $sth->rows;
209  my $i = 1;  my $i = 1;
210    
211  my $t = time();  my $t = time();
212  my $pk_col = $c->{pk_col} || 'id';  my $pk = $c->{pk} || 'id';
213    
214    warn "indexing existing ",$sth->rows," rows\n";
215    
216  while (my $row = $sth->fetchrow_hashref() ) {  while (my $row = $sth->fetchrow_hashref() ) {
217    
# Line 95  while (my $row = $sth->fetchrow_hashref( Line 220  while (my $row = $sth->fetchrow_hashref(
220          # create document          # create document
221          my $doc = new Search::Estraier::Document;          my $doc = new Search::Estraier::Document;
222    
223          if (my $id = $row->{$pk_col}) {          if (my $id = $row->{$pk}) {
224                  $doc->add_attr('@uri', $id);                  $doc->add_attr('@uri', $id);
225          } else {          } else {
226                  die "can't find pk_col column '$pk_col' in results\n";                  die "can't find pk column '$pk' in results\n";
227          }          }
228    
229          printf "%4d ",$i;          my $log = sprintf "%4d ",$i;
230    
231          while (my ($col,$val) = each %{$row}) {          while (my ($col,$val) = each %{$row}) {
232    
233                  if ($val) {                  if ($val) {
                         # change encoding?  
                         from_to($val, ($c->{db_encoding} || 'ISO-8859-1'), 'UTF-8');  
   
234                          # add attributes (make column usable from attribute search)                          # add attributes (make column usable from attribute search)
235                          $doc->add_attr($col, $val);                          $doc->add_attr($col, $val);
236    
237                          # add body text to document (make it searchable using full-text index)                          # add body text to document (make it searchable using full-text index)
238                          $doc->add_text($val);                          $doc->add_text($val);
239    
240                          print "R";                          $log .= "R";
241                  } else {                  } else {
242                          print ".";                          $log .= ".";
243                  }                  }
244    
245          }          }
# Line 126  while (my $row = $sth->fetchrow_hashref( Line 248  while (my $row = $sth->fetchrow_hashref(
248    
249          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
250    
251          printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );          $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
252    
253            print STDERR $log;
254    
255  }  }
256    
257  my $cols = "'" . join("','", @cols) . "'";  my $cols = "'" . join("', '", @cols) . "'";
258    
259  foreach my $t (qw/UPDATE INSERT DELETE/) {  foreach my $t (qw/UPDATE INSERT DELETE/) {
260    
# Line 141  foreach my $t (qw/UPDATE INSERT DELETE/) Line 265  foreach my $t (qw/UPDATE INSERT DELETE/)
265                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
266                          ON ${table} FOR EACH ROW                          ON ${table} FOR EACH ROW
267                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
268                                  '$c->{pk_col}', $cols                                  '$c->{pk}', $cols
269                          )                          )
270    
271          };          };
272    
273          #warn "$sql\n";          #warn "$sql\n";
274    
275          $dbh->do( $sql ) || die $dbh->errstr();          $dbh->do( $sql ) || die $dbh->errstr;
276    
277  }  }
278    
279    warn "created consistency triggers\n";
280    
281  $dbh->commit;  $dbh->commit;
282    
283    =head1 SEARCHING
284    
285    At end of each run, this script will output example search SQL query on STDOUT.
286    
287    You can use it to quickly construct queries for your application.
288    
289    =cut
290    
291    my $col_names = join(', ', @cols);
292    my $col_def = join(', ', map { "$_ text" } @cols);
293    
294    print "
295    -- example SQL search query:
296    
297    SELECT $col_names
298    FROM pgest(
299            -- node, login, passwd, depth
300            '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
301            -- full text search
302            'foo bar',
303            -- attribute filter, order, limit, offset
304            null, null, null, null,
305            -- return columns
306            array[$cols]
307    ) as ($col_def);
308    
309    ";
310    
311    __END__
312    
313    =head1 AUTHOR
314    
315    Dobrica Pavlinusic <dpavlin@rot13.org>
316    
317    L<http://www.rot13.org/~dpavlin/>
318    
319    =head1 LICENSE
320    
321    This product is licensed under GNU Public License (GPL) v2 or later.
322    
323    =cut
324    

Legend:
Removed from v.63  
changed lines
  Added in v.78

  ViewVC Help
Powered by ViewVC 1.1.26