/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 78 - (hide annotations)
Tue Aug 8 11:40:09 2006 UTC (17 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 7216 byte(s)
delete just existing triggers
1 dpavlin 62 #!/usr/bin/perl -w
2    
3     use strict;
4 dpavlin 63 use Search::Estraier 0.07;
5 dpavlin 62 use DBI;
6     use Data::Dumper;
7     use Encode qw/from_to/;
8     use Time::HiRes qw/time/;
9     use Getopt::Long;
10    
11     =head1 NAME
12    
13     pgest-index.pl - create full-text index of some columns in your database
14    
15     =cut
16    
17     my $c = {
18 dpavlin 74 debug => 0,
19 dpavlin 62 };
20    
21 dpavlin 67 =head1 SYNOPSIS
22 dpavlin 62
23 dpavlin 67 pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25     pgsql-index.pl --drop movies
26    
27     Options:
28    
29     =over 4
30    
31     =item --create name
32    
33     Create index C<name> and create triggers on table with same name
34    
35     =item --drop name
36    
37     Remove triggers from table C<name> and node with same name
38    
39     =item --node-url http://localhost:1978/node/name
40    
41     Full URI to node. If it's not specified, it's assumed that you are using
42     Hyper Estraier on C<http://localhost:1978/>.
43    
44     =item --sql "select col1,col2 from name"
45    
46     SQL query which will return names of columns which are included in full-text
47     index. Have in mind that you can't use aliases (as I<something>) in this SQL
48     query (or triggers will be created with wrong fields).
49    
50 dpavlin 70 If SQL query isn't specified, default one C<< select * from movies >> will
51     be created. That will be B<serious performance hit> if all columns are
52     not needed for search.
53    
54 dpavlin 67 =item --pk id
55    
56 dpavlin 70 Specify name of primary key column in SQL query. If you allready have primary
57 dpavlin 77 key on table or unique index and it consists of simgle column
58     (compaund keys are not supported) it will be picked up automatically.
59 dpavlin 67
60 dpavlin 70 If you specify value which is not unique, you will get just last occurence
61     of that item in index (which might be what you want). That's because specified
62     C<pk> column will be used for C<@uri> in Hyper Estraier.
63    
64 dpavlin 67 =item --user login
65    
66     =item --passwd password
67    
68     Username and password to use when connecting to Hyper Estraier. If not specified,
69     C<admin> and C<admin> will be used.
70    
71     =item --debug
72    
73 dpavlin 70 Dump debugging output. It may be specified multiple times for more verbose
74     debugging.
75 dpavlin 67
76     =back
77    
78     =cut
79    
80 dpavlin 68 my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
81    
82 dpavlin 67 GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
83    
84 dpavlin 68 my $dbname = shift @ARGV || die $usage;
85    
86     $c->{dbi} = 'Pg:dbname=' . $dbname;
87    
88 dpavlin 62 warn "# c: ", Dumper($c) if ($c->{debug});
89    
90 dpavlin 68 my $table = $c->{create} || $c->{drop} || die $usage;
91 dpavlin 63
92 dpavlin 67 $c->{node_url} = 'http://localhost:1978/node/' . $table;
93    
94     $c->{user} ||= 'admin';
95     $c->{passwd} ||= 'admin';
96    
97 dpavlin 62 # create and configure node
98     my $node = new Search::Estraier::Node(
99     url => $c->{node_url},
100     user => $c->{user},
101     passwd => $c->{passwd},
102     croak_on_error => 1,
103     create => 1,
104     debug => $c->{debug} >= 4 ? 1 : 0,
105     );
106    
107 dpavlin 67 # create DBI connection
108     my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
109    
110     # drop existing triggers
111     sub drop_triggers {
112     my $table = shift || die "no table?";
113    
114 dpavlin 78 my $sth = $dbh->prepare(qq{
115     SELECT relname,tgname
116     FROM pg_trigger JOIN pg_class ON relfilenode = tgrelid
117     WHERE tgname LIKE 'pgest_trigger_%' AND relname = ?
118     }) || $dbh->errstr;
119 dpavlin 67
120 dpavlin 78 $sth->execute( $table ) || $sth->errstr();
121    
122     warn "there are ", $sth->rows, " triggers instead of just 3, dropping all\n" if ($sth->rows != 3);
123    
124     while (my $row = $sth->fetchrow_hashref) {
125     my $sql = sprintf(qq{ DROP TRIGGER %s ON %s }, $row->{tgname}, $row->{relname} );
126     #warn "# $sql\n";
127     $dbh->do( $sql ) || $dbh->errstr;
128 dpavlin 67 }
129 dpavlin 78
130     warn "removed ", $sth->rows, " triggers from $table\n" if ($sth->rows);
131    
132 dpavlin 67 }
133    
134     if ($c->{drop}) {
135     drop_triggers( $table );
136     warn "removing node $table\n";
137     $node->master(
138     action => 'nodedel',
139     name => $table,
140     );
141     exit;
142     }
143    
144     # clear existing node
145 dpavlin 63 $node->master(
146     action => 'nodeclr',
147     name => $table,
148     );
149    
150     # create PostgreSQL functions
151     $dbh->do(qq{
152    
153     CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
154     RETURNS setof record
155     AS 'pgest','pgest_node'
156     LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
157    
158     CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
159     AS 'pgest', 'pgest_trigger'
160     LANGUAGE 'C' STRICT;
161    
162     }) || die $dbh->errstr();
163    
164    
165 dpavlin 67 drop_triggers( $table );
166 dpavlin 63
167 dpavlin 68 if (! $c->{pk}) {
168    
169     warn "# finding primary key for $table\n" if ($c->{debug});
170    
171 dpavlin 77 my $index_fmt = qq{
172 dpavlin 68 SELECT
173     a.attname, t.typname
174     FROM pg_type t, pg_attribute a
175     WHERE t.oid = a.atttypid AND attrelid = (
176     SELECT indexrelid
177     FROM pg_class c, pg_index i
178 dpavlin 77 WHERE c.relname = '%s'
179 dpavlin 68 AND c.oid = i.indrelid
180 dpavlin 77 AND %s
181 dpavlin 68 AND indnatts = 1
182     )
183 dpavlin 77 };
184 dpavlin 68
185 dpavlin 77 $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') );
186    
187     $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') );
188    
189 dpavlin 68 }
190    
191     die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
192    
193     warn "using primary key $c->{pk}\n";
194    
195 dpavlin 63 $dbh->begin_work;
196    
197 dpavlin 68 $c->{sql} ||= "select * from $table";
198    
199 dpavlin 62 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
200 dpavlin 78 $sth->execute() || die $sth->errstr;
201 dpavlin 62
202     my @cols = @{ $sth->{NAME} };
203    
204 dpavlin 68 die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
205    
206 dpavlin 62 warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
207    
208     my $total = $sth->rows;
209     my $i = 1;
210    
211     my $t = time();
212 dpavlin 67 my $pk = $c->{pk} || 'id';
213 dpavlin 62
214 dpavlin 69 warn "indexing existing ",$sth->rows," rows\n";
215    
216 dpavlin 62 while (my $row = $sth->fetchrow_hashref() ) {
217    
218     warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
219    
220     # create document
221     my $doc = new Search::Estraier::Document;
222    
223 dpavlin 67 if (my $id = $row->{$pk}) {
224 dpavlin 62 $doc->add_attr('@uri', $id);
225     } else {
226 dpavlin 67 die "can't find pk column '$pk' in results\n";
227 dpavlin 62 }
228    
229 dpavlin 69 my $log = sprintf "%4d ",$i;
230 dpavlin 62
231     while (my ($col,$val) = each %{$row}) {
232    
233     if ($val) {
234     # add attributes (make column usable from attribute search)
235     $doc->add_attr($col, $val);
236    
237     # add body text to document (make it searchable using full-text index)
238     $doc->add_text($val);
239    
240 dpavlin 69 $log .= "R";
241 dpavlin 62 } else {
242 dpavlin 69 $log .= ".";
243 dpavlin 62 }
244    
245     }
246    
247     warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
248    
249     die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
250    
251 dpavlin 69 $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
252 dpavlin 62
253 dpavlin 69 print STDERR $log;
254    
255 dpavlin 62 }
256    
257 dpavlin 65 my $cols = "'" . join("', '", @cols) . "'";
258 dpavlin 62
259     foreach my $t (qw/UPDATE INSERT DELETE/) {
260    
261     my $lc_t = lc($t);
262    
263     my $sql = qq{
264    
265     CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
266     ON ${table} FOR EACH ROW
267     EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
268 dpavlin 67 '$c->{pk}', $cols
269 dpavlin 62 )
270    
271     };
272    
273 dpavlin 63 #warn "$sql\n";
274 dpavlin 62
275 dpavlin 78 $dbh->do( $sql ) || die $dbh->errstr;
276 dpavlin 62
277     }
278 dpavlin 63
279 dpavlin 78 warn "created consistency triggers\n";
280    
281 dpavlin 63 $dbh->commit;
282 dpavlin 65
283 dpavlin 70 =head1 SEARCHING
284    
285     At end of each run, this script will output example search SQL query on STDOUT.
286    
287     You can use it to quickly construct queries for your application.
288    
289     =cut
290    
291 dpavlin 65 my $col_names = join(', ', @cols);
292     my $col_def = join(', ', map { "$_ text" } @cols);
293    
294 dpavlin 66 print "
295 dpavlin 69 -- example SQL search query:
296 dpavlin 65
297     SELECT $col_names
298     FROM pgest(
299     -- node, login, passwd, depth
300     '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
301     -- full text search
302     'foo bar',
303 dpavlin 66 -- attribute filter, order, limit, offset
304 dpavlin 65 null, null, null, null,
305     -- return columns
306     array[$cols]
307     ) as ($col_def);
308    
309     ";
310 dpavlin 70
311     __END__
312    
313     =head1 AUTHOR
314    
315     Dobrica Pavlinusic <dpavlin@rot13.org>
316    
317     L<http://www.rot13.org/~dpavlin/>
318    
319     =head1 LICENSE
320    
321     This product is licensed under GNU Public License (GPL) v2 or later.
322    
323     =cut
324    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26