/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 78 - (show annotations)
Tue Aug 8 11:40:09 2006 UTC (17 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 7216 byte(s)
delete just existing triggers
1 #!/usr/bin/perl -w
2
3 use strict;
4 use Search::Estraier 0.07;
5 use DBI;
6 use Data::Dumper;
7 use Encode qw/from_to/;
8 use Time::HiRes qw/time/;
9 use Getopt::Long;
10
11 =head1 NAME
12
13 pgest-index.pl - create full-text index of some columns in your database
14
15 =cut
16
17 my $c = {
18 debug => 0,
19 };
20
21 =head1 SYNOPSIS
22
23 pgest-index.pl --create movies --sql "select id,title,year from movies"
24
25 pgsql-index.pl --drop movies
26
27 Options:
28
29 =over 4
30
31 =item --create name
32
33 Create index C<name> and create triggers on table with same name
34
35 =item --drop name
36
37 Remove triggers from table C<name> and node with same name
38
39 =item --node-url http://localhost:1978/node/name
40
41 Full URI to node. If it's not specified, it's assumed that you are using
42 Hyper Estraier on C<http://localhost:1978/>.
43
44 =item --sql "select col1,col2 from name"
45
46 SQL query which will return names of columns which are included in full-text
47 index. Have in mind that you can't use aliases (as I<something>) in this SQL
48 query (or triggers will be created with wrong fields).
49
50 If SQL query isn't specified, default one C<< select * from movies >> will
51 be created. That will be B<serious performance hit> if all columns are
52 not needed for search.
53
54 =item --pk id
55
56 Specify name of primary key column in SQL query. If you allready have primary
57 key on table or unique index and it consists of simgle column
58 (compaund keys are not supported) it will be picked up automatically.
59
60 If you specify value which is not unique, you will get just last occurence
61 of that item in index (which might be what you want). That's because specified
62 C<pk> column will be used for C<@uri> in Hyper Estraier.
63
64 =item --user login
65
66 =item --passwd password
67
68 Username and password to use when connecting to Hyper Estraier. If not specified,
69 C<admin> and C<admin> will be used.
70
71 =item --debug
72
73 Dump debugging output. It may be specified multiple times for more verbose
74 debugging.
75
76 =back
77
78 =cut
79
80 my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
81
82 GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
83
84 my $dbname = shift @ARGV || die $usage;
85
86 $c->{dbi} = 'Pg:dbname=' . $dbname;
87
88 warn "# c: ", Dumper($c) if ($c->{debug});
89
90 my $table = $c->{create} || $c->{drop} || die $usage;
91
92 $c->{node_url} = 'http://localhost:1978/node/' . $table;
93
94 $c->{user} ||= 'admin';
95 $c->{passwd} ||= 'admin';
96
97 # create and configure node
98 my $node = new Search::Estraier::Node(
99 url => $c->{node_url},
100 user => $c->{user},
101 passwd => $c->{passwd},
102 croak_on_error => 1,
103 create => 1,
104 debug => $c->{debug} >= 4 ? 1 : 0,
105 );
106
107 # create DBI connection
108 my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
109
110 # drop existing triggers
111 sub drop_triggers {
112 my $table = shift || die "no table?";
113
114 my $sth = $dbh->prepare(qq{
115 SELECT relname,tgname
116 FROM pg_trigger JOIN pg_class ON relfilenode = tgrelid
117 WHERE tgname LIKE 'pgest_trigger_%' AND relname = ?
118 }) || $dbh->errstr;
119
120 $sth->execute( $table ) || $sth->errstr();
121
122 warn "there are ", $sth->rows, " triggers instead of just 3, dropping all\n" if ($sth->rows != 3);
123
124 while (my $row = $sth->fetchrow_hashref) {
125 my $sql = sprintf(qq{ DROP TRIGGER %s ON %s }, $row->{tgname}, $row->{relname} );
126 #warn "# $sql\n";
127 $dbh->do( $sql ) || $dbh->errstr;
128 }
129
130 warn "removed ", $sth->rows, " triggers from $table\n" if ($sth->rows);
131
132 }
133
134 if ($c->{drop}) {
135 drop_triggers( $table );
136 warn "removing node $table\n";
137 $node->master(
138 action => 'nodedel',
139 name => $table,
140 );
141 exit;
142 }
143
144 # clear existing node
145 $node->master(
146 action => 'nodeclr',
147 name => $table,
148 );
149
150 # create PostgreSQL functions
151 $dbh->do(qq{
152
153 CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
154 RETURNS setof record
155 AS 'pgest','pgest_node'
156 LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
157
158 CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
159 AS 'pgest', 'pgest_trigger'
160 LANGUAGE 'C' STRICT;
161
162 }) || die $dbh->errstr();
163
164
165 drop_triggers( $table );
166
167 if (! $c->{pk}) {
168
169 warn "# finding primary key for $table\n" if ($c->{debug});
170
171 my $index_fmt = qq{
172 SELECT
173 a.attname, t.typname
174 FROM pg_type t, pg_attribute a
175 WHERE t.oid = a.atttypid AND attrelid = (
176 SELECT indexrelid
177 FROM pg_class c, pg_index i
178 WHERE c.relname = '%s'
179 AND c.oid = i.indrelid
180 AND %s
181 AND indnatts = 1
182 )
183 };
184
185 $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') );
186
187 $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') );
188
189 }
190
191 die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
192
193 warn "using primary key $c->{pk}\n";
194
195 $dbh->begin_work;
196
197 $c->{sql} ||= "select * from $table";
198
199 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
200 $sth->execute() || die $sth->errstr;
201
202 my @cols = @{ $sth->{NAME} };
203
204 die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
205
206 warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
207
208 my $total = $sth->rows;
209 my $i = 1;
210
211 my $t = time();
212 my $pk = $c->{pk} || 'id';
213
214 warn "indexing existing ",$sth->rows," rows\n";
215
216 while (my $row = $sth->fetchrow_hashref() ) {
217
218 warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
219
220 # create document
221 my $doc = new Search::Estraier::Document;
222
223 if (my $id = $row->{$pk}) {
224 $doc->add_attr('@uri', $id);
225 } else {
226 die "can't find pk column '$pk' in results\n";
227 }
228
229 my $log = sprintf "%4d ",$i;
230
231 while (my ($col,$val) = each %{$row}) {
232
233 if ($val) {
234 # add attributes (make column usable from attribute search)
235 $doc->add_attr($col, $val);
236
237 # add body text to document (make it searchable using full-text index)
238 $doc->add_text($val);
239
240 $log .= "R";
241 } else {
242 $log .= ".";
243 }
244
245 }
246
247 warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
248
249 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
250
251 $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
252
253 print STDERR $log;
254
255 }
256
257 my $cols = "'" . join("', '", @cols) . "'";
258
259 foreach my $t (qw/UPDATE INSERT DELETE/) {
260
261 my $lc_t = lc($t);
262
263 my $sql = qq{
264
265 CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
266 ON ${table} FOR EACH ROW
267 EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
268 '$c->{pk}', $cols
269 )
270
271 };
272
273 #warn "$sql\n";
274
275 $dbh->do( $sql ) || die $dbh->errstr;
276
277 }
278
279 warn "created consistency triggers\n";
280
281 $dbh->commit;
282
283 =head1 SEARCHING
284
285 At end of each run, this script will output example search SQL query on STDOUT.
286
287 You can use it to quickly construct queries for your application.
288
289 =cut
290
291 my $col_names = join(', ', @cols);
292 my $col_def = join(', ', map { "$_ text" } @cols);
293
294 print "
295 -- example SQL search query:
296
297 SELECT $col_names
298 FROM pgest(
299 -- node, login, passwd, depth
300 '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
301 -- full text search
302 'foo bar',
303 -- attribute filter, order, limit, offset
304 null, null, null, null,
305 -- return columns
306 array[$cols]
307 ) as ($col_def);
308
309 ";
310
311 __END__
312
313 =head1 AUTHOR
314
315 Dobrica Pavlinusic <dpavlin@rot13.org>
316
317 L<http://www.rot13.org/~dpavlin/>
318
319 =head1 LICENSE
320
321 This product is licensed under GNU Public License (GPL) v2 or later.
322
323 =cut
324

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26