/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 88 - (show annotations)
Sun Mar 16 20:55:56 2008 UTC (16 years ago) by dpavlin
File MIME type: text/plain
File size: 7473 byte(s)
better error message when run without arguments
1 #!/usr/bin/perl -w
2
3 use strict;
4 use Search::Estraier 0.07;
5 use DBI;
6 use Data::Dumper;
7 use Encode qw/from_to/;
8 use Time::HiRes qw/time/;
9 use Getopt::Long;
10
11 =head1 NAME
12
13 pgest-index.pl - create full-text index of some columns in your database
14
15 =cut
16
17 my $c = {
18 debug => 0,
19 };
20
21 =head1 SYNOPSIS
22
23 pgest-index.pl --create movies --sql "select id,title,year from movies"
24
25 pgsql-index.pl --drop movies
26
27 Options:
28
29 =over 4
30
31 =item --create name
32
33 Create index C<name> and create triggers on table with same name
34
35 =item --drop name
36
37 Remove triggers from table C<name> and node with same name
38
39 =item --node-url http://localhost:1978/node/name
40
41 Full URI to node. If it's not specified, it's assumed that you are using
42 Hyper Estraier on C<http://localhost:1978/>.
43
44 =item --sql "select col1,col2 from name"
45
46 SQL query which will return names of columns which are included in full-text
47 index. Have in mind that you can't use aliases (as I<something>) in this SQL
48 query (or triggers will be created with wrong fields).
49
50 If SQL query isn't specified, default one C<< select * from movies >> will
51 be created. That will be B<serious performance hit> if all columns are
52 not needed for search.
53
54 =item --pk id
55
56 Specify name of primary key column in SQL query. If you allready have primary
57 key on table or unique index and it consists of simgle column
58 (compaund keys are not supported) it will be picked up automatically.
59
60 If you specify value which is not unique, you will get just last occurence
61 of that item in index (which might be what you want). That's because specified
62 C<pk> column will be used for C<@uri> in Hyper Estraier.
63
64 If name of primary key begins with C<_> it will not be added into text
65 indexing (so you won't be able to find prmary key value, but it will still
66 be available as attribute value).
67
68 =item --user login
69
70 =item --passwd password
71
72 Username and password to use when connecting to Hyper Estraier. If not specified,
73 C<admin> and C<admin> will be used.
74
75 =item --debug
76
77 Dump debugging output. It may be specified multiple times for more verbose
78 debugging.
79
80 =back
81
82 =cut
83
84 my $usage = "$0 database_name (--create|--drop) table_name [--sql='select id,foo,bar from table'] [--pk=id]\n";
85
86 GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
87
88 my $dbname = shift @ARGV || die $usage;
89
90 $c->{dbi} = 'Pg:dbname=' . $dbname;
91
92 warn "# c: ", Dumper($c) if ($c->{debug});
93
94 my $table = $c->{create} || $c->{drop} || die $usage;
95
96 $c->{node_url} = 'http://localhost:1978/node/' . $table;
97
98 $c->{user} ||= 'admin';
99 $c->{passwd} ||= 'admin';
100
101 # create and configure node
102 my $node = new Search::Estraier::Node(
103 url => $c->{node_url},
104 user => $c->{user},
105 passwd => $c->{passwd},
106 croak_on_error => 1,
107 create => 1,
108 debug => $c->{debug} >= 4 ? 1 : 0,
109 );
110
111 # create DBI connection
112 my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
113
114 # drop existing triggers
115 sub drop_triggers {
116 my $table = shift || die "no table?";
117
118 my $sth = $dbh->prepare(qq{
119 SELECT relname,tgname
120 FROM pg_trigger JOIN pg_class ON relfilenode = tgrelid
121 WHERE tgname LIKE 'pgest_trigger_%' AND relname = ?
122 }) || $dbh->errstr;
123
124 $sth->execute( $table ) || $sth->errstr();
125
126 warn "there are ", $sth->rows, " triggers instead of just 3, dropping all\n" if ($sth->rows != 3);
127
128 while (my $row = $sth->fetchrow_hashref) {
129 my $sql = sprintf(qq{ DROP TRIGGER %s ON %s }, $row->{tgname}, $row->{relname} );
130 #warn "# $sql\n";
131 $dbh->do( $sql ) || $dbh->errstr;
132 }
133
134 warn "removed ", $sth->rows, " triggers from $table\n" if ($sth->rows);
135
136 }
137
138 if ($c->{drop}) {
139 drop_triggers( $table );
140 warn "removing node $table\n";
141 $node->master(
142 action => 'nodedel',
143 name => $table,
144 );
145 exit;
146 }
147
148 # clear existing node
149 $node->master(
150 action => 'nodeclr',
151 name => $table,
152 );
153
154 # create PostgreSQL functions
155 $dbh->do(qq{
156
157 CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
158 RETURNS setof record
159 AS 'pgest','pgest_node'
160 LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
161
162 CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
163 AS 'pgest', 'pgest_trigger'
164 LANGUAGE 'C' STRICT;
165
166 }) || die $dbh->errstr();
167
168
169 drop_triggers( $table );
170
171 if (! $c->{pk}) {
172
173 warn "# finding primary key for $table\n" if ($c->{debug});
174
175 my $index_fmt = qq{
176 SELECT
177 a.attname, t.typname
178 FROM pg_type t, pg_attribute a
179 WHERE t.oid = a.atttypid AND attrelid = (
180 SELECT indexrelid
181 FROM pg_class c, pg_index i
182 WHERE c.relname = '%s'
183 AND c.oid = i.indrelid
184 AND %s
185 AND indnatts = 1
186 )
187 };
188
189 $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') );
190
191 $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') );
192
193 }
194
195 die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
196
197 warn "using primary key $c->{pk}", $c->{pk} =~ m/^_/ ? " (not indexed)" : "", "\n";
198
199 $dbh->begin_work;
200
201 $c->{sql} ||= "select * from $table";
202
203 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
204 $sth->execute() || die $sth->errstr;
205
206 my @cols = @{ $sth->{NAME} };
207
208 die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
209
210 warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
211
212 my $total = $sth->rows;
213 my $i = 1;
214
215 my $t = time();
216 my $pk = $c->{pk} || 'id';
217
218 warn "indexing existing ",$sth->rows," rows\n";
219
220 while (my $row = $sth->fetchrow_hashref() ) {
221
222 warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
223
224 # create document
225 my $doc = new Search::Estraier::Document;
226
227 if (my $id = $row->{$pk}) {
228 $doc->add_attr('@uri', $id);
229 } else {
230 die "can't find pk column '$pk' in results\n";
231 }
232
233 my $log = sprintf "%4d ",$i;
234
235 while (my ($col,$val) = each %{$row}) {
236
237 if ($val) {
238 # add attributes (make column usable from attribute search)
239 $doc->add_attr($col, $val);
240
241 # add body text to document (make it searchable using full-text index)
242 $doc->add_text($val) unless ($col =~ m/^_/);
243
244 $log .= "R";
245 } else {
246 $log .= ".";
247 }
248
249 }
250
251 warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
252
253 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
254
255 $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
256
257 print STDERR $log;
258
259 }
260
261 my $cols = "'" . join("', '", @cols) . "'";
262
263 foreach my $t (qw/UPDATE INSERT DELETE/) {
264
265 my $lc_t = lc($t);
266
267 my $sql = qq{
268
269 CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
270 ON ${table} FOR EACH ROW
271 EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
272 '$c->{pk}', $cols
273 )
274
275 };
276
277 #warn "$sql\n";
278
279 $dbh->do( $sql ) || die $dbh->errstr;
280
281 }
282
283 warn "created consistency triggers\n";
284
285 $dbh->commit;
286
287 =head1 SEARCHING
288
289 At end of each run, this script will output example search SQL query on STDOUT.
290
291 You can use it to quickly construct queries for your application.
292
293 =cut
294
295 my $col_names = join(', ', @cols);
296 my $col_def = join(', ', map { "$_ text" } @cols);
297
298 print "
299 -- example SQL search query:
300
301 SELECT $col_names
302 FROM pgest(
303 -- node, login, passwd, depth
304 '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
305 -- full text search
306 'foo bar',
307 -- attribute filter, order, limit, offset
308 null, null, null, null,
309 -- return columns
310 array[$cols]
311 ) as ($col_def);
312
313 ";
314
315 __END__
316
317 =head1 AUTHOR
318
319 Dobrica Pavlinusic <dpavlin@rot13.org>
320
321 L<http://www.rot13.org/~dpavlin/>
322
323 =head1 LICENSE
324
325 This product is licensed under GNU Public License (GPL) v2 or later.
326
327 =cut
328

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26