/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 70 - (show annotations)
Mon Aug 7 21:08:41 2006 UTC (17 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 6611 byte(s)
finish (hopefully) documentation. perldoc pgsql-index.pl is now really
useful :-)
1 #!/usr/bin/perl -w
2
3 use strict;
4 use Search::Estraier 0.07;
5 use DBI;
6 use Data::Dumper;
7 use Encode qw/from_to/;
8 use Time::HiRes qw/time/;
9 use Getopt::Long;
10
11 =head1 NAME
12
13 pgest-index.pl - create full-text index of some columns in your database
14
15 =cut
16
17 my $c = {
18 debug => 1,
19 };
20
21 =head1 SYNOPSIS
22
23 pgest-index.pl --create movies --sql "select id,title,year from movies"
24
25 pgsql-index.pl --drop movies
26
27 Options:
28
29 =over 4
30
31 =item --create name
32
33 Create index C<name> and create triggers on table with same name
34
35 =item --drop name
36
37 Remove triggers from table C<name> and node with same name
38
39 =item --node-url http://localhost:1978/node/name
40
41 Full URI to node. If it's not specified, it's assumed that you are using
42 Hyper Estraier on C<http://localhost:1978/>.
43
44 =item --sql "select col1,col2 from name"
45
46 SQL query which will return names of columns which are included in full-text
47 index. Have in mind that you can't use aliases (as I<something>) in this SQL
48 query (or triggers will be created with wrong fields).
49
50 If SQL query isn't specified, default one C<< select * from movies >> will
51 be created. That will be B<serious performance hit> if all columns are
52 not needed for search.
53
54 =item --pk id
55
56 Specify name of primary key column in SQL query. If you allready have primary
57 key on table, and it consists of simgle column (compaund keys are not supported)
58 it will be picked up automatically.
59
60 If you specify value which is not unique, you will get just last occurence
61 of that item in index (which might be what you want). That's because specified
62 C<pk> column will be used for C<@uri> in Hyper Estraier.
63
64 =item --user login
65
66 =item --passwd password
67
68 Username and password to use when connecting to Hyper Estraier. If not specified,
69 C<admin> and C<admin> will be used.
70
71 =item --debug
72
73 Dump debugging output. It may be specified multiple times for more verbose
74 debugging.
75
76 =back
77
78 =cut
79
80 my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
81
82 GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
83
84 my $dbname = shift @ARGV || die $usage;
85
86 $c->{dbi} = 'Pg:dbname=' . $dbname;
87
88 warn "# c: ", Dumper($c) if ($c->{debug});
89
90 my $table = $c->{create} || $c->{drop} || die $usage;
91
92 $c->{node_url} = 'http://localhost:1978/node/' . $table;
93
94 $c->{user} ||= 'admin';
95 $c->{passwd} ||= 'admin';
96
97 # create and configure node
98 my $node = new Search::Estraier::Node(
99 url => $c->{node_url},
100 user => $c->{user},
101 passwd => $c->{passwd},
102 croak_on_error => 1,
103 create => 1,
104 debug => $c->{debug} >= 4 ? 1 : 0,
105 );
106
107 # create DBI connection
108 my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
109
110 # drop existing triggers
111 sub drop_triggers {
112 my $table = shift || die "no table?";
113
114 warn "removing triggers from $table\n";
115
116 foreach my $t (qw/UPDATE INSERT DELETE/) {
117 my $lc_t = lc($t);
118 $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
119 }
120 }
121
122 if ($c->{drop}) {
123 drop_triggers( $table );
124 warn "removing node $table\n";
125 $node->master(
126 action => 'nodedel',
127 name => $table,
128 );
129 exit;
130 }
131
132 # clear existing node
133 $node->master(
134 action => 'nodeclr',
135 name => $table,
136 );
137
138 # create PostgreSQL functions
139 $dbh->do(qq{
140
141 CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
142 RETURNS setof record
143 AS 'pgest','pgest_node'
144 LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
145
146 CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
147 AS 'pgest', 'pgest_trigger'
148 LANGUAGE 'C' STRICT;
149
150 }) || die $dbh->errstr();
151
152
153 drop_triggers( $table );
154
155 if (! $c->{pk}) {
156
157 warn "# finding primary key for $table\n" if ($c->{debug});
158
159 $c->{pk} = $dbh->selectrow_array(qq{
160 SELECT
161 a.attname, t.typname
162 FROM pg_type t, pg_attribute a
163 WHERE t.oid = a.atttypid AND attrelid = (
164 SELECT indexrelid
165 FROM pg_class c, pg_index i
166 WHERE c.relname = '$table'
167 AND c.oid = i.indrelid
168 AND indisprimary
169 AND indnatts = 1
170 )
171 });
172
173 }
174
175 die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
176
177 warn "using primary key $c->{pk}\n";
178
179 $dbh->begin_work;
180
181 $c->{sql} ||= "select * from $table";
182
183 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
184 $sth->execute() || die $sth->errstr();
185
186 my @cols = @{ $sth->{NAME} };
187
188 die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
189
190 warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
191
192 my $total = $sth->rows;
193 my $i = 1;
194
195 my $t = time();
196 my $pk = $c->{pk} || 'id';
197
198 warn "indexing existing ",$sth->rows," rows\n";
199
200 while (my $row = $sth->fetchrow_hashref() ) {
201
202 warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
203
204 # create document
205 my $doc = new Search::Estraier::Document;
206
207 if (my $id = $row->{$pk}) {
208 $doc->add_attr('@uri', $id);
209 } else {
210 die "can't find pk column '$pk' in results\n";
211 }
212
213 my $log = sprintf "%4d ",$i;
214
215 while (my ($col,$val) = each %{$row}) {
216
217 if ($val) {
218 # add attributes (make column usable from attribute search)
219 $doc->add_attr($col, $val);
220
221 # add body text to document (make it searchable using full-text index)
222 $doc->add_text($val);
223
224 $log .= "R";
225 } else {
226 $log .= ".";
227 }
228
229 }
230
231 warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
232
233 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
234
235 $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
236
237 print STDERR $log;
238
239 }
240
241 my $cols = "'" . join("', '", @cols) . "'";
242
243 foreach my $t (qw/UPDATE INSERT DELETE/) {
244
245 my $lc_t = lc($t);
246
247 my $sql = qq{
248
249 CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
250 ON ${table} FOR EACH ROW
251 EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
252 '$c->{pk}', $cols
253 )
254
255 };
256
257 #warn "$sql\n";
258
259 $dbh->do( $sql ) || die $dbh->errstr();
260
261 }
262
263 $dbh->commit;
264
265 =head1 SEARCHING
266
267 At end of each run, this script will output example search SQL query on STDOUT.
268
269 You can use it to quickly construct queries for your application.
270
271 =cut
272
273 my $col_names = join(', ', @cols);
274 my $col_def = join(', ', map { "$_ text" } @cols);
275
276 print "
277 -- example SQL search query:
278
279 SELECT $col_names
280 FROM pgest(
281 -- node, login, passwd, depth
282 '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
283 -- full text search
284 'foo bar',
285 -- attribute filter, order, limit, offset
286 null, null, null, null,
287 -- return columns
288 array[$cols]
289 ) as ($col_def);
290
291 ";
292
293 __END__
294
295 =head1 AUTHOR
296
297 Dobrica Pavlinusic <dpavlin@rot13.org>
298
299 L<http://www.rot13.org/~dpavlin/>
300
301 =head1 LICENSE
302
303 This product is licensed under GNU Public License (GPL) v2 or later.
304
305 =cut
306

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26