/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 77 - (show annotations)
Tue Aug 8 11:01:00 2006 UTC (17 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 6762 byte(s)
try unique index if primary key doesn't exits
1 #!/usr/bin/perl -w
2
3 use strict;
4 use Search::Estraier 0.07;
5 use DBI;
6 use Data::Dumper;
7 use Encode qw/from_to/;
8 use Time::HiRes qw/time/;
9 use Getopt::Long;
10
11 =head1 NAME
12
13 pgest-index.pl - create full-text index of some columns in your database
14
15 =cut
16
17 my $c = {
18 debug => 0,
19 };
20
21 =head1 SYNOPSIS
22
23 pgest-index.pl --create movies --sql "select id,title,year from movies"
24
25 pgsql-index.pl --drop movies
26
27 Options:
28
29 =over 4
30
31 =item --create name
32
33 Create index C<name> and create triggers on table with same name
34
35 =item --drop name
36
37 Remove triggers from table C<name> and node with same name
38
39 =item --node-url http://localhost:1978/node/name
40
41 Full URI to node. If it's not specified, it's assumed that you are using
42 Hyper Estraier on C<http://localhost:1978/>.
43
44 =item --sql "select col1,col2 from name"
45
46 SQL query which will return names of columns which are included in full-text
47 index. Have in mind that you can't use aliases (as I<something>) in this SQL
48 query (or triggers will be created with wrong fields).
49
50 If SQL query isn't specified, default one C<< select * from movies >> will
51 be created. That will be B<serious performance hit> if all columns are
52 not needed for search.
53
54 =item --pk id
55
56 Specify name of primary key column in SQL query. If you allready have primary
57 key on table or unique index and it consists of simgle column
58 (compaund keys are not supported) it will be picked up automatically.
59
60 If you specify value which is not unique, you will get just last occurence
61 of that item in index (which might be what you want). That's because specified
62 C<pk> column will be used for C<@uri> in Hyper Estraier.
63
64 =item --user login
65
66 =item --passwd password
67
68 Username and password to use when connecting to Hyper Estraier. If not specified,
69 C<admin> and C<admin> will be used.
70
71 =item --debug
72
73 Dump debugging output. It may be specified multiple times for more verbose
74 debugging.
75
76 =back
77
78 =cut
79
80 my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
81
82 GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
83
84 my $dbname = shift @ARGV || die $usage;
85
86 $c->{dbi} = 'Pg:dbname=' . $dbname;
87
88 warn "# c: ", Dumper($c) if ($c->{debug});
89
90 my $table = $c->{create} || $c->{drop} || die $usage;
91
92 $c->{node_url} = 'http://localhost:1978/node/' . $table;
93
94 $c->{user} ||= 'admin';
95 $c->{passwd} ||= 'admin';
96
97 # create and configure node
98 my $node = new Search::Estraier::Node(
99 url => $c->{node_url},
100 user => $c->{user},
101 passwd => $c->{passwd},
102 croak_on_error => 1,
103 create => 1,
104 debug => $c->{debug} >= 4 ? 1 : 0,
105 );
106
107 # create DBI connection
108 my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
109
110 # drop existing triggers
111 sub drop_triggers {
112 my $table = shift || die "no table?";
113
114 warn "removing triggers from $table\n";
115
116 foreach my $t (qw/UPDATE INSERT DELETE/) {
117 my $lc_t = lc($t);
118 $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
119 }
120 }
121
122 if ($c->{drop}) {
123 drop_triggers( $table );
124 warn "removing node $table\n";
125 $node->master(
126 action => 'nodedel',
127 name => $table,
128 );
129 exit;
130 }
131
132 # clear existing node
133 $node->master(
134 action => 'nodeclr',
135 name => $table,
136 );
137
138 # create PostgreSQL functions
139 $dbh->do(qq{
140
141 CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
142 RETURNS setof record
143 AS 'pgest','pgest_node'
144 LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
145
146 CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
147 AS 'pgest', 'pgest_trigger'
148 LANGUAGE 'C' STRICT;
149
150 }) || die $dbh->errstr();
151
152
153 drop_triggers( $table );
154
155 if (! $c->{pk}) {
156
157 warn "# finding primary key for $table\n" if ($c->{debug});
158
159 my $index_fmt = qq{
160 SELECT
161 a.attname, t.typname
162 FROM pg_type t, pg_attribute a
163 WHERE t.oid = a.atttypid AND attrelid = (
164 SELECT indexrelid
165 FROM pg_class c, pg_index i
166 WHERE c.relname = '%s'
167 AND c.oid = i.indrelid
168 AND %s
169 AND indnatts = 1
170 )
171 };
172
173 $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') );
174
175 $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') );
176
177 }
178
179 die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
180
181 warn "using primary key $c->{pk}\n";
182
183 $dbh->begin_work;
184
185 $c->{sql} ||= "select * from $table";
186
187 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
188 $sth->execute() || die $sth->errstr();
189
190 my @cols = @{ $sth->{NAME} };
191
192 die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
193
194 warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
195
196 my $total = $sth->rows;
197 my $i = 1;
198
199 my $t = time();
200 my $pk = $c->{pk} || 'id';
201
202 warn "indexing existing ",$sth->rows," rows\n";
203
204 while (my $row = $sth->fetchrow_hashref() ) {
205
206 warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
207
208 # create document
209 my $doc = new Search::Estraier::Document;
210
211 if (my $id = $row->{$pk}) {
212 $doc->add_attr('@uri', $id);
213 } else {
214 die "can't find pk column '$pk' in results\n";
215 }
216
217 my $log = sprintf "%4d ",$i;
218
219 while (my ($col,$val) = each %{$row}) {
220
221 if ($val) {
222 # add attributes (make column usable from attribute search)
223 $doc->add_attr($col, $val);
224
225 # add body text to document (make it searchable using full-text index)
226 $doc->add_text($val);
227
228 $log .= "R";
229 } else {
230 $log .= ".";
231 }
232
233 }
234
235 warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
236
237 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
238
239 $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
240
241 print STDERR $log;
242
243 }
244
245 my $cols = "'" . join("', '", @cols) . "'";
246
247 foreach my $t (qw/UPDATE INSERT DELETE/) {
248
249 my $lc_t = lc($t);
250
251 my $sql = qq{
252
253 CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
254 ON ${table} FOR EACH ROW
255 EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
256 '$c->{pk}', $cols
257 )
258
259 };
260
261 #warn "$sql\n";
262
263 $dbh->do( $sql ) || die $dbh->errstr();
264
265 }
266
267 $dbh->commit;
268
269 =head1 SEARCHING
270
271 At end of each run, this script will output example search SQL query on STDOUT.
272
273 You can use it to quickly construct queries for your application.
274
275 =cut
276
277 my $col_names = join(', ', @cols);
278 my $col_def = join(', ', map { "$_ text" } @cols);
279
280 print "
281 -- example SQL search query:
282
283 SELECT $col_names
284 FROM pgest(
285 -- node, login, passwd, depth
286 '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
287 -- full text search
288 'foo bar',
289 -- attribute filter, order, limit, offset
290 null, null, null, null,
291 -- return columns
292 array[$cols]
293 ) as ($col_def);
294
295 ";
296
297 __END__
298
299 =head1 AUTHOR
300
301 Dobrica Pavlinusic <dpavlin@rot13.org>
302
303 L<http://www.rot13.org/~dpavlin/>
304
305 =head1 LICENSE
306
307 This product is licensed under GNU Public License (GPL) v2 or later.
308
309 =cut
310

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26