/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 88 - (hide annotations)
Sun Mar 16 20:55:56 2008 UTC (16 years, 1 month ago) by dpavlin
File MIME type: text/plain
File size: 7473 byte(s)
better error message when run without arguments
1 dpavlin 62 #!/usr/bin/perl -w
2    
3     use strict;
4 dpavlin 63 use Search::Estraier 0.07;
5 dpavlin 62 use DBI;
6     use Data::Dumper;
7     use Encode qw/from_to/;
8     use Time::HiRes qw/time/;
9     use Getopt::Long;
10    
11     =head1 NAME
12    
13     pgest-index.pl - create full-text index of some columns in your database
14    
15     =cut
16    
17     my $c = {
18 dpavlin 74 debug => 0,
19 dpavlin 62 };
20    
21 dpavlin 67 =head1 SYNOPSIS
22 dpavlin 62
23 dpavlin 67 pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25     pgsql-index.pl --drop movies
26    
27     Options:
28    
29     =over 4
30    
31     =item --create name
32    
33     Create index C<name> and create triggers on table with same name
34    
35     =item --drop name
36    
37     Remove triggers from table C<name> and node with same name
38    
39     =item --node-url http://localhost:1978/node/name
40    
41     Full URI to node. If it's not specified, it's assumed that you are using
42     Hyper Estraier on C<http://localhost:1978/>.
43    
44     =item --sql "select col1,col2 from name"
45    
46     SQL query which will return names of columns which are included in full-text
47     index. Have in mind that you can't use aliases (as I<something>) in this SQL
48     query (or triggers will be created with wrong fields).
49    
50 dpavlin 70 If SQL query isn't specified, default one C<< select * from movies >> will
51     be created. That will be B<serious performance hit> if all columns are
52     not needed for search.
53    
54 dpavlin 67 =item --pk id
55    
56 dpavlin 70 Specify name of primary key column in SQL query. If you allready have primary
57 dpavlin 77 key on table or unique index and it consists of simgle column
58     (compaund keys are not supported) it will be picked up automatically.
59 dpavlin 67
60 dpavlin 70 If you specify value which is not unique, you will get just last occurence
61     of that item in index (which might be what you want). That's because specified
62     C<pk> column will be used for C<@uri> in Hyper Estraier.
63    
64 dpavlin 82 If name of primary key begins with C<_> it will not be added into text
65     indexing (so you won't be able to find prmary key value, but it will still
66     be available as attribute value).
67    
68 dpavlin 67 =item --user login
69    
70     =item --passwd password
71    
72     Username and password to use when connecting to Hyper Estraier. If not specified,
73     C<admin> and C<admin> will be used.
74    
75     =item --debug
76    
77 dpavlin 70 Dump debugging output. It may be specified multiple times for more verbose
78     debugging.
79 dpavlin 67
80     =back
81    
82     =cut
83    
84 dpavlin 88 my $usage = "$0 database_name (--create|--drop) table_name [--sql='select id,foo,bar from table'] [--pk=id]\n";
85 dpavlin 68
86 dpavlin 67 GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
87    
88 dpavlin 68 my $dbname = shift @ARGV || die $usage;
89    
90     $c->{dbi} = 'Pg:dbname=' . $dbname;
91    
92 dpavlin 62 warn "# c: ", Dumper($c) if ($c->{debug});
93    
94 dpavlin 68 my $table = $c->{create} || $c->{drop} || die $usage;
95 dpavlin 63
96 dpavlin 67 $c->{node_url} = 'http://localhost:1978/node/' . $table;
97    
98     $c->{user} ||= 'admin';
99     $c->{passwd} ||= 'admin';
100    
101 dpavlin 62 # create and configure node
102     my $node = new Search::Estraier::Node(
103     url => $c->{node_url},
104     user => $c->{user},
105     passwd => $c->{passwd},
106     croak_on_error => 1,
107     create => 1,
108     debug => $c->{debug} >= 4 ? 1 : 0,
109     );
110    
111 dpavlin 67 # create DBI connection
112     my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
113    
114     # drop existing triggers
115     sub drop_triggers {
116     my $table = shift || die "no table?";
117    
118 dpavlin 78 my $sth = $dbh->prepare(qq{
119     SELECT relname,tgname
120     FROM pg_trigger JOIN pg_class ON relfilenode = tgrelid
121     WHERE tgname LIKE 'pgest_trigger_%' AND relname = ?
122     }) || $dbh->errstr;
123 dpavlin 67
124 dpavlin 78 $sth->execute( $table ) || $sth->errstr();
125    
126     warn "there are ", $sth->rows, " triggers instead of just 3, dropping all\n" if ($sth->rows != 3);
127    
128     while (my $row = $sth->fetchrow_hashref) {
129     my $sql = sprintf(qq{ DROP TRIGGER %s ON %s }, $row->{tgname}, $row->{relname} );
130     #warn "# $sql\n";
131     $dbh->do( $sql ) || $dbh->errstr;
132 dpavlin 67 }
133 dpavlin 78
134     warn "removed ", $sth->rows, " triggers from $table\n" if ($sth->rows);
135    
136 dpavlin 67 }
137    
138     if ($c->{drop}) {
139     drop_triggers( $table );
140     warn "removing node $table\n";
141     $node->master(
142     action => 'nodedel',
143     name => $table,
144     );
145     exit;
146     }
147    
148     # clear existing node
149 dpavlin 63 $node->master(
150     action => 'nodeclr',
151     name => $table,
152     );
153    
154     # create PostgreSQL functions
155     $dbh->do(qq{
156    
157     CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
158     RETURNS setof record
159     AS 'pgest','pgest_node'
160     LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
161    
162     CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
163     AS 'pgest', 'pgest_trigger'
164     LANGUAGE 'C' STRICT;
165    
166     }) || die $dbh->errstr();
167    
168    
169 dpavlin 67 drop_triggers( $table );
170 dpavlin 63
171 dpavlin 68 if (! $c->{pk}) {
172    
173     warn "# finding primary key for $table\n" if ($c->{debug});
174    
175 dpavlin 77 my $index_fmt = qq{
176 dpavlin 68 SELECT
177     a.attname, t.typname
178     FROM pg_type t, pg_attribute a
179     WHERE t.oid = a.atttypid AND attrelid = (
180     SELECT indexrelid
181     FROM pg_class c, pg_index i
182 dpavlin 77 WHERE c.relname = '%s'
183 dpavlin 68 AND c.oid = i.indrelid
184 dpavlin 77 AND %s
185 dpavlin 68 AND indnatts = 1
186     )
187 dpavlin 77 };
188 dpavlin 68
189 dpavlin 77 $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') );
190    
191     $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') );
192    
193 dpavlin 68 }
194    
195     die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
196    
197 dpavlin 82 warn "using primary key $c->{pk}", $c->{pk} =~ m/^_/ ? " (not indexed)" : "", "\n";
198 dpavlin 68
199 dpavlin 63 $dbh->begin_work;
200    
201 dpavlin 68 $c->{sql} ||= "select * from $table";
202    
203 dpavlin 62 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
204 dpavlin 78 $sth->execute() || die $sth->errstr;
205 dpavlin 62
206     my @cols = @{ $sth->{NAME} };
207    
208 dpavlin 68 die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
209    
210 dpavlin 62 warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
211    
212     my $total = $sth->rows;
213     my $i = 1;
214    
215     my $t = time();
216 dpavlin 67 my $pk = $c->{pk} || 'id';
217 dpavlin 62
218 dpavlin 69 warn "indexing existing ",$sth->rows," rows\n";
219    
220 dpavlin 62 while (my $row = $sth->fetchrow_hashref() ) {
221    
222     warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
223    
224     # create document
225     my $doc = new Search::Estraier::Document;
226    
227 dpavlin 67 if (my $id = $row->{$pk}) {
228 dpavlin 62 $doc->add_attr('@uri', $id);
229     } else {
230 dpavlin 67 die "can't find pk column '$pk' in results\n";
231 dpavlin 62 }
232    
233 dpavlin 69 my $log = sprintf "%4d ",$i;
234 dpavlin 62
235     while (my ($col,$val) = each %{$row}) {
236    
237     if ($val) {
238     # add attributes (make column usable from attribute search)
239     $doc->add_attr($col, $val);
240    
241     # add body text to document (make it searchable using full-text index)
242 dpavlin 82 $doc->add_text($val) unless ($col =~ m/^_/);
243 dpavlin 62
244 dpavlin 69 $log .= "R";
245 dpavlin 62 } else {
246 dpavlin 69 $log .= ".";
247 dpavlin 62 }
248    
249     }
250    
251     warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
252    
253     die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
254    
255 dpavlin 69 $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
256 dpavlin 62
257 dpavlin 69 print STDERR $log;
258    
259 dpavlin 62 }
260    
261 dpavlin 65 my $cols = "'" . join("', '", @cols) . "'";
262 dpavlin 62
263     foreach my $t (qw/UPDATE INSERT DELETE/) {
264    
265     my $lc_t = lc($t);
266    
267     my $sql = qq{
268    
269     CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
270     ON ${table} FOR EACH ROW
271     EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
272 dpavlin 67 '$c->{pk}', $cols
273 dpavlin 62 )
274    
275     };
276    
277 dpavlin 63 #warn "$sql\n";
278 dpavlin 62
279 dpavlin 78 $dbh->do( $sql ) || die $dbh->errstr;
280 dpavlin 62
281     }
282 dpavlin 63
283 dpavlin 78 warn "created consistency triggers\n";
284    
285 dpavlin 63 $dbh->commit;
286 dpavlin 65
287 dpavlin 70 =head1 SEARCHING
288    
289     At end of each run, this script will output example search SQL query on STDOUT.
290    
291     You can use it to quickly construct queries for your application.
292    
293     =cut
294    
295 dpavlin 65 my $col_names = join(', ', @cols);
296     my $col_def = join(', ', map { "$_ text" } @cols);
297    
298 dpavlin 66 print "
299 dpavlin 69 -- example SQL search query:
300 dpavlin 65
301     SELECT $col_names
302     FROM pgest(
303     -- node, login, passwd, depth
304     '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
305     -- full text search
306     'foo bar',
307 dpavlin 66 -- attribute filter, order, limit, offset
308 dpavlin 65 null, null, null, null,
309     -- return columns
310     array[$cols]
311     ) as ($col_def);
312    
313     ";
314 dpavlin 70
315     __END__
316    
317     =head1 AUTHOR
318    
319     Dobrica Pavlinusic <dpavlin@rot13.org>
320    
321     L<http://www.rot13.org/~dpavlin/>
322    
323     =head1 LICENSE
324    
325     This product is licensed under GNU Public License (GPL) v2 or later.
326    
327     =cut
328    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26