/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 77 - (hide annotations)
Tue Aug 8 11:01:00 2006 UTC (17 years, 9 months ago) by dpavlin
File MIME type: text/plain
File size: 6762 byte(s)
try unique index if primary key doesn't exits
1 dpavlin 62 #!/usr/bin/perl -w
2    
3     use strict;
4 dpavlin 63 use Search::Estraier 0.07;
5 dpavlin 62 use DBI;
6     use Data::Dumper;
7     use Encode qw/from_to/;
8     use Time::HiRes qw/time/;
9     use Getopt::Long;
10    
11     =head1 NAME
12    
13     pgest-index.pl - create full-text index of some columns in your database
14    
15     =cut
16    
17     my $c = {
18 dpavlin 74 debug => 0,
19 dpavlin 62 };
20    
21 dpavlin 67 =head1 SYNOPSIS
22 dpavlin 62
23 dpavlin 67 pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25     pgsql-index.pl --drop movies
26    
27     Options:
28    
29     =over 4
30    
31     =item --create name
32    
33     Create index C<name> and create triggers on table with same name
34    
35     =item --drop name
36    
37     Remove triggers from table C<name> and node with same name
38    
39     =item --node-url http://localhost:1978/node/name
40    
41     Full URI to node. If it's not specified, it's assumed that you are using
42     Hyper Estraier on C<http://localhost:1978/>.
43    
44     =item --sql "select col1,col2 from name"
45    
46     SQL query which will return names of columns which are included in full-text
47     index. Have in mind that you can't use aliases (as I<something>) in this SQL
48     query (or triggers will be created with wrong fields).
49    
50 dpavlin 70 If SQL query isn't specified, default one C<< select * from movies >> will
51     be created. That will be B<serious performance hit> if all columns are
52     not needed for search.
53    
54 dpavlin 67 =item --pk id
55    
56 dpavlin 70 Specify name of primary key column in SQL query. If you allready have primary
57 dpavlin 77 key on table or unique index and it consists of simgle column
58     (compaund keys are not supported) it will be picked up automatically.
59 dpavlin 67
60 dpavlin 70 If you specify value which is not unique, you will get just last occurence
61     of that item in index (which might be what you want). That's because specified
62     C<pk> column will be used for C<@uri> in Hyper Estraier.
63    
64 dpavlin 67 =item --user login
65    
66     =item --passwd password
67    
68     Username and password to use when connecting to Hyper Estraier. If not specified,
69     C<admin> and C<admin> will be used.
70    
71     =item --debug
72    
73 dpavlin 70 Dump debugging output. It may be specified multiple times for more verbose
74     debugging.
75 dpavlin 67
76     =back
77    
78     =cut
79    
80 dpavlin 68 my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
81    
82 dpavlin 67 GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
83    
84 dpavlin 68 my $dbname = shift @ARGV || die $usage;
85    
86     $c->{dbi} = 'Pg:dbname=' . $dbname;
87    
88 dpavlin 62 warn "# c: ", Dumper($c) if ($c->{debug});
89    
90 dpavlin 68 my $table = $c->{create} || $c->{drop} || die $usage;
91 dpavlin 63
92 dpavlin 67 $c->{node_url} = 'http://localhost:1978/node/' . $table;
93    
94     $c->{user} ||= 'admin';
95     $c->{passwd} ||= 'admin';
96    
97 dpavlin 62 # create and configure node
98     my $node = new Search::Estraier::Node(
99     url => $c->{node_url},
100     user => $c->{user},
101     passwd => $c->{passwd},
102     croak_on_error => 1,
103     create => 1,
104     debug => $c->{debug} >= 4 ? 1 : 0,
105     );
106    
107 dpavlin 67 # create DBI connection
108     my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
109    
110     # drop existing triggers
111     sub drop_triggers {
112     my $table = shift || die "no table?";
113    
114     warn "removing triggers from $table\n";
115    
116     foreach my $t (qw/UPDATE INSERT DELETE/) {
117     my $lc_t = lc($t);
118     $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
119     }
120     }
121    
122     if ($c->{drop}) {
123     drop_triggers( $table );
124     warn "removing node $table\n";
125     $node->master(
126     action => 'nodedel',
127     name => $table,
128     );
129     exit;
130     }
131    
132     # clear existing node
133 dpavlin 63 $node->master(
134     action => 'nodeclr',
135     name => $table,
136     );
137    
138     # create PostgreSQL functions
139     $dbh->do(qq{
140    
141     CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
142     RETURNS setof record
143     AS 'pgest','pgest_node'
144     LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
145    
146     CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
147     AS 'pgest', 'pgest_trigger'
148     LANGUAGE 'C' STRICT;
149    
150     }) || die $dbh->errstr();
151    
152    
153 dpavlin 67 drop_triggers( $table );
154 dpavlin 63
155 dpavlin 68 if (! $c->{pk}) {
156    
157     warn "# finding primary key for $table\n" if ($c->{debug});
158    
159 dpavlin 77 my $index_fmt = qq{
160 dpavlin 68 SELECT
161     a.attname, t.typname
162     FROM pg_type t, pg_attribute a
163     WHERE t.oid = a.atttypid AND attrelid = (
164     SELECT indexrelid
165     FROM pg_class c, pg_index i
166 dpavlin 77 WHERE c.relname = '%s'
167 dpavlin 68 AND c.oid = i.indrelid
168 dpavlin 77 AND %s
169 dpavlin 68 AND indnatts = 1
170     )
171 dpavlin 77 };
172 dpavlin 68
173 dpavlin 77 $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') );
174    
175     $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') );
176    
177 dpavlin 68 }
178    
179     die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
180    
181     warn "using primary key $c->{pk}\n";
182    
183 dpavlin 63 $dbh->begin_work;
184    
185 dpavlin 68 $c->{sql} ||= "select * from $table";
186    
187 dpavlin 62 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
188     $sth->execute() || die $sth->errstr();
189    
190     my @cols = @{ $sth->{NAME} };
191    
192 dpavlin 68 die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
193    
194 dpavlin 62 warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
195    
196     my $total = $sth->rows;
197     my $i = 1;
198    
199     my $t = time();
200 dpavlin 67 my $pk = $c->{pk} || 'id';
201 dpavlin 62
202 dpavlin 69 warn "indexing existing ",$sth->rows," rows\n";
203    
204 dpavlin 62 while (my $row = $sth->fetchrow_hashref() ) {
205    
206     warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
207    
208     # create document
209     my $doc = new Search::Estraier::Document;
210    
211 dpavlin 67 if (my $id = $row->{$pk}) {
212 dpavlin 62 $doc->add_attr('@uri', $id);
213     } else {
214 dpavlin 67 die "can't find pk column '$pk' in results\n";
215 dpavlin 62 }
216    
217 dpavlin 69 my $log = sprintf "%4d ",$i;
218 dpavlin 62
219     while (my ($col,$val) = each %{$row}) {
220    
221     if ($val) {
222     # add attributes (make column usable from attribute search)
223     $doc->add_attr($col, $val);
224    
225     # add body text to document (make it searchable using full-text index)
226     $doc->add_text($val);
227    
228 dpavlin 69 $log .= "R";
229 dpavlin 62 } else {
230 dpavlin 69 $log .= ".";
231 dpavlin 62 }
232    
233     }
234    
235     warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
236    
237     die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
238    
239 dpavlin 69 $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
240 dpavlin 62
241 dpavlin 69 print STDERR $log;
242    
243 dpavlin 62 }
244    
245 dpavlin 65 my $cols = "'" . join("', '", @cols) . "'";
246 dpavlin 62
247     foreach my $t (qw/UPDATE INSERT DELETE/) {
248    
249     my $lc_t = lc($t);
250    
251     my $sql = qq{
252    
253     CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
254     ON ${table} FOR EACH ROW
255     EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
256 dpavlin 67 '$c->{pk}', $cols
257 dpavlin 62 )
258    
259     };
260    
261 dpavlin 63 #warn "$sql\n";
262 dpavlin 62
263     $dbh->do( $sql ) || die $dbh->errstr();
264    
265     }
266 dpavlin 63
267     $dbh->commit;
268 dpavlin 65
269 dpavlin 70 =head1 SEARCHING
270    
271     At end of each run, this script will output example search SQL query on STDOUT.
272    
273     You can use it to quickly construct queries for your application.
274    
275     =cut
276    
277 dpavlin 65 my $col_names = join(', ', @cols);
278     my $col_def = join(', ', map { "$_ text" } @cols);
279    
280 dpavlin 66 print "
281 dpavlin 69 -- example SQL search query:
282 dpavlin 65
283     SELECT $col_names
284     FROM pgest(
285     -- node, login, passwd, depth
286     '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
287     -- full text search
288     'foo bar',
289 dpavlin 66 -- attribute filter, order, limit, offset
290 dpavlin 65 null, null, null, null,
291     -- return columns
292     array[$cols]
293     ) as ($col_def);
294    
295     ";
296 dpavlin 70
297     __END__
298    
299     =head1 AUTHOR
300    
301     Dobrica Pavlinusic <dpavlin@rot13.org>
302    
303     L<http://www.rot13.org/~dpavlin/>
304    
305     =head1 LICENSE
306    
307     This product is licensed under GNU Public License (GPL) v2 or later.
308    
309     =cut
310    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26