/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 68 - (show annotations)
Mon Aug 7 17:05:07 2006 UTC (17 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 5664 byte(s)
auto-discover primary key (if it's single column and sutable), auto-generate SQL
select * from table which is *NOT OPTIMAL* if you are not going to search all
columns in your table
1 #!/usr/bin/perl -w
2
3 use strict;
4 use Search::Estraier 0.07;
5 use DBI;
6 use Data::Dumper;
7 use Encode qw/from_to/;
8 use Time::HiRes qw/time/;
9 use Getopt::Long;
10
11 =head1 NAME
12
13 pgest-index.pl - create full-text index of some columns in your database
14
15 =cut
16
17 my $c = {
18 debug => 1,
19 };
20
21 =head1 SYNOPSIS
22
23 pgest-index.pl --create movies --sql "select id,title,year from movies"
24
25 pgsql-index.pl --drop movies
26
27 Options:
28
29 =over 4
30
31 =item --create name
32
33 Create index C<name> and create triggers on table with same name
34
35 =item --drop name
36
37 Remove triggers from table C<name> and node with same name
38
39 =item --node-url http://localhost:1978/node/name
40
41 Full URI to node. If it's not specified, it's assumed that you are using
42 Hyper Estraier on C<http://localhost:1978/>.
43
44 =item --sql "select col1,col2 from name"
45
46 SQL query which will return names of columns which are included in full-text
47 index. Have in mind that you can't use aliases (as I<something>) in this SQL
48 query (or triggers will be created with wrong fields).
49
50 =item --pk id
51
52 Specify name of primary key column in SQL query. If not specified, C<id> will be used.
53
54 =item --user login
55
56 =item --passwd password
57
58 Username and password to use when connecting to Hyper Estraier. If not specified,
59 C<admin> and C<admin> will be used.
60
61 =item --debug
62
63 Dump debugging output. It may be specified multiple times for more verbose debugging.
64
65 =back
66
67 =cut
68
69 my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
70
71 GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
72
73 my $dbname = shift @ARGV || die $usage;
74
75 $c->{dbi} = 'Pg:dbname=' . $dbname;
76
77 warn "# c: ", Dumper($c) if ($c->{debug});
78
79 my $table = $c->{create} || $c->{drop} || die $usage;
80
81 $c->{node_url} = 'http://localhost:1978/node/' . $table;
82
83 $c->{user} ||= 'admin';
84 $c->{passwd} ||= 'admin';
85
86 # create and configure node
87 my $node = new Search::Estraier::Node(
88 url => $c->{node_url},
89 user => $c->{user},
90 passwd => $c->{passwd},
91 croak_on_error => 1,
92 create => 1,
93 debug => $c->{debug} >= 4 ? 1 : 0,
94 );
95
96 # create DBI connection
97 my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
98
99 # drop existing triggers
100 sub drop_triggers {
101 my $table = shift || die "no table?";
102
103 warn "removing triggers from $table\n";
104
105 foreach my $t (qw/UPDATE INSERT DELETE/) {
106 my $lc_t = lc($t);
107 $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
108 }
109 }
110
111 if ($c->{drop}) {
112 drop_triggers( $table );
113 warn "removing node $table\n";
114 $node->master(
115 action => 'nodedel',
116 name => $table,
117 );
118 exit;
119 }
120
121 # clear existing node
122 $node->master(
123 action => 'nodeclr',
124 name => $table,
125 );
126
127 # create PostgreSQL functions
128 $dbh->do(qq{
129
130 CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
131 RETURNS setof record
132 AS 'pgest','pgest_node'
133 LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
134
135 CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
136 AS 'pgest', 'pgest_trigger'
137 LANGUAGE 'C' STRICT;
138
139 }) || die $dbh->errstr();
140
141
142 drop_triggers( $table );
143
144 if (! $c->{pk}) {
145
146 warn "# finding primary key for $table\n" if ($c->{debug});
147
148 $c->{pk} = $dbh->selectrow_array(qq{
149 SELECT
150 a.attname, t.typname
151 FROM pg_type t, pg_attribute a
152 WHERE t.oid = a.atttypid AND attrelid = (
153 SELECT indexrelid
154 FROM pg_class c, pg_index i
155 WHERE c.relname = '$table'
156 AND c.oid = i.indrelid
157 AND indisprimary
158 AND indnatts = 1
159 )
160 });
161
162 }
163
164 die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
165
166 warn "using primary key $c->{pk}\n";
167
168 $dbh->begin_work;
169
170 $c->{sql} ||= "select * from $table";
171
172 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
173 $sth->execute() || die $sth->errstr();
174
175 my @cols = @{ $sth->{NAME} };
176
177 die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
178
179 warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
180
181 my $total = $sth->rows;
182 my $i = 1;
183
184 my $t = time();
185 my $pk = $c->{pk} || 'id';
186
187 while (my $row = $sth->fetchrow_hashref() ) {
188
189 warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
190
191 # create document
192 my $doc = new Search::Estraier::Document;
193
194 if (my $id = $row->{$pk}) {
195 $doc->add_attr('@uri', $id);
196 } else {
197 die "can't find pk column '$pk' in results\n";
198 }
199
200 printf "%4d ",$i;
201
202 while (my ($col,$val) = each %{$row}) {
203
204 if ($val) {
205 # add attributes (make column usable from attribute search)
206 $doc->add_attr($col, $val);
207
208 # add body text to document (make it searchable using full-text index)
209 $doc->add_text($val);
210
211 print "R";
212 } else {
213 print ".";
214 }
215
216 }
217
218 warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
219
220 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
221
222 printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
223
224 }
225
226 my $cols = "'" . join("', '", @cols) . "'";
227
228 foreach my $t (qw/UPDATE INSERT DELETE/) {
229
230 my $lc_t = lc($t);
231
232 my $sql = qq{
233
234 CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
235 ON ${table} FOR EACH ROW
236 EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
237 '$c->{pk}', $cols
238 )
239
240 };
241
242 #warn "$sql\n";
243
244 $dbh->do( $sql ) || die $dbh->errstr();
245
246 }
247
248 $dbh->commit;
249
250 my $col_names = join(', ', @cols);
251 my $col_def = join(', ', map { "$_ text" } @cols);
252
253 print "
254 ## example SQL search query:
255
256 SELECT $col_names
257 FROM pgest(
258 -- node, login, passwd, depth
259 '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
260 -- full text search
261 'foo bar',
262 -- attribute filter, order, limit, offset
263 null, null, null, null,
264 -- return columns
265 array[$cols]
266 ) as ($col_def);
267
268 ";

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26