/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 68 - (hide annotations)
Mon Aug 7 17:05:07 2006 UTC (17 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 5664 byte(s)
auto-discover primary key (if it's single column and sutable), auto-generate SQL
select * from table which is *NOT OPTIMAL* if you are not going to search all
columns in your table
1 dpavlin 62 #!/usr/bin/perl -w
2    
3     use strict;
4 dpavlin 63 use Search::Estraier 0.07;
5 dpavlin 62 use DBI;
6     use Data::Dumper;
7     use Encode qw/from_to/;
8     use Time::HiRes qw/time/;
9     use Getopt::Long;
10    
11     =head1 NAME
12    
13     pgest-index.pl - create full-text index of some columns in your database
14    
15     =cut
16    
17     my $c = {
18     debug => 1,
19     };
20    
21 dpavlin 67 =head1 SYNOPSIS
22 dpavlin 62
23 dpavlin 67 pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25     pgsql-index.pl --drop movies
26    
27     Options:
28    
29     =over 4
30    
31     =item --create name
32    
33     Create index C<name> and create triggers on table with same name
34    
35     =item --drop name
36    
37     Remove triggers from table C<name> and node with same name
38    
39     =item --node-url http://localhost:1978/node/name
40    
41     Full URI to node. If it's not specified, it's assumed that you are using
42     Hyper Estraier on C<http://localhost:1978/>.
43    
44     =item --sql "select col1,col2 from name"
45    
46     SQL query which will return names of columns which are included in full-text
47     index. Have in mind that you can't use aliases (as I<something>) in this SQL
48     query (or triggers will be created with wrong fields).
49    
50     =item --pk id
51    
52     Specify name of primary key column in SQL query. If not specified, C<id> will be used.
53    
54     =item --user login
55    
56     =item --passwd password
57    
58     Username and password to use when connecting to Hyper Estraier. If not specified,
59     C<admin> and C<admin> will be used.
60    
61     =item --debug
62    
63     Dump debugging output. It may be specified multiple times for more verbose debugging.
64    
65     =back
66    
67     =cut
68    
69 dpavlin 68 my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
70    
71 dpavlin 67 GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
72    
73 dpavlin 68 my $dbname = shift @ARGV || die $usage;
74    
75     $c->{dbi} = 'Pg:dbname=' . $dbname;
76    
77 dpavlin 62 warn "# c: ", Dumper($c) if ($c->{debug});
78    
79 dpavlin 68 my $table = $c->{create} || $c->{drop} || die $usage;
80 dpavlin 63
81 dpavlin 67 $c->{node_url} = 'http://localhost:1978/node/' . $table;
82    
83     $c->{user} ||= 'admin';
84     $c->{passwd} ||= 'admin';
85    
86 dpavlin 62 # create and configure node
87     my $node = new Search::Estraier::Node(
88     url => $c->{node_url},
89     user => $c->{user},
90     passwd => $c->{passwd},
91     croak_on_error => 1,
92     create => 1,
93     debug => $c->{debug} >= 4 ? 1 : 0,
94     );
95    
96 dpavlin 67 # create DBI connection
97     my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
98    
99     # drop existing triggers
100     sub drop_triggers {
101     my $table = shift || die "no table?";
102    
103     warn "removing triggers from $table\n";
104    
105     foreach my $t (qw/UPDATE INSERT DELETE/) {
106     my $lc_t = lc($t);
107     $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
108     }
109     }
110    
111     if ($c->{drop}) {
112     drop_triggers( $table );
113     warn "removing node $table\n";
114     $node->master(
115     action => 'nodedel',
116     name => $table,
117     );
118     exit;
119     }
120    
121     # clear existing node
122 dpavlin 63 $node->master(
123     action => 'nodeclr',
124     name => $table,
125     );
126    
127     # create PostgreSQL functions
128     $dbh->do(qq{
129    
130     CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
131     RETURNS setof record
132     AS 'pgest','pgest_node'
133     LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
134    
135     CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
136     AS 'pgest', 'pgest_trigger'
137     LANGUAGE 'C' STRICT;
138    
139     }) || die $dbh->errstr();
140    
141    
142 dpavlin 67 drop_triggers( $table );
143 dpavlin 63
144 dpavlin 68 if (! $c->{pk}) {
145    
146     warn "# finding primary key for $table\n" if ($c->{debug});
147    
148     $c->{pk} = $dbh->selectrow_array(qq{
149     SELECT
150     a.attname, t.typname
151     FROM pg_type t, pg_attribute a
152     WHERE t.oid = a.atttypid AND attrelid = (
153     SELECT indexrelid
154     FROM pg_class c, pg_index i
155     WHERE c.relname = '$table'
156     AND c.oid = i.indrelid
157     AND indisprimary
158     AND indnatts = 1
159     )
160     });
161    
162     }
163    
164     die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
165    
166     warn "using primary key $c->{pk}\n";
167    
168 dpavlin 63 $dbh->begin_work;
169    
170 dpavlin 68 $c->{sql} ||= "select * from $table";
171    
172 dpavlin 62 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
173     $sth->execute() || die $sth->errstr();
174    
175     my @cols = @{ $sth->{NAME} };
176    
177 dpavlin 68 die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
178    
179 dpavlin 62 warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
180    
181     my $total = $sth->rows;
182     my $i = 1;
183    
184     my $t = time();
185 dpavlin 67 my $pk = $c->{pk} || 'id';
186 dpavlin 62
187     while (my $row = $sth->fetchrow_hashref() ) {
188    
189     warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
190    
191     # create document
192     my $doc = new Search::Estraier::Document;
193    
194 dpavlin 67 if (my $id = $row->{$pk}) {
195 dpavlin 62 $doc->add_attr('@uri', $id);
196     } else {
197 dpavlin 67 die "can't find pk column '$pk' in results\n";
198 dpavlin 62 }
199    
200     printf "%4d ",$i;
201    
202     while (my ($col,$val) = each %{$row}) {
203    
204     if ($val) {
205     # add attributes (make column usable from attribute search)
206     $doc->add_attr($col, $val);
207    
208     # add body text to document (make it searchable using full-text index)
209     $doc->add_text($val);
210    
211     print "R";
212     } else {
213     print ".";
214     }
215    
216     }
217    
218     warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
219    
220     die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
221    
222     printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
223    
224     }
225    
226 dpavlin 65 my $cols = "'" . join("', '", @cols) . "'";
227 dpavlin 62
228     foreach my $t (qw/UPDATE INSERT DELETE/) {
229    
230     my $lc_t = lc($t);
231    
232     my $sql = qq{
233    
234     CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
235     ON ${table} FOR EACH ROW
236     EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
237 dpavlin 67 '$c->{pk}', $cols
238 dpavlin 62 )
239    
240     };
241    
242 dpavlin 63 #warn "$sql\n";
243 dpavlin 62
244     $dbh->do( $sql ) || die $dbh->errstr();
245    
246     }
247 dpavlin 63
248     $dbh->commit;
249 dpavlin 65
250     my $col_names = join(', ', @cols);
251     my $col_def = join(', ', map { "$_ text" } @cols);
252    
253 dpavlin 66 print "
254     ## example SQL search query:
255 dpavlin 65
256     SELECT $col_names
257     FROM pgest(
258     -- node, login, passwd, depth
259     '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
260     -- full text search
261     'foo bar',
262 dpavlin 66 -- attribute filter, order, limit, offset
263 dpavlin 65 null, null, null, null,
264     -- return columns
265     array[$cols]
266     ) as ($col_def);
267    
268     ";

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26