/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 67 - (hide annotations)
Mon Aug 7 16:38:24 2006 UTC (17 years, 9 months ago) by dpavlin
File MIME type: text/plain
File size: 4954 byte(s)
added chunk of documentation and ability to --create or --drop indexes
1 dpavlin 62 #!/usr/bin/perl -w
2    
3     use strict;
4 dpavlin 63 use Search::Estraier 0.07;
5 dpavlin 62 use DBI;
6     use Data::Dumper;
7     use Encode qw/from_to/;
8     use Time::HiRes qw/time/;
9     use Getopt::Long;
10    
11     =head1 NAME
12    
13     pgest-index.pl - create full-text index of some columns in your database
14    
15     =cut
16    
17     my $c = {
18     name => 'imenik',
19     dbi => 'Pg:dbname=vip',
20     sql => qq{
21     select ime,tel from imenik
22     },
23 dpavlin 67 pk => 'tel',
24 dpavlin 62 debug => 1,
25     };
26    
27 dpavlin 67 =head1 SYNOPSIS
28 dpavlin 62
29 dpavlin 67 pgest-index.pl --create movies --sql "select id,title,year from movies"
30    
31     pgsql-index.pl --drop movies
32    
33     Options:
34    
35     =over 4
36    
37     =item --create name
38    
39     Create index C<name> and create triggers on table with same name
40    
41     =item --drop name
42    
43     Remove triggers from table C<name> and node with same name
44    
45     =item --node-url http://localhost:1978/node/name
46    
47     Full URI to node. If it's not specified, it's assumed that you are using
48     Hyper Estraier on C<http://localhost:1978/>.
49    
50     =item --sql "select col1,col2 from name"
51    
52     SQL query which will return names of columns which are included in full-text
53     index. Have in mind that you can't use aliases (as I<something>) in this SQL
54     query (or triggers will be created with wrong fields).
55    
56     =item --pk id
57    
58     Specify name of primary key column in SQL query. If not specified, C<id> will be used.
59    
60     =item --user login
61    
62     =item --passwd password
63    
64     Username and password to use when connecting to Hyper Estraier. If not specified,
65     C<admin> and C<admin> will be used.
66    
67     =item --debug
68    
69     Dump debugging output. It may be specified multiple times for more verbose debugging.
70    
71     =back
72    
73     =cut
74    
75     GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
76    
77 dpavlin 62 warn "# c: ", Dumper($c) if ($c->{debug});
78    
79 dpavlin 67 my $table = $c->{create} || $c->{drop} || die "$0 (--create|--drop) name [--sql='select id,foo,bar from table']\n";
80 dpavlin 63
81 dpavlin 67 $c->{node_url} = 'http://localhost:1978/node/' . $table;
82    
83     $c->{user} ||= 'admin';
84     $c->{passwd} ||= 'admin';
85    
86 dpavlin 62 # create and configure node
87     my $node = new Search::Estraier::Node(
88     url => $c->{node_url},
89     user => $c->{user},
90     passwd => $c->{passwd},
91     croak_on_error => 1,
92     create => 1,
93     debug => $c->{debug} >= 4 ? 1 : 0,
94     );
95    
96 dpavlin 67 # create DBI connection
97     my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
98    
99     # drop existing triggers
100     sub drop_triggers {
101     my $table = shift || die "no table?";
102    
103     warn "removing triggers from $table\n";
104    
105     foreach my $t (qw/UPDATE INSERT DELETE/) {
106     my $lc_t = lc($t);
107     $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
108     }
109     }
110    
111     if ($c->{drop}) {
112     drop_triggers( $table );
113     warn "removing node $table\n";
114     $node->master(
115     action => 'nodedel',
116     name => $table,
117     );
118     exit;
119     }
120    
121     # clear existing node
122 dpavlin 63 $node->master(
123     action => 'nodeclr',
124     name => $table,
125     );
126    
127     # create PostgreSQL functions
128     $dbh->do(qq{
129    
130     CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
131     RETURNS setof record
132     AS 'pgest','pgest_node'
133     LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
134    
135     CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
136     AS 'pgest', 'pgest_trigger'
137     LANGUAGE 'C' STRICT;
138    
139     }) || die $dbh->errstr();
140    
141    
142 dpavlin 67 drop_triggers( $table );
143 dpavlin 63
144     $dbh->begin_work;
145    
146 dpavlin 62 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
147     $sth->execute() || die $sth->errstr();
148    
149     my @cols = @{ $sth->{NAME} };
150    
151     warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
152    
153     my $total = $sth->rows;
154     my $i = 1;
155    
156     my $t = time();
157 dpavlin 67 my $pk = $c->{pk} || 'id';
158 dpavlin 62
159     while (my $row = $sth->fetchrow_hashref() ) {
160    
161     warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
162    
163     # create document
164     my $doc = new Search::Estraier::Document;
165    
166 dpavlin 67 if (my $id = $row->{$pk}) {
167 dpavlin 62 $doc->add_attr('@uri', $id);
168     } else {
169 dpavlin 67 die "can't find pk column '$pk' in results\n";
170 dpavlin 62 }
171    
172     printf "%4d ",$i;
173    
174     while (my ($col,$val) = each %{$row}) {
175    
176     if ($val) {
177     # add attributes (make column usable from attribute search)
178     $doc->add_attr($col, $val);
179    
180     # add body text to document (make it searchable using full-text index)
181     $doc->add_text($val);
182    
183     print "R";
184     } else {
185     print ".";
186     }
187    
188     }
189    
190     warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
191    
192     die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
193    
194     printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
195    
196     }
197    
198 dpavlin 65 my $cols = "'" . join("', '", @cols) . "'";
199 dpavlin 62
200     foreach my $t (qw/UPDATE INSERT DELETE/) {
201    
202     my $lc_t = lc($t);
203    
204     my $sql = qq{
205    
206     CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
207     ON ${table} FOR EACH ROW
208     EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
209 dpavlin 67 '$c->{pk}', $cols
210 dpavlin 62 )
211    
212     };
213    
214 dpavlin 63 #warn "$sql\n";
215 dpavlin 62
216     $dbh->do( $sql ) || die $dbh->errstr();
217    
218     }
219 dpavlin 63
220     $dbh->commit;
221 dpavlin 65
222     my $col_names = join(', ', @cols);
223     my $col_def = join(', ', map { "$_ text" } @cols);
224    
225 dpavlin 66 print "
226     ## example SQL search query:
227 dpavlin 65
228     SELECT $col_names
229     FROM pgest(
230     -- node, login, passwd, depth
231     '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
232     -- full text search
233     'foo bar',
234 dpavlin 66 -- attribute filter, order, limit, offset
235 dpavlin 65 null, null, null, null,
236     -- return columns
237     array[$cols]
238     ) as ($col_def);
239    
240     ";

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26