/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 62 by dpavlin, Mon Aug 7 13:24:49 2006 UTC revision 88 by dpavlin, Sun Mar 16 20:55:56 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2    
3  use strict;  use strict;
4  use Search::Estraier 0.06;  use Search::Estraier 0.07;
5  use DBI;  use DBI;
6  use Data::Dumper;  use Data::Dumper;
7  use Encode qw/from_to/;  use Encode qw/from_to/;
# Line 15  pgest-index.pl - create full-text index Line 15  pgest-index.pl - create full-text index
15  =cut  =cut
16    
17  my $c = {  my $c = {
18          name => 'imenik',          debug => 0,
         node_url => 'http://localhost:1978/node/imenik',  
         dbi => 'Pg:dbname=vip',  
         sql => qq{  
                 select ime,tel from imenik  
         },  
         pk_col => 'tel',  
         db_encoding => 'iso-8859-2',  
         debug => 1,  
         user => 'admin',  
         passwd => 'admin',  
19  };  };
20    
21  GetOptions($c, qw/node_url=s sql=s pk_col=s eb_encoding=s debug+ user=s passwd=s/);  =head1 SYNOPSIS
22    
23      pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25      pgsql-index.pl --drop movies
26    
27    Options:
28    
29    =over 4
30    
31    =item --create name
32    
33    Create index C<name> and create triggers on table with same name
34    
35    =item --drop name
36    
37    Remove triggers from table C<name> and node with same name
38    
39    =item --node-url http://localhost:1978/node/name
40    
41    Full URI to node. If it's not specified, it's assumed that you are using
42    Hyper Estraier on C<http://localhost:1978/>.
43    
44    =item --sql "select col1,col2 from name"
45    
46    SQL query which will return names of columns which are included in full-text
47    index. Have in mind that you can't use aliases (as I<something>) in this SQL
48    query (or triggers will be created with wrong fields).
49    
50    If SQL query isn't specified, default one C<< select * from movies >> will
51    be created. That will be B<serious performance hit> if all columns are
52    not needed for search.
53    
54    =item --pk id
55    
56    Specify name of primary key column in SQL query. If you allready have primary
57    key on table or unique index and it consists of simgle column
58    (compaund keys are not supported) it will be picked up automatically.
59    
60    If you specify value which is not unique, you will get just last occurence
61    of that item in index (which might be what you want). That's because specified
62    C<pk> column will be used for C<@uri> in Hyper Estraier.
63    
64    If name of primary key begins with C<_> it will not be added into text
65    indexing (so you won't be able to find prmary key value, but it will still
66    be available as attribute value).
67    
68    =item --user login
69    
70    =item --passwd password
71    
72    Username and password to use when connecting to Hyper Estraier. If not specified,
73    C<admin> and C<admin> will be used.
74    
75    =item --debug
76    
77    Dump debugging output. It may be specified multiple times for more verbose
78    debugging.
79    
80    =back
81    
82    =cut
83    
84    my $usage = "$0 database_name (--create|--drop) table_name [--sql='select id,foo,bar from table'] [--pk=id]\n";
85    
86    GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
87    
88    my $dbname = shift @ARGV || die $usage;
89    
90    $c->{dbi} = 'Pg:dbname=' . $dbname;
91    
92  warn "# c: ", Dumper($c) if ($c->{debug});  warn "# c: ", Dumper($c) if ($c->{debug});
93    
94    my $table = $c->{create} || $c->{drop} || die $usage;
95    
96    $c->{node_url} = 'http://localhost:1978/node/' . $table;
97    
98    $c->{user} ||= 'admin';
99    $c->{passwd} ||= 'admin';
100    
101  # create and configure node  # create and configure node
102  my $node = new Search::Estraier::Node(  my $node = new Search::Estraier::Node(
103          url => $c->{node_url},          url => $c->{node_url},
# Line 45  my $node = new Search::Estraier::Node( Line 111  my $node = new Search::Estraier::Node(
111  # create DBI connection  # create DBI connection
112  my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;  my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
113    
114    # drop existing triggers
115    sub drop_triggers {
116            my $table = shift || die "no table?";
117    
118            my $sth = $dbh->prepare(qq{
119                    SELECT relname,tgname
120                    FROM pg_trigger JOIN pg_class ON relfilenode = tgrelid
121                    WHERE tgname LIKE 'pgest_trigger_%' AND relname = ?
122            }) || $dbh->errstr;
123    
124            $sth->execute( $table ) || $sth->errstr();
125    
126            warn "there are ", $sth->rows, " triggers instead of just 3, dropping all\n" if ($sth->rows != 3);
127    
128            while (my $row = $sth->fetchrow_hashref) {
129                    my $sql = sprintf(qq{ DROP TRIGGER %s ON %s }, $row->{tgname}, $row->{relname} );
130                    #warn "# $sql\n";
131                    $dbh->do( $sql ) || $dbh->errstr;
132            }
133    
134            warn "removed ", $sth->rows, " triggers from $table\n" if ($sth->rows);
135    
136    }
137    
138    if ($c->{drop}) {
139            drop_triggers( $table );
140            warn "removing node $table\n";
141            $node->master(
142                    action => 'nodedel',
143                    name => $table,
144            );
145            exit;
146    }
147    
148    # clear existing node
149    $node->master(
150            action => 'nodeclr',
151            name => $table,
152    );
153    
154    # create PostgreSQL functions
155    $dbh->do(qq{
156    
157    CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
158            RETURNS setof record
159            AS 'pgest','pgest_node'
160            LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
161    
162    CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
163            AS 'pgest', 'pgest_trigger'
164            LANGUAGE 'C' STRICT;
165    
166    }) || die $dbh->errstr();
167    
168    
169    drop_triggers( $table );
170    
171    if (! $c->{pk}) {
172    
173            warn "# finding primary key for $table\n" if ($c->{debug});
174    
175            my $index_fmt = qq{
176                    SELECT
177                            a.attname, t.typname
178                    FROM pg_type t, pg_attribute a
179                    WHERE t.oid = a.atttypid AND attrelid = (
180                            SELECT indexrelid
181                            FROM pg_class c, pg_index i
182                            WHERE c.relname = '%s'
183                                    AND c.oid = i.indrelid
184                                    AND %s
185                                    AND indnatts = 1
186                    )
187            };
188    
189            $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') );
190            
191            $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') );
192    
193    }
194    
195    die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
196    
197    warn "using primary key $c->{pk}", $c->{pk} =~ m/^_/ ? " (not indexed)" : "", "\n";
198    
199    $dbh->begin_work;
200    
201    $c->{sql} ||= "select * from $table";
202    
203  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
204  $sth->execute() || die $sth->errstr();  $sth->execute() || die $sth->errstr;
205    
206  my @cols = @{ $sth->{NAME} };  my @cols = @{ $sth->{NAME} };
207    
208    die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
209    
210  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
211    
212  my $total = $sth->rows;  my $total = $sth->rows;
213  my $i = 1;  my $i = 1;
214    
215  my $t = time();  my $t = time();
216  my $pk_col = $c->{pk_col} || 'id';  my $pk = $c->{pk} || 'id';
217    
218    warn "indexing existing ",$sth->rows," rows\n";
219    
220  while (my $row = $sth->fetchrow_hashref() ) {  while (my $row = $sth->fetchrow_hashref() ) {
221    
# Line 65  while (my $row = $sth->fetchrow_hashref( Line 224  while (my $row = $sth->fetchrow_hashref(
224          # create document          # create document
225          my $doc = new Search::Estraier::Document;          my $doc = new Search::Estraier::Document;
226    
227          if (my $id = $row->{$pk_col}) {          if (my $id = $row->{$pk}) {
228                  $doc->add_attr('@uri', $id);                  $doc->add_attr('@uri', $id);
229          } else {          } else {
230                  die "can't find pk_col column '$pk_col' in results\n";                  die "can't find pk column '$pk' in results\n";
231          }          }
232    
233          printf "%4d ",$i;          my $log = sprintf "%4d ",$i;
234    
235          while (my ($col,$val) = each %{$row}) {          while (my ($col,$val) = each %{$row}) {
236    
237                  if ($val) {                  if ($val) {
                         # change encoding?  
                         from_to($val, ($c->{db_encoding} || 'ISO-8859-1'), 'UTF-8');  
   
238                          # add attributes (make column usable from attribute search)                          # add attributes (make column usable from attribute search)
239                          $doc->add_attr($col, $val);                          $doc->add_attr($col, $val);
240    
241                          # add body text to document (make it searchable using full-text index)                          # add body text to document (make it searchable using full-text index)
242                          $doc->add_text($val);                          $doc->add_text($val) unless ($col =~ m/^_/);
243    
244                          print "R";                          $log .= "R";
245                  } else {                  } else {
246                          print ".";                          $log .= ".";
247                  }                  }
248    
249          }          }
# Line 96  while (my $row = $sth->fetchrow_hashref( Line 252  while (my $row = $sth->fetchrow_hashref(
252    
253          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
254    
255          printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );          $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
256    
257  }          print STDERR $log;
258    
259  my $table = $c->{name} || die "no name?";  }
260    
261  my $cols = "'" . join("','", @cols) . "'";  my $cols = "'" . join("', '", @cols) . "'";
262    
263  foreach my $t (qw/UPDATE INSERT DELETE/) {  foreach my $t (qw/UPDATE INSERT DELETE/) {
264    
265          my $lc_t = lc($t);          my $lc_t = lc($t);
266    
         $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });  
   
267          my $sql = qq{          my $sql = qq{
268    
269                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
270                          ON ${table} FOR EACH ROW                          ON ${table} FOR EACH ROW
271                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
272                                  '$c->{pk_col}', $cols                                  '$c->{pk}', $cols
273                          )                          )
274    
275          };          };
276    
277          warn "$sql\n";          #warn "$sql\n";
278    
279          $dbh->do( $sql ) || die $dbh->errstr();          $dbh->do( $sql ) || die $dbh->errstr;
280    
281  }  }
282    
283    warn "created consistency triggers\n";
284    
285    $dbh->commit;
286    
287    =head1 SEARCHING
288    
289    At end of each run, this script will output example search SQL query on STDOUT.
290    
291    You can use it to quickly construct queries for your application.
292    
293    =cut
294    
295    my $col_names = join(', ', @cols);
296    my $col_def = join(', ', map { "$_ text" } @cols);
297    
298    print "
299    -- example SQL search query:
300    
301    SELECT $col_names
302    FROM pgest(
303            -- node, login, passwd, depth
304            '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
305            -- full text search
306            'foo bar',
307            -- attribute filter, order, limit, offset
308            null, null, null, null,
309            -- return columns
310            array[$cols]
311    ) as ($col_def);
312    
313    ";
314    
315    __END__
316    
317    =head1 AUTHOR
318    
319    Dobrica Pavlinusic <dpavlin@rot13.org>
320    
321    L<http://www.rot13.org/~dpavlin/>
322    
323    =head1 LICENSE
324    
325    This product is licensed under GNU Public License (GPL) v2 or later.
326    
327    =cut
328    

Legend:
Removed from v.62  
changed lines
  Added in v.88

  ViewVC Help
Powered by ViewVC 1.1.26