/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 62 by dpavlin, Mon Aug 7 13:24:49 2006 UTC revision 77 by dpavlin, Tue Aug 8 11:01:00 2006 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2    
3  use strict;  use strict;
4  use Search::Estraier 0.06;  use Search::Estraier 0.07;
5  use DBI;  use DBI;
6  use Data::Dumper;  use Data::Dumper;
7  use Encode qw/from_to/;  use Encode qw/from_to/;
# Line 15  pgest-index.pl - create full-text index Line 15  pgest-index.pl - create full-text index
15  =cut  =cut
16    
17  my $c = {  my $c = {
18          name => 'imenik',          debug => 0,
         node_url => 'http://localhost:1978/node/imenik',  
         dbi => 'Pg:dbname=vip',  
         sql => qq{  
                 select ime,tel from imenik  
         },  
         pk_col => 'tel',  
         db_encoding => 'iso-8859-2',  
         debug => 1,  
         user => 'admin',  
         passwd => 'admin',  
19  };  };
20    
21  GetOptions($c, qw/node_url=s sql=s pk_col=s eb_encoding=s debug+ user=s passwd=s/);  =head1 SYNOPSIS
22    
23      pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25      pgsql-index.pl --drop movies
26    
27    Options:
28    
29    =over 4
30    
31    =item --create name
32    
33    Create index C<name> and create triggers on table with same name
34    
35    =item --drop name
36    
37    Remove triggers from table C<name> and node with same name
38    
39    =item --node-url http://localhost:1978/node/name
40    
41    Full URI to node. If it's not specified, it's assumed that you are using
42    Hyper Estraier on C<http://localhost:1978/>.
43    
44    =item --sql "select col1,col2 from name"
45    
46    SQL query which will return names of columns which are included in full-text
47    index. Have in mind that you can't use aliases (as I<something>) in this SQL
48    query (or triggers will be created with wrong fields).
49    
50    If SQL query isn't specified, default one C<< select * from movies >> will
51    be created. That will be B<serious performance hit> if all columns are
52    not needed for search.
53    
54    =item --pk id
55    
56    Specify name of primary key column in SQL query. If you allready have primary
57    key on table or unique index and it consists of simgle column
58    (compaund keys are not supported) it will be picked up automatically.
59    
60    If you specify value which is not unique, you will get just last occurence
61    of that item in index (which might be what you want). That's because specified
62    C<pk> column will be used for C<@uri> in Hyper Estraier.
63    
64    =item --user login
65    
66    =item --passwd password
67    
68    Username and password to use when connecting to Hyper Estraier. If not specified,
69    C<admin> and C<admin> will be used.
70    
71    =item --debug
72    
73    Dump debugging output. It may be specified multiple times for more verbose
74    debugging.
75    
76    =back
77    
78    =cut
79    
80    my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
81    
82    GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
83    
84    my $dbname = shift @ARGV || die $usage;
85    
86    $c->{dbi} = 'Pg:dbname=' . $dbname;
87    
88  warn "# c: ", Dumper($c) if ($c->{debug});  warn "# c: ", Dumper($c) if ($c->{debug});
89    
90    my $table = $c->{create} || $c->{drop} || die $usage;
91    
92    $c->{node_url} = 'http://localhost:1978/node/' . $table;
93    
94    $c->{user} ||= 'admin';
95    $c->{passwd} ||= 'admin';
96    
97  # create and configure node  # create and configure node
98  my $node = new Search::Estraier::Node(  my $node = new Search::Estraier::Node(
99          url => $c->{node_url},          url => $c->{node_url},
# Line 45  my $node = new Search::Estraier::Node( Line 107  my $node = new Search::Estraier::Node(
107  # create DBI connection  # create DBI connection
108  my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;  my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
109    
110    # drop existing triggers
111    sub drop_triggers {
112            my $table = shift || die "no table?";
113    
114            warn "removing triggers from $table\n";
115    
116            foreach my $t (qw/UPDATE INSERT DELETE/) {
117                    my $lc_t = lc($t);
118                    $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
119            }
120    }
121    
122    if ($c->{drop}) {
123            drop_triggers( $table );
124            warn "removing node $table\n";
125            $node->master(
126                    action => 'nodedel',
127                    name => $table,
128            );
129            exit;
130    }
131    
132    # clear existing node
133    $node->master(
134            action => 'nodeclr',
135            name => $table,
136    );
137    
138    # create PostgreSQL functions
139    $dbh->do(qq{
140    
141    CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
142            RETURNS setof record
143            AS 'pgest','pgest_node'
144            LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
145    
146    CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
147            AS 'pgest', 'pgest_trigger'
148            LANGUAGE 'C' STRICT;
149    
150    }) || die $dbh->errstr();
151    
152    
153    drop_triggers( $table );
154    
155    if (! $c->{pk}) {
156    
157            warn "# finding primary key for $table\n" if ($c->{debug});
158    
159            my $index_fmt = qq{
160                    SELECT
161                            a.attname, t.typname
162                    FROM pg_type t, pg_attribute a
163                    WHERE t.oid = a.atttypid AND attrelid = (
164                            SELECT indexrelid
165                            FROM pg_class c, pg_index i
166                            WHERE c.relname = '%s'
167                                    AND c.oid = i.indrelid
168                                    AND %s
169                                    AND indnatts = 1
170                    )
171            };
172    
173            $c->{pk} = $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisprimary') );
174            
175            $c->{pk} ||= $dbh->selectrow_array( sprintf($index_fmt, $table, 'indisunique') );
176    
177    }
178    
179    die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
180    
181    warn "using primary key $c->{pk}\n";
182    
183    $dbh->begin_work;
184    
185    $c->{sql} ||= "select * from $table";
186    
187  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
188  $sth->execute() || die $sth->errstr();  $sth->execute() || die $sth->errstr();
189    
190  my @cols = @{ $sth->{NAME} };  my @cols = @{ $sth->{NAME} };
191    
192    die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
193    
194  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
195    
196  my $total = $sth->rows;  my $total = $sth->rows;
197  my $i = 1;  my $i = 1;
198    
199  my $t = time();  my $t = time();
200  my $pk_col = $c->{pk_col} || 'id';  my $pk = $c->{pk} || 'id';
201    
202    warn "indexing existing ",$sth->rows," rows\n";
203    
204  while (my $row = $sth->fetchrow_hashref() ) {  while (my $row = $sth->fetchrow_hashref() ) {
205    
# Line 65  while (my $row = $sth->fetchrow_hashref( Line 208  while (my $row = $sth->fetchrow_hashref(
208          # create document          # create document
209          my $doc = new Search::Estraier::Document;          my $doc = new Search::Estraier::Document;
210    
211          if (my $id = $row->{$pk_col}) {          if (my $id = $row->{$pk}) {
212                  $doc->add_attr('@uri', $id);                  $doc->add_attr('@uri', $id);
213          } else {          } else {
214                  die "can't find pk_col column '$pk_col' in results\n";                  die "can't find pk column '$pk' in results\n";
215          }          }
216    
217          printf "%4d ",$i;          my $log = sprintf "%4d ",$i;
218    
219          while (my ($col,$val) = each %{$row}) {          while (my ($col,$val) = each %{$row}) {
220    
221                  if ($val) {                  if ($val) {
                         # change encoding?  
                         from_to($val, ($c->{db_encoding} || 'ISO-8859-1'), 'UTF-8');  
   
222                          # add attributes (make column usable from attribute search)                          # add attributes (make column usable from attribute search)
223                          $doc->add_attr($col, $val);                          $doc->add_attr($col, $val);
224    
225                          # add body text to document (make it searchable using full-text index)                          # add body text to document (make it searchable using full-text index)
226                          $doc->add_text($val);                          $doc->add_text($val);
227    
228                          print "R";                          $log .= "R";
229                  } else {                  } else {
230                          print ".";                          $log .= ".";
231                  }                  }
232    
233          }          }
# Line 96  while (my $row = $sth->fetchrow_hashref( Line 236  while (my $row = $sth->fetchrow_hashref(
236    
237          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
238    
239          printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );          $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
240    
241  }          print STDERR $log;
242    
243  my $table = $c->{name} || die "no name?";  }
244    
245  my $cols = "'" . join("','", @cols) . "'";  my $cols = "'" . join("', '", @cols) . "'";
246    
247  foreach my $t (qw/UPDATE INSERT DELETE/) {  foreach my $t (qw/UPDATE INSERT DELETE/) {
248    
249          my $lc_t = lc($t);          my $lc_t = lc($t);
250    
         $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });  
   
251          my $sql = qq{          my $sql = qq{
252    
253                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
254                          ON ${table} FOR EACH ROW                          ON ${table} FOR EACH ROW
255                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
256                                  '$c->{pk_col}', $cols                                  '$c->{pk}', $cols
257                          )                          )
258    
259          };          };
260    
261          warn "$sql\n";          #warn "$sql\n";
262    
263          $dbh->do( $sql ) || die $dbh->errstr();          $dbh->do( $sql ) || die $dbh->errstr();
264    
265  }  }
266    
267    $dbh->commit;
268    
269    =head1 SEARCHING
270    
271    At end of each run, this script will output example search SQL query on STDOUT.
272    
273    You can use it to quickly construct queries for your application.
274    
275    =cut
276    
277    my $col_names = join(', ', @cols);
278    my $col_def = join(', ', map { "$_ text" } @cols);
279    
280    print "
281    -- example SQL search query:
282    
283    SELECT $col_names
284    FROM pgest(
285            -- node, login, passwd, depth
286            '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
287            -- full text search
288            'foo bar',
289            -- attribute filter, order, limit, offset
290            null, null, null, null,
291            -- return columns
292            array[$cols]
293    ) as ($col_def);
294    
295    ";
296    
297    __END__
298    
299    =head1 AUTHOR
300    
301    Dobrica Pavlinusic <dpavlin@rot13.org>
302    
303    L<http://www.rot13.org/~dpavlin/>
304    
305    =head1 LICENSE
306    
307    This product is licensed under GNU Public License (GPL) v2 or later.
308    
309    =cut
310    

Legend:
Removed from v.62  
changed lines
  Added in v.77

  ViewVC Help
Powered by ViewVC 1.1.26