/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 62 by dpavlin, Mon Aug 7 13:24:49 2006 UTC revision 69 by dpavlin, Mon Aug 7 17:30:56 2006 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2    
3  use strict;  use strict;
4  use Search::Estraier 0.06;  use Search::Estraier 0.07;
5  use DBI;  use DBI;
6  use Data::Dumper;  use Data::Dumper;
7  use Encode qw/from_to/;  use Encode qw/from_to/;
# Line 15  pgest-index.pl - create full-text index Line 15  pgest-index.pl - create full-text index
15  =cut  =cut
16    
17  my $c = {  my $c = {
         name => 'imenik',  
         node_url => 'http://localhost:1978/node/imenik',  
         dbi => 'Pg:dbname=vip',  
         sql => qq{  
                 select ime,tel from imenik  
         },  
         pk_col => 'tel',  
         db_encoding => 'iso-8859-2',  
18          debug => 1,          debug => 1,
         user => 'admin',  
         passwd => 'admin',  
19  };  };
20    
21  GetOptions($c, qw/node_url=s sql=s pk_col=s eb_encoding=s debug+ user=s passwd=s/);  =head1 SYNOPSIS
22    
23      pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25      pgsql-index.pl --drop movies
26    
27    Options:
28    
29    =over 4
30    
31    =item --create name
32    
33    Create index C<name> and create triggers on table with same name
34    
35    =item --drop name
36    
37    Remove triggers from table C<name> and node with same name
38    
39    =item --node-url http://localhost:1978/node/name
40    
41    Full URI to node. If it's not specified, it's assumed that you are using
42    Hyper Estraier on C<http://localhost:1978/>.
43    
44    =item --sql "select col1,col2 from name"
45    
46    SQL query which will return names of columns which are included in full-text
47    index. Have in mind that you can't use aliases (as I<something>) in this SQL
48    query (or triggers will be created with wrong fields).
49    
50    =item --pk id
51    
52    Specify name of primary key column in SQL query. If not specified, C<id> will be used.
53    
54    =item --user login
55    
56    =item --passwd password
57    
58    Username and password to use when connecting to Hyper Estraier. If not specified,
59    C<admin> and C<admin> will be used.
60    
61    =item --debug
62    
63    Dump debugging output. It may be specified multiple times for more verbose debugging.
64    
65    =back
66    
67    =cut
68    
69    my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
70    
71    GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
72    
73    my $dbname = shift @ARGV || die $usage;
74    
75    $c->{dbi} = 'Pg:dbname=' . $dbname;
76    
77  warn "# c: ", Dumper($c) if ($c->{debug});  warn "# c: ", Dumper($c) if ($c->{debug});
78    
79    my $table = $c->{create} || $c->{drop} || die $usage;
80    
81    $c->{node_url} = 'http://localhost:1978/node/' . $table;
82    
83    $c->{user} ||= 'admin';
84    $c->{passwd} ||= 'admin';
85    
86  # create and configure node  # create and configure node
87  my $node = new Search::Estraier::Node(  my $node = new Search::Estraier::Node(
88          url => $c->{node_url},          url => $c->{node_url},
# Line 45  my $node = new Search::Estraier::Node( Line 96  my $node = new Search::Estraier::Node(
96  # create DBI connection  # create DBI connection
97  my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;  my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
98    
99    # drop existing triggers
100    sub drop_triggers {
101            my $table = shift || die "no table?";
102    
103            warn "removing triggers from $table\n";
104    
105            foreach my $t (qw/UPDATE INSERT DELETE/) {
106                    my $lc_t = lc($t);
107                    $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
108            }
109    }
110    
111    if ($c->{drop}) {
112            drop_triggers( $table );
113            warn "removing node $table\n";
114            $node->master(
115                    action => 'nodedel',
116                    name => $table,
117            );
118            exit;
119    }
120    
121    # clear existing node
122    $node->master(
123            action => 'nodeclr',
124            name => $table,
125    );
126    
127    # create PostgreSQL functions
128    $dbh->do(qq{
129    
130    CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
131            RETURNS setof record
132            AS 'pgest','pgest_node'
133            LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
134    
135    CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
136            AS 'pgest', 'pgest_trigger'
137            LANGUAGE 'C' STRICT;
138    
139    }) || die $dbh->errstr();
140    
141    
142    drop_triggers( $table );
143    
144    if (! $c->{pk}) {
145    
146            warn "# finding primary key for $table\n" if ($c->{debug});
147    
148            $c->{pk} = $dbh->selectrow_array(qq{
149                    SELECT
150                            a.attname, t.typname
151                    FROM pg_type t, pg_attribute a
152                    WHERE t.oid = a.atttypid AND attrelid = (
153                            SELECT indexrelid
154                            FROM pg_class c, pg_index i
155                            WHERE c.relname = '$table'
156                                    AND c.oid = i.indrelid
157                                    AND indisprimary
158                                    AND indnatts = 1
159                    )
160            });
161    
162    }
163    
164    die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
165    
166    warn "using primary key $c->{pk}\n";
167    
168    $dbh->begin_work;
169    
170    $c->{sql} ||= "select * from $table";
171    
172  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
173  $sth->execute() || die $sth->errstr();  $sth->execute() || die $sth->errstr();
174    
175  my @cols = @{ $sth->{NAME} };  my @cols = @{ $sth->{NAME} };
176    
177    die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
178    
179  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
180    
181  my $total = $sth->rows;  my $total = $sth->rows;
182  my $i = 1;  my $i = 1;
183    
184  my $t = time();  my $t = time();
185  my $pk_col = $c->{pk_col} || 'id';  my $pk = $c->{pk} || 'id';
186    
187    warn "indexing existing ",$sth->rows," rows\n";
188    
189  while (my $row = $sth->fetchrow_hashref() ) {  while (my $row = $sth->fetchrow_hashref() ) {
190    
# Line 65  while (my $row = $sth->fetchrow_hashref( Line 193  while (my $row = $sth->fetchrow_hashref(
193          # create document          # create document
194          my $doc = new Search::Estraier::Document;          my $doc = new Search::Estraier::Document;
195    
196          if (my $id = $row->{$pk_col}) {          if (my $id = $row->{$pk}) {
197                  $doc->add_attr('@uri', $id);                  $doc->add_attr('@uri', $id);
198          } else {          } else {
199                  die "can't find pk_col column '$pk_col' in results\n";                  die "can't find pk column '$pk' in results\n";
200          }          }
201    
202          printf "%4d ",$i;          my $log = sprintf "%4d ",$i;
203    
204          while (my ($col,$val) = each %{$row}) {          while (my ($col,$val) = each %{$row}) {
205    
206                  if ($val) {                  if ($val) {
                         # change encoding?  
                         from_to($val, ($c->{db_encoding} || 'ISO-8859-1'), 'UTF-8');  
   
207                          # add attributes (make column usable from attribute search)                          # add attributes (make column usable from attribute search)
208                          $doc->add_attr($col, $val);                          $doc->add_attr($col, $val);
209    
210                          # add body text to document (make it searchable using full-text index)                          # add body text to document (make it searchable using full-text index)
211                          $doc->add_text($val);                          $doc->add_text($val);
212    
213                          print "R";                          $log .= "R";
214                  } else {                  } else {
215                          print ".";                          $log .= ".";
216                  }                  }
217    
218          }          }
# Line 96  while (my $row = $sth->fetchrow_hashref( Line 221  while (my $row = $sth->fetchrow_hashref(
221    
222          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
223    
224          printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );          $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
225    
226  }          print STDERR $log;
227    
228  my $table = $c->{name} || die "no name?";  }
229    
230  my $cols = "'" . join("','", @cols) . "'";  my $cols = "'" . join("', '", @cols) . "'";
231    
232  foreach my $t (qw/UPDATE INSERT DELETE/) {  foreach my $t (qw/UPDATE INSERT DELETE/) {
233    
234          my $lc_t = lc($t);          my $lc_t = lc($t);
235    
         $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });  
   
236          my $sql = qq{          my $sql = qq{
237    
238                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
239                          ON ${table} FOR EACH ROW                          ON ${table} FOR EACH ROW
240                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
241                                  '$c->{pk_col}', $cols                                  '$c->{pk}', $cols
242                          )                          )
243    
244          };          };
245    
246          warn "$sql\n";          #warn "$sql\n";
247    
248          $dbh->do( $sql ) || die $dbh->errstr();          $dbh->do( $sql ) || die $dbh->errstr();
249    
250  }  }
251    
252    $dbh->commit;
253    
254    my $col_names = join(', ', @cols);
255    my $col_def = join(', ', map { "$_ text" } @cols);
256    
257    print "
258    -- example SQL search query:
259    
260    SELECT $col_names
261    FROM pgest(
262            -- node, login, passwd, depth
263            '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
264            -- full text search
265            'foo bar',
266            -- attribute filter, order, limit, offset
267            null, null, null, null,
268            -- return columns
269            array[$cols]
270    ) as ($col_def);
271    
272    ";

Legend:
Removed from v.62  
changed lines
  Added in v.69

  ViewVC Help
Powered by ViewVC 1.1.26