/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 63 by dpavlin, Mon Aug 7 14:30:55 2006 UTC revision 69 by dpavlin, Mon Aug 7 17:30:56 2006 UTC
# Line 15  pgest-index.pl - create full-text index Line 15  pgest-index.pl - create full-text index
15  =cut  =cut
16    
17  my $c = {  my $c = {
         name => 'imenik',  
         node_url => 'http://localhost:1978/node/imenik',  
         dbi => 'Pg:dbname=vip',  
         sql => qq{  
                 select ime,tel from imenik  
         },  
         pk_col => 'tel',  
         db_encoding => 'iso-8859-2',  
18          debug => 1,          debug => 1,
         user => 'admin',  
         passwd => 'admin',  
19  };  };
20    
21  GetOptions($c, qw/node_url=s sql=s pk_col=s eb_encoding=s debug+ user=s passwd=s/);  =head1 SYNOPSIS
22    
23      pgest-index.pl --create movies --sql "select id,title,year from movies"
24    
25      pgsql-index.pl --drop movies
26    
27    Options:
28    
29    =over 4
30    
31    =item --create name
32    
33    Create index C<name> and create triggers on table with same name
34    
35    =item --drop name
36    
37    Remove triggers from table C<name> and node with same name
38    
39    =item --node-url http://localhost:1978/node/name
40    
41    Full URI to node. If it's not specified, it's assumed that you are using
42    Hyper Estraier on C<http://localhost:1978/>.
43    
44    =item --sql "select col1,col2 from name"
45    
46    SQL query which will return names of columns which are included in full-text
47    index. Have in mind that you can't use aliases (as I<something>) in this SQL
48    query (or triggers will be created with wrong fields).
49    
50    =item --pk id
51    
52    Specify name of primary key column in SQL query. If not specified, C<id> will be used.
53    
54    =item --user login
55    
56    =item --passwd password
57    
58    Username and password to use when connecting to Hyper Estraier. If not specified,
59    C<admin> and C<admin> will be used.
60    
61    =item --debug
62    
63    Dump debugging output. It may be specified multiple times for more verbose debugging.
64    
65    =back
66    
67    =cut
68    
69    my $usage = "$0 database_name (--create|--drop) name [--sql='select id,foo,bar from table'] [--pk=id]\n";
70    
71    GetOptions($c, qw/create=s drop=s node_url=s sql=s pk=s user=s passwd=s debug+/);
72    
73    my $dbname = shift @ARGV || die $usage;
74    
75    $c->{dbi} = 'Pg:dbname=' . $dbname;
76    
77  warn "# c: ", Dumper($c) if ($c->{debug});  warn "# c: ", Dumper($c) if ($c->{debug});
78    
79  my $table = $c->{name} || die "no name?";  my $table = $c->{create} || $c->{drop} || die $usage;
80    
81    $c->{node_url} = 'http://localhost:1978/node/' . $table;
82    
83    $c->{user} ||= 'admin';
84    $c->{passwd} ||= 'admin';
85    
86  # create and configure node  # create and configure node
87  my $node = new Search::Estraier::Node(  my $node = new Search::Estraier::Node(
# Line 44  my $node = new Search::Estraier::Node( Line 93  my $node = new Search::Estraier::Node(
93          debug => $c->{debug} >= 4 ? 1 : 0,          debug => $c->{debug} >= 4 ? 1 : 0,
94  );  );
95    
96    # create DBI connection
97    my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
98    
99    # drop existing triggers
100    sub drop_triggers {
101            my $table = shift || die "no table?";
102    
103            warn "removing triggers from $table\n";
104    
105            foreach my $t (qw/UPDATE INSERT DELETE/) {
106                    my $lc_t = lc($t);
107                    $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
108            }
109    }
110    
111    if ($c->{drop}) {
112            drop_triggers( $table );
113            warn "removing node $table\n";
114            $node->master(
115                    action => 'nodedel',
116                    name => $table,
117            );
118            exit;
119    }
120    
121    # clear existing node
122  $node->master(  $node->master(
123          action => 'nodeclr',          action => 'nodeclr',
124          name => $table,          name => $table,
125  );  );
126    
 # create DBI connection  
 my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;  
   
127  # create PostgreSQL functions  # create PostgreSQL functions
128  $dbh->do(qq{  $dbh->do(qq{
129    
# Line 67  CREATE OR REPLACE FUNCTION pgest_trigger Line 139  CREATE OR REPLACE FUNCTION pgest_trigger
139  }) || die $dbh->errstr();  }) || die $dbh->errstr();
140    
141    
142  # drop existing triggers  drop_triggers( $table );
143  foreach my $t (qw/UPDATE INSERT DELETE/) {  
144          my $lc_t = lc($t);  if (! $c->{pk}) {
145          $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });  
146            warn "# finding primary key for $table\n" if ($c->{debug});
147    
148            $c->{pk} = $dbh->selectrow_array(qq{
149                    SELECT
150                            a.attname, t.typname
151                    FROM pg_type t, pg_attribute a
152                    WHERE t.oid = a.atttypid AND attrelid = (
153                            SELECT indexrelid
154                            FROM pg_class c, pg_index i
155                            WHERE c.relname = '$table'
156                                    AND c.oid = i.indrelid
157                                    AND indisprimary
158                                    AND indnatts = 1
159                    )
160            });
161    
162  }  }
163    
164    die "$0: can't find single column primary key for table ${table}. Please specify column with --pk\n" unless ($c->{pk});
165    
166    warn "using primary key $c->{pk}\n";
167    
168  $dbh->begin_work;  $dbh->begin_work;
169    
170    $c->{sql} ||= "select * from $table";
171    
172  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();  my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
173  $sth->execute() || die $sth->errstr();  $sth->execute() || die $sth->errstr();
174    
175  my @cols = @{ $sth->{NAME} };  my @cols = @{ $sth->{NAME} };
176    
177    die "SQL '$c->{sql}' didn't include primary key $c->{pk}\n" unless grep(/^\Q$c->{pk}\E$/, @cols);
178    
179  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});  warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
180    
181  my $total = $sth->rows;  my $total = $sth->rows;
182  my $i = 1;  my $i = 1;
183    
184  my $t = time();  my $t = time();
185  my $pk_col = $c->{pk_col} || 'id';  my $pk = $c->{pk} || 'id';
186    
187    warn "indexing existing ",$sth->rows," rows\n";
188    
189  while (my $row = $sth->fetchrow_hashref() ) {  while (my $row = $sth->fetchrow_hashref() ) {
190    
# Line 95  while (my $row = $sth->fetchrow_hashref( Line 193  while (my $row = $sth->fetchrow_hashref(
193          # create document          # create document
194          my $doc = new Search::Estraier::Document;          my $doc = new Search::Estraier::Document;
195    
196          if (my $id = $row->{$pk_col}) {          if (my $id = $row->{$pk}) {
197                  $doc->add_attr('@uri', $id);                  $doc->add_attr('@uri', $id);
198          } else {          } else {
199                  die "can't find pk_col column '$pk_col' in results\n";                  die "can't find pk column '$pk' in results\n";
200          }          }
201    
202          printf "%4d ",$i;          my $log = sprintf "%4d ",$i;
203    
204          while (my ($col,$val) = each %{$row}) {          while (my ($col,$val) = each %{$row}) {
205    
206                  if ($val) {                  if ($val) {
                         # change encoding?  
                         from_to($val, ($c->{db_encoding} || 'ISO-8859-1'), 'UTF-8');  
   
207                          # add attributes (make column usable from attribute search)                          # add attributes (make column usable from attribute search)
208                          $doc->add_attr($col, $val);                          $doc->add_attr($col, $val);
209    
210                          # add body text to document (make it searchable using full-text index)                          # add body text to document (make it searchable using full-text index)
211                          $doc->add_text($val);                          $doc->add_text($val);
212    
213                          print "R";                          $log .= "R";
214                  } else {                  } else {
215                          print ".";                          $log .= ".";
216                  }                  }
217    
218          }          }
# Line 126  while (my $row = $sth->fetchrow_hashref( Line 221  while (my $row = $sth->fetchrow_hashref(
221    
222          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
223    
224          printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );          $log .= sprintf(" %d%% %.1f/s\r", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
225    
226            print STDERR $log;
227    
228  }  }
229    
230  my $cols = "'" . join("','", @cols) . "'";  my $cols = "'" . join("', '", @cols) . "'";
231    
232  foreach my $t (qw/UPDATE INSERT DELETE/) {  foreach my $t (qw/UPDATE INSERT DELETE/) {
233    
# Line 141  foreach my $t (qw/UPDATE INSERT DELETE/) Line 238  foreach my $t (qw/UPDATE INSERT DELETE/)
238                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}                  CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
239                          ON ${table} FOR EACH ROW                          ON ${table} FOR EACH ROW
240                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',                          EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
241                                  '$c->{pk_col}', $cols                                  '$c->{pk}', $cols
242                          )                          )
243    
244          };          };
# Line 153  foreach my $t (qw/UPDATE INSERT DELETE/) Line 250  foreach my $t (qw/UPDATE INSERT DELETE/)
250  }  }
251    
252  $dbh->commit;  $dbh->commit;
253    
254    my $col_names = join(', ', @cols);
255    my $col_def = join(', ', map { "$_ text" } @cols);
256    
257    print "
258    -- example SQL search query:
259    
260    SELECT $col_names
261    FROM pgest(
262            -- node, login, passwd, depth
263            '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
264            -- full text search
265            'foo bar',
266            -- attribute filter, order, limit, offset
267            null, null, null, null,
268            -- return columns
269            array[$cols]
270    ) as ($col_def);
271    
272    ";

Legend:
Removed from v.63  
changed lines
  Added in v.69

  ViewVC Help
Powered by ViewVC 1.1.26