/[pgestraier]/trunk/bin/pgest-index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/pgest-index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 65 - (hide annotations)
Mon Aug 7 14:47:00 2006 UTC (17 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 3591 byte(s)
print example SQL search query at end
1 dpavlin 62 #!/usr/bin/perl -w
2    
3     use strict;
4 dpavlin 63 use Search::Estraier 0.07;
5 dpavlin 62 use DBI;
6     use Data::Dumper;
7     use Encode qw/from_to/;
8     use Time::HiRes qw/time/;
9     use Getopt::Long;
10    
11     =head1 NAME
12    
13     pgest-index.pl - create full-text index of some columns in your database
14    
15     =cut
16    
17     my $c = {
18     name => 'imenik',
19     node_url => 'http://localhost:1978/node/imenik',
20     dbi => 'Pg:dbname=vip',
21     sql => qq{
22     select ime,tel from imenik
23     },
24     pk_col => 'tel',
25     db_encoding => 'iso-8859-2',
26     debug => 1,
27     user => 'admin',
28     passwd => 'admin',
29     };
30    
31     GetOptions($c, qw/node_url=s sql=s pk_col=s eb_encoding=s debug+ user=s passwd=s/);
32    
33     warn "# c: ", Dumper($c) if ($c->{debug});
34    
35 dpavlin 63 my $table = $c->{name} || die "no name?";
36    
37 dpavlin 62 # create and configure node
38     my $node = new Search::Estraier::Node(
39     url => $c->{node_url},
40     user => $c->{user},
41     passwd => $c->{passwd},
42     croak_on_error => 1,
43     create => 1,
44     debug => $c->{debug} >= 4 ? 1 : 0,
45     );
46    
47 dpavlin 63 $node->master(
48     action => 'nodeclr',
49     name => $table,
50     );
51    
52 dpavlin 62 # create DBI connection
53     my $dbh = DBI->connect("DBI:$c->{dbi}","","") || die $DBI::errstr;
54    
55 dpavlin 63 # create PostgreSQL functions
56     $dbh->do(qq{
57    
58     CREATE OR REPLACE FUNCTION pgest(text, text, text, int, text, text, text, int, int, text[])
59     RETURNS setof record
60     AS 'pgest','pgest_node'
61     LANGUAGE 'C' IMMUTABLE CALLED ON NULL INPUT;
62    
63     CREATE OR REPLACE FUNCTION pgest_trigger() RETURNS TRIGGER
64     AS 'pgest', 'pgest_trigger'
65     LANGUAGE 'C' STRICT;
66    
67     }) || die $dbh->errstr();
68    
69    
70     # drop existing triggers
71     foreach my $t (qw/UPDATE INSERT DELETE/) {
72     my $lc_t = lc($t);
73     $dbh->do(qq{ DROP TRIGGER pgest_trigger_${lc_t} ON ${table} });
74     }
75    
76     $dbh->begin_work;
77    
78 dpavlin 62 my $sth = $dbh->prepare($c->{sql}) || die $dbh->errstr();
79     $sth->execute() || die $sth->errstr();
80    
81     my @cols = @{ $sth->{NAME} };
82    
83     warn "# columns: ",join(",", @cols),"\n" if ($c->{debug});
84    
85     my $total = $sth->rows;
86     my $i = 1;
87    
88     my $t = time();
89     my $pk_col = $c->{pk_col} || 'id';
90    
91     while (my $row = $sth->fetchrow_hashref() ) {
92    
93     warn "# row: ",Dumper($row) if ($c->{debug} >= 3);
94    
95     # create document
96     my $doc = new Search::Estraier::Document;
97    
98     if (my $id = $row->{$pk_col}) {
99     $doc->add_attr('@uri', $id);
100     } else {
101     die "can't find pk_col column '$pk_col' in results\n";
102     }
103    
104     printf "%4d ",$i;
105    
106     while (my ($col,$val) = each %{$row}) {
107    
108     if ($val) {
109     # change encoding?
110     from_to($val, ($c->{db_encoding} || 'ISO-8859-1'), 'UTF-8');
111    
112     # add attributes (make column usable from attribute search)
113     $doc->add_attr($col, $val);
114    
115     # add body text to document (make it searchable using full-text index)
116     $doc->add_text($val);
117    
118     print "R";
119     } else {
120     print ".";
121     }
122    
123     }
124    
125     warn "# doc draft: ",$doc->dump_draft, "\n" if ($c->{debug} >= 2);
126    
127     die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
128    
129     printf (" %d%% %.1f/s\n", int(( $i++ / $total) * 100), ( $i / (time() - $t) ) );
130    
131     }
132    
133 dpavlin 65 my $cols = "'" . join("', '", @cols) . "'";
134 dpavlin 62
135     foreach my $t (qw/UPDATE INSERT DELETE/) {
136    
137     my $lc_t = lc($t);
138    
139     my $sql = qq{
140    
141     CREATE TRIGGER pgest_trigger_${lc_t} AFTER ${t}
142     ON ${table} FOR EACH ROW
143     EXECUTE PROCEDURE pgest_trigger('$c->{node_url}','$c->{user}','$c->{passwd}',
144     '$c->{pk_col}', $cols
145     )
146    
147     };
148    
149 dpavlin 63 #warn "$sql\n";
150 dpavlin 62
151     $dbh->do( $sql ) || die $dbh->errstr();
152    
153     }
154 dpavlin 63
155     $dbh->commit;
156 dpavlin 65
157     my $col_names = join(', ', @cols);
158     my $col_def = join(', ', map { "$_ text" } @cols);
159    
160     print "example SQL search query:
161    
162    
163     SELECT $col_names
164     FROM pgest(
165     -- node, login, passwd, depth
166     '$c->{node_url}', '$c->{user}', '$c->{passwd}', 0,
167     -- full text search
168     'foo bar',
169     -- attribute filter, order by, limit, offset
170     null, null, null, null,
171     -- return columns
172     array[$cols]
173     ) as ($col_def);
174    
175     ";

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26