/[pgestraier]/trunk/pgest.c
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/pgest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 40 - (hide annotations)
Sat Sep 10 18:51:13 2005 UTC (18 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 8667 byte(s)
add support for multiple attributes delimited by {{!}}

1 dpavlin 1 /*
2     * integrate Hyper Estraier into PostgreSQL
3     *
4     * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5     *
6     * TODO:
7     * - all
8     *
9     * NOTES:
10     * - clear structures with memset to support hash indexes (who would like
11     * to create hash index on table returned from function?)
12     * - number of returned rows is set by PostgreSQL evaluator, see:
13     * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14     *
15     * Based on:
16     * - C example from PostgreSQL documentation (BSD licence)
17     * - example002.c from Hyper Estraier (GPL)
18     * - _textin/_textout from pgcurl.c (LGPL)
19     *
20     * This code is licenced under GPL
21     */
22    
23     #include "postgres.h"
24     #include "fmgr.h"
25     #include "funcapi.h"
26     #include "utils/builtins.h"
27     #include "utils/array.h"
28     #include "miscadmin.h"
29     #include <estraier.h>
30     #include <cabin.h>
31    
32     #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
33     #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
34     #define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
35     #define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
36    
37 dpavlin 27 /* SortMem got renamed in PostgreSQL 8.0 */
38     #ifndef SortMem
39     #define SortMem 16 * 1024
40     #endif
41    
42 dpavlin 40 #define ATTR_DELIMITER "{{!}}"
43    
44 dpavlin 5 /* prototype */
45     char *attr2text(ESTDOC *doc, char *attr);
46 dpavlin 1
47    
48     /* work in progress */
49 dpavlin 19 PG_FUNCTION_INFO_V1(pgest_attr);
50     Datum pgest_attr(PG_FUNCTION_ARGS)
51 dpavlin 1 {
52 dpavlin 31 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(6);
53 dpavlin 25 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
54     int attr_ndims = ARR_NDIM(attr_arr);
55     int *attr_dim_counts = ARR_DIMS(attr_arr);
56     int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
57 dpavlin 19 int ncols = 0;
58     int nrows = 0;
59     int indx[MAXDIM];
60 dpavlin 25 int16 attr_len;
61     bool attr_byval;
62     char attr_align;
63 dpavlin 1 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
64     AttInMetadata *attinmeta;
65     TupleDesc tupdesc;
66 dpavlin 19 Tuplestorestate *tupstore = NULL;
67 dpavlin 1 HeapTuple tuple;
68     MemoryContext per_query_ctx;
69     MemoryContext oldcontext;
70     Datum dvalue;
71     char **values;
72 dpavlin 19 int rsinfo_ncols;
73 dpavlin 1 int i, j;
74 dpavlin 19 /* estvars */
75 dpavlin 25 ESTDB *db;
76     ESTCOND *cond;
77     ESTDOC *doc;
78     const CBLIST *texts;
79     int ecode, *est_result, resnum;
80     int limit = 0;
81     int offset = 0;
82    
83 dpavlin 19 char *index_path;
84     char *query;
85     char *attr;
86 dpavlin 31 char *order;
87 dpavlin 1
88 dpavlin 19
89     /* only allow 1D input array */
90 dpavlin 25 if (attr_ndims == 1)
91 dpavlin 19 {
92 dpavlin 25 ncols = attr_dim_counts[0];
93 dpavlin 19 }
94     else
95     ereport(ERROR,
96     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
97     errmsg("invalid input array"),
98     errdetail("Input array must have 1 dimension")));
99    
100 dpavlin 1 /* check to see if caller supports us returning a tuplestore */
101     if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
102     ereport(ERROR,
103     (errcode(ERRCODE_SYNTAX_ERROR),
104     errmsg("materialize mode required, but it is not " \
105     "allowed in this context")));
106    
107 dpavlin 19 /* get info about element type needed to construct the array */
108 dpavlin 25 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
109 dpavlin 19
110 dpavlin 1 /* get the requested return tuple description */
111     tupdesc = rsinfo->expectedDesc;
112 dpavlin 19 rsinfo_ncols = tupdesc->natts;
113 dpavlin 1
114     /*
115     * The requested tuple description better match up with the array
116     * we were given.
117     */
118 dpavlin 19 if (rsinfo_ncols != ncols)
119     ereport(ERROR,
120     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
121     errmsg("invalid input array"),
122     errdetail("Number of elements in array must match number of query specified columns.")));
123    
124 dpavlin 1 /* OK, use it */
125     attinmeta = TupleDescGetAttInMetadata(tupdesc);
126    
127     /* Now go to work */
128     rsinfo->returnMode = SFRM_Materialize;
129    
130     per_query_ctx = fcinfo->flinfo->fn_mcxt;
131     oldcontext = MemoryContextSwitchTo(per_query_ctx);
132    
133     /* initialize our tuplestore */
134     tupstore = tuplestore_begin_heap(true, false, SortMem);
135    
136 dpavlin 19
137     /* take rest of arguments from function */
138    
139     /* index path */
140     if (PG_ARGISNULL(0)) {
141     ereport(ERROR,
142     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
143     errmsg("index path can't be null"),
144     errdetail("Index path must be valid full path to HyperEstraier index")));
145     }
146     index_path = _textout(PG_GETARG_TEXT_P(0));
147    
148     /* query string */
149 dpavlin 31 if (PG_ARGISNULL(1)) {
150 dpavlin 19 query = "";
151     } else {
152     query = _textout(PG_GETARG_TEXT_P(1));
153     }
154    
155     /* atribute filter */
156     if (PG_ARGISNULL(2)) {
157     attr = "";
158     } else {
159     attr = _textout(PG_GETARG_TEXT_P(2));
160     }
161 dpavlin 31
162     /* sort order */
163     if (PG_ARGISNULL(3)) {
164     order = "";
165     } else {
166     order = _textout(PG_GETARG_TEXT_P(3));
167     }
168 dpavlin 19
169 dpavlin 31
170 dpavlin 19 /* limit */
171 dpavlin 31 if (PG_ARGISNULL(4)) {
172 dpavlin 19 limit = 0;
173     } else {
174 dpavlin 31 limit = PG_GETARG_INT32(4);
175 dpavlin 19 }
176    
177     /* offset */
178 dpavlin 31 if (PG_ARGISNULL(5)) {
179 dpavlin 19 offset = 0;
180     } else {
181 dpavlin 31 offset = PG_GETARG_INT32(5);
182 dpavlin 19 }
183    
184    
185     /* open the database */
186     elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
187    
188     if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
189     ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
190     errmsg("est_db_open: can't open %s: %d", index_path, ecode),
191     errdetail(est_err_msg(ecode))));
192     }
193    
194 dpavlin 20 elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
195 dpavlin 19
196     /* create a search condition object */
197     if (!(cond = est_cond_new())) {
198     ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
199     errmsg("pgest_attr: est_cond_new failed")));
200     }
201    
202     /* set the search phrase to the search condition object */
203     if (! PG_ARGISNULL(1) && strlen(query) > 0)
204     est_cond_set_phrase(cond, query);
205    
206     /* minimum valid attribute length is 10: @a STREQ a */
207     if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
208 dpavlin 40 elog(DEBUG1,"attributes: %s", attr);
209     char *curr_attr;
210     curr_attr = strtok(attr, ATTR_DELIMITER);
211     while (curr_attr) {
212     elog(DEBUG1,"est_cond_add_attr(%s)", curr_attr);
213     est_cond_add_attr(cond, curr_attr);
214     curr_attr = strtok(NULL, ATTR_DELIMITER);
215     }
216 dpavlin 19 }
217    
218 dpavlin 31 /* set the search phrase to the search condition object */
219     if (! PG_ARGISNULL(3) && strlen(order) > 0) {
220     elog(DEBUG1,"est_cond_set_order(%s)", order);
221     est_cond_set_order(cond, order);
222     }
223    
224 dpavlin 38 if (limit) {
225     elog(DEBUG1,"est_cond_set_max(%d)", limit + offset);
226     est_cond_set_max(cond, limit + offset);
227     }
228    
229 dpavlin 19 /* get the result of search */
230     est_result = est_db_search(db, cond, &resnum, NULL);
231    
232     /* check if results exists */
233     if ( 0 == resnum ) {
234     elog(INFO, "pgest_attr: no results for: %s", query );
235     }
236    
237     /* total number of tuples to be returned */
238     if (limit && limit < resnum) {
239 dpavlin 31 nrows = limit;
240 dpavlin 19 } else {
241     nrows = resnum - offset;
242     }
243    
244    
245     elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
246    
247    
248 dpavlin 1 values = (char **) palloc(ncols * sizeof(char *));
249    
250     for (i = 0; i < nrows; i++)
251     {
252 dpavlin 19
253     /* get result from estraier */
254     if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
255     elog(INFO, "can't find result %d", i + offset);
256     } else {
257     elog(DEBUG1, "URI: %s\n Title: %s\n",
258     est_doc_attr(doc, "@uri"),
259     est_doc_attr(doc, "@title")
260     );
261     }
262    
263     /* iterate over results */
264 dpavlin 1 for (j = 0; j < ncols; j++)
265     {
266 dpavlin 19 bool isnull;
267    
268     /* array value of this position */
269 dpavlin 25 indx[0] = j + attr_dim_lower_bounds[0];
270 dpavlin 19
271 dpavlin 25 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
272 dpavlin 19
273     if (!isnull && doc)
274     values[j] = DatumGetCString(
275     attr2text(doc,
276 dpavlin 20 (char *)DirectFunctionCall1(textout, dvalue)
277 dpavlin 19 ));
278     else
279     values[j] = NULL;
280 dpavlin 1 }
281     /* construct the tuple */
282     tuple = BuildTupleFromCStrings(attinmeta, values);
283    
284     /* now store it */
285     tuplestore_puttuple(tupstore, tuple);
286 dpavlin 19
287    
288     /* delete estraier document object */
289     est_doc_delete(doc);
290 dpavlin 1 }
291    
292     tuplestore_donestoring(tupstore);
293     rsinfo->setResult = tupstore;
294    
295     /*
296     * SFRM_Materialize mode expects us to return a NULL Datum. The actual
297     * tuples are in our tuplestore and passed back through
298     * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
299     * that we actually used to build our tuples with, so the caller can
300     * verify we did what it was expecting.
301     */
302     rsinfo->setDesc = tupdesc;
303     MemoryContextSwitchTo(oldcontext);
304    
305 dpavlin 31 est_cond_delete(cond);
306    
307 dpavlin 19 if(!est_db_close(db, &ecode)){
308     ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
309     errmsg("est_db_close: %d", ecode),
310     errdetail(est_err_msg(ecode))));
311     }
312    
313 dpavlin 1 return (Datum) 0;
314     }
315    
316    
317     /* make text var from attr */
318     char *attr2text(ESTDOC *doc, char *attr) {
319     char *val;
320     const char *attrval;
321     int len;
322 dpavlin 4 int attrlen;
323 dpavlin 1
324 dpavlin 2 elog(DEBUG1, "doc: %08x, attr: %s", doc, attr);
325 dpavlin 1
326 dpavlin 4 if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
327     val = (char *) palloc(attrlen * sizeof(char));
328 dpavlin 1 } else {
329     return (Datum) NULL;
330     }
331    
332     len = strlen(attrval);
333 dpavlin 2 elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
334 dpavlin 1
335     len++;
336     len *= sizeof(char);
337    
338     elog(DEBUG2, "palloc(%d)", len);
339    
340     val = palloc(len);
341    
342     memset(val, 0, len);
343     strncpy(val, attrval, len);
344    
345     elog(DEBUG2, "val=%s", val);
346    
347     return val;
348     }
349    

  ViewVC Help
Powered by ViewVC 1.1.26