/[pgestraier]/trunk/pgest.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/pgest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 25 - (hide annotations)
Fri May 27 21:06:01 2005 UTC (18 years, 11 months ago) by dpavlin
File MIME type: text/plain
File size: 7902 byte(s)
removed obsolete implementation without attribute specification which was
also slower according to benchmarks.

1 dpavlin 1 /*
2     * integrate Hyper Estraier into PostgreSQL
3     *
4     * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5     *
6     * TODO:
7     * - all
8     *
9     * NOTES:
10     * - clear structures with memset to support hash indexes (who would like
11     * to create hash index on table returned from function?)
12     * - number of returned rows is set by PostgreSQL evaluator, see:
13     * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14     *
15     * Based on:
16     * - C example from PostgreSQL documentation (BSD licence)
17     * - example002.c from Hyper Estraier (GPL)
18     * - _textin/_textout from pgcurl.c (LGPL)
19     *
20     * This code is licensed under GPL
21     */
22    
23     #include "postgres.h"
24     #include "fmgr.h"
25     #include "funcapi.h"
26     #include "utils/builtins.h"
27     #include "utils/array.h"
28     #include "miscadmin.h"
29     #include <estraier.h>
30     #include <cabin.h>
31    
32     #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
33     #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
34     #define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
35     #define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
36    
37 dpavlin 5 /* prototype */
38     char *attr2text(ESTDOC *doc, char *attr);
39 dpavlin 1
40    
41     /* work in progress */
42 dpavlin 19 PG_FUNCTION_INFO_V1(pgest_attr);
43     Datum pgest_attr(PG_FUNCTION_ARGS)
44 dpavlin 1 {
45 dpavlin 19 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(5);
46 dpavlin 25 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
47     int attr_ndims = ARR_NDIM(attr_arr);
48     int *attr_dim_counts = ARR_DIMS(attr_arr);
49     int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
50 dpavlin 19 int ncols = 0;
51     int nrows = 0;
52     int indx[MAXDIM];
53 dpavlin 25 int16 attr_len;
54     bool attr_byval;
55     char attr_align;
56 dpavlin 1 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
57     AttInMetadata *attinmeta;
58     TupleDesc tupdesc;
59 dpavlin 19 Tuplestorestate *tupstore = NULL;
60 dpavlin 1 HeapTuple tuple;
61     MemoryContext per_query_ctx;
62     MemoryContext oldcontext;
63     Datum dvalue;
64     char **values;
65 dpavlin 19 int rsinfo_ncols;
66 dpavlin 1 int i, j;
67 dpavlin 19 /* estvars */
68 dpavlin 25 ESTDB *db;
69     ESTCOND *cond;
70     ESTDOC *doc;
71     const CBLIST *texts;
72     int ecode, *est_result, resnum;
73     int limit = 0;
74     int offset = 0;
75    
76 dpavlin 19 char *index_path;
77     char *query;
78     char *attr;
79 dpavlin 1
80 dpavlin 19
81     /* only allow 1D input array */
82 dpavlin 25 if (attr_ndims == 1)
83 dpavlin 19 {
84 dpavlin 25 ncols = attr_dim_counts[0];
85 dpavlin 19 }
86     else
87     ereport(ERROR,
88     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
89     errmsg("invalid input array"),
90     errdetail("Input array must have 1 dimension")));
91    
92 dpavlin 1 /* check to see if caller supports us returning a tuplestore */
93     if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
94     ereport(ERROR,
95     (errcode(ERRCODE_SYNTAX_ERROR),
96     errmsg("materialize mode required, but it is not " \
97     "allowed in this context")));
98    
99 dpavlin 19 /* get info about element type needed to construct the array */
100 dpavlin 25 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
101 dpavlin 19
102 dpavlin 1 /* get the requested return tuple description */
103     tupdesc = rsinfo->expectedDesc;
104 dpavlin 19 rsinfo_ncols = tupdesc->natts;
105 dpavlin 1
106     /*
107     * The requested tuple description better match up with the array
108     * we were given.
109     */
110 dpavlin 19 if (rsinfo_ncols != ncols)
111     ereport(ERROR,
112     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
113     errmsg("invalid input array"),
114     errdetail("Number of elements in array must match number of query specified columns.")));
115    
116 dpavlin 1 /* OK, use it */
117     attinmeta = TupleDescGetAttInMetadata(tupdesc);
118    
119     /* Now go to work */
120     rsinfo->returnMode = SFRM_Materialize;
121    
122     per_query_ctx = fcinfo->flinfo->fn_mcxt;
123     oldcontext = MemoryContextSwitchTo(per_query_ctx);
124    
125     /* initialize our tuplestore */
126     tupstore = tuplestore_begin_heap(true, false, SortMem);
127    
128 dpavlin 19
129     /* take rest of arguments from function */
130    
131     /* index path */
132     if (PG_ARGISNULL(0)) {
133     ereport(ERROR,
134     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
135     errmsg("index path can't be null"),
136     errdetail("Index path must be valid full path to HyperEstraier index")));
137     }
138     index_path = _textout(PG_GETARG_TEXT_P(0));
139    
140     /* query string */
141     if (PG_ARGISNULL(0)) {
142     query = "";
143     } else {
144     query = _textout(PG_GETARG_TEXT_P(1));
145     }
146    
147     /* atribute filter */
148     if (PG_ARGISNULL(2)) {
149     attr = "";
150     } else {
151     attr = _textout(PG_GETARG_TEXT_P(2));
152     }
153    
154     /* limit */
155     if (PG_ARGISNULL(3)) {
156     limit = 0;
157     } else {
158     limit = PG_GETARG_INT32(3);
159     }
160    
161     /* offset */
162     if (PG_ARGISNULL(4)) {
163     offset = 0;
164     } else {
165     offset = PG_GETARG_INT32(4);
166     }
167    
168    
169     /* open the database */
170     elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
171    
172     if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
173     ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
174     errmsg("est_db_open: can't open %s: %d", index_path, ecode),
175     errdetail(est_err_msg(ecode))));
176     }
177    
178 dpavlin 20 elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
179 dpavlin 19
180     /* create a search condition object */
181     if (!(cond = est_cond_new())) {
182     ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
183     errmsg("pgest_attr: est_cond_new failed")));
184     }
185    
186     /* set the search phrase to the search condition object */
187     if (! PG_ARGISNULL(1) && strlen(query) > 0)
188     est_cond_set_phrase(cond, query);
189    
190     /* minimum valid attribute length is 10: @a STREQ a */
191     if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
192 dpavlin 20 elog(DEBUG1,"est_cond_add_attr(%s)", attr);
193 dpavlin 19 est_cond_add_attr(cond, attr);
194     }
195    
196     /* get the result of search */
197     est_result = est_db_search(db, cond, &resnum, NULL);
198    
199     /* check if results exists */
200     if ( 0 == resnum ) {
201     elog(INFO, "pgest_attr: no results for: %s", query );
202     }
203    
204     /* total number of tuples to be returned */
205     if (limit && limit < resnum) {
206     nrows = limit - offset;
207     } else {
208     nrows = resnum - offset;
209     }
210    
211    
212     elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
213    
214    
215 dpavlin 1 values = (char **) palloc(ncols * sizeof(char *));
216    
217     for (i = 0; i < nrows; i++)
218     {
219 dpavlin 19
220     /* get result from estraier */
221     if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
222     elog(INFO, "can't find result %d", i + offset);
223     } else {
224     elog(DEBUG1, "URI: %s\n Title: %s\n",
225     est_doc_attr(doc, "@uri"),
226     est_doc_attr(doc, "@title")
227     );
228     }
229    
230     /* iterate over results */
231 dpavlin 1 for (j = 0; j < ncols; j++)
232     {
233 dpavlin 19 bool isnull;
234    
235     /* array value of this position */
236 dpavlin 25 indx[0] = j + attr_dim_lower_bounds[0];
237 dpavlin 19
238 dpavlin 25 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
239 dpavlin 19
240     if (!isnull && doc)
241     values[j] = DatumGetCString(
242     attr2text(doc,
243 dpavlin 20 (char *)DirectFunctionCall1(textout, dvalue)
244 dpavlin 19 ));
245     else
246     values[j] = NULL;
247 dpavlin 1 }
248     /* construct the tuple */
249     tuple = BuildTupleFromCStrings(attinmeta, values);
250    
251     /* now store it */
252     tuplestore_puttuple(tupstore, tuple);
253 dpavlin 19
254    
255     /* delete estraier document object */
256     est_doc_delete(doc);
257 dpavlin 1 }
258    
259     tuplestore_donestoring(tupstore);
260     rsinfo->setResult = tupstore;
261    
262     /*
263     * SFRM_Materialize mode expects us to return a NULL Datum. The actual
264     * tuples are in our tuplestore and passed back through
265     * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
266     * that we actually used to build our tuples with, so the caller can
267     * verify we did what it was expecting.
268     */
269     rsinfo->setDesc = tupdesc;
270     MemoryContextSwitchTo(oldcontext);
271    
272 dpavlin 19 if(!est_db_close(db, &ecode)){
273     ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
274     errmsg("est_db_close: %d", ecode),
275     errdetail(est_err_msg(ecode))));
276     }
277    
278 dpavlin 1 return (Datum) 0;
279     }
280    
281    
282     /* make text var from attr */
283     char *attr2text(ESTDOC *doc, char *attr) {
284     char *val;
285     const char *attrval;
286     int len;
287 dpavlin 4 int attrlen;
288 dpavlin 1
289 dpavlin 2 elog(DEBUG1, "doc: %08x, attr: %s", doc, attr);
290 dpavlin 1
291 dpavlin 4 if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
292     val = (char *) palloc(attrlen * sizeof(char));
293 dpavlin 1 } else {
294     return (Datum) NULL;
295     }
296    
297     len = strlen(attrval);
298 dpavlin 2 elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
299 dpavlin 1
300     len++;
301     len *= sizeof(char);
302    
303     elog(DEBUG2, "palloc(%d)", len);
304    
305     val = palloc(len);
306    
307     memset(val, 0, len);
308     strncpy(val, attrval, len);
309    
310     elog(DEBUG2, "val=%s", val);
311    
312     return val;
313     }
314    

  ViewVC Help
Powered by ViewVC 1.1.26