/[pgestraier]/trunk/pgest.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/pgest.c

Parent Directory | Revision Log


Revision 19 - (hide annotations)
Thu May 26 17:56:53 2005 UTC (18 years, 11 months ago) by dpavlin
File MIME type: text/plain
File size: 13167 byte(s)
Created a new function that lets users specify which attributes should
be returned, like this:

select * from pgest('/index/path', 'query', '@title ISTRINC foo', limit, offset, {'@id','@title','@size'}) as (id text, title text, size text);

1 dpavlin 1 /*
2     * integrate Hyper Estraier into PostgreSQL
3     *
4     * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5     *
6     * TODO:
7     * - all
8     *
9     * NOTES:
10     * - clear structures with memset to support hash indexes (who would like
11     * to create hash index on table returned from function?)
12     * - number of returned rows is set by PostgreSQL evaluator, see:
13     * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14     *
15     * Based on:
16     * - C example from PostgreSQL documentation (BSD licence)
17     * - example002.c from Hyper Estraier (GPL)
18     * - _textin/_textout from pgcurl.c (LGPL)
19     *
20     * This code is licenced under GPL
21     */
22    
23     #include "postgres.h"
24     #include "fmgr.h"
25     #include "funcapi.h"
26     #include "utils/builtins.h"
27     #include "utils/array.h"
28     #include "miscadmin.h"
29     #include <estraier.h>
30     #include <cabin.h>
31    
32     #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
33     #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
34     #define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
35     #define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
36    
37 dpavlin 5 /* prototype */
38     char *attr2text(ESTDOC *doc, char *attr);
39 dpavlin 1
40     ESTDB *db;
41     ESTCOND *cond;
42     ESTDOC *doc;
43     const CBLIST *texts;
44     int ecode, *est_result, resnum, i, j;
45 dpavlin 3 int limit = 0;
46     int offset = 0;
47 dpavlin 1
48     /* define PostgreSQL v1 function */
49     PG_FUNCTION_INFO_V1(pgest);
50     Datum pgest(PG_FUNCTION_ARGS) {
51    
52     FuncCallContext *funcctx;
53     int call_cntr;
54     int max_calls;
55     TupleDesc tupdesc;
56     TupleTableSlot *slot;
57     AttInMetadata *attinmeta;
58     char *index_path;
59     char *query;
60 dpavlin 5 char *attr;
61 dpavlin 1
62     /* stuff done only on the first call of the function */
63     if (SRF_IS_FIRSTCALL()) {
64     MemoryContext oldcontext;
65    
66     /* create a function context for cross-call persistence */
67     funcctx = SRF_FIRSTCALL_INIT();
68    
69     /* switch to memory context appropriate for multiple function calls */
70     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
71 dpavlin 12 /* take arguments from function */
72 dpavlin 1
73 dpavlin 12 /* index path */
74     if (PG_ARGISNULL(0)) {
75     elog(ERROR, "index path can't be null");
76     SRF_RETURN_DONE(funcctx);
77     }
78     index_path = _textout(PG_GETARG_TEXT_P(0));
79    
80     /* query string */
81     if (PG_ARGISNULL(0)) {
82     query = "";
83     } else {
84     query = _textout(PG_GETARG_TEXT_P(1));
85     }
86    
87     /* atribute filter */
88     if (PG_ARGISNULL(2)) {
89     attr = "";
90     } else {
91     attr = _textout(PG_GETARG_TEXT_P(2));
92     }
93    
94     /* limit */
95 dpavlin 14 if (PG_ARGISNULL(3)) {
96     limit = 0;
97     } else {
98     limit = PG_GETARG_INT32(3);
99     }
100 dpavlin 12
101     /* offset */
102 dpavlin 14 if (PG_ARGISNULL(4)) {
103     offset = 0;
104     } else {
105     offset = PG_GETARG_INT32(4);
106     }
107 dpavlin 12
108    
109 dpavlin 1 /* open the database */
110     elog(DEBUG1, "pgest: est_db_open(%s)", index_path);
111    
112     if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
113     elog(ERROR, "est_db_open: can't open %s [%d]: %s", index_path, ecode, est_err_msg(ecode));
114     SRF_RETURN_DONE(funcctx);
115     }
116    
117 dpavlin 12 elog(INFO, "pgest: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
118 dpavlin 1
119     /* create a search condition object */
120     if (!(cond = est_cond_new())) {
121     elog(INFO, "pgest: est_cond_new failed");
122     SRF_RETURN_DONE(funcctx);
123     }
124    
125     /* set the search phrase to the search condition object */
126 dpavlin 12 if (! PG_ARGISNULL(1) && strlen(query) > 0)
127 dpavlin 9 est_cond_set_phrase(cond, query);
128 dpavlin 1
129 dpavlin 5 /* minimum valid attribute length is 10: @a STREQ a */
130 dpavlin 12 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
131 dpavlin 5 elog(INFO,"est_cond_add_attr(%s)", attr);
132     est_cond_add_attr(cond, attr);
133     }
134    
135 dpavlin 1 /* get the result of search */
136     est_result = est_db_search(db, cond, &resnum, NULL);
137    
138     /* total number of tuples to be returned */
139 dpavlin 5 if (limit && limit < resnum) {
140     funcctx->max_calls = limit - offset;
141     } else {
142     funcctx->max_calls = resnum - offset;
143     }
144 dpavlin 1
145     /* check if results exists */
146     if ( 0 == funcctx->max_calls )
147     elog(INFO, "pgest: no results for: %s", query );
148    
149 dpavlin 5 elog(DEBUG1, "pgest: found %d hits for %s", resnum, query);
150 dpavlin 1
151     /* Build a tuple description for a __pgest tuple */
152     tupdesc = RelationNameGetTupleDesc("__pgest");
153    
154     /* allocate a slot for a tuple with this tupdesc */
155     slot = TupleDescGetSlot(tupdesc);
156    
157     /* assign slot to function context */
158     funcctx->slot = slot;
159    
160     /*
161     * generate attribute metadata needed later to produce tuples from raw
162     * C strings
163     */
164     attinmeta = TupleDescGetAttInMetadata(tupdesc);
165     funcctx->attinmeta = attinmeta;
166    
167     MemoryContextSwitchTo(oldcontext);
168    
169 dpavlin 2 elog(DEBUG1, "SRF_IS_FIRSTCALL done");
170 dpavlin 1 }
171    
172     /* stuff done on every call of the function */
173     funcctx = SRF_PERCALL_SETUP();
174    
175     call_cntr = funcctx->call_cntr;
176     max_calls = funcctx->max_calls;
177     slot = funcctx->slot;
178     attinmeta = funcctx->attinmeta;
179 dpavlin 3
180     if (limit && call_cntr > limit - 1) {
181     elog(INFO, "call_cntr: %d limit: %d", call_cntr, limit);
182     SRF_RETURN_DONE(funcctx);
183     }
184    
185 dpavlin 1 if (call_cntr < max_calls) {
186     char **values;
187     HeapTuple tuple;
188     Datum result;
189    
190 dpavlin 2 elog(DEBUG1, "pgest: loop count %d", call_cntr);
191 dpavlin 1
192     if (! est_result) {
193     elog(ERROR, "pgest: no estraier results");
194     SRF_RETURN_DONE(funcctx);
195     }
196    
197     /*
198     * Prepare a values array for storage in our slot.
199     * This should be an array of C strings which will
200     * be processed later by the type input functions.
201     */
202    
203 dpavlin 3 if (doc = est_db_get_doc(db, est_result[call_cntr + offset], 0)) {
204 dpavlin 1
205 dpavlin 2 elog(DEBUG1, "URI: %s\n Title: %s\n",
206 dpavlin 1 est_doc_attr(doc, "@uri"),
207     est_doc_attr(doc, "@title")
208     );
209    
210     values = (char **) palloc(4 * sizeof(char *));
211    
212     // values[0] = (char *) palloc(strlen(_estval) * sizeof(char));
213    
214 dpavlin 5 values[0] = (char *) attr2text(doc,"@id");
215     values[1] = (char *) attr2text(doc,"@uri");
216     values[2] = (char *) attr2text(doc,"@title");
217 dpavlin 16 values[3] = (char *) attr2text(doc,"@size");
218 dpavlin 1
219     /* destloy the document object */
220     elog(DEBUG2, "est_doc_delete");
221     est_doc_delete(doc);
222     } else {
223     elog(INFO, "no result from estraier");
224 dpavlin 7 values[0] = DatumGetCString( "" );
225     values[1] = DatumGetCString( "" );
226     values[2] = DatumGetCString( "" );
227     values[3] = DatumGetCString( "" );
228 dpavlin 1 }
229    
230    
231     elog(DEBUG2, "build tuple");
232     /* build a tuple */
233     tuple = BuildTupleFromCStrings(attinmeta, values);
234    
235     elog(DEBUG2, "make tuple into datum");
236     /* make the tuple into a datum */
237     result = TupleGetDatum(slot, tuple);
238    
239     elog(DEBUG2, "cleanup");
240     /* clean up ? */
241     /*
242     pfree(values[0]);
243     pfree(values[1]);
244     pfree(values[2]);
245     pfree(values[3]);
246     pfree(values);
247     */
248    
249     elog(DEBUG2, "cleanup over");
250    
251     SRF_RETURN_NEXT(funcctx, result);
252     } else {
253 dpavlin 2 elog(DEBUG1, "loop over");
254 dpavlin 1
255     if(!est_db_close(db, &ecode)){
256     elog(INFO, "est_db_close error: %s", est_err_msg(ecode));
257     }
258    
259     /* do when there is no more left */
260     SRF_RETURN_DONE(funcctx);
261     }
262     }
263    
264     /* work in progress */
265 dpavlin 19 PG_FUNCTION_INFO_V1(pgest_attr);
266     Datum pgest_attr(PG_FUNCTION_ARGS)
267 dpavlin 1 {
268 dpavlin 19 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(5);
269     Oid element_type = ARR_ELEMTYPE(attr_arr);
270     int ndims = ARR_NDIM(attr_arr);
271     int *dim_counts = ARR_DIMS(attr_arr);
272     int *dim_lower_bounds = ARR_LBOUND(attr_arr);
273     int ncols = 0;
274     int nrows = 0;
275     int indx[MAXDIM];
276 dpavlin 1 int16 typlen;
277     bool typbyval;
278     char typalign;
279     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
280     AttInMetadata *attinmeta;
281     TupleDesc tupdesc;
282 dpavlin 19 Tuplestorestate *tupstore = NULL;
283 dpavlin 1 HeapTuple tuple;
284     MemoryContext per_query_ctx;
285     MemoryContext oldcontext;
286     Datum dvalue;
287     char **values;
288 dpavlin 19 int rsinfo_ncols;
289 dpavlin 1 int i, j;
290 dpavlin 19 /* estvars */
291     char *index_path;
292     char *query;
293     char *attr;
294 dpavlin 1
295 dpavlin 19
296     /* only allow 1D input array */
297     if (ndims == 1)
298     {
299     ncols = dim_counts[0];
300     }
301     else
302     ereport(ERROR,
303     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
304     errmsg("invalid input array"),
305     errdetail("Input array must have 1 dimension")));
306    
307 dpavlin 1 /* check to see if caller supports us returning a tuplestore */
308     if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
309     ereport(ERROR,
310     (errcode(ERRCODE_SYNTAX_ERROR),
311     errmsg("materialize mode required, but it is not " \
312     "allowed in this context")));
313    
314 dpavlin 19 /* get info about element type needed to construct the array */
315     get_typlenbyvalalign(element_type, &typlen, &typbyval, &typalign);
316    
317 dpavlin 1 /* get the requested return tuple description */
318     tupdesc = rsinfo->expectedDesc;
319 dpavlin 19 rsinfo_ncols = tupdesc->natts;
320 dpavlin 1
321     /*
322     * The requested tuple description better match up with the array
323     * we were given.
324     */
325 dpavlin 19 if (rsinfo_ncols != ncols)
326     ereport(ERROR,
327     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
328     errmsg("invalid input array"),
329     errdetail("Number of elements in array must match number of query specified columns.")));
330    
331 dpavlin 1 /* OK, use it */
332     attinmeta = TupleDescGetAttInMetadata(tupdesc);
333    
334     /* Now go to work */
335     rsinfo->returnMode = SFRM_Materialize;
336    
337     per_query_ctx = fcinfo->flinfo->fn_mcxt;
338     oldcontext = MemoryContextSwitchTo(per_query_ctx);
339    
340     /* initialize our tuplestore */
341     tupstore = tuplestore_begin_heap(true, false, SortMem);
342    
343 dpavlin 19
344     /* take rest of arguments from function */
345    
346     /* index path */
347     if (PG_ARGISNULL(0)) {
348     ereport(ERROR,
349     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
350     errmsg("index path can't be null"),
351     errdetail("Index path must be valid full path to HyperEstraier index")));
352     }
353     index_path = _textout(PG_GETARG_TEXT_P(0));
354    
355     /* query string */
356     if (PG_ARGISNULL(0)) {
357     query = "";
358     } else {
359     query = _textout(PG_GETARG_TEXT_P(1));
360     }
361    
362     /* atribute filter */
363     if (PG_ARGISNULL(2)) {
364     attr = "";
365     } else {
366     attr = _textout(PG_GETARG_TEXT_P(2));
367     }
368    
369     /* limit */
370     if (PG_ARGISNULL(3)) {
371     limit = 0;
372     } else {
373     limit = PG_GETARG_INT32(3);
374     }
375    
376     /* offset */
377     if (PG_ARGISNULL(4)) {
378     offset = 0;
379     } else {
380     offset = PG_GETARG_INT32(4);
381     }
382    
383    
384     /* open the database */
385     elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
386    
387     if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
388     ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
389     errmsg("est_db_open: can't open %s: %d", index_path, ecode),
390     errdetail(est_err_msg(ecode))));
391     }
392    
393     elog(INFO, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
394    
395     /* create a search condition object */
396     if (!(cond = est_cond_new())) {
397     ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
398     errmsg("pgest_attr: est_cond_new failed")));
399     }
400    
401     /* set the search phrase to the search condition object */
402     if (! PG_ARGISNULL(1) && strlen(query) > 0)
403     est_cond_set_phrase(cond, query);
404    
405     /* minimum valid attribute length is 10: @a STREQ a */
406     if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
407     elog(INFO,"est_cond_add_attr(%s)", attr);
408     est_cond_add_attr(cond, attr);
409     }
410    
411     /* get the result of search */
412     est_result = est_db_search(db, cond, &resnum, NULL);
413    
414     /* check if results exists */
415     if ( 0 == resnum ) {
416     elog(INFO, "pgest_attr: no results for: %s", query );
417     }
418    
419     /* total number of tuples to be returned */
420     if (limit && limit < resnum) {
421     nrows = limit - offset;
422     } else {
423     nrows = resnum - offset;
424     }
425    
426    
427     elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
428    
429    
430 dpavlin 1 values = (char **) palloc(ncols * sizeof(char *));
431    
432     for (i = 0; i < nrows; i++)
433     {
434 dpavlin 19
435     /* get result from estraier */
436     if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
437     elog(INFO, "can't find result %d", i + offset);
438     } else {
439     elog(DEBUG1, "URI: %s\n Title: %s\n",
440     est_doc_attr(doc, "@uri"),
441     est_doc_attr(doc, "@title")
442     );
443     }
444    
445     /* iterate over results */
446 dpavlin 1 for (j = 0; j < ncols; j++)
447     {
448 dpavlin 19 bool isnull;
449    
450     /* array value of this position */
451     indx[0] = j + dim_lower_bounds[0];
452    
453     dvalue = array_ref(attr_arr, ndims, indx, -1, typlen, typbyval, typalign, &isnull);
454    
455     if (!isnull && doc)
456     values[j] = DatumGetCString(
457     attr2text(doc,
458     DirectFunctionCall1(textout, dvalue)
459     ));
460     else
461     values[j] = NULL;
462 dpavlin 1 }
463     /* construct the tuple */
464     tuple = BuildTupleFromCStrings(attinmeta, values);
465    
466     /* now store it */
467     tuplestore_puttuple(tupstore, tuple);
468 dpavlin 19
469    
470     /* delete estraier document object */
471     est_doc_delete(doc);
472 dpavlin 1 }
473    
474     tuplestore_donestoring(tupstore);
475     rsinfo->setResult = tupstore;
476    
477     /*
478     * SFRM_Materialize mode expects us to return a NULL Datum. The actual
479     * tuples are in our tuplestore and passed back through
480     * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
481     * that we actually used to build our tuples with, so the caller can
482     * verify we did what it was expecting.
483     */
484     rsinfo->setDesc = tupdesc;
485     MemoryContextSwitchTo(oldcontext);
486    
487 dpavlin 19 if(!est_db_close(db, &ecode)){
488     ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
489     errmsg("est_db_close: %d", ecode),
490     errdetail(est_err_msg(ecode))));
491     }
492    
493 dpavlin 1 return (Datum) 0;
494     }
495    
496    
497     /* make text var from attr */
498     char *attr2text(ESTDOC *doc, char *attr) {
499     char *val;
500     const char *attrval;
501     int len;
502 dpavlin 4 int attrlen;
503 dpavlin 1
504 dpavlin 2 elog(DEBUG1, "doc: %08x, attr: %s", doc, attr);
505 dpavlin 1
506 dpavlin 4 if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
507     val = (char *) palloc(attrlen * sizeof(char));
508 dpavlin 1 } else {
509     return (Datum) NULL;
510     }
511    
512     len = strlen(attrval);
513 dpavlin 2 elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
514 dpavlin 1
515     len++;
516     len *= sizeof(char);
517    
518     elog(DEBUG2, "palloc(%d)", len);
519    
520     val = palloc(len);
521    
522     memset(val, 0, len);
523     strncpy(val, attrval, len);
524    
525     elog(DEBUG2, "val=%s", val);
526    
527     return val;
528     }
529    

  ViewVC Help
Powered by ViewVC 1.1.26