/[pgestraier]/trunk/pgest.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/pgest.c

Parent Directory | Revision Log


Revision 20 - (show annotations)
Thu May 26 19:42:36 2005 UTC (18 years, 11 months ago) by dpavlin
File MIME type: text/plain
File size: 13185 byte(s)
moved all debug output to DEBUG1 instead of INFO, fixed compilation warning

1 /*
2 * integrate Hyper Estraier into PostgreSQL
3 *
4 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5 *
6 * TODO:
7 * - all
8 *
9 * NOTES:
10 * - clear structures with memset to support hash indexes (who would like
11 * to create a hash index on a table returned from a function?)
12 * - number of returned rows is set by PostgreSQL evaluator, see:
13 * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14 *
15 * Based on:
16 * - C example from PostgreSQL documentation (BSD licence)
17 * - example002.c from Hyper Estraier (GPL)
18 * - _textin/_textout from pgcurl.c (LGPL)
19 *
20 * This code is licensed under the GPL
21 */
22
23 #include "postgres.h"
24 #include "fmgr.h"
25 #include "funcapi.h"
26 #include "utils/builtins.h"
27 #include "utils/array.h"
28 #include "miscadmin.h"
29 #include <estraier.h>
30 #include <cabin.h>
31
32 #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
33 #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
34 #define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
35 #define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
36
37 /* prototype */
38 char *attr2text(ESTDOC *doc, char *attr);
39
40 ESTDB *db;
41 ESTCOND *cond;
42 ESTDOC *doc;
43 const CBLIST *texts;
44 int ecode, *est_result, resnum, i, j;
45 int limit = 0;
46 int offset = 0;
47
48 /* define PostgreSQL v1 function */
49 PG_FUNCTION_INFO_V1(pgest);
50 Datum pgest(PG_FUNCTION_ARGS) {
51
52 FuncCallContext *funcctx;
53 int call_cntr;
54 int max_calls;
55 TupleDesc tupdesc;
56 TupleTableSlot *slot;
57 AttInMetadata *attinmeta;
58 char *index_path;
59 char *query;
60 char *attr;
61
62 /* stuff done only on the first call of the function */
63 if (SRF_IS_FIRSTCALL()) {
64 MemoryContext oldcontext;
65
66 /* create a function context for cross-call persistence */
67 funcctx = SRF_FIRSTCALL_INIT();
68
69 /* switch to memory context appropriate for multiple function calls */
70 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
71 /* take arguments from function */
72
73 /* index path */
74 if (PG_ARGISNULL(0)) {
75 elog(ERROR, "index path can't be null");
76 SRF_RETURN_DONE(funcctx);
77 }
78 index_path = _textout(PG_GETARG_TEXT_P(0));
79
80 /* query string */
81 if (PG_ARGISNULL(0)) {
82 query = "";
83 } else {
84 query = _textout(PG_GETARG_TEXT_P(1));
85 }
86
87 /* atribute filter */
88 if (PG_ARGISNULL(2)) {
89 attr = "";
90 } else {
91 attr = _textout(PG_GETARG_TEXT_P(2));
92 }
93
94 /* limit */
95 if (PG_ARGISNULL(3)) {
96 limit = 0;
97 } else {
98 limit = PG_GETARG_INT32(3);
99 }
100
101 /* offset */
102 if (PG_ARGISNULL(4)) {
103 offset = 0;
104 } else {
105 offset = PG_GETARG_INT32(4);
106 }
107
108
109 /* open the database */
110 elog(DEBUG1, "pgest: est_db_open(%s)", index_path);
111
112 if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
113 elog(ERROR, "est_db_open: can't open %s [%d]: %s", index_path, ecode, est_err_msg(ecode));
114 SRF_RETURN_DONE(funcctx);
115 }
116
117 elog(DEBUG1, "pgest: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
118
119 /* create a search condition object */
120 if (!(cond = est_cond_new())) {
121 elog(INFO, "pgest: est_cond_new failed");
122 SRF_RETURN_DONE(funcctx);
123 }
124
125 /* set the search phrase to the search condition object */
126 if (! PG_ARGISNULL(1) && strlen(query) > 0)
127 est_cond_set_phrase(cond, query);
128
129 /* minimum valid attribute length is 10: @a STREQ a */
130 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
131 elog(DEBUG1,"est_cond_add_attr(%s)", attr);
132 est_cond_add_attr(cond, attr);
133 }
134
135 /* get the result of search */
136 est_result = est_db_search(db, cond, &resnum, NULL);
137
138 /* total number of tuples to be returned */
139 if (limit && limit < resnum) {
140 funcctx->max_calls = limit - offset;
141 } else {
142 funcctx->max_calls = resnum - offset;
143 }
144
145 /* check if results exists */
146 if ( 0 == funcctx->max_calls )
147 elog(INFO, "pgest: no results for: %s", query );
148
149 elog(DEBUG1, "pgest: found %d hits for %s", resnum, query);
150
151 /* Build a tuple description for a __pgest tuple */
152 tupdesc = RelationNameGetTupleDesc("__pgest");
153
154 /* allocate a slot for a tuple with this tupdesc */
155 slot = TupleDescGetSlot(tupdesc);
156
157 /* assign slot to function context */
158 funcctx->slot = slot;
159
160 /*
161 * generate attribute metadata needed later to produce tuples from raw
162 * C strings
163 */
164 attinmeta = TupleDescGetAttInMetadata(tupdesc);
165 funcctx->attinmeta = attinmeta;
166
167 MemoryContextSwitchTo(oldcontext);
168
169 elog(DEBUG1, "SRF_IS_FIRSTCALL done");
170 }
171
172 /* stuff done on every call of the function */
173 funcctx = SRF_PERCALL_SETUP();
174
175 call_cntr = funcctx->call_cntr;
176 max_calls = funcctx->max_calls;
177 slot = funcctx->slot;
178 attinmeta = funcctx->attinmeta;
179
180 if (limit && call_cntr > limit - 1) {
181 elog(DEBUG1, "call_cntr: %d limit: %d", call_cntr, limit);
182 SRF_RETURN_DONE(funcctx);
183 }
184
185 if (call_cntr < max_calls) {
186 char **values;
187 HeapTuple tuple;
188 Datum result;
189
190 elog(DEBUG1, "pgest: loop count %d", call_cntr);
191
192 if (! est_result) {
193 elog(ERROR, "pgest: no estraier results");
194 SRF_RETURN_DONE(funcctx);
195 }
196
197 /*
198 * Prepare a values array for storage in our slot.
199 * This should be an array of C strings which will
200 * be processed later by the type input functions.
201 */
202
203 if (doc = est_db_get_doc(db, est_result[call_cntr + offset], 0)) {
204
205 elog(DEBUG1, "URI: %s\n Title: %s\n",
206 est_doc_attr(doc, "@uri"),
207 est_doc_attr(doc, "@title")
208 );
209
210 values = (char **) palloc(4 * sizeof(char *));
211
212 // values[0] = (char *) palloc(strlen(_estval) * sizeof(char));
213
214 values[0] = (char *) attr2text(doc,"@id");
215 values[1] = (char *) attr2text(doc,"@uri");
216 values[2] = (char *) attr2text(doc,"@title");
217 values[3] = (char *) attr2text(doc,"@size");
218
219 /* destloy the document object */
220 elog(DEBUG2, "est_doc_delete");
221 est_doc_delete(doc);
222 } else {
223 elog(INFO, "no result from estraier");
224 values[0] = DatumGetCString( "" );
225 values[1] = DatumGetCString( "" );
226 values[2] = DatumGetCString( "" );
227 values[3] = DatumGetCString( "" );
228 }
229
230
231 elog(DEBUG2, "build tuple");
232 /* build a tuple */
233 tuple = BuildTupleFromCStrings(attinmeta, values);
234
235 elog(DEBUG2, "make tuple into datum");
236 /* make the tuple into a datum */
237 result = TupleGetDatum(slot, tuple);
238
239 elog(DEBUG2, "cleanup");
240 /* clean up ? */
241 /*
242 pfree(values[0]);
243 pfree(values[1]);
244 pfree(values[2]);
245 pfree(values[3]);
246 pfree(values);
247 */
248
249 elog(DEBUG2, "cleanup over");
250
251 SRF_RETURN_NEXT(funcctx, result);
252 } else {
253 elog(DEBUG1, "loop over");
254
255 if(!est_db_close(db, &ecode)){
256 elog(INFO, "est_db_close error: %s", est_err_msg(ecode));
257 }
258
259 /* do when there is no more left */
260 SRF_RETURN_DONE(funcctx);
261 }
262 }
263
264 /* work in progress */
265 PG_FUNCTION_INFO_V1(pgest_attr);
266 Datum pgest_attr(PG_FUNCTION_ARGS)
267 {
268 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(5);
269 Oid element_type = ARR_ELEMTYPE(attr_arr);
270 int ndims = ARR_NDIM(attr_arr);
271 int *dim_counts = ARR_DIMS(attr_arr);
272 int *dim_lower_bounds = ARR_LBOUND(attr_arr);
273 int ncols = 0;
274 int nrows = 0;
275 int indx[MAXDIM];
276 int16 typlen;
277 bool typbyval;
278 char typalign;
279 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
280 AttInMetadata *attinmeta;
281 TupleDesc tupdesc;
282 Tuplestorestate *tupstore = NULL;
283 HeapTuple tuple;
284 MemoryContext per_query_ctx;
285 MemoryContext oldcontext;
286 Datum dvalue;
287 char **values;
288 int rsinfo_ncols;
289 int i, j;
290 /* estvars */
291 char *index_path;
292 char *query;
293 char *attr;
294
295
296 /* only allow 1D input array */
297 if (ndims == 1)
298 {
299 ncols = dim_counts[0];
300 }
301 else
302 ereport(ERROR,
303 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
304 errmsg("invalid input array"),
305 errdetail("Input array must have 1 dimension")));
306
307 /* check to see if caller supports us returning a tuplestore */
308 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
309 ereport(ERROR,
310 (errcode(ERRCODE_SYNTAX_ERROR),
311 errmsg("materialize mode required, but it is not " \
312 "allowed in this context")));
313
314 /* get info about element type needed to construct the array */
315 get_typlenbyvalalign(element_type, &typlen, &typbyval, &typalign);
316
317 /* get the requested return tuple description */
318 tupdesc = rsinfo->expectedDesc;
319 rsinfo_ncols = tupdesc->natts;
320
321 /*
322 * The requested tuple description better match up with the array
323 * we were given.
324 */
325 if (rsinfo_ncols != ncols)
326 ereport(ERROR,
327 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
328 errmsg("invalid input array"),
329 errdetail("Number of elements in array must match number of query specified columns.")));
330
331 /* OK, use it */
332 attinmeta = TupleDescGetAttInMetadata(tupdesc);
333
334 /* Now go to work */
335 rsinfo->returnMode = SFRM_Materialize;
336
337 per_query_ctx = fcinfo->flinfo->fn_mcxt;
338 oldcontext = MemoryContextSwitchTo(per_query_ctx);
339
340 /* initialize our tuplestore */
341 tupstore = tuplestore_begin_heap(true, false, SortMem);
342
343
344 /* take rest of arguments from function */
345
346 /* index path */
347 if (PG_ARGISNULL(0)) {
348 ereport(ERROR,
349 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
350 errmsg("index path can't be null"),
351 errdetail("Index path must be valid full path to HyperEstraier index")));
352 }
353 index_path = _textout(PG_GETARG_TEXT_P(0));
354
355 /* query string */
356 if (PG_ARGISNULL(0)) {
357 query = "";
358 } else {
359 query = _textout(PG_GETARG_TEXT_P(1));
360 }
361
362 /* atribute filter */
363 if (PG_ARGISNULL(2)) {
364 attr = "";
365 } else {
366 attr = _textout(PG_GETARG_TEXT_P(2));
367 }
368
369 /* limit */
370 if (PG_ARGISNULL(3)) {
371 limit = 0;
372 } else {
373 limit = PG_GETARG_INT32(3);
374 }
375
376 /* offset */
377 if (PG_ARGISNULL(4)) {
378 offset = 0;
379 } else {
380 offset = PG_GETARG_INT32(4);
381 }
382
383
384 /* open the database */
385 elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
386
387 if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
388 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
389 errmsg("est_db_open: can't open %s: %d", index_path, ecode),
390 errdetail(est_err_msg(ecode))));
391 }
392
393 elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
394
395 /* create a search condition object */
396 if (!(cond = est_cond_new())) {
397 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
398 errmsg("pgest_attr: est_cond_new failed")));
399 }
400
401 /* set the search phrase to the search condition object */
402 if (! PG_ARGISNULL(1) && strlen(query) > 0)
403 est_cond_set_phrase(cond, query);
404
405 /* minimum valid attribute length is 10: @a STREQ a */
406 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
407 elog(DEBUG1,"est_cond_add_attr(%s)", attr);
408 est_cond_add_attr(cond, attr);
409 }
410
411 /* get the result of search */
412 est_result = est_db_search(db, cond, &resnum, NULL);
413
414 /* check if results exists */
415 if ( 0 == resnum ) {
416 elog(INFO, "pgest_attr: no results for: %s", query );
417 }
418
419 /* total number of tuples to be returned */
420 if (limit && limit < resnum) {
421 nrows = limit - offset;
422 } else {
423 nrows = resnum - offset;
424 }
425
426
427 elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
428
429
430 values = (char **) palloc(ncols * sizeof(char *));
431
432 for (i = 0; i < nrows; i++)
433 {
434
435 /* get result from estraier */
436 if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
437 elog(INFO, "can't find result %d", i + offset);
438 } else {
439 elog(DEBUG1, "URI: %s\n Title: %s\n",
440 est_doc_attr(doc, "@uri"),
441 est_doc_attr(doc, "@title")
442 );
443 }
444
445 /* iterate over results */
446 for (j = 0; j < ncols; j++)
447 {
448 bool isnull;
449
450 /* array value of this position */
451 indx[0] = j + dim_lower_bounds[0];
452
453 dvalue = array_ref(attr_arr, ndims, indx, -1, typlen, typbyval, typalign, &isnull);
454
455 if (!isnull && doc)
456 values[j] = DatumGetCString(
457 attr2text(doc,
458 (char *)DirectFunctionCall1(textout, dvalue)
459 ));
460 else
461 values[j] = NULL;
462 }
463 /* construct the tuple */
464 tuple = BuildTupleFromCStrings(attinmeta, values);
465
466 /* now store it */
467 tuplestore_puttuple(tupstore, tuple);
468
469
470 /* delete estraier document object */
471 est_doc_delete(doc);
472 }
473
474 tuplestore_donestoring(tupstore);
475 rsinfo->setResult = tupstore;
476
477 /*
478 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
479 * tuples are in our tuplestore and passed back through
480 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
481 * that we actually used to build our tuples with, so the caller can
482 * verify we did what it was expecting.
483 */
484 rsinfo->setDesc = tupdesc;
485 MemoryContextSwitchTo(oldcontext);
486
487 if(!est_db_close(db, &ecode)){
488 ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
489 errmsg("est_db_close: %d", ecode),
490 errdetail(est_err_msg(ecode))));
491 }
492
493 return (Datum) 0;
494 }
495
496
497 /* make text var from attr */
498 char *attr2text(ESTDOC *doc, char *attr) {
499 char *val;
500 const char *attrval;
501 int len;
502 int attrlen;
503
504 elog(DEBUG1, "doc: %08x, attr: %s", doc, attr);
505
506 if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
507 val = (char *) palloc(attrlen * sizeof(char));
508 } else {
509 return (Datum) NULL;
510 }
511
512 len = strlen(attrval);
513 elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
514
515 len++;
516 len *= sizeof(char);
517
518 elog(DEBUG2, "palloc(%d)", len);
519
520 val = palloc(len);
521
522 memset(val, 0, len);
523 strncpy(val, attrval, len);
524
525 elog(DEBUG2, "val=%s", val);
526
527 return val;
528 }
529

  ViewVC Help
Powered by ViewVC 1.1.26