/[pgswish]/trunk/pgswish.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/pgswish.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 23 - (show annotations)
Sun May 29 23:00:19 2005 UTC (18 years, 10 months ago) by dpavlin
File MIME type: text/plain
File size: 15946 byte(s)
make it less chatty at INFO logging level

1 /*
2 * integrate swish-e into PostgreSQL
3 *
4 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-02-18
5 *
6 * TODO:
7 * - check null input using PG_ARGISNULL before using PG_GETARG_xxxx
8 * - support composite type arguments
9 * - split error_or_abort
10 * - use getResultPropValue not SwishResultPropertyStr
11 * - fix everything about pgswish_arr which is broken
12 *
13 * NOTES:
14 * - clear structures with memset to support hash indexes (who would like
15 * to create a hash index on a table returned from a function?)
16 * - number of returned rows is set by PostgreSQL evaluator, see:
17 * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
18 *
19 * Based on:
20 * - C example from PostgreSQL documentation (BSD licence)
21 * - swish-e example src/libtest.c (GPL)
22 * - _textin/_textout from pgcurl.c (LGPL)
23 *
24 * This code is licenced under GPL
25 */
26
27 #include "postgres.h"
28 #include "fmgr.h"
29 #include "funcapi.h"
30 #include "utils/builtins.h"
31 #include "utils/array.h"
32 #include "miscadmin.h"
33 #include <swish-e.h>
34
35 #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
36 #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
37 #define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
38 #define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
39
40 /* Globals */
41 static SW_HANDLE swish_handle = NULL; /* Database handle */
42 static SW_SEARCH search = NULL; /* search handle -- search parameters */
43 static SW_RESULTS swish_results = NULL; /* results handle -- list of results */
44 static SW_RESULT *sw_res = NULL; /* one row from swish-e results */
45
46 /* define PostgreSQL v1 function */
47 PG_FUNCTION_INFO_V1(pgswish);
48 Datum pgswish(PG_FUNCTION_ARGS) {
49
50 FuncCallContext *funcctx;
51 int call_cntr;
52 int max_calls;
53 TupleDesc tupdesc;
54 TupleTableSlot *slot;
55 AttInMetadata *attinmeta;
56 char *index_path;
57 char *query;
58 FILE *logfh;
59
60 /* stuff done only on the first call of the function */
61 if (SRF_IS_FIRSTCALL()) {
62 MemoryContext oldcontext;
63
64 /* take arguments from function */
65 //index_path = _textout(PG_GETARG_TEXT_P(0));
66 index_path = _textout(PG_GETARG_TEXT_P(0));
67 query = _textout(PG_GETARG_TEXT_P(1));
68
69 /* create a function context for cross-call persistence */
70 funcctx = SRF_FIRSTCALL_INIT();
71
72 /* switch to memory context appropriate for multiple function calls */
73 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
74
75
76 /* Send any errors or warnings to log, as well as
77 * STDOUT and STDERR (just to be sure) */
78 if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
79 set_error_handle( logfh );
80 elog(DEBUG1, "loggin swish-e errors to /tmp/pgswish.log");
81 /* redirect STDOUT and STDERR to log */
82 dup2(1, logfh);
83 dup2(2, logfh);
84 } else {
85 elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
86 }
87
88 elog(DEBUG1, "pgswish: SwishInit(%s)", index_path);
89
90 swish_handle = SwishInit( index_path );
91
92 if ( SwishError( swish_handle ) || ! swish_handle) {
93 elog(ERROR, "pgswish: SwishInit(%s) failed: %s", index_path, SwishErrorString( swish_handle ));
94
95 SRF_RETURN_DONE(funcctx);
96 }
97
98 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
99 /* set ranking scheme. default is 0 */
100 SwishRankScheme( swish_handle, 0 );
101 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
102
103 elog(DEBUG1, "pgswish: SwishQuery(%s)", query);
104 /* Here's a short-cut to searching that creates a search object and searches at the same time */
105 swish_results = SwishQuery( swish_handle, query);
106 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
107
108 /* total number of tuples to be returned */
109 funcctx->max_calls = SwishHits( swish_results );
110
111 /* check if results exists */
112 if ( 0 == funcctx->max_calls )
113 elog(INFO, "no results for: %s", query );
114
115 elog(DEBUG1, "pgswish: SwishHits = %d", funcctx->max_calls);
116
117 /* Build a tuple description for a __pgswish tuple */
118 tupdesc = RelationNameGetTupleDesc("__pgswish");
119
120 /* allocate a slot for a tuple with this tupdesc */
121 slot = TupleDescGetSlot(tupdesc);
122
123 /* assign slot to function context */
124 funcctx->slot = slot;
125
126 /*
127 * generate attribute metadata needed later to produce tuples from raw
128 * C strings
129 */
130 attinmeta = TupleDescGetAttInMetadata(tupdesc);
131 funcctx->attinmeta = attinmeta;
132
133 MemoryContextSwitchTo(oldcontext);
134
135 elog(DEBUG1, "SRF_IS_FIRSTCALL done");
136 }
137
138 /* stuff done on every call of the function */
139 funcctx = SRF_PERCALL_SETUP();
140
141 call_cntr = funcctx->call_cntr;
142 max_calls = funcctx->max_calls;
143 slot = funcctx->slot;
144 attinmeta = funcctx->attinmeta;
145
146 if (call_cntr < max_calls) {
147 char **values;
148 HeapTuple tuple;
149 Datum result;
150
151 elog(DEBUG1, "pgswish: loop count %d", call_cntr);
152
153 if (! swish_results) {
154 elog(ERROR, "pgswish: no swish-e results");
155 SRF_RETURN_DONE(funcctx);
156 }
157
158 elog(DEBUG1, "pgswish: check for swish-e error");
159 if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx);
160
161 /*
162 * Prepare a values array for storage in our slot.
163 * This should be an array of C strings which will
164 * be processed later by the type input functions.
165 */
166
167 sw_res = SwishNextResult( swish_results );
168 if (! sw_res) {
169 elog(ERROR, "pgswish: swish-e sort result list: %d rows expected %d", call_cntr, max_calls - 1);
170 Free_Results_Object( swish_results );
171 Free_Search_Object( search );
172 SRF_RETURN_DONE(funcctx);
173 }
174
175 elog(DEBUG1, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n",
176 SwishResultPropertyStr ( sw_res, "swishdocpath" ),
177 SwishResultPropertyULong ( sw_res, "swishrank" ),
178 SwishResultPropertyULong ( sw_res, "swishdocsize" ),
179 SwishResultPropertyStr ( sw_res, "swishtitle"),
180 SwishResultPropertyStr ( sw_res, "swishdbfile" ),
181 SwishResultPropertyStr ( sw_res, "swishlastmodified" ),
182 SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */
183 SwishResultPropertyULong ( sw_res, "swishfilenum" )
184 );
185
186 values = (char **) palloc(4 * sizeof(char *));
187
188 values[0] = prop2int( sw_res, "swishrank" );
189 values[1] = prop2text( sw_res, "swishdocpath" );
190 values[2] = prop2text( sw_res, "swishtitle" );
191 values[3] = prop2int( sw_res, "swishdocsize" );
192
193 /*
194 values[0] = (char *) palloc(16 * sizeof(char));
195 snprintf(values[0], 16, "%d", 1);
196 values[1] = (char *) palloc(16 * sizeof(char));
197 snprintf(values[1], 16, "%d", 2);
198 values[2] = (char *) palloc(16 * sizeof(char));
199 snprintf(values[2], 16, "%d", 3);
200 values[3] = (char *) palloc(16 * sizeof(char));
201 snprintf(values[3], 16, "%d", 4);
202 */
203
204 /* build a tuple */
205 tuple = BuildTupleFromCStrings(attinmeta, values);
206
207 /* make the tuple into a datum */
208 result = TupleGetDatum(slot, tuple);
209
210 /* clean up ? */
211 pfree(values[0]);
212 pfree(values[1]);
213 pfree(values[2]);
214 pfree(values[3]);
215 pfree(values);
216
217 elog(DEBUG1, "row: %s|%s|%s|%s",values[0],values[1],values[2],values[3]);
218
219 SRF_RETURN_NEXT(funcctx, result);
220 } else {
221 elog(DEBUG1, "loop over");
222
223 /* free swish object and close */
224 Free_Search_Object( search );
225 SwishClose( swish_handle );
226
227 /* do when there is no more left */
228 SRF_RETURN_DONE(funcctx);
229 }
230 }
231
232
233 /*
234 * new function with support for property selection
235 */
236
237 PG_FUNCTION_INFO_V1(pgswish_arr);
238 Datum pgswish_arr(PG_FUNCTION_ARGS)
239 {
240 ArrayType *prop_arr = PG_GETARG_ARRAYTYPE_P(5);
241 Oid prop_element_type = ARR_ELEMTYPE(prop_arr);
242 int prop_ndims = ARR_NDIM(prop_arr);
243 int *prop_dim_counts = ARR_DIMS(prop_arr);
244 int *prop_dim_lower_bounds = ARR_LBOUND(prop_arr);
245 int ncols = 0;
246 int nrows = 0;
247 int indx[MAXDIM];
248 int16 prop_len;
249 bool prop_byval;
250 char prop_align;
251 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
252 AttInMetadata *attinmeta;
253 TupleDesc tupdesc;
254 Tuplestorestate *tupstore = NULL;
255 HeapTuple tuple;
256 MemoryContext per_query_ctx;
257 MemoryContext oldcontext;
258 Datum dvalue;
259 char **values;
260 int rsinfo_ncols;
261 int i, j;
262 /* swish-e */
263 FILE *logfh;
264 int resnum;
265 int limit = 0;
266 int offset = 0;
267
268 char *index_path;
269 char *query;
270 char *attr;
271
272
273 /* only allow 1D input array */
274 if (prop_ndims == 1)
275 {
276 ncols = prop_dim_counts[0];
277 }
278 else
279 ereport(ERROR,
280 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
281 errmsg("invalid input array"),
282 errdetail("Input array must have 1 dimension")));
283
284 /* check to see if caller supports us returning a tuplestore */
285 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
286 ereport(ERROR,
287 (errcode(ERRCODE_SYNTAX_ERROR),
288 errmsg("materialize mode required, but it is not " \
289 "allowed in this context")));
290
291 /* get info about element type needed to construct the array */
292 get_typlenbyvalalign(prop_element_type, &prop_len, &prop_byval, &prop_align);
293
294 /* get the requested return tuple description */
295 tupdesc = rsinfo->expectedDesc;
296 rsinfo_ncols = tupdesc->natts;
297
298 /*
299 * The requested tuple description better match up with the array
300 * we were given.
301 */
302 if (rsinfo_ncols != ncols)
303 ereport(ERROR,
304 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
305 errmsg("invalid input array"),
306 errdetail("Number of elements in array must match number of query specified columns.")));
307
308 /* OK, use it */
309 attinmeta = TupleDescGetAttInMetadata(tupdesc);
310
311 /* Now go to work */
312 rsinfo->returnMode = SFRM_Materialize;
313
314 per_query_ctx = fcinfo->flinfo->fn_mcxt;
315 oldcontext = MemoryContextSwitchTo(per_query_ctx);
316
317 /* initialize our tuplestore */
318 tupstore = tuplestore_begin_heap(true, false, SortMem);
319
320
321 /* take rest of arguments from function */
322
323 /* index path */
324 if (PG_ARGISNULL(0)) {
325 ereport(ERROR,
326 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
327 errmsg("index path can't be null"),
328 errdetail("Index path must be valid full path to swish-e index")));
329 }
330 index_path = _textout(PG_GETARG_TEXT_P(0));
331
332 /* query string */
333 if (PG_ARGISNULL(0)) {
334 query = "";
335 } else {
336 query = _textout(PG_GETARG_TEXT_P(1));
337 }
338
339 /* atribute filter */
340 if (PG_ARGISNULL(2)) {
341 attr = "";
342 } else {
343 attr = _textout(PG_GETARG_TEXT_P(2));
344 }
345
346 /* limit */
347 if (PG_ARGISNULL(3)) {
348 limit = 0;
349 } else {
350 limit = PG_GETARG_INT32(3);
351 }
352
353 /* offset */
354 if (PG_ARGISNULL(4)) {
355 offset = 0;
356 } else {
357 offset = PG_GETARG_INT32(4);
358 }
359
360
361 /* Send any errors or warnings to log, as well as
362 * STDOUT and STDERR (just to be sure) */
363 if ( logfh = fopen("/tmp/pgswish.log", "a") ) {
364 set_error_handle( logfh );
365 elog(DEBUG1, "loggin swish-e errors to /tmp/pgswish.log");
366 /* redirect STDOUT and STDERR to log */
367 dup2(1, logfh);
368 dup2(2, logfh);
369 } else {
370 elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!");
371 }
372
373 elog(DEBUG1, "pgswish: SwishInit(%s)", index_path);
374
375 swish_handle = SwishInit( index_path );
376
377 if ( SwishError( swish_handle ) || ! swish_handle )
378 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
379 errmsg("pgswish: SwishInit(%s) failed", index_path ),
380 errdetail( SwishErrorString( swish_handle ) )
381 ));
382
383 elog(DEBUG1, "pgswish: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
384
385
386 /* set ranking scheme. default is 0 */
387 SwishRankScheme( swish_handle, 0 );
388 error_or_abort( swish_handle );
389
390 elog(DEBUG1, "pgswish: SwishQuery(%s)", query);
391 /* Here's a short-cut to searching that creates a search object
392 * and searches at the same time */
393
394 /* set the search phrase to the search condition object */
395 if (! PG_ARGISNULL(1) && strlen(query) > 0)
396 swish_results = SwishQuery( swish_handle, query);
397 error_or_abort( swish_handle );
398
399 /* total number of tuples to be returned */
400 resnum = SwishHits( swish_results );
401
402 /* FIXME */
403 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
404 elog(DEBUG1,"ignored: %s", attr);
405 }
406
407 /* check if results exists */
408 if ( 0 == resnum ) {
409 elog(INFO, "pgswish: no results for: %s", query );
410 }
411
412 /* total number of tuples to be returned */
413 if (limit && limit < resnum) {
414 nrows = limit - offset;
415 } else {
416 nrows = resnum - offset;
417 }
418
419
420 elog(DEBUG1, "pgswish: found %d hits for %s", resnum, query);
421
422
423 values = (char **) palloc(ncols * sizeof(char *));
424
425 for (i = 0; i < nrows; i++)
426 {
427 SwishSeekResult( swish_results, i + offset );
428 sw_res = SwishNextResult( swish_results );
429
430 /* get result from swish-e */
431 if (! ( SwishErrorString( swish_handle ) ) ) {
432 elog(INFO, "can't find result %d", i + offset);
433 } else {
434 elog(DEBUG1, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n",
435 SwishResultPropertyStr ( sw_res, "swishdocpath" ),
436 SwishResultPropertyULong ( sw_res, "swishrank" ),
437 SwishResultPropertyULong ( sw_res, "swishdocsize" ),
438 SwishResultPropertyStr ( sw_res, "swishtitle"),
439 SwishResultPropertyStr ( sw_res, "swishdbfile" ),
440 SwishResultPropertyStr ( sw_res, "swishlastmodified" ),
441 SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */
442 SwishResultPropertyULong ( sw_res, "swishfilenum" )
443 );
444 }
445
446 /* iterate over results */
447 for (j = 0; j < ncols; j++)
448 {
449 bool isnull;
450
451 /* array value of this position */
452 indx[0] = j + prop_dim_lower_bounds[0];
453
454 dvalue = array_ref(prop_arr, prop_ndims, indx, -1, prop_len, prop_byval, prop_align, &isnull);
455
456 if (!isnull && sw_res)
457 values[j] = DatumGetCString(
458 prop2text( sw_res,
459 (char *)DirectFunctionCall1(textout, dvalue)
460 ));
461 else
462 values[j] = NULL;
463 }
464 /* construct the tuple */
465 tuple = BuildTupleFromCStrings(attinmeta, values);
466
467 /* now store it */
468 tuplestore_puttuple(tupstore, tuple);
469
470 }
471
472 tuplestore_donestoring(tupstore);
473 rsinfo->setResult = tupstore;
474
475 /*
476 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
477 * tuples are in our tuplestore and passed back through
478 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
479 * that we actually used to build our tuples with, so the caller can
480 * verify we did what it was expecting.
481 */
482 rsinfo->setDesc = tupdesc;
483 MemoryContextSwitchTo(oldcontext);
484
485 /* free swish object and close */
486 Free_Search_Object( search );
487 SwishClose( swish_handle );
488
489 return (Datum) 0;
490 }
491
492
493
494
495 /* make text var from property */
496 char *prop2text(SW_RESULT sw_res, char *propname) {
497 char *val;
498 char *prop;
499 int len;
500
501 elog(DEBUG2, "prop2text(%s)", propname);
502
503 prop = SwishResultPropertyStr( sw_res, propname );
504 if (error_or_abort( swish_handle )) return NULL;
505
506 len = strlen(prop);
507 elog(DEBUG1, "prop2text(%s) = '%s' %d bytes", propname, prop, len);
508
509 len++;
510 len *= sizeof(char);
511
512 elog(DEBUG2, "palloc(%d)", len);
513
514 val = palloc(len);
515
516 memset(val, 0, len);
517 strncpy(val, prop, len);
518
519 elog(DEBUG2, "val=%s", val);
520
521 return val;
522 }
523
524 /* make integer variable from property */
525 char *prop2int(SW_RESULT sw_res, char *propname) {
526 char *val;
527 unsigned long prop;
528 int len;
529
530 elog(DEBUG2, "prop2int(%s)", propname);
531
532 prop = SwishResultPropertyULong( sw_res, propname );
533 if (error_or_abort( swish_handle )) return NULL;
534
535 elog(DEBUG1, "prop2int(%s) = %lu", propname, prop);
536
537 len = 128 * sizeof(char);
538 elog(DEBUG2, "palloc(%d)", len);
539
540 val = palloc(len);
541 memset(val, 0, len);
542
543 snprintf(val, len, "%lu", prop);
544
545 elog(DEBUG2, "val=%s", val);
546
547 return val;
548 }
549
550
551 /*
552 * check if swish has returned error, and elog it.
553 */
554 static int error_or_abort( SW_HANDLE swish_handle ) {
555 if ( !SwishError( swish_handle ) )
556 return 0;
557
558 /* print a message */
559 elog(ERROR,
560 "pgswish error: Number [%d], Type [%s], Optional Message: [%s]\n",
561 SwishError( swish_handle ),
562 SwishErrorString( swish_handle ),
563 SwishLastErrorMsg( swish_handle )
564 );
565 if ( swish_results ) Free_Results_Object( swish_results );
566 if ( search ) Free_Search_Object( search );
567 SwishClose( swish_handle );
568
569 return 1;
570 }
571

  ViewVC Help
Powered by ViewVC 1.1.26