--- trunk/pgswish.c 2005/02/19 00:59:08 9 +++ trunk/pgswish.c 2005/05/29 23:00:19 23 @@ -6,10 +6,15 @@ * TODO: * - check null input using PG_ARGISNULL before using PG_GETARG_xxxx * - support composite type arguments + * - split error_or_abort + * - use getResultPropValue not SwishResultPropertyStr + * - fix everything about pgswish_arr which is broken * * NOTES: * - clear structures with memset to support hash indexes (who whould like * to create hash index on table returned from function?) + * - number of returned rows is set by PostgreSQL evaluator, see: + * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php * * Based on: * - C example from PostgreSQL documentation (BSD licence) @@ -23,15 +28,20 @@ #include "fmgr.h" #include "funcapi.h" #include "utils/builtins.h" +#include "utils/array.h" +#include "miscadmin.h" #include #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str)) #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str))) +#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp))) +#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp))) - -SW_HANDLE swish_handle = NULL;/* Database handle */ -SW_SEARCH search = NULL; /* search handle -- holds search parameters */ -SW_RESULTS results = NULL; /* results handle -- holds list of results */ +/* Globals */ +static SW_HANDLE swish_handle = NULL; /* Database handle */ +static SW_SEARCH search = NULL; /* search handle -- search parameters */ +static SW_RESULTS swish_results = NULL; /* results handle -- list of results */ +static SW_RESULT *sw_res = NULL; /* one row from swish-e results */ /* define PostgreSQL v1 function */ PG_FUNCTION_INFO_V1(pgswish); @@ -43,11 +53,9 @@ TupleDesc tupdesc; TupleTableSlot *slot; AttInMetadata *attinmeta; - SW_HANDLE swish_handle = NULL; /* Database handle */ - SW_SEARCH search = NULL; /* search handle -- holds search parameters */ - SW_RESULTS results = NULL; /* results handle -- holds list of results */ char *index_path; char *query; + FILE *logfh; /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) { @@ -65,41 +73,46 @@ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - /* Send any errors or warnings to stderr (default is stdout) */ - SwishErrorsToStderr(); + /* Send any errors or warnings to log, as well as + * STDOUT and STDERR (just to be sure) */ + if ( logfh = fopen("/tmp/pgswish.log", "a") ) { + set_error_handle( logfh ); + elog(DEBUG1, "loggin swish-e errors to /tmp/pgswish.log"); + /* redirect STDOUT and STDERR to log */ + dup2(1, logfh); + dup2(2, logfh); + } else { + elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!"); + } + + elog(DEBUG1, "pgswish: SwishInit(%s)", index_path); - elog(INFO, "pgswish: SwishInit(%s)", index_path); - swish_handle = SwishInit( index_path ); - if (! swish_handle) { - elog(ERROR, "pgswish: can't open %s", index_path); + if ( SwishError( swish_handle ) || ! swish_handle) { + elog(ERROR, "pgswish: SwishInit(%s) failed: %s", index_path, SwishErrorString( swish_handle )); + SRF_RETURN_DONE(funcctx); } - if ( SwishError( swish_handle ) ) error_or_abort( swish_handle ); + if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx); /* set ranking scheme. default is 0 */ - SwishRankScheme( swish_handle, 1 ); - - /* Check for errors after every call */ - if ( SwishError( swish_handle ) ) - error_or_abort( swish_handle ); /* print an error or abort -- see below */ + SwishRankScheme( swish_handle, 0 ); + if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx); - elog(INFO, "pgswish: SwishQuery(%s)", query); + elog(DEBUG1, "pgswish: SwishQuery(%s)", query); /* Here's a short-cut to searching that creates a search object and searches at the same time */ -elog(INFO,"## FIXME: SwishQuery kills back-end?"); - results = SwishQuery( swish_handle, query); -elog(INFO,"## FIXME: no..."); - if ( SwishError( swish_handle ) ) error_or_abort( swish_handle ); + swish_results = SwishQuery( swish_handle, query); + if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx); /* total number of tuples to be returned */ - funcctx->max_calls = SwishHits( results ); + funcctx->max_calls = SwishHits( swish_results ); /* check if results exists */ if ( 0 == funcctx->max_calls ) elog(INFO, "no results for: %s", query ); - elog(INFO, "pgswish: SwishHits = %d", funcctx->max_calls); + elog(DEBUG1, "pgswish: SwishHits = %d", funcctx->max_calls); /* Build a tuple description for a __pgswish tuple */ tupdesc = RelationNameGetTupleDesc("__pgswish"); @@ -118,6 +131,8 @@ funcctx->attinmeta = attinmeta; MemoryContextSwitchTo(oldcontext); + + elog(DEBUG1, "SRF_IS_FIRSTCALL done"); } /* stuff done on every call of the function */ @@ -133,18 +148,58 @@ HeapTuple tuple; Datum result; -if (0) { + elog(DEBUG1, "pgswish: loop count %d", call_cntr); + + if (! swish_results) { + elog(ERROR, "pgswish: no swish-e results"); + SRF_RETURN_DONE(funcctx); + } + + elog(DEBUG1, "pgswish: check for swish-e error"); + if (error_or_abort( swish_handle )) SRF_RETURN_DONE(funcctx); + /* * Prepare a values array for storage in our slot. * This should be an array of C strings which will * be processed later by the type input functions. */ - values = (char **) palloc(5 * sizeof(char *)); - values[0] = _textout( SwishResultPropertyULong ( result, "swishrank" ) ); - values[1] = _textout( SwishResultPropertyStr ( result, "swishdocpath" ) ); - values[2] = _textout( SwishResultPropertyStr ( result, "swishtitle" ) ); - values[3] = _textout( SwishResultPropertyStr ( result, "swishdocsize" ) ); - values[4] = _textout( SwishResultPropertyStr ( result, "swishdbfile" ) ); + + sw_res = SwishNextResult( swish_results ); + if (! sw_res) { + elog(ERROR, "pgswish: swish-e sort result list: %d rows expected %d", call_cntr, max_calls - 1); + Free_Results_Object( swish_results ); + Free_Search_Object( search ); + SRF_RETURN_DONE(funcctx); + } + + elog(DEBUG1, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n", + SwishResultPropertyStr ( sw_res, "swishdocpath" ), + SwishResultPropertyULong ( sw_res, "swishrank" ), + SwishResultPropertyULong ( sw_res, "swishdocsize" ), + SwishResultPropertyStr ( sw_res, "swishtitle"), + SwishResultPropertyStr ( sw_res, "swishdbfile" ), + SwishResultPropertyStr ( sw_res, "swishlastmodified" ), + SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */ + SwishResultPropertyULong ( sw_res, "swishfilenum" ) + ); + + values = (char **) palloc(4 * sizeof(char *)); + + values[0] = prop2int( sw_res, "swishrank" ); + values[1] = prop2text( sw_res, "swishdocpath" ); + values[2] = prop2text( sw_res, "swishtitle" ); + values[3] = prop2int( sw_res, "swishdocsize" ); + +/* + values[0] = (char *) palloc(16 * sizeof(char)); + snprintf(values[0], 16, "%d", 1); + values[1] = (char *) palloc(16 * sizeof(char)); + snprintf(values[1], 16, "%d", 2); + values[2] = (char *) palloc(16 * sizeof(char)); + snprintf(values[2], 16, "%d", 3); + values[3] = (char *) palloc(16 * sizeof(char)); + snprintf(values[3], 16, "%d", 4); +*/ /* build a tuple */ tuple = BuildTupleFromCStrings(attinmeta, values); @@ -152,11 +207,19 @@ /* make the tuple into a datum */ result = TupleGetDatum(slot, tuple); -} - /* clean up (this is not really necessary) */ - + /* clean up ? */ + pfree(values[0]); + pfree(values[1]); + pfree(values[2]); + pfree(values[3]); + pfree(values); + + elog(DEBUG1, "row: %s|%s|%s|%s",values[0],values[1],values[2],values[3]); + SRF_RETURN_NEXT(funcctx, result); } else { + elog(DEBUG1, "loop over"); + /* free swish object and close */ Free_Search_Object( search ); SwishClose( swish_handle ); @@ -166,14 +229,331 @@ } } + /* - * elog errors - * + * new function with support for property selection */ -static void error_or_abort( SW_HANDLE swish_handle ) { +PG_FUNCTION_INFO_V1(pgswish_arr); +Datum pgswish_arr(PG_FUNCTION_ARGS) +{ + ArrayType *prop_arr = PG_GETARG_ARRAYTYPE_P(5); + Oid prop_element_type = ARR_ELEMTYPE(prop_arr); + int prop_ndims = ARR_NDIM(prop_arr); + int *prop_dim_counts = ARR_DIMS(prop_arr); + int *prop_dim_lower_bounds = ARR_LBOUND(prop_arr); + int ncols = 0; + int nrows = 0; + int indx[MAXDIM]; + int16 prop_len; + bool prop_byval; + char prop_align; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + AttInMetadata *attinmeta; + TupleDesc tupdesc; + Tuplestorestate *tupstore = NULL; + HeapTuple tuple; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Datum dvalue; + char **values; + int rsinfo_ncols; + int i, j; + /* swish-e */ + FILE *logfh; + int resnum; + int limit = 0; + int offset = 0; + + char *index_path; + char *query; + char *attr; + + + /* only allow 1D input array */ + if (prop_ndims == 1) + { + ncols = prop_dim_counts[0]; + } + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid input array"), + errdetail("Input array must have 1 dimension"))); + + /* check to see if caller supports us returning a tuplestore */ + if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("materialize mode required, but it is not " \ + "allowed in this context"))); + + /* get info about element type needed to construct the array */ + get_typlenbyvalalign(prop_element_type, &prop_len, &prop_byval, &prop_align); + + /* get the requested return tuple description */ + tupdesc = rsinfo->expectedDesc; + rsinfo_ncols = tupdesc->natts; + + /* + * The requested tuple description better match up with the array + * we were given. + */ + if (rsinfo_ncols != ncols) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid input array"), + errdetail("Number of elements in array must match number of query specified columns."))); + + /* OK, use it */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + + /* Now go to work */ + rsinfo->returnMode = SFRM_Materialize; + + per_query_ctx = fcinfo->flinfo->fn_mcxt; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* initialize our tuplestore */ + tupstore = tuplestore_begin_heap(true, false, SortMem); + + + /* take rest of arguments from function */ + + /* index path */ + if (PG_ARGISNULL(0)) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("index path can't be null"), + errdetail("Index path must be valid full path to swish-e index"))); + } + index_path = _textout(PG_GETARG_TEXT_P(0)); + + /* query string */ + if (PG_ARGISNULL(0)) { + query = ""; + } else { + query = _textout(PG_GETARG_TEXT_P(1)); + } + + /* atribute filter */ + if (PG_ARGISNULL(2)) { + attr = ""; + } else { + attr = _textout(PG_GETARG_TEXT_P(2)); + } + + /* limit */ + if (PG_ARGISNULL(3)) { + limit = 0; + } else { + limit = PG_GETARG_INT32(3); + } + + /* offset */ + if (PG_ARGISNULL(4)) { + offset = 0; + } else { + offset = PG_GETARG_INT32(4); + } + + + /* Send any errors or warnings to log, as well as + * STDOUT and STDERR (just to be sure) */ + if ( logfh = fopen("/tmp/pgswish.log", "a") ) { + set_error_handle( logfh ); + elog(DEBUG1, "loggin swish-e errors to /tmp/pgswish.log"); + /* redirect STDOUT and STDERR to log */ + dup2(1, logfh); + dup2(2, logfh); + } else { + elog(INFO, "can't open /tmp/pgswish.log -- errors from swish-e won't be cought and may result in back-end crashes!"); + } + + elog(DEBUG1, "pgswish: SwishInit(%s)", index_path); + + swish_handle = SwishInit( index_path ); + + if ( SwishError( swish_handle ) || ! swish_handle ) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pgswish: SwishInit(%s) failed", index_path ), + errdetail( SwishErrorString( swish_handle ) ) + )); + + elog(DEBUG1, "pgswish: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset); + + + /* set ranking scheme. default is 0 */ + SwishRankScheme( swish_handle, 0 ); + error_or_abort( swish_handle ); + + elog(DEBUG1, "pgswish: SwishQuery(%s)", query); + /* Here's a short-cut to searching that creates a search object + * and searches at the same time */ + + /* set the search phrase to the search condition object */ + if (! PG_ARGISNULL(1) && strlen(query) > 0) + swish_results = SwishQuery( swish_handle, query); + error_or_abort( swish_handle ); + + /* total number of tuples to be returned */ + resnum = SwishHits( swish_results ); + + /* FIXME */ + if (! PG_ARGISNULL(2) && strlen(attr) >= 10) { + elog(DEBUG1,"ignored: %s", attr); + } + + /* check if results exists */ + if ( 0 == resnum ) { + elog(INFO, "pgswish: no results for: %s", query ); + } + + /* total number of tuples to be returned */ + if (limit && limit < resnum) { + nrows = limit - offset; + } else { + nrows = resnum - offset; + } + + + elog(DEBUG1, "pgswish: found %d hits for %s", resnum, query); + + + values = (char **) palloc(ncols * sizeof(char *)); + + for (i = 0; i < nrows; i++) + { + SwishSeekResult( swish_results, i + offset ); + sw_res = SwishNextResult( swish_results ); + + /* get result from swish-e */ + if (! ( SwishErrorString( swish_handle ) ) ) { + elog(INFO, "can't find result %d", i + offset); + } else { + elog(DEBUG1, "Path: %s\n Rank: %lu\n Size: %lu\n Title: %s\n Index: %s\n Modified: %s\n Record #: %lu\n File #: %lu\n\n", + SwishResultPropertyStr ( sw_res, "swishdocpath" ), + SwishResultPropertyULong ( sw_res, "swishrank" ), + SwishResultPropertyULong ( sw_res, "swishdocsize" ), + SwishResultPropertyStr ( sw_res, "swishtitle"), + SwishResultPropertyStr ( sw_res, "swishdbfile" ), + SwishResultPropertyStr ( sw_res, "swishlastmodified" ), + SwishResultPropertyULong ( sw_res, "swishreccount" ), /* can figure this out in loop, of course */ + SwishResultPropertyULong ( sw_res, "swishfilenum" ) + ); + } + + /* iterate over results */ + for (j = 0; j < ncols; j++) + { + bool isnull; + + /* array value of this position */ + indx[0] = j + prop_dim_lower_bounds[0]; + + dvalue = array_ref(prop_arr, prop_ndims, indx, -1, prop_len, prop_byval, prop_align, &isnull); + + if (!isnull && sw_res) + values[j] = DatumGetCString( + prop2text( sw_res, + (char *)DirectFunctionCall1(textout, dvalue) + )); + else + values[j] = NULL; + } + /* construct the tuple */ + tuple = BuildTupleFromCStrings(attinmeta, values); + + /* now store it */ + tuplestore_puttuple(tupstore, tuple); + + } + + tuplestore_donestoring(tupstore); + rsinfo->setResult = tupstore; + + /* + * SFRM_Materialize mode expects us to return a NULL Datum. The actual + * tuples are in our tuplestore and passed back through + * rsinfo->setResult. rsinfo->setDesc is set to the tuple description + * that we actually used to build our tuples with, so the caller can + * verify we did what it was expecting. + */ + rsinfo->setDesc = tupdesc; + MemoryContextSwitchTo(oldcontext); + + /* free swish object and close */ + Free_Search_Object( search ); + SwishClose( swish_handle ); + + return (Datum) 0; +} + + + + +/* make text var from property */ +char *prop2text(SW_RESULT sw_res, char *propname) { + char *val; + char *prop; + int len; + + elog(DEBUG2, "prop2text(%s)", propname); + + prop = SwishResultPropertyStr( sw_res, propname ); + if (error_or_abort( swish_handle )) return NULL; + + len = strlen(prop); + elog(DEBUG1, "prop2text(%s) = '%s' %d bytes", propname, prop, len); + + len++; + len *= sizeof(char); + + elog(DEBUG2, "palloc(%d)", len); + + val = palloc(len); + + memset(val, 0, len); + strncpy(val, prop, len); + + elog(DEBUG2, "val=%s", val); + + return val; +} + +/* make integer variable from property */ +char *prop2int(SW_RESULT sw_res, char *propname) { + char *val; + unsigned long prop; + int len; + + elog(DEBUG2, "prop2int(%s)", propname); + + prop = SwishResultPropertyULong( sw_res, propname ); + if (error_or_abort( swish_handle )) return NULL; + + elog(DEBUG1, "prop2int(%s) = %lu", propname, prop); + + len = 128 * sizeof(char); + elog(DEBUG2, "palloc(%d)", len); + + val = palloc(len); + memset(val, 0, len); + + snprintf(val, len, "%lu", prop); + + elog(DEBUG2, "val=%s", val); + + return val; +} + + +/* + * check if swish has returned error, and elog it. + */ +static int error_or_abort( SW_HANDLE swish_handle ) { if ( !SwishError( swish_handle ) ) - return; + return 0; /* print a message */ elog(ERROR, @@ -182,9 +562,10 @@ SwishErrorString( swish_handle ), SwishLastErrorMsg( swish_handle ) ); + if ( swish_results ) Free_Results_Object( swish_results ); if ( search ) Free_Search_Object( search ); SwishClose( swish_handle ); - /* do when there is no more left */ + return 1; }