--- trunk/pgest.c 2005/05/22 21:18:11 9 +++ trunk/pgest.c 2005/07/08 12:47:49 38 @@ -34,224 +34,67 @@ #define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp))) #define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp))) +/* SortMem got renamed in PostgreSQL 8.0 */ +#ifndef SortMem + #define SortMem 16 * 1024 +#endif + /* prototype */ char *attr2text(ESTDOC *doc, char *attr); -ESTDB *db; -ESTCOND *cond; -ESTDOC *doc; -const CBLIST *texts; -int ecode, *est_result, resnum, i, j; -int limit = 0; -int offset = 0; - -/* define PostgreSQL v1 function */ -PG_FUNCTION_INFO_V1(pgest); -Datum pgest(PG_FUNCTION_ARGS) { - - FuncCallContext *funcctx; - int call_cntr; - int max_calls; - TupleDesc tupdesc; - TupleTableSlot *slot; - AttInMetadata *attinmeta; - char *index_path; - char *query; - char *attr; - - /* stuff done only on the first call of the function */ - if (SRF_IS_FIRSTCALL()) { - MemoryContext oldcontext; - - /* take arguments from function */ - //index_path = _textout(PG_GETARG_TEXT_P(0)); - index_path = _textout(PG_GETARG_TEXT_P(0)); - query = _textout(PG_GETARG_TEXT_P(1)); - attr = _textout(PG_GETARG_TEXT_P(2)); - limit = PG_GETARG_INT32(3); - offset = PG_GETARG_INT32(4); - - /* create a function context for cross-call persistence */ - funcctx = SRF_FIRSTCALL_INIT(); - - /* switch to memory context appropriate for multiple function calls */ - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - /* open the database */ - elog(DEBUG1, "pgest: est_db_open(%s)", index_path); - - if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){ - elog(ERROR, "est_db_open: can't open %s [%d]: %s", index_path, ecode, est_err_msg(ecode)); - SRF_RETURN_DONE(funcctx); - } - - elog(INFO, "pgest: query[%s] attr[%s] limit %d offset %d", query, attr, limit, offset); - - /* create a search condition object */ - if (!(cond = est_cond_new())) { - elog(INFO, "pgest: est_cond_new failed"); - SRF_RETURN_DONE(funcctx); - } - - /* set the search phrase to the search condition object */ - if (strlen(query) > 0) - est_cond_set_phrase(cond, query); - - /* minimum valid attribute length is 10: @a STREQ a */ - if (attr != NULL && strlen(attr) >= 10) { - elog(INFO,"est_cond_add_attr(%s)", attr); - est_cond_add_attr(cond, attr); - } - - /* get the result of search */ - est_result = est_db_search(db, cond, &resnum, NULL); - - /* total number of tuples to be returned */ - if (limit && limit < resnum) { - funcctx->max_calls = limit - offset; - } else { - funcctx->max_calls = resnum - offset; - } - - /* check if results exists */ - if ( 0 == funcctx->max_calls ) - elog(INFO, "pgest: no results for: %s", query ); - - elog(DEBUG1, "pgest: found %d hits for %s", resnum, query); - - /* Build a tuple description for a __pgest tuple */ - tupdesc = RelationNameGetTupleDesc("__pgest"); - - /* allocate a slot for a tuple with this tupdesc */ - slot = TupleDescGetSlot(tupdesc); - - /* assign slot to function context */ - funcctx->slot = slot; - - /* - * generate attribute metadata needed later to produce tuples from raw - * C strings - */ - attinmeta = TupleDescGetAttInMetadata(tupdesc); - funcctx->attinmeta = attinmeta; - - MemoryContextSwitchTo(oldcontext); - - elog(DEBUG1, "SRF_IS_FIRSTCALL done"); - } - - /* stuff done on every call of the function */ - funcctx = SRF_PERCALL_SETUP(); - - call_cntr = funcctx->call_cntr; - max_calls = funcctx->max_calls; - slot = funcctx->slot; - attinmeta = funcctx->attinmeta; - - if (limit && call_cntr > limit - 1) { - elog(INFO, "call_cntr: %d limit: %d", call_cntr, limit); - SRF_RETURN_DONE(funcctx); - } - - if (call_cntr < max_calls) { - char **values; - HeapTuple tuple; - Datum result; - - elog(DEBUG1, "pgest: loop count %d", call_cntr); - - if (! est_result) { - elog(ERROR, "pgest: no estraier results"); - SRF_RETURN_DONE(funcctx); - } - - /* - * Prepare a values array for storage in our slot. - * This should be an array of C strings which will - * be processed later by the type input functions. - */ - - if (doc = est_db_get_doc(db, est_result[call_cntr + offset], 0)) { - - elog(DEBUG1, "URI: %s\n Title: %s\n", - est_doc_attr(doc, "@uri"), - est_doc_attr(doc, "@title") - ); - - values = (char **) palloc(4 * sizeof(char *)); - -// values[0] = (char *) palloc(strlen(_estval) * sizeof(char)); - - values[0] = (char *) attr2text(doc,"@id"); - values[1] = (char *) attr2text(doc,"@uri"); - values[2] = (char *) attr2text(doc,"@title"); - values[3] = (char *) attr2text(doc,"@type"); - - /* destloy the document object */ - elog(DEBUG2, "est_doc_delete"); - est_doc_delete(doc); - } else { - elog(INFO, "no result from estraier"); - values[0] = DatumGetCString( "" ); - values[1] = DatumGetCString( "" ); - values[2] = DatumGetCString( "" ); - values[3] = DatumGetCString( "" ); - } - - - elog(DEBUG2, "build tuple"); - /* build a tuple */ - tuple = BuildTupleFromCStrings(attinmeta, values); - - elog(DEBUG2, "make tuple into datum"); - /* make the tuple into a datum */ - result = TupleGetDatum(slot, tuple); - - elog(DEBUG2, "cleanup"); - /* clean up ? */ -/* - pfree(values[0]); - pfree(values[1]); - pfree(values[2]); - pfree(values[3]); - pfree(values); -*/ - - elog(DEBUG2, "cleanup over"); - - SRF_RETURN_NEXT(funcctx, result); - } else { - elog(DEBUG1, "loop over"); - - if(!est_db_close(db, &ecode)){ - elog(INFO, "est_db_close error: %s", est_err_msg(ecode)); - } - - /* do when there is no more left */ - SRF_RETURN_DONE(funcctx); - } -} /* work in progress */ -PG_FUNCTION_INFO_V1(pgest2); -Datum pgest2(PG_FUNCTION_ARGS) +PG_FUNCTION_INFO_V1(pgest_attr); +Datum pgest_attr(PG_FUNCTION_ARGS) { - int nrows = 3; - int16 typlen; - bool typbyval; - char typalign; + ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(6); + Oid attr_element_type = ARR_ELEMTYPE(attr_arr); + int attr_ndims = ARR_NDIM(attr_arr); + int *attr_dim_counts = ARR_DIMS(attr_arr); + int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr); + int ncols = 0; + int nrows = 0; + int indx[MAXDIM]; + int16 attr_len; + bool attr_byval; + char attr_align; ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; AttInMetadata *attinmeta; TupleDesc tupdesc; - Tuplestorestate *tupstore = NULL; + Tuplestorestate *tupstore = NULL; HeapTuple tuple; MemoryContext per_query_ctx; MemoryContext oldcontext; Datum dvalue; char **values; - int ncols; + int rsinfo_ncols; int i, j; + /* estvars */ + ESTDB *db; + ESTCOND *cond; + ESTDOC *doc; + const CBLIST *texts; + int ecode, *est_result, resnum; + int limit = 0; + int offset = 0; + + char *index_path; + char *query; + char *attr; + char *order; + + /* only allow 1D input array */ + if (attr_ndims == 1) + { + ncols = attr_dim_counts[0]; + } + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid input array"), + errdetail("Input array must have 1 dimension"))); + /* check to see if caller supports us returning a tuplestore */ if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize)) ereport(ERROR, @@ -259,14 +102,23 @@ errmsg("materialize mode required, but it is not " \ "allowed in this context"))); + /* get info about element type needed to construct the array */ + get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align); + /* get the requested return tuple description */ tupdesc = rsinfo->expectedDesc; - ncols = tupdesc->natts; + rsinfo_ncols = tupdesc->natts; /* * The requested tuple description better match up with the array * we were given. */ + if (rsinfo_ncols != ncols) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid input array"), + errdetail("Number of elements in array must match number of query specified columns."))); + /* OK, use it */ attinmeta = TupleDescGetAttInMetadata(tupdesc); @@ -279,19 +131,155 @@ /* initialize our tuplestore */ tupstore = tuplestore_begin_heap(true, false, SortMem); + + /* take rest of arguments from function */ + + /* index path */ + if (PG_ARGISNULL(0)) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("index path can't be null"), + errdetail("Index path must be valid full path to HyperEstraier index"))); + } + index_path = _textout(PG_GETARG_TEXT_P(0)); + + /* query string */ + if (PG_ARGISNULL(1)) { + query = ""; + } else { + query = _textout(PG_GETARG_TEXT_P(1)); + } + + /* atribute filter */ + if (PG_ARGISNULL(2)) { + attr = ""; + } else { + attr = _textout(PG_GETARG_TEXT_P(2)); + } + + /* sort order */ + if (PG_ARGISNULL(3)) { + order = ""; + } else { + order = _textout(PG_GETARG_TEXT_P(3)); + } + + + /* limit */ + if (PG_ARGISNULL(4)) { + limit = 0; + } else { + limit = PG_GETARG_INT32(4); + } + + /* offset */ + if (PG_ARGISNULL(5)) { + offset = 0; + } else { + offset = PG_GETARG_INT32(5); + } + + + /* open the database */ + elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path); + + if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){ + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("est_db_open: can't open %s: %d", index_path, ecode), + errdetail(est_err_msg(ecode)))); + } + + elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset); + + /* create a search condition object */ + if (!(cond = est_cond_new())) { + ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), + errmsg("pgest_attr: est_cond_new failed"))); + } + + /* set the search phrase to the search condition object */ + if (! PG_ARGISNULL(1) && strlen(query) > 0) + est_cond_set_phrase(cond, query); + + /* minimum valid attribute length is 10: @a STREQ a */ + if (! PG_ARGISNULL(2) && strlen(attr) >= 10) { + elog(DEBUG1,"est_cond_add_attr(%s)", attr); + est_cond_add_attr(cond, attr); + } + + /* set the search phrase to the search condition object */ + if (! PG_ARGISNULL(3) && strlen(order) > 0) { + elog(DEBUG1,"est_cond_set_order(%s)", order); + est_cond_set_order(cond, order); + } + + if (limit) { + elog(DEBUG1,"est_cond_set_max(%d)", limit + offset); + est_cond_set_max(cond, limit + offset); + } + + /* get the result of search */ + est_result = est_db_search(db, cond, &resnum, NULL); + + /* check if results exists */ + if ( 0 == resnum ) { + elog(INFO, "pgest_attr: no results for: %s", query ); + } + + /* total number of tuples to be returned */ + if (limit && limit < resnum) { + nrows = limit; + elog(INFO, "This is probably a bug in limit implementation. Please report it to dpavlin@rot13.org"); + } else { + nrows = resnum - offset; + } + + + elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query); + + values = (char **) palloc(ncols * sizeof(char *)); for (i = 0; i < nrows; i++) { + + /* get result from estraier */ + if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) { + elog(INFO, "can't find result %d", i + offset); + } else { + elog(DEBUG1, "URI: %s\n Title: %s\n", + est_doc_attr(doc, "@uri"), + est_doc_attr(doc, "@title") + ); + } + + /* iterate over results */ for (j = 0; j < ncols; j++) { - values[j] = DatumGetCString( "foo" ); + bool isnull; + + /* array value of this position */ + indx[0] = j + attr_dim_lower_bounds[0]; + + dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull); + + if (!isnull && doc) + values[j] = DatumGetCString( + attr2text(doc, + (char *)DirectFunctionCall1(textout, dvalue) + )); + else + values[j] = NULL; } /* construct the tuple */ tuple = BuildTupleFromCStrings(attinmeta, values); /* now store it */ tuplestore_puttuple(tupstore, tuple); + + + /* delete estraier document object */ + est_doc_delete(doc); } tuplestore_donestoring(tupstore); @@ -307,6 +295,14 @@ rsinfo->setDesc = tupdesc; MemoryContextSwitchTo(oldcontext); + est_cond_delete(cond); + + if(!est_db_close(db, &ecode)){ + ereport(ERROR, (errcode(ERRCODE_IO_ERROR), + errmsg("est_db_close: %d", ecode), + errdetail(est_err_msg(ecode)))); + } + return (Datum) 0; } @@ -344,30 +340,3 @@ return val; } -/* make integer variable from property */ -/* -char *prop2int(SW_RESULT sw_res, char *propname) { - char *val; - unsigned long prop; - int len; - - elog(DEBUG2, "prop2int(%s)", propname); - - prop = estResultPropertyULong( sw_res, propname ); - if (error_or_abort( est_handle )) return NULL; - - elog(DEBUG1, "prop2int(%s) = %lu", propname, prop); - - len = 128 * sizeof(char); - elog(DEBUG2, "palloc(%d)", len); - - val = palloc(len); - memset(val, 0, len); - - snprintf(val, len, "%lu", prop); - - elog(DEBUG2, "val=%s", val); - - return val; -} -*/