--- trunk/pgest.c 2005/09/10 18:51:13 40 +++ trunk/pgest.c 2005/09/11 21:44:56 47 @@ -14,7 +14,7 @@ * * Based on: * - C example from PostgreSQL documentation (BSD licence) - * - example002.c from Hyper Estraier (GPL) + * - coreexample002.c and nodeexample002.c from Hyper Estraier (GPL) * - _textin/_textout from pgcurl.c (LGPL) * * This code is licenced under GPL @@ -28,6 +28,7 @@ #include "miscadmin.h" #include #include +#include #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str)) #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str))) @@ -43,6 +44,7 @@ /* prototype */ char *attr2text(ESTDOC *doc, char *attr); +char *node_attr2text(ESTRESDOC *rdoc, char *attr); /* work in progress */ @@ -244,7 +246,6 @@ elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query); - values = (char **) palloc(ncols * sizeof(char *)); for (i = 0; i < nrows; i++) @@ -252,7 +253,7 @@ /* get result from estraier */ if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) { - elog(INFO, "can't find result %d", i + offset); + elog(INFO, "pgest_attr: can't find result %d", i + offset); } else { elog(DEBUG1, "URI: %s\n Title: %s\n", est_doc_attr(doc, "@uri"), @@ -284,9 +285,8 @@ /* now store it */ tuplestore_puttuple(tupstore, tuple); - /* delete estraier document object */ - est_doc_delete(doc); + if (doc) est_doc_delete(doc); } tuplestore_donestoring(tupstore); @@ -321,6 +321,8 @@ int len; int attrlen; + if (! 
doc) return (Datum) NULL; + elog(DEBUG1, "doc: %08x, attr: %s", doc, attr); if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) { @@ -334,6 +336,341 @@ len++; len *= sizeof(char); + + elog(DEBUG2, "palloc(%d)", len); + + val = palloc(len); + + memset(val, 0, len); + strncpy(val, attrval, len); + + elog(DEBUG2, "val=%s", val); + + return val; +} + +/* + * pgest_node: variation on theme: use node API which doesn't open index on + * every query which is much faster for large indexes + * Arguments: 0 node URL, 1 user, 2 password (0-2 must not be NULL), 3 query, 4 attribute filter, 5 sort order, 6 limit, 7 offset, 8 one-dimensional array of attribute names, one per result column; rows are handed back through a materialized tuplestore + */ + +PG_FUNCTION_INFO_V1(pgest_node); +Datum pgest_node(PG_FUNCTION_ARGS) +{ + ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(8); + Oid attr_element_type = ARR_ELEMTYPE(attr_arr); + int attr_ndims = ARR_NDIM(attr_arr); + int *attr_dim_counts = ARR_DIMS(attr_arr); + int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr); + int ncols = 0; + int nrows = 0; + int indx[MAXDIM]; + int16 attr_len; + bool attr_byval; + char attr_align; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + AttInMetadata *attinmeta; + TupleDesc tupdesc; + Tuplestorestate *tupstore = NULL; + HeapTuple tuple; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Datum dvalue; + char **values; + int rsinfo_ncols; + int i, j; + /* Hyper Estraier handles and search parameters */ + ESTNODE *node; + ESTCOND *cond; + ESTNODERES *nres; + ESTRESDOC *rdoc; + const CBLIST *texts; /* NOTE(review): declared but not referenced anywhere else in this function */ + int resnum = 0; + int limit = 0; + int offset = 0; + + char *node_url; + char *user, *passwd; + char *query; + char *attr; + char *order; + + + /* only allow 1D input array; its element count becomes the number of output columns */ + if (attr_ndims == 1) + { + ncols = attr_dim_counts[0]; + } + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid input array"), + errdetail("Input array must have 1 dimension"))); + + /* check to see if caller supports us returning a tuplestore */ + if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("materialize mode required, but it is not " \ + "allowed in this context"))); + + /* 
get info about element type needed to read elements out of the array */ + get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align); + + /* get the requested return tuple description */ + tupdesc = rsinfo->expectedDesc; + rsinfo_ncols = tupdesc->natts; + + /* + * The requested tuple description had better match up with the array + * we were given. + */ + if (rsinfo_ncols != ncols) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid input array"), + errdetail("Number of elements in array must match number of query specified columns."))); + + /* OK, use it */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + + /* Now go to work */ + rsinfo->returnMode = SFRM_Materialize; + + per_query_ctx = fcinfo->flinfo->fn_mcxt; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* initialize our tuplestore */ + tupstore = tuplestore_begin_heap(true, false, SortMem); + + + /* take the rest of the arguments from the function call */ + + /* node URL */ + if (PG_ARGISNULL(0)) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("node URL can't be null"), + errdetail("Node URL must be valid URL to HyperEstraier node"))); + } + node_url = _textout(PG_GETARG_TEXT_P(0)); + + /* login and password */ + if (PG_ARGISNULL(1) || PG_ARGISNULL(2)) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("username and password can't be NULL"), + errdetail("You must specify valid username and password to HyperEstraier node"))); + } + user = _textout(PG_GETARG_TEXT_P(1)); + passwd = _textout(PG_GETARG_TEXT_P(2)); + + /* query string (NULL becomes the empty string) */ + if (PG_ARGISNULL(3)) { + query = ""; + } else { + query = _textout(PG_GETARG_TEXT_P(3)); + } + + /* attribute filter (NULL becomes the empty string) */ + if (PG_ARGISNULL(4)) { + attr = ""; + } else { + attr = _textout(PG_GETARG_TEXT_P(4)); + } + + /* sort order (NULL becomes the empty string) */ + if (PG_ARGISNULL(5)) { + order = ""; + } else { + order = _textout(PG_GETARG_TEXT_P(5)); + } + + + /* limit (NULL becomes 0, which the code below treats as "no limit") */ + if (PG_ARGISNULL(6)) { + limit = 0; + } else { + limit = 
PG_GETARG_INT32(6); + } + + /* offset (NULL becomes 0) */ + if (PG_ARGISNULL(7)) { + offset = 0; + } else { + offset = PG_GETARG_INT32(7); + } + + /* initialize the network environment */ + if(!est_init_net_env()){ + ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), + errmsg("pgest_node: can't create network enviroment"))); + } + + /* create the node connection object; NOTE(review): the est_node_new() result is used without a NULL check */ + elog(DEBUG1, "pgest_node: est_node_new(%s) as %s", node_url, user); + node = est_node_new(node_url); + est_node_set_auth(node, user, passwd); + + elog(DEBUG1, "pgest_node: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(4) ? "NULL" : attr), limit, offset); + + /* create a search condition object */ + if (!(cond = est_cond_new())) { + ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), + errmsg("pgest_node: est_cond_new failed"))); + } + + /* set the search phrase to the search condition object */ + if (! PG_ARGISNULL(3) && strlen(query) > 0) + est_cond_set_phrase(cond, query); + + /* minimum valid attribute length is 10: @a STREQ a; shorter filters are ignored, longer ones are split on ATTR_DELIMITER and each piece is added as its own attribute condition (strtok modifies attr in place) */ + if (! PG_ARGISNULL(4) && strlen(attr) >= 10) { + elog(DEBUG1,"attributes: %s", attr); + char *curr_attr; + curr_attr = strtok(attr, ATTR_DELIMITER); + while (curr_attr) { + elog(DEBUG1,"est_cond_add_attr(%s)", curr_attr); + est_cond_add_attr(cond, curr_attr); + curr_attr = strtok(NULL, ATTR_DELIMITER); + } + } + + /* set the sort order on the search condition object */ + if (! PG_ARGISNULL(5) && strlen(order) > 0) { + elog(DEBUG1,"est_cond_set_order(%s)", order); + est_cond_set_order(cond, order); + } + + if (limit) { + elog(DEBUG1,"est_cond_set_max(%d)", limit + offset); + est_cond_set_max(cond, limit + offset); + } + + /* get the result of search */ + /* FIXME: allow user to specify depth of search */ + nres = est_node_search(node, cond, 0); + + if (! 
nres) { + int status = est_node_status(node); + est_cond_delete(cond); + est_node_delete(node); + est_free_net_env(); + ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), + errmsg("pgest_node: search failed, node status %d", status))); + } + + /* get number of results */ + resnum = est_noderes_doc_num(nres); + + /* report when the search returned no results */ + if ( 0 == resnum ) { + elog(INFO, "pgest_node: no results for: %s", query ); + } + + /* total number of tuples to be returned; NOTE(review): the limit branch ignores offset, so i + offset in the loop below can run past resnum - 1 when resnum < limit + offset, and resnum - offset is negative when offset > resnum (the loop then runs zero times) -- confirm this is intended */ + if (limit && limit < resnum) { + nrows = limit; + } else { + nrows = resnum - offset; + } + + + elog(DEBUG1, "pgest_node: found %d hits for %s", resnum, query); + + + values = (char **) palloc(ncols * sizeof(char *)); + + for (i = 0; i < nrows; i++) + { + + /* get result number i + offset from estraier; rdoc is NULL when it can't be found */ + if (! ( rdoc = est_noderes_get_doc(nres, i + offset) )) { + elog(INFO, "pgest_node: can't find result %d", i + offset); + } else { + elog(DEBUG1, "URI: %s\n Title: %s\n", + est_resdoc_attr(rdoc, "@uri"), + est_resdoc_attr(rdoc, "@title") + ); + } + + /* fill one value per requested attribute column */ + for (j = 0; j < ncols; j++) + { + bool isnull; + + /* array value of this position */ + indx[0] = j + attr_dim_lower_bounds[0]; + + dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull); + + if (!isnull && rdoc) + values[j] = DatumGetCString( + node_attr2text(rdoc, + (char *)DirectFunctionCall1(textout, dvalue) + )); + else + values[j] = NULL; + } + /* construct the tuple (every column is NULL when the document was not found) */ + tuple = BuildTupleFromCStrings(attinmeta, values); + + /* now store it */ + tuplestore_puttuple(tupstore, tuple); + + } + + tuplestore_donestoring(tupstore); + rsinfo->setResult = tupstore; + + /* + * SFRM_Materialize mode expects us to return a NULL Datum. The actual + * tuples are in our tuplestore and passed back through + * rsinfo->setResult. rsinfo->setDesc is set to the tuple description + * that we actually used to build our tuples with, so the caller can + * verify we did what it was expecting. 
+ */ + rsinfo->setDesc = tupdesc; + MemoryContextSwitchTo(oldcontext); + + /* delete the node result object */ + est_noderes_delete(nres); + + /* destroy the search condition object */ + est_cond_delete(cond); + + /* destroy the node object */ + est_node_delete(node); + + /* free the networking environment */ + est_free_net_env(); + + return (Datum) 0; +} + +/* make text var from node attr */ +char *node_attr2text(ESTRESDOC *rdoc, char *attr) { + char *val; + const char *attrval; + int len; + int attrlen; + + if (! rdoc) return (Datum) NULL; + + elog(DEBUG1, "doc: %08x, attr: %s", rdoc, attr); + + if ( (attrval = est_resdoc_attr(rdoc, attr)) && (attrlen = strlen(attrval)) ) { + val = (char *) palloc(attrlen * sizeof(char)); + } else { + return (Datum) NULL; + } + + len = strlen(attrval); + elog(DEBUG1, "node_attr2text(%s) = '%s' %d bytes", attr, attrval, len); + + len++; + len *= sizeof(char); elog(DEBUG2, "palloc(%d)", len);