/[pgestraier]/trunk/pgest.c
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/pgest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 27 - (show annotations)
Thu Jun 30 20:01:36 2005 UTC (18 years, 10 months ago) by dpavlin
File MIME type: text/plain
File size: 7997 byte(s)
fix for compilation on PostgreSQL 8.0

1 /*
2 * integrate Hyper Estraier into PostgreSQL
3 *
4 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5 *
6 * TODO:
7 * - all
8 *
9 * NOTES:
10 * - clear structures with memset to support hash indexes (who would like
11 * to create a hash index on a table returned from a function?)
12 * - number of returned rows is set by PostgreSQL evaluator, see:
13 * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14 *
15 * Based on:
16 * - C example from PostgreSQL documentation (BSD licence)
17 * - example002.c from Hyper Estraier (GPL)
18 * - _textin/_textout from pgcurl.c (LGPL)
19 *
20 * This code is licenced under GPL
21 */
22
23 #include "postgres.h"
24 #include "fmgr.h"
25 #include "funcapi.h"
26 #include "utils/builtins.h"
27 #include "utils/array.h"
28 #include "miscadmin.h"
29 #include <estraier.h>
30 #include <cabin.h>
31
/*
 * Conversion helpers between C strings and PostgreSQL text values. Each one
 * calls the built-in textin/textout function through DirectFunctionCall1.
 */

/* C string -> result of textin(), left as a Datum */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
/* text value -> result of textout(), as a plain pointer */
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
/* text value -> result of textout(), as a C string */
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
/* C string -> result of textin(), as a text pointer */
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))

/*
 * SortMem got renamed in PostgreSQL 8.0, so provide a fallback value here.
 * The expansion is parenthesized so that it stays a single value when used
 * inside a larger expression (e.g. "x / SortMem").
 *
 * NOTE(review): #ifndef only sees preprocessor macros; if an older server
 * header declares SortMem as a variable, this fallback still takes effect.
 */
#ifndef SortMem
#define SortMem (16 * 1024)
#endif
41
42 /* prototype */
43 char *attr2text(ESTDOC *doc, char *attr);
44
45
46 /* work in progress */
47 PG_FUNCTION_INFO_V1(pgest_attr);
48 Datum pgest_attr(PG_FUNCTION_ARGS)
49 {
50 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(5);
51 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
52 int attr_ndims = ARR_NDIM(attr_arr);
53 int *attr_dim_counts = ARR_DIMS(attr_arr);
54 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
55 int ncols = 0;
56 int nrows = 0;
57 int indx[MAXDIM];
58 int16 attr_len;
59 bool attr_byval;
60 char attr_align;
61 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
62 AttInMetadata *attinmeta;
63 TupleDesc tupdesc;
64 Tuplestorestate *tupstore = NULL;
65 HeapTuple tuple;
66 MemoryContext per_query_ctx;
67 MemoryContext oldcontext;
68 Datum dvalue;
69 char **values;
70 int rsinfo_ncols;
71 int i, j;
72 /* estvars */
73 ESTDB *db;
74 ESTCOND *cond;
75 ESTDOC *doc;
76 const CBLIST *texts;
77 int ecode, *est_result, resnum;
78 int limit = 0;
79 int offset = 0;
80
81 char *index_path;
82 char *query;
83 char *attr;
84
85
86 /* only allow 1D input array */
87 if (attr_ndims == 1)
88 {
89 ncols = attr_dim_counts[0];
90 }
91 else
92 ereport(ERROR,
93 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
94 errmsg("invalid input array"),
95 errdetail("Input array must have 1 dimension")));
96
97 /* check to see if caller supports us returning a tuplestore */
98 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
99 ereport(ERROR,
100 (errcode(ERRCODE_SYNTAX_ERROR),
101 errmsg("materialize mode required, but it is not " \
102 "allowed in this context")));
103
104 /* get info about element type needed to construct the array */
105 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
106
107 /* get the requested return tuple description */
108 tupdesc = rsinfo->expectedDesc;
109 rsinfo_ncols = tupdesc->natts;
110
111 /*
112 * The requested tuple description better match up with the array
113 * we were given.
114 */
115 if (rsinfo_ncols != ncols)
116 ereport(ERROR,
117 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
118 errmsg("invalid input array"),
119 errdetail("Number of elements in array must match number of query specified columns.")));
120
121 /* OK, use it */
122 attinmeta = TupleDescGetAttInMetadata(tupdesc);
123
124 /* Now go to work */
125 rsinfo->returnMode = SFRM_Materialize;
126
127 per_query_ctx = fcinfo->flinfo->fn_mcxt;
128 oldcontext = MemoryContextSwitchTo(per_query_ctx);
129
130 /* initialize our tuplestore */
131 tupstore = tuplestore_begin_heap(true, false, SortMem);
132
133
134 /* take rest of arguments from function */
135
136 /* index path */
137 if (PG_ARGISNULL(0)) {
138 ereport(ERROR,
139 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
140 errmsg("index path can't be null"),
141 errdetail("Index path must be valid full path to HyperEstraier index")));
142 }
143 index_path = _textout(PG_GETARG_TEXT_P(0));
144
145 /* query string */
146 if (PG_ARGISNULL(0)) {
147 query = "";
148 } else {
149 query = _textout(PG_GETARG_TEXT_P(1));
150 }
151
152 /* atribute filter */
153 if (PG_ARGISNULL(2)) {
154 attr = "";
155 } else {
156 attr = _textout(PG_GETARG_TEXT_P(2));
157 }
158
159 /* limit */
160 if (PG_ARGISNULL(3)) {
161 limit = 0;
162 } else {
163 limit = PG_GETARG_INT32(3);
164 }
165
166 /* offset */
167 if (PG_ARGISNULL(4)) {
168 offset = 0;
169 } else {
170 offset = PG_GETARG_INT32(4);
171 }
172
173
174 /* open the database */
175 elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
176
177 if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
178 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
179 errmsg("est_db_open: can't open %s: %d", index_path, ecode),
180 errdetail(est_err_msg(ecode))));
181 }
182
183 elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
184
185 /* create a search condition object */
186 if (!(cond = est_cond_new())) {
187 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
188 errmsg("pgest_attr: est_cond_new failed")));
189 }
190
191 /* set the search phrase to the search condition object */
192 if (! PG_ARGISNULL(1) && strlen(query) > 0)
193 est_cond_set_phrase(cond, query);
194
195 /* minimum valid attribute length is 10: @a STREQ a */
196 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
197 elog(DEBUG1,"est_cond_add_attr(%s)", attr);
198 est_cond_add_attr(cond, attr);
199 }
200
201 /* get the result of search */
202 est_result = est_db_search(db, cond, &resnum, NULL);
203
204 /* check if results exists */
205 if ( 0 == resnum ) {
206 elog(INFO, "pgest_attr: no results for: %s", query );
207 }
208
209 /* total number of tuples to be returned */
210 if (limit && limit < resnum) {
211 nrows = limit - offset;
212 } else {
213 nrows = resnum - offset;
214 }
215
216
217 elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
218
219
220 values = (char **) palloc(ncols * sizeof(char *));
221
222 for (i = 0; i < nrows; i++)
223 {
224
225 /* get result from estraier */
226 if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
227 elog(INFO, "can't find result %d", i + offset);
228 } else {
229 elog(DEBUG1, "URI: %s\n Title: %s\n",
230 est_doc_attr(doc, "@uri"),
231 est_doc_attr(doc, "@title")
232 );
233 }
234
235 /* iterate over results */
236 for (j = 0; j < ncols; j++)
237 {
238 bool isnull;
239
240 /* array value of this position */
241 indx[0] = j + attr_dim_lower_bounds[0];
242
243 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
244
245 if (!isnull && doc)
246 values[j] = DatumGetCString(
247 attr2text(doc,
248 (char *)DirectFunctionCall1(textout, dvalue)
249 ));
250 else
251 values[j] = NULL;
252 }
253 /* construct the tuple */
254 tuple = BuildTupleFromCStrings(attinmeta, values);
255
256 /* now store it */
257 tuplestore_puttuple(tupstore, tuple);
258
259
260 /* delete estraier document object */
261 est_doc_delete(doc);
262 }
263
264 tuplestore_donestoring(tupstore);
265 rsinfo->setResult = tupstore;
266
267 /*
268 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
269 * tuples are in our tuplestore and passed back through
270 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
271 * that we actually used to build our tuples with, so the caller can
272 * verify we did what it was expecting.
273 */
274 rsinfo->setDesc = tupdesc;
275 MemoryContextSwitchTo(oldcontext);
276
277 if(!est_db_close(db, &ecode)){
278 ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
279 errmsg("est_db_close: %d", ecode),
280 errdetail(est_err_msg(ecode))));
281 }
282
283 return (Datum) 0;
284 }
285
286
287 /* make text var from attr */
288 char *attr2text(ESTDOC *doc, char *attr) {
289 char *val;
290 const char *attrval;
291 int len;
292 int attrlen;
293
294 elog(DEBUG1, "doc: %08x, attr: %s", doc, attr);
295
296 if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
297 val = (char *) palloc(attrlen * sizeof(char));
298 } else {
299 return (Datum) NULL;
300 }
301
302 len = strlen(attrval);
303 elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
304
305 len++;
306 len *= sizeof(char);
307
308 elog(DEBUG2, "palloc(%d)", len);
309
310 val = palloc(len);
311
312 memset(val, 0, len);
313 strncpy(val, attrval, len);
314
315 elog(DEBUG2, "val=%s", val);
316
317 return val;
318 }
319

  ViewVC Help
Powered by ViewVC 1.1.26