/[pgestraier]/trunk/pgest.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/pgest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 40 - (show annotations)
Sat Sep 10 18:51:13 2005 UTC (18 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 8667 byte(s)
add support for multiple attributes delimited by {{!}}

1 /*
2 * integrate Hyper Estraier into PostgreSQL
3 *
4 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5 *
6 * TODO:
7 * - all
8 *
9 * NOTES:
10 * - clear structures with memset to support hash indexes (who would like
11 * to create a hash index on a table returned from a function?)
12 * - number of returned rows is set by PostgreSQL evaluator, see:
13 * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14 *
15 * Based on:
16 * - C example from PostgreSQL documentation (BSD licence)
17 * - example002.c from Hyper Estraier (GPL)
18 * - _textin/_textout from pgcurl.c (LGPL)
19 *
20 * This code is licenced under GPL
21 */
22
23 #include "postgres.h"
24 #include "fmgr.h"
25 #include "funcapi.h"
26 #include "utils/builtins.h"
27 #include "utils/array.h"
28 #include "miscadmin.h"
29 #include <estraier.h>
30 #include <cabin.h>
31
/*
 * Conversion helpers between PostgreSQL text values and C strings.
 * All of them go through the built-in textin/textout functions.
 */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))

/*
 * SortMem got renamed in PostgreSQL 8.0; fall back to a fixed value when it
 * is not available as a macro.  The expansion is parenthesized so that it
 * stays a single operand when used inside a larger expression.
 */
#ifndef SortMem
#define SortMem (16 * 1024)
#endif

/* separator between multiple attribute filters passed in one string */
#define ATTR_DELIMITER "{{!}}"
43
44 /* prototype */
45 char *attr2text(ESTDOC *doc, char *attr);
46
47
48 /* work in progress */
49 PG_FUNCTION_INFO_V1(pgest_attr);
50 Datum pgest_attr(PG_FUNCTION_ARGS)
51 {
52 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(6);
53 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
54 int attr_ndims = ARR_NDIM(attr_arr);
55 int *attr_dim_counts = ARR_DIMS(attr_arr);
56 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
57 int ncols = 0;
58 int nrows = 0;
59 int indx[MAXDIM];
60 int16 attr_len;
61 bool attr_byval;
62 char attr_align;
63 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
64 AttInMetadata *attinmeta;
65 TupleDesc tupdesc;
66 Tuplestorestate *tupstore = NULL;
67 HeapTuple tuple;
68 MemoryContext per_query_ctx;
69 MemoryContext oldcontext;
70 Datum dvalue;
71 char **values;
72 int rsinfo_ncols;
73 int i, j;
74 /* estvars */
75 ESTDB *db;
76 ESTCOND *cond;
77 ESTDOC *doc;
78 const CBLIST *texts;
79 int ecode, *est_result, resnum;
80 int limit = 0;
81 int offset = 0;
82
83 char *index_path;
84 char *query;
85 char *attr;
86 char *order;
87
88
89 /* only allow 1D input array */
90 if (attr_ndims == 1)
91 {
92 ncols = attr_dim_counts[0];
93 }
94 else
95 ereport(ERROR,
96 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
97 errmsg("invalid input array"),
98 errdetail("Input array must have 1 dimension")));
99
100 /* check to see if caller supports us returning a tuplestore */
101 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
102 ereport(ERROR,
103 (errcode(ERRCODE_SYNTAX_ERROR),
104 errmsg("materialize mode required, but it is not " \
105 "allowed in this context")));
106
107 /* get info about element type needed to construct the array */
108 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
109
110 /* get the requested return tuple description */
111 tupdesc = rsinfo->expectedDesc;
112 rsinfo_ncols = tupdesc->natts;
113
114 /*
115 * The requested tuple description better match up with the array
116 * we were given.
117 */
118 if (rsinfo_ncols != ncols)
119 ereport(ERROR,
120 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
121 errmsg("invalid input array"),
122 errdetail("Number of elements in array must match number of query specified columns.")));
123
124 /* OK, use it */
125 attinmeta = TupleDescGetAttInMetadata(tupdesc);
126
127 /* Now go to work */
128 rsinfo->returnMode = SFRM_Materialize;
129
130 per_query_ctx = fcinfo->flinfo->fn_mcxt;
131 oldcontext = MemoryContextSwitchTo(per_query_ctx);
132
133 /* initialize our tuplestore */
134 tupstore = tuplestore_begin_heap(true, false, SortMem);
135
136
137 /* take rest of arguments from function */
138
139 /* index path */
140 if (PG_ARGISNULL(0)) {
141 ereport(ERROR,
142 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
143 errmsg("index path can't be null"),
144 errdetail("Index path must be valid full path to HyperEstraier index")));
145 }
146 index_path = _textout(PG_GETARG_TEXT_P(0));
147
148 /* query string */
149 if (PG_ARGISNULL(1)) {
150 query = "";
151 } else {
152 query = _textout(PG_GETARG_TEXT_P(1));
153 }
154
155 /* atribute filter */
156 if (PG_ARGISNULL(2)) {
157 attr = "";
158 } else {
159 attr = _textout(PG_GETARG_TEXT_P(2));
160 }
161
162 /* sort order */
163 if (PG_ARGISNULL(3)) {
164 order = "";
165 } else {
166 order = _textout(PG_GETARG_TEXT_P(3));
167 }
168
169
170 /* limit */
171 if (PG_ARGISNULL(4)) {
172 limit = 0;
173 } else {
174 limit = PG_GETARG_INT32(4);
175 }
176
177 /* offset */
178 if (PG_ARGISNULL(5)) {
179 offset = 0;
180 } else {
181 offset = PG_GETARG_INT32(5);
182 }
183
184
185 /* open the database */
186 elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
187
188 if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
189 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
190 errmsg("est_db_open: can't open %s: %d", index_path, ecode),
191 errdetail(est_err_msg(ecode))));
192 }
193
194 elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
195
196 /* create a search condition object */
197 if (!(cond = est_cond_new())) {
198 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
199 errmsg("pgest_attr: est_cond_new failed")));
200 }
201
202 /* set the search phrase to the search condition object */
203 if (! PG_ARGISNULL(1) && strlen(query) > 0)
204 est_cond_set_phrase(cond, query);
205
206 /* minimum valid attribute length is 10: @a STREQ a */
207 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
208 elog(DEBUG1,"attributes: %s", attr);
209 char *curr_attr;
210 curr_attr = strtok(attr, ATTR_DELIMITER);
211 while (curr_attr) {
212 elog(DEBUG1,"est_cond_add_attr(%s)", curr_attr);
213 est_cond_add_attr(cond, curr_attr);
214 curr_attr = strtok(NULL, ATTR_DELIMITER);
215 }
216 }
217
218 /* set the search phrase to the search condition object */
219 if (! PG_ARGISNULL(3) && strlen(order) > 0) {
220 elog(DEBUG1,"est_cond_set_order(%s)", order);
221 est_cond_set_order(cond, order);
222 }
223
224 if (limit) {
225 elog(DEBUG1,"est_cond_set_max(%d)", limit + offset);
226 est_cond_set_max(cond, limit + offset);
227 }
228
229 /* get the result of search */
230 est_result = est_db_search(db, cond, &resnum, NULL);
231
232 /* check if results exists */
233 if ( 0 == resnum ) {
234 elog(INFO, "pgest_attr: no results for: %s", query );
235 }
236
237 /* total number of tuples to be returned */
238 if (limit && limit < resnum) {
239 nrows = limit;
240 } else {
241 nrows = resnum - offset;
242 }
243
244
245 elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
246
247
248 values = (char **) palloc(ncols * sizeof(char *));
249
250 for (i = 0; i < nrows; i++)
251 {
252
253 /* get result from estraier */
254 if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
255 elog(INFO, "can't find result %d", i + offset);
256 } else {
257 elog(DEBUG1, "URI: %s\n Title: %s\n",
258 est_doc_attr(doc, "@uri"),
259 est_doc_attr(doc, "@title")
260 );
261 }
262
263 /* iterate over results */
264 for (j = 0; j < ncols; j++)
265 {
266 bool isnull;
267
268 /* array value of this position */
269 indx[0] = j + attr_dim_lower_bounds[0];
270
271 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
272
273 if (!isnull && doc)
274 values[j] = DatumGetCString(
275 attr2text(doc,
276 (char *)DirectFunctionCall1(textout, dvalue)
277 ));
278 else
279 values[j] = NULL;
280 }
281 /* construct the tuple */
282 tuple = BuildTupleFromCStrings(attinmeta, values);
283
284 /* now store it */
285 tuplestore_puttuple(tupstore, tuple);
286
287
288 /* delete estraier document object */
289 est_doc_delete(doc);
290 }
291
292 tuplestore_donestoring(tupstore);
293 rsinfo->setResult = tupstore;
294
295 /*
296 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
297 * tuples are in our tuplestore and passed back through
298 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
299 * that we actually used to build our tuples with, so the caller can
300 * verify we did what it was expecting.
301 */
302 rsinfo->setDesc = tupdesc;
303 MemoryContextSwitchTo(oldcontext);
304
305 est_cond_delete(cond);
306
307 if(!est_db_close(db, &ecode)){
308 ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
309 errmsg("est_db_close: %d", ecode),
310 errdetail(est_err_msg(ecode))));
311 }
312
313 return (Datum) 0;
314 }
315
316
317 /* make text var from attr */
318 char *attr2text(ESTDOC *doc, char *attr) {
319 char *val;
320 const char *attrval;
321 int len;
322 int attrlen;
323
324 elog(DEBUG1, "doc: %08x, attr: %s", doc, attr);
325
326 if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
327 val = (char *) palloc(attrlen * sizeof(char));
328 } else {
329 return (Datum) NULL;
330 }
331
332 len = strlen(attrval);
333 elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
334
335 len++;
336 len *= sizeof(char);
337
338 elog(DEBUG2, "palloc(%d)", len);
339
340 val = palloc(len);
341
342 memset(val, 0, len);
343 strncpy(val, attrval, len);
344
345 elog(DEBUG2, "val=%s", val);
346
347 return val;
348 }
349

  ViewVC Help
Powered by ViewVC 1.1.26