/[pgestraier]/trunk/pgest.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/pgest.c

Parent Directory | Revision Log


Revision 44 - (show annotations)
Sat Sep 10 22:51:03 2005 UTC (18 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 17054 byte(s)
check if search failed and report status

1 /*
2 * integrate Hyper Estraier into PostgreSQL
3 *
4 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5 *
6 * TODO:
7 * - all
8 *
9 * NOTES:
10 * - clear structures with memset to support hash indexes (who would like
11 * to create a hash index on a table returned from a function?)
12 * - number of returned rows is set by PostgreSQL evaluator, see:
13 * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14 *
15 * Based on:
16 * - C example from PostgreSQL documentation (BSD licence)
17 * - coreexample002.c and nodeexample002.c from Hyper Estraier (GPL)
18 * - _textin/_textout from pgcurl.c (LGPL)
19 *
20 * This code is licenced under GPL
21 */
22
23 #include "postgres.h"
24 #include "fmgr.h"
25 #include "funcapi.h"
26 #include "utils/builtins.h"
27 #include "utils/array.h"
28 #include "miscadmin.h"
29 #include <estraier.h>
30 #include <cabin.h>
31 #include <estnode.h>
32
/*
 * Helpers converting between PostgreSQL text values and C strings by
 * calling the built-in textin/textout functions.
 *
 *   _textin(str)    C string -> text Datum
 *   _textout(str)   text     -> pointer to a C string
 *   GET_STR(textp)  text     -> C string
 *   GET_TEXT(cstrp) C string -> text pointer
 */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))

/*
 * SortMem got renamed in PostgreSQL 8.0.  Provide a fallback value, which
 * this file passes as the last argument of tuplestore_begin_heap().
 * The expansion is parenthesized so that it stays a single value when the
 * macro is used inside a larger expression.
 */
#ifndef SortMem
#define SortMem (16 * 1024)
#endif

/* separator between the individual conditions of an attribute filter */
#define ATTR_DELIMITER "{{!}}"
44
45 /* prototype */
46 char *attr2text(ESTDOC *doc, char *attr);
47 char *node_attr2text(ESTRESDOC *rdoc, char *attr);
48
49
50 /* work in progress */
51 PG_FUNCTION_INFO_V1(pgest_attr);
52 Datum pgest_attr(PG_FUNCTION_ARGS)
53 {
54 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(6);
55 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
56 int attr_ndims = ARR_NDIM(attr_arr);
57 int *attr_dim_counts = ARR_DIMS(attr_arr);
58 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
59 int ncols = 0;
60 int nrows = 0;
61 int indx[MAXDIM];
62 int16 attr_len;
63 bool attr_byval;
64 char attr_align;
65 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
66 AttInMetadata *attinmeta;
67 TupleDesc tupdesc;
68 Tuplestorestate *tupstore = NULL;
69 HeapTuple tuple;
70 MemoryContext per_query_ctx;
71 MemoryContext oldcontext;
72 Datum dvalue;
73 char **values;
74 int rsinfo_ncols;
75 int i, j;
76 /* estvars */
77 ESTDB *db;
78 ESTCOND *cond;
79 ESTDOC *doc;
80 const CBLIST *texts;
81 int ecode, *est_result, resnum;
82 int limit = 0;
83 int offset = 0;
84
85 char *index_path;
86 char *query;
87 char *attr;
88 char *order;
89
90
91 /* only allow 1D input array */
92 if (attr_ndims == 1)
93 {
94 ncols = attr_dim_counts[0];
95 }
96 else
97 ereport(ERROR,
98 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
99 errmsg("invalid input array"),
100 errdetail("Input array must have 1 dimension")));
101
102 /* check to see if caller supports us returning a tuplestore */
103 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
104 ereport(ERROR,
105 (errcode(ERRCODE_SYNTAX_ERROR),
106 errmsg("materialize mode required, but it is not " \
107 "allowed in this context")));
108
109 /* get info about element type needed to construct the array */
110 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
111
112 /* get the requested return tuple description */
113 tupdesc = rsinfo->expectedDesc;
114 rsinfo_ncols = tupdesc->natts;
115
116 /*
117 * The requested tuple description better match up with the array
118 * we were given.
119 */
120 if (rsinfo_ncols != ncols)
121 ereport(ERROR,
122 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
123 errmsg("invalid input array"),
124 errdetail("Number of elements in array must match number of query specified columns.")));
125
126 /* OK, use it */
127 attinmeta = TupleDescGetAttInMetadata(tupdesc);
128
129 /* Now go to work */
130 rsinfo->returnMode = SFRM_Materialize;
131
132 per_query_ctx = fcinfo->flinfo->fn_mcxt;
133 oldcontext = MemoryContextSwitchTo(per_query_ctx);
134
135 /* initialize our tuplestore */
136 tupstore = tuplestore_begin_heap(true, false, SortMem);
137
138
139 /* take rest of arguments from function */
140
141 /* index path */
142 if (PG_ARGISNULL(0)) {
143 ereport(ERROR,
144 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
145 errmsg("index path can't be null"),
146 errdetail("Index path must be valid full path to HyperEstraier index")));
147 }
148 index_path = _textout(PG_GETARG_TEXT_P(0));
149
150 /* query string */
151 if (PG_ARGISNULL(1)) {
152 query = "";
153 } else {
154 query = _textout(PG_GETARG_TEXT_P(1));
155 }
156
157 /* atribute filter */
158 if (PG_ARGISNULL(2)) {
159 attr = "";
160 } else {
161 attr = _textout(PG_GETARG_TEXT_P(2));
162 }
163
164 /* sort order */
165 if (PG_ARGISNULL(3)) {
166 order = "";
167 } else {
168 order = _textout(PG_GETARG_TEXT_P(3));
169 }
170
171
172 /* limit */
173 if (PG_ARGISNULL(4)) {
174 limit = 0;
175 } else {
176 limit = PG_GETARG_INT32(4);
177 }
178
179 /* offset */
180 if (PG_ARGISNULL(5)) {
181 offset = 0;
182 } else {
183 offset = PG_GETARG_INT32(5);
184 }
185
186
187 /* open the database */
188 elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
189
190 if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
191 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
192 errmsg("est_db_open: can't open %s: %d", index_path, ecode),
193 errdetail(est_err_msg(ecode))));
194 }
195
196 elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
197
198 /* create a search condition object */
199 if (!(cond = est_cond_new())) {
200 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
201 errmsg("pgest_attr: est_cond_new failed")));
202 }
203
204 /* set the search phrase to the search condition object */
205 if (! PG_ARGISNULL(1) && strlen(query) > 0)
206 est_cond_set_phrase(cond, query);
207
208 /* minimum valid attribute length is 10: @a STREQ a */
209 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
210 elog(DEBUG1,"attributes: %s", attr);
211 char *curr_attr;
212 curr_attr = strtok(attr, ATTR_DELIMITER);
213 while (curr_attr) {
214 elog(DEBUG1,"est_cond_add_attr(%s)", curr_attr);
215 est_cond_add_attr(cond, curr_attr);
216 curr_attr = strtok(NULL, ATTR_DELIMITER);
217 }
218 }
219
220 /* set the search phrase to the search condition object */
221 if (! PG_ARGISNULL(3) && strlen(order) > 0) {
222 elog(DEBUG1,"est_cond_set_order(%s)", order);
223 est_cond_set_order(cond, order);
224 }
225
226 if (limit) {
227 elog(DEBUG1,"est_cond_set_max(%d)", limit + offset);
228 est_cond_set_max(cond, limit + offset);
229 }
230
231 /* get the result of search */
232 est_result = est_db_search(db, cond, &resnum, NULL);
233
234 /* check if results exists */
235 if ( 0 == resnum ) {
236 elog(INFO, "pgest_attr: no results for: %s", query );
237 }
238
239 /* total number of tuples to be returned */
240 if (limit && limit < resnum) {
241 nrows = limit;
242 } else {
243 nrows = resnum - offset;
244 }
245
246
247 elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
248
249
250 values = (char **) palloc(ncols * sizeof(char *));
251
252 for (i = 0; i < nrows; i++)
253 {
254
255 /* get result from estraier */
256 if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
257 elog(INFO, "can't find result %d", i + offset);
258 } else {
259 elog(DEBUG1, "URI: %s\n Title: %s\n",
260 est_doc_attr(doc, "@uri"),
261 est_doc_attr(doc, "@title")
262 );
263 }
264
265 /* iterate over results */
266 for (j = 0; j < ncols; j++)
267 {
268 bool isnull;
269
270 /* array value of this position */
271 indx[0] = j + attr_dim_lower_bounds[0];
272
273 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
274
275 if (!isnull && doc)
276 values[j] = DatumGetCString(
277 attr2text(doc,
278 (char *)DirectFunctionCall1(textout, dvalue)
279 ));
280 else
281 values[j] = NULL;
282 }
283 /* construct the tuple */
284 tuple = BuildTupleFromCStrings(attinmeta, values);
285
286 /* now store it */
287 tuplestore_puttuple(tupstore, tuple);
288
289
290 /* delete estraier document object */
291 est_doc_delete(doc);
292 }
293
294 tuplestore_donestoring(tupstore);
295 rsinfo->setResult = tupstore;
296
297 /*
298 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
299 * tuples are in our tuplestore and passed back through
300 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
301 * that we actually used to build our tuples with, so the caller can
302 * verify we did what it was expecting.
303 */
304 rsinfo->setDesc = tupdesc;
305 MemoryContextSwitchTo(oldcontext);
306
307 est_cond_delete(cond);
308
309 if(!est_db_close(db, &ecode)){
310 ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
311 errmsg("est_db_close: %d", ecode),
312 errdetail(est_err_msg(ecode))));
313 }
314
315 return (Datum) 0;
316 }
317
318
319 /* make text var from attr */
320 char *attr2text(ESTDOC *doc, char *attr) {
321 char *val;
322 const char *attrval;
323 int len;
324 int attrlen;
325
326 elog(DEBUG1, "doc: %08x, attr: %s", doc, attr);
327
328 if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
329 val = (char *) palloc(attrlen * sizeof(char));
330 } else {
331 return (Datum) NULL;
332 }
333
334 len = strlen(attrval);
335 elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
336
337 len++;
338 len *= sizeof(char);
339
340 elog(DEBUG2, "palloc(%d)", len);
341
342 val = palloc(len);
343
344 memset(val, 0, len);
345 strncpy(val, attrval, len);
346
347 elog(DEBUG2, "val=%s", val);
348
349 return val;
350 }
351
352 /*
353 * variation on theme: use node API which doesn't open index on
354 * every query which is much faster for large indexes
355 *
356 */
357
358 PG_FUNCTION_INFO_V1(pgest_node);
359 Datum pgest_node(PG_FUNCTION_ARGS)
360 {
361 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(8);
362 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
363 int attr_ndims = ARR_NDIM(attr_arr);
364 int *attr_dim_counts = ARR_DIMS(attr_arr);
365 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
366 int ncols = 0;
367 int nrows = 0;
368 int indx[MAXDIM];
369 int16 attr_len;
370 bool attr_byval;
371 char attr_align;
372 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
373 AttInMetadata *attinmeta;
374 TupleDesc tupdesc;
375 Tuplestorestate *tupstore = NULL;
376 HeapTuple tuple;
377 MemoryContext per_query_ctx;
378 MemoryContext oldcontext;
379 Datum dvalue;
380 char **values;
381 int rsinfo_ncols;
382 int i, j;
383 /* estvars */
384 ESTNODE *node;
385 ESTCOND *cond;
386 ESTNODERES *nres;
387 ESTRESDOC *rdoc;
388 const CBLIST *texts;
389 int resnum = 0;
390 int limit = 0;
391 int offset = 0;
392
393 char *node_url;
394 char *user, *passwd;
395 char *query;
396 char *attr;
397 char *order;
398
399
400 /* only allow 1D input array */
401 if (attr_ndims == 1)
402 {
403 ncols = attr_dim_counts[0];
404 }
405 else
406 ereport(ERROR,
407 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
408 errmsg("invalid input array"),
409 errdetail("Input array must have 1 dimension")));
410
411 /* check to see if caller supports us returning a tuplestore */
412 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
413 ereport(ERROR,
414 (errcode(ERRCODE_SYNTAX_ERROR),
415 errmsg("materialize mode required, but it is not " \
416 "allowed in this context")));
417
418 /* get info about element type needed to construct the array */
419 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
420
421 /* get the requested return tuple description */
422 tupdesc = rsinfo->expectedDesc;
423 rsinfo_ncols = tupdesc->natts;
424
425 /*
426 * The requested tuple description better match up with the array
427 * we were given.
428 */
429 if (rsinfo_ncols != ncols)
430 ereport(ERROR,
431 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
432 errmsg("invalid input array"),
433 errdetail("Number of elements in array must match number of query specified columns.")));
434
435 /* OK, use it */
436 attinmeta = TupleDescGetAttInMetadata(tupdesc);
437
438 /* Now go to work */
439 rsinfo->returnMode = SFRM_Materialize;
440
441 per_query_ctx = fcinfo->flinfo->fn_mcxt;
442 oldcontext = MemoryContextSwitchTo(per_query_ctx);
443
444 /* initialize our tuplestore */
445 tupstore = tuplestore_begin_heap(true, false, SortMem);
446
447
448 /* take rest of arguments from function */
449
450 /* node URL */
451 if (PG_ARGISNULL(0)) {
452 ereport(ERROR,
453 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
454 errmsg("node URL can't be null"),
455 errdetail("Node URL must be valid URL to HyperEstraier node")));
456 }
457 node_url = _textout(PG_GETARG_TEXT_P(0));
458
459 /* login and password */
460 if (PG_ARGISNULL(1) || PG_ARGISNULL(2)) {
461 ereport(ERROR,
462 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
463 errmsg("username and password can't be NULL"),
464 errdetail("You must specify valid username and password to HyperEstraier node")));
465 }
466 user = _textout(PG_GETARG_TEXT_P(1));
467 passwd = _textout(PG_GETARG_TEXT_P(2));
468
469 /* query string */
470 if (PG_ARGISNULL(3)) {
471 query = "";
472 } else {
473 query = _textout(PG_GETARG_TEXT_P(3));
474 }
475
476 /* atribute filter */
477 if (PG_ARGISNULL(4)) {
478 attr = "";
479 } else {
480 attr = _textout(PG_GETARG_TEXT_P(4));
481 }
482
483 /* sort order */
484 if (PG_ARGISNULL(5)) {
485 order = "";
486 } else {
487 order = _textout(PG_GETARG_TEXT_P(5));
488 }
489
490
491 /* limit */
492 if (PG_ARGISNULL(6)) {
493 limit = 0;
494 } else {
495 limit = PG_GETARG_INT32(6);
496 }
497
498 /* offset */
499 if (PG_ARGISNULL(7)) {
500 offset = 0;
501 } else {
502 offset = PG_GETARG_INT32(7);
503 }
504
505 /* initialize the network environment */
506 if(!est_init_net_env()){
507 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
508 errmsg("pgest_node: can't create network enviroment")));
509 }
510
511 /* create the node connection object */
512 elog(DEBUG1, "pgest_node: est_node_new(%s) as %s", node_url, user);
513 node = est_node_new(node_url);
514 est_node_set_auth(node, user, passwd);
515
516 elog(DEBUG1, "pgest_node: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(4) ? "NULL" : attr), limit, offset);
517
518 /* create a search condition object */
519 if (!(cond = est_cond_new())) {
520 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
521 errmsg("pgest_node: est_cond_new failed")));
522 }
523
524 /* set the search phrase to the search condition object */
525 if (! PG_ARGISNULL(3) && strlen(query) > 0)
526 est_cond_set_phrase(cond, query);
527
528 /* minimum valid attribute length is 10: @a STREQ a */
529 if (! PG_ARGISNULL(4) && strlen(attr) >= 10) {
530 elog(DEBUG1,"attributes: %s", attr);
531 char *curr_attr;
532 curr_attr = strtok(attr, ATTR_DELIMITER);
533 while (curr_attr) {
534 elog(DEBUG1,"est_cond_add_attr(%s)", curr_attr);
535 est_cond_add_attr(cond, curr_attr);
536 curr_attr = strtok(NULL, ATTR_DELIMITER);
537 }
538 }
539
540 /* set the search phrase to the search condition object */
541 if (! PG_ARGISNULL(5) && strlen(order) > 0) {
542 elog(DEBUG1,"est_cond_set_order(%s)", order);
543 est_cond_set_order(cond, order);
544 }
545
546 if (limit) {
547 elog(DEBUG1,"est_cond_set_max(%d)", limit + offset);
548 est_cond_set_max(cond, limit + offset);
549 }
550
551 /* get the result of search */
552 /* FIXME: allow user to specify depath of search */
553 nres = est_node_search(node, cond, 0);
554
555 if (! nres) {
556 int status = est_node_status(node);
557 est_cond_delete(cond);
558 est_node_delete(node);
559 est_free_net_env();
560 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
561 errmsg("pgest_node: search failed, node status %d", status)));
562 }
563
564 /* get number of results */
565 resnum = est_noderes_doc_num(nres);
566
567 /* check if results exists */
568 if ( 0 == resnum ) {
569 elog(INFO, "pgest_node: no results for: %s", query );
570 }
571
572 /* total number of tuples to be returned */
573 if (limit && limit < resnum) {
574 nrows = limit;
575 } else {
576 nrows = resnum - offset;
577 }
578
579
580 elog(DEBUG1, "pgest_node: found %d hits for %s", resnum, query);
581
582
583 values = (char **) palloc(ncols * sizeof(char *));
584
585 for (i = 0; i < nrows; i++)
586 {
587
588 /* get result from estraier */
589 if (! ( rdoc = est_noderes_get_doc(nres, i + offset) )) {
590 elog(INFO, "can't find result %d", i + offset);
591 } else {
592 elog(DEBUG1, "URI: %s\n Title: %s\n",
593 est_resdoc_attr(rdoc, "@uri"),
594 est_resdoc_attr(rdoc, "@title")
595 );
596 }
597
598 /* iterate over results */
599 for (j = 0; j < ncols; j++)
600 {
601 bool isnull;
602
603 /* array value of this position */
604 indx[0] = j + attr_dim_lower_bounds[0];
605
606 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
607
608 if (!isnull && rdoc)
609 values[j] = DatumGetCString(
610 node_attr2text(rdoc,
611 (char *)DirectFunctionCall1(textout, dvalue)
612 ));
613 else
614 values[j] = NULL;
615 }
616 /* construct the tuple */
617 tuple = BuildTupleFromCStrings(attinmeta, values);
618
619 /* now store it */
620 tuplestore_puttuple(tupstore, tuple);
621
622 }
623
624 tuplestore_donestoring(tupstore);
625 rsinfo->setResult = tupstore;
626
627 /*
628 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
629 * tuples are in our tuplestore and passed back through
630 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
631 * that we actually used to build our tuples with, so the caller can
632 * verify we did what it was expecting.
633 */
634 rsinfo->setDesc = tupdesc;
635 MemoryContextSwitchTo(oldcontext);
636
637 /* delete the node result object */
638 est_noderes_delete(nres);
639
640 /* destroy the search condition object */
641 est_cond_delete(cond);
642
643 /* destroy the node object */
644 est_node_delete(node);
645
646 /* free the networking environment */
647 est_free_net_env();
648
649 return (Datum) 0;
650 }
651
652 /* make text var from node attr */
653 char *node_attr2text(ESTRESDOC *rdoc, char *attr) {
654 char *val;
655 const char *attrval;
656 int len;
657 int attrlen;
658
659 elog(DEBUG1, "doc: %08x, attr: %s", rdoc, attr);
660
661 if ( (attrval = est_resdoc_attr(rdoc, attr)) && (attrlen = strlen(attrval)) ) {
662 val = (char *) palloc(attrlen * sizeof(char));
663 } else {
664 return (Datum) NULL;
665 }
666
667 len = strlen(attrval);
668 elog(DEBUG1, "node_attr2text(%s) = '%s' %d bytes", attr, attrval, len);
669
670 len++;
671 len *= sizeof(char);
672
673 elog(DEBUG2, "palloc(%d)", len);
674
675 val = palloc(len);
676
677 memset(val, 0, len);
678 strncpy(val, attrval, len);
679
680 elog(DEBUG2, "val=%s", val);
681
682 return val;
683 }
684

  ViewVC Help
Powered by ViewVC 1.1.26