/[pgestraier]/trunk/pgest.c
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/pgest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 48 - (show annotations)
Thu Oct 20 16:24:26 2005 UTC (18 years, 6 months ago) by dpavlin
File MIME type: text/plain
File size: 17608 byte(s)
moved argument offsets into #define(s)

1 /*
2 * integrate Hyper Estraier into PostgreSQL
3 *
4 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5 *
6 * TODO:
7 * - all
8 *
9 * NOTES:
10 * - clear structures with memset to support hash indexes (who would like
11 * to create a hash index on a table returned from a function?)
12 * - number of returned rows is set by PostgreSQL evaluator, see:
13 * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14 *
15 * Based on:
16 * - C example from PostgreSQL documentation (BSD licence)
17 * - coreexample002.c and nodeexample002.c from Hyper Estraier (GPL)
18 * - _textin/_textout from pgcurl.c (LGPL)
19 *
20 * This code is licenced under GPL
21 */
22
23 #include "postgres.h"
24 #include "fmgr.h"
25 #include "funcapi.h"
26 #include "utils/builtins.h"
27 #include "utils/array.h"
28 #include "miscadmin.h"
29 #include <estraier.h>
30 #include <cabin.h>
31 #include <estnode.h>
32
/*
 * Helpers for converting between C strings and PostgreSQL text values by
 * calling the built-in textin/textout functions directly.
 *
 *   _textin(str)    C string    -> Datum returned by textin
 *   _textout(str)   text value  -> pointer to the C string built by textout
 *   GET_STR(textp)  text value  -> C string (DatumGetCString flavour)
 *   GET_TEXT(cstrp) C string    -> text pointer (DatumGetTextP flavour)
 */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))

/*
 * SortMem got renamed in PostgreSQL 8.0, so provide a fallback value.
 * The expansion is parenthesized so that it behaves as a single value in
 * any expression (e.g. "x / SortMem").
 */
#ifndef SortMem
#define SortMem (16 * 1024)
#endif

/* separator between individual attribute conditions in the "attr" argument */
#define ATTR_DELIMITER "{{!}}"
44
45 /* prototype */
46 char *attr2text(ESTDOC *doc, char *attr);
47 char *node_attr2text(ESTRESDOC *rdoc, char *attr);
48
49
50 /* work in progress */
51 PG_FUNCTION_INFO_V1(pgest_attr);
52 Datum pgest_attr(PG_FUNCTION_ARGS)
53 {
54 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(6);
55 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
56 int attr_ndims = ARR_NDIM(attr_arr);
57 int *attr_dim_counts = ARR_DIMS(attr_arr);
58 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
59 int ncols = 0;
60 int nrows = 0;
61 int indx[MAXDIM];
62 int16 attr_len;
63 bool attr_byval;
64 char attr_align;
65 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
66 AttInMetadata *attinmeta;
67 TupleDesc tupdesc;
68 Tuplestorestate *tupstore = NULL;
69 HeapTuple tuple;
70 MemoryContext per_query_ctx;
71 MemoryContext oldcontext;
72 Datum dvalue;
73 char **values;
74 int rsinfo_ncols;
75 int i, j;
76 /* estvars */
77 ESTDB *db;
78 ESTCOND *cond;
79 ESTDOC *doc;
80 const CBLIST *texts;
81 int ecode, *est_result, resnum;
82 int limit = 0;
83 int offset = 0;
84
85 char *index_path;
86 char *query;
87 char *attr;
88 char *order;
89
90
91 /* only allow 1D input array */
92 if (attr_ndims == 1)
93 {
94 ncols = attr_dim_counts[0];
95 }
96 else
97 ereport(ERROR,
98 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
99 errmsg("invalid input array"),
100 errdetail("Input array must have 1 dimension")));
101
102 /* check to see if caller supports us returning a tuplestore */
103 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
104 ereport(ERROR,
105 (errcode(ERRCODE_SYNTAX_ERROR),
106 errmsg("materialize mode required, but it is not " \
107 "allowed in this context")));
108
109 /* get info about element type needed to construct the array */
110 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
111
112 /* get the requested return tuple description */
113 tupdesc = rsinfo->expectedDesc;
114 rsinfo_ncols = tupdesc->natts;
115
116 /*
117 * The requested tuple description better match up with the array
118 * we were given.
119 */
120 if (rsinfo_ncols != ncols)
121 ereport(ERROR,
122 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
123 errmsg("invalid input array"),
124 errdetail("Number of elements in array must match number of query specified columns.")));
125
126 /* OK, use it */
127 attinmeta = TupleDescGetAttInMetadata(tupdesc);
128
129 /* Now go to work */
130 rsinfo->returnMode = SFRM_Materialize;
131
132 per_query_ctx = fcinfo->flinfo->fn_mcxt;
133 oldcontext = MemoryContextSwitchTo(per_query_ctx);
134
135 /* initialize our tuplestore */
136 tupstore = tuplestore_begin_heap(true, false, SortMem);
137
138
139 /* take rest of arguments from function */
140
141 /* index path */
142 if (PG_ARGISNULL(0)) {
143 ereport(ERROR,
144 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
145 errmsg("index path can't be null"),
146 errdetail("Index path must be valid full path to HyperEstraier index")));
147 }
148 index_path = _textout(PG_GETARG_TEXT_P(0));
149
150 /* query string */
151 if (PG_ARGISNULL(1)) {
152 query = "";
153 } else {
154 query = _textout(PG_GETARG_TEXT_P(1));
155 }
156
157 /* atribute filter */
158 if (PG_ARGISNULL(2)) {
159 attr = "";
160 } else {
161 attr = _textout(PG_GETARG_TEXT_P(2));
162 }
163
164 /* sort order */
165 if (PG_ARGISNULL(3)) {
166 order = "";
167 } else {
168 order = _textout(PG_GETARG_TEXT_P(3));
169 }
170
171
172 /* limit */
173 if (PG_ARGISNULL(4)) {
174 limit = 0;
175 } else {
176 limit = PG_GETARG_INT32(4);
177 }
178
179 /* offset */
180 if (PG_ARGISNULL(5)) {
181 offset = 0;
182 } else {
183 offset = PG_GETARG_INT32(5);
184 }
185
186
187 /* open the database */
188 elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
189
190 if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
191 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
192 errmsg("est_db_open: can't open %s: %d", index_path, ecode),
193 errdetail(est_err_msg(ecode))));
194 }
195
196 elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
197
198 /* create a search condition object */
199 if (!(cond = est_cond_new())) {
200 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
201 errmsg("pgest_attr: est_cond_new failed")));
202 }
203
204 /* set the search phrase to the search condition object */
205 if (! PG_ARGISNULL(1) && strlen(query) > 0)
206 est_cond_set_phrase(cond, query);
207
208 /* minimum valid attribute length is 10: @a STREQ a */
209 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
210 elog(DEBUG1,"attributes: %s", attr);
211 char *curr_attr;
212 curr_attr = strtok(attr, ATTR_DELIMITER);
213 while (curr_attr) {
214 elog(DEBUG1,"est_cond_add_attr(%s)", curr_attr);
215 est_cond_add_attr(cond, curr_attr);
216 curr_attr = strtok(NULL, ATTR_DELIMITER);
217 }
218 }
219
220 /* set the search phrase to the search condition object */
221 if (! PG_ARGISNULL(3) && strlen(order) > 0) {
222 elog(DEBUG1,"est_cond_set_order(%s)", order);
223 est_cond_set_order(cond, order);
224 }
225
226 if (limit) {
227 elog(DEBUG1,"est_cond_set_max(%d)", limit + offset);
228 est_cond_set_max(cond, limit + offset);
229 }
230
231 /* get the result of search */
232 est_result = est_db_search(db, cond, &resnum, NULL);
233
234 /* check if results exists */
235 if ( 0 == resnum ) {
236 elog(INFO, "pgest_attr: no results for: %s", query );
237 }
238
239 /* total number of tuples to be returned */
240 if (limit && limit < resnum) {
241 nrows = limit;
242 } else {
243 nrows = resnum - offset;
244 }
245
246
247 elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
248
249 values = (char **) palloc(ncols * sizeof(char *));
250
251 for (i = 0; i < nrows; i++)
252 {
253
254 /* get result from estraier */
255 if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
256 elog(INFO, "pgest_attr: can't find result %d", i + offset);
257 } else {
258 elog(DEBUG1, "URI: %s\n Title: %s\n",
259 est_doc_attr(doc, "@uri"),
260 est_doc_attr(doc, "@title")
261 );
262 }
263
264 /* iterate over results */
265 for (j = 0; j < ncols; j++)
266 {
267 bool isnull;
268
269 /* array value of this position */
270 indx[0] = j + attr_dim_lower_bounds[0];
271
272 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
273
274 if (!isnull && doc)
275 values[j] = DatumGetCString(
276 attr2text(doc,
277 (char *)DirectFunctionCall1(textout, dvalue)
278 ));
279 else
280 values[j] = NULL;
281 }
282 /* construct the tuple */
283 tuple = BuildTupleFromCStrings(attinmeta, values);
284
285 /* now store it */
286 tuplestore_puttuple(tupstore, tuple);
287
288 /* delete estraier document object */
289 if (doc) est_doc_delete(doc);
290 }
291
292 tuplestore_donestoring(tupstore);
293 rsinfo->setResult = tupstore;
294
295 /*
296 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
297 * tuples are in our tuplestore and passed back through
298 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
299 * that we actually used to build our tuples with, so the caller can
300 * verify we did what it was expecting.
301 */
302 rsinfo->setDesc = tupdesc;
303 MemoryContextSwitchTo(oldcontext);
304
305 est_cond_delete(cond);
306
307 if(!est_db_close(db, &ecode)){
308 ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
309 errmsg("est_db_close: %d", ecode),
310 errdetail(est_err_msg(ecode))));
311 }
312
313 return (Datum) 0;
314 }
315
316
317 /* make text var from attr */
318 char *attr2text(ESTDOC *doc, char *attr) {
319 char *val;
320 const char *attrval;
321 int len;
322 int attrlen;
323
324 if (! doc) return (Datum) NULL;
325
326 elog(DEBUG1, "doc: %08x, attr: %s", doc, attr);
327
328 if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
329 val = (char *) palloc(attrlen * sizeof(char));
330 } else {
331 return (Datum) NULL;
332 }
333
334 len = strlen(attrval);
335 elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
336
337 len++;
338 len *= sizeof(char);
339
340 elog(DEBUG2, "palloc(%d)", len);
341
342 val = palloc(len);
343
344 memset(val, 0, len);
345 strncpy(val, attrval, len);
346
347 elog(DEBUG2, "val=%s", val);
348
349 return val;
350 }
351
352 /*
353 * variation on theme: use node API which doesn't open index on
354 * every query which is much faster for large indexes
355 *
356 */
357
/*
 * Positions of the pgest_node() arguments, in call order:
 *
 *   select * from pgest( node_uri, login, passwd, query, attr, order,
 *                        limit, offset, attr_array )
 *     as (foo text, ... );
 */
enum
{
	_arg_node_uri = 0,
	_arg_login = 1,
	_arg_passwd = 2,
	_arg_query = 3,
	_arg_attr = 4,
	_arg_order = 5,
	_arg_limit = 6,
	_arg_offset = 7,
	_arg_attr_array = 8
};
369
370
371 PG_FUNCTION_INFO_V1(pgest_node);
372 Datum pgest_node(PG_FUNCTION_ARGS)
373 {
374 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(_arg_attr_array);
375 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
376 int attr_ndims = ARR_NDIM(attr_arr);
377 int *attr_dim_counts = ARR_DIMS(attr_arr);
378 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
379 int ncols = 0;
380 int nrows = 0;
381 int indx[MAXDIM];
382 int16 attr_len;
383 bool attr_byval;
384 char attr_align;
385 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
386 AttInMetadata *attinmeta;
387 TupleDesc tupdesc;
388 Tuplestorestate *tupstore = NULL;
389 HeapTuple tuple;
390 MemoryContext per_query_ctx;
391 MemoryContext oldcontext;
392 Datum dvalue;
393 char **values;
394 int rsinfo_ncols;
395 int i, j;
396 /* estvars */
397 ESTNODE *node;
398 ESTCOND *cond;
399 ESTNODERES *nres;
400 ESTRESDOC *rdoc;
401 const CBLIST *texts;
402 int resnum = 0;
403 int limit = 0;
404 int offset = 0;
405
406 char *node_url;
407 char *user, *passwd;
408 char *query;
409 char *attr;
410 char *order;
411
412
413 /* only allow 1D input array */
414 if (attr_ndims == 1)
415 {
416 ncols = attr_dim_counts[0];
417 }
418 else
419 ereport(ERROR,
420 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
421 errmsg("invalid input array"),
422 errdetail("Input array must have 1 dimension")));
423
424 /* check to see if caller supports us returning a tuplestore */
425 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
426 ereport(ERROR,
427 (errcode(ERRCODE_SYNTAX_ERROR),
428 errmsg("materialize mode required, but it is not " \
429 "allowed in this context")));
430
431 /* get info about element type needed to construct the array */
432 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
433
434 /* get the requested return tuple description */
435 tupdesc = rsinfo->expectedDesc;
436 rsinfo_ncols = tupdesc->natts;
437
438 /*
439 * The requested tuple description better match up with the array
440 * we were given.
441 */
442 if (rsinfo_ncols != ncols)
443 ereport(ERROR,
444 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
445 errmsg("invalid input array"),
446 errdetail("Number of elements in array must match number of query specified columns.")));
447
448 /* OK, use it */
449 attinmeta = TupleDescGetAttInMetadata(tupdesc);
450
451 /* Now go to work */
452 rsinfo->returnMode = SFRM_Materialize;
453
454 per_query_ctx = fcinfo->flinfo->fn_mcxt;
455 oldcontext = MemoryContextSwitchTo(per_query_ctx);
456
457 /* initialize our tuplestore */
458 tupstore = tuplestore_begin_heap(true, false, SortMem);
459
460
461 /* take rest of arguments from function */
462
463 /* node URL */
464 if (PG_ARGISNULL(_arg_node_uri)) {
465 ereport(ERROR,
466 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
467 errmsg("node URL can't be null"),
468 errdetail("Node URL must be valid URL to HyperEstraier node")));
469 }
470 node_url = _textout(PG_GETARG_TEXT_P(_arg_node_uri));
471
472 /* login and password */
473 if (PG_ARGISNULL(_arg_login) || PG_ARGISNULL(_arg_passwd)) {
474 ereport(ERROR,
475 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
476 errmsg("username and password can't be NULL"),
477 errdetail("You must specify valid username and password to HyperEstraier node")));
478 }
479 user = _textout(PG_GETARG_TEXT_P(_arg_login));
480 passwd = _textout(PG_GETARG_TEXT_P(_arg_passwd));
481
482 /* query string */
483 if (PG_ARGISNULL(_arg_query)) {
484 query = "";
485 } else {
486 query = _textout(PG_GETARG_TEXT_P(_arg_query));
487 }
488
489 /* atribute filter */
490 if (PG_ARGISNULL(_arg_attr)) {
491 attr = "";
492 } else {
493 attr = _textout(PG_GETARG_TEXT_P(_arg_attr));
494 }
495
496 /* sort order */
497 if (PG_ARGISNULL(_arg_order)) {
498 order = "";
499 } else {
500 order = _textout(PG_GETARG_TEXT_P(_arg_order));
501 }
502
503
504 /* limit */
505 if (PG_ARGISNULL(_arg_limit)) {
506 limit = 0;
507 } else {
508 limit = PG_GETARG_INT32(_arg_limit);
509 }
510
511 /* offset */
512 if (PG_ARGISNULL(_arg_offset)) {
513 offset = 0;
514 } else {
515 offset = PG_GETARG_INT32(_arg_offset);
516 }
517
518 /* initialize the network environment */
519 if(!est_init_net_env()){
520 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
521 errmsg("pgest_node: can't create network enviroment")));
522 }
523
524 /* create the node connection object */
525 elog(DEBUG1, "pgest_node: est_node_new(%s) as %s", node_url, user);
526 node = est_node_new(node_url);
527 est_node_set_auth(node, user, passwd);
528
529 elog(DEBUG1, "pgest_node: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(_arg_attr) ? "NULL" : attr), limit, offset);
530
531 /* create a search condition object */
532 if (!(cond = est_cond_new())) {
533 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
534 errmsg("pgest_node: est_cond_new failed")));
535 }
536
537 /* set the search phrase to the search condition object */
538 if (! PG_ARGISNULL(_arg_query) && strlen(query) > 0)
539 est_cond_set_phrase(cond, query);
540
541 /* minimum valid attribute length is 10: @a STREQ a */
542 if (! PG_ARGISNULL(_arg_attr) && strlen(attr) >= 10) {
543 elog(DEBUG1,"attributes: %s", attr);
544 char *curr_attr;
545 curr_attr = strtok(attr, ATTR_DELIMITER);
546 while (curr_attr) {
547 elog(DEBUG1,"est_cond_add_attr(%s)", curr_attr);
548 est_cond_add_attr(cond, curr_attr);
549 curr_attr = strtok(NULL, ATTR_DELIMITER);
550 }
551 }
552
553 /* set the search phrase to the search condition object */
554 if (! PG_ARGISNULL(_arg_order) && strlen(order) > 0) {
555 elog(DEBUG1,"est_cond_set_order(%s)", order);
556 est_cond_set_order(cond, order);
557 }
558
559 if (limit) {
560 elog(DEBUG1,"est_cond_set_max(%d)", limit + offset);
561 est_cond_set_max(cond, limit + offset);
562 }
563
564 /* get the result of search */
565 /* FIXME: allow user to specify depath of search */
566 nres = est_node_search(node, cond, 0);
567
568 if (! nres) {
569 int status = est_node_status(node);
570 est_cond_delete(cond);
571 est_node_delete(node);
572 est_free_net_env();
573 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
574 errmsg("pgest_node: search failed, node status %d", status)));
575 }
576
577 /* get number of results */
578 resnum = est_noderes_doc_num(nres);
579
580 /* check if results exists */
581 if ( 0 == resnum ) {
582 elog(INFO, "pgest_node: no results for: %s", query );
583 }
584
585 /* total number of tuples to be returned */
586 if (limit && limit < resnum) {
587 nrows = limit;
588 } else {
589 nrows = resnum - offset;
590 }
591
592
593 elog(DEBUG1, "pgest_node: found %d hits for %s", resnum, query);
594
595
596 values = (char **) palloc(ncols * sizeof(char *));
597
598 for (i = 0; i < nrows; i++)
599 {
600
601 /* get result from estraier */
602 if (! ( rdoc = est_noderes_get_doc(nres, i + offset) )) {
603 elog(INFO, "pgest_node: can't find result %d", i + offset);
604 } else {
605 elog(DEBUG1, "URI: %s\n Title: %s\n",
606 est_resdoc_attr(rdoc, "@uri"),
607 est_resdoc_attr(rdoc, "@title")
608 );
609 }
610
611 /* iterate over results */
612 for (j = 0; j < ncols; j++)
613 {
614 bool isnull;
615
616 /* array value of this position */
617 indx[0] = j + attr_dim_lower_bounds[0];
618
619 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
620
621 if (!isnull && rdoc)
622 values[j] = DatumGetCString(
623 node_attr2text(rdoc,
624 (char *)DirectFunctionCall1(textout, dvalue)
625 ));
626 else
627 values[j] = NULL;
628 }
629 /* construct the tuple */
630 tuple = BuildTupleFromCStrings(attinmeta, values);
631
632 /* now store it */
633 tuplestore_puttuple(tupstore, tuple);
634
635 }
636
637 tuplestore_donestoring(tupstore);
638 rsinfo->setResult = tupstore;
639
640 /*
641 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
642 * tuples are in our tuplestore and passed back through
643 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
644 * that we actually used to build our tuples with, so the caller can
645 * verify we did what it was expecting.
646 */
647 rsinfo->setDesc = tupdesc;
648 MemoryContextSwitchTo(oldcontext);
649
650 /* delete the node result object */
651 est_noderes_delete(nres);
652
653 /* destroy the search condition object */
654 est_cond_delete(cond);
655
656 /* destroy the node object */
657 est_node_delete(node);
658
659 /* free the networking environment */
660 est_free_net_env();
661
662 return (Datum) 0;
663 }
664
665 /* make text var from node attr */
666 char *node_attr2text(ESTRESDOC *rdoc, char *attr) {
667 char *val;
668 const char *attrval;
669 int len;
670 int attrlen;
671
672 if (! rdoc) return (Datum) NULL;
673
674 elog(DEBUG1, "doc: %08x, attr: %s", rdoc, attr);
675
676 if ( (attrval = est_resdoc_attr(rdoc, attr)) && (attrlen = strlen(attrval)) ) {
677 val = (char *) palloc(attrlen * sizeof(char));
678 } else {
679 return (Datum) NULL;
680 }
681
682 len = strlen(attrval);
683 elog(DEBUG1, "node_attr2text(%s) = '%s' %d bytes", attr, attrval, len);
684
685 len++;
686 len *= sizeof(char);
687
688 elog(DEBUG2, "palloc(%d)", len);
689
690 val = palloc(len);
691
692 memset(val, 0, len);
693 strncpy(val, attrval, len);
694
695 elog(DEBUG2, "val=%s", val);
696
697 return val;
698 }
699

  ViewVC Help
Powered by ViewVC 1.1.26