/[pgestraier]/trunk/pgest.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/pgest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 49 - (show annotations)
Sat Oct 29 18:54:40 2005 UTC (18 years, 6 months ago) by dpavlin
File MIME type: text/plain
File size: 17751 byte(s)
added depth to node API version of pgest, note that you have to use modified
perl wrapper with node API

1 /*
2 * integrate Hyper Estraier into PostgreSQL
3 *
4 * Dobrica Pavlinusic <dpavlin@rot13.org> 2005-05-19
5 *
6 * TODO:
7 * - all
8 *
9 * NOTES:
10 * - clear structures with memset to support hash indexes (who would like
11 * to create hash index on table returned from function?)
12 * - number of returned rows is set by PostgreSQL evaluator, see:
13 * http://archives.postgresql.org/pgsql-hackers/2005-02/msg00546.php
14 *
15 * Based on:
16 * - C example from PostgreSQL documentation (BSD licence)
17 * - coreexample002.c and nodeexample002.c from Hyper Estraier (GPL)
18 * - _textin/_textout from pgcurl.c (LGPL)
19 *
20 * This code is licenced under GPL
21 */
22
23 #include "postgres.h"
24 #include "fmgr.h"
25 #include "funcapi.h"
26 #include "utils/builtins.h"
27 #include "utils/array.h"
28 #include "miscadmin.h"
29 #include <estraier.h>
30 #include <cabin.h>
31 #include <estnode.h>
32
/*
 * Conversion helpers between PostgreSQL text values and C strings.
 * All of them go through the textin/textout type I/O functions.
 *
 *   _textin(str)    C string    -> text Datum
 *   _textout(str)   text value  -> pointer to a C string
 *   GET_STR(textp)  text value  -> C string
 *   GET_TEXT(cstrp) C string    -> text pointer
 */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))

/*
 * SortMem got renamed in PostgreSQL 8.0; provide a fallback value that is
 * handed to tuplestore_begin_heap().  The expansion is parenthesized so the
 * macro stays a single value when used inside a larger expression.
 */
#ifndef SortMem
#define SortMem (16 * 1024)
#endif

/* separator between individual attribute conditions in the attr argument */
#define ATTR_DELIMITER "{{!}}"
44
45 /* prototype */
46 char *attr2text(ESTDOC *doc, char *attr);
47 char *node_attr2text(ESTRESDOC *rdoc, char *attr);
48
49
50 /* work in progress */
51 PG_FUNCTION_INFO_V1(pgest_attr);
52 Datum pgest_attr(PG_FUNCTION_ARGS)
53 {
54 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(6);
55 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
56 int attr_ndims = ARR_NDIM(attr_arr);
57 int *attr_dim_counts = ARR_DIMS(attr_arr);
58 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
59 int ncols = 0;
60 int nrows = 0;
61 int indx[MAXDIM];
62 int16 attr_len;
63 bool attr_byval;
64 char attr_align;
65 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
66 AttInMetadata *attinmeta;
67 TupleDesc tupdesc;
68 Tuplestorestate *tupstore = NULL;
69 HeapTuple tuple;
70 MemoryContext per_query_ctx;
71 MemoryContext oldcontext;
72 Datum dvalue;
73 char **values;
74 int rsinfo_ncols;
75 int i, j;
76 /* estvars */
77 ESTDB *db;
78 ESTCOND *cond;
79 ESTDOC *doc;
80 const CBLIST *texts;
81 int ecode, *est_result, resnum;
82 int limit = 0;
83 int offset = 0;
84
85 char *index_path;
86 char *query;
87 char *attr;
88 char *order;
89
90
91 /* only allow 1D input array */
92 if (attr_ndims == 1)
93 {
94 ncols = attr_dim_counts[0];
95 }
96 else
97 ereport(ERROR,
98 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
99 errmsg("invalid input array"),
100 errdetail("Input array must have 1 dimension")));
101
102 /* check to see if caller supports us returning a tuplestore */
103 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
104 ereport(ERROR,
105 (errcode(ERRCODE_SYNTAX_ERROR),
106 errmsg("materialize mode required, but it is not " \
107 "allowed in this context")));
108
109 /* get info about element type needed to construct the array */
110 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
111
112 /* get the requested return tuple description */
113 tupdesc = rsinfo->expectedDesc;
114 rsinfo_ncols = tupdesc->natts;
115
116 /*
117 * The requested tuple description better match up with the array
118 * we were given.
119 */
120 if (rsinfo_ncols != ncols)
121 ereport(ERROR,
122 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
123 errmsg("invalid input array"),
124 errdetail("Number of elements in array must match number of query specified columns.")));
125
126 /* OK, use it */
127 attinmeta = TupleDescGetAttInMetadata(tupdesc);
128
129 /* Now go to work */
130 rsinfo->returnMode = SFRM_Materialize;
131
132 per_query_ctx = fcinfo->flinfo->fn_mcxt;
133 oldcontext = MemoryContextSwitchTo(per_query_ctx);
134
135 /* initialize our tuplestore */
136 tupstore = tuplestore_begin_heap(true, false, SortMem);
137
138
139 /* take rest of arguments from function */
140
141 /* index path */
142 if (PG_ARGISNULL(0)) {
143 ereport(ERROR,
144 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
145 errmsg("index path can't be null"),
146 errdetail("Index path must be valid full path to HyperEstraier index")));
147 }
148 index_path = _textout(PG_GETARG_TEXT_P(0));
149
150 /* query string */
151 if (PG_ARGISNULL(1)) {
152 query = "";
153 } else {
154 query = _textout(PG_GETARG_TEXT_P(1));
155 }
156
157 /* atribute filter */
158 if (PG_ARGISNULL(2)) {
159 attr = "";
160 } else {
161 attr = _textout(PG_GETARG_TEXT_P(2));
162 }
163
164 /* sort order */
165 if (PG_ARGISNULL(3)) {
166 order = "";
167 } else {
168 order = _textout(PG_GETARG_TEXT_P(3));
169 }
170
171
172 /* limit */
173 if (PG_ARGISNULL(4)) {
174 limit = 0;
175 } else {
176 limit = PG_GETARG_INT32(4);
177 }
178
179 /* offset */
180 if (PG_ARGISNULL(5)) {
181 offset = 0;
182 } else {
183 offset = PG_GETARG_INT32(5);
184 }
185
186
187 /* open the database */
188 elog(DEBUG1, "pgest_attr: est_db_open(%s)", index_path);
189
190 if(!(db = est_db_open(index_path, ESTDBREADER, &ecode))){
191 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
192 errmsg("est_db_open: can't open %s: %d", index_path, ecode),
193 errdetail(est_err_msg(ecode))));
194 }
195
196 elog(DEBUG1, "pgest_attr: query[%s] attr[%s] limit %d offset %d", query, (PG_ARGISNULL(2) ? "NULL" : attr), limit, offset);
197
198 /* create a search condition object */
199 if (!(cond = est_cond_new())) {
200 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
201 errmsg("pgest_attr: est_cond_new failed")));
202 }
203
204 /* set the search phrase to the search condition object */
205 if (! PG_ARGISNULL(1) && strlen(query) > 0)
206 est_cond_set_phrase(cond, query);
207
208 /* minimum valid attribute length is 10: @a STREQ a */
209 if (! PG_ARGISNULL(2) && strlen(attr) >= 10) {
210 elog(DEBUG1,"attributes: %s", attr);
211 char *curr_attr;
212 curr_attr = strtok(attr, ATTR_DELIMITER);
213 while (curr_attr) {
214 elog(DEBUG1,"est_cond_add_attr(%s)", curr_attr);
215 est_cond_add_attr(cond, curr_attr);
216 curr_attr = strtok(NULL, ATTR_DELIMITER);
217 }
218 }
219
220 /* set the search phrase to the search condition object */
221 if (! PG_ARGISNULL(3) && strlen(order) > 0) {
222 elog(DEBUG1,"est_cond_set_order(%s)", order);
223 est_cond_set_order(cond, order);
224 }
225
226 if (limit) {
227 elog(DEBUG1,"est_cond_set_max(%d)", limit + offset);
228 est_cond_set_max(cond, limit + offset);
229 }
230
231 /* get the result of search */
232 est_result = est_db_search(db, cond, &resnum, NULL);
233
234 /* check if results exists */
235 if ( 0 == resnum ) {
236 elog(INFO, "pgest_attr: no results for: %s", query );
237 }
238
239 /* total number of tuples to be returned */
240 if (limit && limit < resnum) {
241 nrows = limit;
242 } else {
243 nrows = resnum - offset;
244 }
245
246
247 elog(DEBUG1, "pgest_attr: found %d hits for %s", resnum, query);
248
249 values = (char **) palloc(ncols * sizeof(char *));
250
251 for (i = 0; i < nrows; i++)
252 {
253
254 /* get result from estraier */
255 if (! ( doc = est_db_get_doc(db, est_result[i + offset], 0)) ) {
256 elog(INFO, "pgest_attr: can't find result %d", i + offset);
257 } else {
258 elog(DEBUG1, "URI: %s\n Title: %s\n",
259 est_doc_attr(doc, "@uri"),
260 est_doc_attr(doc, "@title")
261 );
262 }
263
264 /* iterate over results */
265 for (j = 0; j < ncols; j++)
266 {
267 bool isnull;
268
269 /* array value of this position */
270 indx[0] = j + attr_dim_lower_bounds[0];
271
272 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
273
274 if (!isnull && doc)
275 values[j] = DatumGetCString(
276 attr2text(doc,
277 (char *)DirectFunctionCall1(textout, dvalue)
278 ));
279 else
280 values[j] = NULL;
281 }
282 /* construct the tuple */
283 tuple = BuildTupleFromCStrings(attinmeta, values);
284
285 /* now store it */
286 tuplestore_puttuple(tupstore, tuple);
287
288 /* delete estraier document object */
289 if (doc) est_doc_delete(doc);
290 }
291
292 tuplestore_donestoring(tupstore);
293 rsinfo->setResult = tupstore;
294
295 /*
296 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
297 * tuples are in our tuplestore and passed back through
298 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
299 * that we actually used to build our tuples with, so the caller can
300 * verify we did what it was expecting.
301 */
302 rsinfo->setDesc = tupdesc;
303 MemoryContextSwitchTo(oldcontext);
304
305 est_cond_delete(cond);
306
307 if(!est_db_close(db, &ecode)){
308 ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
309 errmsg("est_db_close: %d", ecode),
310 errdetail(est_err_msg(ecode))));
311 }
312
313 return (Datum) 0;
314 }
315
316
317 /* make text var from attr */
318 char *attr2text(ESTDOC *doc, char *attr) {
319 char *val;
320 const char *attrval;
321 int len;
322 int attrlen;
323
324 if (! doc) return (Datum) NULL;
325
326 elog(DEBUG1, "doc: %08x, attr: %s", doc, attr);
327
328 if ( (attrval = est_doc_attr(doc, attr)) && (attrlen = strlen(attrval)) ) {
329 val = (char *) palloc(attrlen * sizeof(char));
330 } else {
331 return (Datum) NULL;
332 }
333
334 len = strlen(attrval);
335 elog(DEBUG1, "attr2text(%s) = '%s' %d bytes", attr, attrval, len);
336
337 len++;
338 len *= sizeof(char);
339
340 elog(DEBUG2, "palloc(%d)", len);
341
342 val = palloc(len);
343
344 memset(val, 0, len);
345 strncpy(val, attrval, len);
346
347 elog(DEBUG2, "val=%s", val);
348
349 return val;
350 }
351
352 /*
353 * variation on theme: use node API which doesn't open index on
354 * every query which is much faster for large indexes
355 *
356 */
357
/*
 * Positions of the SQL arguments of pgest_node, in call order:
 *
 *   select * from pgest_node(node_uri, login, passwd, depth, query,
 *                            attr, order, limit, offset, attr_array)
 *     as (foo text, ... );
 */
#define _arg_node_uri		0	/* URL of the node */
#define _arg_login		1	/* user name for the node */
#define _arg_passwd		2	/* password for the node */
#define _arg_depth		3	/* search depth */
#define _arg_query		4	/* search phrase */
#define _arg_attr		5	/* attribute filter */
#define _arg_order		6	/* sort order */
#define _arg_limit		7	/* maximum number of rows */
#define _arg_offset		8	/* number of hits to skip */
#define _arg_attr_array		9	/* attribute names, one per column */
370
371
372 PG_FUNCTION_INFO_V1(pgest_node);
373 Datum pgest_node(PG_FUNCTION_ARGS)
374 {
375 ArrayType *attr_arr = PG_GETARG_ARRAYTYPE_P(_arg_attr_array);
376 Oid attr_element_type = ARR_ELEMTYPE(attr_arr);
377 int attr_ndims = ARR_NDIM(attr_arr);
378 int *attr_dim_counts = ARR_DIMS(attr_arr);
379 int *attr_dim_lower_bounds = ARR_LBOUND(attr_arr);
380 int ncols = 0;
381 int nrows = 0;
382 int indx[MAXDIM];
383 int16 attr_len;
384 bool attr_byval;
385 char attr_align;
386 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
387 AttInMetadata *attinmeta;
388 TupleDesc tupdesc;
389 Tuplestorestate *tupstore = NULL;
390 HeapTuple tuple;
391 MemoryContext per_query_ctx;
392 MemoryContext oldcontext;
393 Datum dvalue;
394 char **values;
395 int rsinfo_ncols;
396 int i, j;
397 /* estvars */
398 ESTNODE *node;
399 ESTCOND *cond;
400 ESTNODERES *nres;
401 ESTRESDOC *rdoc;
402 const CBLIST *texts;
403 int resnum = 0;
404 int limit = 0;
405 int offset = 0;
406 int depth = 0;
407
408 char *node_url;
409 char *user, *passwd;
410 char *query;
411 char *attr;
412 char *order;
413
414
415 /* only allow 1D input array */
416 if (attr_ndims == 1)
417 {
418 ncols = attr_dim_counts[0];
419 }
420 else
421 ereport(ERROR,
422 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
423 errmsg("invalid input array"),
424 errdetail("Input array must have 1 dimension")));
425
426 /* check to see if caller supports us returning a tuplestore */
427 if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
428 ereport(ERROR,
429 (errcode(ERRCODE_SYNTAX_ERROR),
430 errmsg("materialize mode required, but it is not " \
431 "allowed in this context")));
432
433 /* get info about element type needed to construct the array */
434 get_typlenbyvalalign(attr_element_type, &attr_len, &attr_byval, &attr_align);
435
436 /* get the requested return tuple description */
437 tupdesc = rsinfo->expectedDesc;
438 rsinfo_ncols = tupdesc->natts;
439
440 /*
441 * The requested tuple description better match up with the array
442 * we were given.
443 */
444 if (rsinfo_ncols != ncols)
445 ereport(ERROR,
446 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
447 errmsg("invalid input array"),
448 errdetail("Number of elements in array must match number of query specified columns.")));
449
450 /* OK, use it */
451 attinmeta = TupleDescGetAttInMetadata(tupdesc);
452
453 /* Now go to work */
454 rsinfo->returnMode = SFRM_Materialize;
455
456 per_query_ctx = fcinfo->flinfo->fn_mcxt;
457 oldcontext = MemoryContextSwitchTo(per_query_ctx);
458
459 /* initialize our tuplestore */
460 tupstore = tuplestore_begin_heap(true, false, SortMem);
461
462
463 /* take rest of arguments from function */
464
465 /* node URL */
466 if (PG_ARGISNULL(_arg_node_uri)) {
467 ereport(ERROR,
468 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
469 errmsg("node URL can't be null"),
470 errdetail("Node URL must be valid URL to HyperEstraier node")));
471 }
472 node_url = _textout(PG_GETARG_TEXT_P(_arg_node_uri));
473
474 /* login and password */
475 if (PG_ARGISNULL(_arg_login) || PG_ARGISNULL(_arg_passwd)) {
476 ereport(ERROR,
477 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
478 errmsg("username and password can't be NULL"),
479 errdetail("You must specify valid username and password to HyperEstraier node")));
480 }
481 user = _textout(PG_GETARG_TEXT_P(_arg_login));
482 passwd = _textout(PG_GETARG_TEXT_P(_arg_passwd));
483
484 /* depth of search */
485 if (PG_ARGISNULL(_arg_depth)) {
486 depth = 0;
487 } else {
488 depth = PG_GETARG_INT32(_arg_depth);
489 }
490
491 /* query string */
492 if (PG_ARGISNULL(_arg_query)) {
493 query = "";
494 } else {
495 query = _textout(PG_GETARG_TEXT_P(_arg_query));
496 }
497
498 /* atribute filter */
499 if (PG_ARGISNULL(_arg_attr)) {
500 attr = "";
501 } else {
502 attr = _textout(PG_GETARG_TEXT_P(_arg_attr));
503 }
504
505 /* sort order */
506 if (PG_ARGISNULL(_arg_order)) {
507 order = "";
508 } else {
509 order = _textout(PG_GETARG_TEXT_P(_arg_order));
510 }
511
512
513 /* limit */
514 if (PG_ARGISNULL(_arg_limit)) {
515 limit = 0;
516 } else {
517 limit = PG_GETARG_INT32(_arg_limit);
518 }
519
520 /* offset */
521 if (PG_ARGISNULL(_arg_offset)) {
522 offset = 0;
523 } else {
524 offset = PG_GETARG_INT32(_arg_offset);
525 }
526
527 /* initialize the network environment */
528 if(!est_init_net_env()){
529 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
530 errmsg("pgest_node: can't create network enviroment")));
531 }
532
533 /* create the node connection object */
534 elog(DEBUG1, "pgest_node: est_node_new(%s) as %s", node_url, user);
535 node = est_node_new(node_url);
536 est_node_set_auth(node, user, passwd);
537
538 elog(DEBUG1, "pgest_node: node: %s (d:%d) query[%s] attr[%s] limit %d offset %d", node_url, depth, query, (PG_ARGISNULL(_arg_attr) ? "NULL" : attr), limit, offset);
539
540 /* create a search condition object */
541 if (!(cond = est_cond_new())) {
542 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
543 errmsg("pgest_node: est_cond_new failed")));
544 }
545
546 /* set the search phrase to the search condition object */
547 if (! PG_ARGISNULL(_arg_query) && strlen(query) > 0)
548 est_cond_set_phrase(cond, query);
549
550 /* minimum valid attribute length is 10: @a STREQ a */
551 if (! PG_ARGISNULL(_arg_attr) && strlen(attr) >= 10) {
552 elog(DEBUG1,"attributes: %s", attr);
553 char *curr_attr;
554 curr_attr = strtok(attr, ATTR_DELIMITER);
555 while (curr_attr) {
556 elog(DEBUG1,"est_cond_add_attr(%s)", curr_attr);
557 est_cond_add_attr(cond, curr_attr);
558 curr_attr = strtok(NULL, ATTR_DELIMITER);
559 }
560 }
561
562 /* set the search phrase to the search condition object */
563 if (! PG_ARGISNULL(_arg_order) && strlen(order) > 0) {
564 elog(DEBUG1,"est_cond_set_order(%s)", order);
565 est_cond_set_order(cond, order);
566 }
567
568 if (limit) {
569 elog(DEBUG1,"est_cond_set_max(%d)", limit + offset);
570 est_cond_set_max(cond, limit + offset);
571 }
572
573 /* get the result of search */
574 nres = est_node_search(node, cond, depth);
575
576 if (! nres) {
577 int status = est_node_status(node);
578 est_cond_delete(cond);
579 est_node_delete(node);
580 est_free_net_env();
581 ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED),
582 errmsg("pgest_node: search failed, node status %d", status)));
583 }
584
585 /* get number of results */
586 resnum = est_noderes_doc_num(nres);
587
588 /* check if results exists */
589 if ( 0 == resnum ) {
590 elog(INFO, "pgest_node: no results for: %s", query );
591 }
592
593 /* total number of tuples to be returned */
594 if (limit && limit < resnum) {
595 nrows = limit;
596 } else {
597 nrows = resnum - offset;
598 }
599
600
601 elog(DEBUG1, "pgest_node: found %d hits for %s", resnum, query);
602
603
604 values = (char **) palloc(ncols * sizeof(char *));
605
606 for (i = 0; i < nrows; i++)
607 {
608
609 /* get result from estraier */
610 if (! ( rdoc = est_noderes_get_doc(nres, i + offset) )) {
611 elog(INFO, "pgest_node: can't find result %d", i + offset);
612 } else {
613 elog(DEBUG1, "URI: %s\n Title: %s\n",
614 est_resdoc_attr(rdoc, "@uri"),
615 est_resdoc_attr(rdoc, "@title")
616 );
617 }
618
619 /* iterate over results */
620 for (j = 0; j < ncols; j++)
621 {
622 bool isnull;
623
624 /* array value of this position */
625 indx[0] = j + attr_dim_lower_bounds[0];
626
627 dvalue = array_ref(attr_arr, attr_ndims, indx, -1, attr_len, attr_byval, attr_align, &isnull);
628
629 if (!isnull && rdoc)
630 values[j] = DatumGetCString(
631 node_attr2text(rdoc,
632 (char *)DirectFunctionCall1(textout, dvalue)
633 ));
634 else
635 values[j] = NULL;
636 }
637 /* construct the tuple */
638 tuple = BuildTupleFromCStrings(attinmeta, values);
639
640 /* now store it */
641 tuplestore_puttuple(tupstore, tuple);
642
643 }
644
645 tuplestore_donestoring(tupstore);
646 rsinfo->setResult = tupstore;
647
648 /*
649 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
650 * tuples are in our tuplestore and passed back through
651 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
652 * that we actually used to build our tuples with, so the caller can
653 * verify we did what it was expecting.
654 */
655 rsinfo->setDesc = tupdesc;
656 MemoryContextSwitchTo(oldcontext);
657
658 /* delete the node result object */
659 est_noderes_delete(nres);
660
661 /* destroy the search condition object */
662 est_cond_delete(cond);
663
664 /* destroy the node object */
665 est_node_delete(node);
666
667 /* free the networking environment */
668 est_free_net_env();
669
670 return (Datum) 0;
671 }
672
673 /* make text var from node attr */
674 char *node_attr2text(ESTRESDOC *rdoc, char *attr) {
675 char *val;
676 const char *attrval;
677 int len;
678 int attrlen;
679
680 if (! rdoc) return (Datum) NULL;
681
682 elog(DEBUG1, "doc: %08x, attr: %s", rdoc, attr);
683
684 if ( (attrval = est_resdoc_attr(rdoc, attr)) && (attrlen = strlen(attrval)) ) {
685 val = (char *) palloc(attrlen * sizeof(char));
686 } else {
687 return (Datum) NULL;
688 }
689
690 len = strlen(attrval);
691 elog(DEBUG1, "node_attr2text(%s) = '%s' %d bytes", attr, attrval, len);
692
693 len++;
694 len *= sizeof(char);
695
696 elog(DEBUG2, "palloc(%d)", len);
697
698 val = palloc(len);
699
700 memset(val, 0, len);
701 strncpy(val, attrval, len);
702
703 elog(DEBUG2, "val=%s", val);
704
705 return val;
706 }
707

  ViewVC Help
Powered by ViewVC 1.1.26