1 |
/************************************************************************************************* |
2 |
* The node API of Hyper Estraier |
3 |
* Copyright (C) 2004-2005 Mikio Hirabayashi |
4 |
* This file is part of Hyper Estraier. |
5 |
* Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of |
6 |
* the GNU Lesser General Public License as published by the Free Software Foundation; either |
7 |
* version 2.1 of the License or any later version. Hyper Estraier is distributed in the hope |
8 |
* that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
10 |
* License for more details. |
11 |
* You should have received a copy of the GNU Lesser General Public License along with Hyper |
12 |
* Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, |
13 |
* Boston, MA 02111-1307 USA. |
14 |
*************************************************************************************************/ |
15 |
|
16 |
|
17 |
#ifndef _ESTNODE_H /* duplication check */ |
18 |
#define _ESTNODE_H |
19 |
|
20 |
#if defined(__cplusplus) /* export for C++ */ |
21 |
extern "C" { |
22 |
#endif |
23 |
|
24 |
|
25 |
#include <estraier.h> |
26 |
#include <estmtdb.h> |
27 |
#include <cabin.h> |
28 |
#include <pthread.h> |
29 |
#include <stdlib.h> |
30 |
|
31 |
|
32 |
|
33 |
/************************************************************************************************* |
34 |
* API for the network environment |
35 |
*************************************************************************************************/ |
36 |
|
37 |
|
38 |
/* Initialize the networking environment. |
39 |
The return value is true if success, else it is false. |
40 |
As it is allowable to call this function multiple times, it is needed to call the function |
41 |
`est_free_net_env' the same number of times. */ |
42 |
int est_init_net_env(void); |
43 |
|
44 |
|
45 |
/* Free the networking environment. */ |
46 |
void est_free_net_env(void); |
47 |
|
48 |
|
49 |
|
50 |
/************************************************************************************************* |
51 |
* API for search result of node |
52 |
*************************************************************************************************/ |
53 |
|
54 |
|
55 |
typedef struct { /* type of structure for one document entry in a search result */ |
56 |
const char *uri; /* URI of the document */ |
57 |
CBMAP *attrs; /* map object of the attributes of the document */ |
58 |
char *snippet; /* snippet string of the document */ |
59 |
} ESTRESDOC; |
60 |
|
61 |
typedef struct { /* type of structure for the search result of a node */ |
62 |
ESTRESDOC *docs; /* array of result documents */ |
63 |
int top; /* offset number of the top element in the array */ |
64 |
int max; /* maximum number of allocated elements */ |
65 |
int dnum; /* number of effective elements */ |
66 |
CBMAP *hints; /* map object for hints of the result */ |
67 |
} ESTNODERES; |
68 |
|
69 |
|
70 |
/* Delete a node result object. |
71 |
`nres' specifies a node result object. */ |
72 |
void est_noderes_delete(ESTNODERES *nres); |
73 |
|
74 |
|
75 |
/* Get a map object for hints of a node result object. |
76 |
`nres' specifies a node result object. |
77 |
The return value is a map object for hints. Keys of the map are "VERSION", "NODE", "HIT", |
78 |
"HINT#n", "DOCNUM", "WORDNUM", "TIME", "LINK#n", and "VIEW". The life duration of the |
79 |
returned object is synchronous with the one of the node result object. */ |
80 |
CBMAP *est_noderes_hints(ESTNODERES *nres); |
81 |
|
82 |
|
83 |
/* Get the number of documents in a node result object. |
84 |
`nres' specifies a node result object. |
85 |
The return value is the number of documents in a node result object. */ |
86 |
int est_noderes_doc_num(ESTNODERES *nres); |
87 |
|
88 |
|
89 |
/* Refer to a result document object in a node result object. |
90 |
`nres' specifies a node result object. |
91 |
`index' specifies the index of a document. |
92 |
The return value is a result document object or `NULL' if `index' is equal to or more than |
93 |
the number of documents. The life duration of the returned object is synchronous with the |
94 |
one of the node result object. */ |
95 |
ESTRESDOC *est_noderes_get_doc(ESTNODERES *nres, int index); |
96 |
|
97 |
|
98 |
/* Get the URI of a result document object. |
99 |
`rdoc' specifies a result document object. |
100 |
The return value is the URI of the result document object. The life duration of the returned |
101 |
string is synchronous with the one of the result document object. */ |
102 |
const char *est_resdoc_uri(ESTRESDOC *rdoc); |
103 |
|
104 |
|
105 |
/* Get a list of attribute names of a result document object. |
106 |
`rdoc' specifies a result document object. |
107 |
The return value is a new list object of attribute names of the result document object. |
108 |
Because the object of the return value is opened with the function `cblistopen', it should be |
109 |
closed with the function `cblistclose' if it is no longer in use. */ |
110 |
CBLIST *est_resdoc_attr_names(ESTRESDOC *rdoc); |
111 |
|
112 |
|
113 |
/* Get the value of an attribute of a result document object. |
114 |
`rdoc' specifies a result document object. |
115 |
`name' specifies the name of an attribute. |
116 |
The return value is the value of the attribute or `NULL' if it does not exist. The life |
117 |
duration of the returned string is synchronous with the one of the result document object. */ |
118 |
const char *est_resdoc_attr(ESTRESDOC *rdoc, const char *name); |
119 |
|
120 |
|
121 |
/* Get the snippet of a result document object. |
122 |
`rdoc' specifies a result document object. |
123 |
The return value is the snippet of the result document object. There are tab separated |
124 |
values. Each line is a string to be shown. Though most lines have only one field, some |
125 |
lines have two fields. If the second field exists, the first field is to be shown |
126 |
highlighted, and the second field means its normalized form. The life duration of the |
127 |
returned string is synchronous with the one of the result document object. */ |
128 |
const char *est_resdoc_snippet(ESTRESDOC *rdoc); |
129 |
|
130 |
|
131 |
|
132 |
/************************************************************************************************* |
133 |
* API for node |
134 |
*************************************************************************************************/ |
135 |
|
136 |
|
137 |
typedef struct { /* type of structure for a node connection object */ |
138 |
char *url; /* URL of the node */ |
139 |
char *pxhost; /* host name of the proxy server */ |
140 |
int pxport; /* port number of the proxy server */ |
141 |
int timeout; /* timeout of the connection in seconds */ |
142 |
char *auth; /* authentication information */ |
143 |
char *name; /* name of the node */ |
144 |
char *label; /* label of the node */ |
145 |
int dnum; /* number of documents in the node */ |
146 |
int wnum; /* number of words in the node */ |
147 |
double size; /* size of the database of the node */ |
148 |
int status; /* status code of the last request */ |
149 |
CBMAP *heads; /* map object of extension headers */ |
150 |
} ESTNODE; |
151 |
|
152 |
|
153 |
/* Create a node connection object. |
154 |
`url' specifies the URL of a node. |
155 |
The return value is a node connection object. */ |
156 |
ESTNODE *est_node_new(const char *url); |
157 |
|
158 |
|
159 |
/* Destroy a node connection object. |
160 |
`node' specifies a node connection object. */ |
161 |
void est_node_delete(ESTNODE *node); |
162 |
|
163 |
|
164 |
/* Set the proxy information of a node connection object. |
165 |
`node' specifies a node connection object. |
166 |
`host' specifies the host name of a proxy server. |
167 |
`port' specifies the port number of the proxy server. */ |
168 |
void est_node_set_proxy(ESTNODE *node, const char *host, int port); |
169 |
|
170 |
|
171 |
/* Set timeout of a connection. |
172 |
`node' specifies a node connection object. |
173 |
`sec' specifies timeout of the connection in seconds. */ |
174 |
void est_node_set_timeout(ESTNODE *node, int sec); |
175 |
|
176 |
|
177 |
/* Set the authentication information of a node connection object. |
178 |
`node' specifies a node connection object. |
179 |
`name' specifies the name of authentication. |
180 |
`passwd' specifies the password of the authentication. */ |
181 |
void est_node_set_auth(ESTNODE *node, const char *name, const char *passwd); |
182 |
|
183 |
|
184 |
/* Get the status code of the last request of a node. |
185 |
`node' specifies a node connection object. |
186 |
The return value is the status code of the last request of the node. -1 means failure of |
187 |
connection. */ |
188 |
int est_node_status(ESTNODE *node); |
189 |
|
190 |
|
191 |
/* Add a document to a node. |
192 |
`node' specifies a node connection object. |
193 |
`doc' specifies a document object. The document object should have the URI attribute. |
194 |
The return value is true if success, else it is false. |
195 |
If the URI attribute is same with an existing document in the node, the existing one is |
196 |
deleted. */ |
197 |
int est_node_put_doc(ESTNODE *node, ESTDOC *doc); |
198 |
|
199 |
|
200 |
/* Remove a document from a node. |
201 |
`node' specifies a node connection object. |
202 |
`id' specifies the ID number of a registered document. |
203 |
The return value is true if success, else it is false. */ |
204 |
int est_node_out_doc(ESTNODE *node, int id); |
205 |
|
206 |
|
207 |
/* Remove a document specified by URI from a node. |
208 |
`node' specifies a node connection object. |
209 |
`uri' specifies the URI of a registered document. |
210 |
The return value is true if success, else it is false. */ |
211 |
int est_node_out_doc_by_uri(ESTNODE *node, const char *uri); |
212 |
|
213 |
|
214 |
/* Retrieve a document in a node. |
215 |
`node' specifies a node connection object. |
216 |
`id' specifies the ID number of a registered document. |
217 |
The return value is a document object. It should be deleted with `est_doc_delete' if it is |
218 |
no longer in use. On error, `NULL' is returned. */ |
219 |
ESTDOC *est_node_get_doc(ESTNODE *node, int id); |
220 |
|
221 |
|
222 |
/* Retrieve a document specified by URI in a node. |
223 |
`node' specifies a node connection object. |
224 |
`uri' specifies the URI of a registered document. |
225 |
The return value is a document object. It should be deleted with `est_doc_delete' if it is |
226 |
no longer in use. On error, `NULL' is returned. */ |
227 |
ESTDOC *est_node_get_doc_by_uri(ESTNODE *node, const char *uri); |
228 |
|
229 |
|
230 |
/* Retrieve the value of an attribute of a document in a node. |
231 |
`node' specifies a node connection object. |
232 |
`id' specifies the ID number of a registered document. |
233 |
`name' specifies the name of an attribute. |
234 |
The return value is the value of the attribute or `NULL' if it does not exist. Because the |
235 |
region of the return value is allocated with the `malloc' call, it should be released with |
236 |
the `free' call if it is no longer in use. */ |
237 |
char *est_node_get_doc_attr(ESTNODE *node, int id, const char *name); |
238 |
|
239 |
|
240 |
/* Retrieve the value of an attribute of a document specified by URI in a node. |
241 |
`node' specifies a node connection object. |
242 |
`uri' specifies the URI of a registered document. |
243 |
`name' specifies the name of an attribute. |
244 |
The return value is the value of the attribute or `NULL' if it does not exist. Because the |
245 |
region of the return value is allocated with the `malloc' call, it should be released with |
246 |
the `free' call if it is no longer in use. */ |
247 |
char *est_node_get_doc_attr_by_uri(ESTNODE *node, const char *uri, const char *name); |
248 |
|
249 |
|
250 |
/* Get the ID of a document specified by URI. |
251 |
`node' specifies a node connection object. |
252 |
`uri' specifies the URI of a registered document. |
253 |
The return value is the ID of the document. On error, -1 is returned. */ |
254 |
int est_node_uri_to_id(ESTNODE *node, const char *uri); |
255 |
|
256 |
|
257 |
/* Get the name of a node. |
258 |
`node' specifies a node connection object. |
259 |
The return value is the name of the node. On error, `NULL' is returned. The life duration |
260 |
of the returned string is synchronous with the one of the node object. */ |
261 |
const char *est_node_name(ESTNODE *node); |
262 |
|
263 |
|
264 |
/* Get the label of a node. |
265 |
`node' specifies a node connection object. |
266 |
The return value is the label of the node. On error, `NULL' is returned. The life duration |
267 |
of the returned string is synchronous with the one of the node object. */ |
268 |
const char *est_node_label(ESTNODE *node); |
269 |
|
270 |
|
271 |
/* Get the number of documents in a node. |
272 |
`node' specifies a node connection object. |
273 |
The return value is the number of documents in the node. On error, -1 is returned. */ |
274 |
int est_node_doc_num(ESTNODE *node); |
275 |
|
276 |
|
277 |
/* Get the number of unique words in a node. |
278 |
`node' specifies a node connection object. |
279 |
The return value is the number of unique words in the node. On error, -1 is returned. */ |
280 |
int est_node_word_num(ESTNODE *node); |
281 |
|
282 |
|
283 |
/* Get the size of the database of a node. |
284 |
`node' specifies a node connection object. |
285 |
The return value is the size of the database of the node. On error, -1.0 is returned. */ |
286 |
double est_node_size(ESTNODE *node); |
287 |
|
288 |
|
289 |
/* Search a node for documents corresponding to a condition. |
290 |
`node' specifies a node connection object. |
291 |
`cond' specifies a condition object. |
292 |
`depth' specifies the depth of meta search. |
293 |
The return value is a node result object. It should be deleted with `est_noderes_delete' if |
294 |
it is no longer in use. On error, `NULL' is returned. */ |
295 |
ESTNODERES *est_node_search(ESTNODE *node, ESTCOND *cond, int depth); |
296 |
|
297 |
|
298 |
/* Manage a user account of a node. |
299 |
`node' specifies a node connection object. |
300 |
`name' specifies the name of a user. |
301 |
`mode' specifies the operation mode. 0 means to delete the account. 1 means to set the |
302 |
account as an administrator. 2 means to set the account as a normal user. |
303 |
The return value is true if success, else it is false. */ |
304 |
int est_node_set_user(ESTNODE *node, const char *name, int mode); |
305 |
|
306 |
|
307 |
/* Manage a link of a node. |
308 |
`node' specifies a node connection object. |
309 |
`url' specifies the URL of the target node of a link. |
310 |
`label' specifies the label of the link. |
311 |
`credit' specifies the credit of the link. If it is negative, the link is removed. |
312 |
The return value is true if success, else it is false. */ |
313 |
int est_node_set_link(ESTNODE *node, const char *url, const char *label, int credit); |
314 |
|
315 |
|
316 |
|
317 |
/************************************************************************************************* |
318 |
* features for experts |
319 |
*************************************************************************************************/ |
320 |
|
321 |
|
322 |
#define ESTAGENTNAME "HyperEstraier" /* name of the user agent */ |
323 |
#define ESTFORMTYPE "application/x-www-form-urlencoded" /* media type of URL-encoded form data */ |
324 |
#define ESTINFORMTYPE "text/x-estraier-nodeinfo" /* media type of node information */ |
325 |
#define ESTRESULTTYPE "text/x-estraier-result" /* media type of search result */ |
326 |
#define ESTDRAFTTYPE "text/x-estraier-draft" /* media type of document draft */ |
327 |
#define ESTHTHVIA "X-Estraier-Via" /* name of the header to escape from looping route */ |
328 |
|
329 |
|
330 |
/* Get the name of this host. |
331 |
The return value is the name of this host. */ |
332 |
const char *est_get_host_name(void); |
333 |
|
334 |
|
335 |
/* Get the address of a host. |
336 |
`name' specifies the name of a host. |
337 |
The return value is the address of a host or `NULL' if failure. Because the region of the |
338 |
return value is allocated with the `malloc' call, it should be released with the `free' call |
339 |
if it is no longer in use. */ |
340 |
char *est_get_host_addr(const char *name); |
341 |
|
342 |
|
343 |
/* Get a server socket of an address and a port. |
344 |
`addr' specifies an address of a host. If it is `NULL', every network address is bound. |
345 |
`port' specifies a port number. |
346 |
The return value is the socket of the address and the port or -1 if failure. */ |
347 |
int est_get_server_sock(const char *addr, int port); |
348 |
|
349 |
|
350 |
/* Accept a connection from a client. |
351 |
`sock' specifies a server socket. |
352 |
`abuf' specifies a buffer into which the address of a connected client is written. The size of |
353 |
the buffer should be more than 32. If it is `NULL', it is not used. |
354 |
`pp' specifies the pointer to a variable to which the port of the client is assigned. If it |
355 |
is `NULL', it is not used. |
356 |
The return value is a socket connected to the client, or 0 if interrupted, or -1 if failure. |
357 |
The thread blocks until the connection is established. */ |
358 |
int est_accept_conn(int sock, char *abuf, int *pp); |
359 |
|
360 |
|
361 |
/* Get a client socket to an address and a port. |
362 |
`addr' specifies an address of a host. |
363 |
`port' specifies a port number. |
364 |
The return value is the socket to the address and the port or -1 if failure. */ |
365 |
int est_get_client_sock(const char *addr, int port); |
366 |
|
367 |
|
368 |
/* Shutdown and close a socket. |
369 |
`sock' specifies a socket. */ |
370 |
void est_sock_down(int sock); |
371 |
|
372 |
|
373 |
/* Receive all data from a socket. |
374 |
`sock' specifies a socket. |
375 |
`len' specifies the length of data to be read. |
376 |
The return value is the pointer to an allocated region containing the received |
377 |
data. |
378 |
Because an additional zero code is appended at the end of the region of the return value, the |
379 |
return value can be treated as a character string. Because the region of the return value is |
380 |
allocated with the `malloc' call, it should be released with the `free' call if it is no |
381 |
longer in use. */ |
382 |
char *est_sock_recv_all(int sock, int len); |
383 |
|
384 |
|
385 |
/* Receive a line from a socket. |
386 |
`sock' specifies a socket. |
387 |
`buf' specifies a buffer to store read data. |
388 |
`max' specifies the maximum length to read. It should be more than 0. |
389 |
The return value is the size of received data. |
390 |
Because an additional zero code is appended at the end of the region of the buffer, it can be |
391 |
treated as a character string. */ |
392 |
int est_sock_recv_line(int sock, char *buf, int max); |
393 |
|
394 |
|
395 |
/* Receive void data from a socket. |
396 |
`sock' specifies a socket. */ |
397 |
void est_sock_recv_void(int sock); |
398 |
|
399 |
|
400 |
/* Send all data into a socket. |
401 |
`sock' specifies a socket. |
402 |
`buf' specifies a buffer of data to write. |
403 |
`len' specifies the length of the data. */ |
404 |
void est_sock_send_all(int sock, const char *buf, int len); |
405 |
|
406 |
|
407 |
/* Perform formatted output into a datum object. |
408 |
`format' specifies a printf-like format string. |
409 |
The conversion character `%' can be used with such flag characters as `s', `d', `o', `u', |
410 |
`x', `X', `c', `e', `E', `f', `g', `G', `@', `?', `%'. `@' works as with `s' but escapes |
411 |
meta characters of XML. `?' works as with `s' but escapes meta characters of URL. The other |
412 |
conversion characters work as with each original. */ |
413 |
void est_datum_printf(CBDATUM *datum, const char *format, ...); |
414 |
|
415 |
|
416 |
/* Perform an interaction of a URL. |
417 |
`url' specifies a URL. |
418 |
`pxhost' specifies the host name of a proxy. If it is `NULL', it is not used. |
419 |
`pxport' specifies the port number of the proxy. |
420 |
`outsec' specifies timeout in seconds. If it is negative, it is not used. |
421 |
`auth' specifies an authority information in such form as "user:pass". If it is `NULL', it is |
422 |
not used. |
423 |
`reqheads' specifies a list object of extension headers. If it is `NULL' it is not used. |
424 |
`reqbody' specifies the pointer of the entity body of request. If it is `NULL', "GET" |
425 |
method is used. |
426 |
`rbsiz' specifies the size of the entity body. |
427 |
`rescodep' specifies the pointer to a variable to which the status code of response is |
428 |
assigned. If it is `NULL', it is not used. |
429 |
`resheads' specifies a map object into which headers of response are stored. The value of each |
430 |
header is recorded as an attribute whose name is converted from the header name into lower |
431 |
cases. The top header for the status code is recorded with the key of an empty string. If it |
432 |
is `NULL', it is not used. |
433 |
`resbody' specifies a datum object into which the entity body of response is stored. If it is |
434 |
`NULL', it is not used. |
435 |
The return value is true if success, else it is false. |
436 |
Headers of "Host", "Connection", "User-Agent", "Authorization", and "Content-Length" are sent |
437 |
implicitly. */ |
438 |
int est_url_shuttle(const char *url, const char *pxhost, int pxport, int outsec, |
439 |
const char *auth, const CBLIST *reqheads, const char *reqbody, int rbsiz, |
440 |
int *rescodep, CBMAP *resheads, CBDATUM *resbody); |
441 |
|
442 |
|
443 |
/* Add a header to a node connection object. |
444 |
`node' specifies a node connection object. |
445 |
`name' specifies the name of a header. |
446 |
`value' specifies the value of the header. If it is `NULL', the header is removed. |
447 |
If the specified header is already added, the value is concatenated at the end. */ |
448 |
void est_node_add_header(ESTNODE *node, const char *name, const char *value); |
449 |
|
450 |
|
451 |
/* Create a node result object. |
452 |
The return value is a node result object. */ |
453 |
ESTNODERES *est_noderes_new(void); |
454 |
|
455 |
|
456 |
/* Add a document information to a node result object. |
457 |
`nres' specifies a node result object. |
458 |
`attrs' specifies a map object of attributes of the document. The object is closed internally. |
459 |
`snippet' specifies the snippet of the document. The region is released internally. */ |
460 |
void est_noderes_add_doc(ESTNODERES *nres, CBMAP *attrs, char *snippet); |
461 |
|
462 |
|
463 |
/* Remove the top of result document objects in a node result object. |
464 |
`nres' specifies a node result object. |
465 |
`attrp' specifies the pointer to a variable to which reference of the map object of attribute |
466 |
is assigned. The object should be deleted with the function `cbmapclose'. |
467 |
`snippetp' specifies the pointer to a variable to which reference of the snippet string is |
468 |
assigned. The region should be released with the function `free'. |
469 |
The return value is true if success, else it is false. */ |
470 |
int est_noderes_shift(ESTNODERES *nres, CBMAP **attrp, char **snippetp); |
471 |
|
472 |
|
473 |
|
474 |
#if defined(__cplusplus) /* export for C++ */ |
475 |
} |
476 |
#endif |
477 |
|
478 |
#endif /* duplication check */ |
479 |
|
480 |
|
481 |
/* END OF FILE */ |