/[hyperestraier]/upstream/0.5.3/estraier.h
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /upstream/0.5.3/estraier.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (show annotations)
Wed Aug 3 15:25:48 2005 UTC (18 years, 10 months ago) by dpavlin
File MIME type: text/plain
File size: 39415 byte(s)
import of upstream 0.5.3

1 /*************************************************************************************************
2 * The core API of Hyper Estraier
3 * Copyright (C) 2004-2005 Mikio Hirabayashi
4 * This file is part of Hyper Estraier.
5 * Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
6 * the GNU Lesser General Public License as published by the Free Software Foundation; either
7 * version 2.1 of the License or any later version. Hyper Estraier is distributed in the hope
8 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
10 * License for more details.
11 * You should have received a copy of the GNU Lesser General Public License along with Hyper
12 * Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13 * Boston, MA 02111-1307 USA.
14 *************************************************************************************************/
15
16
17 #ifndef _ESTRAIER_H /* duplication check */
18 #define _ESTRAIER_H
19
20 #if defined(__cplusplus) /* export for C++ */
21 extern "C" {
22 #endif
23
24
25
26 /*************************************************************************************************
27 * common settings
28 *************************************************************************************************/
29
30
31 /* version of Hyper Estraier */
32 extern const char *est_version;
33
34
35
36 /*************************************************************************************************
37 * underlying headers
38 *************************************************************************************************/
39
40
41 #include <depot.h>
42 #include <curia.h>
43 #include <cabin.h>
44 #include <villa.h>
45 #include <stdlib.h>
46
47
48
49 /*************************************************************************************************
50 * API for document
51 *************************************************************************************************/
52
53
54 #define ESTDATTRID "@id" /* name of the attribute of ID */
55 #define ESTDATTRURI "@uri" /* name of the attribute of URI */
56 #define ESTDATTRCDATE "@cdate" /* name of the attribute of creation date */
57 #define ESTDATTRMDATE "@mdate" /* name of the attribute of modification date */
58 #define ESTDATTRTITLE "@title" /* name of the attribute of title */
59 #define ESTDATTRAUTHOR "@author" /* name of the attribute of author */
60 #define ESTDATTRTYPE "@type" /* name of the attribute of content type */
61 #define ESTDATTRLANG "@lang" /* name of the attribute of language */
62 #define ESTDATTRSIZE "@size" /* name of the attribute of entity size */
63
64 typedef struct { /* type of structure for a document */
65 int id; /* identification number; `est_doc_id' reports -1 for an unregistered document */
66 CBMAP *attrs; /* map of attributes */
67 CBLIST *dtexts; /* list of sentences of shown text */
68 } ESTDOC;
69
70
71 /* Create a document object.
72 The return value is an object of a document. */
73 ESTDOC *est_doc_new(void);
74
75
76 /* Create a document object made from draft data.
77 `draft' specifies a string of draft data.
78 The return value is an object of a document. */
79 ESTDOC *est_doc_new_from_draft(const char *draft);
80
81
82 /* Destroy a document object.
83 `doc' specifies a document object. */
84 void est_doc_delete(ESTDOC *doc);
85
86
87 /* Add an attribute to a document object.
88 `doc' specifies a document object.
89 `name' specifies the name of an attribute.
90 `value' specifies the value of the attribute. If it is `NULL', the attribute is removed. */
91 void est_doc_add_attr(ESTDOC *doc, const char *name, const char *value);
92
93
94 /* Add a sentence of text to a document object.
95 `doc' specifies a document object.
96 `text' specifies a sentence of text. */
97 void est_doc_add_text(ESTDOC *doc, const char *text);
98
99
100 /* Add a hidden sentence to a document object.
101 `doc' specifies a document object.
102 `text' specifies a hidden sentence. */
103 void est_doc_add_hidden_text(ESTDOC *doc, const char *text);
104
105
106 /* Get the ID number of a document object.
107 `doc' specifies a document object.
108 The return value is the ID number of the document object. If the object has not been
109 registered, -1 is returned. */
110 int est_doc_id(ESTDOC *doc);
111
112
113 /* Get a list of attribute names of a document object.
114 `doc' specifies a document object.
115 The return value is a new list object of attribute names of the document object. Because
116 the object of the return value is opened with the function `cblistopen', it should be closed
117 with the function `cblistclose' if it is no longer in use. */
118 CBLIST *est_doc_attr_names(ESTDOC *doc);
119
120
121 /* Get the value of an attribute of a document object.
122 `doc' specifies a document object.
123 `name' specifies the name of an attribute.
124 The return value is the value of the attribute or `NULL' if it does not exist. The life
125 duration of the returned string is synchronous with the one of the document object. */
126 const char *est_doc_attr(ESTDOC *doc, const char *name);
127
128
129 /* Get a list of sentences of the text of a document object.
130 `doc' specifies a document object.
131 The return value is a list object of sentences of the text of the document object. The life
132 duration of the returned object is synchronous with the one of the document object. */
133 const CBLIST *est_doc_texts(ESTDOC *doc);
134
135
136 /* Concatenate sentences of the text of a document object.
137 `doc' specifies a document object.
138 The return value is concatenated sentences of the document object. Because the region of the
139 return value is allocated with the `malloc' call, it should be released with the `free' call
140 if it is no longer in use. */
141 char *est_doc_cat_texts(ESTDOC *doc);
142
143
144 /* Dump draft data of a document object.
145 `doc' specifies a document object.
146 The return value is draft data of the document object. Because the region of the return value
147 is allocated with the `malloc' call, it should be released with the `free' call if it is no
148 longer in use. */
149 char *est_doc_dump_draft(ESTDOC *doc);
150
151
152 /* Make a snippet of the body text of a document object.
153 `doc' specifies a document object.
154 `words' specifies a list object of words to be highlighted.
155 `wwidth' specifies whole width of the result.
156 `hwidth' specifies width of strings picked up from the beginning of the text.
157 `awidth' specifies width of strings picked up around each highlighted word.
158 The return value is a snippet string of the body text of the document object. There are tab
159 separated values. Each line is a string to be shown. Though most lines have only one field,
160 some lines have two fields. If the second field exists, the first field is to be shown
161 highlighted, and the second field means its normalized form. Because the region of the
162 return value is allocated with the `malloc' call, it should be released with the `free' call
163 if it is no longer in use. */
164 char *est_doc_make_snippet(ESTDOC *doc, const CBLIST *words, int wwidth, int hwidth, int awidth);
165
166
167 /* Check whether the text of a document object includes every specified word.
168 `doc' specifies a document object.
169 `words' specifies a list object of words to be checked.
170 The return value is true if every specified word is found, else it is false. */
171 int est_doc_scan_words(ESTDOC *doc, const CBLIST *words);
172
173
174
175 /*************************************************************************************************
176 * API for search conditions
177 *************************************************************************************************/
178
179
180 #define ESTOPUVSET "[UVSET]" /* universal set */
181 #define ESTOPSIMILAR "[SIMILAR]" /* similarity search */
182
183 #define ESTOPUNION "OR" /* union (disjunction) */
184 #define ESTOPISECT "AND" /* intersection (conjunction) */
185 #define ESTOPDIFF "ANDNOT" /* difference (intersection with negation) */
186 #define ESTOPWITH "WITH" /* delimiter for elements */
187
188 #define ESTOPSTREQ "STREQ" /* string is equal */
189 #define ESTOPSTRNE "STRNE" /* string is not equal */
190 #define ESTOPSTRINC "STRINC" /* string is included in */
191 #define ESTOPSTRBW "STRBW" /* string begins with */
192 #define ESTOPSTREW "STREW" /* string ends with */
193 #define ESTOPNUMEQ "NUMEQ" /* number or date is equal */
194 #define ESTOPNUMNE "NUMNE" /* number or date is not equal */
195 #define ESTOPNUMGT "NUMGT" /* number or date is greater than */
196 #define ESTOPNUMGE "NUMGE" /* number or date is greater than or equal to */
197 #define ESTOPNUMLT "NUMLT" /* number or date is less than */
198 #define ESTOPNUMLE "NUMLE" /* number or date is less than or equal to */
199 #define ESTOPREGEX "REGEX" /* string matches regular expressions */
200
201 #define ESTORDSTRA "STRA" /* strings in ascending order */
202 #define ESTORDSTRD "STRD" /* strings in descending order */
203 #define ESTORDNUMA "NUMA" /* numbers in ascending order */
204 #define ESTORDNUMD "NUMD" /* numbers in descending order */
205
206 typedef struct { /* type of structure for search conditions */
207 char *phrase; /* search phrase */
208 int gstep; /* step of N-gram */
209 int tfidf; /* whether to perform TF-IDF tuning */
210 int simple; /* whether to use the simplified phrase */
211 CBLIST *attrs; /* conditions with attributes */
212 char *order; /* sorting order */
213 int max; /* maximum number of retrieval */
214 int scfb; /* whether to feed back scores */
215 int *scores; /* array of scores */
216 int snum; /* number of elements of the score array */
217 int opts; /* options for preservation */
218 } ESTCOND;
219
220 enum { /* enumeration for options of search conditions (see `est_cond_set_options') */
221 ESTCONDSURE = 1 << 0, /* check every N-gram key */
222 ESTCONDUSU = 1 << 1, /* check N-gram keys skipping by one (the default) */
223 ESTCONDFAST = 1 << 2, /* check N-gram keys skipping by two */
224 ESTCONDAGIT = 1 << 3, /* check N-gram keys skipping by three */
225 ESTCONDNOIDF = 1 << 4, /* without TF-IDF tuning */
226 ESTCONDSIMPLE = 1 << 10, /* with the simplified phrase */
227 ESTCONDSCFB = 1 << 30 /* feed back scores (for debug) */
228 };
229
230
231 /* Create a condition object.
232 The return value is an object of search conditions. */
233 ESTCOND *est_cond_new(void);
234
235
236 /* Destroy a condition object.
237 `cond' specifies a condition object. */
238 void est_cond_delete(ESTCOND *cond);
239
240
241 /* Set the search phrase to a condition object.
242 `cond' specifies a condition object.
243 `phrase' specifies a search phrase. */
244 void est_cond_set_phrase(ESTCOND *cond, const char *phrase);
245
246
247 /* Add an expression for an attribute to a condition object.
248 `cond' specifies a condition object.
249 `expr' specifies an expression for an attribute. */
250 void est_cond_add_attr(ESTCOND *cond, const char *expr);
251
252
253 /* Set the order of a condition object.
254 `cond' specifies a condition object.
255 `expr' specifies an expression for the order. By default, the order is by score descending. */
256 void est_cond_set_order(ESTCOND *cond, const char *expr);
257
258
259 /* Set the maximum number of retrieval of a condition object.
260 `cond' specifies a condition object.
261 `max' specifies the maximum number of retrieval. By default, the number of retrieval is not
262 limited. */
263 void est_cond_set_max(ESTCOND *cond, int max);
264
265
266 /* Set options of retrieval of a condition object.
267 `cond' specifies a condition object.
268 `options' specifies options: `ESTCONDSURE' specifies that it checks every N-gram key,
269 `ESTCONDUSU', which is the default, specifies that it checks N-gram keys with skipping one
270 key, `ESTCONDFAST' skips two keys, `ESTCONDAGIT' skips three keys, `ESTCONDNOIDF' specifies
271 not to perform TF-IDF tuning, `ESTCONDSIMPLE' specifies to use simplified phrase. Each option
272 can be specified at the same time by bitwise or. If keys are skipped, though search speed is
273 improved, the relevance ratio grows less. */
274 void est_cond_set_options(ESTCOND *cond, int options);
275
276
277
278 /*************************************************************************************************
279 * API for database
280 *************************************************************************************************/
281
282
283 #define ESTIDXDMAX 16 /* max number of the inverted index */
284
285 typedef struct { /* type of structure for the inverted index */
286 char *name; /* name of the database */
287 int omode; /* open mode */
288 VILLA *dbs[ESTIDXDMAX]; /* database handles (at most ESTIDXDMAX) */
289 int dnum; /* number of divisions */
290 VILLA *cdb; /* current database handle */
291 } ESTIDX;
292
293 typedef struct { /* type of structure for a database object */
294 char *name; /* name of the database */
295 DEPOT *metadb; /* handle of the meta database */
296 ESTIDX *idxdb; /* handles of the inverted indexes */
297 VILLA *fwmdb; /* handle of the database for forward matching */
298 CURIA *attrdb; /* handle of the database for attributes */
299 CURIA *textdb; /* handle of the database for texts */
300 VILLA *listdb; /* handle of the database for document list */
301 int ecode; /* last happened error code */
302 int fatal; /* whether a fatal error has occurred */
303 int dseq; /* sequence for document IDs */
304 int dnum; /* number of the documents */
305 int amode; /* mode of text analyzer */
306 CBMAP *idxcc; /* cache for the inverted index */
307 size_t icsiz; /* power of the cache */
308 size_t icmax; /* max size of the cache */
309 CBMAP *outcc; /* cache for deleted documents */
310 CBMAP *keycc; /* cache for keys for TF-IDF */
311 int kcmnum; /* max number of the key cache */
312 CBMAP *attrcc; /* cache for attributes */
313 int acmnum; /* max number of the attribute cache */
314 CBMAP *textcc; /* cache for texts */
315 int tcmnum; /* max number of the text cache */
316 CBMAP *spacc; /* special cache for attributes */
317 int scmnum; /* max number of the special cache */
318 char *scname; /* name of the attribute for the special cache */
319 void (*cbinfo)(const char *); /* callback function to inform of events */
320 CBMAP *(*cbvec)(void *, int, void *); /* callback function to create a vector */
321 void *vecdata; /* arbitrary object for the vectorizer */
322 CBMAP *metacc; /* cache for meta data */
323 } ESTDB;
324
325 enum { /* enumeration for error codes */
326 ESTENOERR, /* no error */
327 ESTEINVAL, /* invalid argument */
328 ESTEACCES, /* access forbidden */
329 ESTELOCK, /* lock failure */
330 ESTEDB, /* database problem */
331 ESTEIO, /* I/O problem */
332 ESTENOITEM, /* no item */
333 ESTEMISC = 9999 /* miscellaneous */
334 };
335
336 enum { /* enumeration for open modes */
337 ESTDBREADER = 1 << 0, /* open as a reader */
338 ESTDBWRITER = 1 << 1, /* open as a writer */
339 ESTDBCREAT = 1 << 2, /* a writer creating */
340 ESTDBTRUNC = 1 << 3, /* a writer truncating */
341 ESTDBNOLCK = 1 << 4, /* open without locking */
342 ESTDBLCKNB = 1 << 5, /* lock without blocking */
343 ESTDBPERFNG = 1 << 6 /* use perfect N-gram analyzer */
344 };
345
346 enum { /* enumeration for options of document registration */
347 ESTPDCLEAN = 1 << 0 /* clean up dispensable regions */
348 };
349
350 enum { /* enumeration for options of document deletion */
351 ESTODCLEAN = 1 << 0 /* clean up dispensable regions */
352 };
353
354 enum { /* enumeration for options of optimization */
355 ESTOPTNOPURGE = 1 << 0, /* omit purging dispensable regions of deleted documents */
356 ESTOPTNODBOPT = 1 << 1 /* omit optimization of the database files */
357 };
358
359 enum { /* enumeration for options of document retrieval */
360 ESTGDNOATTR = 1 << 0, /* no attributes */
361 ESTGDNOTEXT = 1 << 1 /* no text */
362 };
363
364
365 /* Get the string of an error code.
366 `ecode' specifies an error code.
367 The return value is the string of the error code. */
368 const char *est_err_msg(int ecode);
369
370
371 /* Open a database.
372 `name' specifies the name of a database directory.
373 `omode' specifies open modes: `ESTDBWRITER' as a writer, `ESTDBREADER' as a reader. If the
374 mode is `ESTDBWRITER', the following may be added by bitwise or: `ESTDBCREAT', which means it
375 creates a new database if it does not exist, `ESTDBTRUNC', which means it creates a new database
376 regardless if one exists. Both of `ESTDBREADER' and `ESTDBWRITER' can be added to by
377 bitwise or: `ESTDBNOLCK', which means it opens a database file without file locking, or
378 `ESTDBLCKNB', which means locking is performed without blocking. If `ESTDBNOLCK' is used,
379 the application is responsible for exclusion control. `ESTDBCREAT' can be added to by bitwise
380 or: `ESTDBPERFNG', which means N-gram analysis is performed against European text also.
381 `ecp' specifies the pointer to a variable to which the error code is assigned.
382 The return value is a database object of the database or `NULL' if failure. */
383 ESTDB *est_db_open(const char *name, int omode, int *ecp);
384
385
386 /* Close a database.
387 `db' specifies a database object.
388 `ecp' specifies the pointer to a variable to which the error code is assigned.
389 The return value is true if success, else it is false. */
390 int est_db_close(ESTDB *db, int *ecp);
391
392
393 /* Get the last happened error code of a database.
394 `db' specifies a database object.
395 The return value is the last happened error code of the database. */
396 int est_db_error(ESTDB *db);
397
398
399 /* Check whether a database has a fatal error.
400 `db' specifies a database object.
401 The return value is true if the database has a fatal error, else it is false. */
402 int est_db_fatal(ESTDB *db);
403
404
405 /* Flush index words in the cache of a database.
406 `db' specifies a database object connected as a writer.
407 `max' specifies the maximum number of words to be flushed. If it is not more than zero, all
408 words are flushed.
409 The return value is true if success, else it is false. */
410 int est_db_flush(ESTDB *db, int max);
411
412
413 /* Synchronize updating contents of a database.
414 `db' specifies a database object connected as a writer.
415 The return value is true if success, else it is false. */
416 int est_db_sync(ESTDB *db);
417
418
419 /* Optimize a database.
420 `db' specifies a database object connected as a writer.
421 `options' specifies options: `ESTOPTNOPURGE' to omit purging dispensable region of deleted
422 documents, `ESTOPTNODBOPT' to omit optimization of the database files. The two can be
423 specified at the same time by bitwise or.
424 The return value is true if success, else it is false. */
425 int est_db_optimize(ESTDB *db, int options);
426
427
428 /* Add a document to a database.
429 `db' specifies a database object connected as a writer.
430 `doc' specifies a document object. The document object should have the URI attribute.
431 `options' specifies options: `ESTPDCLEAN' to clean up dispensable regions of the overwritten
432 document.
433 The return value is true if success, else it is false.
434 If the URI attribute is the same as that of an existing document in the database, the existing one is
435 deleted. */
436 int est_db_put_doc(ESTDB *db, ESTDOC *doc, int options);
437
438
439 /* Remove a document from a database.
440 `db' specifies a database object connected as a writer.
441 `id' specifies the ID number of a registered document.
442 `options' specifies options: `ESTODCLEAN' to clean up dispensable regions of the deleted
443 document.
444 The return value is true if success, else it is false. */
445 int est_db_out_doc(ESTDB *db, int id, int options);
446
447
448 /* Retrieve a document in a database.
449 `db' specifies a database object.
450 `id' specifies the ID number of a registered document.
451 `options' specifies options: `ESTGDNOATTR' to ignore attributes, `ESTGDNOTEXT' to ignore
452 the body text. The two can be specified at the same time by bitwise or.
453 The return value is a document object. On error, `NULL' is returned. */
454 ESTDOC *est_db_get_doc(ESTDB *db, int id, int options);
455
456
457 /* Retrieve the value of an attribute of a document in a database.
458 `db' specifies a database object.
459 `id' specifies the ID number of a registered document.
460 `name' specifies the name of an attribute.
461 The return value is the value of the attribute or `NULL' if it does not exist. Because the
462 region of the return value is allocated with the `malloc' call, it should be released with
463 the `free' call if it is no longer in use. */
464 char *est_db_get_doc_attr(ESTDB *db, int id, const char *name);
465
466
467 /* Get the ID of a document specified by URI.
468 `db' specifies a database object.
469 `uri' specifies the URI of a registered document.
470 The return value is the ID of the document. On error, -1 is returned. */
471 int est_db_uri_to_id(ESTDB *db, const char *uri);
472
473
474 /* Extract keywords of a document object.
475 `db' specifies a database object for TF-IDF tuning. If it is `NULL', it is not used.
476 `doc' specifies a document object.
477 `max' specifies the maximum number of keywords to be extracted.
478 The return value is a new map object of keywords and their scores in decimal string. Because
479 the object of the return value is opened with the function `cbmapopen', it should be closed
480 with the function `cbmapclose' if it is no longer in use. */
481 CBMAP *est_db_etch_doc(ESTDB *db, ESTDOC *doc, int max);
482
483
484 /* Initialize the iterator of a database.
485 `db' specifies a database object.
486 The return value is true if success, else it is false. */
487 int est_db_iter_init(ESTDB *db);
488
489
490 /* Get the next ID of the iterator of a database.
491 `db' specifies a database object.
492 The return value is the next ID. If there are no more documents, 0 is returned. On error,
493 -1 is returned. */
494 int est_db_iter_next(ESTDB *db);
495
496
497 /* Get the name of a database.
498 `db' specifies a database object.
499 The return value is the name of the database. The life duration of the returned string is
500 synchronous with the one of the database object. */
501 const char *est_db_name(ESTDB *db);
502
503
504 /* Get the number of documents in a database.
505 `db' specifies a database object.
506 The return value is the number of documents in the database. */
507 int est_db_doc_num(ESTDB *db);
508
509
510 /* Get the number of unique words in a database.
511 `db' specifies a database object.
512 The return value is the number of unique words in the database. */
513 int est_db_word_num(ESTDB *db);
514
515
516 /* Get the size of a database.
517 `db' specifies a database object.
518 The return value is the size of the database. */
519 double est_db_size(ESTDB *db);
520
521
522 /* Search a database for documents corresponding to a condition.
523 `db' specifies a database object.
524 `cond' specifies a condition object.
525 `nump' specifies the pointer to a variable to which the number of elements in the result is
526 assigned.
527 `hints' specifies a map object into which the number of documents corresponding to each word
528 is stored. If a word is in a negative condition, the number is negative. The element whose
529 key is an empty string specifies the number of whole result. If it is `NULL', it is not used.
530 The return value is an array whose elements are ID numbers of corresponding documents.
531 This function never fails. Even if no document corresponds or an error occurs, an empty
532 array is returned. Because the region of the return value is allocated with the `malloc'
533 call, it should be released with the `free' call if it is no longer in use. */
534 int *est_db_search(ESTDB *db, ESTCOND *cond, int *nump, CBMAP *hints);
535
536
537 /* Set the maximum size of the cache memory of a database.
538 `db' specifies a database object.
539 `size' specifies the maximum size of the index cache. By default, it is 64MB. If it is not
540 more than 0, the current size is not changed.
541 `anum' specifies the maximum number of cached records for document attributes. By default, it
542 is 8192. If it is not more than 0, the current size is not changed.
543 `tnum' specifies the maximum number of cached records for document texts. By default, it is
544 1024. If it is not more than 0, the current size is not changed. */
545 void est_db_set_cache_size(ESTDB *db, size_t size, int anum, int tnum);
546
547
548 /* Set the special cache for narrowing and sorting with document attributes.
549 `db' specifies a database object.
550 `name' specifies the name of an attribute.
551 `num' specifies the maximum number of cached records. */
552 void est_db_set_special_cache(ESTDB *db, const char *name, int num);
553
554
555
556 /*************************************************************************************************
557 * features for experts
558 *************************************************************************************************/
559
560
561 #define _EST_VERSION "0.5.3"
562 #define _EST_LIBVER 200
563 #define _EST_PROTVER "0.9"
564
565 enum { /* enumeration for languages */
566 ESTLANGEN, /* English */
567 ESTLANGJA, /* Japanese */
568 ESTLANGZH, /* Chinese */
569 ESTLANGKO, /* Korean */
570 ESTLANGMISC /* miscellaneous */
571 };
572
573
574 /* Break a sentence of text and extract words.
575 `text' specifies a sentence of text.
576 `list' specifies a list object to which extracted words are added.
577 `norm' specifies whether to normalize the text.
578 `tail' specifies whether to pick up oddness N-gram at the end. */
579 void est_break_text(const char *text, CBLIST *list, int norm, int tail);
580
581
582 /* Break a sentence of text and extract words using perfect N-gram analyzer.
583 `text' specifies a sentence of text.
584 `list' specifies a list object to which extracted words are added.
585 `norm' specifies whether to normalize the text.
586 `tail' specifies whether to pick up oddness N-gram at the end. */
587 void est_break_text_perfng(const char *text, CBLIST *list, int norm, int tail);
588
589
590 /* Convert the character encoding of a string.
591 `ptr' specifies the pointer to a region.
592 `size' specifies the size of the region. If it is negative, the size is assigned with
593 `strlen(ptr)'.
594 `icode' specifies the name of encoding of the input string.
595 `ocode' specifies the name of encoding of the output string.
596 `sp' specifies the pointer to a variable to which the size of the region of the return
597 value is assigned. If it is `NULL', it is not used.
598 `mp' specifies the pointer to a variable to which the number of missing characters by failure
599 of conversion is assigned. If it is `NULL', it is not used.
600 If successful, the return value is the pointer to the result object, else, it is `NULL'.
601 Because an additional zero code is appended at the end of the region of the return value,
602 the return value can be treated as a character string. Because the region of the return
603 value is allocated with the `malloc' call, it should be released with the `free' call if it
604 is no longer in use. */
605 char *est_iconv(const char *ptr, int size, const char *icode, const char *ocode,
606 int *sp, int *mp);
607
608
609 /* Detect the encoding of a string automatically.
610 `ptr' specifies the pointer to a region.
611 `size' specifies the size of the region. If it is negative, the size is assigned with
612 `strlen(ptr)'.
613 `plang' specifies a preferred language. As for now, `ESTLANGEN', `ESTLANGJA', `ESTLANGZH',
614 and `ESTLANGKO' are supported.
615 The return value is the string of the encoding name of the string. */
616 const char *est_enc_name(const char *ptr, int size, int plang);
617
618
619 /* Convert a UTF-8 string into UTF-16BE.
620 `ptr' specifies the pointer to a region.
621 `size' specifies the size of the region.
622 `sp' specifies the pointer to a variable to which the size of the region of the return
623 value is assigned.
624 The return value is the pointer to the result object. Because an additional zero code is
625 appended at the end of the region of the return value, the return value can be treated as a
626 character string. Because the region of the return value is allocated with the `malloc' call,
627 it should be released with the `free' call if it is no longer in use. */
628 char *est_uconv_in(const char *ptr, int size, int *sp);
629
630
631 /* Convert a UTF-16BE string into UTF-8.
632 `ptr' specifies the pointer to a region.
633 `size' specifies the size of the region.
634 `sp' specifies the pointer to a variable to which the size of the region of the return
635 value is assigned. If it is `NULL', it is not used.
636 The return value is the pointer to the result object. Because an additional zero code is
637 appended at the end of the region of the return value, the return value can be treated as a
638 character string. Because the region of the return value is allocated with the `malloc' call,
639 it should be released with the `free' call if it is no longer in use. */
640 char *est_uconv_out(const char *ptr, int size, int *sp);
641
642
643 /* Compress a serial object with ZLIB.
644 `ptr' specifies the pointer to a region.
645 `size' specifies the size of the region. If it is negative, the size is assigned with
646 `strlen(ptr)'.
647 `sp' specifies the pointer to a variable to which the size of the region of the return
648 value is assigned.
649 If successful, the return value is the pointer to the result object, else, it is `NULL'.
650 Because the region of the return value is allocated with the `malloc' call, it should be
651 released with the `free' call if it is no longer in use. */
652 char *est_deflate(const char *ptr, int size, int *sp);
653
654
655 /* Decompress a serial object compressed with ZLIB.
656 `ptr' specifies the pointer to a region.
657 `size' specifies the size of the region.
658 `sp' specifies the pointer to a variable to which the size of the region of the return
659 value is assigned. If it is `NULL', it is not used.
660 If successful, the return value is the pointer to the result object, else, it is `NULL'.
661 Because an additional zero code is appended at the end of the region of the return value,
662 the return value can be treated as a character string. Because the region of the return
663 value is allocated with the `malloc' call, it should be released with the `free' call if it
664 is no longer in use. */
665 char *est_inflate(const char *ptr, int size, int *sp);
666
667
668 /* Get the border string for draft data of documents.
669 The return value is the border string for draft data of documents. */
670 const char *est_border_str(void);
671
672
673 /* Get the real random number.
674 The return value is the real random number between 0.0 and 1.0. */
675 double est_random(void);
676
677
678 /* Get the random number in normal distribution.
679 The return value is the random number in normal distribution between 0.0 and 1.0. */
680 double est_random_nd(void);
681
682
683 /* Get an MD5 hash string of a key string.
684 `key' specifies a string to be encrypted.
685 The return value is an MD5 hash string of the key string. Because the region of the return
686 value is allocated with the `malloc' call, it should be released with the `free' call if it
687 is no longer in use. */
688 char *est_make_crypt(const char *key);
689
690
691 /* Check whether a key matches an MD5 hash string.
692 `key' specifies a string to be checked.
693 `hash' specifies an MD5 hash string.
694 The return value is true if the key matches the hash string, else it is false. */
695 int est_match_crypt(const char *key, const char *hash);
696
697
698 /* Get the hidden texts of a document object.
699 `doc' specifies a document object.
700 The return value is concatenated sentences of the hidden text of the document object. The
701 life duration of the returned string is synchronous with the one of the document object. */
702 const char *est_doc_hidden_texts(ESTDOC *doc);
703
704
705 /* Get the phrase of a condition object.
706 `cond' specifies a condition object.
707 The return value is the phrase of the condition object or `NULL' if it is not specified. The
708 life duration of the returned string is synchronous with the one of the condition object. */
709 const char *est_cond_phrase(ESTCOND *cond);
710
711
712 /* Get a list object of attribute expressions of a condition object.
713 `cond' specifies a condition object.
714 The return value is a list object of attribute expressions of the condition object or `NULL' if
715 it is not specified. The life duration of the returned object is synchronous with the one of
716 the condition object. */
717 const CBLIST *est_cond_attrs(ESTCOND *cond);
718
719
720 /* Get the order expression of a condition object.
721 `cond' specifies a condition object.
722 The return value is the order expression of the condition object or `NULL' if it is not
723 specified. The life duration of the returned string is synchronous with the one of the
724 condition object. */
725 const char *est_cond_order(ESTCOND *cond);
726
727
728 /* Get the maximum number of retrieval of a condition object.
729 `cond' specifies a condition object.
730 The return value is the maximum number of retrieval of the condition object or -1 if it is not
731 specified. */
732 int est_cond_max(ESTCOND *cond);
733
734
735 /* Get the options of a condition object.
736 `cond' specifies a condition object.
737 The return value is the options of the condition object. */
738 int est_cond_options(ESTCOND *cond);
739
740
741 /* Get the score of a document corresponding to a condition object.
742 `cond' specifies a condition object.
743 `index' specifies the index of an element of the result array of `est_db_search'.
744 The return value is the score of the element or -1 if the index is out of bounds. */
745 int est_cond_score(ESTCOND *cond, int index);
746
747
748 /* Set the error code of a database.
749 `db' specifies a database object.
750 `ecode' specifies an error code to set. */
751 void est_db_set_ecode(ESTDB *db, int ecode);
752
753
754 /* Edit attributes of a document object in a database.
755 `db' specifies a database object connected as a writer.
756 `doc' specifies a document object.
757 The return value is true if success, else it is false. */
758 int est_db_edit_doc(ESTDB *db, ESTDOC *doc);
759
760
761 /* Add a piece of meta data to a database.
762 `db' specifies a database object connected as a writer.
763 `name' specifies the name of a piece of meta data.
764 `value' specifies the value of the meta data. If it is `NULL', the meta data is removed. */
765 void est_db_add_meta(ESTDB *db, const char *name, const char *value);
766
767
768 /* Get a list of names of meta data of a database.
769 `db' specifies a database object.
770 The return value is a new list object of meta data names of the database. Because the
771 object of the return value is opened with the function `cblistopen', it should be closed with
772 the function `cblistclose' if it is no longer in use. */
773 CBLIST *est_db_meta_names(ESTDB *db);
774
775
776 /* Get the value of a piece of meta data of a database.
777 `db' specifies a database object.
778 `name' specifies the name of a piece of meta data.
779 The return value is the value of the meta data or `NULL' if it does not exist. Because the
780 region of the return value is allocated with the `malloc' call, it should be released with
781 the `free' call if it is no longer in use. */
782 char *est_db_meta(ESTDB *db, const char *name);
783
784
785 /* Get the number of records in the cache memory of a database.
786 `db' specifies a database object.
787 The return value is the number of records in the cache memory of the database. */
788 int est_db_cache_num(ESTDB *db);
789
790
791 /* Set the callback function to inform of database events.
792 `db' specifies a database object.
793 `func' specifies the pointer to a function. The argument of the callback specifies a message
794 of each event. */
795 void est_db_set_informer(ESTDB *db, void (*func)(const char *));
796
797
798 /* Set the callback function to create a vector of keywords of a document.
799 `db' specifies a database object.
800 `func' specifies the pointer to a function. The arguments of the callback specify the
801 database object, the ID of a document, and an arbitrary pointer. The return value of the
802 callback is a new map object conforming to the return value of `est_db_etch_doc'.
803 `data' specifies the pointer to an object given as the third argument of the callback. */
804 void est_db_set_vectorizer(ESTDB *db, CBMAP *(*func)(void *, int, void *), void *data);
805
806
807 /* Fill the cache for keys for TF-IDF.
808 `db' specifies a database object. */
809 void est_db_fill_key_cache(ESTDB *db);
810
811
812 /* Make a directory.
813 `path' specifies the path of a new directory.
814 The return value is true if success, else it is false. */
815 int est_mkdir(const char *path);
816
817
818 /* Remove a directory and its contents recursively.
819 `path' specifies the path of a directory.
820 The return value is true if success, else it is false. */
821 int est_rmdir_rec(const char *path);
822
823
824 /* Get the canonicalized absolute pathname of a file.
825 `path' specifies the path of a file.
826 The return value is the canonicalized absolute pathname of a file. Because the region of the
827 return value is allocated with the `malloc' call, it should be released with the `free' call
828 if it is no longer in use. */
829 char *est_realpath(const char *path);
830
831
832 /* Get the time of day in milliseconds.
833 The return value is the time of day in milliseconds. */
834 double est_gettimeofday(void);
835
836
837 /* Suspend execution for microsecond intervals.
838 `usec' specifies microseconds to sleep for. */
839 void est_usleep(unsigned long usec);
840
841
842 /* Send a signal to a process.
843 `pid' specifies the PID of a target process.
844 `sig' specifies a signal code.
845 The return value is true if success, else it is false. */
846 int est_kill(int pid, int sig);
847
848
849 /* Get the media type of an extension.
850 `ext' specifies the extension of a file path.
851 The return value is the media type of the extension. */
852 const char *est_ext_type(const char *ext);
853
854
855
856 #if defined(__cplusplus) /* export for C++ */
857 }
858 #endif
859
860 #endif /* duplication check */
861
862
863 /* END OF FILE */

  ViewVC Help
Powered by ViewVC 1.1.26