/[hyperestraier_wrappers]/trunk/HyperEstraierWrapper.cpp
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/HyperEstraierWrapper.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2 - (show annotations)
Sat Sep 3 18:04:41 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 11650 byte(s)
make working copy
1 /**
2 * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3 */
4 #include <estraier.h>
5 #include <estmtdb.h>
6 #include <cabin.h>
7 #include <cstdlib>
8 #include <string>
9 #include <vector>
10 #include <map>
11 #include <cassert>
12 #include <stdexcept>
13
14 namespace estraier {
15
/**
 * Exception raised by this wrapper when an operation is attempted on a
 * database that is not open. It carries a human-readable message that is
 * available through what().
 */
class IOError : public std::runtime_error {
public:
    explicit IOError(const std::string& message)
        : std::runtime_error(message)
    {
    }
};
20
21 class Condition {
22 public:
23 enum { // enumeration for options
24 SURE = ESTCONDSURE, // check every N-gram key
25 USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
26 FAST = ESTCONDFAST, // check N-gram keys skipping by two
27 AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
28 NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
29 SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
30 };
31 ESTCOND * cond;
32 Condition() {
33 /**
34 * constructor
35 */
36 cond = est_cond_new();
37 }
38 ~Condition() {
39 /**
40 * destructor
41 */
42 est_cond_delete(cond);
43 }
44 void set_phrase(const char *phrase) {
45 /**
46 * set the search phrase
47 */
48 est_cond_set_phrase(cond, phrase);
49 }
50 void add_attr(const char *expr) {
51 /**
52 * set the attribute expression
53 */
54 est_cond_add_attr(cond, expr);
55 }
56 void set_order(const char *expr) {
57 /**
58 * set the order of a condition object
59 */
60 est_cond_set_order(cond, expr);
61 }
62 void set_max(int _max) {
63 /**
64 * set the maximum number of retrieval of a condition object
65 */
66 est_cond_set_max(cond, _max);
67 }
68 void set_options(int options) {
69 /**
70 * set options of retrieval of a condition object
71 */
72 est_cond_set_options(cond, options);
73 }
74 };
75
76 class Document {
77 private:
78 std::string text_buf;
79 public:
80 ESTDOC *doc;
81 Document() {
82 /**
83 * constructor
84 */
85 doc = est_doc_new();
86 }
87 Document(const char* draft) {
88 /**
89 * constructor
90 */
91 doc = est_doc_new_from_draft(draft);
92 }
93 Document(ESTDOC *_doc) {
94 /**
95 * constructor
96 */
97 doc = _doc;
98 }
99 ~Document() {
100 /**
101 * destructor
102 */
103 est_doc_delete(doc);
104 }
105 void add_attr(const char * name, const char*value) {
106 /**
107 * add an attribute to a document object
108 */
109 est_doc_add_attr(doc, name, value);
110 }
111 void add_text(const char *text) {
112 /**
113 * add a sentence of text to a document object
114 */
115 est_doc_add_text(doc, text);
116 }
117 void add_hidden_text(const char * text) {
118 /**
119 * add a hidden sentence to a document object
120 */
121 est_doc_add_hidden_text(doc, text);
122 }
123 int id() {
124 /**
125 * get the ID number of a document object
126 */
127 return est_doc_id(doc);
128 }
129 std::vector<std::string> * attr_names() {
130 /**
131 * get a list of attribute names of a document object
132 */
133 std::vector<std::string> * vs = new std::vector<std::string>;
134 CBLIST * attr_names = est_doc_attr_names(doc);
135 for (int i=0; i < cblistnum(attr_names); i++) {
136 vs->push_back(cblistval(attr_names, i, NULL));
137 }
138 cblistclose(attr_names);
139 return vs;
140 }
141 const char * attr(const char *name) {
142 /**
143 * get the value of an attribute of a document object
144 */
145 return est_doc_attr(doc, name);
146 }
147 const char * cat_texts() {
148 /**
149 * get a list of sentences of the text of a document object
150 */
151 return est_doc_cat_texts(doc);
152 }
153 std::vector<std::string>* texts() {
154 /**
155 * get a list of sentences of the text of a document object
156 */
157 std::vector<std::string> * vs = new std::vector<std::string>;
158 const CBLIST *texts;
159 texts = est_doc_texts(doc);
160 for(int i = 0; i < cblistnum(texts); i++) {
161 vs->push_back(cblistval(texts, i, NULL));
162 }
163 return vs;
164 }
165 const char * dump_draft() {
166 /**
167 * dump draft data of a document object
168 */
169 return est_doc_dump_draft(doc);
170 }
171 const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
172 /**
173 * make a snippet of the body text of a document object
174 */
175 CBLIST * words;
176 std::vector<std::string>::iterator iter;
177 words = cblistopen();
178 for (iter = _words.begin(); _words.end() != iter; iter++) {
179 cblistpush(words, iter->c_str(), -1);
180 }
181 const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
182 cblistclose(words);
183 return result;
184 }
185 const char * hidden_texts() {
186 /**
187 * get the hidden texts of a document object.
188 */
189 return est_doc_hidden_texts(doc);
190 }
191 };
192
193 class Database {
194 private:
195 ESTMTDB *db;
196 int ecode;
197 public:
198 enum { // enumeration for error codes
199 ERRNOERR = ESTENOERR, // no error
200 ERRINVAL = ESTEINVAL, // invalid argument
201 ERRACCES = ESTEACCES, // access forbidden
202 ERRLOCK = ESTELOCK, // lock failure
203 ERRDB = ESTEDB, // database problem
204 ERRIO = ESTEIO, // I/O problem
205 ERRNOITEM = ESTENOITEM, // no item
206 ERRMISC = ESTEMISC // miscellaneous
207 };
208 enum { // enumeration for open modes
209 DBREADER = ESTDBREADER, // open as a reader
210 DBWRITER = ESTDBWRITER, // open as a writer
211 DBCREAT = ESTDBCREAT, // a writer creating
212 DBTRUNC = ESTDBTRUNC, // a writer truncating
213 DBNOLCK = ESTDBNOLCK, // open without locking
214 DBLCKNB = ESTDBLCKNB, // lock without blocking
215 DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
216 };
217 enum { // enumeration for options of document registration
218 PDCLEAN = ESTPDCLEAN // clean up dispensable regions
219 };
220 enum { // enumeration for options of document deletion
221 ODCLEAN = ESTODCLEAN // clean up dispensable regions
222 };
223 enum { // enumeration for options of optimization
224 OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
225 OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
226 };
227 enum { // enumeration for options of document retrieval
228 GDNOATTR = ESTGDNOATTR, // no attributes
229 GDNOTEXT = ESTGDNOTEXT // no text
230 };
231 Database() {
232 /**
233 * constructor(dummy)
234 */
235 db = NULL;
236 ecode = ERRNOERR;
237 }
238 ~Database() {
239 if (db) close();
240 }
241 bool open(const char * dbname, int mode) {
242 /**
243 * open the database
244 */
245 if (db) close();
246 int ec;
247 db = est_mtdb_open(dbname, mode, &ec);
248 if (!db) ecode = ec;
249 return db;
250 }
251 bool close() {
252 /**
253 * close the database
254 */
255 if (!db) throw IOError("closed database");
256 int ec;
257 bool result = est_mtdb_close(db, &ec);
258 if (!result) ecode = ec;
259 db = NULL;
260 return result;
261 }
262 bool put_doc(Document *doc, int options) {
263 /**
264 * add a document to a database
265 */
266 if (!db) throw IOError("closed database");
267 bool result = est_mtdb_put_doc(db, doc->doc, options);
268 if (!result) ecode = est_mtdb_error(db);
269 return result;
270 }
271 std::vector<int> * search(Condition * cond, int options) {
272 /**
273 * search documents corresponding a condition for a database
274 */
275 if (!db) throw IOError("closed database");
276 int resnum;
277 int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
278 std::vector<int> *numbers = new std::vector<int>;
279 for (int i=0; i<resnum; i++) {
280 numbers->push_back(result[i]);
281 }
282 return numbers;
283 }
284 static const char * err_msg(int ecode) {
285 /**
286 * get the string of an error
287 */
288 return est_err_msg(ecode);
289 }
290 int error() {
291 /**
292 * get the last happended error code of a database
293 */
294 return ecode;
295 }
296 bool fatal() {
297 /**
298 * check whether a database has a fatal error
299 */
300 if (!db) throw IOError("closed database");
301 return est_mtdb_fatal(db);
302 }
303 bool flush(int _max) {
304 /**
305 * flush index words in the cache of a database
306 */
307 if (!db) throw IOError("closed database");
308 bool result = est_mtdb_flush(db, _max);
309 if (!result) ecode = est_mtdb_error(db);
310 return result;
311 }
312 bool sync() {
313 /**
314 * synchronize updating contents of a database
315 */
316 if (!db) throw IOError("closed database");
317 bool result = est_mtdb_sync(db);
318 if (!result) ecode = est_mtdb_error(db);
319 return result;
320 }
321 bool optimize(int options) {
322 /**
323 * optimize a database
324 */
325 if (!db) throw IOError("closed database");
326 bool result = est_mtdb_optimize(db, options);
327 if (!result) ecode = est_mtdb_error(db);
328 return result;
329 }
330 bool out_doc(int id, int options) {
331 /**
332 * remove a document from a database
333 */
334 if (!db) throw IOError("closed database");
335 bool result = est_mtdb_out_doc(db, id, options);
336 if (!result) ecode = est_mtdb_error(db);
337 return result;
338 }
339 bool edit_doc(Document *doc) {
340 /**
341 * edit an attribute of a document in a database
342 */
343 if (!db) throw IOError("closed database");
344 bool result = est_mtdb_edit_doc(db, doc->doc);
345 if (!result) ecode = est_mtdb_error(db);
346 return result;
347 }
348 Document * get_doc(int id, int options) {
349 /**
350 * retrieve a document in a database
351 */
352 if (!db) throw IOError("closed database");
353 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
354 if (!doc) {
355 ecode = est_mtdb_error(db);
356 throw est_err_msg(est_mtdb_error(db));
357 } else {
358 return new Document(doc);
359 }
360 }
361 int uri_to_id(const char *uri) {
362 /**
363 * get the ID of a document spacified by URI
364 */
365 if (!db) throw IOError("closed database");
366 int result = est_mtdb_uri_to_id(db, uri);
367 if(result == -1) ecode = est_mtdb_error(db);
368 return result;
369 }
370 std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
371 /**
372 * extract keywords of a document object
373 */
374 if (!db) throw IOError("closed database");
375 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
376 CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
377 cbmapiterinit(keys);
378 int ksiz;
379 while (const char *key = cbmapiternext(keys, &ksiz)) {
380 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
381 }
382 return mss;
383 }
384 const char * name() {
385 /**
386 * get the name of a database
387 */
388 if (!db) throw IOError("closed database");
389 return est_mtdb_name(db);
390 }
391 int doc_num() {
392 /**
393 * get the number of documents in a database
394 */
395 if (!db) throw IOError("closed database");
396 return est_mtdb_doc_num(db);
397 }
398 int word_num() {
399 /**
400 * get the number of unique words in a database
401 */
402 if (!db) throw IOError("closed database");
403 return est_mtdb_word_num(db);
404 }
405 double size() {
406 /**
407 * get the size of a database
408 */
409 if (!db) throw IOError("closed database");
410 return est_mtdb_size(db);
411 }
412 void set_cache_size(size_t size, int anum, int tnum) {
413 /**
414 * set the maximum size of the cache memory of a database
415 */
416 if (!db) throw IOError("closed database");
417 est_mtdb_set_cache_size(db, size, anum, tnum);
418 }
419 void set_special_cache(const char *name, int num) {
420 /**
421 * Set the special cache for narrowing and sorting
422 * with document attributes
423 */
424 est_mtdb_set_special_cache(db, name, num);
425 }
426 };
427
428 static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
429 std::vector<std::string> * vs = new std::vector<std::string>;
430 CBLIST *list;
431 list = cblistopen();
432 est_break_text(text, list, norm, tail);
433 for (int i=0; i < cblistnum(list); i++) {
434 vs->push_back(cblistval(list, i, NULL));
435 }
436 cblistclose(list);
437 return vs;
438 }
439
440 static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
441 std::vector<std::string> * vs = new std::vector<std::string>;
442 CBLIST *list;
443 list = cblistopen();
444 est_break_text_perfng(text, list, norm, tail);
445 for (int i=0; i < cblistnum(list); i++) {
446 vs->push_back(cblistval(list, i, NULL));
447 }
448 cblistclose(list);
449 return vs;
450 }
451
452 };

  ViewVC Help
Powered by ViewVC 1.1.26