/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory | Revision Log


Revision 4 - (show annotations)
Sat Sep 3 18:44:31 2005 UTC (18 years, 7 months ago) by dpavlin
File size: 9508 byte(s)
backward compatibility for 0.5.4
1 /**
2 * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3 */
4 #include <estraier.h>
5 #include <estmtdb.h>
6 #include <cabin.h>
7 #include <cstdlib>
8 #include <string>
9 #include <vector>
10 #include <map>
11 #include <cassert>
12
13 /* backward compatibility for 0.5.4 */
14 #ifndef ESTCONDAGITO
15 #define ESTCONDAGITO ESTCONDAGIT
16 #endif
17
18 namespace estraier {
19 class Condition {
20 public:
21 enum { // enumeration for options
22 SURE = ESTCONDSURE, // check every N-gram key
23 USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
24 FAST = ESTCONDFAST, // check N-gram keys skipping by two
25 AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
26 NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
27 SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
28 };
29 ESTCOND * cond;
30 Condition() {
31 /**
32 * constructor
33 */
34 cond = est_cond_new();
35 }
36 ~Condition() {
37 /**
38 * destructor
39 */
40 est_cond_delete(cond);
41 }
42 void set_phrase(const char *phrase) {
43 /**
44 * set the search phrase
45 */
46 est_cond_set_phrase(cond, phrase);
47 }
48 void add_attr(const char *expr) {
49 /**
50 * set the attribute expression
51 */
52 est_cond_add_attr(cond, expr);
53 }
54 void set_order(const char *expr) {
55 /**
56 * set the order of a condition object
57 */
58 est_cond_set_order(cond, expr);
59 }
60 void set_max(int _max) {
61 /**
62 * set the maximum number of retrieval of a condition object
63 */
64 est_cond_set_max(cond, _max);
65 }
66 void set_options(int options) {
67 /**
68 * set options of retrieval of a condition object
69 */
70 est_cond_set_options(cond, options);
71 }
72 };
73
74 class Document {
75 private:
76 std::string text_buf;
77 public:
78 ESTDOC *doc;
79
80 Document() {
81 /**
82 * constructor
83 */
84 doc = est_doc_new();
85 }
86 Document(const char* draft) {
87 /**
88 * constructor
89 */
90 doc = est_doc_new_from_draft(draft);
91 }
92 Document(ESTDOC *_doc) {
93 /**
94 * constructor
95 */
96 doc = _doc;
97 }
98 ~Document() {
99 /**
100 * destructor
101 */
102 est_doc_delete(doc);
103 }
104 void add_attr(const char * name, const char*value) {
105 /**
106 * add an attribute to a document object
107 */
108 est_doc_add_attr(doc, name, value);
109 }
110 void add_text(const char *text) {
111 /**
112 * add a sentence of text to a document object
113 */
114 est_doc_add_text(doc, text);
115 }
116 void add_hidden_text(const char * text) {
117 /**
118 * add a hidden sentence to a document object
119 */
120 est_doc_add_hidden_text(doc, text);
121 }
122 int id() {
123 /**
124 * get the ID number of a document object
125 */
126 return est_doc_id(doc);
127 }
128 std::vector<std::string> * attr_names() {
129 /**
130 * get a list of attribute names of a document object
131 */
132 std::vector<std::string> * vs = new std::vector<std::string>;
133 CBLIST * attr_names = est_doc_attr_names(doc);
134 for (int i=0; i < cblistnum(attr_names); i++) {
135 vs->push_back(cblistval(attr_names, i, NULL));
136 }
137 cblistclose(attr_names);
138 return vs;
139 }
140 const char * attr(const char *name) {
141 /**
142 * get the value of an attribute of a document object
143 */
144 return est_doc_attr(doc, name);
145 }
146 const char * cat_texts() {
147 /**
148 * get a list of sentences of the text of a document object
149 */
150 // return est_doc_cat_texts(doc);
151 return "This is mockup!";
152 }
153 std::vector<std::string>* texts() {
154 /**
155 * get a list of sentences of the text of a document object
156 */
157 std::vector<std::string> * vs = new std::vector<std::string>;
158 const CBLIST *texts;
159 texts = est_doc_texts(doc);
160 for(int i = 0; i < cblistnum(texts); i++) {
161 vs->push_back(cblistval(texts, i, NULL));
162 }
163 return vs;
164 }
165 const char * dump_draft() {
166 /**
167 * dump draft data of a document object
168 */
169 return est_doc_dump_draft(doc);
170 }
171 const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
172 /**
173 * make a snippet of the body text of a document object
174 */
175 CBLIST * words;
176 std::vector<std::string>::iterator iter;
177
178 words = cblistopen();
179
180 for (iter = _words.begin(); _words.end() != iter; iter++) {
181 cblistpush(words, iter->c_str(), -1);
182 }
183
184 const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
185
186 cblistclose(words);
187
188 return result;
189 }
190 };
191
192 class Database {
193 private:
194 ESTMTDB *db;
195 public:
196 enum { // enumeration for error codes
197 ERRNOERR = ESTENOERR, // no error
198 ERRINVAL = ESTEINVAL, // invalid argument
199 ERRACCES = ESTEACCES, // access forbidden
200 ERRLOCK = ESTELOCK, // lock failure
201 ERRDB = ESTEDB, // database problem
202 ERRIO = ESTEIO, // I/O problem
203 ERRNOITEM = ESTENOITEM, // no item
204 ERRMISC = ESTEMISC // miscellaneous
205 };
206 enum { // enumeration for open modes
207 DBREADER = ESTDBREADER, // open as a reader
208 DBWRITER = ESTDBWRITER, // open as a writer
209 DBCREAT = ESTDBCREAT, // a writer creating
210 DBTRUNC = ESTDBTRUNC, // a writer truncating
211 DBNOLCK = ESTDBNOLCK, // open without locking
212 DBLCKNB = ESTDBLCKNB, // lock without blocking
213 DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
214 };
215 enum { // enumeration for options of document registration
216 PDCLEAN = ESTPDCLEAN // clean up dispensable regions
217 };
218 enum { // enumeration for options of document deletion
219 ODCLEAN = ESTODCLEAN // clean up dispensable regions
220 };
221 enum { // enumeration for options of optimization
222 OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
223 OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
224 };
225 enum { // enumeration for options of document retrieval
226 GDNOATTR = ESTGDNOATTR, // no attributes
227 GDNOTEXT = ESTGDNOTEXT // no text
228 };
229 Database() {
230 /**
231 * constructor(dummy)
232 */
233 }
234 ~Database() {
235 close();
236 }
237 bool open(const char * dbname, int mode) {
238 /**
239 * open the database
240 */
241 int ecode;
242 db = est_mtdb_open(dbname, mode, &ecode);
243 return db;
244 }
245 bool close() {
246 /**
247 * close the database
248 */
249 if (db) {
250 int ecode;
251 bool result = est_mtdb_close(db, &ecode);
252 db = NULL;
253 return result;
254 } else {
255 return false;
256 }
257 }
258 bool put_doc(Document *doc, int options) {
259 /**
260 * add a document to a database
261 */
262 return est_mtdb_put_doc(db, doc->doc, options);
263 }
264 std::vector<int> * search(Condition * cond, int options) {
265 /**
266 * search documents corresponding a condition for a database
267 */
268 int resnum;
269 int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
270 std::vector<int> *numbers = new std::vector<int>;
271 for (int i=0; i<resnum; i++) {
272 numbers->push_back(result[i]);
273 }
274 return numbers;
275 }
276 static const char * err_msg(int ecode) {
277 /**
278 * get the string of an error
279 */
280 return est_err_msg(ecode);
281 }
282 int error() {
283 /**
284 * get the last happended error code of a database
285 */
286 return est_mtdb_error(db);
287 }
288 bool fatal() {
289 /**
290 * check whether a database has a fatal error
291 */
292 return est_mtdb_fatal(db);
293 }
294 bool flush(int _max) {
295 /**
296 * flush index words in the cache of a database
297 */
298 return est_mtdb_flush(db, _max);
299 }
300 bool sync() {
301 /**
302 * synchronize updating contents of a database
303 */
304 return est_mtdb_sync(db);
305 }
306 bool optimize(int options) {
307 /**
308 * optimize a database
309 */
310 return est_mtdb_optimize(db, options);
311 }
312 bool out_doc(int id, int options) {
313 /**
314 * remove a document from a database
315 */
316 return est_mtdb_out_doc(db, id, options);
317 }
318 Document * get_doc(int id, int options) {
319 /**
320 * retrieve a document in a database
321 */
322 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
323 if (!doc) {
324 throw est_err_msg(est_mtdb_error(db));
325 } else {
326 return new Document(doc);
327 }
328 }
329 int uri_to_id(const char *uri) {
330 /**
331 * get the ID of a document spacified by URI
332 */
333 return est_mtdb_uri_to_id(db, uri);
334 }
335 std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
336 /**
337 * extract keywords of a document object
338 */
339 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
340
341 CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
342
343 cbmapiterinit(keys);
344 int ksiz;
345 while (const char *key = cbmapiternext(keys, &ksiz)) {
346 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
347 }
348 return mss;
349 }
350 bool iter_init() {
351 /**
352 * initialize the iterator of a database
353 */
354 return est_mtdb_iter_init(db);
355 }
356 int iter_next() {
357 /**
358 * get the next ID of the iterator of a database
359 */
360 return est_mtdb_iter_next(db);
361 }
362 const char * name() {
363 /**
364 * get the name of a database
365 */
366 return est_mtdb_name(db);
367 }
368 int doc_num() {
369 /**
370 * get the number of documents in a database
371 */
372 return est_mtdb_doc_num(db);
373 }
374 int word_num() {
375 /**
376 * get the number of unique words in a database
377 */
378 return est_mtdb_word_num(db);
379 }
380 double size() {
381 /**
382 * get the size of a database
383 */
384 return est_mtdb_size(db);
385 }
386 void set_cache_size(size_t size, int anum, int tnum) {
387 /**
388 * set the maximum size of the cache memory of a database
389 */
390 est_mtdb_set_cache_size(db, size, anum, tnum);
391 }
392 void set_special_cache(const char *name, int num) {
393 /**
394 * Set the special cache for narrowing and sorting
395 * with document attributes
396 */
397 est_mtdb_set_special_cache(db, name, num);
398 }
399 };
400 };

  ViewVC Help
Powered by ViewVC 1.1.26