1 |
/************************************************************************************************* |
2 |
* The command line interface of the MT-safe API |
3 |
* Copyright (C) 2004-2005 Mikio Hirabayashi |
4 |
* This file is part of Hyper Estraier. |
5 |
* Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of |
6 |
* the GNU Lesser General Public License as published by the Free Software Foundation; either |
7 |
* version 2.1 of the License or any later version. Hyper Estraier is distributed in the hope |
8 |
* that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
10 |
* License for more details. |
11 |
* You should have received a copy of the GNU Lesser General Public License along with Hyper |
12 |
* Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, |
13 |
* Boston, MA 02111-1307 USA. |
14 |
*************************************************************************************************/ |
15 |
|
16 |
|
17 |
#include "estraier.h"
#include "estmtdb.h"
#include "myconf.h"
#include <signal.h>
20 |
|
21 |
/* Capacity, in bytes, of the stack buffer used to format a document URI. */
#define URIBUFSIZ      8192
/* How many branch databases are opened and shared among the worker threads. */
#define BRANCHDBNUM    4
23 |
|
24 |
typedef struct { /* type of structure for a thread mission */ |
25 |
int id; /* ID number */ |
26 |
ESTMTDB *db; /* database object */ |
27 |
int dnum; /* number of documents */ |
28 |
} MISSION; |
29 |
|
30 |
/* Character repertoires selectable when generating random test text.
   RD_RAND is last, so `x % RD_RAND' picks one of the concrete modes. */
enum {
  RD_ENG,                                /* English */
  RD_LAT,                                /* Latin */
  RD_EURO,                               /* European mix */
  RD_ORI,                                /* Oriental */
  RD_JPN,                                /* Japanese */
  RD_CHAO,                               /* chaos */
  RD_RAND                                /* selected at random */
};
39 |
|
40 |
|
41 |
/* global variables */ |
42 |
const char *g_progname; /* program name */ |
43 |
int g_sigterm = FALSE; /* flag for termination signal */ |
44 |
|
45 |
|
46 |
/* function prototypes */ |
47 |
int main(int argc, char **argv); |
48 |
static void printferror(const char *format, ...); |
49 |
static void printfinfo(const char *format, ...); |
50 |
static void dbinform(const char *msg); |
51 |
static void setsignals(void); |
52 |
static void sigtermhandler(int num); |
53 |
static void usage(void); |
54 |
static int runwicked(int argc, char **argv); |
55 |
static int procwicked(const char *dbname, int dnum, int tnum); |
56 |
static void *mtwkfunc(void *mission); |
57 |
static ESTDOC *est_doc_new_from_chaos(int cnum, int snum, int mode); |
58 |
static char *est_random_str(int cnum, int mode); |
59 |
|
60 |
|
61 |
/* main routine */ |
62 |
int main(int argc, char **argv){ |
63 |
const char *tmp; |
64 |
int rv; |
65 |
if((tmp = getenv("ESTDBGFD")) != NULL) dpdbgfd = atoi(tmp); |
66 |
cbstdiobin(); |
67 |
g_progname = argv[0]; |
68 |
g_sigterm = FALSE; |
69 |
if(argc < 2) usage(); |
70 |
rv = 0; |
71 |
if(!strcmp(argv[1], "wicked")){ |
72 |
setsignals(); |
73 |
rv = runwicked(argc, argv); |
74 |
} else { |
75 |
usage(); |
76 |
} |
77 |
return rv; |
78 |
} |
79 |
|
80 |
|
81 |
/* print formatted error string and flush the buffer */ |
82 |
static void printferror(const char *format, ...){ |
83 |
va_list ap; |
84 |
va_start(ap, format); |
85 |
fprintf(stderr, "%s: ERROR: ", g_progname); |
86 |
vfprintf(stderr, format, ap); |
87 |
fputc('\n', stderr); |
88 |
fflush(stderr); |
89 |
va_end(ap); |
90 |
} |
91 |
|
92 |
|
93 |
/* print formatted information string and flush the buffer */ |
94 |
static void printfinfo(const char *format, ...){ |
95 |
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; |
96 |
va_list ap; |
97 |
va_start(ap, format); |
98 |
if(pthread_mutex_lock(&mutex) != 0) return; |
99 |
printf("%s: INFO: ", g_progname); |
100 |
vprintf(format, ap); |
101 |
putchar('\n'); |
102 |
fflush(stdout); |
103 |
pthread_mutex_unlock(&mutex); |
104 |
va_end(ap); |
105 |
} |
106 |
|
107 |
|
108 |
/* Database event callback (registered with est_mtdb_set_informer):
   forwards the message text to the information log unchanged. */
static void dbinform(const char *msg){
  /* pass the text as an argument, never as the format string */
  printfinfo("%s", msg);
}
112 |
|
113 |
|
114 |
/* set signal handlers */ |
115 |
static void setsignals(void){ |
116 |
signal(1, sigtermhandler); |
117 |
signal(2, sigtermhandler); |
118 |
signal(3, sigtermhandler); |
119 |
signal(13, sigtermhandler); |
120 |
signal(15, sigtermhandler); |
121 |
} |
122 |
|
123 |
|
124 |
/* handler of termination signal */ |
125 |
static void sigtermhandler(int num){ |
126 |
static int tries = 0; |
127 |
if(tries++ <= 4){ |
128 |
signal(num, sigtermhandler); |
129 |
} else { |
130 |
signal(num, SIG_DFL); |
131 |
} |
132 |
g_sigterm = TRUE; |
133 |
printfinfo("the termination signal %d catched", num); |
134 |
} |
135 |
|
136 |
|
137 |
/* print the usage and exit */ |
138 |
static void usage(void){ |
139 |
fprintf(stderr, "%s: command line utility of Hyper Estraier\n", g_progname); |
140 |
fprintf(stderr, "\n"); |
141 |
fprintf(stderr, "usage:\n"); |
142 |
fprintf(stderr, " %s wicked db dnum tnum\n", g_progname); |
143 |
fprintf(stderr, "\n"); |
144 |
exit(1); |
145 |
} |
146 |
|
147 |
|
148 |
/* Parse the arguments of the wicked command and run it.
   Exactly three positional arguments are expected after the sub-command:
   the database name, the number of documents and the number of threads.
   A leading option-like argument (starting with '-'), a missing or surplus
   argument, or a count below 1 prints the usage and exits.
   Returns the result of procwicked. */
static int runwicked(int argc, char **argv){
  char *positional[3] = { NULL, NULL, NULL };   /* db name, dnum, tnum */
  int filled, i, dnum, tnum;
  filled = 0;
  for(i = 2; i < argc; i++){
    if(filled == 0 && argv[i][0] == '-'){
      /* no options are supported before the database name */
      usage();
    } else if(filled < 3){
      positional[filled++] = argv[i];
    } else {
      usage();
    }
  }
  if(filled < 3) usage();
  dnum = atoi(positional[1]);
  if(dnum < 1) usage();
  tnum = atoi(positional[2]);
  if(tnum < 1) usage();
  return procwicked(positional[0], dnum, tnum);
}
174 |
|
175 |
|
176 |
/* perform the wicked command */ |
177 |
static int procwicked(const char *dbname, int dnum, int tnum){ |
178 |
ESTMTDB *dbs[BRANCHDBNUM]; |
179 |
MISSION *missions; |
180 |
pthread_t *threads; |
181 |
void *rv; |
182 |
char *name; |
183 |
int i, ecode, err; |
184 |
time_t curtime; |
185 |
curtime = time(NULL); |
186 |
for(i = 0; i < BRANCHDBNUM; i++){ |
187 |
name = cbsprintf("%s-%08d", dbname, i + 1); |
188 |
if(!(dbs[i] = est_mtdb_open(name, ESTDBWRITER | ESTDBCREAT | ESTDBTRUNC, &ecode))){ |
189 |
printferror("%s: %s", name, est_err_msg(ecode)); |
190 |
while(i >= 0){ |
191 |
est_mtdb_close(dbs[i], &ecode); |
192 |
i--; |
193 |
} |
194 |
free(name); |
195 |
return -1; |
196 |
} |
197 |
est_mtdb_set_informer(dbs[i], dbinform); |
198 |
est_mtdb_set_cache_size(dbs[i], 1024 * 1024 * 32, 1024, 256); |
199 |
est_mtdb_set_special_cache(dbs[i], ESTDATTRURI, 128); |
200 |
free(name); |
201 |
} |
202 |
missions = cbmalloc(sizeof(MISSION) * tnum); |
203 |
threads = cbmalloc(sizeof(pthread_t) * tnum); |
204 |
err = FALSE; |
205 |
if(tnum == 1){ |
206 |
missions[0].id = 1; |
207 |
missions[0].db = dbs[0]; |
208 |
missions[0].dnum = dnum; |
209 |
if(mtwkfunc(missions) != NULL) err = TRUE; |
210 |
} else { |
211 |
for(i = 0; i < tnum; i++){ |
212 |
missions[i].id = i + 1; |
213 |
missions[i].db = dbs[i%BRANCHDBNUM]; |
214 |
missions[i].dnum = dnum; |
215 |
if(pthread_create(threads + i, NULL, mtwkfunc, missions + i) != 0){ |
216 |
printferror("%d: pthread_create failed", i + 1); |
217 |
missions[i].id = -1; |
218 |
err = TRUE; |
219 |
} |
220 |
} |
221 |
for(i = 0; i < tnum; i++){ |
222 |
if(missions[i].id == -1) continue; |
223 |
if(pthread_join(threads[i], &rv) != 0){ |
224 |
printferror("%s: pthread_join failed", missions[i].id); |
225 |
err = TRUE; |
226 |
} else if(rv){ |
227 |
err = TRUE; |
228 |
} |
229 |
} |
230 |
} |
231 |
for(i = 0; i < BRANCHDBNUM; i++){ |
232 |
if(!est_mtdb_close(dbs[i], &ecode)){ |
233 |
printferror("%s: %s", dbname, est_err_msg(ecode)); |
234 |
err = TRUE; |
235 |
} |
236 |
} |
237 |
free(threads); |
238 |
free(missions); |
239 |
curtime = time(NULL) - curtime; |
240 |
if(!err) printfinfo("finished successfully: elapsed time: %dh %dm %ds", |
241 |
(int)(curtime / 3600), (int)((curtime / 60) % 60), (int)(curtime % 60)); |
242 |
return err ? 1 : 0; |
243 |
} |
244 |
|
245 |
|
246 |
/* Thread function for wicked command */ |
247 |
static void *mtwkfunc(void *mission){ |
248 |
ESTMTDB *db; |
249 |
ESTDOC *doc; |
250 |
ESTCOND *cond; |
251 |
char uri[URIBUFSIZ], *oper, *value, *first, *second, *phrase; |
252 |
int i, j, err, id, dnum, *res, rnum; |
253 |
double rnd; |
254 |
id = ((MISSION *)mission)->id; |
255 |
db = ((MISSION *)mission)->db; |
256 |
dnum = ((MISSION *)mission)->dnum; |
257 |
printfinfo("%d: started", id); |
258 |
err = FALSE; |
259 |
for(i = 0; i < dnum; i++){ |
260 |
rnd = est_random(); |
261 |
if((int)(rnd * INT_MAX) % dnum < 1){ |
262 |
rnd = est_random(); |
263 |
if(rnd < 0.3){ |
264 |
if(!est_mtdb_optimize(db, (int)(est_random() * INT_MAX) % 2 == 0) ? ESTOPTNOPURGE : 0) |
265 |
err = TRUE; |
266 |
} else if(rnd < 0.8){ |
267 |
if(!est_mtdb_flush(db, 1024)) err = TRUE; |
268 |
} else { |
269 |
if(!est_mtdb_sync(db)) err = TRUE; |
270 |
} |
271 |
} else if(rnd < 0.05){ |
272 |
if(est_mtdb_out_doc(db, (int)(est_random() * INT_MAX) % (i + 1) + 1, |
273 |
((int)(est_random() * INT_MAX) % 2 == 0) ? ESTODCLEAN : 0)){ |
274 |
printfinfo("%d: [%d:%d]: out", id, i + 1, est_mtdb_doc_num(db)); |
275 |
} else if(est_mtdb_error(db) != ESTENOITEM){ |
276 |
err = TRUE; |
277 |
} |
278 |
} else if(rnd < 0.1){ |
279 |
if((value = est_mtdb_get_doc_attr(db, (int)(est_random() * INT_MAX) % (i + 1) + 1, |
280 |
ESTDATTRURI)) != NULL){ |
281 |
printfinfo("[%d:%d]: attr: %s", i + 1, est_mtdb_doc_num(db), value); |
282 |
free(value); |
283 |
} |
284 |
} else if(rnd < 0.25){ |
285 |
rnd = est_random(); |
286 |
if(rnd < 0.5){ |
287 |
oper = " OR "; |
288 |
} else if(rnd < 0.7){ |
289 |
oper = " AND "; |
290 |
} else if(rnd < 0.8){ |
291 |
oper = " NOTAND "; |
292 |
} else if(rnd < 0.9){ |
293 |
oper = " "; |
294 |
} else { |
295 |
oper = ""; |
296 |
} |
297 |
first = est_random_str(5, (int)(est_random() * INT_MAX) % RD_RAND); |
298 |
second = est_random_str(2, (int)(est_random() * INT_MAX) % RD_RAND); |
299 |
phrase = cbsprintf("%s%s%s", first, oper, second); |
300 |
cond = est_cond_new(); |
301 |
est_cond_set_phrase(cond, phrase); |
302 |
if(est_random() < 0.05) est_cond_add_attr(cond, "@uri STREW 0.est"); |
303 |
if(est_random() < 0.05) est_cond_set_order(cond, "@uri STRD"); |
304 |
res = est_mtdb_search(db, cond, &rnum, NULL); |
305 |
printfinfo("%d: [%d:%d]: search: %d hits", id, i + 1, est_mtdb_doc_num(db), rnum); |
306 |
if(est_random() < 0.01){ |
307 |
for(j = 0; j < rnum && j < 100; j++){ |
308 |
if((doc = est_mtdb_get_doc(db, res[j], 0)) != NULL){ |
309 |
est_doc_delete(doc); |
310 |
} else if(est_mtdb_error(db) != ESTENOITEM){ |
311 |
err = TRUE; |
312 |
} |
313 |
} |
314 |
} |
315 |
free(res); |
316 |
est_cond_delete(cond); |
317 |
free(phrase); |
318 |
free(first); |
319 |
free(second); |
320 |
} else { |
321 |
doc = est_doc_new_from_chaos(100, 3, est_random() < 0.5 ? RD_EURO : RD_RAND); |
322 |
if(est_random() < 0.2){ |
323 |
sprintf(uri, "file:///tmp/wicked-%08d-%05d.est", |
324 |
(int)(est_random() * INT_MAX) % (i + 1) + 1, getpid()); |
325 |
} else { |
326 |
sprintf(uri, "file:///tmp/wicked-%08d-%05d.est", i + 1, getpid()); |
327 |
} |
328 |
est_doc_add_attr(doc, ESTDATTRURI, uri); |
329 |
if(!est_mtdb_put_doc(db, doc, est_random() < 0.5 ? ESTPDCLEAN : 0)) err = TRUE; |
330 |
est_doc_delete(doc); |
331 |
} |
332 |
if(err || g_sigterm) break; |
333 |
} |
334 |
if(err) printferror("%s: %s", est_mtdb_name(db), est_err_msg(est_mtdb_error(db))); |
335 |
printfinfo("%d: finished", id); |
336 |
return err ? "error" : NULL; |
337 |
} |
338 |
|
339 |
|
340 |
/* generate a document with random text */ |
341 |
static ESTDOC *est_doc_new_from_chaos(int cnum, int snum, int mode){ |
342 |
ESTDOC *doc; |
343 |
char *str; |
344 |
int i; |
345 |
doc = est_doc_new(); |
346 |
snum *= pow(est_random_nd() + 0.5, 3.0); |
347 |
if(mode == RD_RAND){ |
348 |
mode = est_random() * 100; |
349 |
if(mode < 20){ |
350 |
mode = RD_ENG; |
351 |
est_doc_add_attr(doc, "mode", "english"); |
352 |
} else if(mode < 40){ |
353 |
mode = RD_LAT; |
354 |
est_doc_add_attr(doc, "mode", "latin"); |
355 |
} else if(mode < 60){ |
356 |
mode = RD_EURO; |
357 |
est_doc_add_attr(doc, "mode", "euromix"); |
358 |
} else if(mode < 65){ |
359 |
mode = RD_ORI; |
360 |
est_doc_add_attr(doc, "mode", "oriental"); |
361 |
} else if(mode < 95){ |
362 |
mode = RD_JPN; |
363 |
est_doc_add_attr(doc, "mode", "japanese"); |
364 |
} else { |
365 |
mode = RD_CHAO; |
366 |
est_doc_add_attr(doc, "mode", "chaos"); |
367 |
} |
368 |
} |
369 |
switch(mode){ |
370 |
case RD_ENG: est_doc_add_attr(doc, "mode", "english"); break; |
371 |
case RD_LAT: est_doc_add_attr(doc, "mode", "latin"); break; |
372 |
case RD_ORI: est_doc_add_attr(doc, "mode", "oriental"); break; |
373 |
case RD_JPN: est_doc_add_attr(doc, "mode", "japanese"); break; |
374 |
case RD_EURO: est_doc_add_attr(doc, "mode", "euromix"); break; |
375 |
case RD_CHAO: est_doc_add_attr(doc, "mode", "chaos"); break; |
376 |
} |
377 |
for(i = 0; i <= snum; i++){ |
378 |
str = est_random_str(cnum, mode); |
379 |
if(est_random() < 0.05){ |
380 |
est_doc_add_hidden_text(doc, str); |
381 |
} else { |
382 |
est_doc_add_text(doc, str); |
383 |
} |
384 |
free(str); |
385 |
} |
386 |
return doc; |
387 |
} |
388 |
|
389 |
|
390 |
/* generate random string */ |
391 |
static char *est_random_str(int cnum, int mode){ |
392 |
const char echrs[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
393 |
CBDATUM *buf; |
394 |
char wc[2], *str; |
395 |
int i, c, wlen, dec, mm, big, n; |
396 |
buf = cbdatumopen("", 0); |
397 |
cnum *= pow(est_random_nd() + 0.5, 3.0); |
398 |
wlen = est_random_nd() * 8 + 4; |
399 |
dec = (int)(est_random() * INT_MAX) % 10; |
400 |
big = (((int)(est_random() * INT_MAX) % 0x29)) * 0x100; |
401 |
for(i = 0; i < cnum; i++){ |
402 |
switch(mode){ |
403 |
case RD_ENG: case RD_LAT: case RD_EURO: |
404 |
mm = (int)(est_random() * INT_MAX) % 100; |
405 |
if((mode == RD_LAT || mode == RD_EURO) && mm < 5){ |
406 |
c = 0x00a1 + (int)(pow(est_random_nd(), 2.0) * (0x00ff - 0x00a0)); |
407 |
} else if(mode == RD_EURO && (mm < 30 || dec > 8)){ |
408 |
if(dec % 2 == 0){ |
409 |
c = 0x0391 + (int)(pow(est_random_nd(), 2.0) * (0x03d6 - 0x0391)); |
410 |
} else { |
411 |
c = 0x0400 + (int)(pow(est_random_nd(), 2.0) * (0x045f - 0x0400)); |
412 |
} |
413 |
} else if(mm < 95){ |
414 |
if((n = est_random_nd() * (sizeof(echrs) - 1)) == (sizeof(echrs) - 1)) n = 0; |
415 |
c = echrs[n]; |
416 |
} else { |
417 |
c = (int)(est_random() * ('@' - ' ')) + ' '; |
418 |
} |
419 |
if(--wlen < 1){ |
420 |
c = ' '; |
421 |
wlen = pow(est_random_nd(), 3.0) * 8 + 4; |
422 |
dec = (int)(est_random() * INT_MAX) % 10; |
423 |
} |
424 |
break; |
425 |
case RD_ORI: |
426 |
c = big + est_random_nd() * 0x100; |
427 |
if(--wlen < 1){ |
428 |
wlen = pow(est_random_nd(), 3.0) * 12 + 6; |
429 |
big = (((int)(est_random() * INT_MAX) % 0x29)) * 0x100; |
430 |
} |
431 |
break; |
432 |
case RD_JPN: |
433 |
if(dec < 4){ |
434 |
c = 0x3041 + pow(est_random_nd(), 3.0) * (0x3094 - 0x3041); |
435 |
} else if(dec < 7){ |
436 |
c = 0x30a1 + pow(est_random_nd(), 3.0) * (0x30fe - 0x30a1); |
437 |
} else if(dec < 9){ |
438 |
c = 0x4e00 + pow(est_random_nd(), 3.0) * (0x9faf - 0x4e00); |
439 |
} else { |
440 |
if(est_random() < 0.7){ |
441 |
c = 0x00a1 + (int)(pow(est_random_nd(), 2.0) * (0x00ff - 0x00a0)); |
442 |
} else { |
443 |
c = 0x3041 + est_random() * (0xffef - 0x3041); |
444 |
} |
445 |
} |
446 |
if(--wlen < 1){ |
447 |
wlen = pow(est_random_nd(), 3.0) * 12 + 6; |
448 |
dec = (int)(est_random() * INT_MAX) % 10; |
449 |
} |
450 |
break; |
451 |
default: |
452 |
if(est_random() < 0.2){ |
453 |
c = 0x00a1 + (int)est_random() * (0x00ff - 0x00a0); |
454 |
} else { |
455 |
c = (int)(est_random() * 0x10000); |
456 |
} |
457 |
break; |
458 |
} |
459 |
if(c <= 0 || c >= 0x10000) c = 0x0020; |
460 |
wc[0] = c / 0x100; |
461 |
wc[1] = c % 0x100; |
462 |
cbdatumcat(buf, wc, 2); |
463 |
} |
464 |
str = est_iconv(CB_DATUMPTR(buf), CB_DATUMSIZE(buf), "UTF-16BE", "UTF-8", NULL, NULL); |
465 |
cbdatumclose(buf); |
466 |
return str; |
467 |
} |
468 |
|
469 |
|
470 |
|
471 |
/* END OF FILE */ |