1 |
dpavlin |
2 |
/************************************************************************************************* |
2 |
|
|
* A sample searcher of Hyper Estraier |
3 |
|
|
* Copyright (C) 2004-2005 Mikio Hirabayashi |
4 |
|
|
* This file is part of Hyper Estraier. |
5 |
|
|
* Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of |
6 |
|
|
* the GNU Lesser General Public License as published by the Free Software Foundation; either |
7 |
|
|
* version 2.1 of the License or any later version. Hyper Estraier is distributed in the hope |
8 |
|
|
* that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
10 |
|
|
* License for more details. |
11 |
|
|
* You should have received a copy of the GNU Lesser General Public License along with Hyper |
12 |
|
|
* Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, |
13 |
|
|
* Boston, MA 02111-1307 USA. |
14 |
|
|
*************************************************************************************************/ |
15 |
|
|
|
16 |
|
|
|
17 |
|
|
#if defined(MYFCGI) |
18 |
|
|
#include <fcgi_stdio.h> |
19 |
|
|
#endif |
20 |
|
|
#include "estraier.h" |
21 |
|
|
#include "myconf.h" |
22 |
|
|
|
23 |
|
|
/* suffix replacing the extension of the script name to form the configuration file name */
#define CONFSUFFIX     ".conf"

/* name of the database for keywords, located under the index directory */
#define KWDBNAME       "kwords"

/* name of the attribute of the local file name */
#define DATTRLFILE     "_lfile"

/* name of the pseudo-attribute of score */
#define DATTRSCORE     "#score"

/* size of a buffer for a number */
#define NUMBUFSIZ      32

/* size of the output buffer */
#define OUTBUFSIZ      262144

/* bucket number of map for trivial use */
#define MINIBNUM       31

/* number of retries when locking failure */
#define LOCKRETRYNUM   16

/* number of retries when missing documents */
#define MISSRETRYNUM   3

/* ratio of increment number when missing */
#define MISSINCRATIO   8

/* default number of shown documents per page */
#define DEFPERPAGE     10

/* number of pages in paging navigation */
#define NAVIPAGES      10

/* max number of the special cache */
#define SPCACHEMNUM    1048576
36 |
|
|
|
37 |
|
|
|
38 |
|
|
/* global variables for configurations */ |
39 |
|
|
const char *g_conffile = NULL; /* path of the configuration file */ |
40 |
|
|
const char *g_indexname = NULL; /* name of the index */ |
41 |
|
|
const char *g_tmplfile = NULL; /* path of the template file */ |
42 |
|
|
const char *g_topfile = NULL; /* path of the top page file */ |
43 |
|
|
const char *g_logfile = NULL; /* path of the log file */ |
44 |
|
|
const char *g_lprefix = NULL; /* local prefix of the URI of each document */ |
45 |
|
|
const char *g_gprefix = NULL; /* global prefix of the URI of each document */ |
46 |
|
|
const char *g_gsuffix = NULL; /* global suffix of the URI of each document */ |
47 |
|
|
const char *g_dirindex = NULL; /* name of the index file in a directory */ |
48 |
|
|
const CBLIST *g_replexprs = NULL; /* list of URI replacement expressions */ |
49 |
|
|
const char *g_perpage = NULL; /* CSV of numbers of shown documents per page */ |
50 |
|
|
int g_attrselect = FALSE; /* whether to use select boxes for extension form */ |
51 |
|
|
int g_showscore = FALSE; /* whether to show scores */ |
52 |
|
|
const CBLIST *g_extattrs = NULL; /* list of extra attributes of each document */ |
53 |
|
|
int g_snipwwidth = -1; /* whole width of the snippet */ |
54 |
|
|
int g_sniphwidth = -1; /* width of beginning of the text */ |
55 |
|
|
int g_snipawidth = -1; /* width around each highlighted word */ |
56 |
|
|
int g_condgstep = -1; /* step of N-gram */ |
57 |
|
|
int g_dotfidf = FALSE; /* whether to do TF-IDF tuning */ |
58 |
|
|
int g_smplphrase = FALSE; /* whether to use simplefied phrase */ |
59 |
|
|
int g_candetail = FALSE; /* whether to show detail link */ |
60 |
|
|
int g_smlrvnum = -1; /* number of elements of a vecter for similarity */ |
61 |
|
|
const char *g_spcache = NULL; /* name of the attribute of special cache */ |
62 |
|
|
|
63 |
|
|
|
64 |
|
|
/* global variables for parameters */

/* search phrase */
const char *p_phrase = NULL;
/* narrowing attribute */
const char *p_attr = NULL;
/* separated value of narrowing attribute */
const char *p_attrval = NULL;
/* ordering attribute */
const char *p_order = NULL;
/* number of shown documents per page */
int p_perpage = 0;
/* number of the page */
int p_pagenum = 0;
/* ID of the document to be detailed */
int p_detail = 0;
/* ID of the seed document of similarity search */
int p_similar = 0;
73 |
|
|
|
74 |
|
|
|
75 |
|
|
/* other global variables */ |
76 |
|
|
char g_outbuf[OUTBUFSIZ]; /* output buffer */ |
77 |
|
|
const char *g_scriptname = NULL; /* name of the script */ |
78 |
|
|
const char *g_tmpltext = NULL; /* text of the template */ |
79 |
|
|
const char *g_toptext = NULL; /* text of the top page */ |
80 |
|
|
ESTDB *g_db = NULL; /* main database object */ |
81 |
|
|
CURIA *g_kwdb = NULL; /* keyword database object */ |
82 |
|
|
double g_etime = 0.0; /* elepsed time */ |
83 |
|
|
int g_tabidx = 0; /* counter of tab indexes */ |
84 |
|
|
|
85 |
|
|
|
86 |
|
|
/* function prototypes */ |
87 |
|
|
int main(int argc, char **argv); |
88 |
|
|
static int realmain(int argc, char **argv); |
89 |
|
|
static void showerror(const char *msg); |
90 |
|
|
static const char *skiplabel(const char *str); |
91 |
|
|
static CBMAP *getparameters(void); |
92 |
|
|
static void myestdbclose(ESTDB *db); |
93 |
|
|
static void xmlprintf(const char *format, ...); |
94 |
|
|
static CBMAP *vectorizer(void *db, int id, void *kwdb); |
95 |
|
|
static void setsimilarphrase(void); |
96 |
|
|
static void showpage(void); |
97 |
|
|
static void showform(void); |
98 |
|
|
static void showtop(void); |
99 |
|
|
static void showresult(ESTDOC **docs, int dnum, CBMAP *hints, int miss); |
100 |
|
|
static void showdoc(ESTDOC *doc, const CBLIST *words, CBMAP *cnames, int detail); |
101 |
|
|
static char *makeshownuri(const char *uri); |
102 |
|
|
static void showinfo(void); |
103 |
|
|
static void outputlog(void); |
104 |
|
|
|
105 |
|
|
|
106 |
|
|
/* main routine
   In the plain CGI build, the work is delegated to `realmain' and its status is returned.
   In the FastCGI build (MYFCGI defined), requests are served in a loop until FCGI_Accept
   fails, and all per-request state is reset before each call of `realmain'. */
int main(int argc, char **argv){
#if defined(MYFCGI)
  static int cnt = 0;
  while(FCGI_Accept() >= 0){
    /* every 256 requests, sweep the objects registered with the global garbage collector and
       forget the database handles so that `realmain' opens them again */
    if(++cnt >= 256){
      cbggcsweep();
      g_db = NULL;
      g_kwdb = NULL;
      cnt = 0;
    }
    /* request parameters must not leak from one request into the next */
    p_phrase = NULL;
    p_attr = NULL;
    p_attrval = NULL;
    p_order = NULL;
    p_perpage = 0;
    p_pagenum = 0;
    p_detail = 0;
    p_similar = 0;
    /* the form numbers its controls with this counter, so each page must start from zero */
    g_tabidx = 0;
    /* a "tfidf=" attribute of a request can switch this on; `realmain' only ever turns it on
       again from the configuration, so clear it here to restore the configured value */
    g_dotfidf = FALSE;
    realmain(argc, argv);
  }
  return 0;
#else
  return realmain(argc, argv);
#endif
}
132 |
|
|
|
133 |
|
|
|
134 |
|
|
/* real main routine */ |
135 |
|
|
static int realmain(int argc, char **argv){ |
136 |
|
|
CBLIST *lines, *rlist, *alist; |
137 |
|
|
CBMAP *params; |
138 |
|
|
const char *rp; |
139 |
|
|
char *tmp, *wp; |
140 |
|
|
int i, ecode; |
141 |
|
|
/* set configurations */ |
142 |
|
|
cbstdiobin(); |
143 |
|
|
setvbuf(stdout, g_outbuf, _IOFBF, OUTBUFSIZ); |
144 |
|
|
g_scriptname = argv[0]; |
145 |
|
|
if((rp = getenv("SCRIPT_NAME")) != NULL) g_scriptname = rp; |
146 |
|
|
if((rp = strrchr(g_scriptname, '/')) != NULL) g_scriptname = rp + 1; |
147 |
|
|
tmp = cbmalloc(strlen(g_scriptname) + strlen(CONFSUFFIX) + 1); |
148 |
|
|
sprintf(tmp, "%s", g_scriptname); |
149 |
|
|
cbglobalgc(tmp, free); |
150 |
|
|
if(!(wp = strrchr(tmp, '.'))) wp = tmp + strlen(tmp); |
151 |
|
|
sprintf(wp, "%s", CONFSUFFIX); |
152 |
|
|
g_conffile = tmp; |
153 |
|
|
if(!(lines = cbreadlines(g_conffile))) showerror("the configuration file is missing."); |
154 |
|
|
cbglobalgc(lines, (void (*)(void *))cblistclose); |
155 |
|
|
rlist = cblistopen(); |
156 |
|
|
cbglobalgc(rlist, (void (*)(void *))cblistclose); |
157 |
|
|
alist = cblistopen(); |
158 |
|
|
cbglobalgc(alist, (void (*)(void *))cblistclose); |
159 |
|
|
for(i = 0; i < cblistnum(lines); i++){ |
160 |
|
|
rp = cblistval(lines, i, NULL); |
161 |
|
|
if(cbstrfwimatch(rp, "indexname:")){ |
162 |
|
|
g_indexname = skiplabel(rp); |
163 |
|
|
} else if(cbstrfwimatch(rp, "tmplfile:")){ |
164 |
|
|
g_tmplfile = skiplabel(rp); |
165 |
|
|
} else if(cbstrfwimatch(rp, "topfile:")){ |
166 |
|
|
g_topfile = skiplabel(rp); |
167 |
|
|
} else if(cbstrfwimatch(rp, "logfile:")){ |
168 |
|
|
g_logfile = skiplabel(rp); |
169 |
|
|
} else if(cbstrfwimatch(rp, "lprefix:")){ |
170 |
|
|
g_lprefix = skiplabel(rp); |
171 |
|
|
} else if(cbstrfwimatch(rp, "gprefix:")){ |
172 |
|
|
g_gprefix = skiplabel(rp); |
173 |
|
|
} else if(cbstrfwimatch(rp, "gsuffix:")){ |
174 |
|
|
g_gsuffix = skiplabel(rp); |
175 |
|
|
} else if(cbstrfwimatch(rp, "dirindex:")){ |
176 |
|
|
g_dirindex = skiplabel(rp); |
177 |
|
|
} else if(cbstrfwimatch(rp, "replace:")){ |
178 |
|
|
cblistpush(rlist, skiplabel(rp), -1); |
179 |
|
|
} else if(cbstrfwimatch(rp, "perpage:")){ |
180 |
|
|
g_perpage = skiplabel(rp); |
181 |
|
|
} else if(cbstrfwimatch(rp, "attrselect:")){ |
182 |
|
|
if(!cbstricmp(skiplabel(rp), "true")) g_attrselect = TRUE; |
183 |
|
|
} else if(cbstrfwimatch(rp, "showscore:")){ |
184 |
|
|
if(!cbstricmp(skiplabel(rp), "true")) g_showscore = TRUE; |
185 |
|
|
} else if(cbstrfwimatch(rp, "extattr:")){ |
186 |
|
|
cblistpush(alist, skiplabel(rp), -1); |
187 |
|
|
} else if(cbstrfwimatch(rp, "snipwwidth:")){ |
188 |
|
|
g_snipwwidth = atoi(skiplabel(rp)); |
189 |
|
|
} else if(cbstrfwimatch(rp, "sniphwidth:")){ |
190 |
|
|
g_sniphwidth = atoi(skiplabel(rp)); |
191 |
|
|
} else if(cbstrfwimatch(rp, "snipawidth:")){ |
192 |
|
|
g_snipawidth = atoi(skiplabel(rp)); |
193 |
|
|
} else if(cbstrfwimatch(rp, "condgstep:")){ |
194 |
|
|
g_condgstep = atoi(skiplabel(rp)); |
195 |
|
|
} else if(cbstrfwimatch(rp, "dotfidf:")){ |
196 |
|
|
if(!cbstricmp(skiplabel(rp), "true")) g_dotfidf = TRUE; |
197 |
|
|
} else if(cbstrfwimatch(rp, "smplphrase:")){ |
198 |
|
|
if(!cbstricmp(skiplabel(rp), "true")) g_smplphrase = TRUE; |
199 |
|
|
} else if(cbstrfwimatch(rp, "candetail:")){ |
200 |
|
|
if(!cbstricmp(skiplabel(rp), "true")) g_candetail = TRUE; |
201 |
|
|
} else if(cbstrfwimatch(rp, "smlrvnum:")){ |
202 |
|
|
g_smlrvnum = atoi(skiplabel(rp)); |
203 |
|
|
} else if(cbstrfwimatch(rp, "spcache:")){ |
204 |
|
|
g_spcache = skiplabel(rp); |
205 |
|
|
} |
206 |
|
|
} |
207 |
|
|
if(!g_indexname) showerror("indexname is undefined."); |
208 |
|
|
if(!g_tmplfile) showerror("tmplfile is undefined."); |
209 |
|
|
if(!g_topfile) showerror("topfile is undefined."); |
210 |
|
|
if(!g_logfile) showerror("logfile is undefined."); |
211 |
|
|
if(!g_lprefix) showerror("lprefix is undefined."); |
212 |
|
|
if(!g_gprefix) showerror("gprefix is undefined."); |
213 |
|
|
if(!g_gsuffix) showerror("gsuffix is undefined."); |
214 |
|
|
if(!g_dirindex) showerror("dirindex is undefined."); |
215 |
|
|
g_replexprs = rlist; |
216 |
|
|
if(!g_perpage) showerror("perpage is undefined."); |
217 |
|
|
g_extattrs = alist; |
218 |
|
|
if(g_snipwwidth < 0) showerror("snipwwidth is undefined."); |
219 |
|
|
if(g_sniphwidth < 0) showerror("sniphwidth is undefined."); |
220 |
|
|
if(g_snipawidth < 0) showerror("snipawidth is undefined."); |
221 |
|
|
if(g_condgstep < 1) showerror("condgstep is undefined."); |
222 |
|
|
if(!g_spcache) showerror("spcache is undefined."); |
223 |
|
|
/* read parameters */ |
224 |
|
|
params = getparameters(); |
225 |
|
|
cbglobalgc(params, (void (*)(void *))cbmapclose); |
226 |
|
|
if(!(p_phrase = cbmapget(params, "phrase", -1, NULL))) p_phrase = ""; |
227 |
|
|
while(*p_phrase == ' ' || *p_phrase == '\t'){ |
228 |
|
|
p_phrase++; |
229 |
|
|
} |
230 |
|
|
if(!(p_attr = cbmapget(params, "attr", -1, NULL))) p_attr = ""; |
231 |
|
|
while(*p_attr == ' ' || *p_attr == '\t'){ |
232 |
|
|
p_attr++; |
233 |
|
|
} |
234 |
|
|
if(!(p_attrval = cbmapget(params, "attrval", -1, NULL))) p_attrval = ""; |
235 |
|
|
while(*p_attrval == ' ' || *p_attrval == '\t'){ |
236 |
|
|
p_attrval++; |
237 |
|
|
} |
238 |
|
|
if(cbstrfwmatch(p_attr, "gstep=")){ |
239 |
|
|
g_condgstep = atoi(p_attr + 6); |
240 |
|
|
p_attr = ""; |
241 |
|
|
} |
242 |
|
|
if(cbstrfwmatch(p_attr, "tfidf=")){ |
243 |
|
|
g_dotfidf = !cbstricmp(p_attr + 6, "true"); |
244 |
|
|
p_attr = ""; |
245 |
|
|
} |
246 |
|
|
if(!(p_order = cbmapget(params, "order", -1, NULL))) p_order = ""; |
247 |
|
|
while(*p_order == ' ' || *p_order == '\t'){ |
248 |
|
|
p_order++; |
249 |
|
|
} |
250 |
|
|
if((rp = cbmapget(params, "perpage", -1, NULL)) != NULL) p_perpage = atoi(rp); |
251 |
|
|
if(p_perpage < 1) p_perpage = DEFPERPAGE; |
252 |
|
|
if((rp = cbmapget(params, "detail", -1, NULL)) != NULL) p_detail = atoi(rp); |
253 |
|
|
if(p_detail < 1) p_detail = 0; |
254 |
|
|
if((rp = cbmapget(params, "similar", -1, NULL)) != NULL) p_similar = atoi(rp); |
255 |
|
|
if(p_similar < 1) p_similar = 0; |
256 |
|
|
if((rp = cbmapget(params, "pagenum", -1, NULL)) != NULL) p_pagenum = atoi(rp); |
257 |
|
|
if(p_pagenum < 1) p_pagenum = 1; |
258 |
|
|
if((rp = cbmapget(params, "enc", -1, NULL)) != NULL){ |
259 |
|
|
if((tmp = est_iconv(p_phrase, -1, rp, "UTF-8", NULL, NULL)) != NULL){ |
260 |
|
|
p_phrase = tmp; |
261 |
|
|
cbglobalgc(tmp, free); |
262 |
|
|
} |
263 |
|
|
if((tmp = est_iconv(p_attr, -1, rp, "UTF-8", NULL, NULL)) != NULL){ |
264 |
|
|
p_attr = tmp; |
265 |
|
|
cbglobalgc(tmp, free); |
266 |
|
|
} |
267 |
|
|
if((tmp = est_iconv(p_attrval, -1, rp, "UTF-8", NULL, NULL)) != NULL){ |
268 |
|
|
p_attrval = tmp; |
269 |
|
|
cbglobalgc(tmp, free); |
270 |
|
|
} |
271 |
|
|
if((tmp = est_iconv(p_order, -1, rp, "UTF-8", NULL, NULL)) != NULL){ |
272 |
|
|
p_order = tmp; |
273 |
|
|
cbglobalgc(tmp, free); |
274 |
|
|
} |
275 |
|
|
} |
276 |
|
|
/* read the other files and the database */ |
277 |
|
|
if(!g_db){ |
278 |
|
|
if(!(tmp = cbreadfile(g_tmplfile, NULL))) showerror("the template file is missing."); |
279 |
|
|
cbglobalgc(tmp, free); |
280 |
|
|
g_tmpltext = tmp; |
281 |
|
|
if(!(tmp = cbreadfile(g_topfile, NULL))) showerror("the top page file is missing."); |
282 |
|
|
cbglobalgc(tmp, free); |
283 |
|
|
g_toptext = tmp; |
284 |
|
|
for(i = 0; i <= LOCKRETRYNUM; i++){ |
285 |
|
|
if((g_db = est_db_open(g_indexname, ESTDBREADER | ESTDBLCKNB, &ecode)) != NULL) break; |
286 |
|
|
if(ecode != ESTELOCK) showerror("the index is missing or broken."); |
287 |
|
|
est_usleep(1000 * 1000); |
288 |
|
|
} |
289 |
|
|
if(!g_db) showerror("the index is being updated now."); |
290 |
|
|
cbglobalgc(g_db, (void (*)(void *))myestdbclose); |
291 |
|
|
if(g_spcache[0] != '\0') est_db_set_special_cache(g_db, g_spcache, SPCACHEMNUM); |
292 |
|
|
} |
293 |
|
|
setsimilarphrase(); |
294 |
|
|
/* show the page */ |
295 |
|
|
showpage(); |
296 |
|
|
/* output the log message */ |
297 |
|
|
outputlog(); |
298 |
|
|
return 0; |
299 |
|
|
} |
300 |
|
|
|
301 |
|
|
|
302 |
|
|
/* show the error page and exit
   `msg' specifies the message to be shown after the response headers.
   This function does not return; the process exits with status 1. */
static void showerror(const char *msg){
  printf("Status: 500 Internal Server Error\r\n"
         "Content-Type: text/plain; charset=UTF-8\r\n"
         "\r\n"
         "Error: %s\n", msg);
  exit(1);
}
310 |
|
|
|
311 |
|
|
|
312 |
|
|
/* skip the label of a line
   `str' specifies a line of the form "label: value".
   The return value is the pointer into `str' just after the first colon and the spaces and
   tabs following it, or an empty string if the line has no colon. */
static const char *skiplabel(const char *str){
  const char *val = strchr(str, ':');
  if(val == NULL) return "";
  val++;
  return val + strspn(val, " \t");
}
321 |
|
|
|
322 |
|
|
|
323 |
|
|
/* get CGI parameters */ |
324 |
|
|
static CBMAP *getparameters(void){ |
325 |
|
|
int maxlen = 1024 * 1024 * 32; |
326 |
|
|
CBMAP *map, *attrs; |
327 |
|
|
CBLIST *pairs, *parts; |
328 |
|
|
const char *rp, *body; |
329 |
|
|
char *buf, *key, *val, *dkey, *dval, *wp, *bound, *fbuf, *aname; |
330 |
|
|
int i, len, c, blen, flen; |
331 |
|
|
map = cbmapopenex(37); |
332 |
|
|
buf = NULL; |
333 |
|
|
len = 0; |
334 |
|
|
if((rp = getenv("REQUEST_METHOD")) != NULL && !strcmp(rp, "POST") && |
335 |
|
|
(rp = getenv("CONTENT_LENGTH")) != NULL && (len = atoi(rp)) > 0){ |
336 |
|
|
if(len > maxlen) len = maxlen; |
337 |
|
|
buf = cbmalloc(len + 1); |
338 |
|
|
for(i = 0; i < len && (c = getchar()) != EOF; i++){ |
339 |
|
|
buf[i] = c; |
340 |
|
|
} |
341 |
|
|
buf[i] = '\0'; |
342 |
|
|
if(i != len){ |
343 |
|
|
free(buf); |
344 |
|
|
buf = NULL; |
345 |
|
|
} |
346 |
|
|
} else if((rp = getenv("QUERY_STRING")) != NULL){ |
347 |
|
|
buf = cbmemdup(rp, -1); |
348 |
|
|
len = strlen(buf); |
349 |
|
|
} |
350 |
|
|
if(buf && len > 0){ |
351 |
|
|
if((rp = getenv("CONTENT_TYPE")) != NULL && cbstrfwmatch(rp, "multipart/form-data") && |
352 |
|
|
(rp = strstr(rp, "boundary=")) != NULL){ |
353 |
|
|
rp += 9; |
354 |
|
|
bound = cbmemdup(rp, -1); |
355 |
|
|
if((wp = strchr(bound, ';')) != NULL) *wp = '\0'; |
356 |
|
|
parts = cbmimeparts(buf, len, bound); |
357 |
|
|
for(i = 0; i < cblistnum(parts); i++){ |
358 |
|
|
body = cblistval(parts, i, &blen); |
359 |
|
|
attrs = cbmapopen(); |
360 |
|
|
fbuf = cbmimebreak(body, blen, attrs, &flen); |
361 |
|
|
if((rp = cbmapget(attrs, "NAME", -1, NULL)) != NULL){ |
362 |
|
|
cbmapput(map, rp, -1, fbuf, flen, FALSE); |
363 |
|
|
aname = cbsprintf("%s-filename", rp); |
364 |
|
|
if((rp = cbmapget(attrs, "FILENAME", -1, NULL)) != NULL) |
365 |
|
|
cbmapput(map, aname, -1, rp, -1, FALSE); |
366 |
|
|
free(aname); |
367 |
|
|
} |
368 |
|
|
free(fbuf); |
369 |
|
|
cbmapclose(attrs); |
370 |
|
|
} |
371 |
|
|
cblistclose(parts); |
372 |
|
|
free(bound); |
373 |
|
|
} else { |
374 |
|
|
pairs = cbsplit(buf, -1, "&"); |
375 |
|
|
for(i = 0; i < cblistnum(pairs); i++){ |
376 |
|
|
key = cbmemdup(cblistval(pairs, i, NULL), -1); |
377 |
|
|
if((val = strchr(key, '=')) != NULL){ |
378 |
|
|
*(val++) = '\0'; |
379 |
|
|
dkey = cburldecode(key, NULL); |
380 |
|
|
dval = cburldecode(val, NULL); |
381 |
|
|
cbmapput(map, dkey, -1, dval, -1, FALSE); |
382 |
|
|
free(dval); |
383 |
|
|
free(dkey); |
384 |
|
|
} |
385 |
|
|
free(key); |
386 |
|
|
} |
387 |
|
|
cblistclose(pairs); |
388 |
|
|
} |
389 |
|
|
} |
390 |
|
|
free(buf); |
391 |
|
|
return map; |
392 |
|
|
} |
393 |
|
|
|
394 |
|
|
|
395 |
|
|
/* close the database */ |
396 |
|
|
static void myestdbclose(ESTDB *db){ |
397 |
|
|
int ecode; |
398 |
|
|
est_db_close(db, &ecode); |
399 |
|
|
} |
400 |
|
|
|
401 |
|
|
|
402 |
|
|
/* output escaped string
   `format' specifies a printf-like format string.  The conversions `s', `d', `o', `u', `x',
   `X', `c', `e', `E', `f', `g', `G' and `%' are passed to printf together with any flag, width
   and precision characters.  In addition, `@' outputs a string with the XML meta characters
   replaced by entity references and control characters dropped, and `?' outputs a string in
   URL encoding.  A null pointer given for a string is shown as "(null)".  Other conversion
   characters are ignored without consuming an argument.
   The other arguments are the values for the conversions. */
static void xmlprintf(const char *format, ...){
  va_list ap;
  char *tmp, cbuf[32];
  unsigned char c;
  int cblen;
  va_start(ap, format);
  while(*format != '\0'){
    if(*format == '%'){
      cbuf[0] = '%';
      cblen = 1;
      format++;
      /* collect flags, width and precision, leaving room for the conversion character and the
         terminator */
      while(*format != '\0' && strchr("0123456789 .+-", *format) &&
            cblen < (int)sizeof(cbuf) - 2){
        cbuf[cblen++] = *format;
        format++;
      }
      /* an unfinished conversion at the end of the format: do not step over the terminator */
      if(*format == '\0') break;
      cbuf[cblen++] = *format;
      cbuf[cblen] = '\0';
      switch(*format){
      case 's':
        tmp = va_arg(ap, char *);
        if(!tmp) tmp = "(null)";
        printf(cbuf, tmp);
        break;
      case 'd':
        printf(cbuf, va_arg(ap, int));
        break;
      case 'o': case 'u': case 'x': case 'X': case 'c':
        printf(cbuf, va_arg(ap, unsigned int));
        break;
      case 'e': case 'E': case 'f': case 'g': case 'G':
        printf(cbuf, va_arg(ap, double));
        break;
      case '@':
        tmp = va_arg(ap, char *);
        if(!tmp) tmp = "(null)";
        while(*tmp){
          switch(*tmp){
          case '&': printf("&amp;"); break;
          case '<': printf("&lt;"); break;
          case '>': printf("&gt;"); break;
          case '"': printf("&quot;"); break;
          default:
            /* control characters other than tab, LF, VT, FF and CR are dropped */
            if(!((*tmp >= 0 && *tmp <= 0x8) || (*tmp >= 0x0e && *tmp <= 0x1f))) putchar(*tmp);
            break;
          }
          tmp++;
        }
        break;
      case '?':
        tmp = va_arg(ap, char *);
        if(!tmp) tmp = "(null)";
        while(*tmp){
          c = *(unsigned char *)tmp;
          if((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             (c >= '0' && c <= '9') || (c != '\0' && strchr("_-.", c))){
            putchar(c);
          } else {
            printf("%%%02X", c);
          }
          tmp++;
        }
        break;
      case '%':
        putchar('%');
        break;
      default:
        break;
      }
    } else {
      putchar(*format);
    }
    format++;
  }
  va_end(ap);
}
476 |
|
|
|
477 |
|
|
|
478 |
|
|
/* create a vector of keywords */ |
479 |
|
|
static CBMAP *vectorizer(void *db, int id, void *kwdb){ |
480 |
|
|
CBMAP *kwords; |
481 |
|
|
char *mbuf; |
482 |
|
|
int msiz; |
483 |
|
|
if(!(mbuf = crget((CURIA *)kwdb, (char *)&id, sizeof(int), 0, -1, &msiz))) return NULL; |
484 |
|
|
kwords = cbmapload(mbuf, msiz); |
485 |
|
|
free(mbuf); |
486 |
|
|
return kwords; |
487 |
|
|
} |
488 |
|
|
|
489 |
|
|
|
490 |
|
|
/* set the phrase for similarity search */ |
491 |
|
|
static void setsimilarphrase(void){ |
492 |
|
|
ESTDOC *doc; |
493 |
|
|
CBMAP *svmap; |
494 |
|
|
CBDATUM *datum; |
495 |
|
|
const char *kbuf, *vbuf; |
496 |
|
|
char *ptr; |
497 |
|
|
int ksiz, vsiz; |
498 |
|
|
if(!cbstrfwimatch(p_phrase, ESTOPSIMILAR) && p_similar < 1) return; |
499 |
|
|
if(g_smlrvnum < 1){ |
500 |
|
|
p_phrase = ""; |
501 |
|
|
return; |
502 |
|
|
} |
503 |
|
|
if(!g_kwdb){ |
504 |
|
|
ptr = cbsprintf("%s%c%s", g_indexname, ESTPATHCHR, KWDBNAME); |
505 |
|
|
if((g_kwdb = cropen(ptr, CR_OREADER, -1, -1)) != NULL){ |
506 |
|
|
cbglobalgc(g_kwdb, (void (*)(void *))crclose); |
507 |
|
|
est_db_set_vectorizer(g_db, vectorizer, g_kwdb); |
508 |
|
|
} |
509 |
|
|
free(ptr); |
510 |
|
|
} |
511 |
|
|
if(p_similar < 1) return; |
512 |
|
|
svmap = g_kwdb ? vectorizer(g_db, p_similar, g_kwdb) : NULL; |
513 |
|
|
if(!svmap && (doc = est_db_get_doc(g_db, p_similar, 0)) != NULL){ |
514 |
|
|
svmap = est_db_etch_doc(g_dotfidf ? g_db : NULL, doc, g_smlrvnum); |
515 |
|
|
est_doc_delete(doc); |
516 |
|
|
} else if(!svmap){ |
517 |
|
|
return; |
518 |
|
|
} |
519 |
|
|
datum = cbdatumopen(ESTOPSIMILAR, -1); |
520 |
|
|
cbmapiterinit(svmap); |
521 |
|
|
while((kbuf = cbmapiternext(svmap, &ksiz)) != NULL){ |
522 |
|
|
vbuf = cbmapget(svmap, kbuf, ksiz, &vsiz); |
523 |
|
|
cbdatumcat(datum, " WITH ", -1); |
524 |
|
|
cbdatumcat(datum, vbuf, vsiz); |
525 |
|
|
cbdatumcat(datum, " ", 1); |
526 |
|
|
cbdatumcat(datum, kbuf, ksiz); |
527 |
|
|
} |
528 |
|
|
ptr = cbdatumtomalloc(datum, NULL); |
529 |
|
|
cbglobalgc(ptr, free); |
530 |
|
|
p_phrase = ptr; |
531 |
|
|
cbmapclose(svmap); |
532 |
|
|
} |
533 |
|
|
|
534 |
|
|
|
535 |
|
|
/* show the page */ |
536 |
|
|
static void showpage(void){ |
537 |
|
|
ESTCOND *cond; |
538 |
|
|
ESTDOC **docs; |
539 |
|
|
CBMAP *hints; |
540 |
|
|
CBLIST *elems; |
541 |
|
|
const char *rp; |
542 |
|
|
char *tmp, numbuf[NUMBUFSIZ]; |
543 |
|
|
int i, tnum, max, *res, rnum, sc, dnum, miss; |
544 |
|
|
printf("Cache-Control: no-cache, must-revalidate, no-transform\r\n"); |
545 |
|
|
printf("Pragma: no-cache\r\n"); |
546 |
|
|
printf("Content-Disposition: inline; filename=%s\r\n", g_scriptname); |
547 |
|
|
printf("Content-Type: text/html; charset=UTF-8\r\n"); |
548 |
|
|
printf("\r\n"); |
549 |
|
|
g_etime = est_gettimeofday(); |
550 |
|
|
cond = est_cond_new(); |
551 |
|
|
if(p_phrase[0] != '\0') est_cond_set_phrase(cond, p_phrase); |
552 |
|
|
if(p_attr[0] != '\0'){ |
553 |
|
|
if(p_attrval[0] != '\0'){ |
554 |
|
|
tmp = cbsprintf("%s %s", p_attr, p_attrval); |
555 |
|
|
est_cond_add_attr(cond, tmp); |
556 |
|
|
free(tmp); |
557 |
|
|
} else { |
558 |
|
|
est_cond_add_attr(cond, p_attr); |
559 |
|
|
} |
560 |
|
|
} |
561 |
|
|
if(p_order[0] != '\0') est_cond_set_order(cond, p_order); |
562 |
|
|
switch(g_condgstep){ |
563 |
|
|
case 1: |
564 |
|
|
est_cond_set_options(cond, ESTCONDSURE); |
565 |
|
|
break; |
566 |
|
|
case 2: |
567 |
|
|
est_cond_set_options(cond, ESTCONDUSU); |
568 |
|
|
break; |
569 |
|
|
case 3: |
570 |
|
|
est_cond_set_options(cond, ESTCONDFAST); |
571 |
|
|
break; |
572 |
|
|
case 4: |
573 |
|
|
est_cond_set_options(cond, ESTCONDAGIT); |
574 |
|
|
break; |
575 |
|
|
} |
576 |
|
|
if(!g_dotfidf) est_cond_set_options(cond, ESTCONDNOIDF); |
577 |
|
|
if(g_smplphrase) est_cond_set_options(cond, ESTCONDSIMPLE); |
578 |
|
|
if(g_showscore) est_cond_set_options(cond, ESTCONDSCFB); |
579 |
|
|
tnum = 0; |
580 |
|
|
max = p_pagenum * p_perpage * 1.3 + 1; |
581 |
|
|
do { |
582 |
|
|
est_cond_set_max(cond, max); |
583 |
|
|
hints = cbmapopenex(MINIBNUM); |
584 |
|
|
res = est_db_search(g_db, cond, &rnum, hints); |
585 |
|
|
if(g_candetail && p_detail > 0){ |
586 |
|
|
if(rnum < 1) cbmapput(hints, "", 0, "1", 1, TRUE); |
587 |
|
|
free(res); |
588 |
|
|
res = cbmalloc(sizeof(int)); |
589 |
|
|
res[0] = p_detail; |
590 |
|
|
rnum = 1; |
591 |
|
|
} |
592 |
|
|
docs = cbmalloc(rnum * sizeof(ESTDOC *) + 1); |
593 |
|
|
dnum = 0; |
594 |
|
|
miss = 0; |
595 |
|
|
for(i = 0; i < rnum; i++){ |
596 |
|
|
if(!(docs[dnum] = est_db_get_doc(g_db, res[i], dnum < p_pagenum * p_perpage ? 0 : |
597 |
|
|
ESTGDNOATTR | ESTGDNOTEXT))){ |
598 |
|
|
miss++; |
599 |
|
|
continue; |
600 |
|
|
} |
601 |
|
|
if((sc = est_cond_score(cond, i)) >= 0){ |
602 |
|
|
sprintf(numbuf, "%d", sc); |
603 |
|
|
est_doc_add_attr(docs[dnum], DATTRSCORE, numbuf); |
604 |
|
|
} |
605 |
|
|
dnum++; |
606 |
|
|
} |
607 |
|
|
if(tnum <= MISSRETRYNUM && miss > 0 && max <= rnum && dnum < p_pagenum * p_perpage + 1){ |
608 |
|
|
for(i = 0; i < dnum; i++){ |
609 |
|
|
est_doc_delete(docs[i]); |
610 |
|
|
} |
611 |
|
|
free(docs); |
612 |
|
|
free(res); |
613 |
|
|
cbmapclose(hints); |
614 |
|
|
max *= MISSINCRATIO; |
615 |
|
|
tnum++; |
616 |
|
|
continue; |
617 |
|
|
} |
618 |
|
|
break; |
619 |
|
|
} while(TRUE); |
620 |
|
|
g_etime = est_gettimeofday() - g_etime; |
621 |
|
|
elems = cbxmlbreak(g_tmpltext, FALSE); |
622 |
|
|
for(i = 0; i < cblistnum(elems); i++){ |
623 |
|
|
rp = cblistval(elems, i, NULL); |
624 |
|
|
if(!strcmp(rp, "<!--ESTFORM-->")){ |
625 |
|
|
showform(); |
626 |
|
|
} else if(!strcmp(rp, "<!--ESTRESULT-->")){ |
627 |
|
|
if(p_phrase[0] == '\0' && p_attr[0] == '\0' && p_detail < 1){ |
628 |
|
|
showtop(); |
629 |
|
|
} else { |
630 |
|
|
showresult(docs, dnum, hints, miss); |
631 |
|
|
} |
632 |
|
|
} else if(!strcmp(rp, "<!--ESTINFO-->")){ |
633 |
|
|
showinfo(); |
634 |
|
|
} else { |
635 |
|
|
printf("%s", rp); |
636 |
|
|
} |
637 |
|
|
} |
638 |
|
|
for(i = 0; i < dnum; i++){ |
639 |
|
|
est_doc_delete(docs[i]); |
640 |
|
|
} |
641 |
|
|
cblistclose(elems); |
642 |
|
|
free(docs); |
643 |
|
|
free(res); |
644 |
|
|
cbmapclose(hints); |
645 |
|
|
est_cond_delete(cond); |
646 |
|
|
} |
647 |
|
|
|
648 |
|
|
|
649 |
|
|
/* show the form */ |
650 |
|
|
static void showform(void){ |
651 |
|
|
CBLIST *list; |
652 |
|
|
const char *elem; |
653 |
|
|
int i, num; |
654 |
|
|
xmlprintf("<div id=\"estform\" class=\"estform\">\n"); |
655 |
|
|
xmlprintf("<form action=\"%@\" method=\"get\" id=\"form_self\">\n", g_scriptname); |
656 |
|
|
xmlprintf("<div class=\"form_basic\">\n"); |
657 |
|
|
xmlprintf("<input type=\"text\" name=\"phrase\" value=\"%@\"" |
658 |
|
|
" size=\"80\" id=\"phrase\" class=\"text\" tabindex=\"%d\" accesskey=\"0\" />\n", |
659 |
|
|
p_phrase, ++g_tabidx); |
660 |
|
|
xmlprintf("<input type=\"submit\" value=\"Search\"" |
661 |
|
|
" id=\"search\" class=\"submit\" tabindex=\"%d\" accesskey=\"1\" />\n", |
662 |
|
|
++g_tabidx); |
663 |
|
|
xmlprintf("</div>\n"); |
664 |
|
|
xmlprintf("<div class=\"form_extension\">\n"); |
665 |
|
|
xmlprintf("<select name=\"perpage\" id=\"perpage\" tabindex=\"%d\">\n", ++g_tabidx); |
666 |
|
|
list = cbsplit(g_perpage, -1, ","); |
667 |
|
|
for(i = 0; i < cblistnum(list); i++){ |
668 |
|
|
elem = cblistval(list, i, NULL); |
669 |
|
|
if(elem[0] == '\0') continue; |
670 |
|
|
num = atoi(elem); |
671 |
|
|
xmlprintf("<option value=\"%d\"%s>%d</option>\n", |
672 |
|
|
num, num == p_perpage ? " selected=\"selected\"" : "", num); |
673 |
|
|
} |
674 |
|
|
cblistclose(list); |
675 |
|
|
xmlprintf("</select>\n"); |
676 |
|
|
xmlprintf("per page, with\n"); |
677 |
|
|
if(g_attrselect){ |
678 |
|
|
xmlprintf("<select name=\"attr\" id=\"attr\" tabindex=\"%d\">\n", ++g_tabidx); |
679 |
|
|
xmlprintf("<option value=\"\">--</option>\n"); |
680 |
|
|
xmlprintf("<option value=\"@title ISTRINC\"%s>title including</option>\n", |
681 |
|
|
cbstrfwmatch(p_attr, "@title ISTRINC") ? " selected=\"selected\"" : ""); |
682 |
|
|
xmlprintf("<option value=\"@title ISTRBW\"%s>title beginning with</option>\n", |
683 |
|
|
cbstrfwmatch(p_attr, "@title ISTRBW") ? " selected=\"selected\"" : ""); |
684 |
|
|
xmlprintf("<option value=\"@title ISTREW\"%s>title ending with</option>\n", |
685 |
|
|
cbstrfwmatch(p_attr, "@title ISTREW") ? " selected=\"selected\"" : ""); |
686 |
|
|
xmlprintf("<option value=\"@author ISTRINC\"%s>author including</option>\n", |
687 |
|
|
cbstrfwmatch(p_attr, "@author ISTRINC") ? " selected=\"selected\"" : ""); |
688 |
|
|
xmlprintf("<option value=\"@author ISTRBW\"%s>author beginning with</option>\n", |
689 |
|
|
cbstrfwmatch(p_attr, "@author ISTRBW") ? " selected=\"selected\"" : ""); |
690 |
|
|
xmlprintf("<option value=\"@author ISTREW\"%s>author ending with</option>\n", |
691 |
|
|
cbstrfwmatch(p_attr, "@author ISTREW") ? " selected=\"selected\"" : ""); |
692 |
|
|
xmlprintf("<option value=\"@mdate NUMLT\"%s>date less than</option>\n", |
693 |
|
|
cbstrfwmatch(p_attr, "@mdate NUMLT") ? " selected=\"selected\"" : ""); |
694 |
|
|
xmlprintf("<option value=\"@mdate NUMGE\"%s>date not less than</option>\n", |
695 |
|
|
cbstrfwmatch(p_attr, "@mdate NUMGE") ? " selected=\"selected\"" : ""); |
696 |
|
|
xmlprintf("<option value=\"@size NUMLT\"%s>size less than</option>\n", |
697 |
|
|
cbstrfwmatch(p_attr, "@size NUMLT") ? " selected=\"selected\"" : ""); |
698 |
|
|
xmlprintf("<option value=\"@size NUMGE\"%s>size not less than</option>\n", |
699 |
|
|
cbstrfwmatch(p_attr, "@size NUMGE") ? " selected=\"selected\"" : ""); |
700 |
|
|
xmlprintf("</select>\n"); |
701 |
|
|
xmlprintf("<input type=\"text\" name=\"attrval\" value=\"%@\"" |
702 |
|
|
" size=\"16\" id=\"attrval\" class=\"text\" tabindex=\"%d\" accesskey=\"2\" />\n", |
703 |
|
|
p_attrval, ++g_tabidx); |
704 |
|
|
xmlprintf(", order by\n"); |
705 |
|
|
xmlprintf("<select name=\"order\" id=\"order\" tabindex=\"%d\">\n", ++g_tabidx); |
706 |
|
|
xmlprintf("<option value=\"\">score</option>\n"); |
707 |
|
|
xmlprintf("<option value=\"@title STRA\"%s>title (asc)</option>\n", |
708 |
|
|
!strcmp(p_order, "@title STRA") ? " selected=\"selected\"" : ""); |
709 |
|
|
xmlprintf("<option value=\"@title STRD\"%s>title (desc)</option>\n", |
710 |
|
|
!strcmp(p_order, "@title STRD") ? " selected=\"selected\"" : ""); |
711 |
|
|
xmlprintf("<option value=\"@author STRA\"%s>author (asc)</option>\n", |
712 |
|
|
!strcmp(p_order, "@author STRA") ? " selected=\"selected\"" : ""); |
713 |
|
|
xmlprintf("<option value=\"@author STRD\"%s>author (desc)</option>\n", |
714 |
|
|
!strcmp(p_order, "@author STRD") ? " selected=\"selected\"" : ""); |
715 |
|
|
xmlprintf("<option value=\"@mdate NUMA\"%s>date (asc)</option>\n", |
716 |
|
|
!strcmp(p_order, "@mdate NUMA") ? " selected=\"selected\"" : ""); |
717 |
|
|
xmlprintf("<option value=\"@mdate NUMD\"%s>date (desc)</option>\n", |
718 |
|
|
!strcmp(p_order, "@mdate NUMD") ? " selected=\"selected\"" : ""); |
719 |
|
|
xmlprintf("<option value=\"@size NUMA\"%s>size (asc)</option>\n", |
720 |
|
|
!strcmp(p_order, "@size NUMA") ? " selected=\"selected\"" : ""); |
721 |
|
|
xmlprintf("<option value=\"@size NUMD\"%s>size (desc)</option>\n", |
722 |
|
|
!strcmp(p_order, "@size NUMD") ? " selected=\"selected\"" : ""); |
723 |
|
|
xmlprintf("</select>\n"); |
724 |
|
|
} else { |
725 |
|
|
xmlprintf("<input type=\"text\" name=\"attr\" value=\"%@\"" |
726 |
|
|
" size=\"24\" id=\"attr\" class=\"text\" tabindex=\"%d\" accesskey=\"2\" />\n", |
727 |
|
|
p_attr, ++g_tabidx); |
728 |
|
|
xmlprintf(", order by\n"); |
729 |
|
|
xmlprintf("<input type=\"text\" name=\"order\" value=\"%@\"" |
730 |
|
|
" size=\"24\" id=\"order\" class=\"text\" tabindex=\"%d\" accesskey=\"3\" />\n", |
731 |
|
|
p_order, ++g_tabidx); |
732 |
|
|
} |
733 |
|
|
xmlprintf("</div>\n"); |
734 |
|
|
xmlprintf("</form>\n"); |
735 |
|
|
xmlprintf("</div>\n"); |
736 |
|
|
} |
737 |
|
|
|
738 |
|
|
|
739 |
|
|
/* show the top message */ |
740 |
|
|
static void showtop(void){ |
741 |
|
|
printf("%s", g_toptext); |
742 |
|
|
} |
743 |
|
|
|
744 |
|
|
|
745 |
|
|
/* show the result */ |
746 |
|
|
static void showresult(ESTDOC **docs, int dnum, CBMAP *hints, int miss){ |
747 |
|
|
CBMAP *cnames; |
748 |
|
|
CBLIST *words; |
749 |
|
|
const char *key, *myphrase; |
750 |
|
|
char cname[NUMBUFSIZ]; |
751 |
|
|
int i, hits, snum, start, end, cnum, pnum; |
752 |
|
|
xmlprintf("<div id=\"estresult\" class=\"estresult\">\n"); |
753 |
|
|
hits = atoi(cbmapget(hints, "", 0, NULL)) - miss; |
754 |
|
|
start = (p_pagenum - 1) * p_perpage; |
755 |
|
|
end = p_pagenum * p_perpage; |
756 |
|
|
if(end > dnum) end = dnum; |
757 |
|
|
xmlprintf("<div class=\"resinfo\">"); |
758 |
|
|
xmlprintf("Results of <strong>%d</strong> - <strong>%d</strong>", |
759 |
|
|
start + (hits > 0 ? 1 : 0), end); |
760 |
|
|
xmlprintf(" of about <strong>%d</strong>", hits); |
761 |
|
|
if(p_phrase[0] != '\0' && strlen(p_phrase) < 128) |
762 |
|
|
xmlprintf(" for <strong>%@</strong>", p_phrase); |
763 |
|
|
if(g_etime > 0.0) xmlprintf(" (%.3f sec.)", g_etime / 1000.0); |
764 |
|
|
if(miss > p_perpage * p_pagenum) xmlprintf("*"); |
765 |
|
|
xmlprintf("</div>\n"); |
766 |
|
|
if(cbmaprnum(hints) > 2 || (p_phrase[0] != '\0' && p_attr[0] != '\0')){ |
767 |
|
|
xmlprintf("<div class=\"hints\">"); |
768 |
|
|
cbmapiterinit(hints); |
769 |
|
|
i = 0; |
770 |
|
|
while((key = cbmapiternext(hints, NULL)) != NULL){ |
771 |
|
|
if(key[0] == '\0') continue; |
772 |
|
|
if(i++ > 0) xmlprintf(", "); |
773 |
|
|
xmlprintf("<span class=\"hword\">%s (%s)</span>", key, cbmapget(hints, key, -1, NULL)); |
774 |
|
|
} |
775 |
|
|
xmlprintf("</div>\n"); |
776 |
|
|
} |
777 |
|
|
words = cblistopen(); |
778 |
|
|
cbmapiterinit(hints); |
779 |
|
|
while((key = cbmapiternext(hints, NULL)) != NULL){ |
780 |
|
|
if(key[0] == '\0' || atoi(cbmapget(hints, key, -1, NULL)) < 0) continue; |
781 |
|
|
cblistpush(words, key, -1); |
782 |
|
|
} |
783 |
|
|
cnames = cbmapopenex(MINIBNUM); |
784 |
|
|
cnum = 0; |
785 |
|
|
for(i = 0; i < cblistnum(words); i++){ |
786 |
|
|
sprintf(cname, "key%d", ++cnum); |
787 |
|
|
cbmapput(cnames, cblistval(words, i, NULL), -1, cname, -1, FALSE); |
788 |
|
|
} |
789 |
|
|
for(snum = start; snum < end; snum++){ |
790 |
|
|
showdoc(docs[snum], words, cnames, g_candetail && p_detail > 0); |
791 |
|
|
} |
792 |
|
|
cbmapclose(cnames); |
793 |
|
|
cblistclose(words); |
794 |
|
|
if(dnum < 1) xmlprintf("<p class=\"note\">Your search did not match any documents.</p>\n"); |
795 |
|
|
myphrase = p_similar > 0 ? "" : p_phrase; |
796 |
|
|
xmlprintf("<div class=\"paging\">\n"); |
797 |
|
|
if(p_pagenum > 1){ |
798 |
|
|
xmlprintf("<a href=\"%@?phrase=%?&attr=%?&attrval=%?&order=%?" |
799 |
|
|
"&perpage=%d&pagenum=%d&similar=%d\" class=\"navi\">PREV</a>\n", |
800 |
|
|
g_scriptname, myphrase, p_attr, p_attrval, p_order, |
801 |
|
|
p_perpage, p_pagenum - 1, p_similar); |
802 |
|
|
} else { |
803 |
|
|
xmlprintf("<span class=\"void\">PREV</span>\n"); |
804 |
|
|
} |
805 |
|
|
pnum = (hits - 1 - (hits - 1) % p_perpage + p_perpage) / p_perpage; |
806 |
|
|
if(hits > 0 && p_detail < 1){ |
807 |
|
|
for(i = p_pagenum > NAVIPAGES ? p_pagenum - NAVIPAGES + 1 : 1; |
808 |
|
|
i == 1 || (i <= pnum && i < p_pagenum + NAVIPAGES); i++){ |
809 |
|
|
if(i == p_pagenum){ |
810 |
|
|
printf("<span class=\"pnow\">%d</span>\n", i); |
811 |
|
|
} else { |
812 |
|
|
xmlprintf("<a href=\"%@?phrase=%?&attr=%?&attrval=%?&order=%?" |
813 |
|
|
"&perpage=%d&pagenum=%d&similar=%d\" class=\"pnum\">%d</a>\n", |
814 |
|
|
g_scriptname, myphrase, p_attr, p_attrval, p_order, p_perpage, i, p_similar, i); |
815 |
|
|
} |
816 |
|
|
} |
817 |
|
|
} |
818 |
|
|
if(snum < dnum){ |
819 |
|
|
xmlprintf("<a href=\"%@?phrase=%?&attr=%?&attrval=%?&order=%?" |
820 |
|
|
"&perpage=%d&pagenum=%d&similar=%d\" class=\"navi\">NEXT</a>\n", |
821 |
|
|
g_scriptname, myphrase, p_attr, p_attrval, p_order, |
822 |
|
|
p_perpage, p_pagenum + 1, p_similar); |
823 |
|
|
} else { |
824 |
|
|
xmlprintf("<span class=\"void\">NEXT</span>\n"); |
825 |
|
|
} |
826 |
|
|
xmlprintf("</div>\n"); |
827 |
|
|
xmlprintf("</div>\n"); |
828 |
|
|
} |
829 |
|
|
|
830 |
|
|
|
831 |
|
|
/* show a document */ |
832 |
|
|
static void showdoc(ESTDOC *doc, const CBLIST *words, CBMAP *cnames, int detail){ |
833 |
|
|
CBMAP *kwords; |
834 |
|
|
CBLIST *names, *lines; |
835 |
|
|
const char *uri, *title, *score, *val, *name, *line, *cname; |
836 |
|
|
char *turi, *tsv, *pv, *str; |
837 |
|
|
int i, id; |
838 |
|
|
id = est_doc_id(doc); |
839 |
|
|
if(!(uri = est_doc_attr(doc, ESTDATTRURI))) uri = "."; |
840 |
|
|
turi = makeshownuri(uri); |
841 |
|
|
if(!(title = est_doc_attr(doc, ESTDATTRTITLE))) title = ""; |
842 |
|
|
if(title[0] == '\0' && !(title = est_doc_attr(doc, DATTRLFILE))) title = ""; |
843 |
|
|
if(title[0] == '\0' && ((pv = strrchr(uri, '/')) != NULL)) title = pv + 1; |
844 |
|
|
if(title[0] == '\0') title = "(no title)"; |
845 |
|
|
if(!(score = est_doc_attr(doc, DATTRSCORE))) score = ""; |
846 |
|
|
xmlprintf("<dl class=\"doc\" id=\"doc_%d\">\n", id); |
847 |
|
|
xmlprintf("<dt>"); |
848 |
|
|
xmlprintf("<a href=\"%@\" class=\"doc_title\">%@</a>", turi, title); |
849 |
|
|
if(score[0] != '\0') xmlprintf(" <span class=\"doc_score\">%@</span>", score); |
850 |
|
|
xmlprintf("</dt>\n"); |
851 |
|
|
if(detail){ |
852 |
|
|
names = est_doc_attr_names(doc); |
853 |
|
|
for(i = 0; i < cblistnum(names); i++){ |
854 |
|
|
name = cblistval(names, i, NULL); |
855 |
|
|
if(name[0] != '_' && strcmp(name, ESTDATTRURI) && strcmp(name, ESTDATTRTITLE) && |
856 |
|
|
(val = est_doc_attr(doc, name)) != NULL && val[0] != '\0'){ |
857 |
|
|
xmlprintf("<dd class=\"doc_attr\">"); |
858 |
|
|
xmlprintf("%@: <span class=\"doc_val\">%@</span>", name, val); |
859 |
|
|
xmlprintf("</dd>\n"); |
860 |
|
|
} |
861 |
|
|
} |
862 |
|
|
cblistclose(names); |
863 |
|
|
if(g_smlrvnum > 0){ |
864 |
|
|
xmlprintf("<dd class=\"doc_attr\">"); |
865 |
|
|
xmlprintf("#vector: <span class=\"doc_val\">"); |
866 |
|
|
kwords = est_db_etch_doc(g_db, doc, g_smlrvnum); |
867 |
|
|
cbmapiterinit(kwords); |
868 |
|
|
for(i = 0; (name = cbmapiternext(kwords, NULL)) != NULL; i++){ |
869 |
|
|
if(i > 0) xmlprintf(", "); |
870 |
|
|
xmlprintf("%@ (%@)\n", name, cbmapget(kwords, name, -1, NULL)); |
871 |
|
|
} |
872 |
|
|
cbmapclose(kwords); |
873 |
|
|
xmlprintf("</span>"); |
874 |
|
|
xmlprintf("</dd>\n"); |
875 |
|
|
} |
876 |
|
|
} else { |
877 |
|
|
for(i = 0; i < cblistnum(g_extattrs); i++){ |
878 |
|
|
str = cbmemdup(cblistval(g_extattrs, i, NULL), -1); |
879 |
|
|
if((pv = strchr(str, '|')) != NULL){ |
880 |
|
|
*pv = '\0'; |
881 |
|
|
pv++; |
882 |
|
|
if((val = est_doc_attr(doc, str)) != NULL && val[0] != '\0'){ |
883 |
|
|
xmlprintf("<dd class=\"doc_attr\">"); |
884 |
|
|
xmlprintf("%@: <span class=\"doc_val\">%@</span>", pv, val); |
885 |
|
|
xmlprintf("</dd>\n"); |
886 |
|
|
} |
887 |
|
|
} |
888 |
|
|
free(str); |
889 |
|
|
} |
890 |
|
|
} |
891 |
|
|
xmlprintf("<dd class=\"doc_text\">"); |
892 |
|
|
tsv = est_doc_make_snippet(doc, words, detail ? INT_MAX : g_snipwwidth, |
893 |
|
|
detail ? INT_MAX : g_sniphwidth, g_snipawidth); |
894 |
|
|
lines = cbsplit(tsv, -1, "\n"); |
895 |
|
|
for(i = 0; i < cblistnum(lines); i++){ |
896 |
|
|
line = cblistval(lines, i, NULL); |
897 |
|
|
if(line[0] == '\0'){ |
898 |
|
|
if(i < cblistnum(lines) - 1) xmlprintf(" ... "); |
899 |
|
|
} else if((pv = strchr(line, '\t')) != NULL){ |
900 |
|
|
str = cbmemdup(line, pv - line); |
901 |
|
|
if(!(cname = cbmapget(cnames, pv + 1, -1, NULL))) cname = "key0"; |
902 |
|
|
xmlprintf("<strong class=\"key %@\">%@</strong>", cname, str); |
903 |
|
|
free(str); |
904 |
|
|
} else { |
905 |
|
|
xmlprintf("%@", line); |
906 |
|
|
} |
907 |
|
|
} |
908 |
|
|
cblistclose(lines); |
909 |
|
|
free(tsv); |
910 |
|
|
xmlprintf("</dd>\n"); |
911 |
|
|
xmlprintf("<dd class=\"doc_navi\">\n"); |
912 |
|
|
xmlprintf("<span class=\"doc_uri\">%@</span>\n", turi); |
913 |
|
|
if(g_candetail) |
914 |
|
|
xmlprintf("- <a href=\"%@?phrase=%?&detail=%d&perpage=%d\" class=\"detail\">" |
915 |
|
|
"[detail]</a>\n", g_scriptname, p_similar > 0 ? "" : p_phrase, id, p_perpage); |
916 |
|
|
if(g_smlrvnum > 0) |
917 |
|
|
xmlprintf("- <a href=\"%@?similar=%d&perpage=%d\" class=\"similar\">[similar]</a>\n", |
918 |
|
|
g_scriptname, id, p_perpage); |
919 |
|
|
xmlprintf("</dd>\n"); |
920 |
|
|
xmlprintf("</dl>\n"); |
921 |
|
|
free(turi); |
922 |
|
|
} |
923 |
|
|
|
924 |
|
|
|
925 |
|
|
/* make a URI to be shown */ |
926 |
|
|
static char *makeshownuri(const char *uri){ |
927 |
|
|
const char *prefix; |
928 |
|
|
char *turi, *file, *bef, *aft, *pv, *nuri, *wp; |
929 |
|
|
int i; |
930 |
|
|
if(cbstrfwimatch(uri, g_lprefix)) uri += strlen(g_lprefix); |
931 |
|
|
prefix = g_gprefix; |
932 |
|
|
if(cbstrfwimatch(uri, "file://") || cbstrfwimatch(uri, "ftp://") || |
933 |
|
|
cbstrfwimatch(uri, "http://") || cbstrfwimatch(uri, "https://")) prefix = ""; |
934 |
|
|
turi = cbsprintf("%s%s%s", prefix, uri, g_gsuffix); |
935 |
|
|
if(g_dirindex[0] != '\0' && (file = strrchr(turi, '/')) != NULL && |
936 |
|
|
!cbstricmp(file + 1, g_dirindex)){ |
937 |
|
|
file[1] = '\0'; |
938 |
|
|
} |
939 |
|
|
for(i = 0; i < cblistnum(g_replexprs); i++){ |
940 |
|
|
bef = cbmemdup(cblistval(g_replexprs, i, NULL), -1); |
941 |
|
|
if((pv = strstr(bef, "{{!}}")) != NULL){ |
942 |
|
|
*pv = '\0'; |
943 |
|
|
aft = pv + 5; |
944 |
|
|
} else { |
945 |
|
|
aft = ""; |
946 |
|
|
} |
947 |
|
|
if((pv = strstr(turi, bef)) != NULL){ |
948 |
|
|
nuri = cbmalloc(strlen(turi) + strlen(aft) + 1); |
949 |
|
|
wp = nuri; |
950 |
|
|
memcpy(wp, turi, pv - turi); |
951 |
|
|
wp += pv - turi; |
952 |
|
|
wp += sprintf(wp, "%s", aft); |
953 |
|
|
sprintf(wp, "%s", pv + strlen(bef)); |
954 |
|
|
free(turi); |
955 |
|
|
turi = nuri; |
956 |
|
|
} |
957 |
|
|
free(bef); |
958 |
|
|
} |
959 |
|
|
return turi; |
960 |
|
|
} |
961 |
|
|
|
962 |
|
|
|
963 |
|
|
/* show the top */ |
964 |
|
|
static void showinfo(void){ |
965 |
|
|
xmlprintf("<div id=\"estinfo\" class=\"estinfo\">"); |
966 |
|
|
xmlprintf("Powered by Hyper Estraier %@, with %d documents and %d words.", |
967 |
|
|
est_version, est_db_doc_num(g_db), est_db_word_num(g_db)); |
968 |
|
|
xmlprintf("</div>\n"); |
969 |
|
|
} |
970 |
|
|
|
971 |
|
|
|
972 |
|
|
/* output the log message */ |
973 |
|
|
static void outputlog(void){ |
974 |
|
|
FILE *ofp; |
975 |
|
|
const char *val; |
976 |
|
|
if(g_logfile[0] == '\0' || !(ofp = fopen(g_logfile, "ab"))) return; |
977 |
|
|
if(!(val = getenv("REMOTE_ADDR"))) val = "0.0.0.0"; |
978 |
|
|
fprintf(ofp, "%s:", val); |
979 |
|
|
if(!(val = getenv("REMOTE_PORT"))) val = "0"; |
980 |
|
|
fprintf(ofp, "%s\t", val); |
981 |
|
|
fprintf(ofp, "%s\t", p_phrase); |
982 |
|
|
if(!(val = getenv("HTTP_USER_AGENT"))) val = "*"; |
983 |
|
|
fprintf(ofp, "%s\n", val); |
984 |
|
|
fclose(ofp); |
985 |
|
|
} |
986 |
|
|
|
987 |
|
|
|
988 |
|
|
|
989 |
|
|
/* END OF FILE */ |