/[hyperestraier]/trunk/estseek.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/estseek.c

Parent Directory | Revision Log


Revision 3 - (hide annotations)
Fri Jul 29 21:57:20 2005 UTC (18 years, 9 months ago) by dpavlin
File MIME type: text/plain
File size: 36945 byte(s)
make working copy from version 0.5.1

1 dpavlin 2 /*************************************************************************************************
2     * A sample searcher of Hyper Estraier
3     * Copyright (C) 2004-2005 Mikio Hirabayashi
4     * This file is part of Hyper Estraier.
5     * Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
6     * the GNU Lesser General Public License as published by the Free Software Foundation; either
7     * version 2.1 of the License or any later version. Hyper Estraier is distributed in the hope
8     * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
10     * License for more details.
11     * You should have received a copy of the GNU Lesser General Public License along with Hyper
12     * Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13     * Boston, MA 02111-1307 USA.
14     *************************************************************************************************/
15    
16    
17     #if defined(MYFCGI)
18     #include <fcgi_stdio.h>
19     #endif
20     #include "estraier.h"
21     #include "myconf.h"
22    
23     #define CONFSUFFIX ".conf" /* suffix of the configuration file */
24     #define KWDBNAME "kwords" /* name of the database for keywords */
25     #define DATTRLFILE "_lfile" /* name of the attribute of the local file name */
26     #define DATTRSCORE "#score" /* name of the pseudo-attribute of score */
27     #define NUMBUFSIZ 32 /* size of a buffer for a number */
28     #define OUTBUFSIZ 262144 /* size of the output buffer */
29     #define MINIBNUM 31 /* bucket number of map for trivial use */
30     #define LOCKRETRYNUM 16 /* number of retries when locking fails */
31     #define MISSRETRYNUM 3 /* number of retries when documents are missing */
32     #define MISSINCRATIO 8 /* multiplier of the search maximum on each retry for missing documents */
33     #define DEFPERPAGE 10 /* default number of documents shown per page */
34     #define NAVIPAGES 10 /* number of pages in paging navigation */
35     #define SPCACHEMNUM 1048576 /* max number of the special cache */
36    
37    
38     /* global variables for configurations (filled in from the configuration file) */
39     const char *g_conffile = NULL; /* path of the configuration file */
40     const char *g_indexname = NULL; /* name of the index */
41     const char *g_tmplfile = NULL; /* path of the template file */
42     const char *g_topfile = NULL; /* path of the top page file */
43     const char *g_logfile = NULL; /* path of the log file */
44     const char *g_lprefix = NULL; /* local prefix of the URI of each document */
45     const char *g_gprefix = NULL; /* global prefix of the URI of each document */
46     const char *g_gsuffix = NULL; /* global suffix of the URI of each document */
47     const char *g_dirindex = NULL; /* name of the index file in a directory */
48     const CBLIST *g_replexprs = NULL; /* list of URI replacement expressions */
49     const char *g_perpage = NULL; /* CSV of numbers of shown documents per page */
50     int g_attrselect = FALSE; /* whether to use select boxes for extension form */
51     int g_showscore = FALSE; /* whether to show scores */
52     const CBLIST *g_extattrs = NULL; /* list of extra attributes of each document */
53     int g_snipwwidth = -1; /* whole width of the snippet */
54     int g_sniphwidth = -1; /* width of beginning of the text */
55     int g_snipawidth = -1; /* width around each highlighted word */
56     int g_condgstep = -1; /* step of N-gram */
57     int g_dotfidf = FALSE; /* whether to do TF-IDF tuning */
58     int g_smplphrase = FALSE; /* whether to use simplified phrase */
59     int g_candetail = FALSE; /* whether to show detail link */
60     int g_smlrvnum = -1; /* number of elements of a vector for similarity */
61     const char *g_spcache = NULL; /* name of the attribute of special cache */
62    
63    
64     /* global variables for parameters (filled in from the CGI request) */
65     const char *p_phrase = NULL; /* search phrase */
66     const char *p_attr = NULL; /* narrowing attribute */
67     const char *p_attrval = NULL; /* separated value of narrowing attribute */
68     const char *p_order = NULL; /* ordering attribute */
69     int p_perpage = 0; /* number of documents shown per page */
70     int p_pagenum = 0; /* number of the page */
71     int p_detail = 0; /* ID of the document to be detailed */
72     int p_similar = 0; /* ID of the seed document of similarity search */
73    
74    
75     /* other global variables */
76     char g_outbuf[OUTBUFSIZ]; /* output buffer */
77     const char *g_scriptname = NULL; /* name of the script */
78     const char *g_tmpltext = NULL; /* text of the template */
79     const char *g_toptext = NULL; /* text of the top page */
80     ESTDB *g_db = NULL; /* main database object */
81     CURIA *g_kwdb = NULL; /* keyword database object */
82     double g_etime = 0.0; /* elapsed time */
83     int g_tabidx = 0; /* counter of tab indexes */
84    
85    
86     /* function prototypes (every function except `main' is static) */
87     int main(int argc, char **argv);
88     static int realmain(int argc, char **argv);
89     static void showerror(const char *msg);
90     static const char *skiplabel(const char *str);
91     static CBMAP *getparameters(void);
92     static void myestdbclose(ESTDB *db);
93     static void xmlprintf(const char *format, ...);
94     static CBMAP *vectorizer(void *db, int id, void *kwdb);
95     static void setsimilarphrase(void);
96     static void showpage(void);
97     static void showform(void);
98     static void showtop(void);
99     static void showresult(ESTDOC **docs, int dnum, CBMAP *hints, int miss);
100     static void showdoc(ESTDOC *doc, const CBLIST *words, CBMAP *cnames, int detail);
101     static char *makeshownuri(const char *uri);
102     static void showinfo(void);
103     static void outputlog(void);
104    
105    
/* main routine
   When built with MYFCGI, requests are served in a FastCGI accept loop and the per-request
   globals are cleared before each call of `realmain'.  Otherwise exactly one request is
   handled.
   `argc' and `argv' are passed through to `realmain'.
   The return value is 0 in FastCGI mode, or the result of `realmain' otherwise. */
int main(int argc, char **argv){
#if defined(MYFCGI)
  static int cnt = 0;
  while(FCGI_Accept() >= 0){
    /* every 256 requests, sweep the global garbage collector (presumably releasing what was
       registered with `cbglobalgc') and forget the handles so `realmain' reopens them */
    if(++cnt >= 256){
      cbggcsweep();
      g_db = NULL;
      g_kwdb = NULL;
      cnt = 0;
    }
    p_phrase = NULL;
    p_attr = NULL;
    p_attrval = NULL;
    p_order = NULL;
    p_perpage = 0;
    p_pagenum = 0;
    p_detail = 0;
    p_similar = 0;
    /* the tab index counter is per page; without this reset it keeps growing from one
       request to the next for the lifetime of the process */
    g_tabidx = 0;
    realmain(argc, argv);
  }
  return 0;
#else
  return realmain(argc, argv);
#endif
}
132    
133    
134     /* real main routine */
135     static int realmain(int argc, char **argv){
136     CBLIST *lines, *rlist, *alist;
137     CBMAP *params;
138     const char *rp;
139     char *tmp, *wp;
140     int i, ecode;
141     /* set configurations */
142     cbstdiobin();
143     setvbuf(stdout, g_outbuf, _IOFBF, OUTBUFSIZ);
144     g_scriptname = argv[0];
145     if((rp = getenv("SCRIPT_NAME")) != NULL) g_scriptname = rp;
146     if((rp = strrchr(g_scriptname, '/')) != NULL) g_scriptname = rp + 1;
147     tmp = cbmalloc(strlen(g_scriptname) + strlen(CONFSUFFIX) + 1);
148     sprintf(tmp, "%s", g_scriptname);
149     cbglobalgc(tmp, free);
150     if(!(wp = strrchr(tmp, '.'))) wp = tmp + strlen(tmp);
151     sprintf(wp, "%s", CONFSUFFIX);
152     g_conffile = tmp;
153     if(!(lines = cbreadlines(g_conffile))) showerror("the configuration file is missing.");
154     cbglobalgc(lines, (void (*)(void *))cblistclose);
155     rlist = cblistopen();
156     cbglobalgc(rlist, (void (*)(void *))cblistclose);
157     alist = cblistopen();
158     cbglobalgc(alist, (void (*)(void *))cblistclose);
159     for(i = 0; i < cblistnum(lines); i++){
160     rp = cblistval(lines, i, NULL);
161     if(cbstrfwimatch(rp, "indexname:")){
162     g_indexname = skiplabel(rp);
163     } else if(cbstrfwimatch(rp, "tmplfile:")){
164     g_tmplfile = skiplabel(rp);
165     } else if(cbstrfwimatch(rp, "topfile:")){
166     g_topfile = skiplabel(rp);
167     } else if(cbstrfwimatch(rp, "logfile:")){
168     g_logfile = skiplabel(rp);
169     } else if(cbstrfwimatch(rp, "lprefix:")){
170     g_lprefix = skiplabel(rp);
171     } else if(cbstrfwimatch(rp, "gprefix:")){
172     g_gprefix = skiplabel(rp);
173     } else if(cbstrfwimatch(rp, "gsuffix:")){
174     g_gsuffix = skiplabel(rp);
175     } else if(cbstrfwimatch(rp, "dirindex:")){
176     g_dirindex = skiplabel(rp);
177     } else if(cbstrfwimatch(rp, "replace:")){
178     cblistpush(rlist, skiplabel(rp), -1);
179     } else if(cbstrfwimatch(rp, "perpage:")){
180     g_perpage = skiplabel(rp);
181     } else if(cbstrfwimatch(rp, "attrselect:")){
182     if(!cbstricmp(skiplabel(rp), "true")) g_attrselect = TRUE;
183     } else if(cbstrfwimatch(rp, "showscore:")){
184     if(!cbstricmp(skiplabel(rp), "true")) g_showscore = TRUE;
185     } else if(cbstrfwimatch(rp, "extattr:")){
186     cblistpush(alist, skiplabel(rp), -1);
187     } else if(cbstrfwimatch(rp, "snipwwidth:")){
188     g_snipwwidth = atoi(skiplabel(rp));
189     } else if(cbstrfwimatch(rp, "sniphwidth:")){
190     g_sniphwidth = atoi(skiplabel(rp));
191     } else if(cbstrfwimatch(rp, "snipawidth:")){
192     g_snipawidth = atoi(skiplabel(rp));
193     } else if(cbstrfwimatch(rp, "condgstep:")){
194     g_condgstep = atoi(skiplabel(rp));
195     } else if(cbstrfwimatch(rp, "dotfidf:")){
196     if(!cbstricmp(skiplabel(rp), "true")) g_dotfidf = TRUE;
197     } else if(cbstrfwimatch(rp, "smplphrase:")){
198     if(!cbstricmp(skiplabel(rp), "true")) g_smplphrase = TRUE;
199     } else if(cbstrfwimatch(rp, "candetail:")){
200     if(!cbstricmp(skiplabel(rp), "true")) g_candetail = TRUE;
201     } else if(cbstrfwimatch(rp, "smlrvnum:")){
202     g_smlrvnum = atoi(skiplabel(rp));
203     } else if(cbstrfwimatch(rp, "spcache:")){
204     g_spcache = skiplabel(rp);
205     }
206     }
207     if(!g_indexname) showerror("indexname is undefined.");
208     if(!g_tmplfile) showerror("tmplfile is undefined.");
209     if(!g_topfile) showerror("topfile is undefined.");
210     if(!g_logfile) showerror("logfile is undefined.");
211     if(!g_lprefix) showerror("lprefix is undefined.");
212     if(!g_gprefix) showerror("gprefix is undefined.");
213     if(!g_gsuffix) showerror("gsuffix is undefined.");
214     if(!g_dirindex) showerror("dirindex is undefined.");
215     g_replexprs = rlist;
216     if(!g_perpage) showerror("perpage is undefined.");
217     g_extattrs = alist;
218     if(g_snipwwidth < 0) showerror("snipwwidth is undefined.");
219     if(g_sniphwidth < 0) showerror("sniphwidth is undefined.");
220     if(g_snipawidth < 0) showerror("snipawidth is undefined.");
221     if(g_condgstep < 1) showerror("condgstep is undefined.");
222     if(!g_spcache) showerror("spcache is undefined.");
223     /* read parameters */
224     params = getparameters();
225     cbglobalgc(params, (void (*)(void *))cbmapclose);
226     if(!(p_phrase = cbmapget(params, "phrase", -1, NULL))) p_phrase = "";
227     while(*p_phrase == ' ' || *p_phrase == '\t'){
228     p_phrase++;
229     }
230     if(!(p_attr = cbmapget(params, "attr", -1, NULL))) p_attr = "";
231     while(*p_attr == ' ' || *p_attr == '\t'){
232     p_attr++;
233     }
234     if(!(p_attrval = cbmapget(params, "attrval", -1, NULL))) p_attrval = "";
235     while(*p_attrval == ' ' || *p_attrval == '\t'){
236     p_attrval++;
237     }
238     if(cbstrfwmatch(p_attr, "gstep=")){
239     g_condgstep = atoi(p_attr + 6);
240     p_attr = "";
241     }
242     if(cbstrfwmatch(p_attr, "tfidf=")){
243     g_dotfidf = !cbstricmp(p_attr + 6, "true");
244     p_attr = "";
245     }
246     if(!(p_order = cbmapget(params, "order", -1, NULL))) p_order = "";
247     while(*p_order == ' ' || *p_order == '\t'){
248     p_order++;
249     }
250     if((rp = cbmapget(params, "perpage", -1, NULL)) != NULL) p_perpage = atoi(rp);
251     if(p_perpage < 1) p_perpage = DEFPERPAGE;
252     if((rp = cbmapget(params, "detail", -1, NULL)) != NULL) p_detail = atoi(rp);
253     if(p_detail < 1) p_detail = 0;
254     if((rp = cbmapget(params, "similar", -1, NULL)) != NULL) p_similar = atoi(rp);
255     if(p_similar < 1) p_similar = 0;
256     if((rp = cbmapget(params, "pagenum", -1, NULL)) != NULL) p_pagenum = atoi(rp);
257     if(p_pagenum < 1) p_pagenum = 1;
258     if((rp = cbmapget(params, "enc", -1, NULL)) != NULL){
259     if((tmp = est_iconv(p_phrase, -1, rp, "UTF-8", NULL, NULL)) != NULL){
260     p_phrase = tmp;
261     cbglobalgc(tmp, free);
262     }
263     if((tmp = est_iconv(p_attr, -1, rp, "UTF-8", NULL, NULL)) != NULL){
264     p_attr = tmp;
265     cbglobalgc(tmp, free);
266     }
267     if((tmp = est_iconv(p_attrval, -1, rp, "UTF-8", NULL, NULL)) != NULL){
268     p_attrval = tmp;
269     cbglobalgc(tmp, free);
270     }
271     if((tmp = est_iconv(p_order, -1, rp, "UTF-8", NULL, NULL)) != NULL){
272     p_order = tmp;
273     cbglobalgc(tmp, free);
274     }
275     }
276     /* read the other files and the database */
277     if(!g_db){
278     if(!(tmp = cbreadfile(g_tmplfile, NULL))) showerror("the template file is missing.");
279     cbglobalgc(tmp, free);
280     g_tmpltext = tmp;
281     if(!(tmp = cbreadfile(g_topfile, NULL))) showerror("the top page file is missing.");
282     cbglobalgc(tmp, free);
283     g_toptext = tmp;
284     for(i = 0; i <= LOCKRETRYNUM; i++){
285     if((g_db = est_db_open(g_indexname, ESTDBREADER | ESTDBLCKNB, &ecode)) != NULL) break;
286     if(ecode != ESTELOCK) showerror("the index is missing or broken.");
287     est_usleep(1000 * 1000);
288     }
289     if(!g_db) showerror("the index is being updated now.");
290     cbglobalgc(g_db, (void (*)(void *))myestdbclose);
291     if(g_spcache[0] != '\0') est_db_set_special_cache(g_db, g_spcache, SPCACHEMNUM);
292     }
293     setsimilarphrase();
294     /* show the page */
295     showpage();
296     /* output the log message */
297     outputlog();
298     return 0;
299     }
300    
301    
/* show the error page and exit
   `msg' is the message placed after "Error: " in the plain text body.
   This function does not return; the process exits with status 1. */
static void showerror(const char *msg){
  printf("Status: 500 Internal Server Error\r\n"
         "Content-Type: text/plain; charset=UTF-8\r\n"
         "\r\n"
         "Error: %s\n", msg);
  exit(1);
}
310    
311    
/* skip the label of a line
   `str' is a line of the form "label: value".
   The return value points into `str', just after the first colon and any spaces or tabs
   following it.  If there is no colon, an empty string is returned. */
static const char *skiplabel(const char *str){
  const char *colon = strchr(str, ':');
  if(colon == NULL) return "";
  return colon + 1 + strspn(colon + 1, " \t");
}
321    
322    
323     /* get CGI parameters */
324     static CBMAP *getparameters(void){
325     int maxlen = 1024 * 1024 * 32;
326     CBMAP *map, *attrs;
327     CBLIST *pairs, *parts;
328     const char *rp, *body;
329     char *buf, *key, *val, *dkey, *dval, *wp, *bound, *fbuf, *aname;
330     int i, len, c, blen, flen;
331     map = cbmapopenex(37);
332     buf = NULL;
333     len = 0;
334     if((rp = getenv("REQUEST_METHOD")) != NULL && !strcmp(rp, "POST") &&
335     (rp = getenv("CONTENT_LENGTH")) != NULL && (len = atoi(rp)) > 0){
336     if(len > maxlen) len = maxlen;
337     buf = cbmalloc(len + 1);
338     for(i = 0; i < len && (c = getchar()) != EOF; i++){
339     buf[i] = c;
340     }
341     buf[i] = '\0';
342     if(i != len){
343     free(buf);
344     buf = NULL;
345     }
346     } else if((rp = getenv("QUERY_STRING")) != NULL){
347     buf = cbmemdup(rp, -1);
348     len = strlen(buf);
349     }
350     if(buf && len > 0){
351     if((rp = getenv("CONTENT_TYPE")) != NULL && cbstrfwmatch(rp, "multipart/form-data") &&
352     (rp = strstr(rp, "boundary=")) != NULL){
353     rp += 9;
354     bound = cbmemdup(rp, -1);
355     if((wp = strchr(bound, ';')) != NULL) *wp = '\0';
356     parts = cbmimeparts(buf, len, bound);
357     for(i = 0; i < cblistnum(parts); i++){
358     body = cblistval(parts, i, &blen);
359     attrs = cbmapopen();
360     fbuf = cbmimebreak(body, blen, attrs, &flen);
361     if((rp = cbmapget(attrs, "NAME", -1, NULL)) != NULL){
362     cbmapput(map, rp, -1, fbuf, flen, FALSE);
363     aname = cbsprintf("%s-filename", rp);
364     if((rp = cbmapget(attrs, "FILENAME", -1, NULL)) != NULL)
365     cbmapput(map, aname, -1, rp, -1, FALSE);
366     free(aname);
367     }
368     free(fbuf);
369     cbmapclose(attrs);
370     }
371     cblistclose(parts);
372     free(bound);
373     } else {
374     pairs = cbsplit(buf, -1, "&");
375     for(i = 0; i < cblistnum(pairs); i++){
376     key = cbmemdup(cblistval(pairs, i, NULL), -1);
377     if((val = strchr(key, '=')) != NULL){
378     *(val++) = '\0';
379     dkey = cburldecode(key, NULL);
380     dval = cburldecode(val, NULL);
381     cbmapput(map, dkey, -1, dval, -1, FALSE);
382     free(dval);
383     free(dkey);
384     }
385     free(key);
386     }
387     cblistclose(pairs);
388     }
389     }
390     free(buf);
391     return map;
392     }
393    
394    
395     /* close the database */
396     static void myestdbclose(ESTDB *db){
397     int ecode;
398     est_db_close(db, &ecode);
399     }
400    
401    
/* output escaped string
   A small `printf' replacement which writes to the standard output.
   `format' is the format string.  Between '%' and the conversion character, any of the
   characters "0123456789 .+-" are passed on to `printf'.  The conversions are:
     's'                      string, written as is ("(null)" for NULL)
     'd'                      int
     'o', 'u', 'x', 'X', 'c'  unsigned int
     'e', 'E', 'f', 'g', 'G'  double
     '@'                      string with '&', '<', '>' and '"' replaced by XML entities and
                              the control characters 0x01-0x08 and 0x0e-0x1f dropped
     '?'                      string URL-encoded except for alphanumerics, '_', '-' and '.'
     '%'                      a literal percent sign
   Any other conversion character is skipped without consuming an argument.  A '%' left
   dangling at the end of the format outputs nothing. */
static void xmlprintf(const char *format, ...){
  va_list ap;
  char *tmp, cbuf[32];
  unsigned char c;
  int cblen;
  va_start(ap, format);
  while(*format != '\0'){
    if(*format != '%'){
      putchar(*format);
      format++;
      continue;
    }
    cbuf[0] = '%';
    cblen = 1;
    format++;
    /* collect the flag characters, keeping room for the conversion character and the
       terminator so that `cbuf' can never overflow */
    while(*format != '\0' && strchr("0123456789 .+-", *format) != NULL &&
          cblen < (int)sizeof(cbuf) - 2){
      cbuf[cblen++] = *format;
      format++;
    }
    /* stop at a dangling '%' instead of stepping over the end of the format string */
    if(*format == '\0') break;
    cbuf[cblen++] = *format;
    cbuf[cblen] = '\0';
    switch(*format){
    case 's':
      tmp = va_arg(ap, char *);
      if(!tmp) tmp = "(null)";
      printf(cbuf, tmp);
      break;
    case 'd':
      printf(cbuf, va_arg(ap, int));
      break;
    case 'o': case 'u': case 'x': case 'X': case 'c':
      printf(cbuf, va_arg(ap, unsigned int));
      break;
    case 'e': case 'E': case 'f': case 'g': case 'G':
      printf(cbuf, va_arg(ap, double));
      break;
    case '@':
      tmp = va_arg(ap, char *);
      if(!tmp) tmp = "(null)";
      while(*tmp != '\0'){
        c = *(unsigned char *)tmp;
        switch(c){
        case '&': printf("&amp;"); break;
        case '<': printf("&lt;"); break;
        case '>': printf("&gt;"); break;
        case '"': printf("&quot;"); break;
        default:
          /* tab, line feed, vertical tab, form feed and carriage return are kept */
          if(!(c <= 0x08 || (c >= 0x0e && c <= 0x1f))) putchar(c);
          break;
        }
        tmp++;
      }
      break;
    case '?':
      tmp = va_arg(ap, char *);
      if(!tmp) tmp = "(null)";
      while(*tmp != '\0'){
        c = *(unsigned char *)tmp;
        if((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
           (c >= '0' && c <= '9') || strchr("_-.", c) != NULL){
          putchar(c);
        } else {
          printf("%%%02X", c);
        }
        tmp++;
      }
      break;
    case '%':
      putchar('%');
      break;
    default:
      /* unknown conversion: output nothing and consume no argument */
      break;
    }
    format++;
  }
  va_end(ap);
}
476    
477    
478     /* create a vector of keywords */
479     static CBMAP *vectorizer(void *db, int id, void *kwdb){
480     CBMAP *kwords;
481     char *mbuf;
482     int msiz;
483     if(!(mbuf = crget((CURIA *)kwdb, (char *)&id, sizeof(int), 0, -1, &msiz))) return NULL;
484     kwords = cbmapload(mbuf, msiz);
485     free(mbuf);
486     return kwords;
487     }
488    
489    
490     /* set the phrase for similarity search */
491     static void setsimilarphrase(void){
492     ESTDOC *doc;
493     CBMAP *svmap;
494     CBDATUM *datum;
495     const char *kbuf, *vbuf;
496     char *ptr;
497     int ksiz, vsiz;
498     if(!cbstrfwimatch(p_phrase, ESTOPSIMILAR) && p_similar < 1) return;
499     if(g_smlrvnum < 1){
500     p_phrase = "";
501     return;
502     }
503     if(!g_kwdb){
504     ptr = cbsprintf("%s%c%s", g_indexname, ESTPATHCHR, KWDBNAME);
505     if((g_kwdb = cropen(ptr, CR_OREADER, -1, -1)) != NULL){
506     cbglobalgc(g_kwdb, (void (*)(void *))crclose);
507     est_db_set_vectorizer(g_db, vectorizer, g_kwdb);
508     }
509     free(ptr);
510     }
511     if(p_similar < 1) return;
512     svmap = g_kwdb ? vectorizer(g_db, p_similar, g_kwdb) : NULL;
513     if(!svmap && (doc = est_db_get_doc(g_db, p_similar, 0)) != NULL){
514     svmap = est_db_etch_doc(g_dotfidf ? g_db : NULL, doc, g_smlrvnum);
515     est_doc_delete(doc);
516     } else if(!svmap){
517     return;
518     }
519     datum = cbdatumopen(ESTOPSIMILAR, -1);
520     cbmapiterinit(svmap);
521     while((kbuf = cbmapiternext(svmap, &ksiz)) != NULL){
522     vbuf = cbmapget(svmap, kbuf, ksiz, &vsiz);
523     cbdatumcat(datum, " WITH ", -1);
524     cbdatumcat(datum, vbuf, vsiz);
525     cbdatumcat(datum, " ", 1);
526     cbdatumcat(datum, kbuf, ksiz);
527     }
528     ptr = cbdatumtomalloc(datum, NULL);
529     cbglobalgc(ptr, free);
530     p_phrase = ptr;
531     cbmapclose(svmap);
532     }
533    
534    
535     /* show the page */
536     static void showpage(void){
537     ESTCOND *cond;
538     ESTDOC **docs;
539     CBMAP *hints;
540     CBLIST *elems;
541     const char *rp;
542     char *tmp, numbuf[NUMBUFSIZ];
543     int i, tnum, max, *res, rnum, sc, dnum, miss;
544     printf("Cache-Control: no-cache, must-revalidate, no-transform\r\n");
545     printf("Pragma: no-cache\r\n");
546     printf("Content-Disposition: inline; filename=%s\r\n", g_scriptname);
547     printf("Content-Type: text/html; charset=UTF-8\r\n");
548     printf("\r\n");
549     g_etime = est_gettimeofday();
550     cond = est_cond_new();
551     if(p_phrase[0] != '\0') est_cond_set_phrase(cond, p_phrase);
552     if(p_attr[0] != '\0'){
553     if(p_attrval[0] != '\0'){
554     tmp = cbsprintf("%s %s", p_attr, p_attrval);
555     est_cond_add_attr(cond, tmp);
556     free(tmp);
557     } else {
558     est_cond_add_attr(cond, p_attr);
559     }
560     }
561     if(p_order[0] != '\0') est_cond_set_order(cond, p_order);
562     switch(g_condgstep){
563     case 1:
564     est_cond_set_options(cond, ESTCONDSURE);
565     break;
566     case 2:
567     est_cond_set_options(cond, ESTCONDUSU);
568     break;
569     case 3:
570     est_cond_set_options(cond, ESTCONDFAST);
571     break;
572     case 4:
573     est_cond_set_options(cond, ESTCONDAGIT);
574     break;
575     }
576     if(!g_dotfidf) est_cond_set_options(cond, ESTCONDNOIDF);
577     if(g_smplphrase) est_cond_set_options(cond, ESTCONDSIMPLE);
578     if(g_showscore) est_cond_set_options(cond, ESTCONDSCFB);
579     tnum = 0;
580     max = p_pagenum * p_perpage * 1.3 + 1;
581     do {
582     est_cond_set_max(cond, max);
583     hints = cbmapopenex(MINIBNUM);
584     res = est_db_search(g_db, cond, &rnum, hints);
585     if(g_candetail && p_detail > 0){
586     if(rnum < 1) cbmapput(hints, "", 0, "1", 1, TRUE);
587     free(res);
588     res = cbmalloc(sizeof(int));
589     res[0] = p_detail;
590     rnum = 1;
591     }
592     docs = cbmalloc(rnum * sizeof(ESTDOC *) + 1);
593     dnum = 0;
594     miss = 0;
595     for(i = 0; i < rnum; i++){
596     if(!(docs[dnum] = est_db_get_doc(g_db, res[i], dnum < p_pagenum * p_perpage ? 0 :
597     ESTGDNOATTR | ESTGDNOTEXT))){
598     miss++;
599     continue;
600     }
601     if((sc = est_cond_score(cond, i)) >= 0){
602     sprintf(numbuf, "%d", sc);
603     est_doc_add_attr(docs[dnum], DATTRSCORE, numbuf);
604     }
605     dnum++;
606     }
607     if(tnum <= MISSRETRYNUM && miss > 0 && max <= rnum && dnum < p_pagenum * p_perpage + 1){
608     for(i = 0; i < dnum; i++){
609     est_doc_delete(docs[i]);
610     }
611     free(docs);
612     free(res);
613     cbmapclose(hints);
614     max *= MISSINCRATIO;
615     tnum++;
616     continue;
617     }
618     break;
619     } while(TRUE);
620     g_etime = est_gettimeofday() - g_etime;
621     elems = cbxmlbreak(g_tmpltext, FALSE);
622     for(i = 0; i < cblistnum(elems); i++){
623     rp = cblistval(elems, i, NULL);
624     if(!strcmp(rp, "<!--ESTFORM-->")){
625     showform();
626     } else if(!strcmp(rp, "<!--ESTRESULT-->")){
627     if(p_phrase[0] == '\0' && p_attr[0] == '\0' && p_detail < 1){
628     showtop();
629     } else {
630     showresult(docs, dnum, hints, miss);
631     }
632     } else if(!strcmp(rp, "<!--ESTINFO-->")){
633     showinfo();
634     } else {
635     printf("%s", rp);
636     }
637     }
638     for(i = 0; i < dnum; i++){
639     est_doc_delete(docs[i]);
640     }
641     cblistclose(elems);
642     free(docs);
643     free(res);
644     cbmapclose(hints);
645     est_cond_delete(cond);
646     }
647    
648    
649     /* show the form */
650     static void showform(void){
651     CBLIST *list;
652     const char *elem;
653     int i, num;
654     xmlprintf("<div id=\"estform\" class=\"estform\">\n");
655     xmlprintf("<form action=\"%@\" method=\"get\" id=\"form_self\">\n", g_scriptname);
656     xmlprintf("<div class=\"form_basic\">\n");
657     xmlprintf("<input type=\"text\" name=\"phrase\" value=\"%@\""
658     " size=\"80\" id=\"phrase\" class=\"text\" tabindex=\"%d\" accesskey=\"0\" />\n",
659     p_phrase, ++g_tabidx);
660     xmlprintf("<input type=\"submit\" value=\"Search\""
661     " id=\"search\" class=\"submit\" tabindex=\"%d\" accesskey=\"1\" />\n",
662     ++g_tabidx);
663     xmlprintf("</div>\n");
664     xmlprintf("<div class=\"form_extension\">\n");
665     xmlprintf("<select name=\"perpage\" id=\"perpage\" tabindex=\"%d\">\n", ++g_tabidx);
666     list = cbsplit(g_perpage, -1, ",");
667     for(i = 0; i < cblistnum(list); i++){
668     elem = cblistval(list, i, NULL);
669     if(elem[0] == '\0') continue;
670     num = atoi(elem);
671     xmlprintf("<option value=\"%d\"%s>%d</option>\n",
672     num, num == p_perpage ? " selected=\"selected\"" : "", num);
673     }
674     cblistclose(list);
675     xmlprintf("</select>\n");
676     xmlprintf("per page, with\n");
677     if(g_attrselect){
678     xmlprintf("<select name=\"attr\" id=\"attr\" tabindex=\"%d\">\n", ++g_tabidx);
679     xmlprintf("<option value=\"\">--</option>\n");
680     xmlprintf("<option value=\"@title ISTRINC\"%s>title including</option>\n",
681     cbstrfwmatch(p_attr, "@title ISTRINC") ? " selected=\"selected\"" : "");
682     xmlprintf("<option value=\"@title ISTRBW\"%s>title beginning with</option>\n",
683     cbstrfwmatch(p_attr, "@title ISTRBW") ? " selected=\"selected\"" : "");
684     xmlprintf("<option value=\"@title ISTREW\"%s>title ending with</option>\n",
685     cbstrfwmatch(p_attr, "@title ISTREW") ? " selected=\"selected\"" : "");
686     xmlprintf("<option value=\"@author ISTRINC\"%s>author including</option>\n",
687     cbstrfwmatch(p_attr, "@author ISTRINC") ? " selected=\"selected\"" : "");
688     xmlprintf("<option value=\"@author ISTRBW\"%s>author beginning with</option>\n",
689     cbstrfwmatch(p_attr, "@author ISTRBW") ? " selected=\"selected\"" : "");
690     xmlprintf("<option value=\"@author ISTREW\"%s>author ending with</option>\n",
691     cbstrfwmatch(p_attr, "@author ISTREW") ? " selected=\"selected\"" : "");
692     xmlprintf("<option value=\"@mdate NUMLT\"%s>date less than</option>\n",
693     cbstrfwmatch(p_attr, "@mdate NUMLT") ? " selected=\"selected\"" : "");
694     xmlprintf("<option value=\"@mdate NUMGE\"%s>date not less than</option>\n",
695     cbstrfwmatch(p_attr, "@mdate NUMGE") ? " selected=\"selected\"" : "");
696     xmlprintf("<option value=\"@size NUMLT\"%s>size less than</option>\n",
697     cbstrfwmatch(p_attr, "@size NUMLT") ? " selected=\"selected\"" : "");
698     xmlprintf("<option value=\"@size NUMGE\"%s>size not less than</option>\n",
699     cbstrfwmatch(p_attr, "@size NUMGE") ? " selected=\"selected\"" : "");
700     xmlprintf("</select>\n");
701     xmlprintf("<input type=\"text\" name=\"attrval\" value=\"%@\""
702     " size=\"16\" id=\"attrval\" class=\"text\" tabindex=\"%d\" accesskey=\"2\" />\n",
703     p_attrval, ++g_tabidx);
704     xmlprintf(", order by\n");
705     xmlprintf("<select name=\"order\" id=\"order\" tabindex=\"%d\">\n", ++g_tabidx);
706     xmlprintf("<option value=\"\">score</option>\n");
707     xmlprintf("<option value=\"@title STRA\"%s>title (asc)</option>\n",
708     !strcmp(p_order, "@title STRA") ? " selected=\"selected\"" : "");
709     xmlprintf("<option value=\"@title STRD\"%s>title (desc)</option>\n",
710     !strcmp(p_order, "@title STRD") ? " selected=\"selected\"" : "");
711     xmlprintf("<option value=\"@author STRA\"%s>author (asc)</option>\n",
712     !strcmp(p_order, "@author STRA") ? " selected=\"selected\"" : "");
713     xmlprintf("<option value=\"@author STRD\"%s>author (desc)</option>\n",
714     !strcmp(p_order, "@author STRD") ? " selected=\"selected\"" : "");
715     xmlprintf("<option value=\"@mdate NUMA\"%s>date (asc)</option>\n",
716     !strcmp(p_order, "@mdate NUMA") ? " selected=\"selected\"" : "");
717     xmlprintf("<option value=\"@mdate NUMD\"%s>date (desc)</option>\n",
718     !strcmp(p_order, "@mdate NUMD") ? " selected=\"selected\"" : "");
719     xmlprintf("<option value=\"@size NUMA\"%s>size (asc)</option>\n",
720     !strcmp(p_order, "@size NUMA") ? " selected=\"selected\"" : "");
721     xmlprintf("<option value=\"@size NUMD\"%s>size (desc)</option>\n",
722     !strcmp(p_order, "@size NUMD") ? " selected=\"selected\"" : "");
723     xmlprintf("</select>\n");
724     } else {
725     xmlprintf("<input type=\"text\" name=\"attr\" value=\"%@\""
726     " size=\"24\" id=\"attr\" class=\"text\" tabindex=\"%d\" accesskey=\"2\" />\n",
727     p_attr, ++g_tabidx);
728     xmlprintf(", order by\n");
729     xmlprintf("<input type=\"text\" name=\"order\" value=\"%@\""
730     " size=\"24\" id=\"order\" class=\"text\" tabindex=\"%d\" accesskey=\"3\" />\n",
731     p_order, ++g_tabidx);
732     }
733     xmlprintf("</div>\n");
734     xmlprintf("</form>\n");
735     xmlprintf("</div>\n");
736     }
737    
738    
739     /* show the top message */
740     static void showtop(void){
741     printf("%s", g_toptext);
742     }
743    
744    
745     /* show the result */
746     static void showresult(ESTDOC **docs, int dnum, CBMAP *hints, int miss){
747     CBMAP *cnames;
748     CBLIST *words;
749     const char *key, *myphrase;
750     char cname[NUMBUFSIZ];
751     int i, hits, snum, start, end, cnum, pnum;
752     xmlprintf("<div id=\"estresult\" class=\"estresult\">\n");
753     hits = atoi(cbmapget(hints, "", 0, NULL)) - miss;
754     start = (p_pagenum - 1) * p_perpage;
755     end = p_pagenum * p_perpage;
756     if(end > dnum) end = dnum;
757     xmlprintf("<div class=\"resinfo\">");
758     xmlprintf("Results of <strong>%d</strong> - <strong>%d</strong>",
759     start + (hits > 0 ? 1 : 0), end);
760     xmlprintf(" of about <strong>%d</strong>", hits);
761     if(p_phrase[0] != '\0' && strlen(p_phrase) < 128)
762     xmlprintf(" for <strong>%@</strong>", p_phrase);
763     if(g_etime > 0.0) xmlprintf(" (%.3f sec.)", g_etime / 1000.0);
764     if(miss > p_perpage * p_pagenum) xmlprintf("*");
765     xmlprintf("</div>\n");
766     if(cbmaprnum(hints) > 2 || (p_phrase[0] != '\0' && p_attr[0] != '\0')){
767     xmlprintf("<div class=\"hints\">");
768     cbmapiterinit(hints);
769     i = 0;
770     while((key = cbmapiternext(hints, NULL)) != NULL){
771     if(key[0] == '\0') continue;
772     if(i++ > 0) xmlprintf(", ");
773     xmlprintf("<span class=\"hword\">%s (%s)</span>", key, cbmapget(hints, key, -1, NULL));
774     }
775     xmlprintf("</div>\n");
776     }
777     words = cblistopen();
778     cbmapiterinit(hints);
779     while((key = cbmapiternext(hints, NULL)) != NULL){
780     if(key[0] == '\0' || atoi(cbmapget(hints, key, -1, NULL)) < 0) continue;
781     cblistpush(words, key, -1);
782     }
783     cnames = cbmapopenex(MINIBNUM);
784     cnum = 0;
785     for(i = 0; i < cblistnum(words); i++){
786     sprintf(cname, "key%d", ++cnum);
787     cbmapput(cnames, cblistval(words, i, NULL), -1, cname, -1, FALSE);
788     }
789     for(snum = start; snum < end; snum++){
790     showdoc(docs[snum], words, cnames, g_candetail && p_detail > 0);
791     }
792     cbmapclose(cnames);
793     cblistclose(words);
794     if(dnum < 1) xmlprintf("<p class=\"note\">Your search did not match any documents.</p>\n");
795     myphrase = p_similar > 0 ? "" : p_phrase;
796     xmlprintf("<div class=\"paging\">\n");
797     if(p_pagenum > 1){
798     xmlprintf("<a href=\"%@?phrase=%?&amp;attr=%?&amp;attrval=%?&amp;order=%?"
799     "&amp;perpage=%d&amp;pagenum=%d&amp;similar=%d\" class=\"navi\">PREV</a>\n",
800     g_scriptname, myphrase, p_attr, p_attrval, p_order,
801     p_perpage, p_pagenum - 1, p_similar);
802     } else {
803     xmlprintf("<span class=\"void\">PREV</span>\n");
804     }
805     pnum = (hits - 1 - (hits - 1) % p_perpage + p_perpage) / p_perpage;
806     if(hits > 0 && p_detail < 1){
807     for(i = p_pagenum > NAVIPAGES ? p_pagenum - NAVIPAGES + 1 : 1;
808     i == 1 || (i <= pnum && i < p_pagenum + NAVIPAGES); i++){
809     if(i == p_pagenum){
810     printf("<span class=\"pnow\">%d</span>\n", i);
811     } else {
812     xmlprintf("<a href=\"%@?phrase=%?&amp;attr=%?&amp;attrval=%?&amp;order=%?"
813     "&amp;perpage=%d&amp;pagenum=%d&amp;similar=%d\" class=\"pnum\">%d</a>\n",
814     g_scriptname, myphrase, p_attr, p_attrval, p_order, p_perpage, i, p_similar, i);
815     }
816     }
817     }
818     if(snum < dnum){
819     xmlprintf("<a href=\"%@?phrase=%?&amp;attr=%?&amp;attrval=%?&amp;order=%?"
820     "&amp;perpage=%d&amp;pagenum=%d&amp;similar=%d\" class=\"navi\">NEXT</a>\n",
821     g_scriptname, myphrase, p_attr, p_attrval, p_order,
822     p_perpage, p_pagenum + 1, p_similar);
823     } else {
824     xmlprintf("<span class=\"void\">NEXT</span>\n");
825     }
826     xmlprintf("</div>\n");
827     xmlprintf("</div>\n");
828     }
829    
830    
831     /* show a document */
832     static void showdoc(ESTDOC *doc, const CBLIST *words, CBMAP *cnames, int detail){
833     CBMAP *kwords;
834     CBLIST *names, *lines;
835     const char *uri, *title, *score, *val, *name, *line, *cname;
836     char *turi, *tsv, *pv, *str;
837     int i, id;
838     id = est_doc_id(doc);
839     if(!(uri = est_doc_attr(doc, ESTDATTRURI))) uri = ".";
840     turi = makeshownuri(uri);
841     if(!(title = est_doc_attr(doc, ESTDATTRTITLE))) title = "";
842     if(title[0] == '\0' && !(title = est_doc_attr(doc, DATTRLFILE))) title = "";
843     if(title[0] == '\0' && ((pv = strrchr(uri, '/')) != NULL)) title = pv + 1;
844     if(title[0] == '\0') title = "(no title)";
845     if(!(score = est_doc_attr(doc, DATTRSCORE))) score = "";
846     xmlprintf("<dl class=\"doc\" id=\"doc_%d\">\n", id);
847     xmlprintf("<dt>");
848     xmlprintf("<a href=\"%@\" class=\"doc_title\">%@</a>", turi, title);
849     if(score[0] != '\0') xmlprintf(" <span class=\"doc_score\">%@</span>", score);
850     xmlprintf("</dt>\n");
851     if(detail){
852     names = est_doc_attr_names(doc);
853     for(i = 0; i < cblistnum(names); i++){
854     name = cblistval(names, i, NULL);
855     if(name[0] != '_' && strcmp(name, ESTDATTRURI) && strcmp(name, ESTDATTRTITLE) &&
856     (val = est_doc_attr(doc, name)) != NULL && val[0] != '\0'){
857     xmlprintf("<dd class=\"doc_attr\">");
858     xmlprintf("%@: <span class=\"doc_val\">%@</span>", name, val);
859     xmlprintf("</dd>\n");
860     }
861     }
862     cblistclose(names);
863     if(g_smlrvnum > 0){
864     xmlprintf("<dd class=\"doc_attr\">");
865     xmlprintf("#vector: <span class=\"doc_val\">");
866     kwords = est_db_etch_doc(g_db, doc, g_smlrvnum);
867     cbmapiterinit(kwords);
868     for(i = 0; (name = cbmapiternext(kwords, NULL)) != NULL; i++){
869     if(i > 0) xmlprintf(", ");
870     xmlprintf("%@ (%@)\n", name, cbmapget(kwords, name, -1, NULL));
871     }
872     cbmapclose(kwords);
873     xmlprintf("</span>");
874     xmlprintf("</dd>\n");
875     }
876     } else {
877     for(i = 0; i < cblistnum(g_extattrs); i++){
878     str = cbmemdup(cblistval(g_extattrs, i, NULL), -1);
879     if((pv = strchr(str, '|')) != NULL){
880     *pv = '\0';
881     pv++;
882     if((val = est_doc_attr(doc, str)) != NULL && val[0] != '\0'){
883     xmlprintf("<dd class=\"doc_attr\">");
884     xmlprintf("%@: <span class=\"doc_val\">%@</span>", pv, val);
885     xmlprintf("</dd>\n");
886     }
887     }
888     free(str);
889     }
890     }
891     xmlprintf("<dd class=\"doc_text\">");
892     tsv = est_doc_make_snippet(doc, words, detail ? INT_MAX : g_snipwwidth,
893     detail ? INT_MAX : g_sniphwidth, g_snipawidth);
894     lines = cbsplit(tsv, -1, "\n");
895     for(i = 0; i < cblistnum(lines); i++){
896     line = cblistval(lines, i, NULL);
897     if(line[0] == '\0'){
898     if(i < cblistnum(lines) - 1) xmlprintf(" ... ");
899     } else if((pv = strchr(line, '\t')) != NULL){
900     str = cbmemdup(line, pv - line);
901     if(!(cname = cbmapget(cnames, pv + 1, -1, NULL))) cname = "key0";
902     xmlprintf("<strong class=\"key %@\">%@</strong>", cname, str);
903     free(str);
904     } else {
905     xmlprintf("%@", line);
906     }
907     }
908     cblistclose(lines);
909     free(tsv);
910     xmlprintf("</dd>\n");
911     xmlprintf("<dd class=\"doc_navi\">\n");
912     xmlprintf("<span class=\"doc_uri\">%@</span>\n", turi);
913     if(g_candetail)
914     xmlprintf("- <a href=\"%@?phrase=%?&amp;detail=%d&amp;perpage=%d\" class=\"detail\">"
915     "[detail]</a>\n", g_scriptname, p_similar > 0 ? "" : p_phrase, id, p_perpage);
916     if(g_smlrvnum > 0)
917     xmlprintf("- <a href=\"%@?similar=%d&amp;perpage=%d\" class=\"similar\">[similar]</a>\n",
918     g_scriptname, id, p_perpage);
919     xmlprintf("</dd>\n");
920     xmlprintf("</dl>\n");
921     free(turi);
922     }
923    
924    
925     /* make a URI to be shown */
926     static char *makeshownuri(const char *uri){
927     const char *prefix;
928     char *turi, *file, *bef, *aft, *pv, *nuri, *wp;
929     int i;
930     if(cbstrfwimatch(uri, g_lprefix)) uri += strlen(g_lprefix);
931     prefix = g_gprefix;
932     if(cbstrfwimatch(uri, "file://") || cbstrfwimatch(uri, "ftp://") ||
933     cbstrfwimatch(uri, "http://") || cbstrfwimatch(uri, "https://")) prefix = "";
934     turi = cbsprintf("%s%s%s", prefix, uri, g_gsuffix);
935     if(g_dirindex[0] != '\0' && (file = strrchr(turi, '/')) != NULL &&
936     !cbstricmp(file + 1, g_dirindex)){
937     file[1] = '\0';
938     }
939     for(i = 0; i < cblistnum(g_replexprs); i++){
940     bef = cbmemdup(cblistval(g_replexprs, i, NULL), -1);
941     if((pv = strstr(bef, "{{!}}")) != NULL){
942     *pv = '\0';
943     aft = pv + 5;
944     } else {
945     aft = "";
946     }
947     if((pv = strstr(turi, bef)) != NULL){
948     nuri = cbmalloc(strlen(turi) + strlen(aft) + 1);
949     wp = nuri;
950     memcpy(wp, turi, pv - turi);
951     wp += pv - turi;
952     wp += sprintf(wp, "%s", aft);
953     sprintf(wp, "%s", pv + strlen(bef));
954     free(turi);
955     turi = nuri;
956     }
957     free(bef);
958     }
959     return turi;
960     }
961    
962    
963     /* show the top */
964     static void showinfo(void){
965     xmlprintf("<div id=\"estinfo\" class=\"estinfo\">");
966     xmlprintf("Powered by Hyper Estraier %@, with %d documents and %d words.",
967     est_version, est_db_doc_num(g_db), est_db_word_num(g_db));
968     xmlprintf("</div>\n");
969     }
970    
971    
972     /* output the log message */
973     static void outputlog(void){
974     FILE *ofp;
975     const char *val;
976     if(g_logfile[0] == '\0' || !(ofp = fopen(g_logfile, "ab"))) return;
977     if(!(val = getenv("REMOTE_ADDR"))) val = "0.0.0.0";
978     fprintf(ofp, "%s:", val);
979     if(!(val = getenv("REMOTE_PORT"))) val = "0";
980     fprintf(ofp, "%s\t", val);
981     fprintf(ofp, "%s\t", p_phrase);
982     if(!(val = getenv("HTTP_USER_AGENT"))) val = "*";
983     fprintf(ofp, "%s\n", val);
984     fclose(ofp);
985     }
986    
987    
988    
989     /* END OF FILE */

  ViewVC Help
Powered by ViewVC 1.1.26