/[Semantic-Engine]/EPrints/search.cgi
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /EPrints/search.cgi

Parent Directory | Revision Log


Revision 22 - (show annotations)
Mon Jul 2 19:16:31 2007 UTC (16 years, 9 months ago) by dpavlin
File size: 5859 byte(s)
* save log file to disk instead of filling up apache's error.log
* implement similarity to results from KinoSearch 
* make KinoSearch query with operator AND
* correctly normalize and use score_ponder
#!/usr/bin/perl -w


######################################
#
# A simple search engine program
#
# CGI front end that combines two engines:
#   * KinoSearch::Simple full-text hits for the query (all words
#     joined with AND), and
#   * Semantic::API::Search (semantic_search / find_similar) over the
#     'EPrints' collection.
#
# Request parameters:
#   query           - free-text search terms
#   similar         - document id; show documents similar to it
#   start           - offset of the first result to display
#   stem            - if set, append StemHR->stem(query) to the query
#   slogovi         - if set, append EPrints->slogovi(query) to the query
#   similar_to_kino - if set, feed the KinoSearch hit ids to
#                     find_similar instead of using their scores as
#                     weights for the semantic results
#
######################################


use strict;
use warnings;
use CGI::Carp qw(fatalsToBrowser);
use Semantic::API;
use CGI;
use Data::Dump qw/dump/;
use EPrints;
use Cwd qw/abs_path/;
use KinoSearch::Simple;
use lib '/home/dpavlin/stem-hr/';
use StemHR;

my $debug = 1;

my $abs_path;

# Redirect warnings/errors to a log file next to the script instead of
# apache's error.log.  Done in BEGIN so compile-time messages are
# captured as well.
BEGIN {
    use CGI::Carp qw(carpout);

    # directory containing this script, with trailing slash
    $abs_path = abs_path( $0 );
    $abs_path =~ s!/[^/]*$!/!;    #!fix-vim

    open(my $log_fh, '>>', "$abs_path/log") or
        die("Unable to open $abs_path/log: $!\n");
    carpout($log_fh);
}


#############################################################
my $COLLECTION = 'EPrints';
my ( $RESULTS_TO_DISPLAY, $TERMS_TO_DISPLAY ) = ( 20, 20 );
#############################################################


###############################
# CGI Variables
###############################
my $cgi = CGI->new;

# 'start' is used as a splice() offset and in arithmetic, so accept
# only a non-negative integer and fall back to 0 for anything else.
my $start = $cgi->param( 'start' );
$start = 0 unless defined $start && $start =~ /\A\d+\z/;

my $query   = $cgi->param( 'query' )   || '';
my $similar = $cgi->param( 'similar' ) || '';
my $slogovi = $cgi->param( 'slogovi' ) || '';
my $stem    = $cgi->param( 'stem' )    || '';
my $similar_to_kino = $cgi->param( 'similar_to_kino' );

my $charset = 'iso-8859-2';
# tell CGI.pm which charset the page uses so escapeHTML() agrees with it
$cgi->charset( $charset );

my $full_query = $query;
$full_query .= " " . EPrints->slogovi( $query ) if ($slogovi);
$full_query .= " " . StemHR->stem( $query ) if ($stem);

# Everything derived from request parameters must be escaped before it
# is written into the page, otherwise a crafted query can inject markup.
my $query_html      = $cgi->escapeHTML( $query );
my $full_query_html = $cgi->escapeHTML( $full_query );

##############################
# Start the HTML output
##############################
print "Content-type: text/html; charset=$charset\n\n";
print qq|<?xml version="1.0" encoding="$charset"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="content-type" content="text/html; charset=$charset" />
<title>Semantic Search Engine</title>
</head>
<body>
<form method="get" action="">
<p>
Enter bunch of related terms to documents you are trying to find:
<br/><input type="text" name="query" value="$query_html" size="80" />
<br/><input type="submit" />
|,
    $cgi->checkbox( -name => 'stem' ),
    $cgi->checkbox( -name => 'slogovi' ),
    $cgi->checkbox( -name => 'similar_to_kino', -label=>'similar to fulltext' ),
    qq|
</p>
</form>\n|;



##########################
# Do the actual search
##########################
if ( $query || $similar ) {

    # Create collection-based objects
    my $semantic = Semantic::API::Search->new(
        storage    => 'sqlite',
        database   => "$abs_path/eprints.db",
        collection => $COLLECTION,
    );

    # document id => weight in 0..1 derived from the KinoSearch score;
    # stays empty unless a plain (non similar_to_kino) query is run
    my $score_ponder = {};

    my ($results, $terms);
    if ( $query ) {
        # NOTE(review): index path is relative to the current working
        # directory, unlike eprints.db which uses $abs_path -- confirm.
        # NOTE(review): 'ru' analyzer is used for this index -- confirm
        # it matches the language the index was built with.
        my $kino = KinoSearch::Simple->new(
            path     => 'kinoindex/',
            language => 'ru',
        );

        # split ' ' skips leading whitespace, so a query like " foo"
        # does not produce an empty first term (" AND foo")
        my $kino_query = join( " AND ", split( ' ', $query ) );

        my $total_hits = $kino->search(
            query      => $kino_query,
            offset     => 0,
            num_wanted => 10,
        );

        my @similar_ids;
        # NOTE(review): both bounds start at 0, so for non-negative
        # scores $min stays 0 and normalization is score / $max.
        my ( $max, $min ) = ( 0, 0 );

        my $kino_query_html = $cgi->escapeHTML( $kino_query );
        print qq|<div style="width:20%; float:right;">KinoSearch hits $total_hits for $kino_query_html\n<ol>|;
        # NOTE(review): title/id/score come from the index and are
        # printed as-is; escape them if the index may hold raw markup.
        while ( my $hit = $kino->fetch_hit_hashref ) {
            print qq|<li><a href="#id_$hit->{id}">$hit->{title}</a> $hit->{score}</li>\n|;
            if ( $similar_to_kino ) {
                push @similar_ids, $hit->{id};
            } else {
                $score_ponder->{ $hit->{id} } = $hit->{score};
                $min = $hit->{score} if ( $hit->{score} < $min );
                $max = $hit->{score} if ( $hit->{score} > $max );
            }
        }

        # Normalize hit scores into 0..1.  When all scores are equal to
        # $min the range is 0; use weight 0 then (which later falls back
        # to the default weight) instead of dividing by zero.
        my $d = $max - $min;
        for my $id ( keys %$score_ponder ) {
            $score_ponder->{$id} =
                $d ? ( $score_ponder->{$id} - $min ) / $d : 0;
            warn "score_ponder $id = ", $score_ponder->{$id} if $debug;
        }

        if ( $similar_to_kino ) {
            print qq|</ol></div>|;
            ($results, $terms) = $semantic->find_similar( @similar_ids );
        } else {
            print qq|</ol>score range: $min - $max</div>|;
            ($results, $terms) = $semantic->semantic_search( $full_query );
        }

    } else {
        ($results, $terms) = $semantic->find_similar( $similar );
    }

    warn "results = ",dump( $results ) if $debug;
    warn "terms = ",dump( $terms ) if $debug;

    ##################################
    # TERM BASED CALCULATIONS
    ##################################
    my @sorted_terms = sort { $terms->{$b} <=> $terms->{$a} } keys %$terms;
    # guard the offset: splice() warns when it is past the end of the list
    my @top_terms = $start < @sorted_terms
        ? splice( @sorted_terms, $start, $TERMS_TO_DISPLAY )
        : ();

    warn "top_terms = ", dump( @top_terms ) if $debug;

    print "<p>Full query: $full_query_html</p>\n";
    print "<p>Related Terms: ". ( join ", ", @top_terms ) ."</p>\n";
    print "<hr />\n";


    ##################################
    # DOCUMENT BASED CALCULATIONS
    ##################################

    print "<p>Result Count: ".(scalar keys %$results)."</p>\n";

    # Weight every semantic score by its KinoSearch ponder; documents
    # without one (or with ponder 0) get the default weight 0.1.
    for my $id ( keys %$results ) {
        $results->{$id} *= ( $score_ponder->{$id} || 0.1 );
    }

    my @sorted_results =
        sort { $results->{$b} <=> $results->{$a} } keys %$results;
    my @display_results = $start < @sorted_results
        ? splice( @sorted_results, $start, $RESULTS_TO_DISPLAY )
        : ();

    warn "display results = ", dump( @display_results ) if $debug;

    ##################################
    # Access the storage engine to
    # retrieve the title and text
    ##################################
    my $i = 1 + $start;

    # URL-encode parameter values so spaces, quotes, '&' or ';' in the
    # query cannot break the pagination links
    my $page_url = '?' . join( ';',
        map { $_->[0] . '=' . CGI::escape( $_->[1] ) } (
            [ query   => $query ],
            [ similar => $similar ],
            [ stem    => $stem ],
            [ slogovi => $slogovi ],
        )
    );
    print $semantic->paginate( $page_url, $start, scalar keys %$results, $RESULTS_TO_DISPLAY);

    # NOTE(review): title, keywords, abstract and the fulltext URI are
    # printed as stored in EPrints; escape them if they are plain text.
    foreach my $id ( @display_results ) {
        EPrints->id( $id );
        print "<p><a name=\"id_$id\"/>$i. <b>", EPrints->lookup( 'title' ), "</b>";
        print "| score: <em>", sprintf("%.2f",$results->{$id}), "</em> | id: $id | <a href=\"?similar=$id\">similar</a> | ";
        my ($type,$uri) = EPrints->fulltext;
        print qq|<a href="$uri">$type</a>|;
        print "</p>\n";
        print "<p>";
        # print $semantic->summarize($id);
        print "</p><p>Keywords: ", EPrints->lookup('keywords'), "</p><p>";
        print "<small>", EPrints->lookup('abstract'), "</small>";
        print "</p>\n";
        $i++;
    }

}


print "</body>\n</html>\n";

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26