/[Semantic-Engine]/EPrints/search.cgi
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /EPrints/search.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 22 - (hide annotations)
Mon Jul 2 19:16:31 2007 UTC (16 years, 10 months ago) by dpavlin
File size: 5859 byte(s)
* save log file to disk instead of filling up apache's error.log
* implement similarity to results from KinoSearch 
* make KinoSearch query with operator AND
* correctly normalize and use score_ponder
1 dpavlin 1 #!/usr/bin/perl -w
2    
3    
4     ######################################
5     #
6     # A simple search engine program
7     #
8     ######################################
9    
10    
11     use strict;
12     use CGI::Carp qw(fatalsToBrowser);
13     use Semantic::API;
14     use CGI;
15     use Data::Dump qw/dump/;
16     use EPrints;
17     use Cwd qw/abs_path/;
18 dpavlin 18 use KinoSearch::Simple;
19 dpavlin 8 use lib '/home/dpavlin/stem-hr/';
20     use StemHR;
21 dpavlin 1
# When true, intermediate data structures are dumped through warn().
my $debug = 1;

# Directory that contains this script, including a trailing slash.
# It is assigned at compile time by the BEGIN block below and is later
# used to locate files that live next to the script.
my $abs_path;

BEGIN {
    use CGI::Carp qw(carpout);

    # Resolve the script's absolute path, then cut off the file name so
    # that only the directory part (with its trailing slash) remains.
    $abs_path = abs_path($0);
    $abs_path =~ s{/[^/]*$}{/};

    # Hand an append-mode log file beside the script to carpout() so that
    # diagnostics are saved to disk rather than the web server's error log.
    my $log_file = "$abs_path/log";
    open( my $log_fh, '>>', $log_file )
        or die("Unable to open $log_file: $!\n");
    carpout($log_fh);
}
36    
37    
#############################################################
# Configuration: collection name and page sizes
#############################################################
my $COLLECTION = 'EPrints';
my ( @TERMS, @RESULTS );
my ( $RESULTS_TO_DISPLAY, $TERMS_TO_DISPLAY ) = ( 20, 20 );
#############################################################


###############################
# CGI Variables
###############################
# Direct method call instead of the ambiguous indirect-object "new CGI".
my $cgi = CGI->new;

# Offset of the first result/term to display.  It is later used as a
# splice() offset and in arithmetic, so anything that is not a plain
# non-negative integer (negative numbers, text, floats) falls back to 0
# instead of splicing from the end of the list or triggering warnings.
my $start = $cgi->param('start') || 0;
$start = 0 unless $start =~ /\A\d+\z/;

my $query   = $cgi->param('query')   || '';    # free-text search terms
my $similar = $cgi->param('similar') || '';    # document id for "find similar"
my $slogovi = $cgi->param('slogovi') || '';    # checkbox: append EPrints->slogovi() output
my $stem    = $cgi->param('stem')    || '';    # checkbox: append StemHR->stem() output

# Checkbox: feed KinoSearch hits into find_similar() instead of weighting
# the semantic search by fulltext score.  May be undef when unchecked.
my $similar_to_kino = $cgi->param('similar_to_kino');

my $charset = 'iso-8859-2';

# The query handed to the semantic engine: the original terms plus the
# optional expansions selected through the checkboxes.
my $full_query = $query;
$full_query .= " " . EPrints->slogovi($query) if ($slogovi);
$full_query .= " " . StemHR->stem($query)     if ($stem);
61 dpavlin 1
62     ##############################
63     # Start the HTML output
64     ##############################
# The query comes straight from the request, so it must be HTML-escaped
# before it is echoed back inside the value="" attribute of the search
# box; otherwise a quote or angle bracket in the query breaks out of the
# attribute and injects markup into the page.
my $query_html = $cgi->escapeHTML($query);

# HTTP header followed by the XHTML preamble and the search form.
# The text <input> is self-closed and the <form> element is closed after
# the checkboxes so that the document is well-formed, as the XHTML 1.1
# doctype requires.
print "Content-type: text/html; charset=$charset\n\n";
print qq|<?xml version="1.0" encoding="$charset"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="content-type" content="text/html; charset=$charset" />
<title>Semantic Search Engine</title>
</head>
<body>
<form method="get" action="">
<p>
Enter bunch of related terms to documents you are trying to find:
<br/><input type="text" name="query" value="$query_html" size="80" />
<br/><input type="submit" />
|,
    $cgi->checkbox( -name => 'stem' ),
    $cgi->checkbox( -name => 'slogovi' ),
    $cgi->checkbox( -name => 'similar_to_kino', -label => 'similar to fulltext' ),
    qq|
</p>
</form>\n|;
86    
87    
88    
89     ##########################
90     # Do the actual search
91     ##########################
92     if( $query || $similar ) {
93    
# Create collection-based objects
my $semantic = Semantic::API::Search->new(
    storage    => 'sqlite',
    database   => "$abs_path/eprints.db",
    collection => $COLLECTION,
);

# Per-document weights (document id => normalised KinoSearch score).
# Only filled in for a plain query without the "similar to fulltext"
# option; the result ranking applies a default weight to every document
# that has no entry here.
my $score_ponder;

my ( $results, $terms );
if ($query) {
    my $kino = KinoSearch::Simple->new(
        path     => 'kinoindex/',
        language => 'ru',
    );

    # Require every word: join the terms with AND.  split ' ' (unlike
    # split /\s+/) discards leading whitespace, so a query such as
    # " foo bar" no longer produces the malformed " AND foo AND bar".
    my $kino_query = join( " AND ", split( ' ', $query ) );

    my $total_hits = $kino->search(
        query      => $kino_query,
        offset     => 0,
        num_wanted => 10,
    );

    my @similar_ids;

    # Both bounds start at 0, so $min only moves for negative scores and
    # $max only for positive ones.
    my ( $max, $min ) = ( 0, 0 );

    # The query is user input: escape it before echoing it into the page.
    my $kino_query_html = $cgi->escapeHTML($kino_query);

    # NOTE(review): hit titles and ids are printed as stored in the index;
    # confirm they are HTML-safe there.
    print qq|<div style="width:20%; float:right;">KinoSearch hits $total_hits for $kino_query_html\n<ol>|;
    while ( my $hit = $kino->fetch_hit_hashref ) {
        print qq|<li><a href="#id_$hit->{id}">$hit->{title}</a> $hit->{score}</li>\n|;
        if ($similar_to_kino) {
            # Hits become seeds for find_similar() below.
            push @similar_ids, $hit->{id};
        }
        else {
            # Remember the raw score and track the observed range.
            $score_ponder->{ $hit->{id} } = $hit->{score};
            $min = $hit->{score} if ( $hit->{score} < $min );
            $max = $hit->{score} if ( $hit->{score} > $max );
        }
    }

    # Rescale the collected scores by the observed range.  When the range
    # is empty (every hit scored exactly $min) the division is skipped so
    # the script does not die with "Illegal division by zero"; the weights
    # then stay at 0.
    my $d = $max - $min;

    for my $hit_id ( keys %$score_ponder ) {
        $score_ponder->{$hit_id} -= $min;
        $score_ponder->{$hit_id} /= $d if $d;
        warn "score_ponder $hit_id = ", $score_ponder->{$hit_id} if $debug;
    }

    if ($similar_to_kino) {
        print qq|</ol></div>|;
        ( $results, $terms ) = $semantic->find_similar(@similar_ids);
    }
    else {
        print qq|</ol>score range: $min - $max</div>|;
        ( $results, $terms ) = $semantic->semantic_search($full_query);
    }

}
else {
    # No query text: look up documents similar to the given id.
    ( $results, $terms ) = $semantic->find_similar($similar);
}
150    
warn "results = ", dump($results) if $debug;
warn "terms = ",   dump($terms)   if $debug;

##################################
# TERM BASED CALCULATIONS
##################################
# Order the related terms by descending weight and take the slice that
# starts at the requested offset.
my @sorted_terms = sort { $terms->{$b} <=> $terms->{$a} } keys %$terms;
my @top_terms = splice( @sorted_terms, $start, $TERMS_TO_DISPLAY );

warn "top_terms = ", dump(@top_terms) if $debug;

# $full_query is built from request input, so it is HTML-escaped before
# being echoed into the page.  The terms are meant to be shown as plain
# text and are escaped the same way.
print "<p>Full query: ", $cgi->escapeHTML($full_query), "</p>\n";
print "<p>Related Terms: ",
    join( ", ", map { $cgi->escapeHTML($_) } @top_terms ),
    "</p>\n";
print "<hr />\n";
165    
166    
167     ##################################
168     # DOCUMENT BASED CALCULATIONS
169     ##################################
170    
# Report how many documents the search returned.
my @result_ids   = keys %$results;
my $result_count = scalar @result_ids;
print "<p>Result Count: " . $result_count . "</p>\n";

# Scale each document's score in place by its fulltext weight; documents
# without a (non-zero) weight are scaled by 0.1 instead.  The adjusted
# scores stay in %$results because they are printed with each result.
for my $doc_id (@result_ids) {
    my $weight = $score_ponder->{$doc_id} || 0.1;
    $results->{$doc_id} *= $weight;
}

# Highest adjusted score first, then cut out the requested page.
my @sorted_results =
    sort { $results->{$b} <=> $results->{$a} } @result_ids;
my @display_results =
    splice( @sorted_results, $start, $RESULTS_TO_DISPLAY );

if ($debug) {
    warn "display results = ", dump(@display_results);
}
181 dpavlin 1
182     ##################################
183     # Access the storage engine to
184     # retrieve the title and text
185     ##################################
# Running number shown in front of each result (1-based, continues
# across pages).
my $i = 1 + $start;

# Base URL handed to the pager.  Every value is URL-escaped so that
# characters such as '&', ';', '#', '"' or spaces in the query cannot
# break the link, and the "similar to fulltext" option is carried along
# so that later pages are built from the same kind of search as the first.
my @page_params = (
    query   => $query,
    similar => $similar,
    stem    => $stem,
    slogovi => $slogovi,
);
push @page_params, ( similar_to_kino => $similar_to_kino )
    if $similar_to_kino;

my @page_pairs;
while ( my ( $name, $value ) = splice( @page_params, 0, 2 ) ) {
    push @page_pairs, $name . '=' . CGI::escape($value);
}
my $page_url = '?' . join( ';', @page_pairs );

print $semantic->paginate( $page_url, $start, scalar keys %$results, $RESULTS_TO_DISPLAY );

# NOTE(review): title, keywords, abstract and the fulltext URI are printed
# exactly as EPrints returns them; confirm those fields are HTML-safe.
foreach my $id (@display_results) {
    # Select the record that the following lookup()/fulltext calls read.
    EPrints->id($id);

    print "<p><a name=\"id_$id\"/>$i. <b>", EPrints->lookup('title'), "</b>";
    print "| score: <em>", sprintf( "%.2f", $results->{$id} ), "</em> | id: $id | <a href=\"?similar=$id\">similar</a> | ";

    my ( $type, $uri ) = EPrints->fulltext;
    print qq|<a href="$uri">$type</a>|;
    print "</p>\n";

    print "<p>";
    # print $semantic->summarize($id);
    print "</p><p>Keywords: ", EPrints->lookup('keywords'), "</p><p>";
    print "<small>", EPrints->lookup('abstract'), "</small>";
    print "</p>\n";

    $i++;
}
202    
203     }
204    
205    
# Finish the page by closing the <body> and <html> elements.
print qq|</body>\n</html>\n|;

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26