/[Semantic-Engine]/EPrints/search.cgi
This is a repository of my old source code, which isn't updated anymore. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /EPrints/search.cgi

Parent Directory | Revision Log


Revision 18 - (show annotations)
Mon Jul 2 12:55:49 2007 UTC (16 years, 10 months ago) by dpavlin
File size: 5001 byte(s)
mix-in KinoSearch::Simple to fine-tune scores
1 #!/usr/bin/perl -w
2
3
4 ######################################
5 #
6 # A simple search engine program
7 #
8 ######################################
9
10
11 use strict;
12 use CGI::Carp qw(fatalsToBrowser);
13 use Semantic::API;
14 use CGI;
15 use Data::Dump qw/dump/;
16 use EPrints;
17 use Cwd qw/abs_path/;
18 use KinoSearch::Simple;
19 use lib '/home/dpavlin/stem-hr/';
20 use StemHR;
21
# When true, the search section dumps its intermediate data with warn().
my $debug = 1;

# Absolute path of this script with the file name stripped, so the value
# ends in a trailing slash.
my $abs_path = abs_path( $0 );
$abs_path =~ s!/[^/]*$!/!;	#!fix-vim

#############################################################
my $COLLECTION = 'EPrints';
# NOTE(review): @TERMS and @RESULTS are not referenced anywhere else in
# the visible script; kept in case something outside this view needs them.
my ( @TERMS, @RESULTS );
my ( $RESULTS_TO_DISPLAY, $TERMS_TO_DISPLAY ) = ( 20, 20 );
#############################################################


###############################
# CGI Variables
###############################
my $cgi = CGI->new;

# 'start' is later used as a splice() offset and as the first ordinal shown
# in the result list, so accept only a non-negative integer. Anything else
# (negative numbers, text, empty) falls back to the first page.
my $start = $cgi->param( 'start' ) || 0;
$start = 0 unless $start =~ m/\A\d+\z/;

my $query = $cgi->param( 'query' ) || '';
my $similar = $cgi->param( 'similar' ) || '';
my $slogovi = $cgi->param( 'slogovi' ) || '';
my $stem = $cgi->param( 'stem' ) || '';
# NOTE(review): this value is read but never used; the search section
# declares its own lexical $kino for the KinoSearch object.
my $kino = $cgi->param( 'kino' );

my $charset='iso-8859-2';

# The query sent to the semantic engine is the raw query, optionally
# extended with whatever EPrints->slogovi and StemHR->stem return for it
# when the matching checkbox is ticked.
my $full_query = $query;
$full_query .= " " . EPrints->slogovi( $query ) if ($slogovi);
$full_query .= " " . StemHR->stem( $query ) if ($stem);
50
##############################
# Start the HTML output
##############################

# The query is echoed back into an attribute value. It comes straight from
# the request, so it must be HTML-escaped or a quote character in it would
# break out of the attribute and inject markup.
my $query_attr = $cgi->escapeHTML( $query );

print "Content-type: text/html; charset=$charset\n\n";

# Page head and search form. The text input is self-closed and the form is
# closed explicitly, as the declared XHTML 1.1 doctype requires.
print qq|<?xml version="1.0" encoding="$charset"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="content-type" content="text/html; charset=$charset" />
<title>Search Engine</title>
</head>
<body>
<form method="get" action="">
<p>
Enter bunch of related terms to documents you are trying to find:
<br/><input type="text" name="query" value="$query_attr" size="80" />
<br/><input type="submit" />
|, $cgi->checkbox( -name => 'stem' ), $cgi->checkbox( -name => 'slogovi' ), qq|
</p>
</form>\n|;
71
72
73
##########################
# Do the actual search
##########################
# Runs only when the request carries a query or a document id to find
# similar documents for. Prints a KinoSearch hit box (query searches only),
# the related terms, a pager and one entry per result.
if ( $query || $similar ) {

    # Request parameters are echoed into the page below. Escape them once:
    # HTML-escaped copies for element content, URL-escaped values for the
    # pagination link (which also keeps quotes and angle brackets out of it).
    my $query_html      = $cgi->escapeHTML( $query );
    my $full_query_html = $cgi->escapeHTML( $full_query );
    my $page_url = sprintf '?query=%s;similar=%s;stem=%s;slogovi=%s',
        map { $cgi->escape( $_ ) } $query, $similar, $stem, $slogovi;

    # Create collection-based objects
    my $semantic = Semantic::API::Search->new(
        storage    => 'sqlite',
        database   => "$abs_path/eprints.db",
        collection => $COLLECTION,
    );

    # KinoSearch score per hit id. Stays empty for "similar" lookups, where
    # KinoSearch is not consulted.
    my $score_ponder = {};

    my ( $results, $terms );
    if ( $query ) {
        ( $results, $terms ) = $semantic->semantic_search( $full_query );

        # NOTE(review): the index path is relative to the process working
        # directory, unlike the sqlite database above - confirm intended.
        my $kino = KinoSearch::Simple->new(
            path     => 'kinoindex/',
            language => 'ru',
        );

        # KinoSearch is queried with the raw query, not $full_query.
        my $total_hits = $kino->search(
            query      => $query,
            offset     => 0,
            num_wanted => 10,
        );

        # NOTE(review): hit titles are printed unescaped, as before; confirm
        # whether the index stores plain text or HTML before escaping them.
        print qq|<div style="width:20%; float:right;">KinoSearch hits $total_hits for $query_html\n<ol>|;
        while ( my $hit = $kino->fetch_hit_hashref ) {
            print qq|<li><a href="#id_$hit->{id}">$hit->{title}</a> $hit->{score}</li>\n|;
            $score_ponder->{ $hit->{id} } = $hit->{score};
        }
        print qq|</ol></div>|;

    }
    else {
        ( $results, $terms ) = $semantic->find_similar( $similar );
    }

    # Treat a missing result or term set as empty instead of relying on
    # autovivification further down.
    $results ||= {};
    $terms   ||= {};

    warn "results = ",dump( $results ) if $debug;
    warn "terms = ",dump( $terms ) if $debug;

    ##################################
    # TERM BASED CALCULATIONS
    ##################################
    # Weight every term once, then sort descending by weight.
    # NOTE(review): $score_ponder is keyed by KinoSearch hit id but is looked
    # up here by term, so the multiplier only applies when a term string
    # happens to equal a hit id. It may have been meant for the result sort
    # below - behaviour is left unchanged pending confirmation.
    my %term_weight = map {
        my $weight = $terms->{$_};
        $weight *= $score_ponder->{$_} if $score_ponder->{$_};
        ( $_ => $weight );
    } keys %$terms;
    my @sorted_terms = sort { $term_weight{$b} <=> $term_weight{$a} } keys %$terms;

    # NOTE(review): the result-page offset $start is also used as the offset
    # into the term list, so later pages show later terms.
    my @top_terms = splice( @sorted_terms, $start, $TERMS_TO_DISPLAY );

    warn "top_terms = ", dump( @top_terms ) if $debug;

    print "<p>Full query: $full_query_html</p>\n";
    print "<p>Related Terms: ". ( join ", ", @top_terms ) ."</p>\n";
    print "<hr />\n";


    ##################################
    # DOCUMENT BASED CALCULATIONS
    ##################################
    my $result_count = scalar keys %$results;

    print "<p>Result Count: $result_count</p>\n";

    # Highest score first, then cut out the page that starts at $start.
    my @sorted_results = sort { $results->{$b} <=> $results->{$a} } keys %$results;
    my @display_results = splice( @sorted_results, $start, $RESULTS_TO_DISPLAY );

    warn "display results = ", dump( @display_results ) if $debug;

    ##################################
    # Access the storage engine to
    # retrieve the title and text
    ##################################
    my $i = 1 + $start;    # ordinal shown next to each result
    print $semantic->paginate( $page_url, $start, $result_count, $RESULTS_TO_DISPLAY );
    foreach my $id ( @display_results ) {
        # Select the record; the lookup/fulltext calls below take no id.
        EPrints->id( $id );

        # NOTE(review): title, keywords and abstract are printed unescaped,
        # as before; confirm whether EPrints returns plain text or HTML.
        print "<p><a name=\"id_$id\"/>$i. <b>", EPrints->lookup( 'title' ), "</b>";
        print "| score: <em>", sprintf("%.2f",$results->{$id}), "</em> | id: $id | <a href=\"?similar=$id\">similar</a> | ";
        my ($type,$uri) = EPrints->fulltext;
        print qq|<a href="$uri">$type</a>|;
        print "</p>\n";
        print "<p>";
        # print $semantic->summarize($id);
        print "</p><p>Keywords: ", EPrints->lookup('keywords'), "</p><p>";
        print "<small>", EPrints->lookup('abstract'), "</small>";
        print "</p>\n";
        $i++;
    }

}
166
167
# Close the document opened by the header section.
print join( "\n", '</body>', '</html>', '' );

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26