#!/usr/bin/perl -w

######################################
#
# A simple search engine program
#
######################################

use strict;
use CGI::Carp qw(fatalsToBrowser);
use Semantic::API;
use CGI;
use Data::Dump qw/dump/;
use EPrints;
use Cwd qw/abs_path/;

# Absolute directory of this script: resolve $0, then cut everything after
# the last "/" (the slash itself is kept). The SQLite database file is
# located relative to this directory further down.
( my $abs_path = abs_path( $0 ) ) =~ s{/[^/]*$}{/};

#############################################################
# Collection name handed to the search object.
my $COLLECTION = 'EPrints';

my @TERMS;
my @RESULTS;

# How many results / related terms are shown per page.
my $RESULTS_TO_DISPLAY = 20;
my $TERMS_TO_DISPLAY   = 20;
#############################################################


###############################
# CGI Variables
###############################
my $cgi = CGI->new;

# Pagination offset. It comes straight from the request and is later used
# as a splice() offset and for result numbering, so accept only a plain
# non-negative integer and fall back to 0 for anything else.
my $start = $cgi->param( 'start' );
$start = ( defined $start && $start =~ /\A(\d{1,9})\z/ ) ? $1 + 0 : 0;

my $query   = $cgi->param( 'query' )   || '';    # free-text search
my $similar = $cgi->param( 'similar' ) || '';    # id for "find similar"
my $slogovi = $cgi->param( 'slogovi' );          # checkbox: expand the query

my $charset = 'iso-8859-2';

# When the checkbox is ticked, append whatever EPrints::slogovi() derives
# from the query to the text that is actually searched.
# NOTE(review): "slogovi" is Croatian for "syllables", so this presumably
# adds the query's syllables -- confirm against EPrints.pm.
my $full_query = $query;
$full_query .= " " . join( " ", EPrints::slogovi( $query ) ) if $slogovi;

##############################
# Start the HTML output
##############################

# The query is user input echoed back into an attribute value; escape it so
# quotes or markup in the query cannot break out of the attribute.
my $query_attr = $cgi->escapeHTML( $query );

print "Content-type: text/html; charset=$charset\n\n";

# Page head and the search form. The form is closed here, before any
# results are printed, so the document stays well-formed.
print qq|<?xml version="1.0" encoding="$charset"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="content-type" content="text/html; charset=$charset" />
<title>Search Engine</title>
</head>
<body>
<form method="get" action="">
<p>
<input type="text" name="query" value="$query_attr" />
<input type="submit" />
|, $cgi->checkbox( -name => 'slogovi' ), qq|
</p>
</form>\n|;



##########################
# Do the actual search
##########################
if ( $query || $similar ) {

    # Create collection-based objects
    my $semantic = Semantic::API::Search->new(
        storage    => 'sqlite',
        database   => "$abs_path/eprints.db",
        collection => $COLLECTION,
    );

    # A text query takes precedence; otherwise search for documents similar
    # to the id given in the "similar" parameter. Both calls are expected to
    # return two hash references mapping id => score and term => score.
    my ( $results, $terms ) = $query
        ? $semantic->semantic_search( $full_query )
        : $semantic->find_similar( $similar );

    warn "results = ", dump( $results );
    warn "terms = ",   dump( $terms );

    # Treat a missing score table as "nothing found" so the dereferences
    # below are always valid.
    $results ||= {};
    $terms   ||= {};

    ##################################
    # TERM BASED CALCULATIONS
    ##################################
    my @sorted_terms = sort { $terms->{$b} <=> $terms->{$a} } keys %$terms;

    # NOTE(review): the term list is paged with the same offset as the
    # results, as in the original code -- confirm this is intended rather
    # than always showing the highest-scoring terms.
    # The bounds check avoids calling splice() with an offset past the end.
    my @top_terms = $start < @sorted_terms
        ? splice( @sorted_terms, $start, $TERMS_TO_DISPLAY )
        : ();

    # Both values derive from user input / indexed text: escape for HTML.
    print "<p>Full query: ", $cgi->escapeHTML( $full_query ), "</p>\n";
    print "<p>Related Terms: ",
        join( ", ", map { $cgi->escapeHTML( $_ ) } @top_terms ),
        "</p>\n";
    print "<hr />\n";

    ##################################
    # DOCUMENT BASED CALCULATIONS
    ##################################
    my $result_count = scalar keys %$results;
    print "<p>Result Count: $result_count</p>\n";

    my @sorted_results =
        sort { $results->{$b} <=> $results->{$a} } keys %$results;
    my @display_results = $start < @sorted_results
        ? splice( @sorted_results, $start, $RESULTS_TO_DISPLAY )
        : ();

    warn dump( @display_results );

    ##################################
    # Access the storage engine to
    # retrieve the title and text
    ##################################

    # Base URL for the pager. Values are URL-encoded so spaces, "&", ";" or
    # quotes in the query cannot corrupt the link, and the "slogovi" flag is
    # carried along so later pages run the same expanded query as page one.
    my $page_url = "?query=" . CGI::escape( $query )
                 . ";similar=" . CGI::escape( $similar );
    $page_url .= ";slogovi=" . CGI::escape( $slogovi ) if $slogovi;

    print $semantic->paginate( $page_url, $start, $result_count,
        $RESULTS_TO_DISPLAY );

    my $i = 1 + $start;    # 1-based running number shown next to each hit
    for my $id ( @display_results ) {

        # Presumably selects the record that the lookup() calls below read
        # from -- confirm against EPrints.pm.
        EPrints->id( $id );

        my $score        = sprintf( "%.2f", $results->{$id} );
        my $similar_href = "?similar=" . CGI::escape( $id );

        # NOTE(review): title and abstract are emitted unescaped, as in the
        # original; escape them if the stored text is not trusted HTML.
        print "<p>$i. <b>", EPrints->lookup( 'title' ),
            "</b> <em>$score</em> ",
            "<a href=\"$similar_href\">similar</a></p>\n";
        print "<p>";
        # print $semantic->summarize($id);
        print "<small>", EPrints->lookup( 'abstract' ), "</small>";
        print "</p>\n";
        $i++;
    }

}


print "</body>\n</html>\n";