/[pgestraier]/trunk/data/indexer.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/data/indexer.pl

Parent Directory | Revision Log


Revision 83 - (show annotations)
Wed Jan 17 22:30:14 2007 UTC (17 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 1063 byte(s)
restructure IMDB trivia parser, added db target to create
trivia database in PostgreSQL
#!/usr/bin/perl -w

# Index IMDB trivia entries into a Hyper Estraier node.
#
# Reads trivia.list.gz (decompressed through gzip) from the current
# directory, hands the stream to parse_trivia() and registers one
# Search::Estraier document per parsed entry in the node at
# http://localhost:1978/node/trivia.
#
# Each entry arrives in the callback as a key/value list; the keys used
# here are title, trivia, year and qv.

use strict;
use warnings;
use Search::Estraier 0.06;
use parse_trivia;

# score for words in title: the title is added to the hidden text this
# many times
my $title_rank = 3;

# List-form pipe open runs gzip directly, without going through a shell.
# Note that a missing or corrupt archive is NOT detected here (the fork
# succeeds); it only shows up as gzip's exit status when the pipe is closed.
open(my $t, '-|', 'gzip', '-cd', 'trivia.list.gz')
    or die "can't open trivia.list.gz: $!";

# open node
my $node = Search::Estraier::Node->new(
    url    => 'http://localhost:1978/node/trivia',
    user   => 'admin',
    passwd => 'admin',
    create => 1,
);

# sequence number of the entry being indexed; makes every @uri unique
my $nr = 1;

parse_trivia($t, sub {

    my $entry = {@_};

    # Take the number for this entry and advance the counter right away,
    # so that no two entries ever share a URI.
    my $uri = "file://localhost/trivia/$nr";
    $nr++;

    # create a document object
    my $doc = Search::Estraier::Document->new;

    # add attributes to the document object
    $doc->add_attr('@uri', $uri);

    $doc->add_attr('@title', $entry->{title});

    # Build the boosted title as ONE string.  Writing the repetition
    # directly in the argument list, as (($title . ' ') x $title_rank),
    # makes "x" repeat the list and pass $title_rank separate arguments.
    my $boosted_title = ($entry->{title} . ' ') x $title_rank;
    $doc->add_hidden_text($boosted_title);

    $doc->add_attr('@size', length($entry->{trivia}));

    $doc->add_attr('year', $entry->{year}) if ($entry->{year});

    # qv may be an array reference of values or a single plain value
    # (NOTE(review): confirm against parse_trivia); undef is skipped.
    my $qv = $entry->{qv};
    my @quotes;
    if (ref($qv) eq 'ARRAY') {
        @quotes = @{$qv};
    }
    elsif (defined $qv) {
        @quotes = ($qv);
    }

    foreach my $q (@quotes) {
        $doc->add_attr('quote', $q);
        $doc->add_hidden_text($q);
    }

    # add the body text to the document object
    $doc->add_text($entry->{trivia});

    # register the document object to the database; report failures
    # instead of dropping them silently, but keep indexing other entries
    unless ($node->put_doc($doc)) {
        my $status = $node->status;
        $status = 'unknown' unless defined $status;
        warn "can't register $uri: status $status\n";
    }

});

# Closing the pipe waits for gzip and exposes its exit status.  Skip the
# check if the handle has already been closed by parse_trivia.
if (defined fileno($t)) {
    unless (close($t)) {
        die $!
            ? "error closing pipe from gzip: $!\n"
            : "gzip exited with status " . ($? >> 8) . "\n";
    }
}

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26