#!/usr/bin/perl -w

# Reads rows from an ISIS database through WebPAC::Input::ISIS, converts each
# row with WebPAC::Normalize::XML and adds the result to the index handled by
# WebPAC::Output::Estraier. The number of rows added is logged at the end.
#
# Usage: $0 [limit]
#
# The optional first command-line argument is handed to the ISIS input as
# limit_mfn; when it is omitted, limit_mfn is passed as undef.

use strict;

use Cwd qw/abs_path/;
use File::Temp qw/tempdir/;
use Data::Dumper;
use lib './lib';

use WebPAC::Lookup;
use WebPAC::Input::ISIS;
use WebPAC::DB 0.02;
use WebPAC::Normalize::XML;
use WebPAC::Output::TT;
use WebPAC::Output::Estraier;

my $limit = shift @ARGV;

# Directory containing this script. The substitution keeps a trailing slash,
# so the "$abs_path/conf/..." paths built below contain a doubled slash.
my $abs_path = abs_path($0);
$abs_path =~ s#/[^/]*$#/#;

my $isis_file = '/data/isis_data/ps/LIBRI/LIBRI';

my $lookup = WebPAC::Lookup->new(
	lookup_file => "$abs_path/conf/lookup/isis.pm",
);

my $isis = WebPAC::Input::ISIS->new(
	code_page => 'ISO-8859-2',	# application encoding
	limit_mfn => $limit,
);

# The return value is kept for reference only; nothing below reads it.
my $maxmfn = $isis->open(
	filename => $isis_file,
	code_page => '852',	# database encoding
);

my $path = './db/';

my $db = WebPAC::DB->new(
	path => $path,
);

my $n = WebPAC::Normalize::XML->new(
#	filter => { 'foo' => sub { shift } },
	db => $db,
	lookup_regex => $lookup->regex,
	lookup => $lookup,
);

$n->open(
	tag => 'isis',
	xml_file => "$abs_path/conf/normalize/isis_ffzg.xml",
);

# Template output object; only the disabled HTML dump inside the loop uses it.
my $out = WebPAC::Output::TT->new(
	include_path => "$abs_path/conf/output/tt",
	filters => { foo => sub { shift } },
);

my $est = WebPAC::Output::Estraier->new(
	url => 'http://localhost:1978/node/webpac2',
	user => 'admin',
	passwd => 'admin',
	database => 'ps',
);

my $total_rows = 0;

# The range 0 .. size yields size + 1 iterations; any iteration for which
# fetch returns a false value is skipped without being counted.
for ( 0 .. $isis->size ) {

	my $row = $isis->fetch || next;

	# The first value of field '000' is used as the record id (MFN).
	my $mfn = $row->{'000'}->[0] || die "can't find MFN";

	my $ds = $n->data_structure($row);

#	print STDERR Dumper($row, $ds);

#	my $html = $out->apply(
#		template => 'html_ffzg.tt',
#		data => $ds,
#	);
#
#	# create test output
#
#	my $file = sprintf('out/%02d.html', $mfn );
#	open(my $fh, '>', $file) or die "can't open $file: $!";
#	print $fh $html;
#	close($fh);
#
#	$html =~ s#\s*[\n\r]+\s*##gs;
#
#	print STDERR $html;

	$est->add(
		id => $mfn,
		ds => $ds,
		type => 'search',
	);

	$total_rows++;

};

# NOTE(review): _get_logger is named like a private method of WebPAC::Lookup;
# confirm whether a public way to obtain the logger exists.
my $log = $lookup->_get_logger;

$log->info("$total_rows records indexed");