|
link /data/Webpacus/config.yml |
|
1 |
|
--- #YAML:1.0 |
2 |
|
# DO NOT USE TABS FOR INDENTATION OR label/value SEPARATION!!! |
3 |
|
|
4 |
|
# encoding of this configuration file |
5 |
|
config_encoding: 'ISO-8859-2' |
6 |
|
# encoding in Catalyst. |
7 |
|
catalyst_encoding: 'UTF-8' |
8 |
|
# relative path to sites templates under Catalyst root |
9 |
|
sites_root: 'sites' |
10 |
|
|
11 |
|
# which indexing engine to use? (hyperestraier is default) |
12 |
|
# disable indexer |
13 |
|
use_indexer: '' |
14 |
|
#use_indexer: 'hyperestraier' |
15 |
|
#use_indexer: 'hyperestraier-native' |
16 |
|
#use_indexer: 'kinosearch' |
17 |
|
|
18 |
|
# configuration for Hyper Estraier full text search engine |
19 |
|
hyperestraier: |
20 |
|
#url: 'http://localhost:1978/node/webpac2' |
21 |
|
masterurl: 'http://localhost:1978' |
22 |
|
defaultnode: 'webpac2' |
23 |
|
# defaultnode: 'ps' |
24 |
|
defaultdepth: 1 |
25 |
|
user: 'admin' |
26 |
|
passwd: 'admin' |
27 |
|
# don't turn this on! it will spit huge amounts of output |
28 |
|
#debug: 1 |
29 |
|
# |
30 |
|
# if using hyperestraier-native, use the following: |
31 |
|
path: 'casket/' |
32 |
|
# |
33 |
|
# number of results on each page |
34 |
|
hits_on_page: 10 |
35 |
|
# number of results to fetch for suggestions (multiple identical results are folded into one) |
36 |
|
hits_for_suggest: 20 |
37 |
|
# |
38 |
|
# options used while indexing |
39 |
|
# |
40 |
|
# which tag type to use for search engine (used while indexing) |
41 |
|
type: 'search' |
42 |
|
# |
43 |
|
|
44 |
|
# options for pager |
45 |
|
pager: |
46 |
|
# how many pages to show for navigation? |
47 |
|
max_pages: 20 |
48 |
|
|
49 |
|
# configuration for KinoSearch search engine library |
50 |
|
kinosearch: |
51 |
|
index_path: './kinosearch/' |
52 |
|
database: 'unconfigured database name' |
53 |
|
label: 'unconfigured database label' |
54 |
|
encoding: 'iso-8859-2' |
55 |
|
# clean database before opening? (WARNING: this erases existing database) |
56 |
|
clean: 1 |
57 |
|
# which field type to index? |
58 |
|
type: 'search' |
59 |
|
|
60 |
|
webpac: |
61 |
|
# default template to use |
62 |
|
template: 'html_ffzg_results_short.tt' |
63 |
|
# path to database files |
64 |
|
db_path: '/data/webpac2/db' |
65 |
|
# path to templates used by WebPAC::Output |
66 |
|
template_path: '/data/webpac2/conf/output/tt' |
67 |
|
# default template for results |
68 |
|
default_template: 'html_ffzg.tt' |
69 |
|
# default user editable css file |
70 |
|
default_css: 'user.css' |
71 |
|
css_path: 'root/css' |
72 |
|
# encoding coming from webpac |
73 |
|
webpac_encoding: 'iso-8859-2' |
74 |
|
# encoding expected by Catalyst |
75 |
|
out_encoding: 'utf-8' |
76 |
|
# define different input formats (types) and perl modules to handle them |
77 |
|
inputs: |
78 |
|
isis: 'WebPAC::Input::ISIS' |
79 |
|
marc: 'WebPAC::Input::MARC' |
80 |
|
excel: 'WebPAC::Input::Excel' |
81 |
|
gutenberg: 'WebPAC::Input::Gutenberg' |
82 |
|
dbf: 'WebPAC::Input::DBF' |
83 |
|
# define delimiters for validation |
84 |
|
delimiters: |
85 |
|
- ' ; ' |
86 |
|
- ' : ' |
87 |
|
- ' / ' |
88 |
|
- ' \. ' |
89 |
|
|
90 |
|
editor: |
91 |
|
# open this record when opening editor |
92 |
|
# (it will also be used to deduce default database and input) |
93 |
|
default_record_uri: 'ps/libri/1' |
94 |
|
|
95 |
|
# directives after this are used when indexing using core WebPAC modules |
96 |
|
|
97 |
|
databases: |
98 |
|
# This is an empty database created only in Hyper Estraier to merge |
99 |
|
# all three databases |
100 |
|
'webpac2': |
101 |
|
name: 'Search all' |
102 |
|
links: |
103 |
|
- to: ps |
104 |
|
credit: 10000 |
105 |
|
- to: kk |
106 |
|
credit: 10000 |
107 |
|
- to: jzav |
108 |
|
credit: 10000 |
109 |
|
|
110 |
|
# site with alternative databases |
111 |
|
'hr': |
112 |
|
name: 'Primjer za hrvatsko suèelje' |
113 |
|
links: |
114 |
|
- to: ffps |
115 |
|
credit: 10000 |
116 |
|
- to: ffkk |
117 |
|
credit: 5000 |
118 |
|
- to: fffo |
119 |
|
credit: 10000 |
120 |
|
|
121 |
|
# Psihologija |
122 |
|
'ffps': |
123 |
|
# all variables here will be available as config variables in template |
124 |
|
name: 'Knji¾nica psihologije' |
125 |
|
code: 'ps' |
126 |
|
|
127 |
|
# define input source |
128 |
|
input: |
129 |
|
# this will create a unique name from the name of the database and the MFN |
130 |
|
- name: libri |
131 |
|
# isis |
132 |
|
type: isis |
133 |
|
|
134 |
|
# full path to database |
135 |
|
path: '/data/isis_data/ffps-libri/LIBRI' |
136 |
|
|
137 |
|
# encoding of character set in isis data |
138 |
|
encoding: 'cp852' |
139 |
|
|
140 |
|
# limit number of records to read from database |
141 |
|
#limit: 100 |
142 |
|
|
143 |
|
# define normalisation for that source |
144 |
|
normalize: |
145 |
|
# which tag to use in normalize xml for data? |
146 |
|
- tag: 'isis' |
147 |
|
# path to normalization file |
148 |
|
path: 'conf/normalize/common.pl' |
149 |
|
#path: 'conf/normalize/isis_ffzg.yml' |
150 |
|
# convert input to MARC21 |
151 |
|
# - tag: 'marc21' |
152 |
|
# # |
153 |
|
# path: 'conf/normalize/mapping.pl' |
154 |
|
# output: 'out/marc/ps-libri.marc' |
155 |
|
|
156 |
|
# another input database |
157 |
|
- name: peri |
158 |
|
type: isis |
159 |
|
path: '/data/isis_data/ffps-peri/PERI' |
160 |
|
encoding: 'cp852' |
161 |
|
#limit: 100 |
162 |
|
normalize: |
163 |
|
tag: 'isis' |
164 |
|
path: 'conf/normalize/common.pl' |
165 |
|
|
166 |
|
# Komparativna |
167 |
|
ffkk: |
168 |
|
name: 'Komparativna knji¾evnost, Filozofski fakultet u Zagrebu' |
169 |
|
|
170 |
|
input: |
171 |
|
- name: libri |
172 |
|
type: isis |
173 |
|
path: '/data/isis_data/ffkk-libri/LIBRI' |
174 |
|
encoding: 'cp852' |
175 |
|
modify_file: 'conf/modify/common.pl' |
176 |
|
normalize: |
177 |
|
# path: 'conf/normalize/common.pl' |
178 |
|
path: 'conf/normalize/mapping.pl' |
179 |
|
- name: peri |
180 |
|
type: isis |
181 |
|
path: '/data/isis_data/ffkk-peri/PERI' |
182 |
|
encoding: 'cp852' |
183 |
|
modify_file: 'conf/modify/common.pl' |
184 |
|
normalize: |
185 |
|
# path: 'conf/normalize/common.pl' |
186 |
|
path: 'conf/normalize/mapping.pl' |
187 |
|
- name: arti |
188 |
|
type: isis |
189 |
|
path: '/data/isis_data/ffkk-arti/ARTI' |
190 |
|
encoding: 'cp852' |
191 |
|
modify_file: 'conf/modify/ffkk-arti.pl' |
192 |
|
normalize: |
193 |
|
# - path: 'conf/normalize/common.pl' |
194 |
|
- path: 'conf/normalize/mapping.pl' |
195 |
|
- path: 'conf/lookup/ffkk-arti.pl' |
196 |
|
|
197 |
|
fffo: |
198 |
|
name: 'Filozofski Fakultet - Fonetika' |
199 |
|
|
200 |
|
input: |
201 |
|
name: libri |
202 |
|
type: isis |
203 |
|
path: '/data/isis_data/fffo-libri/LIBRI' |
204 |
|
encoding: 'cp852' |
205 |
|
#limit: 100 |
206 |
|
normalize: |
207 |
|
tag: 'isis' |
208 |
|
path: 'conf/normalize/common.pl' |
209 |
|
|
210 |
|
jzav: |
211 |
|
name: 'Jadranski zavod' |
212 |
|
input: |
213 |
|
name: knjige |
214 |
|
type: isis |
215 |
|
path: '/data/isis_data/jzav/BIB' |
216 |
|
encoding: 'cp852' |
217 |
|
#limit: 100 |
218 |
|
normalize: |
219 |
|
tag: 'isis' |
220 |
|
path: 'conf/normalize/common.pl' |
221 |
|
|
222 |
|
efzg: |
223 |
|
name: 'Ekonomski fakutet u Zagrebu' |
224 |
|
input: |
225 |
|
name: knjige |
226 |
|
type: marc |
227 |
|
path: '/data/isis_data/efzg/unimarc.iso' |
228 |
|
encoding: 'cp852' |
229 |
|
# CroList recoding pairs to fix encoding problems |
230 |
|
recode: 'ÏC È Ïc è ÂC Æ Âc æ ò ð ÏS © Ïs ¹ ÏZ ® Ïz ¾' |
231 |
|
#limit: 100 |
232 |
|
normalize: |
233 |
|
tag: 'isis' |
234 |
|
path: 'conf/normalize/common.pl' |
235 |
|
|
236 |
|
# excel |
237 |
|
excel: |
238 |
|
name: 'Excel' |
239 |
|
|
240 |
|
input: |
241 |
|
name: doaj |
242 |
|
type: excel |
243 |
|
path: '/data/isis_data/doaj2csv.xls' |
244 |
|
#encoding: 'cp852' |
245 |
|
#limit: 100 |
246 |
|
normalize: |
247 |
|
#tag: 'isis' |
248 |
|
path: 'conf/normalize/excel.pl' |
249 |
|
|
250 |
|
ffsfb: |
251 |
|
name: 'Slavenska filologija (B), Filozofski fakulteti u Zagrebu' |
252 |
|
input: |
253 |
|
name: libri |
254 |
|
type: isis |
255 |
|
path: '/data/isis_data/ffsfb-libri/' |
256 |
|
encoding: 'cp852' |
257 |
|
#limit: 10 |
258 |
|
modify_records: |
259 |
|
200: |
260 |
|
'^a': |
261 |
|
' ; ': '^k' |
262 |
|
'^d': |
263 |
|
'^e': ' : ' |
264 |
|
'^e': |
265 |
|
'^d': ' = ' |
266 |
|
'*': |
267 |
|
'^g': ' ; ' |
268 |
|
210: |
269 |
|
'*': |
270 |
|
'^c': '^b' |
271 |
|
'^d': '^c' |
272 |
|
'. (': '. (^e' |
273 |
|
' : ': ' :^f' |
274 |
|
700: |
275 |
|
'^a': |
276 |
|
'^b': ', ' |
277 |
|
701: |
278 |
|
'^a': |
279 |
|
'^b': ', ' |
280 |
|
702: |
281 |
|
'^a': |
282 |
|
'^b': ', ' |
283 |
|
normalize: |
284 |
|
path: 'conf/normalize/common.pl' |
285 |
|
fflo: |
286 |
|
name: 'Lingvistika, Filozofski fakultet u Zagrebu' |
287 |
|
input: |
288 |
|
name: libri |
289 |
|
type: isis |
290 |
|
path: '/data/isis_data/fflo-libri/LIBRI' |
291 |
|
encoding: 'cp852' |
292 |
|
#limit: 10 |
293 |
|
modify_records: |
294 |
|
200: |
295 |
|
'^d': |
296 |
|
'^e': ' : ' |
297 |
|
'^e': |
298 |
|
'^d': ' = ' |
299 |
|
'*': |
300 |
|
'^g': ' ; ' |
301 |
|
'^c': '. ' |
302 |
|
210: |
303 |
|
'*': |
304 |
|
'^c': '^b' |
305 |
|
'^d': '^c' |
306 |
|
700: |
307 |
|
'^a': |
308 |
|
'^b': ', ' |
309 |
|
701: |
310 |
|
'^a': |
311 |
|
'^b': ', ' |
312 |
|
702: |
313 |
|
'^a': |
314 |
|
'^b': ', ' |
315 |
|
normalize: |
316 |
|
path: 'conf/normalize/common.pl' |
317 |
|
|
318 |
|
gutenberg: |
319 |
|
name: 'Project Gutenberg archive' |
320 |
|
input: |
321 |
|
name: archive |
322 |
|
type: gutenberg |
323 |
|
path: 't/data/gutenberg-small.rdf' |
324 |
|
normalize: |
325 |
|
path: 'conf/normalize/common.pl' |
326 |
|
|
327 |
|
sand: |
328 |
|
name: 'Project Gutenberg archive' |
329 |
|
input: |
330 |
|
name: casopisi |
331 |
|
type: dbf |
332 |
|
path: 't/data/cas2000.dbf' |
333 |
|
normalize: |
334 |
|
path: 'conf/normalize/common.pl' |
335 |
|
mapping_path: 'conf/input/dbf/cas2000.yml' |