--- trunk/conf/llin.yml 2006/09/05 15:14:14 627 +++ trunk/conf/llin.yml 2007/06/22 00:03:45 872 @@ -1 +1,335 @@ -link /data/Webpacus/config.yml \ No newline at end of file +--- #YAML:1.0 +# DO NOT USE TABS FOR INDENTATION OR label/value SEPARATION!!! + +# encoding of this configuration file +config_encoding: 'ISO-8859-2' +# encoding in Catalyst. +catalyst_encoding: 'UTF-8' +# relative path to sites templates under Catalyst root +sites_root: 'sites' + +# which indexing engine to use? (hyperestraier is default) +# disable indexer +use_indexer: '' +#use_indexer: 'hyperestraier' +#use_indexer: 'hyperestraier-native' +#use_indexer: 'kinosearch' + +# configuration for Hyper Estraier full text search engine +hyperestraier: + #url: 'http://localhost:1978/node/webpac2' + masterurl: 'http://localhost:1978' + defaultnode: 'webpac2' +# defaultnode: 'ps' + defaultdepth: 1 + user: 'admin' + passwd: 'admin' + # don't turn this on! it will spit huge amounts of output + #debug: 1 + # + # if using hyperestraier-native use following: + path: 'casket/' + # + # number of results on each page + hits_on_page: 10 + # number of results to fetch for suggestion (duplicate hits are folded together) + hits_for_suggest: 20 + # + # options used while indexing + # + # which tag type to use for search engine (used while indexing) + type: 'search' + # + +# options for pager +pager: + # how many pages to show for navigation? + max_pages: 20 + +# configuration for KinoSearch search engine library +kinosearch: + index_path: './kinosearch/' + database: 'unconfigured database name' + label: 'unconfigured database label' + encoding: 'iso-8859-2' + # clean database before opening? (WARNING: this erases existing database) + clean: 1 + # which field type to index? 
+ type: 'search' + +webpac: + # default template to use + template: 'html_ffzg_results_short.tt' + # path to database files + db_path: '/data/webpac2/db' + # path to templates used by WebPAC::Output + template_path: '/data/webpac2/conf/output/tt' + # default template for results + default_template: 'html_ffzg.tt' + # default user editable css file + default_css: 'user.css' + css_path: 'root/css' + # encoding coming from webpac + webpac_encoding: 'iso-8859-2' + # encoding expected by Catalyst + out_encoding: 'utf-8' + # define different input formats (types) and perl modules to handle them + inputs: + isis: 'WebPAC::Input::ISIS' + marc: 'WebPAC::Input::MARC' + excel: 'WebPAC::Input::Excel' + gutenberg: 'WebPAC::Input::Gutenberg' + dbf: 'WebPAC::Input::DBF' + # define delimiters for validation + delimiters: + - ' ; ' + - ' : ' + - ' / ' + - ' \. ' + +editor: + # open this record when opening editor + # (it will also be used to deduce default database and input) + default_record_uri: 'ps/libri/1' + +# directives after this are used when indexing using core WebPAC modules + +databases: + # This is empty database created only in Hyper Estraier to merge + # all three databases + 'webpac2': + name: 'Search all' + links: + - to: ps + credit: 10000 + - to: kk + credit: 10000 + - to: jzav + credit: 10000 + + # site with alternative databases + 'hr': + name: 'Primjer za hrvatsko suèelje' + links: + - to: ffps + credit: 10000 + - to: ffkk + credit: 5000 + - to: fffo + credit: 10000 + + # Psihologija + 'ffps': + # all variables here will be available as config variables in template + name: 'Knji¾nica psihologije' + code: 'ps' + + # define input source + input: + # this will create unique name with name of database and mfn + - name: libri + # isis + type: isis + + # full path to database + path: '/data/isis_data/ffps-libri/LIBRI' + + # encoding of character set in isis data + encoding: 'cp852' + + # limit number of records to read from database + #limit: 100 + + # define 
normalisation for that source + normalize: + # which tag to use in normalize xml for data? + - tag: 'isis' + # path to normalization xml + path: 'conf/normalize/common.pl' + #path: 'conf/normalize/isis_ffzg.yml' + # convert input to MARC21 +# - tag: 'marc21' +# # +# path: 'conf/normalize/mapping.pl' +# output: 'out/marc/ps-libri.marc' + + # another input database + - name: peri + type: isis + path: '/data/isis_data/ffps-peri/PERI' + encoding: 'cp852' + #limit: 100 + normalize: + tag: 'isis' + path: 'conf/normalize/common.pl' + + # Komparativna + ffkk: + name: 'Komparativna knji¾evnost, Filozofski fakultet u Zagrebu' + + input: + - name: libri + type: isis + path: '/data/isis_data/ffkk-libri/LIBRI' + encoding: 'cp852' + modify_file: 'conf/modify/common.pl' + normalize: +# path: 'conf/normalize/common.pl' + path: 'conf/normalize/mapping.pl' + - name: peri + type: isis + path: '/data/isis_data/ffkk-peri/PERI' + encoding: 'cp852' + modify_file: 'conf/modify/common.pl' + normalize: +# path: 'conf/normalize/common.pl' + path: 'conf/normalize/mapping.pl' + - name: arti + type: isis + path: '/data/isis_data/ffkk-arti/ARTI' + encoding: 'cp852' + modify_file: 'conf/modify/ffkk-arti.pl' + normalize: +# - path: 'conf/normalize/common.pl' + - path: 'conf/normalize/mapping.pl' + - path: 'conf/lookup/ffkk-arti.pl' + + fffo: + name: 'Filozofski Fakultet - Fonetika' + + input: + name: libri + type: isis + path: '/data/isis_data/fffo-libri/LIBRI' + encoding: 'cp852' + #limit: 100 + normalize: + tag: 'isis' + path: 'conf/normalize/common.pl' + + jzav: + name: 'Jadranski zavod' + input: + name: knjige + type: isis + path: '/data/isis_data/jzav/BIB' + encoding: 'cp852' + #limit: 100 + normalize: + tag: 'isis' + path: 'conf/normalize/common.pl' + + efzg: + name: 'Ekonomski fakultet u Zagrebu' + input: + name: knjige + type: marc + path: '/data/isis_data/efzg/unimarc.iso' + encoding: 'cp852' + # CroList recoding pairs to fix encoding problems + recode: 'ÏC È Ïc è ÂC Æ Âc æ ò ð ÏS © Ïs ¹ 
ÏZ ® Ïz ¾' + #limit: 100 + normalize: + tag: 'isis' + path: 'conf/normalize/common.pl' + + # excel + excel: + name: 'Excel' + + input: + name: doaj + type: excel + path: '/data/isis_data/doaj2csv.xls' + #encoding: 'cp852' + #limit: 100 + normalize: + #tag: 'isis' + path: 'conf/normalize/excel.pl' + + ffsfb: + name: 'Slavenska filologija (B), Filozofski fakultet u Zagrebu' + input: + name: libri + type: isis + path: '/data/isis_data/ffsfb-libri/' + encoding: 'cp852' + #limit: 10 + modify_records: + 200: + '^a': + ' ; ': '^k' + '^d': + '^e': ' : ' + '^e': + '^d': ' = ' + '*': + '^g': ' ; ' + 210: + '*': + '^c': '^b' + '^d': '^c' + '. (': '. (^e' + ' : ': ' :^f' + 700: + '^a': + '^b': ', ' + 701: + '^a': + '^b': ', ' + 702: + '^a': + '^b': ', ' + normalize: + path: 'conf/normalize/common.pl' + fflo: + name: 'Lingvistika, Filozofski fakultet u Zagrebu' + input: + name: libri + type: isis + path: '/data/isis_data/fflo-libri/LIBRI' + encoding: 'cp852' + #limit: 10 + modify_records: + 200: + '^d': + '^e': ' : ' + '^e': + '^d': ' = ' + '*': + '^g': ' ; ' + '^c': '. ' + 210: + '*': + '^c': '^b' + '^d': '^c' + 700: + '^a': + '^b': ', ' + 701: + '^a': + '^b': ', ' + 702: + '^a': + '^b': ', ' + normalize: + path: 'conf/normalize/common.pl' + + gutenberg: + name: 'Project Gutenberg archive' + input: + name: archive + type: gutenberg + path: 't/data/gutenberg-small.rdf' + normalize: + path: 'conf/normalize/common.pl' + + sand: + name: 'Project Gutenberg archive' + input: + name: casopisi + type: dbf + path: 't/data/cas2000.dbf' + normalize: + path: 'conf/normalize/common.pl' + mapping_path: 'conf/input/dbf/cas2000.yml'