--- trunk/conf/llin.yml 2006/09/05 15:14:14 627 +++ trunk/conf/llin.yml 2007/11/28 00:01:34 1071 @@ -1 +1,417 @@ -link /data/Webpacus/config.yml \ No newline at end of file +--- #YAML:1.0 +# DO NOT USE TABS FOR INDENTATION OR label/value SEPARATION!!! + +# encoding of this configuration file +config_encoding: 'ISO-8859-2' +# encoding in Catalyst. +catalyst_encoding: 'UTF-8' +# relative path to sites templates under Catalyst root +sites_root: 'sites' + +# which indexing engine to use? (hyperestraier is default) +# disable indexer +use_indexer: '' +#use_indexer: 'hyperestraier' +#use_indexer: 'hyperestraier-native' +#use_indexer: 'kinosearch' + +# configuration for Hyper Estraier full text search engine +hyperestraier: + #url: 'http://localhost:1978/node/webpac2' + masterurl: 'http://localhost:1978' + defaultnode: 'webpac2' +# defaultnode: 'ps' + defaultdepth: 1 + user: 'admin' + passwd: 'admin' + # don't turn this on! it will spit huge amounts of output + #debug: 1 + # + # if using hyperestraier-native use following: + path: 'casket/' + # + # number of results on each page + hits_on_page: 10 + # number of results to fetch for suggestion (it will fold multiple sameones) + hits_for_suggest: 20 + # + # options used while indexing + # + # which tag type to use for search engine (used while indexing) + type: 'search' + # + +# options for pager +pager: + # how many pages to show for navigation? + max_pages: 20 + +# configuration for KinoSearch search engine library +kinosearch: + index_path: './kinosearch/' + database: 'unconfigured database name' + label: 'unconfigured database label' + encoding: 'iso-8859-2' + # clean database before opening? (WARNING: this erases existing database) + clean: 1 + # which field type to index? + type: 'search' + +webpac: + # default template to use + template: 'html_ffzg_results_short.tt' + # path to database files + db_path: '/data/webpac2/db' + # path to templates used by WebPAC::Output + template_path: '/data/webpac2/conf/output/tt' + # default template for results + default_template: 'html_ffzg.tt' + # default user editable css file + default_css: 'user.css' + css_path: 'root/css' + # encoding comming from webpac + webpac_encoding: 'iso-8859-2' + # encoding expected by Catalyst + out_encoding: 'utf-8' + # define different input formats (types) and perl modules to handle them + inputs: + isis: 'WebPAC::Input::ISIS' + marc: 'WebPAC::Input::MARC' + excel: 'WebPAC::Input::Excel' + gutenberg: 'WebPAC::Input::Gutenberg' + dbf: 'WebPAC::Input::DBF' + # define delimiters for validation + delimiters: + - ' ; ' + - ' : ' + - ' / ' + - ' \. ' + +editor: + # open this record when opening editor + # (it will also be used to deduce default database and input) + default_record_uri: 'ps/libri/1' + +# directives after this are used when indexing using core WebPAC modules + +databases: + # This is empty database created only in Hyper Estraier to merge + # all three databases + 'webpac2': + name: 'Search all' + links: + - to: ps + credit: 10000 + - to: kk + credit: 10000 + - to: jzav + credit: 10000 + + # site with alternative databases + 'hr': + name: 'Primjer za hrvatsko suèelje' + links: + - to: ffps + credit: 10000 + - to: ffkk + credit: 5000 + - to: fffo + credit: 10000 + + # Psihologija + 'ffps': + # all variables here will be available as config variables in template + name: 'Knji¾nica psihologije' + code: 'ps' + + # define input source + input: + # this will create unique name with name of database and mfn + - name: libri + # isis + type: isis + + # full path to database + path: '/data/isis_data/ffps-libri/LIBRI' + + # encoding of character set in isis data + encoding: 'cp852' + + # limit number of records to read from database + #limit: 100 + + # define normalisation for that source + normalize: + # which tag to use in normalize xml for data? + - tag: 'isis' + # path to normalization xml + path: 'conf/normalize/common.pl' + #path: 'conf/normalize/isis_ffzg.yml' + # convert input to MARC21 +# - tag: 'marc21' +# # +# path: 'conf/normalize/mapping.pl' +# output: 'out/marc/ps-libri.marc' + + # another input database + - name: peri + type: isis + path: '/data/isis_data/ffps-peri/PERI' + encoding: 'cp852' + #limit: 100 + normalize: + tag: 'isis' + path: 'conf/normalize/common.pl' + + # Komparativna + ffkk: + name: 'Komparativna knji¾evnost, Filozofski fakultet u Zagrebu' + + input: + - name: libri + type: isis + path: '/data/isis_data/ffkk-libri/LIBRI' + encoding: 'cp852' + modify_file: 'conf/modify/common.pl' + normalize: +# path: 'conf/normalize/common.pl' + path: 'conf/normalize/mapping.pl' + - name: peri + type: isis + path: '/data/isis_data/ffkk-peri/PERI' + encoding: 'cp852' + modify_file: 'conf/modify/common.pl' + normalize: +# path: 'conf/normalize/common.pl' + path: 'conf/normalize/mapping.pl' + - name: arti + type: isis + path: '/data/isis_data/ffkk-arti/ARTI' + encoding: 'cp852' + modify_file: 'conf/modify/ffkk-arti.pl' + normalize: +# - path: 'conf/normalize/common.pl' + - path: 'conf/normalize/mapping.pl' + - path: 'conf/lookup/ffkk-arti.pl' + + fffo: + name: 'Filozofski Fakultet - Fonetika' + + input: + name: libri + type: isis + path: '/data/isis_data/fffo-libri/LIBRI' + encoding: 'cp852' + #limit: 100 + normalize: + tag: 'isis' + path: 'conf/normalize/common.pl' + + jzav: + name: 'Jadranski zavod' + input: + name: knjige + type: isis + path: '/data/isis_data/jzav/BIB' + encoding: 'cp852' + #limit: 100 + normalize: + tag: 'isis' + path: 'conf/normalize/common.pl' + + efzg: + name: 'Ekonomski fakutet u Zagrebu' + input: + name: knjige + type: marc + path: '/data/isis_data/efzg/unimarc.iso' + encoding: 'cp852' + # CroList recoding pairs to fix encoding problems + recode: 'ÏC È Ïc è ÂC Æ Âc æ ò ð ÏS © Ïs ¹ ÏZ ® Ïz ¾' + #limit: 100 + normalize: + tag: 'isis' + path: 'conf/normalize/common.pl' + + ffsfb: + name: 'Slavenska filologija (B), Filozofski fakulteti u Zagrebu' + input: + name: libri + type: isis + path: '/data/isis_data/ffsfb-libri/' + encoding: 'cp852' + #limit: 10 + modify_records: + 200: + '^a': + ' ; ': '^k' + '^d': + '^e': ' : ' + '^e': + '^d': ' = ' + '*': + '^g': ' ; ' + 210: + '*': + '^c': '^b' + '^d': '^c' + '. (': '. (^e' + ' : ': ' :^f' + 700: + '^a': + '^b': ', ' + 701: + '^a': + '^b': ', ' + 702: + '^a': + '^b': ', ' + normalize: + path: 'conf/normalize/common.pl' + fflo: + name: 'Lingvistika, Filozofski fakultet u Zagrebu' + input: + name: libri + type: isis + path: '/data/isis_data/fflo-libri/LIBRI' + encoding: 'cp852' + #limit: 10 + modify_records: + 200: + '^d': + '^e': ' : ' + '^e': + '^d': ' = ' + '*': + '^g': ' ; ' + '^c': '. ' + 210: + '*': + '^c': '^b' + '^d': '^c' + 700: + '^a': + '^b': ', ' + 701: + '^a': + '^b': ', ' + 702: + '^a': + '^b': ', ' + normalize: + path: 'conf/normalize/common.pl' + + gutenberg: + name: 'Project Gutenberg archive' + input: + name: archive + type: gutenberg + path: 't/data/gutenberg-small.rdf' + normalize: + path: 'conf/normalize/common.pl' + + sand: + name: 'SAND' + input: + name: casopisi + type: dbf + path: 't/data/cas2000.dbf' + normalize: + path: 'conf/normalize/common.pl' +# mapping_path: 'conf/input/dbf/cas2000.yml' + + exhibit: + name: 'SMILE Exhibit based ouptput for WebPAC' + input: + name: 'FFZG - Psihologija' + type: marc + path: 'out/marc/ffsf-peri.marc' + encoding: 'cp852' + normalize: + path: 'conf/normalize/json.pl' + output: + module: 'JSON' + path: 'out/exhibit/ps.js' + + webpacus: + name: 'Webpacus' + input: + name: 'foobar' + type: marc +# path: 'out/marc/ffsf-peri.marc' +# path: 'out/marc/ffkk-libri.marc' + path: 'out/marc/ffiz-libri.marc' + encoding: 'cp852' + normalize: + path: 'conf/normalize/webpacus.pl' +# path: 'conf/normalize/webpacus-unimarc.pl' + output: + - module: 'KinoSearch' + path: 'var/kinosearch' + - module: 'Sorted' + path: 'var/sorted' + - module: 'Webpacus' + path: '/data/Webpacus2' + + hidra: + name: 'HIDRA' + input: + - name: bib + type: isis + path: '/data/hidra/BIB' + encoding: 'cp852' + #modify_file: 'conf/modify/common.pl' + normalize: + path: 'conf/normalize/minimal.pl' + + perlmonks: + name: 'PerlMonks' + input: + - name: xml + module: 'WebPAC::Input::XML' + path: '/home/dpavlin/monk-search/xml-dump' + mungle: 'conf/mungle/perlmonks-xml.pl' + normalize: + path: 'conf/normalize/perlmonks.pl' + output: + - module: 'KinoSearch' + path: 'var/kinosearch' + - module: 'Sorted' + path: 'var/sorted' + - module: 'Webpacus' + path: '/data/Webpacus2' + + ufo: + name: 'UFO Reports 1998-2006 in the UK' + input: + - name: pdf + module: 'WebPAC::Input::PDF' + path: '/home/dpavlin/x/ufo/UFOReport1998.pdf' +# - '/home/dpavlin/x/ufo/UFOReport1998.pdf' +# - '/home/dpavlin/x/ufo/UFOReport1999.pdf' +# - '/home/dpavlin/x/ufo/UFOReport2000.pdf' +# - '/home/dpavlin/x/ufo/UFOReport2001.pdf' +# - '/home/dpavlin/x/ufo/UFOReports2002WholeoftheUK.pdf' +# - '/home/dpavlin/x/ufo/UFOReports2003WholeoftheUK.pdf' +# - '/home/dpavlin/x/ufo/UFOReports2004WholeoftheUK.pdf' +# - '/home/dpavlin/x/ufo/UFOReports2005WholeoftheUK.pdf' +# - '/home/dpavlin/x/ufo/UFOReports2006WholeoftheUK.pdf' + normalize: + path: 'conf/normalize/ufo.pl' + output: + - module: 'Jifty' + path: '/data/Webpacus2' + model: 'Webpacus::Model::UFO' + + csv: + name: 'CSV Excel export' + input: + - name: peri + module: 'WebPAC::Input::ISIS' + path: '/data/isis_data/ffps-peri/PERI' + encoding: 'cp852' + normalize: + path: 'conf/normalize/csv_ff-peri.pl' + output: + - module: 'Excel' + path: 'out/peri.xls'