--- trunk/conf/mjesec.yml 2006/08/23 10:08:17 612 +++ trunk/conf/mjesec.yml 2006/09/06 19:25:22 636 @@ -9,7 +9,8 @@ sites_root: 'sites' # which indexing engine to use? (hyperestraier is default) -use_indexer: 'hyperestraier' +#use_indexer: 'hyperestraier' +use_indexer: 'hyperestraier-native' #use_indexer: 'kinosearch' # configuration for Hyper Estraier full text search engine @@ -24,6 +25,8 @@ # don't turn this on! it will spit huge amounts of output #debug: 1 # + # + path: 'casket/' # number of results on each page hits_on_page: 10 # number of results to fetch for suggestion (it will fold multiple sameones) @@ -304,7 +307,7 @@ name: libri type: isis path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis.pm' #limit: 10 normalize: @@ -316,7 +319,7 @@ name: libri type: isis path: '/backup/isis_backup/A018-2/isisdata/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -328,7 +331,7 @@ name: libri type: isis path: '/backup/isis_backup/A203-2/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -340,7 +343,7 @@ name: libri type: isis path: '/backup/isis_backup/A102-1B/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -352,7 +355,7 @@ - name: libri type: isis path: '/backup/isis_backup/novi-40162/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -360,7 +363,7 @@ - name: arti type: isis path: '/backup/isis_backup/novi-40162/ISISDATA/latest/ARTI/ARTI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -373,7 +376,7 @@ name: libri type: isis path: '/backup/isis_backup/A106-1/isisdata/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -385,7 +388,7 @@ name: libri type: isis path: '/backup/isis_backup/B001-2/winisis/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -397,7 +400,7 @@ name: libri type: isis path: '/backup/isis_backup/A129-2/isisdata/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -409,7 +412,7 @@ name: libri type: isis path: '/backup/isis_backup/A209-2/isisdata/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -418,14 +421,26 @@ ffkk: name: 'Komparativna knji¾evnost, Filozofski fakultet u Zagrebu' input: - name: libri - type: isis - path: '/backup/isis_backup/A207-3/isisdata/latest/LIBRI/LIBRI' - encoding: '852' - lookup: 'conf/lookup/isis_ffzg.yml' - #limit: 10 - normalize: - path: 'conf/normalize/common.pl' + - name: libri + type: isis + path: '/backup/isis_backup/A207-3/isisdata/latest/LIBRI/LIBRI' + encoding: 'cp852' + lookup: 'conf/lookup/isis_ffzg.yml' + #limit: 10 + normalize: + path: 'conf/normalize/common.pl' + - name: arti + type: isis + path: '/backup/isis_backup/A207-3/isisdata/latest/ARTI/ARTI' + encoding: 'cp852' + lookup: 'conf/lookup/isis_ffzg.yml' + #limit: 10 + modify_records: + 200: + '^f': + '; ': '^x' + normalize: + path: 'conf/normalize/common.pl' fflo: name: 'Lingvistika, Filozofski fakultet u Zagrebu' @@ -433,21 +448,31 @@ name: libri type: isis path: '/backup/isis_backup/A108-1/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 + modify_file: 'conf/modify/common.pl' modify_records: 200: + '^k': + '^e': ' : ' '^d': '^e': ' : ' '^e': '^d': ' = ' + '^k': ' ; ' '*': '^g': ' ; ' - '^c': '. ' + '^a': + ' : ': '^e' + ' = ': '^d' + ' ; ': '^k' 210: - '*': + '^a': '^c': '^b' + '^b': + ' ; ': '^a' + ' : ': '^b' '^d': '^c' 700: '^a': @@ -467,7 +492,7 @@ name: libri type: isis path: '/backup/isis_backup/B025-2/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -479,7 +504,7 @@ name: libri type: isis path: '/backup/isis_backup/A226-1/isisdata/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -490,8 +515,8 @@ input: name: libri type: isis - path: '/backup/isis_backup/A201-1/isisdata/latest/LIBRI/LIBRI' - encoding: '852' + path: '/backup/isis_backup/A-201-1/isisdata/latest/LIBRI/LIBRI' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -503,7 +528,7 @@ name: libri type: isis path: '/backup/isis_backup/C124-3/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -527,7 +552,7 @@ # path: '/data/isis_data/PS/LIBRI/' # encoding of character set in isis data - encoding: '852' + encoding: 'cp852' # lookup lookup: 'conf/lookup/isis_ffzg.yml' @@ -535,6 +560,34 @@ # limit number of records to read from database #limit: 10 + modify_records: + 200: + '^d': + '^e': ' : ' + '^e': + '^d': ' = ' + '*': + '^g': ' ; ' + '^c': '. ' + 210: + '^a': + '^c': '^b' + '^b': + ' : ': '^b' + '^d': '^c' + '*': + '. (': '. (^e' + '^e': + ' : ': ' ^f' + 700: + '^a': + '^b': ', ' + 701: + '^a': + '^b': ', ' + 702: + '^a': + '^b': ', ' # define normalisation for that source normalize: # which tag to use in normalize xml for data? @@ -547,7 +600,7 @@ - name: peri type: isis path: '/backup/isis_backup/sunce2/isisdata/latest/PERI/PERI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -558,8 +611,8 @@ input: name: libri type: isis - path: '/backup/isis_backup/A224-2/isisdata/latest/LIBRI/LIBRI' - encoding: '852' + path: '/backup/isis_backup/A224-6/isisdata/latest/LIBRI/LIBRI' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -571,7 +624,7 @@ name: libri type: isis path: '/backup/isis_backup/B009-2/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -582,26 +635,32 @@ input: name: libri type: isis - path: '/backup/isis_backup/A125-3/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + path: '/data/isis_data/sfb/LIBRI/LIBRI' +# path: '/backup/isis_backup/A125-3/ISISDATA/latest/LIBRI/LIBRI' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 modify_records: 200: '^a': - ' ; ': '^k' + ' : ': '^e' + ' = ': '^d' + '^k': + '^e': ' : ' '^d': '^e': ' : ' '^e': '^d': ' = ' + '^k': ' ; ' '*': '^g': ' ; ' 210: - '*': + '^a': '^c': '^b' + '^b': + ' ; ': '^a' + ' : ': '^b' '^d': '^c' - '. (': '. (^e' - ' : ': ' :^f' 700: '^a': '^b': ', ' @@ -620,7 +679,7 @@ name: libri type: isis path: '/backup/isis_backup/A121-2/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 modify_records: @@ -657,9 +716,24 @@ name: libri type: isis path: '/backup/isis_backup/A002-3/isisdata/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 + modify_records: + 200: + '^a': + ' : ': '^e' + ' = ': '^d' + ' ; ': '^k' + '^k': + '^e': ' : ' + '^d': + '^e': ' : ' + '^e': + '^d': ' = ' + '^k': ' ; ' + '*': + '^g': ' ; ' normalize: path: 'conf/normalize/common.pl' @@ -669,7 +743,7 @@ name: libri type: isis path: '/backup/isis_backup/A224-1/isisdata/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -681,7 +755,7 @@ name: libri type: isis path: '/backup/isis_backup/A112-1/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -693,7 +767,7 @@ name: libri type: isis path: '/backup/isis_backup/C107-9/ISISDATA/latest/LIBRI/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/isis_ffzg.yml' #limit: 10 normalize: @@ -705,7 +779,7 @@ name: knjige type: marc path: '/data/drustvene/efzg/unimarc.iso' - encoding: '852' + encoding: 'cp852' # CroList recoding pairs to fix encoding problems recode: 'ÏC È Ïc è ÂC Æ Âc æ ò ð ÏS © Ïs ¹ ÏZ ® Ïz ¾' lookup: 'conf/lookup/efzg.yml' @@ -719,7 +793,7 @@ name: bib type: isis path: '/data/drustvene/eizg/BIB' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/eizg.yml' #limit: 100 normalize: @@ -745,7 +819,7 @@ name: knjige type: isis path: '/data/drustvene/irmo/LIBRI' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/eizg.yml' #limit: 10 normalize: @@ -771,7 +845,7 @@ name: knjige type: isis path: '/data/drustvene/iztzg/BIB' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/eizg.yml' #limit: 10 normalize: @@ -783,7 +857,7 @@ name: bib type: isis path: '/data/drustvene/jzav/BIB' - encoding: '852' + encoding: 'cp852' lookup: 'conf/lookup/jzav.yml' #limit: 10 normalize: