/[webpac]/openisis/0.9.9e/core/cdx.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /openisis/0.9.9e/core/cdx.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (hide annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 5 months ago) by dpavlin
File MIME type: text/plain
File size: 20735 byte(s)
import of new openisis release, 0.9.9e

1 dpavlin 604 /*
2     The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3     Version 0.9.x (patchlevel see file Version)
4     Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5    
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Lesser General Public
8     License as published by the Free Software Foundation; either
9     version 2.1 of the License, or (at your option) any later version.
10    
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14     See the GNU Lesser General Public License for more details.
15    
16     You should have received a copy of the GNU Lesser General Public
17     License along with this library; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19    
20     see README for more information
21     EOH */
22    
23     /*
24     $Id: cdx.c,v 1.9 2004/11/11 18:20:23 kripke Exp $
25     charset collation
26     */
27    
28     #include "../core/core.h"
29    
30     /*
31     We use
32     - p codes 0,1,..p-1 for primaries (word and nonword)
33     - v codes p..p+v-1 for secondary and tertiary level variants
34     - m codes p+v..p+v+m-1 for maps (targets)
35    
36     We index code numbers to an array[p+v+m] of unsigned ints as code values.
37     The highest bit is the nonword indicator.
38     The lower nibble of the highest byte is the length of the code's byte sequence
39     (for maps in #code values, else in # cleartext bytes).
40    
41     If the sequence does not fit into the lower 3 bytes,
42     the value&0xffffff is the offset of the bytes.
43    
44     For primaries and variants, the bytes are cleartext.
45     For maps, the bytes are code text (length given as #codes),
46     where every code uses one byte, if p+v<=256, else two.
47    
48     With variants, we also have an array[p+v] of variant info.
49     For a primary code 0..p-1,
50     this is the number of secondary and tertiary variants,
51     and the code of the first of the (sec+1)*(ter+1) variants - 1.
52    
53     For a variant code p..p+v-1, this is its secondary and tertiary weights
54     and the code of the associated primary.
55     */
56    
57     typedef union { /* one code value: a 32 bit word that is also addressable as bytes */
58     unsigned u; /* whole value; bit 0x80000000 is tested as the nonword flag, u&0xffffff is the offset of out-of-line bytes */
59     struct {
60     #ifdef CPU_BIG_ENDIAN
61     unsigned char hi; /* flag bit 0x80 | sequence length in the low nibble */
62     unsigned char c[3]; /* the bytes themselves while the length is 1..3 */
63     #else
64     unsigned char c[3]; /* the bytes themselves while the length is 1..3 */
65     unsigned char hi; /* flag bit 0x80 | sequence length in the low nibble */
66     #endif
67     } b;
68     } Cv;
69    
70     typedef struct Var { /* variant info; stays unfilled here, since cMake rejects S and T lines */
71     unsigned char sec; /* secondary variant */
72     unsigned char ter; /* tertiary variant */
73     unsigned short rel; /* code of related variant */
74     } Var;
75    
76     /*
77     For the encoding, we map every byte to:
78     - its code number
79     - the "word" bit
80     - the index of a table of possible following bytes
81    
82     To cover the BMP with UTF-8, we need 1072 tables:
83     - 32 bytes 110* initiating a 2 byte sequence,
84     using one table each
85     - 16 bytes 1110* initiating a 3 byte sequence,
86     each having a table with 64 second bytes 10*,
87     using 16+16*64 = 1040 tables
88     (- the 32 tables used for the UTF-16 surrogates D800-DFFF)
89     CJK 3400-9FFF alone uses 432 tables (some 27.000 ideographs)
90     */
91     typedef struct Bin { /* byte info; an entry with cod and tab both 0 is unassigned */
92     unsigned short cod; /* byte's collation code, 0 if no sequence ends at this byte */
93     unsigned short tab; /* 1-based number of the table for following bytes, 0 if none */
94     } Bin;
95    
96     typedef Bin Bins [256]; /* full table, indexed by byte value */
97    
98     typedef struct Tab { /* table info: locates one compacted byte table */
99     unsigned off; /* bytes offset of Bin for min, counted from the start of the Cdx */
100     unsigned char min; /* lowest byte value that has an entry */
101     unsigned char max; /* highest byte value that has an entry */
102     unsigned short unu; /* never referenced in this file -- presumably padding, TODO confirm */
103     } Tab;
104    
105    
106     enum { /* base type bits of Cdx.typ, plus the size of the builtin table */
107     BIT_VARIANTS = 0x20, /* or'ed into typ by cMake when var > pri */
108     BIT_FRENCH = 0x10, /* only used to build TYP_FRENCH below */
109     TYP_PLAIN = 0, /* the only base type cMake starts from */
110     TYP_VARIANTS = BIT_VARIANTS, /* TODO */
111     TYP_FRENCH = BIT_VARIANTS|BIT_FRENCH, /* TODO */
112     CVLAT1 = 104 /* # primary codes for builtin lat1cdx */
113     };
114    
115     /* header of a dumped/mapped cdx; all offsets count bytes from the start of this struct */
116     struct Cdx {
117     unsigned char mag[3]; /* magic MCX or mcx for mapped cdx; cOpen stamps "nam" on named in-memory copies, anonymous ones keep 0 */
118     unsigned char typ; /* base type | bits per (primary) code-1 */
119     /* currently only 8(7) and 16(15) bits supported */
120     unsigned short pri; /* p # primary codes incl. 0,1 */
121     unsigned short var; /* p + v # variants */
122     unsigned short map; /* p+v + m # maps, i.e. the number of entries in cv */
123     unsigned short tab; /* # non-root tables 1..tab */
124     /* redundant offsets */
125     unsigned ovi; /* offset variant_infos */
126     unsigned otp; /* offset table_pointers - sizeof (Tab), so that 1-based table numbers index it directly */
127     unsigned siz; /* total size */
128     Bins bt0; /* root table, always full */
129     unsigned cv[CVLAT1]; /* actually Cv code_values[map=p+v+m] */
130     /* Var variant_infos[var=p+v], if BIT_VARIANTS */
131     /* Tab table_pointers[tab] */
132     /* Bin byte_tables ... */
133     /* unsigned char *more_bytes */
134     };
135    
136    
137     static const char MAGIC[3] = /* file magic; differs per byte order, so a dump of the other endianness fails the check in cOpen */
138     #ifdef CPU_BIG_ENDIAN
139     {'M','C','X'};
140     # define W(b) (0x01000000|(b)<<16) /* word code value of length 1 holding byte b in Cv.b.c[0] */
141     # define N(b) (0x81000000|(b)<<16) /* same with the nonword bit set */
142     #else
143     {'m','c','x'};
144     # define W(b) (0x01000000|(b)) /* word code value of length 1 holding byte b in Cv.b.c[0] */
145     # define N(b) (0x81000000|(b)) /* same with the nonword bit set */
146     #endif
147    
148     #if 0 /* disabled: builtin Latin-1 collation, folds case and accents onto the base letters */
149     const Cdx lat1cdx = {
150     {'s','t','a'}, /* mark as static */
151     TYP_PLAIN|7, CVLAT1, CVLAT1, CVLAT1, 0,0,0,0,
152     { /* byte infos */
153     #define B(b) {b,0}
154     /* 32 C0 controls */
155     B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
156     B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
157     B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
158     B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
159     /* ! " # $ % & ' */
160     B( 2),B( 3),B( 4),B( 5),B( 6),B( 7),B( 8),B( 9),
161     /* ( ) * + , - . / */
162     B(10),B(11),B(12),B(13),B(14),B(15),B(16),B(17),
163     /* 0 1 2 3 4 5 6 7 */
164     B(18),B(19),B(20),B(21),B(22),B(23),B(24),B(25),
165     /* 8 9 : ; < = > ? */
166     B(26),B(27),B(28),B(29),B(30),B(31),B(32),B(33),
167     /* @ A B C D E F G */
168     B(34),B(35),B(36),B(37),B(38),B(39),B(40),B(41),
169     B(42),B(43),B(44),B(45),B(46),B(47),B(48),B(49), /* H-O */
170     B(50),B(51),B(52),B(53),B(54),B(55),B(56),B(57), /* P-W */
171     /* X Y Z [ \ ] ^ _ */
172     B(58),B(59),B(60),B(61),B(62),B(63),B(64),B(65),
173     /* ` a b c d e f g */
174     B(66),B(35),B(36),B(37),B(38),B(39),B(40),B(41),
175     B(42),B(43),B(44),B(45),B(46),B(47),B(48),B(49), /* h-o */
176     B(50),B(51),B(52),B(53),B(54),B(55),B(56),B(57), /* p-w */
177     /* x y z { | } ~ DEL */
178     B(58),B(59),B(60),B(67),B(68),B(69),B(70),B(0),
179     /* 32 C1 controls */
180     B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
181     B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
182     B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
183     B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
184     /* 32 mostly symbols */
185     B( 2),B(71),B(72),B(73),B(74),B(75),B(76),B(77),
186     B(78),B(79),B(80),B(81),B(82),B(83),B(84),B(85),
187     B(86),B(87),B(88),B(89),B(90),B(91),B(92),B(93),
188     B(94),B(95),B(96),B(97),B(98),B(99),B(100),B(101),
189     /* 64 Latin alphas including 2 symbols */
190     B(35),B(35),B(35),B(35),B(35),B(35),B(35),B(37), /* 7A,1C */
191     B(39),B(39),B(39),B(39),B(43),B(43),B(43),B(43), /* 4E,4I */
192     B(54),B(49),B(49),B(49),B(49),B(49),B(49),B(102), /* TN5O*; NOTE(review): the 2nd entry is B(49) (O) although the legend says N, which is B(48) */
193     B(49),B(55),B(55),B(55),B(55),B(59),B(54),B(53), /* O4UYTS */
194     B(35),B(35),B(35),B(35),B(35),B(35),B(35),B(37), /* 7a,1c */
195     B(39),B(39),B(39),B(39),B(43),B(43),B(43),B(43), /* 4e,4i */
196     B(54),B(49),B(49),B(49),B(49),B(49),B(49),B(103), /* tn5o%; NOTE(review): the 2nd entry is B(49) (o) although the legend says n, which is B(48) */
197     B(49),B(55),B(55),B(55),B(55),B(59),B(54),B(59) /* o4uyty */
198     #undef B
199     },
200     { /* code values, all using immediate bytes */
201     0, N(9),
202     N(32),N(33),N(34),N(35),N(36),N(37),N(38),N(39), /* !"#$%&' */
203     N(40),N(41),N(42),N(43),N(44),N(45),N(46),N(47), /* ()*+,-./ */
204     W(48),W(49),W(50),W(51),W(52),W(53),W(54),W(55), /* 01234567 */
205     W(56),W(57),N(58),N(59),N(60),N(61),N(62),N(63), /* 89:;<=>? */
206     N(64),W(65),W(66),W(67),W(68),W(69),W(70),W(71), /* @ABCDEFG */
207     W(72),W(73),W(74),W(75),W(76),W(77),W(78),W(79), /* HIJKLMNO */
208     W(80),W(81),W(82),W(83),W(84),W(85),W(86),W(87), /* PQRSTUVW */
209     W(88),W(89),W(90),N(91),N(92),N(93),N(94),N(95), /* XYZ[\]^_ */
210     N(96), /*a-z*/ N(123),N(124),N(125),N(126), /* ` {|}~ */
211     N(161),N(162),N(163),N(164),N(165),N(166),N(167),
212     N(168),N(169),N(170),N(171),N(172),N(173),N(174),N(175),
213     N(176),N(177),N(178),N(179),N(180),N(181),N(182),N(183),
214     N(184),N(185),N(186),N(187),N(188),N(189),N(190),N(191),
215     N(215), N(247)
216     }
217     }; /* lat1cdx */
218     #endif
219    
220    
221     /*
222     encode l byte in b to key.
223     key->len is the max len on input, resulting len on return.
224     stop if max key len is hit or on word boundary, if words.
225     return #used bytes in b.
226     */
227     int cEnc ( const Cdx *cdx, Key *key, unsigned char *b, int l, int words )
228     {
229     const unsigned char * const base = (unsigned char *)cdx;
230     const Bin * const bt0 = cdx->bt0;
231     const Tab * const tp = (Tab*)(base + cdx->otp);
232     const int bits = 1+(0xf&cdx->typ), mapcodeshift = 256<cdx->var ? 1 : 0;
233     const unsigned char *e = b+l;
234     int bitsleft = 8*key->len, pfxlen = 0;
235     int unassigned = 0;
236     Bin seq[CDX_MAXSEQ], *top; /* current sequence */
237     unsigned u, len;
238     unsigned short code;
239     Cv cv;
240    
241     key->len = 0;
242     for ( ; b < e && bits <= bitsleft; b++ ) {
243     eRr(LOG_TRACE, "byte %c of %d bits %d/%d", *b, e-b, bitsleft, bits);
244     /* traverse byte info tables to find longest matching sequence */
245     for (*(top = seq) = bt0[*b]; top->tab && b < e; ) {
246     const Tab * const t = tp + top->tab;
247     unsigned char n = b[1];
248     LOG_DBG(LOG_DEBUG, "table %d check %d %d-%d off %d %d",
249     top->tab, n, t->min, t->max, t->off, *(int*)(base+t->off));
250     if ( n < t->min || n > t->max /* out of bounds */
251     || !(u = ((unsigned*)(base+t->off))[n - t->min]) /* unassigned */
252     )
253     break;
254     *++top = *(Bin*)&u;
255     b++;
256     }
257     for ( ; top > seq && !top->cod; top-- ) /* no complete sequence */
258     b--;
259     code = top->cod;
260     eRr(LOG_TRACE, "code %d", code);
261     if (words) {
262     if ( 1<words /* leave 1st words-1 bytes alone */
263     && b+l-e >= words /* ok, passed it for the 1st time */
264     ) {
265     pfxlen = key->len;
266     words = 1;
267     } else if (0x80000000&cdx->cv[code]) { /* hit nonword */
268     if (pfxlen == key->len) /* skip to word */
269     continue;
270     break; /* had something after prefix */
271     }
272     }
273     if (cdx->pri > code) {
274     if ( code )
275     unassigned = 0;
276     else { /* unassigned */
277     if ( unassigned )
278     continue;
279     unassigned = 1;
280     code = 1;
281     }
282     switch (bits) {
283     case 16: key->byt[key->len++] = code >> 8; /* always bigend */
284     case 8: key->byt[key->len++] = (char)code;
285     }
286     bitsleft -= bits;
287     continue;
288     }
289     /* if (cdx->var > top->cod) variant */
290     /* else map: */
291     cv.u = cdx->cv[code];
292     if ((len = (0xf & cv.b.hi)<<mapcodeshift)) { /* len = #codes */
293     /* mapped variants TODO */
294     if ( 0 > (bitsleft -= (len << 3)) )
295     break;
296     memcpy(key->byt+key->len, ~3&len ? base+(0xffffff&cv.u) : cv.b.c, len);
297     key->len += len;
298     }
299     }
300     if (/*words &&*/ key->len == pfxlen) /* if pfxlen: found no words */
301     key->len = 0;
302    
303     return b+l-e;
304     } /* cEnc */
305    
306    
307     int cDec ( const Cdx *cdx, unsigned char *b, int l, Key *key )
308     {
309     const unsigned char * const base = (unsigned char *)cdx;
310     const unsigned char *k = key->byt, *e = b+l-CDX_MAXSEQ;
311     const int bits = 1+(0xf&cdx->typ);
312     int bitsleft = 8*key->len;
313     Cv cv;
314    
315     for ( ; b < e && bits <= bitsleft; bitsleft -= bits ) {
316     unsigned short v = *k++;
317     if ( 16 == bits )
318     v = v<<8 | *k++;
319     cv.u = cdx->cv[v];
320     switch ( cv.b.hi &= 0xf ) { /* discard word bit */
321     case 1: *b++ = cv.b.c[0]; continue;
322     case 2: *b++ = cv.b.c[0]; *b++ = cv.b.c[1]; continue;
323     case 3: *b++ = cv.b.c[0]; *b++ = cv.b.c[1]; *b++ = cv.b.c[2]; continue;
324     }
325     memcpy(b, base+(cv.u&0xffffff), cv.b.hi);
326     b += cv.b.hi;
327     }
328     return b+l-CDX_MAXSEQ-e;
329     } /* cDec */
330    
331    
332    
333     typedef struct { /* workspace of cMake: a Cdx under construction with uncompacted parts */
334     Cdx cdx; /* header, root table and the first CVLAT1 code values */
335     unsigned cvb[0x10000-CVLAT1]; /* continues cdx.cv; cMake copies the header and code values as one run, so this must directly follow cdx */
336     Var *vi, vib[0x10000]; /* variant infos; vi is set to vib and not used otherwise in this file */
337     Tab tp[1100]; /* table infos, filled when the tables are compacted */
338     Bins *bt, btb[1100]; /* bt: next free table in btb, the pool of full non-root tables */
339     unsigned char *p, b[200000]; /* p: next free byte in b, the pool for sequences too long for a code value */
340     } CdxMake;
341    
342    
343     /* make byte table entries */
344     static int mapSeq (CdxMake *mk, unsigned char *p, int len,
345     unsigned short code, int save)
346     {
347     Bins *bt = &mk->cdx.bt0;
348     LOG_DBG(LOG_DEBUG, "map '%.*s' -> %d", len, p, code);
349     if (save) {
350     Cv *cv = (Cv*)mk->cdx.cv+code;
351     if (15 < len) {
352     eRr(ERR_INVAL, "sequence '%.*s' too long %d", len, p, len);
353     return 1;
354     }
355     cv->b.hi = (0x80&save) | len;
356     if (!(~3 & len))
357     memcpy(cv->b.c, p, len);
358     else {
359     cv->u |= mk->p - mk->b;
360     memcpy(mk->p, p, len);
361     mk->p += len;
362     }
363     }
364     for (;; p++) {
365     Bin *b = *bt+*p;
366     if (!--len) { /* last byte -- assign code */
367     if (b->cod) {
368     eRr(ERR_INVAL, "attempt to reassign code %d for %d", code, b->cod);
369     return 1;
370     }
371     b->cod = code;
372     return 0;
373     }
374     if (b->tab)
375     bt = mk->btb + (b->tab - 1);
376     else {
377     bt = mk->bt++;
378     b->tab = mk->bt - mk->btb;
379     LOG_DBG(LOG_DEBUG, "new table %d", b->tab);
380     }
381     }
382     return 0;
383     }
384    
385     /** compile collation src */
386     static Cdx *cMake ( const Fld *src )
387     {
388     const Fld *eof = REND(src), *f;
389     CdxMake mk;
390     int skipalias = 1, mapcodeshift;
391     unsigned short firstcode = 1; /* of last row */
392     unsigned bins=0, off, u;
393     Cdx *cdx;
394    
395     LOG_DBG(LOG_DEBUG, "cMake %d", sizeof mk);
396     memset(&mk, 0, sizeof mk);
397     mk.vi = mk.vib;
398     mk.bt = mk.btb;
399     mk.p = mk.b;
400     mk.cdx.cv[0] = 0x80000000; /* 0: unassigned */
401     mk.cdx.cv[1] = 0x81000009; /* 1: tab */
402     mk.cdx.pri = 2;
403    
404     /* first pass: gather primaries and aliases */
405     for (f = src; ++f<eof; ) if (MET_COL == f->tag) {
406     unsigned char *p = (unsigned char*)f->val, *e, *t, save;
407     unsigned short code;
408    
409     if (3 > f->len || TAB != p[1]) continue;
410     e = p + f->len;
411     switch (*p) {
412     case 'A': if (skipalias) continue; save = 0; goto alias;
413     case 'W': save = 1; break;
414     case 'N': save = 0x80; break;
415     case 'S':
416     case 'T':
417     skipalias = 1;
418     default: continue;
419     }
420     firstcode = mk.cdx.pri;
421     skipalias = 0;
422     alias:
423     code = firstcode;
424     for (p+=2; e > p; p = t+1) {
425     if (!(t = memchr(p, TAB, e-p))) t = e;
426     if (t > p && mapSeq(&mk, p, t-p, code, save)) return 0;
427     if (save || code < mk.cdx.pri-1) code++;
428     }
429     if (save) mk.cdx.pri = code;
430     }
431     mk.cdx.typ = TYP_PLAIN|(256 < mk.cdx.pri ? 15 : 7);
432    
433     mk.cdx.var = mk.cdx.pri; /* second pass: gather variants */
434     for (f = src; ++f<eof; ) if (MET_COL == f->tag) {
435     unsigned char *p = (unsigned char*)f->val;
436    
437     if (3 > f->len || TAB != p[1]) continue;
438     switch (*p) {
439     case 'S':
440     case 'T':
441     eRr(ERR_INVAL, "this version does not support multilevel");
442     return 0;
443     }
444     }
445     if (mk.cdx.var > mk.cdx.pri)
446     mk.cdx.typ |= BIT_VARIANTS;
447    
448     mk.cdx.map = mk.cdx.var; /* third pass: resolve maps */
449     mapcodeshift = 256<mk.cdx.var ? 1 : 0;
450     for (f = src; ++f<eof; ) if (MET_COL == f->tag) {
451     unsigned short code, codes[CDX_MAXSEQ];
452     unsigned char ccodes[CDX_MAXSEQ], *pcodes;
453     unsigned char *p = (unsigned char*)f->val, *e, *t;
454     unsigned short n = 0;
455     Bin seq[CDX_MAXSEQ], *bin; /* current sequence */
456     Cv *cv;
457    
458     if (3 > f->len || 'M' != p[0] || TAB != p[1]) continue;
459     e = p + f->len;
460     p += 2;
461     if (!(t = memchr(p, TAB, e-p))) continue;
462    
463     for ( ; p<t && n<CDX_MAXSEQ; p++ ) { /* get n codes for map */
464     for (*(bin = seq) = mk.cdx.bt0[*p]; bin->tab && p < t;) {
465     /* slightly simpler than in cEnc, since we have full tables */
466     int *ent = (int*)mk.btb[bin->tab-1];
467     if (!ent[p[1]]) break; /* unassigned */
468     *++bin = *(Bin*)(ent + *++p);
469     if (seq+CDX_MAXSEQ == bin) break;
470     }
471     for (; bin>seq && (!bin->cod || mk.cdx.var<=bin->cod); bin--)
472     p--;
473     if (mk.cdx.var>bin->cod) /* ignore map codes */
474     codes[n++] = bin->cod ? bin->cod : 1;
475     }
476     p = t+1;
477    
478     /* got n codes ... */
479     if (1 == n) /* single code ~ alias */
480     code = codes[0];
481     else { /* make entry cv[mk.cdx.map++] (even for n=0) */
482     if (mapcodeshift) {
483     pcodes = (unsigned char*)codes; /* native */
484     #ifndef CPU_BIG_ENDIAN
485     { unsigned short *us = codes+n;
486     while (us-- > codes) *us = (*us>>8) | (*us<<8); /* mk be */
487     }
488     #endif
489     } else {
490     for (u=n; u--;) ccodes[u] = codes[u];
491     pcodes = ccodes;
492     }
493     cv = (Cv*)mk.cdx.cv + mk.cdx.map;
494     cv->b.hi = n;
495     if (n && 0x80000000&mk.cdx.cv[codes[0]]) /* inherit from 1st code */
496     cv->b.hi |= 0x80;
497     n <<= mapcodeshift;
498     if (!(~3 & n))
499     memcpy(cv->b.c, pcodes, n);
500     else {
501     cv->u |= mk.p - mk.b;
502     memcpy(mk.p, pcodes, n);
503     mk.p += n;
504     }
505     code = mk.cdx.map++;
506     }
507     for (; e > p; p = t+1) { /* map 'em all to mk.cdx.map */
508     if (!(t = memchr(p, TAB, e-p))) t = e;
509     if (t > p && mapSeq(&mk, p, t-p, code, 0)) return 0;
510     }
511     }
512     /* finish: compact */
513     mk.cdx.tab = mk.bt - mk.btb;
514     mk.cdx.otp =
515     mk.cdx.ovi = (((char *)&((Cdx*)0)->cv) - (char*)0) /* offsetoff(cv) */
516     + mk.cdx.map*sizeof (unsigned);
517     if (mk.cdx.var > mk.cdx.pri)
518     mk.cdx.otp += mk.cdx.var * sizeof (Var);
519     off = mk.cdx.otp + mk.cdx.tab * sizeof (Tab);
520     for (u=0; u<mk.cdx.tab; u++) /* compact tables */ {
521     int *base = (int *)(mk.btb + u), *pi = base;
522     mk.tp[u].off = off;
523     while (!*pi) pi++;
524     LOG_DBG(LOG_DEBUG, "found code %x", *pi);
525     mk.tp[u].min = pi - base;
526     for (pi = base+256; !*--pi; ) ;
527     mk.tp[u].max = pi - base;
528     bins += 1+mk.tp[u].max-mk.tp[u].min;
529     off += (1+mk.tp[u].max-mk.tp[u].min)*sizeof (Bin);
530     LOG_DBG(LOG_DEBUG, "table %d %d-%d", u+1, mk.tp[u].min, mk.tp[u].max);
531     }
532     mk.cdx.siz = off + (mk.p - mk.b);
533     eRr(LOG_INFO,
534     "%d primaries %d variants %d maps %d tables %d bins, off %d size %d",
535     mk.cdx.pri, mk.cdx.var-mk.cdx.pri, mk.cdx.map-mk.cdx.var, mk.cdx.tab,
536     bins, off, mk.cdx.siz);
537     /* adjust indirect CVs by off */ {
538     unsigned i = 0, mask = 0x0c000000;
539     for (; i < mk.cdx.var; i++)
540     if (0x0c000000 & mk.cdx.cv[i])
541     mk.cdx.cv[i] += off;
542     if (mapcodeshift) mask |= 0x02000000; /* only 1 code fits */
543     for (; i < mk.cdx.map; i++)
544     if (mask & mk.cdx.cv[i])
545     mk.cdx.cv[i] += off;
546     }
547     cdx = (Cdx*)mAlloc(mk.cdx.siz);
548     memcpy(cdx, &mk.cdx, mk.cdx.ovi);
549     if (cdx->var > cdx->pri)
550     memcpy((char*)cdx + cdx->ovi, mk.vib, mk.cdx.var*sizeof (Var));
551     if (cdx->tab) {
552     memcpy((char*)cdx + cdx->otp, mk.tp, mk.cdx.tab*sizeof (Tab));
553     for (u=0; u<mk.cdx.tab; u++)
554     memcpy((char*)cdx + mk.tp[u].off,
555     mk.btb[u] + mk.tp[u].min,
556     (1+mk.tp[u].max-mk.tp[u].min)*sizeof (Bin));
557     }
558     memcpy((char*)cdx + off, mk.b, mk.p - mk.b);
559     cdx->otp -= sizeof (Tab); /* adjust 0 based */
560    
561     return cdx;
562     } /* cMake */
563    
564    
565    
566     /*
567     list of open shared cdx
568     */
569     typedef struct Foo Foo;
570    
571     struct Foo { /* entry of the list of named cdx, kept by cOpen and cClose */
572     char nln; /* namelen */
573     char nam[31]; /* name, nln bytes without terminator; cOpen cuts names to 15 bytes */
574     Foo *foo; /* next entry */
575     int ref; /* reference count: cOpen increments, cClose decrements and frees at 0 */
576     Cdx *cdx; /* the shared collation */
577     FMap fm; /* the file mapping; only set by cOpen when cdx was mapped */
578     };
579     #if 0
580     static Foo lat1foo = { 7, "Latin-1", 0, 1, (Cdx*)&lat1cdx };
581     static Foo *fools = &lat1foo;
582     #endif
583     static Foo *fools = 0; /* head of the list, newest entry first */
584    
585    
586    
587     /** open or compile collation src */
588     const Cdx *cOpen (const Fld *src)
589     {
590     char *nam = 0, *p;
591     int nln = 0, coldef = 0;
592     Foo *foo = fools;
593     Cdx *cdx = 0;
594     const Fld *eof, *f;
595     char fname[20];
596     FMap fm;
597    
598     if (src)
599     for (f = src, eof = REND(src); ++f < eof; )
600     if (MET_COL == f->tag) {
601     coldef = 1;
602     if (2 < f->len && 'C' == f->val[0]
603     && TAB == f->val[1] && TAB != f->val[2]
604     ) { /* named */
605     nam = f->val+2;
606     nln = f->len-2;
607     if ((p = memchr(nam, TAB, nln))) nln = p-nam;
608     if (nln > 15) nln = 15;
609     LOG_DBG(LOG_DEBUG, "collation name '%.*s'", nln, nam);
610     for (; foo; foo = foo->foo)
611     if (nln == foo->nln && !memcmp(nam, foo->nam, nln)) { /* got it */
612     foo->ref++; /* ref it */
613     return foo->cdx; /* ret it */
614     }
615     /* TODO: try to map -- check type */
616     memset(fname, 0, sizeof fname);
617     memcpy(fname, nam, nln);
618     memcpy(fname+nln, ".mcx", 5);
619     memset(&fm, 0, sizeof fm);
620     fm.fil = FIL_NONE;
621     if (!fMOpen(&fm, fname, FIL_RD)) {
622     int size = fSize(fm.fil);
623     fm.lim = (size + env.psz-1)>>env.psh;
624     if ( (int)fm.lim != fMap(&fm, fm.lim)
625     || memcmp(MAGIC, fm.map, 3)
626     || size != (int)((Cdx*)fm.map)->siz
627     ) {
628     eRr(ERR_TRASH, "bad coll file '%s'", fname);
629     fMClose(&fm);
630     } else {
631     cdx = (Cdx*)fm.map;
632     eRr(LOG_VERBOSE, "mapped coll '%s' %d bytes", fname, size);
633     #ifndef WIN32
634     fClose(&fm.fil); /* don't need the file open */
635     #endif
636     }
637     }
638     }
639     }
640     if (!cdx && (!coldef || !(cdx = cMake(src))))
641     return 0;
642     if (nam) { /* foo an mmaped or named made */
643     Foo *newfoo = mAlloc(sizeof *newfoo);
644     memcpy(newfoo->nam, nam, newfoo->nln = nln);
645     newfoo->foo = fools;
646     newfoo->ref = 1;
647     newfoo->cdx = cdx;
648     if (fm.map) /* was mapped */
649     newfoo->fm = fm;
650     else { /* try to write */
651     file fil = FIL_NONE;
652     if (!fOpen(&fil, fname, FIL_WR|FIL_CREAT|FIL_TRUNC)) {
653     memcpy(cdx->mag, MAGIC, 3);
654     fWrite(&fil, cdx, cdx->siz);
655     fClose(&fil);
656     eRr(LOG_INFO, "saved coll '%s' %d bytes", fname, cdx->siz);
657     }
658     memcpy(cdx->mag, "nam", 3); /* yet we keep using our selfmade copy */
659     }
660     fools = newfoo;
661     }
662     return cdx;
663     } /* cOpen */
664    
665    
666     void cClose (Cdx *cdx)
667     {
668     Foo *foo, *f = 0;
669     switch (cdx->mag[0]) {
670     case 0: /* internal anonymous */
671     mFree(cdx);
672     case 's': /* internal static */
673     return;
674     #ifdef CPU_BIG_ENDIAN
675     case 'M':
676     #else
677     case 'm': /* mapped - always named */
678     #endif
679     case 'n': /* internal named */
680     for (foo = fools; foo; foo = (f = foo)->foo)
681     if (foo->cdx == cdx) {
682     if (!--foo->ref) {
683     if ('n' == cdx->mag[0])
684     mFree(cdx);
685     else
686     fMClose(&foo->fm);
687     if (f)
688     f->foo = foo->foo;
689     else
690     fools = foo->foo;
691     mFree(foo);
692     }
693     return;
694     }
695     /* panic time ? */
696     }
697     } /* cClose */

  ViewVC Help
Powered by ViewVC 1.1.26