1 |
/* |
2 |
openisis - an open implementation of the CDS/ISIS database |
3 |
Version 0.8.x (patchlevel see file Version) |
4 |
Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org |
5 |
|
6 |
This library is free software; you can redistribute it and/or |
7 |
modify it under the terms of the GNU Lesser General Public |
8 |
License as published by the Free Software Foundation; either |
9 |
version 2.1 of the License, or (at your option) any later version. |
10 |
|
11 |
This library is distributed in the hope that it will be useful, |
12 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
Lesser General Public License for more details. |
15 |
|
16 |
You should have received a copy of the GNU Lesser General Public |
17 |
License along with this library; if not, write to the Free Software |
18 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 |
|
20 |
see README for more information |
21 |
EOH */ |
22 |
#ifndef LSTR_H |
23 |
|
24 |
/* |
25 |
$Id: lstr.h,v 1.15 2003/04/08 00:20:53 kripke Exp $ |
26 |
definition of ISIS internal structures for openisis lib implementation. |
27 |
|
28 |
This header defines the common internal data structures and their members |
29 |
and external structure and member definition types. |
An internal data structure is the common in-memory representation of
ISIS data; it may be backed by different external (in-file) structures.
32 |
|
An in-memory data structure
is a contiguous int* chunk of memory, consisting of the sizes (one int),
an array of members (int) and some buffer (array of char).
36 |
|
A data structure's sizes consist of the lengths
fix and rep of the fixed and repeated part, resp.,
and the number of occurrences of the repeated part.
It is used in both an abstract description of a data type
and each instance of a data structure.
If the number of occurrences of the repeated part is not fixed,
it is 0 in the abstract case.
The lengths are suitable as offsets into the lmbr array.
This structure is designed to fit within 32 bits.
46 |
|
47 |
The sizing is the only common meta-data description of a logical |
48 |
data type. For readability of code, however, the member indexes |
49 |
are enum-erated. When actually used, the type has to match the code's |
50 |
assumptions anyway. |
51 |
The numbers fix and rep are fixed for a given data structure. |
52 |
|
53 |
Although the logical data type has more properties, |
54 |
especially the type of each member (numeric, C-string or raw mem), |
55 |
a representation of this data is needed only when converting |
56 |
external data and thus is stored (somewhat redundantly) |
57 |
with the external properties. |
58 |
|
There are only two types of members:
numeric and memory (mostly some character data).
Both are represented by an int, which, for memory,
denotes an offset (counted in BYTES, not ints) from the record's start
(NOT an offset into the data buffer, as with the isis mst record).
64 |
|
The member array consists of a fixed number <emp>fix</emp> of initial
members and a number <emp>occ</emp> of occurrences
of member arrays of fixed length <emp>rep</emp>.
While <emp>occ</emp> is usually fixed,
it varies for masterfile records.
70 |
|
71 |
Note that this layout somewhat resembles the isis record itself, |
72 |
which supports a single level of nesting and repetition. |
73 |
*/ |
74 |
|
/* id of a structure */

/* construction
	Packs a structure id into a single int:
	the variant is shifted to bit 16 and upward, the record set number
	to bit 8 and upward, the number of the record type stays at bit 0.
	Neither set nor no is masked here, so each has to fit into 8 bits
	in order not to spill into the neighbouring field.
*/
#define LSTRID( set, no, vari ) \
	(((int)(vari))<<16 | (set)<<8 | (no) )
81 |
|
/* access
	Each accessor extracts one field from a structure id.
	The 8-bit fields are read as unsigned char: the signedness of plain
	char is implementation-defined, so a (char) cast would turn the values
	128..255 negative on platforms where char is signed.
	Every expansion is fully parenthesized so that it can be used safely
	inside larger expressions.
*/
#define LSTRNO( l ) ((unsigned char)(l)) /* no of record type in set */
#define LSTRSET( l ) ((unsigned char)((l)>>8)) /* record set number */
#define LSTRVAR( l ) ((short)((l)>>16)) /* variant */
87 |
|
88 |
|
/* size of a structure */

/* construction
	Packs the three sizes of a structure into a single int:
	occ (occurrences of the repeated part) is shifted to bit 16 and upward,
	rep (length of the repeated part) to bit 8 and upward,
	fix (length of the fixed part) stays at bit 0.
	Neither rep nor fix is masked here, so each has to fit into 8 bits.
*/
#define LSTRSIZE( fix, rep, occ ) \
	(((int)(occ))<<16 | (rep)<<8 | (fix))
95 |
|
/* access
	Each accessor extracts one field from a packed size value.
	The 8-bit lengths are read as unsigned char: the signedness of plain
	char is implementation-defined, so a (char) cast would turn lengths
	of 128..255 negative on platforms where char is signed.
	Every expansion is fully parenthesized so that it can be used safely
	inside larger expressions.
*/
#define LSTRFIX( l ) ((unsigned char)(l)) /* length of fixed part */
#define LSTRREP( l ) ((unsigned char)((l)>>8)) /* length of repeated part */
#define LSTROCC( l ) ((short)((l)>>16)) /* occurrences of repeated part */
/* actual number of members: the fixed part plus all repeated parts */
#define LSTRMEMS( l ) (LSTRFIX(l) + LSTRREP(l)*LSTROCC(l))
/* in-memory size in bytes of the leading sizes int plus all members
	(w/o buffer).
*/
#define LSTRLEN( t ) ((int)sizeof(int)*(1 + LSTRMEMS(t)))
105 |
|
106 |
|
/**
	Description of an external member's layout: it is encoded as a short
	"len" and a short offset "off" into the data or repeated part, resp.
	Offset and len are combined into one int
	(len in the low 16 bits, off in the bits above).
	If offsets are 0, they are computed based on the assumption
	that members are laid out packed in the declared ordering.

	For character data, len just gives the length of the member
	in bytes (less than 2**15), with the highest bit cleared.
	A length of 0 denotes character data of variable size.

	Numerical data is marked with the highest bit set.
	For numbers, the lowest two bits give the ld (binary logarithm)
	of the length in bytes (i.e. 0=1,1=2,2=4,3=8 bytes).
	The offset and length of the actually used bits within these bytes
	are each encoded in six bits, which is enough for 0..63.
	A length of 0 (as resulting from 6-bit overflow on '64') means no masking.
	The 15th bit, if set, denotes high endian data.

	Note that the only reason for stuffing all that into a short is
	that it is sufficient. The amount of space saved is minimal and
	depending on the actual processor there may be a slight performance
	penalty for the bit operations.

*/
#define LONG2LEN( l ) ((short)(l))         /* low 16 bits: len */
#define LONG2OFF( l ) ((short)((l) >> 16)) /* bits 16 and up: off */
134 |
|
/* constants
*/
#define LMBR_FNUM 0x8000 /* flag numeric data */
#define LMBR_FHIE 0x4000 /* flag high endian data */

/* construction
*/
/* numeric member; ld is the binary logarithm of its length in bytes.
	The argument is parenthesized so that an expression such as
	a conditional may be passed without being torn apart by the '|'.
*/
#define LMBRNUM( ld ) (LMBR_FNUM | (ld))
/* standard numeric types: the world according to Java :) */
#define LMBRBYTE LMBRNUM(0)
#define LMBRSHORT LMBRNUM(1)
#define LMBRINT LMBRNUM(2)
#define LMBRLONG LMBRNUM(3)
/* bit mask construction for numerical data.
	off is placed at bit 8 and upward, len at bits 2..7, ld at bits 0..1.
	len is reduced to six bits, so that a length of 64 wraps to 0
	("no masking") instead of spilling into the lowest offset bit.
*/
#define LMBRBITS( off, len, ld ) \
	(LMBR_FNUM | (off)<<8 | ((len) & 0x3f)<<2 | (ld))
/* same for a 4 byte number (ld = 2) */
#define LMBRBITS4( off, len ) LMBRBITS( off, len, 2 )
/* if you have to specify the offset explicitly */
#define LSTRLOFF( len, off ) ((len) | (off)<<16)
153 |
|
/* access
	All of these take the "len" part of a member description.
*/
#define LMBRISNUM( f ) (LMBR_FNUM & (f)) /* non-zero for numeric data */
#define LMBRLD( f ) (0x3 & (f)) /* lowest two bits: ld of byte length */
/* bit access for numerical data */
#define LMBRISBITS( f ) (0x3ffc & (f)) /* non-zero if any bit len/off is set */
#define LMBRBITLEN( f ) (0x003f & ((f) >> 2)) /* six bits of bit length */
#define LMBRBITOFF( f ) (0x003f & ((f) >> 8)) /* six bits of bit offset */
162 |
|
/**
	Description of an external structure layout.
	This is usually fixed for a given (implementation of a) type;
	for the isis mst record, however, the occurrences of the repeated
	part and the total external and internal lengths vary for each record.
*/

/* indexes into a structure description */
enum {
	LSTR_SIZE, /* sizes */
	LSTR_XRLO, /* repeated part byte length and offset <<16; auto */
	LSTR_XLEN, /* ext. total length of data; auto */
	LSTR_ILEN, /* int. total length of data; auto */
	LSTR_XMBR  /* members */
};
/* three zeros; presumably the initial values of the three "auto"
	entries above (XRLO, XLEN, ILEN) - confirm against the users
*/
#define LSTR_AUTOLENGTHS 0,0,0
/* totmbrs plus 4, where 4 equals LSTR_XMBR, the index of the first member */
#define LSTR_LONGS( totmbrs ) (4+(totmbrs))
179 |
|
/* technical variants of a record */
typedef enum {
	LVAR_PAC, /* packed little endian base variant */
	LVAR_ALI, /* aligned variant */
	LVAR_BIG, /* big endian aligned */
	LVAR_VARI /* count of variants */
} lstr_variant;
187 |
|
/* known record sets */
enum {
	LSET_MST, /* isis 1 master file */
	LSET_INV, /* isis 1 inverted file */
	LSET_SETS /* count of record sets */
};
194 |
|
195 |
|
196 |
typedef struct { |
197 |
const char ***name; /* an array of names per record type */ |
198 |
int **desc[LVAR_VARI]; /* an array of descs per variant */ |
199 |
} LstrSet; |
200 |
|
201 |
|
/* data types of ISIS 1 master file */
typedef enum {
	LSTR_MFC, /* MST head */
	LSTR_MFR, /* MST record */
	LSTR_XRF, /* XRF record */
	LSTR_MST  /* count */
} lstr_mst;
209 |
|
210 |
|
/* MFC members; numbering starts at 1 */
enum {
	LMFC_CTLM = 1,
	LMFC_NMFN, /* next available MFN */
	LMFC_NMFB, /* next free block in MF counting from 1 */
	LMFC_NMFP, /* next free pos in MF */
	LMFC_TYPE, /* "0 for user's bases; 1 for messages" */
	LMFC_RCNT, /* record count ? not used by CISIS */
	LMFC_MFX1,
	LMFC_MFX2, /* LAN lock */
	LMFC_MFX3, /* LAN lock */
	LMFC__FL   /* one past the last member */
};
223 |
|
/* XRF members; numbering starts at 1 */
enum {
	LXRF_XPOS = 1,
	LXRF_XREC
};
228 |
|
/* data types of ISIS 1 inverted file */
typedef enum {
	LSTR_CNT, /* CNT record */
	LSTR_N01, /* N01 record */
	LSTR_L01, /* L01 record */
	LSTR_N02, /* N02 record */
	LSTR_L02, /* L02 record */
	LSTR_IFP, /* IFP record */
	LSTR_INV  /* count */
} lstr_inv;
239 |
|
240 |
|
/* CNT members; numbering starts at 1 */
enum {
	LCNT_TYPE = 1, /* 1 or 2: N0x */
	LCNT_ORDN, /* fix 5 */
	LCNT_ORDF, /* fix 5 */
	LCNT_N,    /* fix 15 */
	LCNT_K,    /* fix 5 */
	LCNT_LEV,  /* depth of index */
	LCNT_POSR, /* position of root */
	LCNT_NMAX,
	LCNT_FMAX,
	LCNT_ABNO,
	LCNT__FL   /* one past the last member */
};
254 |
|
/* N0x members.
	The fixed part is numbered from 1; the numbering restarts at 0
	for the members of the repeated part.
*/
enum {
	LN0X_POS = 1, /* record number */
	LN0X_OCK,     /* number of keys */
	LN0X_TYPE,    /* type 1 or 2 */
	LN0X__FL,     /* offset of repeated part */
	LN0X_KEY = 0, /* key chars */
	LN0X_REF,     /* ref to next node (>0) or leaf (<0) */
	LN0X__RL      /* length of repeated part */
};
264 |
|
/* L0x members.
	The fixed part is numbered from 1; the numbering restarts at 0
	for the members of the repeated part.
*/
enum {
	LL0X_POS = 1, /* record number */
	LL0X_OCK,     /* number of keys */
	LL0X_TYPE,    /* type 1 or 2 */
	LL0X_PS,      /* type 1 or 2
	                 NOTE(review): same text as for LL0X_TYPE,
	                 possibly copied - confirm the meaning of PS */
	LL0X__FL,     /* offset of repeated part */
	LL0X_KEY = 0, /* key chars */
	LL0X_INFB,    /* ifp block */
	LL0X_INFP,    /* ifp pos */
	LL0X__RL      /* length of repeated part */
};
276 |
|
/* IFP members.
	The fixed part is numbered from 1; the numbering restarts at 0
	for the members of the repeated part.
*/
enum {
	LIFP_NXTB = 1, /* block of next segment */
	LIFP_NXTP,     /* pos of next segment */
	LIFP_TOTP,     /* total postings */
	LIFP_SEGP,     /* postings in this segment */
	LIFP_SEGC,     /* postings in this segment
	                  NOTE(review): same text as for LIFP_SEGP,
	                  possibly copied - confirm the meaning of SEGC */
	LIFP__FL,      /* offset of repeated part */
	LIFP_MFN = 0,  /* mfn */
	LIFP_ADD,      /* additional qualifiers */
	LIFP__RL       /* length of repeated part */
};
288 |
|
289 |
|
290 |
extern const LstrSet lstrlib[LSET_SETS]; |
291 |
|
/** auto-initialise the given structure or all builtins, iff NULL.
	@param str the structure to initialise, or NULL for all builtins
	@return an int; its meaning is not documented in this header
*/
extern int lstr_auto ( int *str );
295 |
|
296 |
#define LSTR_H |
297 |
#endif /* LSTR_H */ |