1 |
astrand |
930 |
#include <string.h> |
2 |
|
|
#include <stdlib.h> |
3 |
|
|
/* #define NDEBUG */ |
4 |
|
|
#include <assert.h> |
5 |
astrand |
918 |
|
6 |
astrand |
930 |
#include "hash.h" |
7 |
|
|
|
8 |
|
|
|
9 |
astrand |
918 |
/* |
10 |
astrand |
930 |
** public domain code by Jerry Coffin. |
11 |
|
|
** |
12 |
|
|
** Tested with Visual C 1.0 and Borland C 3.1. |
13 |
|
|
** Compiles without warnings, and seems like it should be pretty |
14 |
|
|
** portable. |
15 |
|
|
*/ |
16 |
|
|
|
17 |
|
|
/* HW: HenkJan Wolthuis, 1997, public domain |
18 |
|
|
|
19 |
|
|
changed functionnames, all public functions now have a 'hash_' prefix |
20 |
|
|
minor editing, marked 'm all(?) with a description |
21 |
|
|
removed a bug in hash_del and one in hash_enumerate |
22 |
|
|
added some assertions |
23 |
|
|
added a 'count' member to hold the number of elements |
24 |
|
|
added hash_sorted_enum, sometimes useful |
25 |
|
|
changed the testmain |
26 |
|
|
*/ |
27 |
|
|
|
28 |
|
|
/* |
29 |
astrand |
918 |
** RBS: Bob Stout, 2003, public domain |
30 |
|
|
** |
31 |
|
|
** 1. Fixed some problems in hash() static function. |
32 |
|
|
** 2. Use unsigned shorts for hash values. This was implicit in the original |
33 |
|
|
** which was written for PC's using early Microsoft and Borland compilers. |
34 |
|
|
*/ |
35 |
|
|
|
36 |
astrand |
930 |
/* HW: #define to allow duplicate keys, they're added before the existing |
37 |
|
|
key so hash_lookup finds the last one inserted first (LIFO) |
38 |
|
|
when not defined, hash_insert swaps the datapointers, returning a |
39 |
|
|
pointer to the old data |
40 |
|
|
*/ |
41 |
|
|
/* #define DUPLICATE_KEYS */ |
42 |
|
|
|
43 |
|
|
/* |
44 |
|
|
** These are used in freeing a table. Perhaps I should code up |
45 |
|
|
** something a little less grungy, but it works, so what the heck. |
46 |
|
|
*/ |
47 |
|
|
static void (*function)(void *) = NULL; |
48 |
|
|
static hash_table *the_table = NULL; |
49 |
|
|
|
50 |
|
|
|
51 |
|
|
/* Initialize the hash_table to the size asked for. Allocates space |
52 |
|
|
** for the correct number of pointers and sets them to NULL. If it |
53 |
|
|
** can't allocate sufficient memory, signals error by setting the size |
54 |
|
|
** of the table to 0. |
55 |
|
|
*/ |
56 |
|
|
/*HW: changed, now returns NULL on malloc-failure */ |
57 |
|
|
hash_table *hash_construct_table( hash_table *table, size_t size ) |
58 |
|
|
{ |
59 |
|
|
size_t i; |
60 |
|
|
bucket **temp; |
61 |
|
|
|
62 |
|
|
table->size = size; |
63 |
|
|
table->count = 0; |
64 |
|
|
table->table = (bucket **)malloc(sizeof(bucket *) * size); |
65 |
|
|
temp = table->table; |
66 |
|
|
|
67 |
|
|
if( NULL == temp ) |
68 |
|
|
{ |
69 |
|
|
table->size = 0; |
70 |
|
|
return NULL; /*HW: was 'table' */ |
71 |
|
|
} |
72 |
|
|
|
73 |
|
|
for( i=0; i<size; i++ ) |
74 |
|
|
temp[i] = NULL; |
75 |
|
|
|
76 |
|
|
return table; |
77 |
|
|
} |
78 |
|
|
|
79 |
|
|
|
80 |
|
|
/* |
81 |
|
|
** Hashes a string to produce an unsigned short, which should be |
82 |
|
|
** sufficient for most purposes. |
83 |
|
|
** RBS: fixed per user feedback from Steve Greenland |
84 |
|
|
*/ |
85 |
|
|
|
86 |
|
|
static unsigned short hash(char *string) |
87 |
|
|
{ |
88 |
|
|
unsigned short ret_val = 0; |
89 |
|
|
int i; |
90 |
|
|
|
91 |
|
|
while (*string) |
92 |
|
|
{ |
93 |
|
|
/* |
94 |
|
|
** RBS: Added conditional to account for strings in which the |
95 |
astrand |
918 |
** length is less than an integral multiple of sizeof(int). |
96 |
|
|
** |
97 |
|
|
** Note: This fixes the problem of hasing trailing garbage, but |
98 |
|
|
** doesn't fix the problem with some CPU's which can't align on |
99 |
|
|
** byte boundries. Any decent C compiler *should* fix this, but |
100 |
|
|
** it still might extract a performance hit. Also unaddressed is |
101 |
|
|
** what happens when using a CPU which addresses data only on |
102 |
|
|
** 4-byte boundries when it tries to work with a pointer to a |
103 |
|
|
** 2-byte unsigned short. |
104 |
astrand |
930 |
*/ |
105 |
astrand |
918 |
|
106 |
astrand |
930 |
if (strlen(string) >= sizeof(unsigned short)) |
107 |
|
|
i = *(unsigned short *)string; |
108 |
|
|
else i = (unsigned short)(*string); |
109 |
|
|
ret_val ^= i; |
110 |
|
|
ret_val <<= 1; |
111 |
|
|
string ++; |
112 |
|
|
} |
113 |
|
|
return ret_val; |
114 |
|
|
} |
115 |
|
|
|
116 |
|
|
/* |
117 |
|
|
** Insert 'key' into hash table. |
118 |
|
|
** Returns pointer to old data associated with the key, if any, or |
119 |
|
|
** NULL if the key wasn't in the table previously. |
120 |
|
|
*/ |
121 |
|
|
/* HW: returns NULL if malloc failed */ |
122 |
|
|
void *hash_insert( char *key, void *data, hash_table *table ) |
123 |
|
|
{ |
124 |
|
|
unsigned short val = hash(key) % table->size; |
125 |
|
|
bucket *ptr; |
126 |
|
|
|
127 |
|
|
assert( NULL != key ); |
128 |
|
|
|
129 |
|
|
/* |
130 |
|
|
** NULL means this bucket hasn't been used yet. We'll simply |
131 |
|
|
** allocate space for our new bucket and put our data there, with |
132 |
|
|
** the table pointing at it. |
133 |
|
|
*/ |
134 |
|
|
|
135 |
|
|
if( NULL == (table->table)[val] ) |
136 |
|
|
{ |
137 |
|
|
(table->table)[val] = (bucket *)malloc(sizeof(bucket)); |
138 |
|
|
if( NULL == (table->table)[val] ) |
139 |
|
|
return NULL; |
140 |
|
|
|
141 |
|
|
if( NULL == ((table->table)[val]->key = (char*) malloc(strlen(key)+1)) ) |
142 |
|
|
{ |
143 |
|
|
free( (table->table)[val] ); |
144 |
|
|
(table->table)[val] = NULL; |
145 |
|
|
return NULL; |
146 |
|
|
} |
147 |
|
|
strcpy( (table->table)[val]->key, key); |
148 |
|
|
(table->table)[val] -> next = NULL; |
149 |
|
|
(table->table)[val] -> data = data; |
150 |
|
|
table->count++; /* HW */ |
151 |
|
|
return (table->table)[val] -> data; |
152 |
|
|
} |
153 |
|
|
|
154 |
|
|
/* HW: added a #define so the hashtable can accept duplicate keys */ |
155 |
|
|
#ifndef DUPLICATE_KEYS |
156 |
|
|
/* |
157 |
|
|
** This spot in the table is already in use. See if the current string |
158 |
|
|
** has already been inserted, and if so, increment its count. |
159 |
|
|
*/ /* HW: ^^^^^^^^ ?? */ |
160 |
|
|
for( ptr = (table->table)[val]; NULL != ptr; ptr = ptr->next ) |
161 |
|
|
if( 0 == strcmp(key, ptr->key) ) |
162 |
|
|
{ |
163 |
|
|
void *old_data; |
164 |
|
|
|
165 |
|
|
old_data = ptr->data; |
166 |
|
|
ptr->data = data; |
167 |
|
|
return old_data; |
168 |
|
|
} |
169 |
|
|
#endif |
170 |
|
|
/* |
171 |
|
|
** This key must not be in the table yet. We'll add it to the head of |
172 |
|
|
** the list at this spot in the hash table. Speed would be |
173 |
|
|
** slightly improved if the list was kept sorted instead. In this case, |
174 |
|
|
** this code would be moved into the loop above, and the insertion would |
175 |
|
|
** take place as soon as it was determined that the present key in the |
176 |
|
|
** list was larger than this one. |
177 |
|
|
*/ |
178 |
|
|
|
179 |
|
|
ptr = (bucket *)malloc(sizeof(bucket)); |
180 |
|
|
if( NULL == ptr ) |
181 |
|
|
return NULL; /*HW: was 0 */ |
182 |
|
|
|
183 |
|
|
if( NULL == (ptr -> key = (char*) malloc(strlen(key)+1)) ) |
184 |
|
|
{ |
185 |
|
|
free(ptr); |
186 |
|
|
return NULL; |
187 |
|
|
} |
188 |
|
|
strcpy( ptr->key, key ); |
189 |
|
|
ptr -> data = data; |
190 |
|
|
ptr -> next = (table->table)[val]; |
191 |
|
|
(table->table)[val] = ptr; |
192 |
|
|
table->count++; /* HW */ |
193 |
|
|
|
194 |
|
|
return data; |
195 |
|
|
} |
196 |
|
|
|
197 |
|
|
|
198 |
|
|
/* |
199 |
|
|
** Look up a key and return the associated data. Returns NULL if |
200 |
|
|
** the key is not in the table. |
201 |
|
|
*/ |
202 |
|
|
void *hash_lookup( char *key, hash_table *table ) |
203 |
|
|
{ |
204 |
|
|
unsigned short val = hash(key) % table->size; |
205 |
|
|
bucket *ptr; |
206 |
|
|
|
207 |
|
|
assert( NULL != key ); |
208 |
|
|
|
209 |
|
|
if(NULL == (table->table)[val]) |
210 |
|
|
return NULL; |
211 |
|
|
|
212 |
|
|
for( ptr = (table->table)[val]; NULL != ptr; ptr = ptr->next ) |
213 |
|
|
{ |
214 |
|
|
if(0 == strcmp( key, ptr -> key ) ) |
215 |
|
|
return ptr->data; |
216 |
|
|
} |
217 |
|
|
|
218 |
|
|
return NULL; |
219 |
|
|
} |
220 |
|
|
|
221 |
|
|
/* |
222 |
|
|
** Delete a key from the hash table and return associated |
223 |
|
|
** data, or NULL if not present. |
224 |
|
|
*/ |
225 |
|
|
|
226 |
|
|
void *hash_del(char *key, hash_table *table) |
227 |
|
|
{ |
228 |
|
|
unsigned short val = hash(key) % table->size; |
229 |
|
|
void *data; |
230 |
|
|
bucket *ptr, *last = NULL; |
231 |
|
|
|
232 |
|
|
assert( NULL != key ); |
233 |
|
|
|
234 |
|
|
if( NULL == (table->table)[val] ) |
235 |
|
|
return NULL; /* HW: was 'return 0' */ |
236 |
|
|
|
237 |
|
|
/* |
238 |
|
|
** Traverse the list, keeping track of the previous node in the list. |
239 |
|
|
** When we find the node to delete, we set the previous node's next |
240 |
|
|
** pointer to point to the node after ourself instead. We then delete |
241 |
|
|
** the key from the present node, and return a pointer to the data it |
242 |
|
|
** contains. |
243 |
|
|
*/ |
244 |
|
|
for( last = NULL, ptr = (table->table)[val]; |
245 |
|
|
NULL != ptr; |
246 |
|
|
last = ptr, ptr = ptr->next ) |
247 |
|
|
{ |
248 |
|
|
if( 0 == strcmp( key, ptr -> key) ) |
249 |
|
|
{ |
250 |
|
|
if( last != NULL ) |
251 |
|
|
{ |
252 |
|
|
data = ptr -> data; |
253 |
|
|
last -> next = ptr -> next; |
254 |
|
|
free( ptr->key ); |
255 |
|
|
free( ptr ); |
256 |
|
|
table->count--; /* HW */ |
257 |
|
|
return data; |
258 |
|
|
} |
259 |
|
|
|
260 |
|
|
/* If 'last' still equals NULL, it means that we need to |
261 |
|
|
** delete the first node in the list. This simply consists |
262 |
|
|
** of putting our own 'next' pointer in the array holding |
263 |
|
|
** the head of the list. We then dispose of the current |
264 |
|
|
** node as above. |
265 |
|
|
*/ |
266 |
|
|
else |
267 |
|
|
{ |
268 |
|
|
/* HW: changed this bit to match the comments above */ |
269 |
|
|
data = ptr->data; |
270 |
|
|
(table->table)[val] = ptr->next; |
271 |
|
|
free( ptr->key ); |
272 |
|
|
free( ptr ); |
273 |
|
|
table->count--; /* HW */ |
274 |
|
|
return data; |
275 |
|
|
} |
276 |
|
|
} |
277 |
|
|
} |
278 |
|
|
|
279 |
|
|
/* |
280 |
|
|
** If we get here, it means we didn't find the item in the table. |
281 |
|
|
** Signal this by returning NULL. |
282 |
|
|
*/ |
283 |
|
|
|
284 |
|
|
return NULL; |
285 |
|
|
} |
286 |
|
|
|
287 |
|
|
/* |
288 |
|
|
** free_table iterates the table, calling this repeatedly to free |
289 |
|
|
** each individual node. This, in turn, calls one or two other |
290 |
|
|
** functions - one to free the storage used for the key, the other |
291 |
|
|
** passes a pointer to the data back to a function defined by the user, |
292 |
|
|
** process the data as needed. |
293 |
|
|
*/ |
294 |
|
|
|
295 |
|
|
static void free_node( char *key, void *data ) |
296 |
|
|
{ |
297 |
|
|
(void) data; |
298 |
|
|
|
299 |
|
|
assert( NULL != key ); |
300 |
|
|
|
301 |
|
|
if( NULL != function ) |
302 |
|
|
{ |
303 |
|
|
function( hash_del( key, the_table ) ); |
304 |
|
|
} |
305 |
|
|
else |
306 |
|
|
hash_del( key, the_table ); |
307 |
|
|
} |
308 |
|
|
|
309 |
|
|
/* |
310 |
|
|
** Frees a complete table by iterating over it and freeing each node. |
311 |
|
|
** the second parameter is the address of a function it will call with a |
312 |
|
|
** pointer to the data associated with each node. This function is |
313 |
|
|
** responsible for freeing the data, or doing whatever is needed with |
314 |
|
|
** it. |
315 |
|
|
*/ |
316 |
|
|
|
317 |
|
|
void hash_free_table( hash_table *table, void (*func)(void *) ) |
318 |
|
|
{ |
319 |
|
|
function = func; |
320 |
|
|
the_table = table; |
321 |
|
|
|
322 |
|
|
hash_enumerate( table, free_node ); |
323 |
|
|
free( table->table ); |
324 |
|
|
table->table = NULL; |
325 |
|
|
table->size = 0; |
326 |
|
|
table->count = 0; /* HW */ |
327 |
|
|
|
328 |
|
|
the_table = NULL; |
329 |
|
|
function = NULL; |
330 |
|
|
} |
331 |
|
|
|
332 |
|
|
/* |
333 |
|
|
** Simply invokes the function given as the second parameter for each |
334 |
|
|
** node in the table, passing it the key and the associated data. |
335 |
|
|
*/ |
336 |
|
|
|
337 |
|
|
void hash_enumerate( hash_table *table, void (*func)(char *, void *) ) |
338 |
|
|
{ |
339 |
|
|
unsigned i; |
340 |
|
|
bucket *temp; |
341 |
|
|
bucket *swap; |
342 |
|
|
|
343 |
|
|
for( i=0; i<table->size; i++ ) |
344 |
|
|
{ |
345 |
|
|
if( NULL != (table->table)[i] ) |
346 |
|
|
{ |
347 |
|
|
/* HW: changed this loop */ |
348 |
|
|
temp = (table->table)[i]; |
349 |
|
|
while( NULL != temp ) |
350 |
|
|
{ |
351 |
|
|
/* HW: swap trick, in case temp is freed by 'func' */ |
352 |
|
|
swap = temp->next; |
353 |
|
|
func( temp -> key, temp->data ); |
354 |
|
|
temp = swap; |
355 |
|
|
} |
356 |
|
|
} |
357 |
|
|
} |
358 |
|
|
} |
359 |
|
|
|
360 |
|
|
/* HW: added hash_sorted_enum() |
361 |
|
|
|
362 |
|
|
hash_sorted_enum is like hash_enumerate but gives |
363 |
|
|
sorted output. This is much slower than hash_enumerate, but |
364 |
|
|
sometimes nice for printing to a file... |
365 |
|
|
*/ |
366 |
|
|
|
367 |
|
|
typedef struct sort_struct |
368 |
|
|
{ |
369 |
|
|
char *key; |
370 |
|
|
void *data; |
371 |
|
|
} sort_struct; |
372 |
|
|
static sort_struct *sortmap = NULL; |
373 |
|
|
|
374 |
|
|
static int counter = 0; |
375 |
|
|
|
376 |
|
|
/* HW: used as 'func' for hash_enumerate */ |
377 |
|
|
static void key_get( char *key, void *data ) |
378 |
|
|
{ |
379 |
|
|
sortmap[ counter ].key = key; |
380 |
|
|
sortmap[ counter ].data = data; |
381 |
|
|
counter++; |
382 |
|
|
} |
383 |
|
|
|
384 |
|
|
/* HW: used for comparing the keys in qsort */ |
385 |
|
|
static int key_comp( const void* a, const void *b ) |
386 |
|
|
{ |
387 |
|
|
return strcmp( (*(sort_struct*)a).key, (*(sort_struct*)b).key ); |
388 |
|
|
} |
389 |
|
|
|
390 |
|
|
/* HW: it's a compromise between speed and space. this one needs |
391 |
|
|
table->count * sizeof( sort_struct) memory. |
392 |
|
|
Another approach only takes count*sizeof(char*), but needs |
393 |
|
|
to hash_lookup the data of every key after sorting the key. |
394 |
|
|
returns 0 on malloc failure, 1 on success |
395 |
|
|
*/ |
396 |
|
|
int hash_sorted_enum( hash_table *table, void (*func)( char *, void *) ) |
397 |
|
|
{ |
398 |
|
|
int i; |
399 |
|
|
|
400 |
|
|
/* nothing to do ! */ |
401 |
|
|
if( NULL == table || 0 == table->count || NULL == func ) |
402 |
|
|
return 0; |
403 |
|
|
|
404 |
|
|
/* malloc an pointerarray for all hashkey's and datapointers */ |
405 |
|
|
if( NULL == ( sortmap = (sort_struct*) malloc( sizeof( sort_struct ) * table->count)) ) |
406 |
|
|
return 0; |
407 |
|
|
|
408 |
|
|
/* copy the pointers to the hashkey's */ |
409 |
|
|
counter = 0; |
410 |
|
|
hash_enumerate( table, key_get ); |
411 |
|
|
|
412 |
|
|
/* sort the pointers to the keys */ |
413 |
|
|
qsort( sortmap, table->count, sizeof(sort_struct), key_comp ); |
414 |
|
|
|
415 |
|
|
/* call the function for each node */ |
416 |
|
|
for( i=0; i <abs( (table->count)); i++ ) |
417 |
|
|
{ |
418 |
|
|
func( sortmap[i].key, sortmap[i].data ); |
419 |
|
|
} |
420 |
|
|
|
421 |
|
|
/* free the pointerarray */ |
422 |
|
|
free( sortmap ); |
423 |
|
|
sortmap = NULL; |
424 |
|
|
|
425 |
|
|
return 1; |
426 |
|
|
} |
427 |
|
|
|
428 |
|
|
/* HW: changed testmain */ |
429 |
|
|
#define TEST |
430 |
|
|
#ifdef TEST |
431 |
|
|
|
432 |
|
|
#include <stdio.h> |
433 |
|
|
//#include "snip_str.h" /* for strdup() */ |
434 |
|
|
|
435 |
|
|
FILE *o; |
436 |
|
|
|
437 |
|
|
void fprinter(char *string, void *data) |
438 |
|
|
{ |
439 |
|
|
fprintf(o,"%s: %s\n", string, (char *)data); |
440 |
|
|
} |
441 |
|
|
|
442 |
|
|
void printer(char *string, void *data) |
443 |
|
|
{ |
444 |
|
|
printf("%s: %s\n", string, (char *)data); |
445 |
|
|
} |
446 |
|
|
|
447 |
|
|
/* function to pass to hash_free_table */ |
448 |
|
|
void strfree( void *d ) |
449 |
|
|
{ |
450 |
|
|
/* any additional processing goes here (if you use structures as data) */ |
451 |
|
|
/* free the datapointer */ |
452 |
|
|
free(d); |
453 |
|
|
} |
454 |
|
|
|
455 |
|
|
|
456 |
|
|
int main(void) |
457 |
|
|
{ |
458 |
|
|
hash_table table; |
459 |
|
|
|
460 |
|
|
char *strings[] = { |
461 |
|
|
"The first string", |
462 |
|
|
"The second string", |
463 |
|
|
"The third string", |
464 |
|
|
"The fourth string", |
465 |
|
|
"A much longer string than the rest in this example.", |
466 |
|
|
"The last string", |
467 |
|
|
NULL |
468 |
|
|
}; |
469 |
|
|
|
470 |
|
|
char *junk[] = { |
471 |
|
|
"The first data", |
472 |
|
|
"The second data", |
473 |
|
|
"The third data", |
474 |
|
|
"The fourth data", |
475 |
|
|
"The fifth datum", |
476 |
|
|
"The sixth piece of data" |
477 |
|
|
}; |
478 |
|
|
|
479 |
|
|
int i; |
480 |
|
|
void *j; |
481 |
|
|
|
482 |
|
|
hash_construct_table(&table,211); |
483 |
|
|
|
484 |
|
|
/* I know, no checking on strdup ;-)), but using strdup |
485 |
|
|
to demonstrate hash_table_free with a functionpointer */ |
486 |
|
|
for (i = 0; NULL != strings[i]; i++ ) |
487 |
|
|
hash_insert( strings[i], strdup(junk[i]), &table ); |
488 |
|
|
|
489 |
|
|
/* enumerating to a file */ |
490 |
|
|
if( NULL != (o = fopen("HASH.HSH","wb")) ) |
491 |
|
|
{ |
492 |
|
|
fprintf( o, "%d strings in the table:\n\n", table.count ); |
493 |
|
|
hash_enumerate( &table, fprinter ); |
494 |
|
|
fprintf( o, "\nsorted by key:\n"); |
495 |
|
|
hash_sorted_enum( &table, fprinter ); |
496 |
|
|
fclose( o ); |
497 |
|
|
} |
498 |
|
|
|
499 |
|
|
/* enumerating to screen */ |
500 |
|
|
hash_sorted_enum( &table, printer ); |
501 |
|
|
printf("\n"); |
502 |
|
|
|
503 |
|
|
/* delete 3 strings, should be 3 left */ |
504 |
|
|
for( i=0; i<3; i++ ) |
505 |
|
|
{ |
506 |
|
|
/* hash_del returns a pointer to the data */ |
507 |
|
|
strfree( hash_del( strings[i], &table) ); |
508 |
|
|
} |
509 |
|
|
hash_enumerate( &table, printer); |
510 |
|
|
|
511 |
|
|
for (i=0;NULL != strings[i];i++) |
512 |
|
|
{ |
513 |
|
|
j = hash_lookup(strings[i], &table); |
514 |
|
|
if (NULL == j) |
515 |
|
|
printf("\n'%s' is not in the table", strings[i]); |
516 |
|
|
else |
517 |
|
|
printf("\n%s is still in the table.", strings[i]); |
518 |
|
|
} |
519 |
|
|
|
520 |
|
|
hash_free_table( &table, strfree ); |
521 |
|
|
|
522 |
|
|
return 0; |
523 |
|
|
} |
524 |
|
|
#endif /* TEST */ |