1 |
#include <string.h> |
2 |
#include <stdlib.h> |
3 |
/* #define NDEBUG */ |
4 |
#include <assert.h> |
5 |
|
6 |
#include "hash.h" |
7 |
|
8 |
|
9 |
/* |
10 |
** public domain code by Jerry Coffin. |
11 |
** |
12 |
** Tested with Visual C 1.0 and Borland C 3.1. |
13 |
** Compiles without warnings, and seems like it should be pretty |
14 |
** portable. |
15 |
*/ |
16 |
|
17 |
/* HW: HenkJan Wolthuis, 1997, public domain |
18 |
|
19 |
changed functionnames, all public functions now have a 'hash_' prefix |
20 |
minor editing, marked 'm all(?) with a description |
21 |
removed a bug in hash_del and one in hash_enumerate |
22 |
added some assertions |
23 |
added a 'count' member to hold the number of elements |
24 |
added hash_sorted_enum, sometimes useful |
25 |
changed the testmain |
26 |
*/ |
27 |
|
28 |
/* |
29 |
** RBS: Bob Stout, 2003, public domain |
30 |
** |
31 |
** 1. Fixed some problems in hash() static function. |
32 |
** 2. Use unsigned shorts for hash values. This was implicit in the original |
33 |
** which was written for PC's using early Microsoft and Borland compilers. |
34 |
*/ |
35 |
|
36 |
/* HW: #define to allow duplicate keys, they're added before the existing |
37 |
key so hash_lookup finds the last one inserted first (LIFO) |
38 |
when not defined, hash_insert swaps the datapointers, returning a |
39 |
pointer to the old data |
40 |
*/ |
41 |
/* #define DUPLICATE_KEYS */ |
42 |
|
43 |
/* |
44 |
** These are used in freeing a table. Perhaps I should code up |
45 |
** something a little less grungy, but it works, so what the heck. |
46 |
*/ |
47 |
static void (*function)(void *) = NULL; |
48 |
static hash_table *the_table = NULL; |
49 |
|
50 |
|
51 |
/* Initialize the hash_table to the size asked for. Allocates space |
52 |
** for the correct number of pointers and sets them to NULL. If it |
53 |
** can't allocate sufficient memory, signals error by setting the size |
54 |
** of the table to 0. |
55 |
*/ |
56 |
/*HW: changed, now returns NULL on malloc-failure */ |
57 |
hash_table *hash_construct_table( hash_table *table, size_t size ) |
58 |
{ |
59 |
size_t i; |
60 |
bucket **temp; |
61 |
|
62 |
table->size = size; |
63 |
table->count = 0; |
64 |
table->table = (bucket **)malloc(sizeof(bucket *) * size); |
65 |
temp = table->table; |
66 |
|
67 |
if( NULL == temp ) |
68 |
{ |
69 |
table->size = 0; |
70 |
return NULL; /*HW: was 'table' */ |
71 |
} |
72 |
|
73 |
for( i=0; i<size; i++ ) |
74 |
temp[i] = NULL; |
75 |
|
76 |
return table; |
77 |
} |
78 |
|
79 |
|
80 |
/* |
81 |
** Hashes a string to produce an unsigned short, which should be |
82 |
** sufficient for most purposes. |
83 |
** RBS: fixed per user feedback from Steve Greenland |
84 |
*/ |
85 |
|
86 |
static unsigned short hash(char *string) |
87 |
{ |
88 |
unsigned short ret_val = 0; |
89 |
int i; |
90 |
|
91 |
while (*string) |
92 |
{ |
93 |
/* |
94 |
** RBS: Added conditional to account for strings in which the |
95 |
** length is less than an integral multiple of sizeof(int). |
96 |
** |
97 |
** Note: This fixes the problem of hasing trailing garbage, but |
98 |
** doesn't fix the problem with some CPU's which can't align on |
99 |
** byte boundries. Any decent C compiler *should* fix this, but |
100 |
** it still might extract a performance hit. Also unaddressed is |
101 |
** what happens when using a CPU which addresses data only on |
102 |
** 4-byte boundries when it tries to work with a pointer to a |
103 |
** 2-byte unsigned short. |
104 |
*/ |
105 |
|
106 |
if (strlen(string) >= sizeof(unsigned short)) |
107 |
i = *(unsigned short *)string; |
108 |
else i = (unsigned short)(*string); |
109 |
ret_val ^= i; |
110 |
ret_val <<= 1; |
111 |
string ++; |
112 |
} |
113 |
return ret_val; |
114 |
} |
115 |
|
116 |
/* |
117 |
** Insert 'key' into hash table. |
118 |
** Returns pointer to old data associated with the key, if any, or |
119 |
** NULL if the key wasn't in the table previously. |
120 |
*/ |
121 |
/* HW: returns NULL if malloc failed */ |
122 |
void *hash_insert( char *key, void *data, hash_table *table ) |
123 |
{ |
124 |
unsigned short val = hash(key) % table->size; |
125 |
bucket *ptr; |
126 |
|
127 |
assert( NULL != key ); |
128 |
|
129 |
/* |
130 |
** NULL means this bucket hasn't been used yet. We'll simply |
131 |
** allocate space for our new bucket and put our data there, with |
132 |
** the table pointing at it. |
133 |
*/ |
134 |
|
135 |
if( NULL == (table->table)[val] ) |
136 |
{ |
137 |
(table->table)[val] = (bucket *)malloc(sizeof(bucket)); |
138 |
if( NULL == (table->table)[val] ) |
139 |
return NULL; |
140 |
|
141 |
if( NULL == ((table->table)[val]->key = (char*) malloc(strlen(key)+1)) ) |
142 |
{ |
143 |
free( (table->table)[val] ); |
144 |
(table->table)[val] = NULL; |
145 |
return NULL; |
146 |
} |
147 |
strcpy( (table->table)[val]->key, key); |
148 |
(table->table)[val] -> next = NULL; |
149 |
(table->table)[val] -> data = data; |
150 |
table->count++; /* HW */ |
151 |
return (table->table)[val] -> data; |
152 |
} |
153 |
|
154 |
/* HW: added a #define so the hashtable can accept duplicate keys */ |
155 |
#ifndef DUPLICATE_KEYS |
156 |
/* |
157 |
** This spot in the table is already in use. See if the current string |
158 |
** has already been inserted, and if so, increment its count. |
159 |
*/ /* HW: ^^^^^^^^ ?? */ |
160 |
for( ptr = (table->table)[val]; NULL != ptr; ptr = ptr->next ) |
161 |
if( 0 == strcmp(key, ptr->key) ) |
162 |
{ |
163 |
void *old_data; |
164 |
|
165 |
old_data = ptr->data; |
166 |
ptr->data = data; |
167 |
return old_data; |
168 |
} |
169 |
#endif |
170 |
/* |
171 |
** This key must not be in the table yet. We'll add it to the head of |
172 |
** the list at this spot in the hash table. Speed would be |
173 |
** slightly improved if the list was kept sorted instead. In this case, |
174 |
** this code would be moved into the loop above, and the insertion would |
175 |
** take place as soon as it was determined that the present key in the |
176 |
** list was larger than this one. |
177 |
*/ |
178 |
|
179 |
ptr = (bucket *)malloc(sizeof(bucket)); |
180 |
if( NULL == ptr ) |
181 |
return NULL; /*HW: was 0 */ |
182 |
|
183 |
if( NULL == (ptr -> key = (char*) malloc(strlen(key)+1)) ) |
184 |
{ |
185 |
free(ptr); |
186 |
return NULL; |
187 |
} |
188 |
strcpy( ptr->key, key ); |
189 |
ptr -> data = data; |
190 |
ptr -> next = (table->table)[val]; |
191 |
(table->table)[val] = ptr; |
192 |
table->count++; /* HW */ |
193 |
|
194 |
return data; |
195 |
} |
196 |
|
197 |
|
198 |
/* |
199 |
** Look up a key and return the associated data. Returns NULL if |
200 |
** the key is not in the table. |
201 |
*/ |
202 |
void *hash_lookup( char *key, hash_table *table ) |
203 |
{ |
204 |
unsigned short val = hash(key) % table->size; |
205 |
bucket *ptr; |
206 |
|
207 |
assert( NULL != key ); |
208 |
|
209 |
if(NULL == (table->table)[val]) |
210 |
return NULL; |
211 |
|
212 |
for( ptr = (table->table)[val]; NULL != ptr; ptr = ptr->next ) |
213 |
{ |
214 |
if(0 == strcmp( key, ptr -> key ) ) |
215 |
return ptr->data; |
216 |
} |
217 |
|
218 |
return NULL; |
219 |
} |
220 |
|
221 |
/* |
222 |
** Delete a key from the hash table and return associated |
223 |
** data, or NULL if not present. |
224 |
*/ |
225 |
|
226 |
void *hash_del(char *key, hash_table *table) |
227 |
{ |
228 |
unsigned short val = hash(key) % table->size; |
229 |
void *data; |
230 |
bucket *ptr, *last = NULL; |
231 |
|
232 |
assert( NULL != key ); |
233 |
|
234 |
if( NULL == (table->table)[val] ) |
235 |
return NULL; /* HW: was 'return 0' */ |
236 |
|
237 |
/* |
238 |
** Traverse the list, keeping track of the previous node in the list. |
239 |
** When we find the node to delete, we set the previous node's next |
240 |
** pointer to point to the node after ourself instead. We then delete |
241 |
** the key from the present node, and return a pointer to the data it |
242 |
** contains. |
243 |
*/ |
244 |
for( last = NULL, ptr = (table->table)[val]; |
245 |
NULL != ptr; |
246 |
last = ptr, ptr = ptr->next ) |
247 |
{ |
248 |
if( 0 == strcmp( key, ptr -> key) ) |
249 |
{ |
250 |
if( last != NULL ) |
251 |
{ |
252 |
data = ptr -> data; |
253 |
last -> next = ptr -> next; |
254 |
free( ptr->key ); |
255 |
free( ptr ); |
256 |
table->count--; /* HW */ |
257 |
return data; |
258 |
} |
259 |
|
260 |
/* If 'last' still equals NULL, it means that we need to |
261 |
** delete the first node in the list. This simply consists |
262 |
** of putting our own 'next' pointer in the array holding |
263 |
** the head of the list. We then dispose of the current |
264 |
** node as above. |
265 |
*/ |
266 |
else |
267 |
{ |
268 |
/* HW: changed this bit to match the comments above */ |
269 |
data = ptr->data; |
270 |
(table->table)[val] = ptr->next; |
271 |
free( ptr->key ); |
272 |
free( ptr ); |
273 |
table->count--; /* HW */ |
274 |
return data; |
275 |
} |
276 |
} |
277 |
} |
278 |
|
279 |
/* |
280 |
** If we get here, it means we didn't find the item in the table. |
281 |
** Signal this by returning NULL. |
282 |
*/ |
283 |
|
284 |
return NULL; |
285 |
} |
286 |
|
287 |
/* |
288 |
** free_table iterates the table, calling this repeatedly to free |
289 |
** each individual node. This, in turn, calls one or two other |
290 |
** functions - one to free the storage used for the key, the other |
291 |
** passes a pointer to the data back to a function defined by the user, |
292 |
** process the data as needed. |
293 |
*/ |
294 |
|
295 |
static void free_node( char *key, void *data ) |
296 |
{ |
297 |
(void) data; |
298 |
|
299 |
assert( NULL != key ); |
300 |
|
301 |
if( NULL != function ) |
302 |
{ |
303 |
function( hash_del( key, the_table ) ); |
304 |
} |
305 |
else |
306 |
hash_del( key, the_table ); |
307 |
} |
308 |
|
309 |
/* |
310 |
** Frees a complete table by iterating over it and freeing each node. |
311 |
** the second parameter is the address of a function it will call with a |
312 |
** pointer to the data associated with each node. This function is |
313 |
** responsible for freeing the data, or doing whatever is needed with |
314 |
** it. |
315 |
*/ |
316 |
|
317 |
void hash_free_table( hash_table *table, void (*func)(void *) ) |
318 |
{ |
319 |
function = func; |
320 |
the_table = table; |
321 |
|
322 |
hash_enumerate( table, free_node ); |
323 |
free( table->table ); |
324 |
table->table = NULL; |
325 |
table->size = 0; |
326 |
table->count = 0; /* HW */ |
327 |
|
328 |
the_table = NULL; |
329 |
function = NULL; |
330 |
} |
331 |
|
332 |
/* |
333 |
** Simply invokes the function given as the second parameter for each |
334 |
** node in the table, passing it the key and the associated data. |
335 |
*/ |
336 |
|
337 |
void hash_enumerate( hash_table *table, void (*func)(char *, void *) ) |
338 |
{ |
339 |
unsigned i; |
340 |
bucket *temp; |
341 |
bucket *swap; |
342 |
|
343 |
for( i=0; i<table->size; i++ ) |
344 |
{ |
345 |
if( NULL != (table->table)[i] ) |
346 |
{ |
347 |
/* HW: changed this loop */ |
348 |
temp = (table->table)[i]; |
349 |
while( NULL != temp ) |
350 |
{ |
351 |
/* HW: swap trick, in case temp is freed by 'func' */ |
352 |
swap = temp->next; |
353 |
func( temp -> key, temp->data ); |
354 |
temp = swap; |
355 |
} |
356 |
} |
357 |
} |
358 |
} |
359 |
|
360 |
/* HW: added hash_sorted_enum() |
361 |
|
362 |
hash_sorted_enum is like hash_enumerate but gives |
363 |
sorted output. This is much slower than hash_enumerate, but |
364 |
sometimes nice for printing to a file... |
365 |
*/ |
366 |
|
367 |
typedef struct sort_struct |
368 |
{ |
369 |
char *key; |
370 |
void *data; |
371 |
} sort_struct; |
372 |
static sort_struct *sortmap = NULL; |
373 |
|
374 |
static int counter = 0; |
375 |
|
376 |
/* HW: used as 'func' for hash_enumerate */ |
377 |
static void key_get( char *key, void *data ) |
378 |
{ |
379 |
sortmap[ counter ].key = key; |
380 |
sortmap[ counter ].data = data; |
381 |
counter++; |
382 |
} |
383 |
|
384 |
/* HW: used for comparing the keys in qsort */ |
385 |
static int key_comp( const void* a, const void *b ) |
386 |
{ |
387 |
return strcmp( (*(sort_struct*)a).key, (*(sort_struct*)b).key ); |
388 |
} |
389 |
|
390 |
/* HW: it's a compromise between speed and space. this one needs |
391 |
table->count * sizeof( sort_struct) memory. |
392 |
Another approach only takes count*sizeof(char*), but needs |
393 |
to hash_lookup the data of every key after sorting the key. |
394 |
returns 0 on malloc failure, 1 on success |
395 |
*/ |
396 |
int hash_sorted_enum( hash_table *table, void (*func)( char *, void *) ) |
397 |
{ |
398 |
int i; |
399 |
|
400 |
/* nothing to do ! */ |
401 |
if( NULL == table || 0 == table->count || NULL == func ) |
402 |
return 0; |
403 |
|
404 |
/* malloc an pointerarray for all hashkey's and datapointers */ |
405 |
if( NULL == ( sortmap = (sort_struct*) malloc( sizeof( sort_struct ) * table->count)) ) |
406 |
return 0; |
407 |
|
408 |
/* copy the pointers to the hashkey's */ |
409 |
counter = 0; |
410 |
hash_enumerate( table, key_get ); |
411 |
|
412 |
/* sort the pointers to the keys */ |
413 |
qsort( sortmap, table->count, sizeof(sort_struct), key_comp ); |
414 |
|
415 |
/* call the function for each node */ |
416 |
for( i=0; i <abs( (table->count)); i++ ) |
417 |
{ |
418 |
func( sortmap[i].key, sortmap[i].data ); |
419 |
} |
420 |
|
421 |
/* free the pointerarray */ |
422 |
free( sortmap ); |
423 |
sortmap = NULL; |
424 |
|
425 |
return 1; |
426 |
} |
427 |
|
428 |
/* HW: changed testmain */ |
429 |
#define TEST |
430 |
#ifdef TEST |
431 |
|
432 |
#include <stdio.h> |
433 |
//#include "snip_str.h" /* for strdup() */ |
434 |
|
435 |
FILE *o; |
436 |
|
437 |
void fprinter(char *string, void *data) |
438 |
{ |
439 |
fprintf(o,"%s: %s\n", string, (char *)data); |
440 |
} |
441 |
|
442 |
void printer(char *string, void *data) |
443 |
{ |
444 |
printf("%s: %s\n", string, (char *)data); |
445 |
} |
446 |
|
447 |
/* function to pass to hash_free_table */ |
448 |
void strfree( void *d ) |
449 |
{ |
450 |
/* any additional processing goes here (if you use structures as data) */ |
451 |
/* free the datapointer */ |
452 |
free(d); |
453 |
} |
454 |
|
455 |
|
456 |
int main(void) |
457 |
{ |
458 |
hash_table table; |
459 |
|
460 |
char *strings[] = { |
461 |
"The first string", |
462 |
"The second string", |
463 |
"The third string", |
464 |
"The fourth string", |
465 |
"A much longer string than the rest in this example.", |
466 |
"The last string", |
467 |
NULL |
468 |
}; |
469 |
|
470 |
char *junk[] = { |
471 |
"The first data", |
472 |
"The second data", |
473 |
"The third data", |
474 |
"The fourth data", |
475 |
"The fifth datum", |
476 |
"The sixth piece of data" |
477 |
}; |
478 |
|
479 |
int i; |
480 |
void *j; |
481 |
|
482 |
hash_construct_table(&table,211); |
483 |
|
484 |
/* I know, no checking on strdup ;-)), but using strdup |
485 |
to demonstrate hash_table_free with a functionpointer */ |
486 |
for (i = 0; NULL != strings[i]; i++ ) |
487 |
hash_insert( strings[i], strdup(junk[i]), &table ); |
488 |
|
489 |
/* enumerating to a file */ |
490 |
if( NULL != (o = fopen("HASH.HSH","wb")) ) |
491 |
{ |
492 |
fprintf( o, "%d strings in the table:\n\n", table.count ); |
493 |
hash_enumerate( &table, fprinter ); |
494 |
fprintf( o, "\nsorted by key:\n"); |
495 |
hash_sorted_enum( &table, fprinter ); |
496 |
fclose( o ); |
497 |
} |
498 |
|
499 |
/* enumerating to screen */ |
500 |
hash_sorted_enum( &table, printer ); |
501 |
printf("\n"); |
502 |
|
503 |
/* delete 3 strings, should be 3 left */ |
504 |
for( i=0; i<3; i++ ) |
505 |
{ |
506 |
/* hash_del returns a pointer to the data */ |
507 |
strfree( hash_del( strings[i], &table) ); |
508 |
} |
509 |
hash_enumerate( &table, printer); |
510 |
|
511 |
for (i=0;NULL != strings[i];i++) |
512 |
{ |
513 |
j = hash_lookup(strings[i], &table); |
514 |
if (NULL == j) |
515 |
printf("\n'%s' is not in the table", strings[i]); |
516 |
else |
517 |
printf("\n%s is still in the table.", strings[i]); |
518 |
} |
519 |
|
520 |
hash_free_table( &table, strfree ); |
521 |
|
522 |
return 0; |
523 |
} |
524 |
#endif /* TEST */ |