问题
Hello can someone help me find what is causing the problem? For some reason the find_hash function is giving me problems. It should be failing the if(table -> buckets_array[i] != NULL){
and if(table -> buckets_array[i] != '\0'){
but it is not and it going to the next check which gives me a segmentation fault. What can be causing the first 2 if's statement to pass since I initially set it to table -> buckets_array[i] = NULL?
Edit: Thanks for wildplasser
I came up with a much better solution for my find_hash function.
if(table->buckets_array[table->hash_func(key)] == NULL){
return NULL;
}else{
return table -> buckets_array[table->hash_func(key)] -> data;
}
Thanks you everyone for the help.
/*hash.h*/
#include<stdio.h>
#include<stdlib.h>
typedef struct data_{
char *key;
void *data;
struct data_ *next;
}data_el;
typedef struct hash_table_ {
void **order;
int *number_next_calls;
int *number_buckets;
int *buckets_size;
int *worst;
int *total;
float *average;
int (*hash_func)(char *);
int (*comp_func)(void*, void*);
data_el **buckets_array;
} hash_table, *Phash_table;
Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);
static void lower_case_word(char *w);
/*hash.c*/
#include"hash.h"
Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *)){
int i;
Phash_table table_p;
hash_table hash_table;
table_p = (Phash_table)malloc(sizeof(hash_table));
/*Setting function pointers*/
table_p->hash_func = hash_func;
table_p->comp_func = cmp_func;
/*Create buckets array*/
table_p->buckets_array = (data_el **)malloc(sizeof(data_el *)*(size+1));
table_p->buckets_size = (int *)malloc(sizeof(int)*(size+1));
/*Setting order array*/
table_p->order = NULL;
/*Setting inital condictions*/
table_p->worst = (int *)malloc(sizeof(int));
table_p->total = (int *)malloc(sizeof(int));
table_p->average = (float *)malloc(sizeof(float));
table_p->number_buckets = (int *)malloc(sizeof(int));
*(table_p->number_buckets) = size;
for(i = 0; i < size; i++){
table_p->buckets_array[i] = NULL;
}
return table_p;
}
void free_hash(Phash_table table){
int i;
i = 0;
data_el *cur;
data_el *prev;
/*Free order array*/
if(table->order != NULL){
free(table->order);
}
/*Free Buckets array and buckets_size array*/
if(table ->buckets_array != NULL){
for(i=0; i < *(table->number_buckets); i++){
if(table->buckets_array[i] != NULL){
/*Travers the linked list and free it*/
cur = table->buckets_array[i];
prev = cur;
while((cur = cur->next) != NULL){
free(prev);
prev = cur;
}
/*Free the last node*/
free(cur);
}
}
}
free(table);
}
void insert_hash(Phash_table table, char *key, void *data){
int index;
data_el *p, *cur;
index = table->hash_func(key);
/*Head insertion*/
if(table->buckets_array[index] == NULL){
table->buckets_array[index] = (data_el *)malloc(sizeof(data_el));
table->buckets_array[index]->data = data;
table->buckets_array[index]->next = NULL;
table->buckets_array[index]->key = key;
}else{
cur = table->buckets_array[index];
p = (data_el *)malloc(sizeof(data_el));
p->key = key;
p->data = data;
p->next = cur;
cur = p;
table->buckets_array[index] = cur;
}
/*Update Total*/
*table->total += 1;
/*Update Bucket Size*/
(table->buckets_size[index]) +=1;
/*Updates Worst Case*/
if((table->buckets_size[index]) > *(table->worst)){
*(table->worst) = (table->buckets_size[index]);
}else{
*(table->worst) +=1;
}
/*Update Average*/
int temp_total,temp_buckets;
temp_total = *(table->total);
temp_buckets = *(table->number_buckets);
*(table->average)= (float)temp_total/(float)temp_buckets;
}
void *find_hash(Phash_table table, char *key){
int i;
i = 0;
while(1){
if(table->buckets_array[i] == '\0'){
printf("End of Array");
break;
}
if(table->buckets_array[i] != NULL){
printf("%Checking");
if(table->buckets_array[i] != '\0'){
if(table->buckets_array[i]->key != NULL){
if( strcmp(table->buckets_array[i]->key,key) == 0 ){
printf("Found it\n");
break;
}
}
}
}
i++;
}
if(table->buckets_array[i] != NULL){
printf("new asdasd %d", *((int *)table->buckets_array[i]->data));
return table->buckets_array[i]->data;
}else{
return NULL;
}
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){
total = (table->total);
largest = (table->worst);
average = (table->average);
}
void *start_hash_walk(Phash_table table){
int i, max_buckets,step,next_calls;
step = 0;
data_el *cur;
next_calls = 0;
max_buckets = *(table ->number_buckets);
table->order = (void**)malloc(sizeof(void *)*(*(table->total)));
/*Set number_next_calls to 0*/
table->number_next_calls = &next_calls;
*(table->number_next_calls) = 0;
/*Traverse the ADT and put all data into order array*/
for(i = 0; i < max_buckets; i++){
if(table->buckets_array[i] != NULL){
cur = table->buckets_array[i];
while(cur){
table->order[step] = cur->data;
step ++;
cur = cur->next;
}
}
}
/*Bubble Short*/
int j,k;
for(j = 0; j < (step - 1);j++){
for(k = 0;k < (step -(j+1));k ++){
if((table->comp_func)(table->order[j],table->order[j+1]) < 5){
void *temp;
temp = table->order[j];
table->order[j] = table->order[j+1];
table->order[j+1] = temp;
printf("Swaping %s with %s\n",table->order[j],table->order[j+1]);
}
}
}
return table->order;
}
void *next_hash_walk(Phash_table table){
/*
Check the amount of times next_has_walk has been
if higher than total, it will return null
*/
if(*(table->number_next_calls) >= *(table->total)){
return NULL;
}else{
*(table->number_next_calls) = *(table->number_next_calls) + 1;
return table->order[*(table->number_next_calls)];
}
}
/*project4.c*/
#include"hash.h"
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
int hash_function(char *word){
int sum,i;
i = 0;
sum = 0;
while(word[i] != '\0'){
sum = sum + word[i];
i++;
}
return sum%1000;
}
int main(void){
/*Creating buckets array*/
Phash_table dictionary;
void *test;
dictionary = new_hash(DICTIONARY_SIZE,hash_function,comp);
int i;
i = 0;
void *frequency[DICTIONARY_SIZE];
int char_index = 0, dictionary_size = 0, num_words = 0;
char c, word[WORD_SIZE];
printf("Parsing input ...\n");
while ((c = getchar()) != EOF) {
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n')) {
/* End of a word */
if (char_index) {
/* Word is not empty */
word[char_index] = '\0';
lower_case_word(word);
if(!find_hash(dictionary,word) ){
insert_hash(dictionary,word,frequency[hash_function(word)]);
}
frequency[hash_function(word)]++;
char_index = 0;
num_words++;
}
}else{
word[char_index++] = c;
}
}
printf("There were %d words; %d unique words.\n", num_words,dictionary_size);
}
void lower_case_word(char *w){
int i = 0;
while (w[i] != '\0') {
w[i] = tolower(w[i]);
i++;
}
}
回答1:
Your find_hash()
function does not return a value.
You omitted some functions:
Phash_table new_hash(int size, int (*hash)(char *), int (*comp)(void *, void *));
void lower_case_word(char *word);
int hash_function(char *);
int comp_function(void *, void *);
You've not specified DICTIONARY_SIZE or WORD_SIZE.
You've not declared c
in main()
.
You've not defined int main(void)
(or int main(int argc, char **argv)
). The return type is int
, and it takes 0 or 2 arguments.
You've neither defined nor initialized char_index
. You didn't define word
. You didn't define or initialize num_words
.
Generally, you should submit compilable code to SO. The missing functions and standard headers are one thing; it is legitimate (but a mild nuisance) if you omit their definitions.
You might want to consider whether the comparison function should be taking void const *
(or const void *
) arguments instead of just void *
; it will make it more generally usable, for example with qsort()
or bsearch()
.
I'm not clear whether the hash function would benefit from taking a void *
instead of a char *
; it should probably be a char const *
or void const *
rather than an unqualified pointer.
Stylistically, do not put spaces around either ->
or .
operators. They bind tighter than anything else. Generally, put spaces around binary operators such as /
. Generally, put spaces after commas (but be consistent even more).
hash.h
Do not (usually — this is a usual case) declare static functions in headers. A static function is only needed in one file; there is no point in putting it in the header. A header is for sharing information between source files. If something isn't going to be used by other files, there's no point in declaring it.
Do protect headers from reinclusion:
#ifndef HASH_H_INCLUDED
#define HASH_H_INCLUDED
...rest of header here...
#endif /* HASH_H_INCLUDED */
Do not include in the header anything that is not needed to use the header. None of the material in "hash.h" requires either <stdlib.h>
or <stdio.h>
so it probably shouldn't be there.
The C standard uses a space between #include
and the header name; what's good enough for them should be good enough for you.
project4.c
You aren't paying enough attention to compiler warnings, or don't have enough compiler warnings set. If you're using GCC, compile with -Wall
at least. When I was test compiling, I used, for example:
gcc -O3 -g -std=c99 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -c project4.c
I expect my code to compile cleanly under that set of options.
You need #include <ctype.h>
to declare tolower()
.
Function comp()
is not defined anywhere. I added:
static int comp(void *v1, void *v2)
{
char const *s1 = v1;
char const *s2 = v2;
return strcmp(s1, s2);
}
I also put static
in front of hash_function()
to avoid needing an external definition.
One time when I compiled, I got the warning:
project4.c:51: warning: comparison is always false due to limited range of data type
That shows a problem. You declared c
as char
, but then wrote:
while ((c = getchar()) != EOF)
This is a no-no. The function getchar()
returns an int
(yes, the name is not good). The set of values it can return includes every character plus a distinct value EOF. You must use an int
to get reliable behaviour. One of two problems occurs. Either you never get EOF because the assignment is to an (implicitly) unsigned type, and -1 gets mapped to 0xFF, and when 0xFF is promoted to an int
, it is positive and therefore not equal to -1; or you misinterpret a valid character (often U+00FF, ÿ, SMALL LATIN LETTER Y WITH DIAERESIS, colloquially y-umlaut) as EOF.
You should consider using something like !isalnum()
instead of the condition:
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n'))
hash.c
The code omitted #include <string.h>
.
The compiler says:
hash.c: In function ‘find_hash’:
hash.c:142: warning: too few arguments for format
hash.c: In function ‘start_hash_walk’:
hash.c:219: warning: format ‘%s’ expects type ‘char *’, but argument 2 has type ‘void *’
hash.c:219: warning: format ‘%s’ expects type ‘char *’, but argument 3 has type ‘void *’
The line numbers should be treated with a pinch of salt; I use Allman style layout and therefore added a number of lines to your code when I formatted it. But if your compiler isn't giving you this sort of advice, you should be looking for a compiler that does.
Line 142 contains:
printf("%Checking");
The %
is unneeded. However, you may never see the output until something prints a newline, or you use fflush(stdout);
to force the data out.
printf("Checking\n");
As a general rule, especially during debugging, end printed messages with a newline.
Line 219 is:
printf("Swaping %s with %s\n",table->order[j],table->order[j+1]);
It should probably read:
printf("Swapping %s with %s\n", (char *)table->order[j], (char *)table->order[j+1]);
because order
is a void **
. You might be better off with it being a char **
, of course.
Operations
With those (minor) issues fixed, the code compiles and links. When run on some text, the output looks like:
Parsing input ...
End of ArrayEnd of ArrayEnd of ArrayEnd of ArrayEnd of ArrayEnd of Array
Output newlines! It is probably also worth printing out what string it is you've just processed, and whether you added it or found it already in the data.
One problem in main()
: you do not intialize the frequency
array before you use it. Further, it isn't entirely clear why the array exists; why isn't the frequency information being kept in the hash table proper? And why is the frequency information an array of void *
rather than, say, int
?
On one lot of sample data, the program summarized the input as:
There were 68 words; 0 unique words.
The number of unique words is initially worrisome, but it turns out that you initialize dictionary_size
to 0 and never change it and then print it, so that much of the output is correct.
I think you need to write yourself a structure dumper. That's a function that when given a (pointer to the) structure, prints sensible diagnostic information about the data in the structure. I use the general form:
void dump_xyzstructure(FILE *fp, char const *tag, xyzstructure const *data);
Then I can write:
dump_xyzstructure(stdout, "After initialization", &xyz);
dump_xyzstructure(logfp, "At this point", &xyz);
Etc.
Your hash table is pretty complex. However, you have code outside the hash table calling the hash function, and I'm not sure that's a good idea. Mostly, it shouldn't need to. Since your hash table is needed to support frequency counting, the hash table should be recording the frequencies for you; you should not be doing that in the auxilliary code in the main program.
You can simplify life by streamlining the structure. You don't have to dynamically allocate everything in the structure. On a 64-bit machine in particular, that is a huge waste of space. (OK: it is a trivial waste of space, but it a waste of effort too.)
/*Setting inital condictions*/
table_p->worst = (int *)malloc(sizeof(int));
table_p->total = (int *)malloc(sizeof(int));
table_p->average = (float *)malloc(sizeof(float));
table_p->number_buckets = (int *)malloc(sizeof(int));
It would be more compact to have a data structure without those pointers, using just an element in the structure. It will simplify the deallocation code, too (and it simplifies the printing code because it doesn't have to worry about whether the pointers have been allocated or not):
typedef struct hash_table
{
void **order;
int number_next_calls;
int number_buckets;
int *buckets_size; // Number of entries in each bucket
int worst;
int total;
float average;
int (*hash_func)(char *);
int (*comp_func)(void*, void*);
data_el **buckets_array;
} hash_table, *Phash_table;
The average can be computed on demand; it doesn't warrant a place in the structure, really. Losing all those pointers simplifies the code considerably.
For example, this fragment:
if (*(table->number_next_calls) >= *(table->total))
{
return NULL;
}
else
{
*(table->number_next_calls) = *(table->number_next_calls) + 1;
return table->order[*(table->number_next_calls)];
}
becomes:
if (table->number_next_calls >= table->total)
{
return NULL;
}
else
{
table->number_next_calls = table->number_next_calls + 1;
return table->order[table->number_next_calls];
}
or even:
if (table->number_next_calls >= table->total)
return NULL;
else
return table->order[++table->number_next_calls];
Sample text
This is something I use quite a lot in SO comments:
Welcome to StackOverflow. Please note that the preferred way of saying 'thanks' around here is by up-voting good questions and helpful answers (once you have enough reputation to do so), and by accepting the most helpful answer to any question you ask (which also gives you a small boost to your reputation). Please see the
[FAQ](http://stackoverflow.com/faq)
and especially[How do I ask questions here?](http://stackoverflow.com/faq#howtoask)
Sample Output
Most of the data shown comes from the dump_hash_table()
function; I needed to see what was working and what was not. Most of it was actually working — I didn't have to fix up much of the algorithm (the worst case calculation was faulty and is fixed). I've left out the trace code while the input was being parsed.
Hash Table: 0x10E700900 At end of input
Order = 0x00000000
Buckets = 0x7FD9A3800000
Hash = 0x10E607A40
Comp = 0x10E607AA0
Next calls: 0
Buckets: 1000
Worst: 4
Total: 74
Average: 0.074000
Bucket 3: 2
Bucket 67: 1
Bucket 97: 1
Bucket 99: 2
Bucket 105: 1
Bucket 211: 2
Bucket 213: 1
Bucket 219: 2
Bucket 220: 1
Bucket 226: 1
Bucket 227: 4
Bucket 229: 1
Bucket 307: 3
Bucket 312: 3
Bucket 317: 1
Bucket 319: 4
Bucket 321: 3
Bucket 328: 1
Bucket 334: 1
Bucket 337: 1
Bucket 349: 3
Bucket 418: 3
Bucket 420: 3
Bucket 421: 1
Bucket 425: 1
Bucket 431: 1
Bucket 433: 1
Bucket 438: 1
Bucket 448: 2
Bucket 451: 1
Bucket 463: 1
Bucket 531: 1
Bucket 537: 1
Bucket 542: 1
Bucket 551: 1
Bucket 634: 2
Bucket 646: 1
Bucket 649: 2
Bucket 651: 1
Bucket 656: 1
Bucket 663: 1
Bucket 748: 1
Bucket 752: 2
Bucket 771: 1
Bucket 880: 1
Bucket 888: 1
Bucket 942: 1
Bucket 959: 1
Unique words: 74
Used Buckets: 48
There were 74 words; 0 unique words.
hash.h
Note that hash.h
must now include <stdio.h>
to be able to declare dump_hash_table()
. Including <stdlib.h>
is not necessary.
/*hash.h*/
#ifndef HASH_H_INCLUDED
#define HASH_H_INCLUDED
#include <stdio.h>
#include <stdlib.h>
typedef struct data
{
char *key;
void *data;
struct data *next;
} data_el;
typedef struct hash_table
{
void **order;
int number_next_calls;
int number_buckets;
int *buckets_size; // Array of sizes of each bucket
int worst;
int total;
float average;
int (*hash_func)(char *);
int (*comp_func)(void*, void*);
data_el **buckets_array;
} hash_table, *Phash_table;
Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);
void dump_hash_table(FILE *fp, char const *tag, hash_table const *table);
#endif /* HASH_H_INCLUDED */
hash.c
/*hash.c*/
#include "hash.h"
#include <string.h>
#include <inttypes.h>
Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *))
{
int i;
Phash_table table_p;
hash_table hash_table;
table_p = (Phash_table)malloc(sizeof(hash_table));
/*Setting function pointers*/
table_p->hash_func = hash_func;
table_p->comp_func = cmp_func;
/*Create buckets array*/
table_p->buckets_array = (data_el **)malloc(sizeof(data_el *)*(size+1));
table_p->buckets_size = (int *)malloc(sizeof(int)*(size+1));
/*Setting order array*/
table_p->order = NULL;
/*Setting inital conditions*/
table_p->worst = 0;
table_p->total = 0;
table_p->average = 0.0;
table_p->number_buckets = size;
for (i = 0; i < size; i++)
{
table_p->buckets_array[i] = NULL;
}
return table_p;
}
void free_hash(Phash_table table)
{
int i;
i = 0;
data_el *cur;
data_el *prev;
/*Free order array*/
if (table->order != NULL)
{
free(table->order);
}
/*Free Buckets array and buckets_size array*/
if (table->buckets_array != NULL)
{
for (i = 0; i < table->number_buckets; i++)
{
if (table->buckets_array[i] != NULL)
{
/*Travers the linked list and free it*/
cur = table->buckets_array[i];
prev = cur;
while ((cur = cur->next) != NULL)
{
free(prev);
prev = cur;
}
/*Free the last node*/
free(cur);
}
}
}
free(table);
}
void insert_hash(Phash_table table, char *key, void *data)
{
int index;
data_el *p, *cur;
printf("Inserting: <<%s>> (data: %d)\n", key, *(int *)data);
index = table->hash_func(key);
/*Head insertion*/
if (table->buckets_array[index] == NULL)
{
table->buckets_array[index] = (data_el *)malloc(sizeof(data_el));
table->buckets_array[index]->data = data;
table->buckets_array[index]->next = NULL;
table->buckets_array[index]->key = key;
}
else
{
cur = table->buckets_array[index];
p = (data_el *)malloc(sizeof(data_el));
p->key = key;
p->data = data;
p->next = cur;
cur = p;
table->buckets_array[index] = cur;
}
/*Update Total*/
table->total += 1;
/*Update Bucket Size*/
table->buckets_size[index] +=1;
/*Updates Worst Case*/
if (table->buckets_size[index] > table->worst)
{
table->worst = table->buckets_size[index];
}
/*Update Average*/
table->average = (float)table->total / (float)table->number_buckets;
}
void *find_hash(Phash_table table, char *key)
{
int i = 0;
while (1)
{
if (table->buckets_array[i] == '\0')
{
printf("End of Array\n");
break;
}
if (table->buckets_array[i] != NULL)
{
printf("Checking:\n");
if (table->buckets_array[i] != '\0')
{
if (table->buckets_array[i]->key != NULL)
{
if (strcmp(table->buckets_array[i]->key, key) == 0 )
{
printf("Found it\n");
break;
}
}
}
}
i++;
}
if (table->buckets_array[i] != NULL)
{
printf("New entry %d\n", *((int *)table->buckets_array[i]->data));
return table->buckets_array[i]->data;
}
else
{
return NULL;
}
}
void stat_hash(Phash_table table, int *total, int *largest, float *average)
{
*total = table->total;
*largest = table->worst;
*average = table->average;
}
void *start_hash_walk(Phash_table table)
{
int i, max_buckets, step, next_calls;
step = 0;
data_el *cur;
next_calls = 0;
max_buckets = table ->number_buckets;
table->order = (void**)malloc(sizeof(void *) * table->total);
table->number_next_calls = 0;
/*Traverse the ADT and put all data into order array*/
for (i = 0; i < max_buckets; i++)
{
if (table->buckets_array[i] != NULL)
{
cur = table->buckets_array[i];
while (cur)
{
table->order[step] = cur->data;
step ++;
cur = cur->next;
}
}
}
/*Bubble Short*/
int j, k;
for (j = 0; j < (step - 1);j++)
{
for (k = 0;k < (step -(j+1));k ++)
{
if ((table->comp_func)(table->order[j], table->order[j+1]) < 5)
{
void *temp;
temp = table->order[j];
table->order[j] = table->order[j+1];
table->order[j+1] = temp;
printf("Swapping %s with %s\n", (char *)table->order[j], (char *)table->order[j+1]);
}
}
}
return table->order;
}
void *next_hash_walk(Phash_table table)
{
/*
Check the amount of times next_has_walk has been
if higher than total, it will return null
*/
if (table->number_next_calls >= table->total)
return NULL;
else
return table->order[++table->number_next_calls];
}
void dump_hash_table(FILE *fp, char const *tag, hash_table const *table)
{
fprintf(fp, "Hash Table: 0x%.8" PRIXPTR " %s\n", (uintptr_t)table, tag);
if (table == 0)
return;
fprintf(fp, " Order = 0x%.8" PRIXPTR "\n", (uintptr_t)table->order);
fprintf(fp, " Buckets = 0x%.8" PRIXPTR "\n", (uintptr_t)table->buckets_array);
fprintf(fp, " Hash = 0x%.8" PRIXPTR "\n", (uintptr_t)table->hash_func);
fprintf(fp, " Comp = 0x%.8" PRIXPTR "\n", (uintptr_t)table->comp_func);
fprintf(fp, " Next calls: %d\n", table->number_next_calls);
fprintf(fp, " Buckets: %d\n", table->number_buckets);
fprintf(fp, " Worst: %d\n", table->worst);
fprintf(fp, " Total: %d\n", table->total);
fprintf(fp, " Average: %f\n", table->average);
if (table->buckets_size != 0)
{
int unique_words = 0;
int used_buckets = 0;
for (int i = 0; i < table->number_buckets; i++)
{
unique_words += table->buckets_size[i];
if (table->buckets_size[i] != 0)
{
used_buckets++;
fprintf(fp, " Bucket %3d: %d\n", i, table->buckets_size[i]);
}
}
fprintf(fp, " Unique words: %d\n", unique_words);
fprintf(fp, " Used Buckets: %d\n", used_buckets);
}
}
project4.c
/*project4.c*/
#include "hash.h"
#include <string.h>
#include <ctype.h>
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
static void lower_case_word(char *w);
static int hash_function(char *word)
{
int sum, i;
i = 0;
sum = 0;
while (word[i] != '\0')
{
sum = sum + word[i];
i++;
}
return sum%1000;
}
static int comp(void *v1, void *v2)
{
char const *s1 = v1;
char const *s2 = v2;
return strcmp(s1, s2);
}
int main(void)
{
/*Creating buckets array*/
Phash_table dictionary = new_hash(DICTIONARY_SIZE, hash_function, comp);
int frequency[DICTIONARY_SIZE] = { 0 };
int char_index = 0, dictionary_size = 0, num_words = 0;
char word[WORD_SIZE];
int c;
printf("Parsing input ...\n");
while ((c = getchar()) != EOF)
{
if (isalnum(c))
word[char_index++] = c;
else
{
/* End of a word */
if (char_index)
{
/* Word is not empty */
word[char_index] = '\0';
lower_case_word(word);
printf("Got: %s\n", word);
if (!find_hash(dictionary, word))
{
insert_hash(dictionary, word, &frequency[hash_function(word)]);
}
frequency[hash_function(word)]++;
char_index = 0;
num_words++;
}
}
}
dump_hash_table(stdout, "At end of input", dictionary);
printf("There were %d words; %d unique words.\n", num_words, dictionary_size);
}
static void lower_case_word(char *w)
{
int i = 0;
while (w[i] != '\0')
{
w[i] = tolower(w[i]);
i++;
}
}
回答2:
This is overly complex. I'll comment in the code.
void *find_hash(Phash_table table, char *key){
int i;
i = 0;
while(1){
excellent place for a for() loop.
if(table->buckets_array[i] == '\0'){
->buckets_array[] is a pointer to an array of pointers to data_el '\0' is a int (a character constant is an int in C)
printf("End of Array");
break;
}
if(table->buckets_array[i] != NULL){
You already tested that and broke out of the loop. The condition is always true.
printf("%Checking");
if(table->buckets_array[i] != '\0'){
No need to test it for the third time.
if(table->buckets_array[i]->key != NULL){
Ahh!, now we are getting to the payload of the node
if( strcmp(table->buckets_array[i]->key,key) == 0 ){
printf("Found it\n");
break;
}
}
}
}
Kludge of braces here, I am too lazy to read, or count (4) or mach them.
i++;
}
if(table->buckets_array[i] != NULL){
You really seem to like this condition.
printf("new asdasd %d", *((int *)table->buckets_array[i]->data));
return table->buckets_array[i]->data;
Ahh! the payload gets delivered. You could keep the printing to the caller.
}else{
return NULL;
}
}
Reduced verion:
void *find_hash(Phash_table table, char *key){
int i;
unsigned i;
for (i=0; i < *table_p->number_buckets; i++){
data_el *cur;
for (cur = table->buckets_array[i]; cur != NULL; cur = cur->next ) {
if( strcmp(cur->key,key) ) continue;
return cur->data; /* found it ! */
}
}
return NULL;
}
Also: it is impractical to malloc int-size objects (like table_p->number_buckets) Much more practical is to change the definition to:
typedef struct hash_table_ {
void **order; /* This might be intended as a permutation array ? */
unsigned number_next_calls;
unsigned int number_buckets;
unsigned int *buckets_size; /* I think this was intended as a histogram */
unsigned int worst;
unsigned int total;
float average;
int (*hash_func)(char *);
int (*comp_func)(void*, void*);
data_el **buckets_array;
} hash_table, *Phash_table;
Hiding pointers behind a typedef (like your Phash_table) is genarally considered bad practice.
As I commented yesterday, your hash function is dangerous, because it returns a signed int. It could generate a negative index. ( it is also very bad, collision-wise, since "ABC", "ACB" , "CAB" all hash to the same value. Better use K&R, FNV, or Jenkins.)
回答3:
Maybe you could correct the code so that it is what you are really testing. Perhaps include all the necessary bits so we can compile it (I like to).
Note that find_hash
has no return value and yet a call to it precedes the call to insert_hash
that is giving you trouble.
EDIT (full code now shown)
Function comp
in the call to new_hash
- hence cannot compile.
What are you doing with the mallocs in:
table_p->worst = (int *)malloc(sizeof(int));
table_p->total = (int *)malloc(sizeof(int));
table_p->average = (float *)malloc(sizeof(float));
table_p->number_buckets = (int *)malloc(sizeof(int));
The return from malloc should never be cast anyway, but here you are allocating space for an int and then telling the compiler that you allocated space for an int *
. Do these fields actually need to be be pointers?
In your find_hash, the loop should be a for:
for (i=0; table->buckets_array[i] == ...; ++i) {
etc
}
but, as sarnold said, why do you have tests of table->buckets_array[i]
against both NULL
(0) and '\0'
(0). This is very strange stuff.
I would not spend too much time debugging it. Instead I suggest doing a review of the code (or get a colleague to review it) and sorting out some of the problems. Among them are:
- figuring out what table->buckets_array[i] should contain
- un-casting malloc returns
- not mallocing space for integral data
- formatting
- add
const
to parameters that are not changed - extract nested loops where possible
- use the correct looping
来源:https://stackoverflow.com/questions/10467348/why-is-this-passing-the-if-statement