问题
I'm trying to program a trie in C to read a file and add all the words in the file to the trie, and it works well, but I can't get it to accept apostrophes:
typedef struct node
{
bool wordBool;
struct node* next[27]; // 26 letters and one space for the apostrophe
}
node;
node* base;
int numWords = 0;
bool load(const char* dictionary)
{
FILE* dictionaryf = fopen(dictionary, "r"); // the file to read
base = malloc(sizeof(node));
node variable;
node *currNode = &variable;
int n = 0;
while((n = fgetc(dictionaryf)) != EOF)
{
if (n == '\n')
{
if (!currNode->wordBool)
{
currNode->wordBool = true;
numWords++;
}
currNode = base;
}
else if (n == 39) //I tried putting this in so it would accept apostrophes
{
if(currNode->next[n-39] == NULL)
{
currNode->next[n-39] = malloc(sizeof(node));
}
currNode = currNode->next[n-39];
}
else {
if(currNode->next[n-96] == NULL)
{
currNode->next[n-96] = malloc(sizeof(node));
}
currNode = currNode->next[n-96];
}
}
if (currNode!= base && !currNode->wordBool)
{
currNode->wordBool = true;
numWords++;
}
printf("%i\n", numWords);
fclose(dictionaryf);
return true;
}
this is the code that makes the trie, but it won't put apostrophes into the trie
回答1:
This code is closely based on yours, but tackles a slightly different problem in that it accepts an arbitrary text file and processes it regardless of the characters in it. It treats accented characters as 'non-alphabetic' in the typical style of English-speakers (in part because it doesn't use setlocale()
, and in part because it doesn't process multi-byte or wide characters). It counts the number of times each word is present (which takes up no extra space in the data structure on a 64-bit machine). It includes a printing function which is important for checking that it did the job correctly, of course.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node
{
bool wordBool;
int wordCount;
struct node *next[27]; // 26 letters and one space for the apostrophe
} node;
static const char trie_map[] = "'abcdefghijklmnopqrstuvwxyz";
static node *base = 0;
static int numWords = 0;
static void oom(void)
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);
}
static int trie_index(char c)
{
char *p = strchr(trie_map, tolower(c));
if (p == 0)
return -1;
else
return (p - trie_map);
}
static
bool load(const char *dictionary)
{
FILE *dictionaryf = fopen(dictionary, "r"); // the file to read
if (dictionaryf == 0)
return false;
base = calloc(sizeof(node), 1);
node *currNode = base;
int n;
while ((n = fgetc(dictionaryf)) != EOF)
{
n = trie_index(n);
if (n >= 0)
{
if (currNode->next[n] == NULL)
{
currNode->next[n] = calloc(sizeof(node), 1);
if (currNode->next[n] == NULL)
oom();
}
currNode = currNode->next[n];
}
else if (currNode != base)
{
if (!currNode->wordBool)
{
currNode->wordBool = true;
numWords++;
}
currNode->wordCount++;
currNode = base;
}
/* else: consecutive non-letters, non-apostrophes */
}
if (currNode != base && !currNode->wordBool)
{
currNode->wordBool = true;
numWords++;
}
printf("%i distinct words\n", numWords);
fclose(dictionaryf);
return true;
}
static void print_trie(node *trie, char *buffer, size_t buflen)
{
if (trie != 0)
{
if (trie->wordBool)
printf("Word: %3d [%s]\n", trie->wordCount, buffer);
size_t len = strlen(buffer);
if (len >= buflen - 2)
{
fprintf(stderr, "Word too long!\n[%s]\n", buffer);
exit(EXIT_FAILURE);
}
for (int i = 0; i < 27; i++)
{
if (trie->next[i] != 0)
{
buffer[len] = trie_map[i];
buffer[len+1] = '\0';
print_trie(trie->next[i], buffer, buflen);
}
}
}
}
int main(int argc, char **argv)
{
const char *data = "data";
if (argc == 2)
data = argv[1];
if (load(data))
{
printf("Loaded file '%s' OK\n", data);
char buffer[256] = "";
print_trie(base, buffer, sizeof(buffer));
}
else
printf("Load failed!\n");
return 0;
}
When run on its own source code (trie-31.c
), it produces:
94 distinct words
Loaded file 'trie-31.c' OK
Word: 3 [']
Word: 1 ['abcdefghijklmnopqrstuvwxyz]
Word: 1 [and]
Word: 1 [apostrophe]
Word: 1 [apostrophes]
Word: 2 [argc]
Word: 2 [argv]
Word: 7 [base]
Word: 2 [bool]
Word: 10 [buffer]
Word: 3 [buflen]
Word: 2 [c]
Word: 2 [calloc]
Word: 8 [char]
Word: 1 [consecutive]
Word: 3 [const]
Word: 1 [ctype]
Word: 14 [currnode]
Word: 1 [d]
Word: 5 [data]
Word: 2 [dictionary]
Word: 4 [dictionaryf]
Word: 1 [distinct]
Word: 4 [else]
Word: 1 [eof]
Word: 4 [exit]
Word: 1 [failed]
Word: 2 [failure]
Word: 1 [false]
Word: 1 [fclose]
Word: 1 [fgetc]
Word: 3 [file]
Word: 1 [fopen]
Word: 2 [for]
Word: 2 [fprintf]
Word: 5 [h]
Word: 7 [i]
Word: 14 [if]
Word: 5 [include]
Word: 2 [index]
Word: 7 [int]
Word: 4 [len]
Word: 2 [letters]
Word: 3 [load]
Word: 1 [loaded]
Word: 1 [long]
Word: 1 [main]
Word: 4 [map]
Word: 1 [memory]
Word: 16 [n]
Word: 7 [next]
Word: 8 [node]
Word: 2 [non]
Word: 2 [null]
Word: 4 [numwords]
Word: 1 [of]
Word: 1 [ok]
Word: 1 [one]
Word: 2 [oom]
Word: 1 [out]
Word: 3 [p]
Word: 3 [print]
Word: 4 [printf]
Word: 1 [r]
Word: 1 [read]
Word: 5 [return]
Word: 2 [s]
Word: 1 [s']
Word: 2 [size]
Word: 3 [sizeof]
Word: 1 [space]
Word: 7 [static]
Word: 1 [stdbool]
Word: 2 [stderr]
Word: 1 [stdio]
Word: 1 [stdlib]
Word: 1 [strchr]
Word: 1 [string]
Word: 1 [strlen]
Word: 2 [struct]
Word: 2 [t]
Word: 2 [the]
Word: 1 [to]
Word: 1 [tolower]
Word: 1 [too]
Word: 15 [trie]
Word: 3 [true]
Word: 1 [typedef]
Word: 3 [void]
Word: 1 [while]
Word: 2 [word]
Word: 6 [wordbool]
Word: 3 [wordcount]
Word: 1 [words]
Some of the words include apostrophes (there's '
on its own, 'abcdefghijklmnopqrstuvwxyz
and s'
). For the file great.panjandrum which contains:
So she went into the garden
to cut a cabbage-leaf
to make an apple-pie
and at the same time
a great she-bear coming down the street
pops its head into the shop
What no soap
So he died
and she very imprudently married the Barber
and there were present
the Picninnies
and the Joblillies
and the Garyulies
and the great Panjandrum himself
with the little round button at top
and they all fell to playing the game of catch-as-catch-can
till the gunpowder ran out at the heels of their boots
The output is:
66 distinct words
Loaded file 'great.panjandrum' OK
Word: 2 [a]
Word: 1 [all]
Word: 1 [an]
Word: 7 [and]
Word: 1 [apple]
Word: 1 [as]
Word: 3 [at]
Word: 1 [barber]
Word: 1 [bear]
Word: 1 [boots]
Word: 1 [button]
Word: 1 [cabbage]
Word: 1 [can]
Word: 2 [catch]
Word: 1 [coming]
Word: 1 [cut]
Word: 1 [died]
Word: 1 [down]
Word: 1 [fell]
Word: 1 [game]
Word: 1 [garden]
Word: 1 [garyulies]
Word: 2 [great]
Word: 1 [gunpowder]
Word: 1 [he]
Word: 1 [head]
Word: 1 [heels]
Word: 1 [himself]
Word: 1 [imprudently]
Word: 2 [into]
Word: 1 [its]
Word: 1 [joblillies]
Word: 1 [leaf]
Word: 1 [little]
Word: 1 [make]
Word: 1 [married]
Word: 1 [no]
Word: 2 [of]
Word: 1 [out]
Word: 1 [panjandrum]
Word: 1 [picninnies]
Word: 1 [pie]
Word: 1 [playing]
Word: 1 [pops]
Word: 1 [present]
Word: 1 [ran]
Word: 1 [round]
Word: 1 [same]
Word: 3 [she]
Word: 1 [shop]
Word: 2 [so]
Word: 1 [soap]
Word: 1 [street]
Word: 13 [the]
Word: 1 [their]
Word: 1 [there]
Word: 1 [they]
Word: 1 [till]
Word: 1 [time]
Word: 3 [to]
Word: 1 [top]
Word: 1 [very]
Word: 1 [went]
Word: 1 [were]
Word: 1 [what]
Word: 1 [with]
来源:https://stackoverflow.com/questions/37106523/c-trie-trying-to-add-apostrophe