C trie trying to add apostrophe

孤人 提交于 2019-12-13 07:58:58

问题


I'm trying to program a trie in C to read a file and add all the words in the file to the trie, and it works well, but I can't get it to accept apostrophes:

 // Trie node as posted in the question.
 typedef struct node
{
    bool wordBool; // set to true by load() on the node where a word ends
    struct node* next[27]; // 26 letters and one space for the apostrophe
}
node;

node* base; // root of the trie; allocated in load()
int numWords = 0; // number of distinct words, incremented in load()

// Reads `dictionary` one character at a time and inserts each newline-terminated
// word into the trie rooted at `base`; prints the distinct-word count and
// always returns true.
bool load(const char* dictionary)
{

// NOTE(review): the result of fopen() is not checked before fgetc() uses it.
FILE* dictionaryf = fopen(dictionary, "r"); // the file to read

// NOTE(review): malloc() does not zero memory, so wordBool and every next[]
// pointer of this node (and of every node allocated below) are indeterminate.
base = malloc(sizeof(node));

// NOTE(review): currNode starts at an uninitialized stack node instead of
// `base`, so the first word is attached to `variable`, not to the trie.
node variable;
node *currNode = &variable;

int n = 0;

while((n = fgetc(dictionaryf)) != EOF)
{
   if (n == '\n')
   {
      // End of a word: mark the current node and count it once.
      if (!currNode->wordBool)
      {
          currNode->wordBool = true;
          numWords++;
      }
      currNode = base;
   }
   else if (n == 39) //I tried putting this in so it would accept apostrophes
    {
         // 39 is the ASCII code of '\'', so n-39 is always 0: slot 0 holds the apostrophe.
         if(currNode->next[n-39] == NULL)
        {
            currNode->next[n-39] = malloc(sizeof(node));
        } 
        currNode = currNode->next[n-39];
    }
   else {
        // In ASCII 'a' is 97, so n-96 maps 'a'..'z' to slots 1..26.
        // NOTE(review): any other character (uppercase, digits, '\r', ...) gives
        // an index outside 0..26.
        if(currNode->next[n-96] == NULL)
        {
            currNode->next[n-96] = malloc(sizeof(node));
        }      
        currNode = currNode->next[n-96];
   }
}
// Handle a last word that is not followed by a newline.
if (currNode!= base && !currNode->wordBool)
{
    currNode->wordBool = true; 
    numWords++;
}
printf("%i\n", numWords);
fclose(dictionaryf);
return true;
}

This is the code that builds the trie, but it won't store apostrophes in it.


回答1:


This code is closely based on yours, but tackles a slightly different problem in that it accepts an arbitrary text file and processes it regardless of the characters in it. It treats accented characters as 'non-alphabetic' in the typical style of English-speakers (in part because it doesn't use setlocale(), and in part because it doesn't process multi-byte or wide characters). It counts the number of times each word is present (which takes up no extra space in the data structure on a 64-bit machine, because the int occupies padding bytes that would otherwise sit between the bool and the 8-byte-aligned pointer array). It includes a printing function, which is important for checking that it did the job correctly, of course.

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One trie node; the path of child slots from the root to a node spells a prefix. */
typedef struct node
{
    bool wordBool; // true once load() has seen a word ending at this node
    int  wordCount; // number of times load() has seen that word end here
    struct node *next[27]; // 26 letters and one space for the apostrophe
} node;

// Character stored in each child slot: slot 0 is the apostrophe, slots 1..26 are 'a'..'z'.
static const char trie_map[] = "'abcdefghijklmnopqrstuvwxyz";
static node *base = 0; // root of the trie; allocated by load()
static int numWords = 0; // number of distinct words recorded by load()

/* Report an allocation failure on stderr and terminate with a failure status. */
static void oom(void)
{
    fputs("Out of memory\n", stderr);
    exit(EXIT_FAILURE);
}

/*
 * Map a character to its child-slot index in a trie node.
 *
 * Returns the position of the lower-cased character in trie_map (0 for the
 * apostrophe, 1..26 for 'a'..'z'), or -1 when the character is not part of
 * the trie alphabet.
 */
static int trie_index(char c)
{
    /* <ctype.h> functions need a value representable as unsigned char (or
     * EOF); a plain char holding a byte >= 0x80 may be negative. */
    int lower = tolower((unsigned char)c);

    /* strchr() treats the terminating '\0' as part of the string, so a NUL
     * byte would otherwise "match" at index 27, one past the last slot. */
    if (lower == '\0')
        return -1;

    const char *match = strchr(trie_map, lower);
    if (match == NULL)
        return -1;

    return (int)(match - trie_map);
}

static
bool load(const char *dictionary)
{
    FILE *dictionaryf = fopen(dictionary, "r"); // the file to read
    if (dictionaryf == 0)
        return false;

    base = calloc(sizeof(node), 1);

    node *currNode = base;

    int n;

    while ((n = fgetc(dictionaryf)) != EOF)
    {
        n = trie_index(n);
        if (n >= 0)
        {
            if (currNode->next[n] == NULL)
            {
                currNode->next[n] = calloc(sizeof(node), 1);
                if (currNode->next[n] == NULL)
                    oom();
            }
            currNode = currNode->next[n];
        }
        else if (currNode != base)
        {
            if (!currNode->wordBool)
            {
                currNode->wordBool = true;
                numWords++;
            }
            currNode->wordCount++;
            currNode = base;
        }
        /* else: consecutive non-letters, non-apostrophes */
    }

    if (currNode != base && !currNode->wordBool)
    {
        currNode->wordBool = true;
        numWords++;
    }
    printf("%i distinct words\n", numWords);
    fclose(dictionaryf);
    return true;
}

/*
 * Print every word stored in the subtree rooted at `trie`, visiting child
 * slots in ascending order, one "Word: <count> [<word>]" line per word.
 *
 * trie:   subtree root; NULL prints nothing.
 * buffer: NUL-terminated prefix spelled by the path to `trie` (pass "" for
 *         the root). It is used as scratch space and modified.
 * buflen: total size of `buffer` in bytes.
 *
 * Exits with a failure status if a word does not fit in `buffer`.
 */
static void print_trie(node *trie, char *buffer, size_t buflen)
{
    if (trie == NULL)
        return;

    if (trie->wordBool)
        printf("Word: %3d [%s]\n", trie->wordCount, buffer);

    const size_t len = strlen(buffer);
    const size_t slots = sizeof trie->next / sizeof trie->next[0];

    for (size_t i = 0; i < slots; i++)
    {
        if (trie->next[i] == NULL)
            continue;

        /* Extending the prefix needs one more character plus the NUL.
         * Checked only when there is a child to descend into, and written
         * as an addition so a tiny buflen cannot wrap around. */
        if (len + 2 > buflen)
        {
            fprintf(stderr, "Word too long!\n[%s]\n", buffer);
            exit(EXIT_FAILURE);
        }

        buffer[len] = trie_map[i];
        buffer[len + 1] = '\0';
        print_trie(trie->next[i], buffer, buflen);
    }
}

/*
 * Load the word file named by the single command-line argument (default
 * "data") and print each distinct word with its occurrence count.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if the file could not be
 * loaded.
 */
int main(int argc, char **argv)
{
    const char *data = "data";
    if (argc == 2)
        data = argv[1];

    if (!load(data))
    {
        /* Failures go to stderr with a non-zero status so scripts can detect them. */
        fprintf(stderr, "Load failed!\n");
        return EXIT_FAILURE;
    }

    printf("Loaded file '%s' OK\n", data);
    char buffer[256] = "";
    print_trie(base, buffer, sizeof(buffer));

    return EXIT_SUCCESS;
}

When run on its own source code (trie-31.c), it produces:

94 distinct words
Loaded file 'trie-31.c' OK
Word:   3 [']
Word:   1 ['abcdefghijklmnopqrstuvwxyz]
Word:   1 [and]
Word:   1 [apostrophe]
Word:   1 [apostrophes]
Word:   2 [argc]
Word:   2 [argv]
Word:   7 [base]
Word:   2 [bool]
Word:  10 [buffer]
Word:   3 [buflen]
Word:   2 [c]
Word:   2 [calloc]
Word:   8 [char]
Word:   1 [consecutive]
Word:   3 [const]
Word:   1 [ctype]
Word:  14 [currnode]
Word:   1 [d]
Word:   5 [data]
Word:   2 [dictionary]
Word:   4 [dictionaryf]
Word:   1 [distinct]
Word:   4 [else]
Word:   1 [eof]
Word:   4 [exit]
Word:   1 [failed]
Word:   2 [failure]
Word:   1 [false]
Word:   1 [fclose]
Word:   1 [fgetc]
Word:   3 [file]
Word:   1 [fopen]
Word:   2 [for]
Word:   2 [fprintf]
Word:   5 [h]
Word:   7 [i]
Word:  14 [if]
Word:   5 [include]
Word:   2 [index]
Word:   7 [int]
Word:   4 [len]
Word:   2 [letters]
Word:   3 [load]
Word:   1 [loaded]
Word:   1 [long]
Word:   1 [main]
Word:   4 [map]
Word:   1 [memory]
Word:  16 [n]
Word:   7 [next]
Word:   8 [node]
Word:   2 [non]
Word:   2 [null]
Word:   4 [numwords]
Word:   1 [of]
Word:   1 [ok]
Word:   1 [one]
Word:   2 [oom]
Word:   1 [out]
Word:   3 [p]
Word:   3 [print]
Word:   4 [printf]
Word:   1 [r]
Word:   1 [read]
Word:   5 [return]
Word:   2 [s]
Word:   1 [s']
Word:   2 [size]
Word:   3 [sizeof]
Word:   1 [space]
Word:   7 [static]
Word:   1 [stdbool]
Word:   2 [stderr]
Word:   1 [stdio]
Word:   1 [stdlib]
Word:   1 [strchr]
Word:   1 [string]
Word:   1 [strlen]
Word:   2 [struct]
Word:   2 [t]
Word:   2 [the]
Word:   1 [to]
Word:   1 [tolower]
Word:   1 [too]
Word:  15 [trie]
Word:   3 [true]
Word:   1 [typedef]
Word:   3 [void]
Word:   1 [while]
Word:   2 [word]
Word:   6 [wordbool]
Word:   3 [wordcount]
Word:   1 [words]

Some of the words include apostrophes (there's ' on its own, 'abcdefghijklmnopqrstuvwxyz and s'). For the file great.panjandrum which contains:

So she went into the garden
to cut a cabbage-leaf
to make an apple-pie
and at the same time
a great she-bear coming down the street
pops its head into the shop
What no soap
So he died
and she very imprudently married the Barber
and there were present
the Picninnies
and the Joblillies
and the Garyulies
and the great Panjandrum himself
with the little round button at top
and they all fell to playing the game of catch-as-catch-can
till the gunpowder ran out at the heels of their boots

The output is:

66 distinct words
Loaded file 'great.panjandrum' OK
Word:   2 [a]
Word:   1 [all]
Word:   1 [an]
Word:   7 [and]
Word:   1 [apple]
Word:   1 [as]
Word:   3 [at]
Word:   1 [barber]
Word:   1 [bear]
Word:   1 [boots]
Word:   1 [button]
Word:   1 [cabbage]
Word:   1 [can]
Word:   2 [catch]
Word:   1 [coming]
Word:   1 [cut]
Word:   1 [died]
Word:   1 [down]
Word:   1 [fell]
Word:   1 [game]
Word:   1 [garden]
Word:   1 [garyulies]
Word:   2 [great]
Word:   1 [gunpowder]
Word:   1 [he]
Word:   1 [head]
Word:   1 [heels]
Word:   1 [himself]
Word:   1 [imprudently]
Word:   2 [into]
Word:   1 [its]
Word:   1 [joblillies]
Word:   1 [leaf]
Word:   1 [little]
Word:   1 [make]
Word:   1 [married]
Word:   1 [no]
Word:   2 [of]
Word:   1 [out]
Word:   1 [panjandrum]
Word:   1 [picninnies]
Word:   1 [pie]
Word:   1 [playing]
Word:   1 [pops]
Word:   1 [present]
Word:   1 [ran]
Word:   1 [round]
Word:   1 [same]
Word:   3 [she]
Word:   1 [shop]
Word:   2 [so]
Word:   1 [soap]
Word:   1 [street]
Word:  13 [the]
Word:   1 [their]
Word:   1 [there]
Word:   1 [they]
Word:   1 [till]
Word:   1 [time]
Word:   3 [to]
Word:   1 [top]
Word:   1 [very]
Word:   1 [went]
Word:   1 [were]
Word:   1 [what]
Word:   1 [with]


来源:https://stackoverflow.com/questions/37106523/c-trie-trying-to-add-apostrophe

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!