问题
I need to write a C program that reads a text file, counts how many times each word occurs, and prints each word together with its count. Right now I have code that prints each word and how many times it occurs, but I need the output to be in alphabetical order and the counting to ignore letter case. For example, "It" and "it" should be counted as the same word. I'm not sure where in my code to make these changes. My current code is below.
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Capacity of the word tables: at most MAX_WORDS words of MAX_WORD_LEN - 1 characters. */
enum { MAX_WORDS = 1000, MAX_WORD_LEN = 512 };

/*
 * Returns nonzero when c ends a word: whitespace, ',', '.' or NUL.
 * NUL is included so that the terminator of the file buffer (and any NUL
 * byte embedded in the file) closes the word in progress.
 */
static int is_separator(char c)
{
    return c == ' ' || c == ',' || c == '.' ||
           c == '\n' || c == '\r' || c == '\t' || c == '\0';
}

/*
 * Reads the file named by argv[1], splits it into words and prints every
 * distinct word followed by the number of times it occurs, in order of
 * first appearance. Comparison is case-sensitive.
 *
 * Words longer than MAX_WORD_LEN - 1 characters are truncated; a file with
 * more than MAX_WORDS words is rejected.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE on a usage, I/O,
 * allocation or capacity error.
 */
int main(int argc, char *argv[])
{
    if (argc == 1) {
        printf("The input file name has not been provided\n");
        return EXIT_FAILURE;
    }
    if (argc != 2) {
        fprintf(stderr, "Expected exactly one input file name\n");
        return EXIT_FAILURE;
    }

    FILE *f = fopen(argv[1], "rb");
    if (f == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    /* Determine the file size so the whole file can be read in one go. */
    long fsize = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        fsize = ftell(f);
    }
    if (fsize < 0 || fseek(f, 0, SEEK_SET) != 0) {
        perror(argv[1]);
        fclose(f);
        return EXIT_FAILURE;
    }

    char *str = malloc((size_t)fsize + 1);
    if (str == NULL) {
        fprintf(stderr, "Out of memory\n");
        fclose(f);
        return EXIT_FAILURE;
    }

    size_t len = fread(str, 1, (size_t)fsize, f);
    if (ferror(f)) {
        perror(argv[1]);
        free(str);
        fclose(f);
        return EXIT_FAILURE;
    }
    fclose(f);
    str[len] = '\0';

    /* static: roughly 1 MB in total, too large to place safely on the stack. */
    static char p[MAX_WORDS][MAX_WORD_LEN];    /* every word, in file order */
    static char ptr1[MAX_WORDS][MAX_WORD_LEN]; /* distinct words            */
    int words = 0;                             /* used rows of p            */
    int count = 0;                             /* used rows of ptr1         */

    /*
     * Split the buffer into words. The loop deliberately visits str[len]
     * (the NUL terminator) so that the last word is closed even when the
     * file does not end with a separator.
     */
    size_t k = 0;
    for (size_t j = 0; j <= len; j++) {
        if (!is_separator(str[j])) {
            if (words == MAX_WORDS) {
                fprintf(stderr, "Too many words (limit is %d)\n", MAX_WORDS);
                free(str);
                return EXIT_FAILURE;
            }
            /* Characters beyond the row capacity are dropped. */
            if (k < MAX_WORD_LEN - 1) {
                p[words][k++] = str[j];
            }
        } else if (k > 0) {
            /* k > 0 skips the empty tokens between consecutive separators. */
            p[words][k] = '\0';
            words++;
            k = 0;
        }
    }
    free(str);

    /* Collect each distinct word once, in order of first appearance. */
    for (int i = 0; i < words; i++) {
        int j;
        for (j = 0; j < count; j++) {
            if (strcmp(ptr1[j], p[i]) == 0) {
                break;
            }
        }
        if (j == count) {
            /* Rows of p and ptr1 have the same size, so this cannot overflow. */
            strcpy(ptr1[count], p[i]);
            count++;
        }
    }

    /* Count and print the occurrences of every distinct word. */
    for (int i = 0; i < count; i++) {
        int c = 0;
        for (int j = 0; j < words; j++) {
            if (strcmp(ptr1[i], p[j]) == 0) {
                c++;
            }
        }
        printf("%s %d \n", ptr1[i], c);
    }

    return EXIT_SUCCESS;
}
回答1:
Here is a minimal suggestion. Your code should probably be broken down into functions, but treat this as a rough draft.
For the case-insensitive part, you can simply replace your calls to strcmp
with strcasecmp
(a POSIX function declared in <strings.h>).
Then, for sorting, you can use qsort
.
Define a comparison function like this:
/*
 * qsort() comparator ordering two words alphabetically, ignoring letter case.
 *
 * a and b each point to the first character of a NUL-terminated string, as
 * they do when qsort() is applied to an array of fixed-size char buffers
 * (char words[N][M]).
 *
 * Returns a negative value, zero or a positive value when the word at a
 * sorts before, equal to, or after the word at b.
 */
int compar(const void *a, const void *b)
{
    /* unsigned char: tolower() is only defined for unsigned char values and EOF. */
    const unsigned char *lhs = a;
    const unsigned char *rhs = b;

    /* Advance past the common case-folded prefix; stop at the end of lhs. */
    while (*lhs != '\0' && tolower(*lhs) == tolower(*rhs)) {
        lhs++;
        rhs++;
    }
    return tolower(*lhs) - tolower(*rhs);
}
and apply it to your word array. As far as I understand, ptr1 holds your words, so you can add
qsort(ptr1, count, sizeof(ptr1[0]), compar);
before your last for loop.
Nevertheless, it seems to me that you need to fix your word-extraction loop, as valgrind reports some errors in your code.
回答2:
Here is a sample function using the milkstrings library:
/* One node of a singly linked list of distinct words. */
typedef struct tWord {
    tXt tx;              /* text of the word (milkstrings string)   */
    int count;           /* number of occurrences seen so far       */
    struct tWord *next;  /* following node, or NULL at the list end */
} tWord;

/* Pointer to a list node. */
typedef tWord *pWord;
void wordfrequency(void) {
pWord np,prev,bprev,most,allword ;
allword = NULL ;
FILE * fi = fopen("sample.txt","r") ;
if (fi == NULL)
return ;
tXt rlin = " " ;
while (rlin != txtEndFile) {
tXt lin = txtUpcase(txtTrim(rlin = txtFromFile(fi))) ;
while (lin[0]) {
tXt wrd = txtTrim(txtEat(&lin,' ')) ;
np = allword ;
while (np) {
if (strcmp(np->tx,wrd) == 0) {
np->count++ ;
wrd = "" ;
np = NULL ;
}
else
np = np->next ;}
if (wrd[0]) {
np = (pWord) malloc(sizeof(tWord)) ;
np->tx = fridge(wrd) ;
np->count = 1 ;
np->next = allword ;
allword = np ; } } }
while (allword) {
prev = bprev = NULL ;
np = most = allword ;
while (np) {
if (strcmp(most->tx , np->tx) >0 ) {
most = np ;
bprev = prev ; }
prev = np ;
np = np->next ; }
printf("%5d %s\n",most->count,unfridge(most->tx)) ;
if (bprev)
bprev->next = most->next ;
else
allword = most->next ;
free (most) ; }
fclose(fi) ;
if (txtAnyError())
printf("%s\n",txtLastError()) ;
}
来源:https://stackoverflow.com/questions/34070657/c-program-to-count-the-word-frequency-in-a-text-file