Reading from file to dynamic struct

强颜欢笑 提交于 2020-01-06 14:01:26

问题


I would like to read from a file, line by line. Each line has 3 arguments guaranteed. First 2 are first and last name and third is age. I want to make a linked list, in which, each node represents a person (line) in the file. I don't know the size of the names so I made it dynamic. I also don't know the number of lines in the file, so I would like that to be dynamic too.

My approach was to use fscanf, but then I wouldn't know how much memory needs to be allocated prior to reading it. The function convertToList is supposed to receive a file path of the file we wanna read, convert it to a linked list, then return the head node. (Open to improvements)

Check out my code and see where I got stuck:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef enum
{
    FALSE,
    TRUE 
}bool;

struct Node{
    char firstName[50];
    char lastName[50];
    int age;
    struct Node *next;
};

typedef struct {
    struct Node *head;
}LinkedList;


struct Node * convertToList(char *inputFilePath);

int main(int argc, char* argv[]) {

    if(argc != 4) {
        printf("Invalid arguments.\n");
        exit(0);
    }
    if (strlen(argv[3])!=1) {
        printf("Invalid sorting type.\n");
        exit(0);
    }

    char *inputFilePath = (char*) malloc(sizeof(char*) * strlen(argv[1]) +1);
    memcpy(inputFilePath, argv[1], strlen(argv[1]));
    char *outputFilePath = (char*) malloc(sizeof(char*) * strlen(argv[2]) +1);
    memcpy(outputFilePath, argv[2], strlen(argv[2]) +1);
    char *sortType = argv[3];

    //LinkedList* inputList = (LinkedList*)malloc(sizeof(struct Node));

    struct Node* head = malloc(sizeof(struct Node));

    head = convertToList(inputFilePath);
    printf("\n%s %s %d\n", head->firstName, head->lastName, head->age);
//              printf("\nsaaap\n");

    getchar();


}

struct Node * convertToList(char *inputFilePath) {
FILE* ifp;
ifp = fopen(inputFilePath, "r");
if (!ifp) { perror("fopen"); exit(0); }
struct Node *head = NULL;
struct Node *prev = NULL;
bool isHead = TRUE;
while(!feof(ifp))   {
    struct Node *tmp = (struct Node*)malloc(sizeof(struct Node));
    if (prev != NULL)
        prev->next = tmp;


    if (head==NULL) 
        head = tmp;

    fscanf(ifp, "%s %s %d\n", tmp->firstName, tmp->lastName, &tmp->age);
    prev = tmp;

    //Need to link to next node as well

}

fclose(ifp);
return head;

}

I know that the fscanf is wrong, but I'm not sure how to fix it. Also, how do I return the root? Is my approach gonna work? And lastly, how do can I set the next node in the list? I don't see it happening with the current while loop.

Thanks.


回答1:


If you need to link the nodes this is how you can do it and use dynamic storage, here you go, I didn't think this very much but it is Ok.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

struct Node
{
    char *firstName;
    char *lastName;
    int   age;
    struct Node *next;
};

struct Node *convertToList(const char *const inputFilePath);
void freeList(struct Node *);

int main(int argc, char* argv[])
{
    struct Node *head;

    if (argc != 2)
    {
        printf("Invalid arguments.\n");
        return 1;
    }

    head = convertToList(argv[1]);
    if (head != NULL)
    {
        struct Node *current;

        current = head;
        while (current != NULL)
        {
            fprintf(stderr, "%s %s %d\n", current->firstName, current->lastName, current->age);
            current = current->next;
        }
        /* do manupulations with the list, example above, print the values */
        freeList(head);
    }
    return 0;
}

void freeList(struct Node *node)
{
    struct Node *current;

    current = node;
    while (current != NULL)
    {
        struct Node *next;

        next = current->next;
        if (current->firstName != NULL)
            free(current->firstName);
        if (current->lastName != NULL)
            free(current->lastName);
        free(current);

        current = next;
    }

}

size_t appendChar(char **buffer, char character, size_t length)
{
    char *temporary;
    if (buffer == NULL)
        return length;
    temporary = realloc(*buffer, 1 + length);
    if (temporary == NULL)
        return length;
    temporary[length] = character;
    *buffer           = temporary;

    return 1 + length;
}

struct Node *parseFileLine(char *line)
{
    char        *word;
    struct Node *node;
    char        *endptr;

    if (line == NULL)
        return NULL;

    node = malloc(sizeof(struct Node));
    if (node == NULL)
        return NULL;

    node->firstName = NULL;
    node->lastName  = NULL;
    node->age       = -1; // an invalid value;
    node->next      = NULL;

    word = strtok(line, " ");
    if (word == NULL)
        return node;
    node->firstName = strdup(word);

    word = strtok(NULL, " ");
    if (word == NULL)
        return node;
    node->lastName = strdup(word);

    word = strtok(NULL, " ");
    if (word == NULL)
        return node;

    node->age = strtol(word, &endptr, 10);
    if (*endptr != '\0')
        node->age = -1;

    return node;
}

struct Node *getNode(FILE *file)
{
    char  *line;
    int    character;
    size_t length;

    line   = NULL;
    length = 0;
    while ((character = fgetc(file)) != EOF)
    {
        if (((char)character == '\n') && (line != NULL))
        {
            struct Node *node;

            length = appendChar(&line, '\0', length);
            node   = parseFileLine(line);
            free(line);

            return node;
        }
        length = appendChar(&line, (char)character, length);
    }

    if (line != NULL)
        free(line);

    return NULL;
}

struct Node *convertToList(const char *const inputFilePath)
{
    FILE        *ifp;
    struct Node *head;
    struct Node *current;
    struct Node *last;

    ifp = fopen(inputFilePath, "r");
    if (ifp == NULL)
    {
        perror("fopen");
        return NULL;
    }

    head = NULL;
    last = NULL;
    while ((current = getNode(ifp)) != NULL)
    {
        if (current == NULL)
            return head;
        if (head == NULL)
            head = current;
        if (last != NULL)
            last->next = current;
        last = current;
    }
    fclose(ifp);

    return head;
}

Here you can also print the nodes to see that the data is correctly there.

I think you don't understand what malloc is for and you don't know much about pointers too, in your fscanf you are storing data in firstName and lastName without allocating memory for it, they are not even initialized so you would get a segmentation fault.




回答2:


A somewhat different approach.

argv copying

First off, as mentioned, you do not need to copy argv values. Main reason for doing do is if you manipulate the values. There are also cases where one want to erase argv values as they can be read by ps and other tools, read from /proc/ etc. For example some programs take passwords as argument, to prevent password to be readable by anyone having access to the system one typically copy the argument then overwrite the argv value.

It is however usually good practice to use variables for the arguments. It usually makes the code clearer, but also makes it easier to maintain if one do changes. E.g. implement flag arguments like -f <filename>.

exit() and return from main()

You also exit() with zero on error. You would want to exit with zero on success, and other value on error or other. This is the norm. 0 == success. Some applications implement numeric exit codes that can mean different things. E.g. 0 is normal exit, 1 is not an error but some special case, 2 likewise 3 might be an error etc. For example grep:

EXIT STATUS
   The exit status is 0 if selected lines are found, and 1 if not found.  If  an
   error occurred the exit status is 2.  (Note: POSIX error handling code should
   check for '2' or greater.)

scanf

When you use scanf to read strings there are some tricks that can be used to make it better. First off always use the size parameter.

char name[16]
sscanf(buf, "%15s", name);

Do also check items read:

if (sscanf(buf, "%15s %d", name, &age) != 2)
     ... error ...

Third you can also save number of bytes read by %n:

sscanf(buf, "%n%15s%n %n%d%n", &of1, name, &of2, &age, &of3)

Usage

A very simple, but also quick and user-friendly thing, is to add a usage function.

Typically:

int usage(const char *self, const char *err_str)
{
    fprintf(stderr,
        "Usage: %s <in-file> <out-file> <sort-type>\n"
        "  Sort types:\n"
        "   f Sort by First Name\n"
        "   l Sort by Last Name\n"
        "   a Sort by Age\n"
        ,
        self
    );
    if (err_str) {
        fprintf(stderr,
            "\nError: %s\n",
            err_str
        );
    }
    return ERR_ARG;
}

Then in main() you can quickly and clean add something like:

if (argc < 4)
    return usage(argv[0], "Missing arguments.");

A note on you validation of the sort argument. Instead of using strlen() you can check if byte 2 is 0.

if (argv[3][1] != '\0')
    ... error ...

Finally main could be something like:

int main(int argc, char *argv[])
{
    char *in_file, *out_file, sort;
    struct Node *head = NULL;
    int err = 0;

    if (argc < 4)
        return usage(argv[0], "Missing arguments.");
    if (argc > 4)
        return usage(argv[0], "Unknown arguments.");
    if (argv[3][1] != '\0')
        return usage(argv[0], "Invalid sorting type.");

    in_file  = argv[1];
    out_file = argv[2];
    sort     = argv[3][0];

    if (sort != 'f' && sort != 'l' && sort != 'a')
        return usage(argv[0], "Invalid sorting type.");

    if ((err = file_to_llist(in_file, &head)) != 0)
        return err;

    prnt_llist(stdout, head);
    free_ll(head);

    return err;
}

malloc helpers

When dealing with a lot of mallocing and similar it can be useful to add some helper functions. If you get a memory error you normally would exit right away.

void *alloc(size_t size)
{
    void *buf;

    if ((buf = malloc(size)) == NULL) {
        fprintf(stderr, "Memory error.\n");
        exit(ERR_MEM);
    }
    return buf;
}

void *re_alloc(void *old, size_t size)
{
    void *buf;

    if ((buf = realloc(old, size)) == NULL) {
        fprintf(stderr, "Memory error.\n");
        exit(ERR_MEM);
    }
    return buf;
}

Parsing of the file

As you want to have everything dynamically allocated and no limits (beyond system memory) one solution is to implement some sort of tokenizer. It can be helpful to use a struct to hold it together. Something like:

struct file_toker {
    FILE *fh;     /* File handle */
    char *buf;    /* Dynamic Read buffer */
    size_t size;  /* Size of buffer */
    size_t len;   /* Length of actual data in buffer. */
};

One point here is to keep length of tokens read. By this one do not need to keep using strlen etc.

If you can afford it it would usually be better to read whole file in one go, then parse the buffer. Optionally one can read file in chunks of say 4096*16 bytes, but then one get some complexity when it comes to overlapping lines between reads etc.

Anyhow in this example one byte is read at a time.


Start code

Finally a starting ground could be something like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>     /* memcpy/strncpy */
#include <errno.h>      /* errno for fopen() */
#include <ctype.h>      /* isspace() */

#define ERR_ARG         1
#define ERR_FILE_FMT    2
#define ERR_MEM         3

struct Node {
    char *name_first;
    char *name_last;
    int age;
    struct Node *next;
};

struct file_toker {
    FILE *fh;
    char *buf;
    size_t size;
    size_t len;
};

/* ===============----- GEN HELPERS ------=================== */

int usage(const char *self, const char *err_str)
{
    fprintf(stderr,
        "Usage: %s <in-file> <out-file> <sort-type>\n"
        "  Sort types:\n"
        "   f Sort by First Name\n"
        "   l Sort by Last Name\n"
        "   a Sort by Age\n"
        ,
        self
    );
    if (err_str) {
        fprintf(stderr,
            "\nError: %s\n",
            err_str
        );
    }
    return ERR_ARG;
}

void *alloc(size_t size)
{
    void *buf;

    if ((buf = malloc(size)) == NULL) {
        fprintf(stderr, "Memory error.\n");
        exit(ERR_MEM);
    }
    return buf;
}

void *re_alloc(void *old, size_t size)
{
    void *buf;

    if ((buf = realloc(old, size)) == NULL) {
        fprintf(stderr, "Memory error.\n");
        exit(ERR_MEM);
    }
    return buf;
}

/* ===============----- LINKED LIST ------=================== */

void free_node(struct Node *n)
{
    if (!n)
        return;
    if (n->name_first)
        free(n->name_first);
    if (n->name_last)
        free(n->name_last);
    free(n);
}

void free_ll(struct Node *n)
{
    struct Node *p;

    if (!n)
        return;
    for ( ; n ; ) {
        p = n;
        n = n->next;
        free_node(p);
    }
}


void prnt_llist(FILE *fd, struct Node *n)
{
    int i = 0;

    fprintf(fd, "NODELIST:\n");
    for ( ; n != NULL ; n = n->next) {
        fprintf(fd,
            "Entry %d {\n"
            "  Name: %s, %s\n"
            "  Age : %d\n"
            "}\n",
            ++i,
            n->name_last,
            n->name_first,
            n->age
        );
    }
}

/* ================--------- FILE TOKER ------------==================== */
/* Free / close reader. */
void free_ft(struct file_toker *ft)
{
    if (!ft)
        return;
    if (ft->fh)
        fclose(ft->fh);
    free(ft->buf);
    ft->fh = NULL;
    ft->buf = NULL;
}
/* Initiate reader. */
int ft_init(struct file_toker *ft, const char *fn, size_t buf_sz)
{
    ft->size = buf_sz;
    ft->len = 0;
    ft->buf = alloc(ft->size);

    ft->fh = fopen(fn, "r");
    if (!ft->fh) {
        perror("Unable to open file");
        return errno;
    }
    return 0;
}
/* Increase buffer size. */
size_t ft_increase(struct file_toker *ft)
{
    if (ft->size < 1)
        ft->size = 1;
    ft->size *= 2;
    ft->buf = re_alloc(ft->buf, ft->size);
    return ft->size;
}
/* Read and skip spaces (\n, \r, ' ', \t etc.). Return first non-space. */
char ft_skip_space(struct file_toker *ft)
{
    int c;

    while ((c = fgetc(ft->fh)) != EOF && isspace(c))
        ;
    return c == EOF ? 0 : (char)c;
}
/* Read next token */
size_t file_tok(struct file_toker *ft)
{
    size_t i = 1;
    size_t max;
    int c;

    if (ft->size < 2)
        ft_increase(ft);

    ft->len = 0;
    max = ft->size - 1;

    /* Skip any leading spaces. Function return first non-space. */
    if ((ft->buf[0] = ft_skip_space(ft)) == 0)
        return 0;

    while ((c = fgetc(ft->fh)) != EOF) {
        /* If space, break. */
        if (isspace(c))
            break;
        /* Save char to buffer. */
        ft->buf[i++] = (char)c;
        /* If entire buffer used, increase it's size. */
        if (i > max)
            max = ft_increase(ft) - 1;
    }
    /* Null terminate. */
    ft->buf[i] = 0x00;
    /* Length without terminating null */
    ft->len = i;

    return i;
}
/* Read next space separated token and save it as new allocated string. */
int file_tok_str(struct file_toker *ft, char **out)
{
    if (file_tok(ft) == 0)
        return 1;
    *out = alloc(ft->len + 1);
    memcpy(*out, ft->buf, ft->len + 1);

    return 0;
}
/* Read next space separated token and scan it as int. */
int file_tok_int(struct file_toker *ft, int *out)
{
    if (file_tok(ft) == 0)
        return 1;
    if ((sscanf(ft->buf, "%d", out)) != 1)
        return 1;
    return 0;
}

/* ===============----- FILE PARSER ------=================== */    
int file_to_llist(const char *fn, struct Node **head)
{
    struct Node *node = NULL, *cur = *head;
    struct file_toker ft;

    /* Initiate new file token reader, initial buffer size 4096 bytes. */
    if (ft_init(&ft, fn, 4096))
        return 1;

    while (1) {
        /* Allocate next node */
        node = alloc(sizeof(struct Node));
        node->name_first = NULL;
        node->name_last  = NULL;
        /* Read and copy first name. */
        if (file_tok_str(&ft, &node->name_first))
            break;
        /* Read and copy last name. */
        if (file_tok_str(&ft, &node->name_last))
            break;
        /* Read and copy age. */
        if (file_tok_int(&ft, &node->age))
            break;

        /* Link and save current for next iteration. */
        node->next = NULL;
        if (cur) {
            cur->next = node;
        }
        cur = node;
        if (*head == NULL)
            *head = node;
    }
    /* Free last unused node. */
    free_node(node);
    free_ft(&ft);

    return 0;
}

/* ===============----- MAIN ROUTINE ------=================== */
int main(int argc, char *argv[])
{
    char *in_file, *out_file, sort;
    struct Node *head = NULL;
    int err = 0;

    if (argc < 4)
        return usage(argv[0], "Missing arguments.");
    if (argc > 4)
        return usage(argv[0], "Unknown arguments.");
    if (argv[3][1] != '\0')
        return usage(argv[0], "Invalid sorting type.");

    in_file  = argv[1];
    out_file = argv[2];
    sort     = argv[3][0];

    if (sort != 'f' && sort != 'l' && sort != 'a')
        return usage(argv[0], "Invalid sorting type.");

    if ((err = file_to_llist(in_file, &head)) != 0)
        return err;

    prnt_llist(stdout, head);
    free_ll(head);

    return err;
}


来源:https://stackoverflow.com/questions/27454800/reading-from-file-to-dynamic-struct

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!