In C, parsing a string of multiple whitespace separated integers

前端 未结 5 959
生来不讨喜
生来不讨喜 2020-12-11 19:26

I am attempting to use C to parse a file containing multiple rows of whitespace separated integers into a dynamic array of dynamic int arrays. Each row will be an array in t

相关标签:
5条回答
  • 2020-12-11 19:52

    Reading a line via fgets() is a great first step.

    2 methods: strtol() (better error handling) and sscanf()

    /* Read the input one line at a time and print every int found on each line. */
    while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
      char *stop;
      for (;;) {
        errno = 0;  /* cleared first so a non-zero value afterwards comes from strtol() */
        long parsed = strtol(line, &stop, 10);
        if (stop == line || errno != 0) {
          break;  /* nothing was converted, or out of range / other error */
        }

        #if LONG_MIN < INT_MIN || LONG_MAX > INT_MAX
        /* long covers a wider range than int here, so the value may not fit */
        if (parsed > INT_MAX || parsed < INT_MIN) {
          errno = ERANGE;
          break;  /* out of range for int */
        }
        #endif

        int value = (int) parsed;
        printf("%d\n", value);
        line = stop;  /* continue right after the number just consumed */
      }
      /* Skip trailing whitespace; anything left over is unexpected input. */
      for (; isspace((unsigned char) *stop); stop++) {
      }
      if (*stop != '\0') {
        Handle_ExtraGarbageAtEndOfLine();
      }
    }
    

    The claim that "sscanf only ever parses the first number in the string" is not quite true. Use sscanf() with "%n" to record where scanning stopped.

    /* Read the input one line at a time and print every int sscanf() can pull from it. */
    while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
      int consumed;
      for (;;) {
        int value;
        consumed = 0;  /* stays 0 when the conversion fails, because %n is never reached */
        /* "%d %n": parse one int, skip following whitespace, record chars consumed */
        if (sscanf(line, "%d %n", &value, &consumed) != 1) {
          break;
        }
        printf("%d\n", value);
        line += consumed;  /* step past the number and its trailing whitespace */
      }
      /* consumed is 0 here, so this looks at the first unparsed character */
      if (line[consumed] != '\0') {
        Handle_ExtraGarbageAtEndOfLine();
      }
    }
    
    0 讨论(0)
  • 2020-12-11 19:57

    Just use a loop over the input line, exploiting that atol() stops at the next whitespace delimiter anyway. Works well for positive integers only ;) But it is fast, you need not read tons of strtok and sscanf documentation and it is even robust in the presence of "noise" littered between your integers.
    To make it work for negative ints, too, replace isdigit() with !isspace() and there you go.

    // Prints every run of decimal digits found in a hard-coded sample string,
    // one number per line. Any non-digit character (including a leading '-')
    // is skipped, so only non-negative values are printed.
    void bla()
    {
        const char * input = "    1           3           4       6     ";
        size_t len = strlen(input);
        size_t i = 0;
        while (i < len)
        {
            // <ctype.h> classifiers require an unsigned char value (or EOF).
            if (!isdigit((unsigned char)input[i]))
            {
                ++i;
                continue;
            }
            // atol() returns long, so the matching conversion is %ld, not %d.
            printf("%ld\n", atol(&input[i]));
            // Step over the digits that atol() just converted.
            while (i < len && isdigit((unsigned char)input[i]))
                ++i;
        }
    }
    
    // Positive and negative ints version: prints one number per
    // whitespace-separated token of a hard-coded sample string. Each token is
    // converted with atol(), so a token that does not start with a number
    // prints as 0.
    void bla1()
    {
        const char * input = "    10           -3           42       6     ";
        size_t len = strlen(input);
        size_t i = 0;
        while (i < len)
        {
            // <ctype.h> classifiers require an unsigned char value (or EOF).
            if (isspace((unsigned char)input[i]))
            {
                ++i;
                continue;
            }
            // atol() returns long, so the matching conversion is %ld, not %d.
            printf("%ld\n", atol(&input[i]));
            // Step over the rest of the token that was just converted.
            while (i < len && !isspace((unsigned char)input[i]))
                ++i;
        }
        /* Output: 
            10
            -3
            42
            6
    
        */
    }
    

    The next part of your question was (implicitly) how to handle dynamic arrays in which to store the parsed int values. Here is a solution based on the code above. The chunkSize is deliberately set too small for the input so that I could test that the realloc code path also works.

    // One parsed row of integers.
    typedef struct DataRow_tag
    {
        int32_t *data;   // heap buffer holding the parsed values; NULL when empty/unset
        size_t length;   // number of valid entries in data
    } DataRow_t;
    
    // Parses a zero-terminated string of whitespace-separated integers into
    // result->data, growing the buffer as needed.
    //
    // row:    zero-terminated string containing one row worth of data.
    // result: must point to a clean structure (result->data == NULL); this
    //         function does not release a buffer that is already attached.
    //
    // Returns an int used as a C-style boolean (the return type is kept as int
    // so existing callers are unaffected):
    //   0: failure - row is NULL, result is not clean, out of memory, or a
    //      value does not fit in int32_t. result->data is NULL and
    //      result->length is 0 in every failure case where the structure was
    //      clean on entry.
    //   1: success - the caller takes ownership of result->data and has to
    //      free() it when done, even when result->length is 0.
    //
    // Each whitespace-delimited token contributes exactly one value; a token
    // that does not start with a number is stored as 0 (same as atol()).
    int 
    ReadRowWithUnknownNumberOfColumnsOfInt32
        ( const char * row      // Zero terminated string containing 1 row worth of data.
        , DataRow_t *result     // Pointer to the place the data will be stored at.
        )
    {
        size_t chunkSize = 10; // Initial capacity in elements; doubled whenever the buffer is full.
    
        // Contract check for debug builds; release builds fall through to the test below.
        assert(NULL != result && NULL == result->data);
        if (NULL == row || NULL == result || NULL != result->data)
            return 0;
    
        result->length = 0;
        result->data = malloc(chunkSize * sizeof *result->data);
        if (NULL == result->data)
            return 0;
    
        // Walk up to the terminating zero so that the final character of the
        // row is examined too and an empty row needs no special casing.
        const char *pInput = row;
        while (*pInput != '\0')
        {
            // <ctype.h> classifiers require an unsigned char value (or EOF).
            if (isspace((unsigned char)*pInput))
            {
                ++pInput;
                continue;
            }
    
            // strtol() behaves like atol() but has defined behavior on overflow:
            // it saturates at LONG_MIN/LONG_MAX instead of invoking UB.
            long lval = strtol(pInput, NULL, 10);
            // Reject values that cannot be represented in the int32_t storage.
            // (Where long is only 32 bits wide an overflowing token saturates
            // at the int32_t limits and is therefore accepted as that limit.)
            if (lval < INT32_MIN || lval > INT32_MAX)
                goto fail;
    
            if (result->length == chunkSize)
            { // buffer is full: double it before storing the next value.
                if (chunkSize > SIZE_MAX / 2 / sizeof *result->data)
                    goto fail; // the doubled byte count would overflow size_t
                chunkSize = chunkSize + chunkSize;
                // realloc() leaves the old block valid on failure, so keep the
                // old pointer until the call is known to have succeeded.
                int32_t *temp = realloc(result->data, chunkSize * sizeof *temp);
                if (NULL == temp)
                    goto fail;
                result->data = temp;
            }
            result->data[result->length++] = (int32_t)lval;
    
            // Skip the remainder of the token that was just converted.
            while (*pInput != '\0' && !isspace((unsigned char)*pInput))
                ++pInput;
        }
        return 1;
    
    fail:
        // Single cleanup path: the buffer is released exactly once and the
        // structure is handed back clean.
        free(result->data);
        result->data = NULL;
        result->length = 0;
        return 0;
    }
    // Demo driver: parses a fixed sample row with
    // ReadRowWithUnknownNumberOfColumnsOfInt32(), prints the values separated
    // by spaces on a single line, then releases the buffer. Prints nothing if
    // parsing fails.
    void Bla2()
    {
        const char * input = "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13";
        DataRow_t dataRow = { 0 };
        if (!ReadRowWithUnknownNumberOfColumnsOfInt32(input, &dataRow))
            return;
    
        // Walk the parsed values with a pointer pair instead of an index.
        const int32_t *cursor = dataRow.data;
        const int32_t *stop = dataRow.data + dataRow.length;
        while (cursor != stop)
        {
            printf("%d ", *cursor);
            ++cursor;
        }
        printf("\n");
    
        // This function owns the buffer after a successful read.
        free(dataRow.data);
        dataRow.data = NULL;
        dataRow.length = 0;
    }
    
    0 讨论(0)
  • 2020-12-11 20:00

    Use the strtok() function with " " (space) as the delimiter, and place it in a loop that terminates when strtok() returns NULL. This yields each token in turn, and you can then print the number parsed from each token:

    // Read the input one line at a time, echo the line, then print every
    // integer token found on it.
    while((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    
        // Echo before tokenizing: strtok() overwrites delimiters in the buffer.
        printf("%s\n", lineBuffer);
    
        // Split on every whitespace character, not only ' ', so tab-separated
        // numbers and the newline kept by fgets() are handled as separators too.
        const char *delims = " \t\r\n\v\f";
        char *token=strtok(line,delims);
    
        while(token!=NULL)
        {
            // Print only tokens that start with a valid integer.
            if(sscanf(token, "%d", &value) == 1)
                 printf("%d\n", value);
             token=strtok(NULL,delims);
        }
    }
    
    0 讨论(0)
  • 2020-12-11 20:03

    You should use :

    // Request BUFFER_SIZE + 1 bytes. The count must not be wrapped in sizeof:
    // that would give the size of the expression's type, not the byte count.
    // Check the result for NULL before using it.
    lineBuffer = malloc(BUFFER_SIZE + 1);
    

    rather than:

    // Declares lineBuffer as a fixed-size array of BUFFER_SIZE chars.
    char lineBuffer[BUFFER_SIZE];
    

    Your stack will thank you!

    0 讨论(0)
  • 2020-12-11 20:16

    Use strtol() which gives a pointer to the end of the match if there is one, and a char pointer to store the current position:

        /* For each line read: echo it, then print every integer strtol() can parse from it. */
        while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
            printf("%s\n", lineBuffer);

            /* cursor tracks the current position; strtol() reports where it stopped via next */
            for (char *cursor = lineBuffer; cursor < lineBuffer + BUFFER_SIZE; ) {
                char *next;
                long int value = strtol(cursor, &next, 10);
                /* No characters consumed (and 0 returned) means no further number. */
                /* The docs also suggest checking the errno value. */
                if (next == cursor && value == 0L) {
                    break;
                }

                printf("%ld\n", value);
                cursor = next;
            }
        }
    
    0 讨论(0)
提交回复
热议问题