Parse string into argv/argc

前端 未结 13 820
生来不讨喜
生来不讨喜 2020-11-28 07:45

Is there a way in C to parse a piece of text and obtain values for argv and argc, as if the text had been passed to an application on the command line?

This doesn\'

相关标签:
13条回答
  • 2020-11-28 07:55

    Solution for those that don't want to use dynamic memory allocation (E.g. embedded)

    I wrote tokenise_to_argc_argv() for an embedded project; it uses strtok_r() as the basis for tokenising a command string into argc and argv form. Unlike most answers here, I allocate memory statically, so my implementation assumes that you have an upper bound of argv_length. For most typical embedded applications, this is more than enough. I have included example code below as well so you can quickly use it.

    /**
     * Split a space-delimited string into argc/argv form without allocating.
     *
     * The buffer is tokenised in place by strtok_r(): every token boundary is
     * overwritten with '\0' and the argv entries point into the buffer.
     *
     * At most argv_length tokens are stored. When fewer tokens are found, the
     * slot after the last token is set to NULL.
     *
     * A NULL buffer or NULL argv yields zero tokens; argc may be NULL when only
     * the return value is wanted.
     *
     * Returns the number of tokens stored in argv (also written to *argc).
     */
    int tokenise_to_argc_argv(
            char     *buffer,     ///< In/Out : Modifiable String Buffer To Tokenise
            int      *argc,       ///< Out    : Argument Count
            char     *argv[],     ///< Out    : Argument String Vector Array
            const int argv_length ///< In     : Maximum Count For `*argv[]`
          )
    { /* Tokenise string buffer into argc and argv format (req: string.h) */
      int   count    = 0;
      char *save_ptr = NULL;
      char *input    = buffer; /* Only the first strtok_r() call is given the string */

      if ((NULL != buffer) && (NULL != argv))
      {
        while (count < argv_length)
        { /* Fill argv via strtok_r() */
          argv[count] = strtok_r(input, " ", &save_ptr);
          if (NULL == argv[count]) break;
          input = NULL; /* Later calls continue from save_ptr */
          count++;
        }
      }

      if (NULL != argc) *argc = count;
      return count; // Argument Count
    }
    

    Note:

    • The provided character buffer must be modifiable (as strtok_r() inserts \0 into the buffer to delimit string tokens).
    • strtok_r in this function currently uses the " " space character as the only delimiter. This emulates the behaviour of main(int argc, char *argv[]) in typical command-line interfaces.
    • This function does not use malloc or calloc, instead you will have to allocate the argv array separately, and supply the length of argv explicitly. This is because I intend to use this in embedded devices and thus would rather allocate it manually.
    • strtok_r() is used because it is thread-safe (strtok() uses an internal static pointer). Note that strtok_r() is specified by POSIX rather than ISO C; most toolchains declare it in string.h, and a fallback is given further below for those that do not.

    Below is the demonstration code as well as its output. In addition, this shows that tokenise_to_argc_argv() can handle most string cases and thus has been tested. Also, this function does not rely on malloc or calloc and thus is suitable for embedded usage (after using stdint.h types).


    Demonstration Code

    /*******************************************************************************
      Tokenise String Buffer To Argc and Argv Style Format
      Brian Khuu 2017
    *******************************************************************************/
    #include <stdio.h>  // printf()
    #include <ctype.h>  // isprint()
    #include <string.h> // strtok_r()
    
    /**-----------------------------------------------------------------------------
      @brief Tokenise a string buffer into argc and argv format

      Tokenise string buffer to argc and argv form via strtok_r()
      Warning: Using strtok_r will modify the string buffer (each token boundary
               is overwritten with `\0`, and argv entries point into the buffer)

      At most argv_length tokens are stored. When fewer tokens are found, the
      slot after the last token is set to NULL.

      A NULL buffer or NULL argv yields zero tokens. argc may be NULL when only
      the return value is wanted.

      Returns: Number of tokens extracted

    ------------------------------------------------------------------------------*/
    int tokenise_to_argc_argv(
            char     *buffer,     ///< In/Out : Modifiable String Buffer To Tokenise
            int      *argc,       ///< Out    : Argument Count
            char     *argv[],     ///< Out    : Argument String Vector Array
            const int argv_length ///< In     : Maximum Count For `*argv[]`
          )
    { /* Tokenise string buffer into argc and argv format (req: string.h) */
      int   count    = 0;
      char *save_ptr = NULL;
      char *input    = buffer; /* Only the first strtok_r() call is given the string */

      if ((NULL != buffer) && (NULL != argv))
      {
        while (count < argv_length)
        { /* Fill argv via strtok_r() */
          argv[count] = strtok_r(input, " ", &save_ptr);
          if (NULL == argv[count]) break;
          input = NULL; /* Later calls continue from save_ptr */
          count++;
        }
      }

      if (NULL != argc) *argc = count;
      return count; // Argument Count
    }
    
    /*******************************************************************************
      Demonstration of tokenise_to_argc_argv()
    *******************************************************************************/
    
    static void print_buffer(char *buffer, int size);
    static void print_argc_argv(int argc, char *argv[]);
    static void demonstrate_tokenise_to_argc_argv(char buffer[], int buffer_size);
    
    int main(void)
    { /* Run the tokeniser demonstration over a handful of representative inputs */
      /* Each case needs its own writable array because tokenising edits it in place */
      char normal_case[]      = "tokenising example";
      char empty_case[]       = "";
      char extra_space_case[] = "extra  space here";
      char one_word_case[]    = "one-word";

      printf("# `tokenise_to_argc_argv()` Examples\n");

      printf("## Case0: Normal\n");
      demonstrate_tokenise_to_argc_argv(normal_case, sizeof(normal_case));

      printf("## Case1: Empty String\n");
      demonstrate_tokenise_to_argc_argv(empty_case, sizeof(empty_case));

      printf("## Case2: Extra Space\n");
      demonstrate_tokenise_to_argc_argv(extra_space_case, sizeof(extra_space_case));

      printf("## Case3: One Word String\n");
      demonstrate_tokenise_to_argc_argv(one_word_case, sizeof(one_word_case));

      return 0;
    }
    
    static void demonstrate_tokenise_to_argc_argv(char buffer[], int buffer_size)
    { /* This demonstrates usage of tokenise_to_argc_argv:
         dump the buffer, tokenise it in place, then dump the buffer and argv again.
         - buffer      : modifiable string to tokenise
         - buffer_size : number of bytes of `buffer` to dump (including the `\0`) */
      int   argc     = 0;
      char *argv[10] = {0};

      printf("* **Initial State**\n");
      print_buffer(buffer, buffer_size);

      /* Tokenise Command Buffer.
         The limit is the number of argv slots, not the array's size in bytes;
         passing sizeof(argv) alone would let the tokeniser write past the array. */
      tokenise_to_argc_argv(buffer, &argc, argv, (int)(sizeof(argv) / sizeof(argv[0])));

      printf("* **After Tokenizing**\n");
      print_buffer(buffer, buffer_size);
      print_argc_argv(argc,argv);
      printf("\n\n");
    }
    
    static void print_buffer(char *buffer, int size)
    { /* Dump `size` bytes of `buffer` twice on one line: first as characters
         (non-printable bytes are shown as '0'), then as two-digit hex values */
      printf(" - Buffer Content `");
      for (int i = 0 ; i < size; i++)
      {
        /* <ctype.h> functions need an unsigned char value; a negative plain char is undefined */
        const unsigned char byte = (unsigned char)buffer[i];
        printf("%c", isprint(byte) ? byte : '0');
      }
      printf("` | HEX: ");
      /* Go through unsigned char so bytes >= 0x80 print as two digits, not sign-extended */
      for (int i = 0 ; i < size; i++) printf("%02X ", (unsigned int)(unsigned char)buffer[i]);
      printf("\n");
    }
    
    static void print_argc_argv(int argc, char *argv[])
    { /* Print a summary line with the argument count, then one line per argument */
      const char *empty_note = (argc != 0) ? "" : "Argv Is Empty";
      int         idx        = 0;

      printf("* **Argv content** (argc = %d): %s\n", argc, empty_note);
      while (idx < argc)
      {
        printf(" - `argv[%d]` = `%s`\n", idx, argv[idx]);
        idx++;
      }
    }
    

    Output

    tokenise_to_argc_argv() Examples

    Case0: Normal

    • Initial State
      • Buffer Content tokenising example0 | HEX: 74 6F 6B 65 6E 69 73 69 6E 67 20 65 78 61 6D 70 6C 65 00
    • After Tokenizing
      • Buffer Content tokenising0example0 | HEX: 74 6F 6B 65 6E 69 73 69 6E 67 00 65 78 61 6D 70 6C 65 00
    • Argv content (argc = 2):
      • argv[0] = tokenising
      • argv[1] = example

    Case1: Empty String

    • Initial State
      • Buffer Content 0 | HEX: 00
    • After Tokenizing
      • Buffer Content 0 | HEX: 00
    • Argv content (argc = 0): Argv Is Empty

    Case2: Extra Space

    • Initial State
      • Buffer Content extra  space here0 | HEX: 65 78 74 72 61 20 20 73 70 61 63 65 20 68 65 72 65 00
    • After Tokenizing
      • Buffer Content extra0 space0here0 | HEX: 65 78 74 72 61 00 20 73 70 61 63 65 00 68 65 72 65 00
    • Argv content (argc = 3):
      • argv[0] = extra
      • argv[1] = space
      • argv[2] = here

    Case3: One Word String

    • Initial State
      • Buffer Content one-word0 | HEX: 6F 6E 65 2D 77 6F 72 64 00
    • After Tokenizing
      • Buffer Content one-word0 | HEX: 6F 6E 65 2D 77 6F 72 64 00
    • Argv content (argc = 1):
      • argv[0] = one-word

    Missing string.h or strtok_r() in your toolchain somehow?

    If for some reason your toolchain does not have strtok_r(), you can use this simplified version of strtok_r(). It is a modified version of the GNU C implementation of strtok_r(), simplified to support only the space character.

    To use this, just place it on top of tokenise_to_argc_argv() then replace strtok_r( NULL, " ", &buffer) with strtok_space(&buffer)

    /**-----------------------------------------------------------------------------
      @brief Simplified, space-delimited-only version of strtok_r()

      - save_ptr : In/Out pointer to the current scan position in the string.
                    This function advances it past the token it returns and
                    marks the token boundary by writing a `\0` over the space
                    that follows the token. Calling again with the same
                    save_ptr yields the next token.

      Returns:
        - NULL  : No token found
        - pointer to start of a discovered token

    ------------------------------------------------------------------------------*/
    char * strtok_space(char **save_ptr)
    { /* strtok_space is slightly modified from GNU C Library `strtok_r()`  implementation.
          Thus this function is also licenced as GNU Lesser General Public License*/
      char *token = *save_ptr;
      char *scan  = NULL;

      /* Step over any leading spaces.  */
      while (' ' == *token) token++;

      /* Only spaces (or nothing at all) remained: no token.  */
      if ('\0' == *token)
      {
        *save_ptr = token;
        return NULL;
      }

      /* Walk to the first space or to the end of the string.  */
      for (scan = token; ('\0' != *scan) && (' ' != *scan); scan++) { }

      /* A space ends the token: terminate it and resume just after it.
         At the end of the string, resume on the terminator itself.  */
      if (' ' == *scan)
      {
        *scan = '\0';
        scan++;
      }
      *save_ptr = scan;
      return token;
    }
    
    0 讨论(0)
  • 2020-11-28 07:58

    Here's my contribution. It's nice and short, but things to be wary of are:

    • The use of strtok modifies the original "commandLine" string, replacing the spaces with \0 end-of-string delimiters
    • argv[] ends up pointing into "commandLine", so don't modify it until you're finished with argv[].

    The code:

    /* Split commandLine on spaces into argc/argv without copying any text.
       NOTE(review): commandLine is not defined in this fragment; presumably it
       is a modifiable, NUL-terminated char buffer owned by the caller. */
    enum { kMaxArgs = 64 };  /* capacity of argv[], including the NULL terminator slot */
    int argc = 0;
    char *argv[kMaxArgs];
    
    /* strtok() writes a '\0' at the end of each token inside commandLine and
       keeps its scan position in internal static state. */
    char *p2 = strtok(commandLine, " ");
    /* Stop one slot early so argv[argc] below is always in bounds. */
    while (p2 && argc < kMaxArgs-1)
      {
        argv[argc++] = p2;    /* points into commandLine, not a copy */
        p2 = strtok(0, " ");  /* a null first argument continues the previous scan */
      }
    argv[argc] = 0;           /* null pointer terminates the vector */
    

    You can now use argc and argv, or pass them to other functions declared like "foo(int argc, char **argv)".

    0 讨论(0)
  • 2020-11-28 07:58

    I just did this for an embedded project in plain C, where I have a little CLI that parses serial port input and executes a limited set of commands with the parameters.

    This is probably not the neatest, but as small and efficient as I could get it:

    /*
     * Split a string on single space characters into a heap-allocated vector.
     *
     * args : NUL-terminated input; it is copied, never modified.
     * argc : out, receives the number of entries.
     * aa   : out, receives the vector; entry [*argc] is a NULL terminator.
     *
     * Every space is a separator, so consecutive spaces produce empty-string
     * entries and an empty input produces one empty entry.
     *
     * Ownership: all entries point into one heap copy of the input, whose start
     * is (*aa)[0]. The caller releases everything with free((*aa)[0]) followed
     * by free(*aa).
     *
     * Returns the number of entries, or -1 on a NULL argument or allocation
     * failure (in which case *argc is 0 and *aa is NULL where those pointers
     * are usable).
     */
    int makeargs(char *args, int *argc, char ***aa) {
        if (argc != NULL) {
            *argc = 0;
        }
        if (aa != NULL) {
            *aa = NULL;
        }
        if (args == NULL || argc == NULL || aa == NULL) {
            return -1;
        }

        /* Private writable copy; malloc + memcpy avoids depending on POSIX strdup. */
        size_t len = strlen(args);
        char *buf = malloc(len + 1);
        if (buf == NULL) {
            return -1;
        }
        memcpy(buf, args, len + 1);

        /* Count entries first so the vector is allocated once, not grown per entry. */
        int c = 1;
        for (const char *p = buf; *p != '\0'; p++) {
            if (*p == ' ') {
                c++;
            }
        }

        /* One extra zeroed slot leaves argv[c] == NULL. */
        char **argv = calloc((size_t)c + 1, sizeof *argv);
        if (argv == NULL) {
            free(buf);
            return -1;
        }

        /* Cut the copy at every space and record where each piece starts. */
        int next = 0;
        argv[next++] = buf;
        for (char *p = buf; *p != '\0'; p++) {
            if (*p == ' ') {
                *p = '\0';
                argv[next++] = p + 1;
            }
        }

        *argc = c;
        *aa = argv;

        return c;
    }
    

    to test:

    /* Exercise makeargs() on a fixed sentence and print one argument per line. */
    int main(void) {
        char **myargs = NULL;
        int argc = 0;

        int numargs = makeargs("Hello world, this is a test", &argc, &myargs);
        if (numargs <= 0 || myargs == NULL) {
            return (EXIT_FAILURE);
        }

        for (int i = 0; i < numargs; i++) {
            printf("%s\r\n", myargs[i]);
        }

        /* makeargs() hands back two allocations: the copied string, whose start
           is myargs[0], and the pointer vector itself. */
        free(myargs[0]);
        free(myargs);

        return (EXIT_SUCCESS);
    }
    
    0 讨论(0)
  • 2020-11-28 07:58

    This one I wrote also considers quotes (but not nested)

    Feel free to contribute.

    /*
    Tokenize string considering also quotes.
    By Zibri <zibri AT zibri DOT org>
    https://github.com/Zibri/tokenize
    */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <stdbool.h>
    
    /*
    Split argv[1] on spaces and print each resulting argument as "N: text".
    A run of words wrapped in matching single or double quotes is printed as one
    argument with the quotes removed. Quotes do not nest, and a quote character
    is only recognised at the start or end of a space-separated word.

    argv[1] is modified in place.

    Exit status: EXIT_SUCCESS, or EXIT_FAILURE on wrong usage or when a quote is
    opened but never closed.
    */
    int main(int argc, char *argv[])
    {
      if (argc != 2) {
        fprintf(stderr, "Usage: %s string\n", argv[0]);
        exit(EXIT_FAILURE);
      }

      char *input = argv[1];
      /* Address of the string's own terminator; it must never be overwritten. */
      const char *input_end = input + strlen(input);
      char *qstart = NULL;     /* first character after the opening quote */
      char open_quote = '\0';  /* quote character currently open, '\0' if none */
      int index = 1;           /* number printed in front of the next argument */

      for (char *token = strtok(input, " "); token != NULL; token = strtok(NULL, " ")) {
        size_t len = strlen(token);  /* strtok never returns an empty token */
        bool opened_here = false;

        /* A leading quote opens a quoted argument only when none is open yet;
           inside a quoted argument it is ordinary text. */
        if (open_quote == '\0' && (token[0] == '\'' || token[0] == '"')) {
          open_quote = token[0];
          qstart = token + 1;
          opened_here = true;
        }

        if (open_quote == '\0') {
          printf("%d: %s\n", index++, token);
          continue;
        }

        /* Only the same quote character closes the argument, and a lone quote
           that has just opened it cannot close it as well. */
        bool closes = (token[len - 1] == open_quote) && !(opened_here && len == 1);
        if (closes) {
          token[len - 1] = '\0';
          printf("%d: %s\n", index++, qstart);
          open_quote = '\0';
        } else if (token + len < input_end) {
          /* strtok replaced the separator with '\0'; put the space back so the
             quoted argument stays one contiguous string. Skipped for the last
             word, where that byte is the end of the input itself. */
          token[len] = ' ';
        }
      }

      if (open_quote != '\0') {
        fprintf(stderr, "String quoting error\n");
        return EXIT_FAILURE;
      }
      return EXIT_SUCCESS;
    }
    

    Example output:

    $ ./tokenize "1 2 3 '4 5 6' 7 8 \"test abc\" 10 11"
    1: 1
    2: 2
    3: 3
    4: 4 5 6
    5: 7
    6: 8
    7: test abc
    8: 10
    9: 11
    
    0 讨论(0)
  • 2020-11-28 08:02

    I ended up writing a function to do this myself. I don't think it's very good, but it works for my purposes - feel free to suggest improvements for anyone else who needs this in the future:

    /* Heap-copy the first len bytes at start as a NUL-terminated string; NULL if allocation fails. */
    static char* copyToken(const char* start, size_t len){
        char* copy = malloc(len + 1);
        if (copy != NULL){
            memcpy(copy, start, len);
            copy[len] = '\0';
        }
        return copy;
    }

    /*
     * Build a main()-style argument vector from a space-separated command line.
     *
     * cmdLineTxt : text to split; it is only read. Runs of spaces count as one
     *              separator. NULL is treated as an empty line.
     * argv       : out, receives a heap-allocated vector. Entry 0 is the
     *              placeholder program name "test", entries 1..*argc-1 are the
     *              arguments, and entry *argc is NULL.
     * argc       : out, receives the entry count including the program name.
     *
     * Ownership: every entry and the vector itself are separate heap
     * allocations that the caller must free().
     *
     * On allocation failure nothing is leaked, *argv is NULL and *argc is 0.
     */
    void parseCommandLine(char* cmdLineTxt, char*** argv, int* argc){
        const char* text = (cmdLineTxt != NULL) ? cmdLineTxt : "";
        const char* cursor;
        char** vec;
        int count = 1;  // slot 0 is reserved for the program name
        int next = 1;
        int ok;

        *argv = NULL;
        *argc = 0;

        // First, count the number of arguments
        cursor = text + strspn(text, " ");
        while (*cursor != '\0'){
            count++;
            cursor += strcspn(cursor, " ");
            cursor += strspn(cursor, " ");
        }

        // Zeroed vector with one extra slot, so vec[count] is already the NULL terminator
        vec = calloc((size_t)count + 1, sizeof *vec);
        if (vec == NULL){
            return;
        }

        vec[0] = copyToken("test", 4); // The program name would normally go in here
        ok = (vec[0] != NULL);

        // Second pass: copy each argument straight out of the unmodified input
        cursor = text + strspn(text, " ");
        while (ok && *cursor != '\0'){
            size_t len = strcspn(cursor, " ");
            vec[next] = copyToken(cursor, len);
            ok = (vec[next] != NULL);
            next++;
            cursor += len;
            cursor += strspn(cursor, " ");
        }

        if (!ok){
            // Unfilled slots are NULL, which free() accepts
            for (int i = 0; i < count; i++){
                free(vec[i]);
            }
            free(vec);
            return;
        }

        *argv = vec;
        *argc = count;
    }
    
    0 讨论(0)
  • 2020-11-28 08:03

    Matt Pietrek's LIBCTINY has a module called argcargv.cpp that takes a string and parses it out to the argument array, taking quoted arguments into account. Note that it's Windows-specific, but it's pretty simple, so it should be easy to move to whatever platform you want.

    0 讨论(0)
提交回复
热议问题