Is there a way in C to parse a piece of text and obtain values for argv and argc, as if the text had been passed to an application on the command line?
This doesn\'
Unfortunately this is C++ rather than C, but for others who might search for this kind of library I recommend:
ParamContainer - easy-to-use command-line parameter parser
Really small and really easy.
p.addParam("long-name", 'n', ParamContainer::regular,
"parameter description", "default_value");
programname --long-name=value
cout << p["long-name"];
>> value
From my experience:
If the glib solution is overkill for your case, you may consider coding one yourself.
Then you can:
The diagram below should clarify (hopefully):
aa bbb ccc "dd d" ee <- original string
aa0bbb0ccc00dd d00ee0 <- transformed string
| | | | |
argv[0] __/ / / / /
argv[1] ____/ / / /
argv[2] _______/ / /
argv[3] ___________/ /
argv[4] ________________/
A possible API could be:
char **parseargs(char *arguments, int *argc);
void freeparsedargs(char **argv);
You will need additional considerations to implement freeparsedargs() safely.
If your string is very long and you don't want to scan it twice, you may consider alternatives such as allocating more elements for the argv array up front (and reallocating if needed).
EDIT: Proposed solution (doesn't handle quoted arguments).
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Split `args` on whitespace.
 *
 * With argv == NULL this only counts the words and leaves `args` untouched.
 * With argv != NULL it stores a pointer to the start of each word in
 * argv[0..count-1] and overwrites the first whitespace character after each
 * word with '\0', so `args` is modified in place.
 *
 * Returns the number of words found. Quoting is not interpreted.
 */
static int setargs(char *args, char **argv)
{
    int count = 0;

    /* <ctype.h> classifiers need a value representable as unsigned char;
       passing a negative plain char is undefined behavior. */
    while (isspace((unsigned char)*args))
        ++args;

    while (*args) {
        if (argv)
            argv[count] = args;
        while (*args && !isspace((unsigned char)*args))
            ++args;
        if (argv && *args)
            *args++ = '\0';
        while (isspace((unsigned char)*args))
            ++args;
        count++;
    }
    return count;
}

/*
 * Build an argv-style vector from a whitespace-separated string.
 *
 * args: string to split; it is copied, never modified and never freed here.
 * argc: receives the number of words (0 when NULL is returned); may be NULL.
 *
 * Returns a vector of `*argc` strings followed by a NULL terminator, or NULL
 * if `args` is NULL, empty, all whitespace, or an allocation fails.
 *
 * Layout of the allocation: the slot just before the returned pointer
 * (argv[-1]) holds the private copy of the string that all words point into.
 * freeparsedargs() relies on that hidden slot to release both allocations.
 */
char **parsedargs(char *args, int *argc)
{
    char **argv = NULL;
    char *copy = NULL;
    int argn = 0;

    if (args && *args) {
        size_t size = strlen(args) + 1;

        copy = malloc(size);
        if (copy) {
            memcpy(copy, args, size);
            argn = setargs(copy, NULL);   /* first pass: count only */
        }
    }

    if (argn > 0) {
        /* one hidden slot in front + argn words + NULL terminator */
        char **block = malloc(((size_t)argn + 2) * sizeof *block);

        if (block) {
            block[0] = copy;
            argv = block + 1;
            argn = setargs(copy, argv);   /* second pass: split in place */
            argv[argn] = NULL;
        }
    }

    if (!argv) {
        /* Only ever free our own copy, never the caller's string. */
        free(copy);
        argn = 0;
    }

    if (argc)
        *argc = argn;
    return argv;
}
/*
 * Release a vector whose allocation starts one slot before `argv`.
 *
 * The slot at argv[-1] is expected to hold a heap pointer (the string
 * storage); it is freed first, then the pointer array itself, which begins
 * at argv - 1. Passing NULL is a no-op.
 */
void freeparsedargs(char **argv)
{
    char **block;

    if (argv == NULL)
        return;

    block = argv - 1;   /* true start of the allocated array */
    free(block[0]);
    free(block);
}
/*
 * Demo driver: splits the first command-line argument (if any) with
 * parsedargs(), prints the word count and each word, then releases the
 * vector with freeparsedargs().
 */
int main(int argc, char *argv[])
{
    int ac;
    char *as = (argc > 1) ? argv[1] : NULL;
    char **av = parsedargs(as, &ac);
    int i = 0;

    printf("== %d\n", ac);
    while (i < ac) {
        printf("[%s]\n", av[i]);
        i++;
    }

    freeparsedargs(av);
    return 0;
}
I'm surprised nobody has provided the simplest answer using standard POSIX functionality:
http://www.opengroup.org/onlinepubs/9699919799/functions/wordexp.html
My project requires breaking a string into argc and argv.
I found some excellent code by Torek, but it alters the input buffer, so I made some modifications to fit my needs.
I just put a little bit more to handle quote mixing when input in the command line so the behavior is more (not completely) like Linux Shell.
Note: This function doesn't edit the original string, so you can reuse the input buffer (error report,etc).
/*
 * Hook called on every extracted argument. Intentionally empty: implement it
 * yourself to strip quote characters if the result should match what a Linux
 * shell would produce.
 */
void remove_quote(char* input){
    (void)input;
}

/* Copy `len` bytes from `start` into `dst`, NUL-terminate, run the quote hook. */
static void cmd_param_store(char *dst, const char *start, size_t len)
{
    memcpy(dst, start, len);
    dst[len] = '\0';   /* do not rely on the caller having zeroed dst */
    remove_quote(dst);
}

/*
 * Split `buffer` into whitespace-separated arguments without modifying it.
 *
 * buffer:        NUL-terminated input line; only read, never written.
 * argv:          array of at least `argv_max_size` writable buffers supplied
 *                by the caller. Each extracted argument is copied into the
 *                next buffer. Buffer sizes cannot be checked here, so every
 *                buffer must hold at least strlen(buffer) + 1 bytes to be
 *                safe for any input.
 * argv_max_size: maximum number of arguments to extract; parsing stops once
 *                that many have been stored.
 *
 * Quoting rules:
 *  - A token that starts with ' or " runs to the matching quote character;
 *    the surrounding quotes are not copied.
 *  - Inside a bare word, a quoted section keeps its quotes and protects
 *    embedded whitespace (e.g. 2x2="a b" is one argument, quotes included).
 *
 * If a quote is still open at the end of input, a warning plus the parsed
 * arguments and the original buffer are printed to stdout.
 *
 * Returns the number of arguments stored (0 if buffer or argv is NULL).
 */
size_t cmd_param_split(char *buffer, char *argv[], size_t argv_max_size)
{
    enum states { DULL = 0, IN_WORD, IN_STRING } state = DULL;
    char *p;
    char *start_of_word = NULL;
    size_t argc = 0;
    size_t i;
    int quote = 0;   /* currently open quote character, 0 if none */

    if (buffer == NULL || argv == NULL)
        return 0;

    for (p = buffer; argc < argv_max_size && *p != '\0'; p++) {
        int c = (unsigned char)*p;

        switch (state) {
        case DULL:
            if (isspace(c))
                break;
            if (c == '"' || c == '\'') {
                /* token is a quoted string: skip the opening quote */
                quote = c;
                state = IN_STRING;
                start_of_word = p + 1;
            } else {
                state = IN_WORD;
                start_of_word = p;
            }
            break;

        case IN_STRING:
            /* only the same quote character that opened the string ends it */
            if (c == quote) {
                quote = 0;
                cmd_param_store(argv[argc], start_of_word,
                                (size_t)(p - start_of_word));
                argc++;
                state = DULL;
            }
            break;

        case IN_WORD:
            if (quote == 0 && (c == '"' || c == '\''))
                quote = c;
            else if (quote == c)
                quote = 0;
            /* whitespace ends the word only outside a quoted section */
            if (isspace(c) && quote == 0) {
                cmd_param_store(argv[argc], start_of_word,
                                (size_t)(p - start_of_word));
                argc++;
                state = DULL;
            }
            break;

        default:
            break;
        }
    }

    /* flush the token that was still open when the input ended */
    if (state != DULL && argc < argv_max_size) {
        cmd_param_store(argv[argc], start_of_word,
                        (size_t)(p - start_of_word));
        argc++;
    }

    if (quote) {
        printf("WARNING: Quote is unbalanced. This could lead to unwanted-behavior\n");
        for (i = 0; i < argc; i++)
            printf("arg %d = [%s]\n", (int)i, argv[i]);
        printf("Original buffer: [%s]\n", buffer);
    }
    return argc;
}
/*
 * Demo driver: allocates a fixed set of zeroed argument buffers, splits a
 * sample command line with cmd_param_split(), prints each argument and
 * releases the buffers. Returns 1 if a buffer cannot be allocated.
 */
int main(void)
{
    enum { MAX_ARGS = 64, ARG_SIZE = 256 };
    char *argv[MAX_ARGS];
    int argc;
    int i;

    for (i = 0; i < MAX_ARGS; i++) {
        /* calloc gives zero-filled storage and lets us detect failure
           before the buffer is ever written to */
        argv[i] = calloc(ARG_SIZE, 1);
        if (argv[i] == NULL) {
            while (i-- > 0)
                free(argv[i]);
            return 1;
        }
    }

    char buffer[] = "1 2 3 \'3 4\"567\' \"bol\'obala\" 2x2=\"foo\"";

    argc = (int)cmd_param_split(buffer, argv, MAX_ARGS);
    for (i = 0; i < argc; i++)
        printf("arg %d = [%s]\n", i, argv[i]);

    for (i = 0; i < MAX_ARGS; i++)
        free(argv[i]);
    return 0;
}
Tested with below strings
1. "1 2 3 \'3 4\"567\' \"bol\'obala\" 2x2=\"foo\""
arg 0 = [1]
arg 1 = [2]
arg 2 = [3]
arg 3 = [3 4"567]
arg 4 = [bol'obala]
arg 5 = [2x2="foo"]
2. "./foo bar=\"Hanoi HoChiMinh\" exp='foo123 \"boo111' mixquote \"hanoi \'s\""
arg 0 = [./foo]
arg 1 = [bar="Hanoi HoChiMinh"]
arg 2 = [exp='foo123 "boo111']
arg 3 = [mixquote]
arg 4 = [hanoi 's]
However, Linux shell would remove quotes, even in mixed case, as below when running from cmd line, tested in a RaspberryPi.
./foo bar="Hanoi HoChiMinh" exp='foo123 "boo111' mixquote "hanoi 's"
arg 0 = [./foo]
arg 1 = [bar=Hanoi HoChiMinh]
arg 2 = [exp=foo123 "boo111]
arg 3 = [mixquote]
arg 4 = [hanoi 's]
So if you really want to mimic the Linux shell's behavior completely, you need to put a little more effort into stripping the quotes yourself, by implementing the remove_quote() function that I left blank above.
Here's a solution for both Windows and Unix (tested on Linux, OSX and Windows). Tested with Valgrind and Dr. Memory.
It uses wordexp for POSIX systems, and CommandLineToArgvW for Windows.
Note that for the Windows solution, most of the code is converting between char ** and wchar_t ** with the beautiful Win32 API, since there is no CommandLineToArgvA (ANSI version) available.
#ifdef _WIN32
#include <windows.h>
#else
#include <wordexp.h>
#endif
/*
 * Split a command line into an argv-style vector using the platform's own
 * parser: wordexp() on POSIX systems, CommandLineToArgvW() on Windows.
 *
 * cmdline: command line to split; NULL yields NULL.
 * argc:    must not be NULL (asserted). Receives the number of arguments,
 *          or 0 whenever NULL is returned.
 *
 * Returns a heap-allocated array of `*argc` heap-allocated strings followed
 * by a NULL terminator. The caller frees each string and then the array.
 * Returns NULL on parse or allocation failure.
 *
 * NOTE(review): with flags == 0, wordexp() expands shell variables and, per
 * POSIX, performs command substitution. Do not feed it untrusted input
 * without considering WRDE_NOCMD.
 */
char **split_commandline(const char *cmdline, int *argc)
{
    size_t i;
    size_t count = 0;
    char **argv = NULL;

    assert(argc);
    *argc = 0;

    if (!cmdline)
    {
        return NULL;
    }

    // Posix.
    #ifndef _WIN32
    {
        wordexp_t p;
        int rc;

        // Note! This expands shell variables.
        rc = wordexp(cmdline, &p, 0);
        if (rc != 0)
        {
            // Only WRDE_NOSPACE leaves partially expanded words behind.
            if (rc == WRDE_NOSPACE)
            {
                wordfree(&p);
            }
            return NULL;
        }

        count = p.we_wordc;

        // One extra zeroed slot: NULL terminator, and never a zero-size calloc.
        if (!(argv = calloc(count + 1, sizeof *argv)))
        {
            goto fail;
        }

        for (i = 0; i < count; i++)
        {
            size_t size = strlen(p.we_wordv[i]) + 1;

            if (!(argv[i] = malloc(size)))
            {
                goto fail;
            }
            memcpy(argv[i], p.we_wordv[i], size);
        }

        wordfree(&p);
        *argc = (int)count;
        return argv;

    fail:
        wordfree(&p);
    }
    #else // WIN32
    {
        wchar_t **wargs = NULL;
        wchar_t *cmdlinew = NULL;
        int wargc = 0;
        size_t len = strlen(cmdline) + 1;

        if (!(cmdlinew = calloc(len, sizeof *cmdlinew)))
            goto fail;

        if (!MultiByteToWideChar(CP_ACP, 0, cmdline, -1, cmdlinew, (int)len))
            goto fail;

        if (!(wargs = CommandLineToArgvW(cmdlinew, &wargc)))
            goto fail;

        count = (size_t)wargc;

        // One extra zeroed slot serves as the NULL terminator.
        if (!(argv = calloc(count + 1, sizeof *argv)))
            goto fail;

        // Convert from wchar_t * to ANSI char *
        for (i = 0; i < count; i++)
        {
            // Get the size needed for the target buffer.
            // CP_ACP = Ansi Codepage.
            int needed = WideCharToMultiByte(CP_ACP, 0, wargs[i], -1,
                                             NULL, 0, NULL, NULL);
            if (needed <= 0)
                goto fail;

            if (!(argv[i] = malloc((size_t)needed)))
                goto fail;

            // Do the conversion.
            if (!WideCharToMultiByte(CP_ACP, 0, wargs[i], -1,
                                     argv[i], needed, NULL, NULL))
                goto fail;
        }

        LocalFree(wargs);
        free(cmdlinew);
        *argc = wargc;
        return argv;

    fail:
        if (wargs) LocalFree(wargs);
        free(cmdlinew);
    }
    #endif // WIN32

    // Failure: release whatever was built. Unfilled slots are NULL from calloc.
    if (argv)
    {
        for (i = 0; i < count; i++)
        {
            free(argv[i]);
        }
        free(argv);
    }

    *argc = 0;
    return NULL;
}
The always-wonderful glib has g_shell_parse_argv(), which sounds like what you're after.
If you're not interested in even quoting, this might be overkill. All you need to do is tokenize, using whitespace as a token character. Writing a simple routine to do that shouldn't take long, really.
If you're not super-stingy on memory, doing it in one pass without reallocations should be easy: just assume the worst case of every second character being a space, so a string of n characters contains at most (n + 1) / 2 arguments and (of course) at most n bytes of argument text (excluding terminators).