How do I write a function to split and return an array for a string with delimiters in the C programming language?
char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
This optimized method creates (or updates an existing) array of pointers in *result and returns the number of elements in *count.
Use "max" to indicate the maximum number of strings you expect (when you specify an existing array, or for any other reason); otherwise set it to 0.
To compare against a list of delimiters, define delim as a char* and replace the line:
if (str[i]==delim) {
with the two following lines:
char *c=delim; while(*c && *c!=str[i]) c++;
if (*c) {
Enjoy
#include <stdlib.h>
#include <string.h>
/**
 * Split a buffer in place on a single delimiter character.
 *
 * Each delimiter found in the first `len` bytes of `str` is overwritten with
 * a NUL byte and a pointer to the start of every piece is stored in an array.
 * The pieces point INTO `str` (nothing is copied), so `str` must be writable
 * and must outlive the array.
 *
 * @param str    Writable buffer to split (modified in place).
 * @param len    Number of bytes of `str` to scan.
 * @param delim  Delimiter character.
 * @param result In/out. If *result is non-NULL it is used as the output array
 *               and must have room for every piece found (at most `max`
 *               entries when `max` is non-zero). If *result is NULL, an array
 *               of exactly *count entries is allocated with malloc() and
 *               stored in *result; the caller releases it with free().
 * @param count  Out. Number of pieces stored; at least 1 once scanning starts.
 * @param max    Maximum number of pieces, or 0 for no limit. Once the limit is
 *               reached the rest of the buffer, delimiters included, is the
 *               last piece.
 * @return The output array, or NULL if an argument is NULL or malloc() fails.
 *
 * To compare against a list of delimiters, define delim as a char* and
 * replace the test `str[i] == delim` with:
 *     char *c = delim; while (*c && *c != str[i]) c++;
 *     if (*c) { ... }
 */
char **split(char *str, size_t len, char delim, char ***result, unsigned long *count, unsigned long max) {
    char **pieces;
    char *cursor;
    size_t i;

    if (!str || !result || !count) {
        return NULL;
    }

    // there is at least one string returned
    *count = 1;
    pieces = *result;

    // when the result array is specified, fill it during the first pass
    if (pieces) {
        pieces[0] = str;
    }

    // scan the string for delimiters, up to the specified length
    for (i = 0; i < len; ++i) {
        if (str[i] != delim) {
            continue;
        }
        // Check the limit BEFORE splitting: testing it only after the
        // increment never fires for max == 1 (count is already 1), which
        // would split the whole string and overrun a 1-slot caller array.
        if (max && *count >= max) {
            break;
        }
        // replace delimiter with zero
        str[i] = 0;
        if (pieces) {
            pieces[*count] = str + i + 1;
        }
        // one more piece for each separator consumed
        ++(*count);
    }

    // when the result array is specified, we are done here
    if (pieces) {
        return pieces;
    }

    // else allocate memory for the result and fill it in a second pass
    pieces = malloc((*count) * sizeof *pieces);
    *result = pieces;
    if (!pieces) {
        return NULL;
    }

    // The first pass left exactly (*count - 1) NUL bytes where delimiters
    // were, so hopping from terminator to terminator finds every piece.
    cursor = str;
    pieces[0] = cursor;
    for (i = 1; i < *count; ++i) {
        while (*cursor) {
            ++cursor;
        }
        ++cursor;
        pieces[i] = cursor;
    }
    return pieces;
}
Usage example:
#include <stdio.h>
/*
 * Demo: split the same month list twice -- first into a caller-supplied
 * 6-slot array (max = 6), then into an array that split() allocates (max = 0).
 */
int main(int argc, char **argv) {
    const char *str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    // split() overwrites delimiters in its input, so each call gets its own
    // writable copy; keep the pointers so the copies can be freed.
    char *copy = strdup(str);
    char *copy2 = strdup(str);
    // if this allocation fails, result is NULL and split() allocates instead
    char **result = malloc(6 * sizeof *result);
    char **result2 = NULL;
    unsigned long count = 0;
    unsigned long count2 = 0;
    unsigned long i;
    int rc = 1;

    (void)argc;
    (void)argv;

    if (!copy || !copy2) {
        goto out;
    }

    split(copy, strlen(str), ',', &result, &count, 6);
    split(copy2, strlen(str), ',', &result2, &count2, 0);

    if (result) {
        for (i = 0; i < count; ++i) {
            printf("%s\n", result[i]);
        }
    }
    printf("\n");
    if (result2) {
        for (i = 0; i < count2; ++i) {
            printf("%s\n", result2[i]);
        }
    }
    rc = 0;

out:
    // the arrays only hold pointers into the copies: free arrays, then copies
    free(result2);
    free(result);
    free(copy2);
    free(copy);
    return rc;
}
My version:
/**
 * Split `str` on `delimeter` into newly allocated copies of each token.
 *
 * Leading, trailing and repeated delimiters are skipped, so no empty token is
 * produced between delimiters. An input that contains no token at all (empty
 * string, or delimiters only) still yields ONE empty-string token. `str`
 * itself is not modified.
 *
 * @param str       NUL-terminated input string.
 * @param delimeter Delimiter character. '\0' never matches, so the whole
 *                  string is returned as a single token.
 * @param args      Out. Receives a malloc()ed array of malloc()ed strings;
 *                  the caller frees every element and then the array.
 * @return Number of tokens stored in *args (always >= 1 on success), or 0 if
 *         `str`/`args` is NULL or an allocation fails (*args is then NULL).
 */
int split(char* str, const char delimeter, char*** args) {
    if (!args) {
        return 0;
    }
    *args = NULL;
    if (!str) {
        return 0;
    }

    /* skip leading delimiters; stop at the terminator even if delimeter is '\0' */
    const char *cur = str;
    while (*cur != '\0' && *cur == delimeter) {
        cur++;
    }

    /* one token, plus one for every delimiter directly followed by token text */
    int cnt = 1;
    for (const char *p = cur; *p != '\0'; p++) {
        if (*p == delimeter && p[1] != delimeter && p[1] != '\0') {
            cnt++;
        }
    }

    char **list = malloc(sizeof *list * (size_t)cnt);
    if (!list) {
        return 0;
    }

    for (int i = 0; i < cnt; i++) {
        const char *tok = cur;
        while (*cur != delimeter && *cur != '\0') {
            cur++;
        }
        size_t n = (size_t)(cur - tok);

        char *copy = malloc(n + 1);
        if (!copy) {
            /* undo the partial result so nothing leaks */
            while (i > 0) {
                free(list[--i]);
            }
            free(list);
            return 0;
        }
        memcpy(copy, tok, n);
        copy[n] = '\0';
        list[i] = copy;

        /* step over the run of delimiters that ended this token */
        while (*cur != '\0' && *cur == delimeter) {
            cur++;
        }
    }

    *args = list;
    return cnt;
}
If you are willing to use an external library, I can't recommend bstrlib enough. It takes a little extra setup, but is easier to use in the long run.
For example, to split the string below, one first creates a bstring with the bfromcstr() call. (A bstring is a wrapper around a char buffer.)
Next, split the string on commas, saving the result in a struct bstrList, which has the fields qty and entry; entry is an array of bstrings.
bstrlib has many other functions to operate on bstrings.
Easy as pie...
#include "bstrlib.h"
#include <stdio.h>
/* Demo: split a comma-separated C string with bstrlib and print each field. */
int main() {
    char *text = "Hello,World,sak";
    bstring source = bfromcstr(text);
    struct bstrList *parts = bsplit(source, ',');
    int idx = 0;

    printf("num %d\n", parts->qty);
    while (idx < parts->qty) {
        /* NOTE(review): bstr2cstr() presumably returns a freshly allocated
         * buffer that is never released here, and neither source nor parts is
         * destroyed -- confirm against the bstrlib documentation and add the
         * matching cleanup calls. */
        printf("%d: %s\n", idx, bstr2cstr(parts->entry[idx], '_'));
        idx++;
    }
}
I think strsep
is still the best tool for this:
/* Pass each comma-separated token of str to my_fn; the loop ends when strsep() returns NULL. */
while ((token = strsep(&str, ","))) my_fn(token);
That is literally one line that splits a string.
The extra parentheses are a stylistic element to indicate that we're intentionally testing the result of an assignment, not an equality operator ==
.
For that pattern to work, token
and str
both have type char *
. If you started with a string literal, then you'd want to make a copy of it first:
// More general pattern:
const char *my_str_literal = "JAN,FEB,MAR";
char *token, *str, *tofree;
tofree = str = strdup(my_str_literal); // We own str's memory now.
while ((token = strsep(&str, ","))) my_fn(token);
free(tofree);
If two delimiters appear together in str
, you'll get a token
value that's the empty string. The value of str
is modified in that each delimiter encountered is overwritten with a zero byte - another good reason to copy the string being parsed first.
In a comment, someone suggested that strtok
is better than strsep
because strtok
is more portable. Ubuntu and Mac OS X have strsep
; it's safe to guess that other unixy systems do as well. Windows lacks strsep, but it has strpbrk, which enables this short and sweet strsep replacement:
/*
 * Portable strsep() built on strpbrk().
 *
 * Returns the token that starts at *stringp, or NULL when *stringp is already
 * NULL. The first character of the token that appears in delim is overwritten
 * with '\0' and *stringp is moved just past it; if no such character exists,
 * *stringp becomes NULL so that the following call returns NULL.
 */
char *strsep(char **stringp, const char *delim) {
    char *begin = *stringp;
    char *hit;

    if (begin == NULL) {
        return NULL;
    }

    hit = strpbrk(begin, delim);
    if (hit == NULL) {
        /* last token: nothing left for the next call */
        *stringp = NULL;
    } else {
        *hit = '\0';
        *stringp = hit + 1;
    }
    return begin;
}
Here is a good explanation of strsep
vs strtok
. The pros and cons may be judged subjectively; however, I think it's a telling sign that strsep
was designed as a replacement for strtok
.
My code (tested):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Tokenize `str` in place with strtok() and collect the token pointers.
 *
 * The tokens are NOT copied: every entry of the result points into `str`,
 * whose delimiters strtok() overwrites with NUL bytes. Because strtok() keeps
 * hidden static state, this function is neither reentrant nor thread-safe.
 *
 * @param str    Writable, NUL-terminated string to tokenize.
 * @param delim  Set of delimiter characters, as for strtok().
 * @param array  Out. Receives a malloc()ed array of token pointers, or NULL
 *               when no token was found; the caller releases it with free().
 * @param length Out. Number of tokens stored in *array.
 * @return 1 on success, 0 if an argument is NULL or memory runs out (in which
 *         case *array is NULL and *length is 0).
 */
int dtmsplit(char *str, const char *delim, char ***array, int *length ) {
    char **res = NULL;
    char *token;
    int i = 0;

    if (!array || !length) {
        return 0;
    }
    *array = NULL;
    *length = 0;
    if (!str || !delim) {
        return 0;
    }

    for (token = strtok(str, delim); token != NULL; token = strtok(NULL, delim)) {
        /* grow through a temporary so the old block is not lost on failure */
        char **grown = realloc(res, ((size_t)i + 1) * sizeof *grown);
        if (!grown) {
            free(res);
            return 0;
        }
        res = grown;
        res[i++] = token;
    }

    *array = res;
    *length = i;
    return 1;
}
/* Demo driver: tokenize a month list with dtmsplit() and print every token. */
int main()
{
    char str[80] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char **arr = NULL;
    int count = 0;
    int i;

    /* str is tokenized in place; the entries of arr point into it */
    if (!dtmsplit(str, ",", &arr, &count)) {
        fprintf(stderr, "dtmsplit failed\n");
        return 1;
    }

    printf("Found %d tokens.\n", count);
    for (i = 0; i < count; i++) {
        printf("string #%d: %s\n", i, arr[i]);
    }

    /* only the pointer array was allocated; the tokens live inside str */
    free(arr);
    return(0);
}
Result:
Found 12 tokens.
string #0: JAN
string #1: FEB
string #2: MAR
string #3: APR
string #4: MAY
string #5: JUN
string #6: JUL
string #7: AUG
string #8: SEP
string #9: OCT
string #10: NOV
string #11: DEC
Below is my strtok()
implementation from zString library.
zstring_strtok()
differs from standard library's strtok()
in the way it treats consecutive delimiters.
Just have a look at the code below; I'm sure you will get an idea of how it works (I tried to use as many comments as I could).
/**
 * strtok()-like tokenizer that reports empty fields instead of skipping them.
 *
 * Pass the buffer on the first call and NULL on the following calls to keep
 * walking the same buffer. Only the FIRST character of `delim` is used as the
 * delimiter. The resume position lives in a static variable, so the function
 * is not reentrant.
 *
 * @param str   Writable buffer to tokenize, or NULL to continue the previous
 *              one. Delimiters are overwritten with '\0', so this must be a
 *              modifiable array, not a string literal.
 * @param delim Delimiter string; must not be NULL.
 * @return NULL when `delim` is NULL or nothing is left to scan;
 *         `delim` itself (cast to char *) when the remaining text starts with
 *         the delimiter, i.e. for an empty field -- do not write through it;
 *         otherwise a pointer to the next token inside the buffer.
 */
char *zstring_strtok(char *str, const char *delim) {
    static char *static_str = 0;    /* where the next call resumes; 0 = exhausted */
    char *hit;                      /* first delimiter in the remaining text */

    /* delimiter cannot be NULL
     * if no more chars are left, return NULL as well
     */
    if (delim == 0 || (str == 0 && static_str == 0))
        return 0;

    if (str == 0)
        str = static_str;

    /* find the first occurrence of the delimiter; strchr() would match the
     * terminator for an empty delimiter string, so treat that as "not found"
     */
    hit = (delim[0] != '\0') ? strchr(str, delim[0]) : 0;

    /* no delimiter left: the rest of the buffer is the last token */
    if (hit == 0) {
        static_str = 0;
        return str;
    }

    /* consecutive delimiters: the text starts with the delimiter, so report
     * the empty field by returning the delimiter string and step over it
     */
    if (hit == str) {
        static_str = str + 1;
        return (char *)delim;
    }

    /* terminate the token and remember where the rest of the string starts;
     * the position after a delimiter is always inside the buffer (at worst it
     * is the terminating '\0'), so no further check is needed
     */
    *hit = '\0';
    static_str = hit + 1;

    return str;
}
Below is an example usage...
Example Usage
char str[] = "A,B,,,C";
printf("1 %s\n",zstring_strtok(s,","));
printf("2 %s\n",zstring_strtok(NULL,","));
printf("3 %s\n",zstring_strtok(NULL,","));
printf("4 %s\n",zstring_strtok(NULL,","));
printf("5 %s\n",zstring_strtok(NULL,","));
printf("6 %s\n",zstring_strtok(NULL,","));
Example Output
1 A
2 B
3 ,
4 ,
5 C
6 (null)
The library can be downloaded from Github https://github.com/fnoyanisi/zString