问题
I need to write my function strtok. Below is my code. The problem is - I can't display result string. In code I use strcpy()
and then display new array.
Is it possible to display string using just pointer *str
?
#include <stdio.h>
#include <string.h>
char* my_strtok(char* s, char* delm){
char W[100];
int i = 0, k = 0, j = 0;
char *ptr;
static char *Iterator;
ptr = s;
if (s == NULL){
s = Iterator;
}
while (s[i] != '\0'){
j = 0;
while (delm[j] != '\0'){
if (s[i] != delm[j])
W[k] = s[i];
else goto It;
j++;
}
ptr++;
i++;
k++;
}
It:
W[i] = 0;
Iterator = ++ptr;
return W;
}
int main(void){
char s[100];
char delm[] = " ";
gets(s);
char newstr[100];
char *str = my_strtok(s, delm);
strcpy(newstr, str);
printf("%s", newstr);
return 0;
}
回答1:
Internal implementation of strtok has already been discussed here:
How does strtok() split the string into tokens in C?
In your type of implementation ( calling it your type because it is quite different from the actual one), you have not allocated any memory dynamically to the local variable 'W'. So when you return it, it is not guaranteed that you will receive the string correctly because that memory which was locally allocated to 'W' is not reserved for it anymore in the calling function. Apart from this, there are many unnecessary variables used in the code. And also, it needs proper re-structuring. For your better understanding I have modified your code(keeping it in your style as far as possible) to do the required job:
#include <stdio.h>
#include <string.h>
#include <malloc.h>
char* my_strtok(char* s, char* delm)
{
static int currIndex = 0;
if(!s || !delm || s[currIndex] == '\0')
return NULL;
char *W = (char *)malloc(sizeof(char)*100);
int i = currIndex, k = 0, j = 0;
while (s[i] != '\0'){
j = 0;
while (delm[j] != '\0'){
if (s[i] != delm[j])
W[k] = s[i];
else goto It;
j++;
}
i++;
k++;
}
It:
W[i] = 0;
currIndex = i+1;
//Iterator = ++ptr;
return W;
}
int main(void)
{
char s[100] = "my name is khan";
char delm[] = " ";
//char newstr[100];
char *str = my_strtok(s, delm);
while(str){
printf("%s", str);
free(str);
str = my_strtok(s, delm);
}
return 0;
}
回答2:
Bounty Disclaimer
Bounty: "Looking for an answer drawing from credible and/or official sources."
I think GNU GLibC constitutes a credible and official source, right? You can download GLibC 2.22 here as .tar.gz
(25mb).
After you extract the sources:
string/strtok.c
#include <string.h>
static char *olds;
#undef strtok
#ifndef STRTOK
# define STRTOK strtok
#endif
/* Parse S into tokens separated by characters in DELIM.
If S is NULL, the last string strtok() was called with is
used. For example:
char s[] = "-abc-=-def";
x = strtok(s, "-"); // x = "abc"
x = strtok(NULL, "-="); // x = "def"
x = strtok(NULL, "="); // x = NULL
// s = "abc\0=-def\0"
*/
char *
STRTOK (char *s, const char *delim)
{
char *token;
if (s == NULL)
s = olds;
/* Scan leading delimiters. */
s += strspn (s, delim);
if (*s == '\0')
{
olds = s;
return NULL;
}
/* Find the end of the token. */
token = s;
s = strpbrk (token, delim);
if (s == NULL)
/* This token finishes the string. */
olds = __rawmemchr (token, '\0');
else
{
/* Terminate the token and make OLDS point past it. */
*s = '\0';
olds = s + 1;
}
return token;
}
string/strspn.c
#include <string.h>
#undef strspn
#ifndef STRSPN
#define STRSPN strspn
#endif
/* Return the length of the maximum initial segment
of S which contains only characters in ACCEPT. */
size_t
STRSPN (const char *s, const char *accept)
{
const char *p;
const char *a;
size_t count = 0;
for (p = s; *p != '\0'; ++p)
{
for (a = accept; *a != '\0'; ++a)
if (*p == *a)
break;
if (*a == '\0')
return count;
else
++count;
}
return count;
}
libc_hidden_builtin_def (strspn)
string/strpbrk.c
#include <string.h>
#undef strpbrk
#ifndef STRPBRK
#define STRPBRK strpbrk
#endif
/* Find the first occurrence in S of any character in ACCEPT. */
char *
STRPBRK (const char *s, const char *accept)
{
while (*s != '\0')
{
const char *a = accept;
while (*a != '\0')
if (*a++ == *s)
return (char *) s;
++s;
}
return NULL;
}
libc_hidden_builtin_def (strpbrk)
sysdeps/x86_64/rawmemchr.S
#include <sysdep.h>
.text
ENTRY (__rawmemchr)
movd %rsi, %xmm1
mov %rdi, %rcx
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
and $63, %rcx
pshufd $0, %xmm1, %xmm1
cmp $48, %rcx
ja L(crosscache)
movdqu (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
/* Check if there is a match. */
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
add $16, %rdi
and $-16, %rdi
jmp L(loop_prolog)
.p2align 4
L(crosscache):
and $15, %rcx
and $-16, %rdi
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
sar %cl, %eax
test %eax, %eax
je L(unaligned_no_match)
/* Check which byte is a match. */
bsf %eax, %eax
add %rdi, %rax
add %rcx, %rax
ret
.p2align 4
L(unaligned_no_match):
add $16, %rdi
.p2align 4
L(loop_prolog):
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm4
add $64, %rdi
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(matches0)
test $0x3f, %rdi
jz L(align64_loop)
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
movdqa 48(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
add $64, %rdi
test %eax, %eax
jnz L(matches0)
and $-64, %rdi
.p2align 4
L(align64_loop):
movdqa (%rdi), %xmm0
movdqa 16(%rdi), %xmm2
movdqa 32(%rdi), %xmm3
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm2
pcmpeqb %xmm1, %xmm3
pcmpeqb %xmm1, %xmm4
pmaxub %xmm0, %xmm3
pmaxub %xmm2, %xmm4
pmaxub %xmm3, %xmm4
pmovmskb %xmm4, %eax
add $64, %rdi
test %eax, %eax
jz L(align64_loop)
sub $64, %rdi
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pcmpeqb 48(%rdi), %xmm1
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
pmovmskb %xmm1, %eax
bsf %eax, %eax
lea 48(%rdi, %rax), %rax
ret
.p2align 4
L(matches0):
bsf %eax, %eax
lea -16(%rax, %rdi), %rax
ret
.p2align 4
L(matches):
bsf %eax, %eax
add %rdi, %rax
ret
.p2align 4
L(matches16):
bsf %eax, %eax
lea 16(%rax, %rdi), %rax
ret
.p2align 4
L(matches32):
bsf %eax, %eax
lea 32(%rax, %rdi), %rax
ret
.p2align 4
L(return_null):
xor %rax, %rax
ret
END (__rawmemchr)
weak_alias (__rawmemchr, rawmemchr)
libc_hidden_builtin_def (__rawmemchr)
回答3:
Actually this one is my solution to the general case... when str is char* you can't write to it.
Here it is:
And here is the link to github resource:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define DICT_LEN 256
int *create_delim_dict(char *delim)
{
int *d = (int*)malloc(sizeof(int)*DICT_LEN);
memset((void*)d, 0, sizeof(int)*DICT_LEN);
int i;
for(i=0; i< strlen(delim); i++) {
d[delim[i]] = 1;
}
return d;
}
char *my_strtok(char *str, char *delim)
{
static char *last, *to_free;
int *deli_dict = create_delim_dict(delim);
if(!deli_dict) {
return NULL;
}
if(str) {
last = (char*)malloc(strlen(str)+1);
if(!last) {
free(deli_dict);
}
to_free = last;
strcpy(last, str);
}
while(deli_dict[*last] && *last != '\0') {
last++;
}
str = last;
if(*last == '\0') {
free(deli_dict);
free(to_free);
return NULL;
}
while (*last != '\0' && !deli_dict[*last]) {
last++;
}
*last = '\0';
last++;
free(deli_dict);
return str;
}
int main()
{
char * str = "- This, a sample string.";
char *del = " ,.-";
char *s = my_strtok(str, del);
while(s) {
printf("%s\n", s);
s = my_strtok(NULL, del);
}
return 0;
}
回答4:
char * strtok2(char *str, const char *delim)
{
static char *nxt; /* static variable used to advance the string to replace delimiters */
static int size; /* static variable used to count until the end of the string */
/* IMPORTANT: any advance to 'nxt' must be followed by a diminution of 'size', and vice verce */
int i; /* counter of delimiter(s) in string */
/* initialize the string when strtok2 is first calles and supplied with a valid string */
if(str != NULL)
{
nxt = str;
size = strlen(str);
}
/* if we havn't reached the end of the string, any other call to strtok2 with NULL will come here */
else if(size > 0)
{
nxt++; /* last run left nxt on a null terminator, so we have to advance it */
size--; /* any advancement must follow diminution of size */
str = nxt; /* string now points to the first char after the last delimiter */
}
/* if we reached the end of string, return NULL pointer */
else
{
str = NULL;
}
/* nxt is used to advance the string until a delimiter or a series of delimiters are encountered;
* it then stops on the last delimiter which has turned to NULL terminator
*/
while(*nxt)
{
i = strspn(nxt, delim);
while(i > 1) /* turns delimiters to NULL terminator (except the last one) */
{
*nxt = '\0';
nxt++;
size--;
i--;
} /* in the last delimiter we have to do something a */
if(1 == i) /* bit different we have to actually take nxt backwards */
{ /* one step, to break the while(*nxt) loop */
*nxt = '\0';
if(size > 1) /* if the delimiter is last char, don't do next lines */
{
nxt--;
size++; /* nxt is diminished so size increases */
}
}
nxt++; /* if no delimiter is found, advance nxt */
size--; /* in case of the last delimiter in a series, we took nxt */
} /* a step back, so now moving it a step forward means */
/* it rests on a NULL terminator */
return str;
}
回答5:
Here is an implementation increase the input buffers as per your needs and increase the no of calls to strtok with str1 as NULL to get back more tokens
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
char *ret_nondelim(char *str1,char *str2);
char *my_strtok(char *str1,char *str2);
char *ret_noofbytes(char *str1,char *str2);
int main()
{
char str1[20];
char str2[20];
char *res;
printf("enter string one\n");
scanf("%s",str1);
printf("enter string two\n");
scanf("%s",str2);
res=my_strtok(str1,str2);
if(res)
printf("%s\n",res);
else
printf("returned NULL\n");
free(res);
res=my_strtok(NULL,str2);
if(res)
printf("%s\n",res);
else
printf("returned NULL\n");
free(res);
res=my_strtok(NULL,str2);
if(res)
printf("%s\n",res);
else
printf("returned NULL\n");
free(res);
}
char *my_strtok(char *str1,char *str2)
{
static char *str1_cpy;
static int i;
char *ptr;
int flag=0;
if(str1!=NULL)
{
str1_cpy=ret_nondelim(str1,str2); //get location of non delimiting byte
str1=ret_noofbytes(str1_cpy,str2); //scan and get location of delimitng byte
if((str1-str1_cpy)) //no of bytes = non delimiting location - delimting location
{
ptr=malloc(str1-str1_cpy+1); //malloc location and and store the string`enter code here`
int k;
for(k=0;k<(str1-str1_cpy);k++)
ptr[k]=str1_cpy[k];
ptr[k]='\0';
str1_cpy=str1; //save pointer for next iteration
return ptr; //return pointer
}
else
return NULL;
}
else
{
str1_cpy=ret_nondelim(str1_cpy,str2); //same as above but pass saved pointer
str1=ret_noofbytes(str1_cpy,str2);
if((str1-str1_cpy))
{
ptr=malloc(str1-str1_cpy+1);
int k;
for(k=0;k<(str1-str1_cpy);k++)
ptr[k]=str1_cpy[k];
ptr[k]='\0';
str1_cpy=str1; //save pointer for next iteration
return ptr;
}
else
return NULL;
}
}
char *ret_nondelim(char *str1,char *str2)
{
int flag=0;
for(;*(str1);)
{
for(int j=0;j<strlen(str2);j++)
{
if(*(str1)==*(str2+j)) //check if slected byte is non delimiting byte
{
str1++; //break if true go check next byte
break;
}
else
flag++; //shows that selected byte is none of the delimitng byte
}
if(flag==strlen(str2)) //(above comment)-so non delimiting byte found return that pointer to caller
break;
else
flag=0;
}
return str1;
}
char *ret_noofbytes(char *str1,char *str2)
{
int flag=0;
for(;*(str1);)
{
for(int k=0;k<strlen(str2);k++)
{
if(*(str2+k)==*(str1)) //check if selected bytes is delimiting byte
{
flag=1;
break; //break twice if true ie .flag==1
}
}
if(flag==1)
break;
else
str1++; //if not found go check next byte
}
return str1; //return the point where delimiting byte was found
}
回答6:
Here is the code which explains how strtok works in C.
#include <stdio.h>
#include <string.h>
#define SIZE_OF_STRING 50
#define SIZE_OF_DELIMITER 5
/* Make the temp_ptr as static, so it will hold the previous pointing address */
static char *temp_ptr = NULL;
char *string_token(char *str, char *delimiter)
{
char *final_ptr = NULL; `enter code here`
/* Flag has been defined as static to avoid the parent function loop
* runs infinitely after reaching end of string.
*/
static int flag = 0;
int i, j;
if (delimiter == NULL) {
return NULL;
}
/* If flag is 1, it will denote the end of the string */
if (flag == 1) {
return NULL;
}
/* The below condition is to avoid temp_ptr is getting assigned
* with NULL string from the parent function main. Without
* the below condition, NULL will be assigned to temp_ptr
* and final_ptr, so accessing these pointers will lead to
* segmentation fault.
*/
if (str != NULL) {
temp_ptr = str;
}
/* Before function call ends, temp_ptr should move to the next char,
* so we can't return the temp_ptr. Hence, we introduced a new pointer
* final_ptr which points to temp_ptr.
*/
final_ptr = temp_ptr;
printf("%s %d str: %s delimiter: %s length: %ld temp_ptr: %s strlen: %ld"
" final_ptr: %s \n",__func__, __LINE__, str, delimiter,
strlen(delimiter), temp_ptr, strlen(temp_ptr), final_ptr);
for (i = 0; i <= strlen(temp_ptr); i++)
{
for (j = 0; j < strlen(delimiter); j++) {
if (temp_ptr[i] == '\0') {
/* If the flag is not set, both the temp_ptr and flag_ptr
* will be holding string "Jasmine" which will make parent
* to call this function string_token infinitely.
*/
flag = 1;
return final_ptr;
}
if ((temp_ptr[i] == delimiter[j])) {
/* NULL character is assigned, so that final_ptr will return
* til NULL character. Here, final_ptr points to temp_ptr.
*/
temp_ptr[i] = '\0';
temp_ptr += i+1;
return final_ptr;
}
}
}
/* You will somehow end up here if for loop condition fails.
* If this function doesn't return any char pointer, it will
* lead to segmentation fault in the parent function.
*/
return NULL;
}
int main()
{
char str[SIZE_OF_STRING] = "shirley|Rose|Jasmine";
char *token = NULL;
char del[SIZE_OF_DELIMITER] = "|";
token = string_token(str, del);
while (token != NULL) {
printf("token %s\n", token);
token = string_token(NULL, del);
}
return 0;
}
https://shirleyanengineer.blogspot.com/2019/11/write-your-own-strtok-in-c.html
来源:https://stackoverflow.com/questions/28931379/implementation-of-strtok-function