For string, find and replace

∥☆過路亽.° 提交于 2019-11-30 07:19:36

问题


Finding some text and replacing it with new text within a C string can be a little trickier than expected. I am looking for an algorithm that is fast and has low time complexity.

What should I use?


回答1:


I can't help but wonder what algorithm strstr() implements. Given that these are fairly standard algorithms, it's entirely possible that a good implementation of strstr() uses one of them.

However there's no guarantee that strstr() implements an optimised algorithm or that the same algorithm is used from one platform to another.




回答2:


I couldn't find an implementation of search/replace in C that I liked so I present here my own. It does not use things like strstr(), snprintf(), arbitrary length temporary buffers, etc. It only requires that the haystack buffer is large enough to hold the resulting string after replacements are made.

// str_replace(haystack, haystacksize, oldneedle, newneedle) --
//  Search haystack and replace all occurrences of oldneedle with newneedle,
//  editing haystack in place.
//  Resulting haystack contains no more than haystacksize characters (including the '\0').
//  If haystacksize is too small to make the replacements, do not modify haystack at all.
//  An empty oldneedle is treated as "nothing to do".
//
// RETURN VALUES
// str_replace() returns haystack on success and NULL on failure.
// Failure means there was not enough room to replace all occurrences of oldneedle.
// Success is returned otherwise, even if no replacement is made.
char *str_replace(char *haystack, size_t haystacksize,
                    const char *oldneedle, const char *newneedle);

// ------------------------------------------------------------------
// Implementation of function
// ------------------------------------------------------------------
// find_occurrence: return a pointer to the first occurrence of needle
// (needle_len characters long, needle_len > 0) at or after `from`, or NULL
// if the NUL-terminated text starting at `from` does not contain it.
static char *
find_occurrence(char *from, const char *needle, size_t needle_len)
{
    for (; *from != '\0'; from++) {
        // strncmp stops at the text's terminator, so a partial match at the
        // very end never reads past the string.
        if (*from == *needle && strncmp(from, needle, needle_len) == 0)
            return from;
    }
    return NULL;
}

// str_replace: replace every non-overlapping occurrence of oldneedle in
// haystack (scanning left to right) with newneedle, editing haystack in place.
//
//  haystack      NUL-terminated string stored in a writable buffer
//  haystacksize  capacity of that buffer in characters, including the '\0'
//  oldneedle     text to search for; an empty string means "nothing to do"
//  newneedle     replacement text; may be empty, shorter or longer
//
// haystack, oldneedle and newneedle must not overlap each other.
//
// Returns haystack on success (even if nothing was replaced) and NULL if the
// result, including its terminator, would not fit in haystacksize characters.
// On failure haystack is left untouched.
//
// No temporary buffer is used. When the string has to grow, the text is first
// shifted right by the total growth and then rewritten left to right; the
// write position can never overtake the read position, so no unread
// character is overwritten.
char *str_replace(char *haystack, size_t haystacksize,
                    const char *oldneedle, const char *newneedle)
{
    const size_t old_len = strlen(oldneedle);
    const size_t new_len = strlen(newneedle);

    // Case 0: oldneedle is empty -- nothing to do; define as success
    if (old_len == 0)
        return haystack;

    // Pass 1: count the occurrences and measure the haystack.
    size_t count = 0;
    char *scan = haystack;
    for (char *hit = find_occurrence(scan, oldneedle, old_len);
         hit != NULL;
         hit = find_occurrence(scan, oldneedle, old_len))
    {
        count++;
        scan = hit + old_len;
    }
    if (count == 0)
        return haystack;

    const size_t tail_len = strlen(scan);   // text after the last occurrence
    const size_t hay_len = (size_t)(scan - haystack) + tail_len;

    // When growing, make sure the final string and its '\0' fit, then move
    // the whole text to the end of the region it will finally occupy.
    size_t shift = 0;
    if (new_len > old_len) {
        const size_t grow = new_len - old_len;
        // spare characters beyond the current string and its terminator
        const size_t room =
            haystacksize > hay_len ? haystacksize - hay_len - 1 : 0;

        // Equivalent to grow * count > room, but cannot overflow.
        if (grow > room / count)
            return NULL;    // no more room in haystack

        shift = grow * count;
        memmove(haystack + shift, haystack, hay_len + 1);
    }

    // Pass 2: rewrite left to right. The same `count` occurrences are found
    // again because the text from `src` onwards is still unmodified.
    char *src = haystack + shift;   // where to read
    char *dst = haystack;           // where to write
    for (size_t i = 0; i < count; i++) {
        char *hit = find_occurrence(src, oldneedle, old_len);
        const size_t gap = (size_t)(hit - src);

        memmove(dst, src, gap);             // text before the occurrence
        dst += gap;
        memcpy(dst, newneedle, new_len);    // the replacement itself
        dst += new_len;
        src = hit + old_len;
    }
    memmove(dst, src, tail_len + 1);        // remaining text and the '\0'
    return haystack;
}

Example usage

#define BUF 30
char *retval1, *retval2;
char message[BUF] = "Your name is $USERNAME.";
char username[] = "admin";
char username_toolong[] = "System Administrator";

// Tell the user that `replacement` did not fit into the message buffer.
static void report_no_room(const char *replacement)
{
    printf("Not enough room to replace $USERNAME with `%s'\n", replacement);
}

// Try an over-long replacement first, then one that fits, report each
// attempt that failed, and finally print the message as it now stands.
int main() {
    retval1 = str_replace(message, BUF, "$USERNAME", username_toolong);
    retval2 = str_replace(message, BUF, "$USERNAME", username);

    if (retval1 == NULL) {
        report_no_room(username_toolong);
    }
    if (retval2 == NULL) {
        report_no_room(username);
    }

    printf("%s\n", message);
    return 0;
}

Output

Not enough room to replace $USERNAME with `System Administrator'
Your name is admin.

Cheers.




回答3:


Knuth-Morris-Pratt (which is classic) or Boyer-Moore (which is sometimes faster)?

  • http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
  • http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm

Try using a Google search for 'string searching algorithms'.




回答4:


Using std::string (from <string>) you can simply use find and replace.

  • http://www.cplusplus.com/reference/string/string/find/ - Gets you an index.
  • http://www.cplusplus.com/reference/string/string/replace/ - Takes an index.

Edit: Touché. This is for C++ only.

Is this any good to you? http://www.daniweb.com/forums/thread51976.html




回答5:


Here is a nice piece of code:

#include <stdio.h>
#include <string.h>

// replace_str: replace the first occurrence of 'orig' in 'str' with 'rep'.
//
// Returns:
//   - 'str' itself when 'orig' does not occur in it;
//   - otherwise a pointer to an internal static buffer holding the result,
//     which is overwritten by the next call (so this is not reentrant);
//   - NULL when the result, including its '\0', does not fit in that buffer.
//
// 'str' may be the pointer returned by a previous call, which allows calls
// to be chained; 'orig' and 'rep' must not point into the internal buffer.
char *replace_str(char *str, char *orig, char *rep)
{
  static char buffer[4096];
  char *hit = strstr(str, orig);

  if (hit == NULL)  // Is 'orig' even in 'str'?
    return str;

  const size_t prefix_len = (size_t)(hit - str);  // text before 'orig'
  const size_t rep_len = strlen(rep);
  const char *tail = hit + strlen(orig);          // text after 'orig'
  const size_t tail_len = strlen(tail);

  // Require prefix_len + rep_len + tail_len + 1 <= sizeof buffer, checked
  // step by step so that the sum itself can never overflow.
  if (prefix_len >= sizeof buffer ||
      rep_len >= sizeof buffer - prefix_len ||
      tail_len >= sizeof buffer - prefix_len - rep_len)
    return NULL;

  // memmove, in this order, stays correct when 'str' already lives inside
  // 'buffer': the prefix lands in front of the still-unread tail, and the
  // tail move may overlap its own source.
  memmove(buffer, str, prefix_len);
  memmove(buffer + prefix_len + rep_len, tail, tail_len + 1);
  memcpy(buffer + prefix_len, rep, rep_len);

  return buffer;
}

// Demo: swap "world" for "Miami" in a greeting and print the result.
int main(void)
{
  char *replaced = replace_str("Hello, world!", "world", "Miami");

  puts(replaced);
  return 0;
}



回答6:


My solution, based on the others, but a bit safer I believe:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_SOURCE_SIZE (0x100000)

// searchReplace: apply a list of replacements to 'string', one after another.
//
// For each i in [0, numReplacements) the first occurrence of toReplace[i] in
// the current text is replaced by replacements[i]; later replacements see the
// output of earlier ones.
//
// Returns a pointer to an internal static buffer of MAX_SOURCE_SIZE bytes
// holding the result (a plain copy of 'string' when numReplacements <= 0).
// The buffer is overwritten by the next call, so this is not reentrant.
// 'string' may be the result of a previous call; the entries of toReplace
// and replacements must not point into the internal buffer.
//
// The process exits with EXIT_FAILURE when a search text is not found or
// when the text would no longer fit into the buffer.
char * searchReplace(char * string, char *toReplace[], char *replacements[], int numReplacements){
    static char buffer[MAX_SOURCE_SIZE];
    size_t lenStr = strlen(string);

    // Work on a private copy so that every replacement is an in-place edit
    // with well-defined overlap semantics.
    if (lenStr >= sizeof buffer){
        exit(EXIT_FAILURE);
    }
    memmove(buffer, string, lenStr + 1);

    for(int i = 0; i < numReplacements; ++i){
        const char *toRep = toReplace[i];
        const char *rep = replacements[i];

        //if str not in the string, exit.
        char *locOfToRep = strstr(buffer, toRep);
        if (locOfToRep == NULL){
            exit(EXIT_FAILURE);
        }

        const size_t lenToRep = strlen(toRep);
        const size_t lenRep = strlen(rep);
        const size_t lenAfterToRep =
            lenStr - (size_t)(locOfToRep - buffer) - lenToRep;

        // Growing: the longer text plus its '\0' must still fit.
        if (lenRep > lenToRep && lenRep - lenToRep >= sizeof buffer - lenStr){
            exit(EXIT_FAILURE);
        }

        // Slide the rest of the text (with its '\0') to its new place, then
        // drop the replacement into the gap.
        memmove(locOfToRep + lenRep, locOfToRep + lenToRep, lenAfterToRep + 1);
        memcpy(locOfToRep, rep, lenRep);
        lenStr = lenStr - lenToRep + lenRep;
    }
    return buffer;
}

// Demo: turn "Hello, world!" into "Goodbye, you!" with two replacements.
int main(){
    char *names[2] = {"Hello", "world"};
    char *vals[2] = {"Goodbye", "you"};
    int numVals = 2;

    char * string = searchReplace("Hello, world!", names, vals, numVals);

    printf("%s\n",string);
}


来源:https://stackoverflow.com/questions/4408170/for-string-find-and-replace

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!