How to sort an array of string alphabetically (case sensitive, nonstandard collation)

前端 未结 6 1988
北荒
北荒 2020-12-02 19:15

I need a c language code to sort some strings and it should be case sensitive and for the same letter in upper- and lower-cases, the lower-case must come first

相关标签:
6条回答
  • 2020-12-02 19:41

    Keep alike words together...

    For lists of words, it is often more useful to group the "same" words together (even though they differ in case). For example:

    Keeping things together:          Simple "M after m":
    ------------------------          -------------------
    mars                              mars
    mars bar                          mars bar
    Mars bar                          milk
    milk                              milk-duds
    Milk                              milky-way
    milk-duds                         Mars bar
    milky-way                         Milk
    Milky-way                         Milky-way
    

    If you want words arranged like the first column, I present three ways of doing so:

    • Using strcasecmp() combined with strcmp().
    • A single pass implementation tracking the character type with isalpha(), tolower(), and isupper().
    • A single pass implementation that utilizes a collating table.

    In the end I discuss two alternatives:

    • Using the collating table to establish an arbitrary ordering.
    • Setting the locale to use locale based collating.

    Using available library functions

    If it is possible to do so, avoid reinventing the wheel. In this case, we can do so by using the POSIX function strcasecmp() to see if they are equal with a case-insensitive comparison, and falling back on strcmp() when they are.

    int alphaBetize (const char *a, const char *b) {
        int r = strcasecmp(a, b);
        if (r) return r;
        /* if equal ignoring case, use opposite of strcmp() result to get
         * lower before upper */
        return -strcmp(a, b); /* aka: return strcmp(b, a); */
    }
    

    (On some systems, the case-insensitive comparison function is called stricmp() or _stricmp(). If one is not available to you, an implementation is provided below.)

    #ifdef I_DONT_HAVE_STRCASECMP
    int strcasecmp (const char *a, const char *b) {
        while (*a && *b) {
            if (tolower(*a) != tolower(*b)) {
                break;
            }
            ++a;
            ++b;
        }
        return tolower(*a) - tolower(*b);
    }
    #endif
    

    Avoiding two passes over the strings

    Sometimes, existing functions do not perform well enough, and you have to do something else to make things faster. The following function does the comparison in roughly the same way in a single pass, and without using either strcasecmp() or strcmp(). But, it treats all non-alphabetical characters as being less than letters.

    int alphaBetize (const char *a, const char *b) {
        int weight = 0;
        do {
            if (*a != *b) {
                if (!(isalpha(*a) && isalpha(*b))) {
                    if (isalpha(*a) || isalpha(*b)) {
                        return isalpha(*a) - isalpha(*b);
                    }
                    return *a - *b;
                }
                if (tolower(*a) != tolower(*b)) {
                    return tolower(*a) - tolower(*b);
                }
                /* treat as equal, but mark the weight if not set */
                if (weight == 0) {
                    weight = isupper(*a) - isupper(*b);
                }
            }
            ++a;
            ++b;
        } while (*a && *b);
        /* if the words compared equal, use the weight as tie breaker */
        if (*a == *b) {
            return weight;
        }
        return !*b - !*a;
    }
    

    Using this comparison for sorting will keep milk and Milk next to each other even if the list includes milk-duds.

    Using a collating table

    Here is a way to dynamically create a collating table from a "configuration". It serves to illustrate a contrastive technique to change how strings get compared.

    You can map how the letters of the alphabet are compared with a kind of simple table that describes the relative order you want letters (or any character except the NUL byte) to have:

    const char * alphaBetical =
        "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ";
    

    From this ordering, we can create a look-up table to see how two letters are supposed to compare to each other. The following function initializes the table if it has not already been done first, and otherwise performs the table look-up.

    int alphaBeta_lookup (int c) {
        static int initialized;
        static char table[CHAR_MAX+1];
        if (!initialized) {
            /* leave all non-alphaBeticals in their relative order, but below
               alphaBeticals */
            int i, j;
            for (i = j = 1; i < CHAR_MAX+1; ++i) {
                if (strchr(alphaBetical, i)) continue;
                table[i] = j++;
            }
            /* now run through the alphaBeticals */
            for (i = 0; alphaBetical[i]; ++i) {
                table[(int)alphaBetical[i]] = j++;
            }
            initialized = 1;
        }
        /* return the computed ordinal of the provided character */
        if (c < 0 || c > CHAR_MAX) return c;
        return table[c];
    }
    

    With this look-up table, we can now simplify the loop body of the alphaBetize() comparison function:

    int alphaBetize (const char *a, const char *b) {
        int ax = alphaBeta_lookup(*a);
        int bx = alphaBeta_lookup(*b);
        int weight = 0;
        do {
            char al = tolower(*a);
            char bl = tolower(*b);
            if (ax != bx) {
                if (al != bl) {
                    return alphaBeta_lookup(al) - alphaBeta_lookup(bl);
                }
                if (weight == 0) {
                    weight = ax - bx;
                }
            }
            ax = alphaBeta_lookup(*++a);
            bx = alphaBeta_lookup(*++b);
        } while (ax && bx);
        /* if the words compared equal, use the weight as tie breaker */
        return (ax != bx) ? !bx - !ax : weight;
    }
    

    Can we make things simpler?

    Using the collating table, you can create many different orderings with a simplified comparison function, like:

    int simple_collating (const char *a, const char *b) {
        while (alphaBeta_lookup(*a) == alphaBeta_lookup(*b)) {
            if (*a == '\0') break;
            ++a, ++b;
        }
        return alphaBeta_lookup(*a) - alphaBeta_lookup(*b);
    }
    

    Using this same function and through modifying the alphaBetical string, you can achieve nearly any ordering you want (alphabetical, reverse alphabetical, vowels before consonants, etc.). However, the arrangement of keeping alike words together requires interspersing capitalized words with words in lowercase, and this can only be done by doing a comparison that ignores case.

    Note that with the simple_collating() function above and the alphaBetical string I provided, Bacon will come before milk, but Mars will go after milk and before Milk.

    If you want to sort based on your locale.

    If you want to use a collating sequence that is already defined for your locale, you can set the locale and call the collating comparison function:

    /*
     * To change the collating locale, use (for example):
           setlocale(LC_COLLATE, "en.US");
     */
    int iso_collating (const char *a, const char *b) {
        return strcoll(a, b);
    }
    

    Now, by changing the locale, the sorting order will be based on a standardized collating sequence.

    0 讨论(0)
  • 2020-12-02 19:42

    You can write a custom comparison function for sort.

    First, look at the default strcmp sort order:

    #include <stdio.h>
    #include <stdlib.h>
    #include <ctype.h>
    #include <string.h>
    
    const char *tgt[]={
        "bacon", "Bacon", "mIlk", "Milk", "spinach", "MILK", "milk", "eggs"
    };
    int tgt_size=8;
    
    static int cmp(const void *p1, const void *p2){
        return strcmp(* (char * const *) p1, * (char * const *) p2);
    }
    
    int main(int argc, char *argv[]) {
        printf("Before sort:\n\t");
        for(int n=0; n<tgt_size; n++)
            printf("%s ", tgt[n]);
    
        qsort(tgt, tgt_size, sizeof(char *), cmp);
    
        printf("\nAfter sort:\n\t");
        for(int n=0; n<tgt_size; n++)
            printf("%s ", tgt[n]);
    
        return 0;
    }
    

    strcmp sorts by ASCII character code; i.e., it sorts A-Z then a-z so all capital A-Z come before any word with a lowercase letter:

    Before sort:
        bacon Bacon mIlk Milk spinach MILK milk eggs 
    After sort:
        Bacon MILK Milk bacon eggs mIlk milk spinach 
    

    We can write our own comparison function used in cmp used in qsort that ignores case. That looks like this:

    int mycmp(const char *a, const char *b) {
        const char *cp1 = a, *cp2 = b;
    
        for (; toupper(*cp1) == toupper(*cp2); cp1++, cp2++)
            if (*cp1 == '\0')
                return 0;
        return ((toupper(*cp1) < toupper(*cp2)) ? -1 : +1);
    } 
    

    Be sure to also change cmp to:

    static int cmp(const void *p1, const void *p2){
        return mycmp(* (char * const *) p1, * (char * const *) p2);
    }
    

    The case ignoring version now prints:

    Before sort:
        bacon Bacon mIlk Milk spinach MILK milk eggs 
    After sort:
        bacon Bacon eggs Milk MILK milk mIlk spinach 
    

    This is the same output you would get with the POSIX function strcasecmp.

    The function mycmp first compares lexicographically in normal order [a|A]-[z|Z]. This mean you will get like letter words together but you may get bacon, Bacon as likely as Bacon, bacon. This is because qsort is not a stable sort and 'Bacon' compares equal to 'bacon'.

    Now what we want is if the comparison is 0 while ignoring case (i.e., same word like 'MILK' and 'milk) now compare including case and reverse the order:

    int mycmp(const char *a, const char *b) {
        const char *cp1 = a, *cp2 = b;
        int sccmp=1;
    
        for (; toupper(*cp1) == toupper(*cp2); cp1++, cp2++)
            if (*cp1 == '\0')
                sccmp = 0;
        if (sccmp) return ((toupper(*cp1) < toupper(*cp2)) ? -1 : +1);
    
        for (; *a == *b; a++, b++)
            if (*a == '\0')
                return 0;
        return ((*a < *b) ? +1 : -1);
    }
    

    Final version prints:

    Before sort:
        bacon Bacon mIlk Milk spinach MILK milk eggs 
    After sort:
        bacon Bacon eggs milk mIlk Milk MILK spinach 
    

    Unfortunately, this approach becomes unwieldy for UNICODE. For complex sorts, consider using a mapping or a multistep sort with a stable sort.

    For complex and location aware alphabetical collations, consider Unicode collations. As an example, in different locations, letters alphabetize differently:

    Swedish                      z < ö
                                 y == w
    German                       ö < z
    Danish                       Z < Å
    Lithuanian                   i < y < k
    Tr German                    ä == æ
    Tr Spanish                   c < ch < d   
    German Dictionary Sort:      of < öf
    German Phonebook Sort:       öf < of
    

    The default values for these distinctions are captured in in the Default Unicode Collation Element Table (DUCET) that provides a default mapping for UNICODE collations and string comparisons. You can modify the defaults to capture the distinction between dictionary sorting and phonebook sorting, different locations or different treatment for case. Individual location variations are actively tracked in the Unicode Common Locale Data Repository (CLDR).

    The reccomendation for multi level sorting is tiered:

    Level   Description           Examples
    L1      Base characters       role < roles < rule
    L2      Accents               role < rôle < roles
    L3      Case/Variants         role < Role < rôle
    L4      Punctuation           role < “role” < Role
    Ln      Identical             role < ro□le < “role”
    

    A widely used implementation of Unicode collations is in the ICU Library. The default DUCET collation for several examples would be:

    b < B < bad < Bad < bäd
    c < C < cote < coté < côte < côté 
    

    You can explore the ICU library and change the locations and targets with the ICU Explorer

    If you wanted to implement your own version of the DUCET for giggles, you can follow the general method used in this Python script. It is not overwhelming, but not trivial.

    0 讨论(0)
  • 2020-12-02 19:43

    The key of the OP code is the use of function strcmp() to compare two strings.
    So, I will start by replacing this standard function by another one, like the following:

      // We assume that the collating sequence satisfies the following rules:
      // 'A' < 'B' < 'C' < ...
      // 'a' < 'b' < 'c' < ...
      // We don't make any other assumptions.
    
      #include <ctype.h>      
      int my_strcmp(const char * s1, const char * s2)
      {
          const char *p1 = s1, *p2 = s2;
          while(*p1 == *p2 && *p1 != '\0' && *p2 != '\0')
              p1++, p2++;  /* keep searching... */
    
          if (*p1 == *p2)
             return 0;
          if (*p1 == '\0')
             return -1;
          if (*p2 == '\0')
             return +1;
    
          char c1 = tolower(*p1),      c2 = tolower(*p2);
          int  u1 = isupper(*p1) != 0, u2 = isupper(*p2) != 0;
          if (c1 != c2)
            return c1 - c2;  // <<--- Alphabetical order assumption is used here 
          if (c1 == c2)
            return u1 - u2;
      }
    

    The last lines can be compacted in this way:

         return (c1 != c2)? c1 - c2: u1 - u2;
    

    Now, by replacing strcmp() by my_strcmp() you will have the desired result.

    In an sort algorithm it's good idea to think separately the 3 main aspects of it:

    • The comparisson function.
    • The abstract sort algorithm that we will use.
    • The way in that data will be "moved" in the array when two items have to be swapped.

    These aspects can be optimized independently.
    Thus, for exampmle, once you have the comparisson function well settled, the next optimization step could be to replace the double for sorting algorithm by a more efficient one, like quicksort.
    In particular, the function qsort() of the standard library <stdlib.h> provides you with such an algorithm, so you don't need to care about programming it.
    Finally, the strategy you use to store the array information could have consequences in performance.
    It would be more efficient to store strings like "array of pointers to char" instead of "array of array of char", since swapping pointers is faster than swapping two entire arrays of chars.

    Arrays of pointers

    ADDITIONAL NOTE: The three first if()'s are actually redundant, because the logic of the following sentences implies the desired result in the case that *p1 or *p2 is 0. However, by keeping those if()'s, the code becomes more readable.

    0 讨论(0)
  • 2020-12-02 19:48

    Standard Header Files Required by Program:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    

    The main Program starts here:

    //Global Variables Required
    //=========
    const char *tgt[]={"bacon", "Bacon", "mIlk", 
            "Milk", "spinach", "MILK", "milk", "eggs"};    //Array for sorting
    int tgt_size=8;                                        //Total Number of Records
    int     SortLookupTable[128];                          //Custom sorting table
    typedef int      cmp_t (const void *, const void *);
    
    
    
    
    int main(int argc, char *argv[]) {
        printf("Before sort:\n\n");
        int n=0;
        for(n=0; n<tgt_size; n++)
            printf("%s\n", tgt[n]);
    
        CreateSortTable();
    
        myQsort(tgt, tgt_size, sizeof(char *), &compare);
        printf("\n\n====\n\n");
        for(n=0; n<tgt_size; n++)
            printf("%s\n", tgt[n]);
    
        return 0;
    }
    

    Custom Sorting Table as Required:

    void CreateSortTable(void){
        int     i;
        for (i = 0; i < 128; i++){
            SortLookupTable[i] = 0;
        }
        char *s;
        s=(char *)malloc(64);
        memset(s, 0, 64);
        strcpy(s, "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ");
    
        i=1;
        for (; *s; s++){
            SortLookupTable[(int) ((unsigned char) *s)]=i;
            i++;
        }
    }
    

    Quick Sorting Algorithm, You can also use the Standard Library Provided:

    //Some important Definations required by Quick Sort
    =======
    #define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
        es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
    
    #define swap(a, b)                          \
        if (swaptype == 0) {                    \
            long t = *(long *)(a);              \
            *(long *)(a) = *(long *)(b);        \
            *(long *)(b) = t;                   \
        } else                                  \
            swapfunc(a, b, es, swaptype)
    
    #define vecswap(a, b, n)    if ((n) > 0) swapfunc(a, b, n, swaptype)
    
    
    #define swapcode(TYPE, parmi, parmj, n) {       \
        long i = (n) / sizeof (TYPE);               \
        register TYPE *pi = (TYPE *) (parmi);       \
        register TYPE *pj = (TYPE *) (parmj);       \
        do {                                        \
            register TYPE   t = *pi;                \
            *pi++ = *pj;                            \
            *pj++ = t;                              \
            } while (--i > 0);                      \
    }
    
    #define min(a, b)   (a) < (b) ? a : b
    
    
    //Other important function
    void swapfunc(char *a, char *b, int n, int swaptype){
        if(swaptype <= 1)
            swapcode(long, a, b, n)
        else
            swapcode(char, a, b, n)
    }
    
    char * med3(char *a, char *b, char *c, cmp_t *cmp){
        if ( cmp(a, b) < 0){
            if (cmp(b, c) < 0){
                return b;
            }else{
                if ( cmp(a, c) < 0){
    
                    return c;
                }else{
                    return a;
                }
            }
        }else{
            if (cmp(b, c) < 0){
                return b;
            }else{
                if ( cmp(a, c) < 0){
                    return a;
                }else{
                    return c;
                }
            }
        }
    }
    
    //Custom Quick Sort
    
    void myQsort(void *a, unsigned int n, unsigned int es, cmp_t *cmp){
        char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
        int d, r, swaptype, swap_cnt;
    
    loop:   SWAPINIT(a, es);
        swap_cnt = 0;
        if (n < 7) {
            for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
                for (pl = pm; pl > (char *)a && cmp(pl - es, pl) > 0; pl -= es){
                    swap(pl, pl - es);
                }
            return;
        }
        pm = (char *)a + (n / 2) * es;
        if (n > 7) {
            pl = a;
            pn = (char *)a + (n - 1) * es;
            if (n > 40) {
                d = (n / 8) * es;
                pl = med3(pl, pl + d, pl + 2 * d, cmp);
                pm = med3(pm - d, pm, pm + d, cmp);
                pn = med3(pn - 2 * d, pn - d, pn, cmp);
            }
            pm = med3(pl, pm, pn, cmp);
        }
        swap(a, pm);
        pa = pb = (char *)a + es;
    
        pc = pd = (char *)a + (n - 1) * es;
        for (;;) {
            while (pb <= pc && (r = cmp(pb, a)) <= 0) {
                if (r == 0) {
                    swap_cnt = 1;
                    swap(pa, pb);
                    pa += es;
                }
                pb += es;
            }
            while (pb <= pc && (r = cmp(pc, a)) >= 0) {
                if (r == 0) {
                    swap_cnt = 1;
                    swap(pc, pd);
                    pd -= es;
                }
                pc -= es;
            }
            if (pb > pc)
                break;
            swap(pb, pc);
            swap_cnt = 1;
            pb += es;
            pc -= es;
        }
        if (swap_cnt == 0) {  /* Switch to insertion sort */
            for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
                for (pl = pm; pl > (char *)a && cmp(pl - es, pl) > 0;
                     pl -= es)
                    swap(pl, pl - es);
            return;
        }
    
        pn = (char *)a + n * es;
        r = min(pa - (char *)a, pb - pa);
        vecswap(a, pb - r, r);
        r = min(pd - pc, pn - pd - es);
        vecswap(pb, pn - r, r);
        if ((r = pb - pa) > es)
            myQsort(a, r / es, es, cmp);
        if ((r = pd - pc) > es) {
            /* Iterate rather than recurse to save stack space */
            a = pn - r;
            n = r / es;
            goto loop;
        }
    }
    

    Two of the Most Important Functions are:

    unsigned char Change(char a){
        return (unsigned char ) SortLookupTable[(int)a];
    }
    
    
    int compare (const void *a, const void *b){
        char *s1= *(char **)a;
        char *s2= *(char **)b;
        int ret, len, i;
        ret=0;
    
        if (strlen((void*)s1) > strlen((void*)s2)){
            len=strlen((void*)s1);
        }else{
            len=strlen((void*)s2) ;
        }
    
        for(i=0; i<len; i++){
            if ( s1[i] != s2[i]){
                if ( Change(s1[i]) < Change(s2[i]) ){
                    ret=0;
                    break;
                }else{
                    ret=1;
                    break;
                }
            }
        }
    
        return ret;
    }
    
    0 讨论(0)
  • 2020-12-02 19:54

    Here, if I got it right, you want something as I'd describe as follows:

    A case insensitive sort, where under tie, tiebreaker condition "lowercase comes first" is to be used.

    So it's like:

    1. earlier_letter_in_the_alphabet < later_letter_in_the_alphabet ignoring the case

    2. lowercase < uppercase

    3. shorter_word < wider_word
      • This wasn't mentioned, I borrowed it from the lexicographical order, the one in dictionaries
      • Can be utilized simply by taking '\0' as the lowest possible in comparisons

    Step 2 to be taken only if 1 didn't distinguish anything. Step 3 will already be checked with 1. All these are to be done letter-by-letter, meaning that you should switch to 2 as soon as you get a tie between corresponding characters, not just when the whole strings are on tie.


    Assuming that this was right, all we need to do now is to write a function that makes this comparison for us for any given two strings.

    #include <ctype.h>  // for tolower and islower
    
    int my_character_compare(const char a, const char b)
    {
        int my_result;
    
        my_result = tolower(a) - tolower(b);
        // unless it is zero, my_result is definitely the result here
        // Note: if any one of them was 0, result will also properly favour that one
    
    
        if (my_result == 0 && a != b)
        // if (could not be distinguished with #1, but are different)
        {
            // means that they are case-insensitively same
            // but different...
            // means that one of them are lowercase, the other one is upper
            if (islower(a))
                return -1;  // favour a
            else
                return 1;   // favour b
        }
    
    
        // regardless if zero or not, my_result is definitely just the result
        return my_result;
    }
    
    int my_string_compare(const char * a, const char * b)
    {
        int my_result;
    
        my_result = my_character_compare(*a, *b);
        // unless it is zero, my_result is definitely the result here
    
        while (my_result == 0 && *a != 0)
        // current characters deemed to be same
        // if they are not both just 0 we will have to check the next ones
        {
            my_result = my_character_compare(*++a, *++b);
        }
    
        // whatever the my_result has been:
        //   whether it became != zero on the way and broke out of the loop
        //   or it is still zero, but we have also reached the end of the road/strings
        return my_result;
    }
    

    A compare function, by convention/rule, should return a negative value for favouring the first parameter to be in front, negative value for favouring the second parameter, zero if it cannot distinguish them. Just an additional information which you likely already know by the way you make use of strcmp.

    And that's it! Replacing that strcmp in your code with my_string_compare here, also putting up these definitions we've made on top should provide a correct result. Indeed it provides the expected result for the example input in question.

    One could shorten the definitions of course, I have made them long so that it will be easier to understand what's going on. For example, I could boil it all down to the following:

    #include <ctype.h>
    
    int my_string_compare(const char * a, const char * b)
    {
        int my_result;
    
        while (*a || *b)
        {
            if ((my_result = tolower(*a) - tolower(*b)))
                return my_result;
            if (*a != *b)
                return (islower(*a)) ? -1 : 1;
            a++;
            b++;
        }
    
        return 0;
    }
    

    Does essentially the same with the other one, you may use whichever you like; or even better, write one.

    0 讨论(0)
  • 2020-12-02 19:57

    I'm late to this discussion, and have no particular expectation to swan in and take the fabulous prize, but not seeing a solution using the idioms I'd look to first, thought I'd chime in.

    My first thought in reading the problem spec was some form of custom collating sequence, which I basically found in @jxh's Using a collating table notion. I don't see case insensitivity as a core concept, just the oddball ordering.

    So, I offer the following code purely as an alternative implementation. It's specific to glibc - qsort_r(3) is used - but feels like a lighter-weight approach, and supports many collating sequences at run-time. But it's lightly tested, and I'm very likely missing various weaknesses. Among which: I've paid no particular attention to Unicode or the world of wide characters in general, and the casts to unsigned char to avoid negative array subscripts feel suspect.

    #include <stdio.h>
    #include <limits.h>
    
    /*
     * Initialize an index array associated with the collating
     * sequence in co. The affected array can subsequently be
     * passed in as the final client data pointer into qsort_r
     * to be used by collating_compare below.
     */
    int
    collating_init(const char *co, int *cv, size_t n)
    {
        const unsigned char *uco = (const unsigned char *) co;
        const unsigned char *s;
        size_t i;
    
        if (n <= UCHAR_MAX) {
            return -1;
        }
        for (i = 0; i < n; i++) {
            /* default for chars not named in the sequence */
            cv[i] = UCHAR_MAX;
        }
        for (s = uco; *s; s++) {
            /*
             * the "collating value" for a character's own
             * character code is its ordinal (starting from
             * zero) in the collating sequence. I.e., we
             * compare the values of cv['a'] and cv['A'] -
             * rather than 'a' and 'A' - to determine order.
             */
            cv[*s] = (s - uco);
        }
    
        return 0;
    }
    
    static int
    _collating_compare(const char *str1, const char *str2, int *ip)
    {
        const unsigned char *s1 = (const unsigned char *) str1;
        const unsigned char *s2 = (const unsigned char *) str2;
    
        while (*s1 != '\0' && *s2 != '\0') {
            int cv1 = ip[*s1++];
            int cv2 = ip[*s2++];
    
            if (cv1 < cv2) return -1;
            if (cv1 > cv2) return 1;
        }
    
        if (*s1 == '\0' && *s2 == '\0') {
            return 0;
        } else {
            return *s1 == '\0' ? -1 : 1;
        }
    }
    
    int
    collating_compare(const void *v1, const void *v2, void *p)
    {
        return _collating_compare(*(const char **) v1,
                                  *(const char **) v2,
                                  (int *) p);
    }
    

    The previous is close to code that could be put in a separate module or library, but lacks its own header file (or entry in a header file). My own test merely concatenates the code above and below into a file named custom_collate_sort.c, and uses

    gcc -DMAIN_TEST -Wall -o custom_collate_sort custom_collate_sort.c
    

    ...to compile it.

    #if defined(MAIN_TEST)
    
    /* qsort_r is a GNU-ey thing... */
    
    #define __USE_GNU
    #include <stdlib.h>
    #include <string.h>
    
    #define NELEM(x) (sizeof x / sizeof 0[x])
    
    static int
    cmp(const void *v1, const void *v2)
    {
        return strcmp(*(const char **) v1, *(const char **) v2);
    }
    
    static int
    casecmp(const void *v1, const void *v2)
    {
        return strcasecmp(*(const char **) v1, *(const char **) v2);
    }
    
    int
    main(int ac, char *av[])
    {
        size_t i;
    
        int cval[256], ret;
        int cval_rev[256], rret;
    
        char *tosort[] = {
            "cheeSE", "eggs", "Milk", "potatoes", "cheese", "spaghetti",
            "eggs", "milk", "spinach", "bacon", "egg", "apple", "PEAR",
            "pea", "berry"
        };
    
        ret = collating_init("aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVxXyYzZ",
                             cval, NELEM(cval));
    
        rret = collating_init("ZzYyXxVvUuTtSsRrQqPpOoNnMmLlKkJjIiHhGgFfEeDdCcBbAa",
                              cval_rev, NELEM(cval_rev));
    
        if (ret == -1 || rret == -1) {
            fputs("collating value array must accomodate an index of UCHAR_MAX\n", stderr);
            return 1;
        }
    
        puts("Unsorted:");
        for (i = 0; i < NELEM(tosort); i++) {
            printf("  %s\n", tosort[i]);
        }
    
        qsort((void *) tosort, NELEM(tosort), sizeof tosort[0], cmp);
    
        puts("Sorted w/ strcmp:");
        for (i = 0; i < NELEM(tosort); i++) {
            printf("  %s\n", tosort[i]);
        }
    
        qsort((void *) tosort, NELEM(tosort), sizeof tosort[0], casecmp);
    
        puts("Sorted w/ strcasecmp:");
        for (i = 0; i < NELEM(tosort); i++) {
            printf("  %s\n", tosort[i]);
        }
    
        qsort_r((void *) tosort, NELEM(tosort), sizeof tosort[0],
                collating_compare, (void *) cval);
    
        puts("Sorted w/ collating sequence:");
        for (i = 0; i < NELEM(tosort); i++) {
            printf("  %s\n", tosort[i]);
        }
    
        qsort_r((void *) tosort, NELEM(tosort), sizeof tosort[0],
                collating_compare, (void *) cval_rev);
    
        puts("Sorted w/ reversed collating sequence:");
        for (i = 0; i < NELEM(tosort); i++) {
            printf("  %s\n", tosort[i]);
        }
    
        return 0;
    }
    
    #endif /* MAIN_TEST */
    
    0 讨论(0)
提交回复
热议问题