How do I remove duplicates from a C# array?

后端 未结 27 2284
北海茫月
北海茫月 2020-11-22 07:53

I have been working with a string[] array in C# that gets returned from a function call. I could possibly cast to a Generic collection, but I was w

27条回答
  •  长发绾君心
    2020-11-22 08:31

    The best way? Hard to say. The HashSet approach looks fast, but, depending on the data, a counting-sort-style approach can be much faster.

    using System;
    using System.Collections.Generic;
    using System.Linq;
    /// <summary>
    /// Small benchmark comparing three ways of removing duplicates from an int array.
    /// The "ms" figures on each method are the timings reported by the original author
    /// for the 1,000,000-element random input built in <see cref="Main"/>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Fills an array with 1,000,000 pseudo-random values in [0, 1,000,000) using a
        /// fixed seed, runs dedup0 once untimed and prints the number of distinct values,
        /// then runs it again on a fresh copy and prints the elapsed time.
        /// </summary>
        static void Main()
        {
            Random r = new Random(0);
            int[] b = new int[1000000];
            for (int i = b.Length - 1; i >= 0; i--)
            {
                b[i] = r.Next(b.Length);
            }

            // First, untimed run (presumably to get JIT compilation out of the measurement).
            int[] a = new int[b.Length];
            Array.Copy(b, a, b.Length);
            a = dedup0(a);
            Console.WriteLine(a.Length);

            // Second run on a fresh copy of the same data; this one is timed.
            a = new int[b.Length];
            Array.Copy(b, a, b.Length);
            var w = System.Diagnostics.Stopwatch.StartNew();
            a = dedup0(a);
            Console.WriteLine(w.Elapsed);

            // Wait for input so the console window does not close immediately.
            Console.Read();
        }

        /// <summary>
        /// Removes duplicates by loading the array into a hash set. (48 ms)
        /// </summary>
        /// <param name="a">Input values; not modified.</param>
        /// <returns>A new array holding each distinct value once.</returns>
        static int[] dedup0(int[] a)  // 48 ms
        {
            // The element type must be spelled out: there is no non-generic HashSet.
            return new HashSet<int>(a).ToArray();
        }

        /// <summary>
        /// Removes duplicates by sorting and then compacting equal neighbours. (68 ms)
        /// </summary>
        /// <param name="a">Input values. Sorted in place, so the caller's array is modified.</param>
        /// <returns>
        /// The distinct values in ascending order. Arrays with fewer than two elements
        /// are returned as-is.
        /// </returns>
        static int[] dedup1(int[] a)  // 68 ms
        {
            Array.Sort(a);
            int i = 0, j = 1, k = a.Length;
            if (k < 2)
            {
                return a;
            }

            // i is the last slot of the compacted prefix, j scans the rest.
            while (j < k)
            {
                if (a[i] == a[j])
                {
                    j++;
                }
                else
                {
                    a[++i] = a[j++];
                }
            }

            Array.Resize(ref a, i + 1);
            return a;
        }

        /// <summary>
        /// Removes duplicates using a "seen" flag per possible value. (8 ms)
        /// </summary>
        /// <param name="a">
        /// Input values; not modified. Every value is used directly as an index into a
        /// flag array of length <c>a.Length</c>, so each value must lie in
        /// [0, a.Length); anything else throws <see cref="IndexOutOfRangeException"/>.
        /// </param>
        /// <returns>A new array with the distinct values in ascending order.</returns>
        static int[] dedup2(int[] a)  //  8 ms
        {
            var b = new byte[a.Length];
            int c = 0;

            // Pass 1: mark each value the first time it is seen and count distinct values.
            for (int i = 0; i < a.Length; i++)
            {
                if (b[a[i]] == 0)
                {
                    b[a[i]] = 1;
                    c++;
                }
            }

            // Pass 2: walk the flags in index order and emit every marked value.
            a = new int[c];
            for (int j = 0, i = 0; i < b.Length; i++)
            {
                if (b[i] > 0)
                {
                    a[j++] = i;
                }
            }

            return a;
        }
    }
    

    The next version is almost branch-free. To see how it works, run it in debug mode and step through it (Step Into, F11) with a small array such as {1,3,1,1,0}.

        /// <summary>
        /// Removes duplicates with a flag array, using arithmetic on the 0/1 flags
        /// instead of if-statements in the two main loops. (4 ms, as reported by the author)
        /// </summary>
        /// <param name="a">
        /// Input values; not modified. Each value is used as an index into a flag array
        /// of length <c>a.Length</c>.
        /// </param>
        /// <returns>
        /// The input itself when it has fewer than two elements; otherwise a new array
        /// with the distinct values in ascending order.
        /// </returns>
        static int[] dedupf(int[] a)  //  4 ms
        {
            if (a.Length < 2)
            {
                return a;
            }

            var seen = new byte[a.Length];
            int distinct = 0;

            // Pass 1: firstTime is 1 when the value has not been seen yet, else 0.
            foreach (int value in a)
            {
                int firstTime = 1 ^ seen[value];
                seen[value] |= (byte)firstTime;
                distinct += firstTime;
            }

            var result = new int[distinct];

            // The smallest present value goes straight into slot 0.
            int pos = 0;
            while (seen[pos] == 0)
            {
                pos++;
            }
            result[0] = pos;

            // Pass 2: a set flag advances the output slot and adds the value into it
            // (the slot starts at 0); a clear flag adds 0 to the current slot.
            int slot = 0;
            for (pos++; pos < seen.Length; pos++)
            {
                int flag = seen[pos];
                slot += flag;
                result[slot] += flag * pos;
            }

            return result;
        }
    

    A solution with two nested loops might take some time, especially for larger arrays.

        /// <summary>
        /// Removes duplicates by comparing every element with all later ones. When a
        /// later element is a duplicate, it is overwritten with the current last element
        /// and the logical length shrinks by one.
        /// </summary>
        /// <param name="a">Input values. Elements are overwritten in place during the scan.</param>
        /// <returns>
        /// The array resized to the number of distinct values. The first occurrence of
        /// each value keeps its position; other elements may be moved.
        /// </returns>
        static int[] dedup(int[] a)
        {
            int last = a.Length - 1;

            for (int current = 0; current < last; current++)
            {
                int probe = current + 1;
                while (probe <= last)
                {
                    if (a[current] == a[probe])
                    {
                        // Fill the duplicate's slot from the tail and shrink; do not
                        // advance, the element just moved in still has to be checked.
                        a[probe] = a[last];
                        last--;
                    }
                    else
                    {
                        probe++;
                    }
                }
            }

            Array.Resize(ref a, last + 1);
            return a;
        }
    

提交回复
热议问题