I have been working with a string[] array in C# that gets returned from a function call. I could possibly cast to a Generic collection, but I was wondering if there is a better way to remove the duplicate values from the array.
protected void Page_Load(object sender, EventArgs e)
{
    string a = "a;b;c;d;e;v";
    string[] b = a.Split(';');
    string[] c = b.Distinct().ToArray();
    if (b.Length != c.Length)
    {
        for (int i = 0; i < b.Length; i++)
        {
            // Once c runs out, or the two arrays diverge, b[i] is the first duplicate.
            if (i >= c.Length || b[i] != c[i])
            {
                Response.Write("Found duplicate " + b[i]);
                return;
            }
        }
    }
    else
    {
        Response.Write("No duplicate ");
    }
}
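Note that comparing b[i] to c[i] positionally works because Distinct() preserves the order of first occurrences, so the two arrays stay aligned until the first repeated element.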
I tested the code below and it works. What's cool is that it also does a culture-sensitive search:
using System;
using System.Globalization;

class RemoveDuplicatesInString
{
    public static String RemoveDups(String origString)
    {
        String outString = null;
        int readIndex = 0;
        CompareInfo ci = CultureInfo.CurrentCulture.CompareInfo;
        if (String.IsNullOrEmpty(origString))
        {
            return outString;
        }
        foreach (var ch in origString)
        {
            if (readIndex == 0)
            {
                outString = ch.ToString();
                readIndex++;
                continue;
            }
            // Culture-sensitive search of the already-read prefix for this char.
            if (ci.IndexOf(origString, ch.ToString().ToLower(), 0, readIndex) == -1)
            {
                // Unique char, as this char wasn't found earlier.
                outString = String.Concat(outString, ch);
            }
            readIndex++;
        }
        return outString;
    }

    static void Main(string[] args)
    {
        String inputString = "aAbcefc";
        String outputString = RemoveDups(inputString);
        Console.WriteLine(outputString);
    }
}
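For the sample input "aAbcefc" this prints "abcef": the 'A' is skipped because its lower-cased form "a" is found in the already-scanned prefix, and the second 'c' is skipped as an exact repeat.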
Here is the HashSet<string> approach:
public static string[] RemoveDuplicates(string[] s)
{
    HashSet<string> set = new HashSet<string>(s);
    string[] result = new string[set.Count];
    set.CopyTo(result);
    return result;
}
Unfortunately this solution also requires .NET Framework 3.5 or later, as HashSet was not added until that version. You could also use array.Distinct(), which is a LINQ feature.
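For example, a minimal sketch of the Distinct() alternative, reusing the s parameter from the method above and assuming a using System.Linq; directive:

// Distinct() preserves first-occurrence order; ToArray() materializes the result.
string[] unique = s.Distinct().ToArray();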
The best way? Hard to say; the HashSet approach looks fast, but (depending on the data) a sorting- or counting-based algorithm (counting sort?) can be much faster.
using System;
using System.Collections.Generic;
using System.Linq;
class Program
{
    static void Main()
    {
        Random r = new Random(0); int[] a, b = new int[1000000];
        for (int i = b.Length - 1; i >= 0; i--) b[i] = r.Next(b.Length);
        a = new int[b.Length]; Array.Copy(b, a, b.Length);
        a = dedup0(a); Console.WriteLine(a.Length); // warm-up run
        a = new int[b.Length]; Array.Copy(b, a, b.Length);
        var w = System.Diagnostics.Stopwatch.StartNew();
        // Swap in dedup1/dedup2/dedupf here to reproduce the timings below.
        a = dedup0(a); Console.WriteLine(w.Elapsed); Console.Read();
    }

    static int[] dedup0(int[] a) // 48 ms
    {
        return new HashSet<int>(a).ToArray();
    }

    static int[] dedup1(int[] a) // 68 ms
    {
        Array.Sort(a); int i = 0, j = 1, k = a.Length; if (k < 2) return a;
        while (j < k) if (a[i] == a[j]) j++; else a[++i] = a[j++];
        Array.Resize(ref a, i + 1); return a;
    }

    static int[] dedup2(int[] a) // 8 ms
    {
        // Counting-sort style: assumes all values are in [0, a.Length).
        var b = new byte[a.Length]; int c = 0;
        for (int i = 0; i < a.Length; i++)
            if (b[a[i]] == 0) { b[a[i]] = 1; c++; }
        a = new int[c];
        for (int j = 0, i = 0; i < b.Length; i++) if (b[i] > 0) a[j++] = i;
        return a;
    }
}
Almost branch-free. How does it work? Run it in Debug mode and Step Into (F11) with a small array: {1,3,1,1,0}
static int[] dedupf(int[] a) // 4 ms
{
    if (a.Length < 2) return a;
    var b = new byte[a.Length]; int c = 0, bi, ai, i, j;
    for (i = 0; i < a.Length; i++)
    { ai = a[i]; bi = 1 ^ b[ai]; b[ai] |= (byte)bi; c += bi; }
    a = new int[c]; i = 0; while (b[i] == 0) i++; a[0] = i++;
    for (j = 0; i < b.Length; i++) a[j += bi = b[i]] += bi * i; return a;
}
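A quick trace of the suggested run, for a = {1,3,1,1,0} (my own walk-through of the code above):

// First loop: bi = 1 ^ b[ai] is 1 for an unseen value and 0 for a seen one,
// so b ends up as {1,1,0,1,0} (values 0, 1 and 3 present) and c = 3.
// Second loop: j advances only where b[i] == 1, accumulating the sorted
// unique values into a, giving {0, 1, 3}.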
A solution with two nested loops is O(n²), so it can take noticeable time on larger arrays.
static int[] dedup(int[] a)
{
    int i, j, k = a.Length - 1;
    for (i = 0; i < k; i++)
        for (j = i + 1; j <= k; j++) if (a[i] == a[j]) a[j--] = a[k--];
    Array.Resize(ref a, k + 1); return a;
}
Here is an O(n²) approach that uses O(1) space.
#include <string.h>
#include <stdbool.h>

void removeDuplicates(char* strIn)
{
    int numDups = 0, prevIndex = 0;
    if (NULL != strIn && *strIn != '\0')
    {
        int len = strlen(strIn);
        for (int i = 0; i < len; i++)
        {
            bool foundDup = false;
            // Look for the current character among those already scanned.
            for (int j = 0; j < i; j++)
            {
                if (strIn[j] == strIn[i])
                {
                    foundDup = true;
                    numDups++;
                    break;
                }
            }
            if (foundDup == false)
            {
                strIn[prevIndex] = strIn[i];
                prevIndex++;
            }
        }
        strIn[len - numDups] = '\0';
    }
}
The hash/LINQ approaches above are what you would generally use in real life. In interviews, however, they usually add constraints, e.g. constant space (which rules out a hash set) or no internal APIs (which rules out LINQ).
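For example, a minimal C# sketch of the same constant-extra-space idea as the C version above (the method name is mine):

static int RemoveDuplicatesInPlace(char[] chars)
{
    int writeIndex = 0;
    for (int i = 0; i < chars.Length; i++)
    {
        bool seen = false;
        // Scan the already-kept prefix for the current character.
        for (int j = 0; j < writeIndex; j++)
            if (chars[j] == chars[i]) { seen = true; break; }
        if (!seen) chars[writeIndex++] = chars[i];
    }
    return writeIndex; // the unique characters now occupy chars[0..writeIndex)
}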
Maybe use a HashSet<T>, which does not store duplicate elements and silently ignores requests to add duplicates.
static void Main()
{
    string textWithDuplicates = "aaabbcccggg";
    Console.WriteLine(textWithDuplicates.Count());
    var letters = new HashSet<char>(textWithDuplicates);
    Console.WriteLine(letters.Count());
    foreach (char c in letters) Console.Write(c);
    Console.WriteLine("");

    int[] array = new int[] { 12, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
    Console.WriteLine(array.Count());
    var distinctArray = new HashSet<int>(array);
    Console.WriteLine(distinctArray.Count());
    foreach (int i in distinctArray) Console.Write(i + ",");
}