How to efficiently remove duplicates from an array without using Set

后端 未结 30 2524
情深已故
情深已故 2020-11-22 07:29

I was asked to write my own implementation to remove duplicated values in an array. Here is what I have created. But after tests with 1,000,000 elements it took very long ti

相关标签:
30条回答
  • 2020-11-22 07:55
    /**
     * Demonstrates order-preserving de-duplication of a small hard-coded
     * String array using only a second array as scratch space.
     *
     * <p>The distinct values are collected into a local array; nothing is
     * returned or printed.
     */
    public void removeDup() {
        String[] arr = {"1", "1", "2", "3", "3"};
        String[] arr2 = new String[arr.length];
        int count1 = 0;
        for (String val : arr) {
            boolean exists = false;
            // Only the first count1 slots of arr2 have been filled so far.
            for (int loop2 = 0; loop2 < count1; loop2++) {
                // Compare by content: == on String only compares references,
                // which happens to work for literals but not for strings built at runtime.
                if (arr2[loop2].equals(val)) {
                    exists = true;
                    break; // one match is enough
                }
            }
            if (!exists) {
                arr2[count1] = val;
                count1++;
            }
        }
    }
    
    0 讨论(0)
  • 2020-11-22 07:56
    package com.pari.practice;
    
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Iterator;
    
    import com.pari.sort.Sort;
    
    public class RemoveDuplicates {

        /**
         * Brute force, O(n^2). Keeps the first occurrence of every value in its
         * original position order. The input array is not modified.
         *
         * @param input values to de-duplicate
         * @return a new array containing each distinct value once, in first-seen order
         */
        public static int[] removeDups(int[] input) {
            boolean[] isSame = new boolean[input.length];
            int sameNums = 0;

            for (int i = 0; i < input.length; i++) {
                if (isSame[i]) {
                    // Already marked as a repeat; its later copies were marked
                    // when the first occurrence was processed.
                    continue;
                }
                for (int j = i + 1; j < input.length; j++) {
                    // Count each duplicate exactly once. Counting it again for every
                    // earlier occurrence undersizes the result when a value appears
                    // three or more times.
                    if (!isSame[j] && input[j] == input[i]) {
                        isSame[j] = true;
                        sameNums++;
                    }
                }
            }

            // Compact the unmarked elements into the result.
            int[] result = new int[input.length - sameNums];
            int count = 0;
            for (int i = 0; i < input.length; i++) {
                if (!isSame[i]) {
                    result[count++] = input[i];
                }
            }

            return result;
        }

        /**
         * Hash set based, O(n) expected time. The order of the returned elements
         * is the set's iteration order and is therefore not guaranteed.
         *
         * @param input values to de-duplicate
         * @return a new array containing each distinct value once, in unspecified order
         */
        public static int[] removeDups1(int[] input) {
            HashSet<Integer> myset = new HashSet<>();
            for (int value : input) {
                myset.add(value);
            }

            // Copy the set contents into a primitive array.
            int[] result = new int[myset.size()];
            int count = 0;
            for (int value : myset) {
                result[count++] = value;
            }

            return result;
        }

        /**
         * Sort based, O(n log n). Works on a sorted copy, so the caller's array is
         * left untouched.
         *
         * @param input values to de-duplicate
         * @return a new array containing each distinct value once, in ascending order
         */
        public static int[] removeDups2(int[] input) {
            int[] sorted = Arrays.copyOf(input, input.length);
            Arrays.sort(sorted);

            // Compact in place: equal values are adjacent after sorting. Comparing with
            // the previous element (instead of a sentinel value) keeps Integer.MIN_VALUE.
            int count = 0;
            for (int i = 0; i < sorted.length; i++) {
                if (i == 0 || sorted[i] != sorted[i - 1]) {
                    sorted[count++] = sorted[i];
                }
            }

            return Arrays.copyOf(sorted, count);
        }

        /**
         * Prints the elements on one line, each followed by a single space.
         *
         * @param input array to print
         */
        public static void printArray(int[] input) {
            for (int i = 0; i < input.length; i++) {
                System.out.print(input[i] + " ");
            }
        }

        /** Runs the three variants on a sample array and prints each result on its own line. */
        public static void main(String[] args) {
            int[] input = {5, 6, 8, 0, 1, 2, 5, 9, 11, 0};
            RemoveDuplicates.printArray(RemoveDuplicates.removeDups(input));
            System.out.println();
            RemoveDuplicates.printArray(RemoveDuplicates.removeDups1(input));
            System.out.println();
            RemoveDuplicates.printArray(RemoveDuplicates.removeDups2(input));
        }
    }
    

    Output: 5 6 8 0 1 2 9 11

    0 1 2 5 6 8 9 11

    0 1 2 5 6 8 9 11

    I have just written the above code for trying out. thanks.

    0 讨论(0)
  • 2020-11-22 07:57

    Not a big fan of modifying the caller's input; however, considering your constraints...

    /**
     * Sorts {@code nums} in place, shifts the distinct values to the front of
     * that same array, and returns a copy of that leading portion.
     *
     * @param nums values to de-duplicate; reordered and partially overwritten by this call
     * @return a new array with each distinct value once, in ascending order
     */
    public int[] removeDup(int[] nums) {
      Arrays.sort(nums);
      int kept = 0;
      int idx = 0;
      while (idx < nums.length) {
        // A value starts a new run when it is the first element or differs from its left neighbour.
        boolean startsNewRun = idx == 0 || nums[idx - 1] != nums[idx];
        if (startsNewRun) {
          nums[kept] = nums[idx];
          kept++;
        }
        idx++;
      }
      return Arrays.copyOf(nums, kept);
    }
    

    Arrays.sort can easily be replaced with any O(n log n) sorting algorithm.

    0 讨论(0)
  • 2020-11-22 07:57

    You need to sort your array, then loop over it and remove duplicates. As you cannot use other tools, you need to write the code yourself.

    You can easily find examples of quicksort in Java on the internet (on which this example is based).

    /**
     * Demo: prints a sample array, sorts it in place with {@code quicksort},
     * prints it again, then prints the de-duplicated values twice: first in a
     * full-length scratch array (unused tail slots stay 0), then trimmed to size.
     */
    public static void main(String[] args) throws Exception {
        final int[] data = {1, 1, 2, 8, 9, 8, 4, 7, 4, 9, 1};
        System.out.println(Arrays.toString(data));
        quicksort(data);
        System.out.println(Arrays.toString(data));

        // After sorting, a value is new exactly when it differs from its left neighbour.
        final int[] distinct = new int[data.length];
        distinct[0] = data[0];
        int used = 1;
        for (int idx = 1; idx < data.length; idx++) {
            if (data[idx] != data[idx - 1]) {
                distinct[used] = data[idx];
                used++;
            }
        }
        System.out.println(Arrays.toString(distinct));

        // Keep only the filled prefix.
        final int[] trimmed = Arrays.copyOf(distinct, used);
        System.out.println(Arrays.toString(trimmed));
    }
    
    /**
     * Sorts the whole array in place by delegating to the range overload.
     * An empty array is left untouched.
     *
     * @param values array to sort
     */
    private static void quicksort(final int[] values) {
        if (values.length > 0) {
            quicksort(values, 0, values.length - 1);
        }
    }
    
    /**
     * Recursively sorts {@code values[low..high]} (both ends inclusive) in place,
     * partitioning around the element at the middle index of the range.
     *
     * @param values array being sorted
     * @param low    first index of the range
     * @param high   last index of the range
     */
    private static void quicksort(final int[] values, final int low, final int high) {
        final int pivot = values[low + (high - low) / 2];
        int left = low;
        int right = high;

        while (left <= right) {
            // Advance each cursor past elements already on the correct side of the pivot.
            for (; values[left] < pivot; left++) {
            }
            for (; values[right] > pivot; right--) {
            }
            if (left > right) {
                break; // cursors crossed: partition finished
            }
            swap(values, left, right);
            left++;
            right--;
        }

        // Recurse into whichever sides still hold more than one element.
        if (right > low) {
            quicksort(values, low, right);
        }
        if (high > left) {
            quicksort(values, left, high);
        }
    }
    
    /**
     * Exchanges the elements at positions {@code i} and {@code j}.
     *
     * @param values array to modify
     * @param i      first index
     * @param j      second index
     */
    private static void swap(final int[] values, final int i, final int j) {
        final int held = values[j];
        values[j] = values[i];
        values[i] = held;
    }
    

    So the process runs in 3 steps.

    1. Sort the array - O(nlgn)
    2. Remove duplicates - O(n)
    3. Compact the array - O(n)

    So this improves significantly on your O(n^3) approach.

    Output:

    [1, 1, 2, 8, 9, 8, 4, 7, 4, 9, 1]
    [1, 1, 1, 2, 4, 4, 7, 8, 8, 9, 9]
    [1, 2, 4, 7, 8, 9, 0, 0, 0, 0, 0]
    [1, 2, 4, 7, 8, 9]
    

    EDIT

    The OP states that the values inside the array don't really matter, but that we can assume the range is 0-1000. This is a classic case where an O(n) sort can be used.

    We create an array of size range +1, in this case 1001. We then loop over the data and increment the values on each index corresponding to the datapoint.

    We can then compact the resulting array, dropping values that have not been incremented. This makes the values unique, as we ignore the count.

    /**
     * Demo: prints a sample array whose values lie in 0..1000, tallies how often
     * each value occurs in a 1001-slot table, and prints every value with a
     * non-zero tally in ascending order.
     */
    public static void main(String[] args) throws Exception {
        final int[] data = {1, 1, 2, 8, 9, 8, 4, 7, 4, 9, 1, 1000, 1000};
        System.out.println(Arrays.toString(data));

        // One counter per possible value; the value itself is the index.
        final int[] tally = new int[1001];
        for (int idx = 0; idx < data.length; idx++) {
            tally[data[idx]] += 1;
        }

        // Walk the table in index order so the distinct values come out sorted.
        final int[] distinct = new int[data.length];
        int used = 0;
        int value = 0;
        while (value < tally.length) {
            if (tally[value] > 0) {
                distinct[used] = value;
                used++;
            }
            value++;
        }

        final int[] trimmed = Arrays.copyOf(distinct, used);
        System.out.println(Arrays.toString(trimmed));
    }
    

    Output:

    [1, 1, 2, 8, 9, 8, 4, 7, 4, 9, 1, 1000, 1000]
    [1, 2, 4, 7, 8, 9, 1000]
    
    0 讨论(0)
  • 2020-11-22 07:58
    /**
     * Collects the values into a {@code HashSet} and copies the set's contents
     * into a new array, in the set's iteration order.
     *
     * @param arr values to de-duplicate
     * @return a new array containing each distinct value once
     */
    public static int[] removeDuplicates(int[] arr){
        final HashSet<Integer> seen = new HashSet<>();
        for (final int value : arr) {
            seen.add(value);
        }

        final int[] whitelist = new int[seen.size()];
        int next = 0;
        for (final Integer boxed : seen) {
            whitelist[next] = boxed;
            next++;
        }
        return whitelist;
    }
    

    Runs in O(N) time instead of your O(N^3) time

    0 讨论(0)
  • 2020-11-22 07:58

    How about this one, only for sorted array of numbers, to print array without duplicates, without using Set or other Collections, just Array:

     /**
      * Returns the values of {@code array} with consecutive repeats collapsed.
      * For an array that is already sorted this yields each distinct value once.
      * The input array is not modified.
      *
      * @param array values to de-duplicate; expected to be sorted
      * @return a new array with every run of equal values reduced to one element
      */
     public static int[] removeDuplicates(int[] array) {
         int[] nums = new int[array.length];
         int j = 0;
         for (int i = 0; i < array.length; i++) {
             // Always keep the first element, then compare against the last value kept.
             // Seeding the comparison with a literal 0 would drop a leading 0 from the input.
             if (j == 0 || nums[j - 1] != array[i]) {
                 nums[j] = array[i];
                 j++;
             }
         }
         return Arrays.copyOf(nums, j);
     }
    

    Array of 1040 duplicated numbers processed in 33020 nanoseconds(0.033020 millisec).

    0 讨论(0)
提交回复
热议问题