Generating unique ids with a max length of 3 digits/letters/symbols

妖精的绣舞 提交于 2019-12-30 15:39:54

问题


I have a list of 75200 words. I need to give a 'unique' id to each word, and the length of each id could be 3 letters or less. I can use numbers, letters or even symbols but the max length is 3.

Below is my code.

import java.io.*;
import java.util.*;


/**
 * Reads a word list line by line and tries to assign each word an id of at most
 * three characters, storing id -> word in a map.
 *
 * <p>Ids are produced in stages: plain numbers (0-999), then a letter followed by a
 * number (a0-z99), then a symbol followed by a number, then letter + symbol + digit.
 *
 * <p>NOTE(review): this is the non-working version discussed in the question. The
 * comments marked NOTE(review) below point out the defects visible in the code.
 */
public class HashCreator {

    // Result of createWordNumberingMap(); raw Map whose keys are the generated id strings.
    private Map completedWordMap;
    // Letters used as the first character of two-part ids such as "a0".
    // NOTE(review): ids for the letter 'l' (l0..l99) are easy to misread as the numbers 10..199.
    private String [] simpleLetters = {"a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"};
    // Symbols used once the letters are exhausted.
    private String[] symbols = {"!","@","#","$","%","^","&","*","~","?"};
    // The most recently generated id.
    private String indexNumber;
    // NOTE(review): these two fields are not referenced anywhere else in this class.
    String currentlyUsingLetter, currentlyUsingSymbol;
    // Positions in simpleLetters / symbols for the two-part ids (no suffix) and the three-part ids (suffix 2).
    private int currentlyActiveSimpleLetter = 0, currentlyActiveSymbol = 0, currentlyActiveSimpleLetter2 = 0, currentlyActiveSymbol2 = 0;
    // NOTE(review): not referenced anywhere else in this class.
    private boolean secondaryNumberIsHundred = false;

    /**
     * Builds the id -> word map immediately, which reads the word list file.
     */
    public HashCreator()
    {
        completedWordMap = createWordNumberingMap();
    }


    /**
     * Reads "WordList/NewWordsList.txt" and stores one entry per generated id,
     * printing every id as it is assigned.
     *
     * @return the map from generated id (String) to the word read from the file;
     *         whatever was collected so far is returned even if reading failed
     */
    private Map createWordNumberingMap()
    {
        int number = 0;            // counter for the plain numeric ids 0-999
        int secondaryNumber = 0;   // 0-99 counter appended to a letter or a symbol
        int thirdinoryNumber = 0;  // 0-9 counter appended to letter + symbol
        Map wordMap = new HashMap();
        BufferedReader br = null;
        String str = "";
        boolean reset = false;     // NOTE(review): never read or written again in this method


        //First Read The File
        File readingFile = new File("WordList/NewWordsList.txt");

        try
        {
            br = new BufferedReader(new FileReader(readingFile));

            while((str=br.readLine())!=null)
            {
                if(number<1000) //Assign numbers from 0 to 999
                {
                    indexNumber = String.valueOf(number);
                    wordMap.put(indexNumber, str);
                    number++;
                    System.out.println(indexNumber);
                }
                else // It is 1000 now. Length exceeds so find another way.
                {
                    // Checks the length of the previously generated id, not of the one about to be built.
                    if(indexNumber.length()<4)
                    {
                        if(currentlyActiveSimpleLetter<simpleLetters.length) //Start using simple letter array
                        {
                            if(secondaryNumber<100) //Start combining numbers with letters. Results will look like 'a0', a1', 'a2'......'x98',x99'
                            {
                                indexNumber = simpleLetters[currentlyActiveSimpleLetter]+secondaryNumber;
                                wordMap.put(indexNumber, str);
                                secondaryNumber++;
                                System.out.println(indexNumber);

                            }
                            else
                            { 
                                //If the number is 100, that means the last result is something like 'a99','b99'...'x99'
                                //Time to use a new letter and set the counter back to 0 and select the next letter
                                // NOTE(review): the word read in this iteration is not stored in the map,
                                // so one word is skipped every time the letter rolls over.

                                secondaryNumber = 0;
                                currentlyActiveSimpleLetter++;

                            }
                        }
                        else
                        {

                            if(currentlyActiveSymbol<symbols.length) //We have used the entire alphabet. Start using sybmols now.
                            {
                                // NOTE(review): currentlyActiveSymbol stays 0 for the whole first symbol, so this
                                // reset runs on every iteration, not only the first. secondaryNumber is therefore
                                // always 0 below, the id is always "!0", and the symbol never advances.
                                if(currentlyActiveSymbol==0) //If this is the first time we are reaching this step, reset the counter to 0
                                {
                                    secondaryNumber = 0;
                                }

                                if(secondaryNumber<100)
                                {
                                    indexNumber = symbols[currentlyActiveSymbol]+secondaryNumber;
                                    wordMap.put(indexNumber, str);
                                    secondaryNumber++;
                                    System.out.println(indexNumber);

                                }
                                else
                                {
                                    //If the number is 100, that means the last result is something like '!99','@99'...'*99'
                                    //Time to use a new letter and set the counter back to 0 and select the next symbol
                                    // NOTE(review): as in the letter branch, the current word is not stored here.
                                    secondaryNumber = 0;
                                    currentlyActiveSymbol++;

                                }
                            }
                            else 
                            {
                                //We have used entire list of numbers (0-999), entire list of letters (a0-z99) and entire set of symbols (!0 - ?99)
                                //Now we need to combine all 3 together.
                                // NOTE(review): this branch is only entered when currentlyActiveSymbol is no longer
                                // below symbols.length, yet the code below still indexes symbols[currentlyActiveSymbol],
                                // which would be out of range.


                                if(thirdinoryNumber<10)//We are starting with a new 'Number' counter
                                {
                                    //We again start with replacing numbers. Here the first few and last few results will look like  a!0'.....'a!9'

                                        indexNumber = simpleLetters[currentlyActiveSimpleLetter2]+symbols[currentlyActiveSymbol]+thirdinoryNumber;
                                        wordMap.put(indexNumber, str);
                                        thirdinoryNumber++;
                                        System.out.println(indexNumber);

                                        // NOTE(review): second increment in the same iteration, so every other digit is skipped.
                                        thirdinoryNumber++;
                                }
                                else
                                {
                                    //We have used number from 0-9. Time to start replacing letters

                                    if(currentlyActiveSimpleLetter2<simpleLetters.length)
                                    {
                                        if(currentlyActiveSimpleLetter2==0) //If this is the 'first' time we reach this point, reset the number counter.
                                        {
                                            thirdinoryNumber = 0;
                                        }

                                        if(thirdinoryNumber<10)
                                        {
                                            indexNumber = simpleLetters[currentlyActiveSimpleLetter2]+symbols[currentlyActiveSymbol]+thirdinoryNumber;
                                            wordMap.put(indexNumber, str);
                                            thirdinoryNumber++;
                                            System.out.println(indexNumber);

                                        }
                                        else
                                        {
                                            thirdinoryNumber = 0;
                                            currentlyActiveSimpleLetter2++; //If we are at the peek of usable numbers (0-9) reset simpleletter array position to
                                                                            // 0 and numbercounter to 0

                                        }
                                    }
                                    else
                                    {
                                        //We have used number from 0-9. Time to start replacing symbols
                                        // NOTE(review): currentlyActiveSimpleLetter2 is no longer below simpleLetters.length
                                        // here, but it is still used as an index below; currentlyActiveSymbol2 is
                                        // advanced but never used to pick a symbol.

                                        if(currentlyActiveSymbol2<symbols.length)
                                        {
                                            if(currentlyActiveSymbol2==0) //If this is the 'first' time we reach this point, reset the number counter.
                                            {
                                                thirdinoryNumber = 0;
                                            }

                                            if(thirdinoryNumber<10)
                                            {
                                                indexNumber = simpleLetters[currentlyActiveSimpleLetter2]+symbols[currentlyActiveSymbol]+thirdinoryNumber;
                                                wordMap.put(indexNumber, str);
                                                thirdinoryNumber++;
                                                System.out.println(indexNumber);

                                            }
                                            else
                                            {
                                                thirdinoryNumber = 0;
                                                currentlyActiveSymbol2++; //If we are at the peek of usable numbers (0-9) reset symbol array position to
                                                                                // 0 and numbercounter to 0

                                            }
                                        }
                                    }
                                }

                            }

                        }
                    }
                    else
                    {
                        System.out.println("Error in Somewhere. Length Exceeded");
                    }
                }

            }

            br.close();
            System.out.println("Completed");
            // NOTE(review): the keys put into wordMap are Strings, but 0 is boxed to an Integer here,
            // so this lookup finds nothing and prints null; the first id is stored under "0".
            System.out.println(wordMap.get(0));

        }
        catch(Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            try
            {
                // NOTE(review): br is closed a second time here after the close() above, and br is
                // still null if the FileReader could not be opened (the resulting exception is caught below).
                br.close();
            }
            catch(Exception e)
            {
                e.printStackTrace();
            }
        }


        return wordMap;

    }


}

Unfortunately this doesn't work. It prints the results, but after the id 'z99' the output is just a long run of !0. Below is a small piece of it:

!0
!0
!0
!0
...
Completed
null

Apart from that, after k99, it has generated ids from 10-199 then started back with m0 properly. You can find the result file from here.

As you can see, wordMap.get(0) also generated null. What is wrong here? If there is any other simple method for generating 75000 unique ids with maximum 3 digits/letters/symbols length, I am more than happy to move with it.


回答1:


You could create a method that basically converts a decimal number to a base of your choice. Here I have 46 symbols for example, which gives 97336 unique sequences:

/** The 46 "digits" of the id alphabet, in order: 0-9, a-z, then ten punctuation marks. */
private static final String[] symbols = { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h",
        "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "!", "@", "#", "$", "%", "^", "&",
        "*", "~", "?" };

/**
 * Converts a number into a three-character id by writing it in base
 * {@code symbols.length}, most significant digit first.
 *
 * <p>Every value in the accepted range maps to a different id, and the id is always
 * exactly three characters long (small values are padded with the first symbol).
 *
 * @param i the number to encode; must satisfy {@code 0 <= i < symbols.length^3}
 * @return the three-character id for {@code i}
 * @throws IllegalArgumentException if {@code i} is negative or too large to fit in three characters
 */
public static String getSequence(final int i) {
    final int base = symbols.length;
    final int capacity = base * base * base;
    // Fail with a clear message instead of an array index error deep in the expression below.
    if (i < 0 || i >= capacity) {
        throw new IllegalArgumentException("i must be in [0, " + capacity + ") but was " + i);
    }
    return symbols[i / (base * base)] + symbols[(i / base) % base] + symbols[i % base];
}



回答2:


Here is a generator with enough IDs.

/**
 * Prints every three-character id that can be built from a 62-character alphabet
 * (digits, lower-case letters, upper-case letters).
 */
public class Main {
    /** Number of characters in a finished id. */
    private static final int ID_LENGTH = 3;

    /**
     * The alphabet, in output order. It is initialised eagerly so that
     * {@link #generate(int, String)} works even when {@link #init()} was not called.
     */
    private static final char[] A =
            "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray();

    /** Prints the size of the alphabet. Kept for callers that still invoke it before generating. */
    void init()
    {
        System.out.println("digits = " + A.length);
    }

    /**
     * Recursively extends {@code id} one character at a time and prints it once it is complete.
     *
     * @param length number of characters already placed in {@code id}; when it has reached
     *               {@value #ID_LENGTH} (or more) the id is printed as is
     * @param id     the characters chosen so far
     */
    public void generate(int length, String id)
    {
        // ">=" rather than "==" so a start value above the target length terminates
        // instead of recursing without end.
        if (length >= ID_LENGTH) {
            System.out.println(id);
            return;
        }
        for (char next : A) {
            generate(length + 1, id + next);
        }
    }

    public static void main(String[] args) {
        Main test = new Main();
        test.init();
        test.generate(0,  "");
    }
}

The number of unique IDs is (26 + 26 + 10) ^ 3 = 62^3 = 238328.

Obviously you need to adapt it to fit your particular problem.

Actually only 43 characters are needed since 43 ^ 3 = 79507 > 75200.

EDIT: Explanation of the generate() method.

This method implements a recursive algorithm to generate combinations of characters (the keys). The meaning of the parameters is the following:

  • length: the number of characters already placed in the key; the key is printed once this reaches 3.
  • id stores the combination of characters.

The following picture can help to understand the algorithm.

This is similar to how the decimal (or any other base) numbers are formed.

One thing I didn't notice at first is that you are trying to create all the possible keys of length 1 first, then all possible keys of length 2, and so on. My generator creates keys of exactly 3 characters only. The other behavior can be achieved by modifying the generate() method as follows:

/**
 * Prints every key obtained by appending {@code count} more alphabet characters to {@code id}.
 *
 * @param count how many characters still have to be appended; the key is printed when it is 0
 * @param id    the characters chosen so far
 */
public void generate(int count, String id)
{
    if (count != 0) {
        // Still characters to add: branch once per alphabet character.
        for (char next : A) {
            generate(count - 1, id + next);
        }
        return;
    }
    System.out.println(id);
}

And then call the method three times:

// Print all keys of length 1, then all of length 2, then all of length 3.
for (int keyLength = 1; keyLength <= 3; keyLength++) {
    test.generate(keyLength, "");
}

Some keys contain leading zeros, but that shouldn't be a problem since these keys are identifiers, not numbers. The number of possible keys increases by length(alphabet) + length(alphabet) ^ 2, i.e. we have 62 + 62^2 additional keys.

Since the length of the key is at most 3 the iterative version can be easily implemented using for loops:

/**
 * Prints every three-character key over the alphabet {@code A} using three nested loops,
 * with the last character varying fastest.
 */
public void iterative_generator()
{
    for (char first : A) {
        for (char second : A) {
            for (char third : A) {
                System.out.println(String.valueOf(new char[] { first, second, third }));
            }
        }
    }
}

I think you get the idea.




回答3:


(Posted on behalf of the question author).

This is how I wrote my code according to the answer of Stack Overflow user "Keppil".

import java.io.*;
import java.util.*;

/**
 * Prints the first 75001 three-character ids produced by {@link #getSequence(int)}.
 */
public class HashCreator
{
    /** How many ids the constructor prints (indices 0 to 75000). */
    private static final int ID_COUNT = 75001;

    /** The 46 "digits" of the id alphabet, in order: 0-9, a-z, then ten punctuation marks. */
    private static final String[] symbols = { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h",
        "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "!", "@", "#", "$", "%", "^", "&",
        "*", "~", "?" };

    /**
     * Prints one id per line for the indices 0 to 75000.
     */
    public HashCreator()
    {
        for (int i = 0; i < ID_COUNT; i++)
        {
            System.out.println(getSequence(i));
        }
    }

    /**
     * Converts a number into a three-character id by writing it in base
     * {@code symbols.length}, most significant digit first.
     *
     * @param i the number to encode; must satisfy {@code 0 <= i < symbols.length^3}
     * @return the three-character id for {@code i}
     * @throws IllegalArgumentException if {@code i} is negative or too large to fit in three characters
     */
    public static String getSequence(final int i) {
        final int base = symbols.length;
        final int capacity = base * base * base;
        // Fail with a clear message instead of an array index error in the expression below.
        if (i < 0 || i >= capacity) {
            throw new IllegalArgumentException("i must be in [0, " + capacity + ") but was " + i);
        }
        return symbols[i / (base * base)] + symbols[(i / base) % base] + symbols[i % base];
    }
}


来源:https://stackoverflow.com/questions/21391367/generating-unique-ids-with-the-max-length-of-3-digits-letters-simbols

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!