What is the best/easiest way, in Java, to sort a large list of words (10,000-20,000) by the number of times they occur in the list? I tried a basic implementation but I get an ou
Have you considered using String interning in addition to a hashmap?
String interning means that all equal strings share a single instance in memory, so duplicate words do not each take up their own storage.
The following example is based on an answer to the question "Sort a Map" (sorting a map by its values):
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
 * Demonstrates counting how often each word occurs in a list and ordering the
 * words from most to least frequent.
 */
public class WordOccurSortExample {

    /**
     * Runs the demo.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new WordOccurSortExample();
    }

    /**
     * Builds a small sample word list, counts the occurrences of each word and
     * prints both the raw list and the words ordered by descending count.
     */
    public WordOccurSortExample() {
        List<String> occuringWords = new ArrayList<>();
        addCopies(occuringWords, "Menios", 4);
        addCopies(occuringWords, "Moo", 5);
        addCopies(occuringWords, "Boo", 3);

        Map<String, Integer> occurances = countOccurrences(occuringWords);
        Map<String, Integer> sortedMap = sortByOccurrence(occurances);

        System.out.println("unsorted map: " + occuringWords);
        System.out.println("results: " + sortedMap);
    }

    /**
     * Appends {@code copies} references to the interned form of {@code word}.
     *
     * <p>Interning a literal is a no-op (literals are already interned); the
     * call is kept to show where words read at runtime would be interned so
     * that equal words share one {@code String} instance.
     */
    private static void addCopies(List<String> target, String word, int copies) {
        String interned = word.intern();
        for (int i = 0; i < copies; i++) {
            target.add(interned);
        }
    }

    /**
     * Counts how many times each word appears in {@code words}.
     *
     * @param words the words to count; may be empty
     * @return a new mutable map from each distinct word to its number of occurrences
     */
    public static Map<String, Integer> countOccurrences(List<String> words) {
        Map<String, Integer> counts = new HashMap<>();
        for (String word : words) {
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Returns the entries of {@code counts} ordered by descending count, with
     * ties broken by the natural (lexicographic) order of the words.
     *
     * @param counts word-to-count map; keys must not be {@code null}
     * @return a new sorted map; later changes to {@code counts} do not affect it
     */
    public static Map<String, Integer> sortByOccurrence(Map<String, Integer> counts) {
        // The comparator reads counts on every comparison, so give it a private
        // snapshot: mutating the caller's map must not corrupt the tree order.
        Map<String, Integer> snapshot = new HashMap<>(counts);
        Map<String, Integer> sorted = new TreeMap<String, Integer>(new ValueComparator(snapshot));
        sorted.putAll(snapshot);
        return sorted;
    }

    /**
     * Orders words by the count recorded for them in a backing map, highest
     * count first; words with equal counts are ordered lexicographically.
     *
     * <p>The comparator returns 0 only for equal words, so it is consistent
     * with {@code equals} and a {@link TreeMap} using it supports lookups.
     * The backing map must not be modified while the comparator is in use.
     */
    static class ValueComparator implements Comparator<String> {
        private final Map<String, Integer> base;

        /**
         * @param base map supplying the count for each word
         */
        public ValueComparator(Map<String, Integer> base) {
            this.base = base;
        }

        /**
         * Compares two words by descending count, then lexicographically.
         *
         * @throws NullPointerException if a word is {@code null} and the counts tie
         */
        @Override
        public int compare(String a, String b) {
            // Arguments are swapped to obtain descending order by count.
            int byCount = Integer.compare(countOf(b), countOf(a));
            if (byCount != 0) {
                return byCount;
            }
            // Tie-break on the word itself so distinct words never compare as
            // equal (which would make the TreeMap merge them).
            return a.compareTo(b);
        }

        /** Returns the recorded count for {@code word}, or 0 when none is recorded. */
        private int countOf(String word) {
            Integer count = base.get(word);
            return count == null ? 0 : count;
        }
    }
}