问题
I want to write a ComparatorSource for Lucene 3.6 with which I can accomplish a Round Robin style of sorting:
Let's say we have something like this:
A,A,A,B,C,D,D
And I want to have them sorted into this order (not neccessarily in alphabetical order)
A, B, C, D, A, D, A
How can I accomplish such a behaviour?
This is what I came up with, but it doesn't work because the timeshards arent updated in the correct place. Where can i do that instead?
public class RoundRobinFieldComparatorSource<T> extends FieldComparatorSource {
private static long seed = new Random().nextInt();
public static void setNewSeed(long seed) {
RoundRobinFieldComparatorSource.seed = seed;
}
private static final long serialVersionUID = -8959374194451783596L;
private final Set<T> roundRobinValues;
private final boolean excluded;
private final TwoWayStringBridge stringBridge;
private final Set<String> preferredValues;
public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge,
Set<T> roundRobinValues) {
this(stringBridge, roundRobinValues, new HashSet<T>());
}
/**
* same as
* {@link #RoundRobinFieldComparatorSource(TwoWayStringBridge, Set, boolean)}
* but with excluded set to false
*/
public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge,
Set<T> roundRobinValues, Set<T> preferredValues) {
this(stringBridge, roundRobinValues, preferredValues, false);
}
/**
* @param excluded
* determines whether the passed roundRobinValues are to be
* excluded in scheduling
*/
public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge,
Set<T> roundRobinValues, Set<T> preferredValues, boolean excluded) {
this.stringBridge = stringBridge;
this.roundRobinValues = roundRobinValues;
this.excluded = excluded;
this.preferredValues = new HashSet<>();
if(preferredValues != null) {
for(T val : preferredValues) {
this.preferredValues.add(this.stringBridge.objectToString(val));
}
}
}
public static <T> Set<T> toSet(Collection<T> iterable) {
Set<T> ret = new HashSet<>();
ret.addAll(iterable);
return ret;
}
@Override
public FieldComparator<String> newComparator(final String fieldName,
int numHits, int sortPos, final boolean reversed)
throws IOException {
return new FieldComparator<String>() {
private String[] values;
private String bottom;
private String[] currentReaderValues;
private Map<String, Integer> passedTimeShards;
private Random random;
@Override
public int compare(int slot1, int slot2) {
return this.compare(this.values[slot1], this.values[slot2]);
}
@Override
public int compareBottom(int doc) throws IOException {
return this.compare(this.bottom, this.currentReaderValues[doc]);
}
@Override
public void copy(int slot, int doc) throws IOException {
this.values[slot] = this.currentReaderValues[doc];
}
@Override
public void setBottom(int slot) {
this.bottom = this.values[slot];
}
@Override
public void setNextReader(IndexReader reader, int docBase)
throws IOException {
// TODO: maybe do a copy?
this.currentReaderValues = FieldCache.DEFAULT.getStrings(
reader, fieldName);
this.values = new String[reader.maxDoc()];
this.passedTimeShards = new HashMap<>();
this.random = new Random(seed);
// initialize our timeshards
// and make sure the values that have not been passed
// and were returned have their timeshard equal to
// Integer.MAX_VALUE so they won't be "scheduled"
// for a spot in the result list
for (String string : this.currentReaderValues) {
@SuppressWarnings("unchecked")
T object = (T) RoundRobinFieldComparatorSource.this.stringBridge
.stringToObject(string);
boolean scheduled = RoundRobinFieldComparatorSource.this.roundRobinValues
.contains(object);
if (RoundRobinFieldComparatorSource.this.excluded) {
scheduled = !scheduled;
}
int curTimeShard = 0;
if (!scheduled) {
curTimeShard = Integer.MAX_VALUE;
}
this.passedTimeShards.put(string, curTimeShard);
}
}
@Override
public String value(int slot) {
return this.values[slot];
}
private int compare(String first, String second) {
if (first == null && second == null) {
return 0;
} else if (first == null) {
return 1;
} else if (second == null) {
return -1;
}
Integer firstTimeShard = this.passedTimeShards.get(first);
Integer secondTimeShard = this.passedTimeShards.get(second);
int result = Integer.compare(firstTimeShard, secondTimeShard);
if (result == 0) {
// now check for preferred values (these are the first ones
// to be "scheduled" in each shard)
if (RoundRobinFieldComparatorSource.this.preferredValues
.contains(first)
&& RoundRobinFieldComparatorSource.this.preferredValues
.contains(second)) {
// coin flip if equal
if (this.random.nextBoolean()) {
result = 1;
} else {
result = -1;
}
} else if (RoundRobinFieldComparatorSource.this.preferredValues
.contains(first)) {
result = -1;
} else if (RoundRobinFieldComparatorSource.this.preferredValues
.contains(second)) {
result = 1;
} else {
// coin flip if equal
if (this.random.nextBoolean()) {
result = 1;
} else {
result = -1;
}
}
}
if (reversed) {
result *= -1;
}
// and add one timeshard to the "winner" of this round
if (result < 0) {
firstTimeShard += 1;
this.passedTimeShards.put(first, firstTimeShard);
} else {
secondTimeShard += 1;
this.passedTimeShards.put(second, secondTimeShard);
}
return result;
}
};
}
}
Thanks in advance.
回答1:
package de.indv.rentino.mk3.hibernate.lucene.search.sort;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.hibernate.search.bridge.TwoWayStringBridge;
/**
* Round Robin Sorting for Lucene
*
* @author Martin Braun
*/
public class RoundRobinFieldComparatorSource<T> extends FieldComparatorSource {
private static long seed = new Random().nextInt();
public static void setNewSeed(long seed) {
RoundRobinFieldComparatorSource.seed = seed;
}
private static final long serialVersionUID = -8959374194451783596L;
private final Set<T> roundRobinValues;
private final boolean excluded;
private final TwoWayStringBridge stringBridge;
private final Set<String> preferredValues;
public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge,
Set<T> roundRobinValues) {
this(stringBridge, roundRobinValues, new HashSet<T>());
}
/**
* same as
* {@link #RoundRobinFieldComparatorSource(TwoWayStringBridge, Set, boolean)}
* but with excluded set to false
*/
public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge,
Set<T> roundRobinValues, Set<T> preferredValues) {
this(stringBridge, roundRobinValues, preferredValues, false);
}
/**
* @param excluded
* determines whether the passed roundRobinValues are to be
* excluded in scheduling
*/
public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge,
Set<T> roundRobinValues, Set<T> preferredValues, boolean excluded) {
this.stringBridge = stringBridge;
this.roundRobinValues = roundRobinValues;
this.excluded = excluded;
this.preferredValues = new HashSet<>();
if (preferredValues != null) {
for (T val : preferredValues) {
this.preferredValues.add(this.stringBridge.objectToString(val));
}
}
}
public static <T> Set<T> toSet(Collection<T> iterable) {
Set<T> ret = new HashSet<>();
ret.addAll(iterable);
return ret;
}
@Override
public FieldComparator<Integer> newComparator(final String fieldName,
int numHits, int sortPos, final boolean reversed)
throws IOException {
return new FieldComparator<Integer>() {
private Integer[] values;
private Integer[] slotsForDocs;
private Integer bottom;
@Override
public void setNextReader(IndexReader reader, int docBase)
throws IOException {
final Random random = new Random(
RoundRobinFieldComparatorSource.seed);
// TODO: maybe do a copy?
String[] strings = FieldCache.DEFAULT.getStrings(reader,
fieldName);
this.values = new Integer[reader.maxDoc()];
Map<String, List<Integer>> scheduledOccurencesPerString = new HashMap<>();
Map<String, List<Integer>> unscheduledOccurencesPerString = new HashMap<>();
List<String> order = new ArrayList<>();
List<String> excludedOrder = new ArrayList<>();
{
{
Set<String> alreadyAdded = new HashSet<>();
for (int i = 0; i < strings.length; ++i) {
String string = strings[i];
this.values[i] = Integer.MAX_VALUE;
if (string != null) {
@SuppressWarnings("unchecked")
T object = (T) RoundRobinFieldComparatorSource.this.stringBridge
.stringToObject(string);
boolean scheduled = RoundRobinFieldComparatorSource.this.roundRobinValues
.contains(object);
if (RoundRobinFieldComparatorSource.this.excluded) {
scheduled = !scheduled;
}
if (scheduled) {
if (!alreadyAdded.contains(string)) {
alreadyAdded.add(string);
order.add(string);
}
List<Integer> occurences = scheduledOccurencesPerString
.get(string);
if (occurences == null) {
occurences = new ArrayList<>();
scheduledOccurencesPerString.put(
string, occurences);
}
occurences.add(i);
} else {
if (!alreadyAdded.contains(string)) {
alreadyAdded.add(string);
excludedOrder.add(string);
}
List<Integer> occurences = unscheduledOccurencesPerString
.get(string);
if (occurences == null) {
occurences = new ArrayList<>();
unscheduledOccurencesPerString.put(
string, occurences);
}
occurences.add(i);
}
}
}
}
sortByPreference(
order,
random,
RoundRobinFieldComparatorSource.this.preferredValues);
sortByPreference(excludedOrder, random,
new HashSet<String>());
}
int sortIndex = 0;
// schedule the included ones
sortIndex = schedule(sortIndex, order,
scheduledOccurencesPerString, values);
// schedule the excluded ones
sortIndex = schedule(sortIndex, excludedOrder,
unscheduledOccurencesPerString, values);
// slotsForDocs equals the values before the actual sorting has
// started
// in the lucene methods so we can just copy the values array
this.slotsForDocs = new Integer[values.length];
System.arraycopy(this.values, 0, this.slotsForDocs, 0,
this.values.length);
}
@Override
public int compare(int slot1, int slot2) {
return Integer.compare(this.values[slot1], this.values[slot2]);
}
@Override
public int compareBottom(int doc) throws IOException {
return Integer.compare(this.bottom, this.slotsForDocs[doc]);
}
@Override
public void setBottom(int slot) {
this.bottom = this.values[slot];
}
@Override
public void copy(int slot, int doc) throws IOException {
this.values[slot] = this.slotsForDocs[doc];
}
@Override
public Integer value(int slot) {
return this.values[slot];
}
};
}
private static void sortByPreference(List<String> list,
final Random random, final Set<String> preferredValues) {
//and shuffle before we do anything dumb
Collections.shuffle(list, random);
Collections.sort(list, new Comparator<String>() {
@Override
public int compare(String first, String second) {
int result;
if (preferredValues.contains(first)
&& preferredValues.contains(second)) {
result = 0;
} else if (preferredValues.contains(first)) {
result = -1;
} else if (preferredValues.contains(second)) {
result = 1;
} else {
result = 0;
}
return result;
}
});
}
private static int schedule(int sortIndex, List<String> order,
Map<String, List<Integer>> positions, Integer[] values) {
while (positions.size() > 0) {
// repeatedly iterate over the order
// and put the index for the next element
// with the same string if possible
for (int i = 0; i < order.size(); ++i) {
String cur = order.get(i);
if (positions.get(cur) != null) {
// get an occurence of the string
Integer pos = positions.get(cur).remove(0);
// and put the sort index into the values array
values[pos] = sortIndex++;
if (positions.get(cur).size() == 0) {
positions.remove(cur);
}
}
}
}
return sortIndex;
}
}
That's my solution.
来源:https://stackoverflow.com/questions/21528491/how-can-i-sort-in-a-round-robin-fashion-in-lucene