问题
I have the following program:
public class Hit
{
readonly Hits _hits;
readonly int _index;
public Hit(Hits hits, int index)
{
this._hits = hits;
this._index = index;
}
public int id { get { return _hits.Id(_index); } }
public float score { get { return _hits.Score(_index); } }
public string this[string key] { get { return _hits.Doc(_index).Get(key); } }
}
class HitList : IList<Hit>
{
protected Hits hits;
public HitList(Hits hits)
{
this.hits = hits;
}
#region IList Members
public int Add(object value) { throw new NotImplementedException(); }
public void Clear() { throw new NotImplementedException(); }
public bool Contains(object value) { throw new NotImplementedException(); }
public int IndexOf(object value) { throw new NotImplementedException(); }
public void Insert(int index, object value) { throw new NotImplementedException(); }
public bool IsFixedSize { get { throw new NotImplementedException(); } }
public bool IsReadOnly { get { throw new NotImplementedException(); } }
public void Remove(object value) { throw new NotImplementedException(); }
public void RemoveAt(int index) { throw new NotImplementedException(); }
public object this[int index] { get { return new Hit(hits, index); } set { throw new NotImplementedException(); } }
#endregion
#region ICollection Members
public void CopyTo(Array array, int index) { throw new NotImplementedException(); }
public int Count { get { return hits.Length(); } }
public bool IsSynchronized { get { throw new NotImplementedException(); } }
public object SyncRoot { get { throw new NotImplementedException(); } }
#endregion
#region IEnumerable Members
public System.Collections.IEnumerator GetEnumerator() { throw new NotImplementedException(); }
#endregion
#region IList<Hit> Members
public int IndexOf(Hit item) { throw new NotImplementedException(); }
public void Insert(int index, Hit item) { throw new NotImplementedException(); }
Hit IList<Hit>.this[int index] { get { return new Hit(hits, index); } set { throw new NotImplementedException(); } }
#endregion
#region ICollection<Hit> Members
public void Add(Hit item) { throw new NotImplementedException(); }
public bool Contains(Hit item) { throw new NotImplementedException(); }
public void CopyTo(Hit[] array, int arrayIndex) { throw new NotImplementedException(); }
public bool Remove(Hit item) { throw new NotImplementedException(); }
#endregion
#region IEnumerable<Hit> Members
IEnumerator<Hit> IEnumerable<Hit>.GetEnumerator() { throw new NotImplementedException(); }
#endregion
}
private const string IndexFileLocation = @"C:\Users\Public\Index";
private IList<Hit> _hits;
public Form1()
{
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e)
{
Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(IndexFileLocation, true);
Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
var indexWriter = new Lucene.Net.Index.IndexWriter(dir, analyzer, true);
for (var i = 0; i < 10; i++)
{
var doc = new Lucene.Net.Documents.Document();
var fldContent = new Lucene.Net.Documents.Field("content", "test " + i,
Lucene.Net.Documents.Field.Store.YES,
Lucene.Net.Documents.Field.Index.TOKENIZED,
Lucene.Net.Documents.Field.TermVector.YES);
doc.Add(fldContent);
indexWriter.AddDocument(doc);
}
indexWriter.Optimize();
indexWriter.Close();
var searcher = new Lucene.Net.Search.IndexSearcher(dir);
var searchTerm = new Lucene.Net.Index.Term("content", "test");
Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(searchTerm);
Lucene.Net.Search.Hits hits = searcher.Search(query);
for (var i = 0; i < hits.Length(); i++)
{
Document doc = hits.Doc(i);
string contentValue = doc.Get("content");
Debug.WriteLine(contentValue);
}
HitList h = new HitList(hits);
h.Shuffle();
for (var i = 0; i < h.Count; i++)
{
var z = (Hit)h[i];
string contentValue = z.id.ToString();
Debug.WriteLine(contentValue);
}
}
}
public static class SiteItemExtensions
{
public static void Shuffle<T>(this IList<T> list)
{
var rng = new Random();
int n = list.Count;
while (n > 1)
{
n--;
int k = rng.Next(n + 1);
T value = list[k];
list[k] = list[n];
list[n] = value;
}
}
}
What I am trying to do is "shuffle" the results I get back from the Hits collection. When I run this program, as is, it bombs when I get to the h.Shuffle();
line. I understand why its bombing. Its bombing because its executing my Shuffle extension method, when in turn, is trying to do a set
operation on an array value and I do not have a set implementation on the public object this[int index]
line.
My problem is, I can't implement a set because the Lucene id and score properties are read only, which, again, makes sense why Apache made them read only. My question is, how can I "shuffle" or randomize the Hits that I'm getting back? Any help would be appreciated.
回答1:
You need to copy your hits to an appropiate data structure and do your sorting there; the underlying problem is that the Hits
type is not intended for modification.
For the shuffling, I believe this should do the trick:
var shuffledHits = hits.Cast<Hit>().OrderBy(h => rng.Next());
回答2:
There might be some performance problems with the approach in question for shuffling search results.
First, If I recall correctly, Hits class does a local document caching and repeats the search for every 100 documents. So, enumarating all search results would require "HitCount/100" searches.
Second, loading a document is one of the most costly parts of the Lucene.Net. Just to be able to shuffle, loading all search results may not be a good choise.
I would prefer a "random scoring" approach as below:
public class RandomScoreQuery : Lucene.Net.Search.Function.CustomScoreQuery
{
Random r = new Random((int)(DateTime.Now.Ticks & 0x7fffffff));
public RandomScoreQuery(Query q): base(q)
{
}
public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
{
return r.Next(10000) / 1000.0f; //rand scores between 0-10
}
}
Query q1 = new TermQuery(new Term("content", "test"));
Query q2 = new RandomScoreQuery(q1);
TopDocs td = src.Search(q2, 100);
来源:https://stackoverflow.com/questions/7629235/shuffling-a-lucene-hits-result-set