C# Finding relevant document snippets for search result display

后端 未结 8 537
野性不改
野性不改 2021-02-04 12:07

In developing search for a site I am building, I decided to go the cheap and quick way and use Microsoft Sql Server\'s Full Text Search engine instead of something more robust l

8条回答
  •  独厮守ぢ
    2021-02-04 12:45

    public class Highlighter
    {        
        private class Packet
        {
            public string Sentence;
            public double Density;
            public int Offset;
        }
    
        public static string FindSnippet(string text, string query, int maxLength)
        {
            if (maxLength < 0)
            {
                throw new ArgumentException("maxLength");
            }
            var words = query.Split(' ').Where(w => !string.IsNullOrWhiteSpace(w)).Select(word => word.ToLower()).ToLookup(s => s);             
            var sentences = text.Split('.');
            var i = 0;
            var packets = sentences.Select(sentence => new Packet 
            { 
                Sentence = sentence, 
                Density = ComputeDensity(words, sentence),
                Offset = i++
            }).OrderByDescending(packet => packet.Density);
            var list = new SortedList();            
            int length = 0;                
            foreach (var packet in packets)
            {
                if (length >= maxLength || packet.Density == 0)
                {
                    break;
                }
                string sentence = packet.Sentence;
                list.Add(packet.Offset, sentence.Substring(0, Math.Min(sentence.Length, maxLength - length)));
                length += packet.Sentence.Length;
            }
            var sb = new List();
            int previous = -1;
            foreach (var item in list)
            {
                var offset = item.Key;
                var sentence = item.Value;
                if (previous != -1 && offset - previous != 1)
                {
                    sb.Add(".");
                }
                previous = offset;             
                sb.Add(Highlight(sentence, words));                
            }
            return String.Join(".", sb);
        }
    
        private static string Highlight(string sentence, ILookup words)
        {
            var sb = new List();
            var ff = true;
            foreach (var word in sentence.Split(' '))
            {
                var token = word.ToLower();
                if (ff && words.Contains(token))
                {
                    sb.Add("[[HIGHLIGHT]]");
                    ff = !ff;
                }
                if (!ff && !string.IsNullOrWhiteSpace(token) && !words.Contains(token))
                {
                    sb.Add("[[ENDHIGHLIGHT]]");
                    ff = !ff;
                }
                sb.Add(word);
            }
            if (!ff)
            {
                sb.Add("[[ENDHIGHLIGHT]]");
            }
            return String.Join(" ", sb);
        }
    
        private static double ComputeDensity(ILookup words, string sentence)
        {            
            if (string.IsNullOrEmpty(sentence) || words.Count == 0)
            {
                return 0;
            }
            int numerator = 0;
            int denominator = 0;
            foreach(var word in sentence.Split(' ').Select(w => w.ToLower()))
            {
                if (words.Contains(word))
                {
                    numerator++;
                }
                denominator++;
            }
            if (denominator != 0)
            {
                return (double)numerator / denominator;
            }
            else
            {
                return 0;
            }
        }
    }
    

    Example:

    highlight "Optic flow is defined as the change of structured light in the image, e.g. on the retina or the camera’s sensor, due to a relative motion between the eyeball or camera and the scene. Further definitions from the literature highlight different properties of optic flow" "optic flow"

    Output:

    [[HIGHLIGHT]] Optic flow [[ENDHIGHLIGHT]] is defined as the change of structured light in the image, e... Further definitions from the literature highlight diff erent properties of [[HIGHLIGHT]] optic flow [[ENDHIGHLIGHT]]

提交回复
热议问题