Search for multiple words elasticsearch haystack

余生长醉 提交于 2019-12-14 03:44:35

问题


I am using Django, Haystack and Elasticsearch.

My search_index.py:

from haystack import indexes
from models import Advertisement



class AdvertisementIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index describing how ``Advertisement`` rows are indexed."""

    # Primary document field, rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    make = indexes.CharField()
    section = indexes.CharField()
    subcategory = indexes.CharField()
    # Fields copied from the model attribute of the same name.
    content = indexes.CharField(model_attr='content')
    images = indexes.CharField(model_attr='images')

    def get_model(self):
        """Return the model class this index is built for."""
        return Advertisement

    def index_queryset(self, using=None):
        """Return the published advertisements, with related rows joined in."""
        published = self.get_model().objects.filter(is_published=True)
        return (
            published
            .select_related('make')
            .select_related('section')
            .select_related('subcategory')
        )

search Form:

    <!-- Search form: submits the query as GET parameter "q" to /search. -->
    <form action="/search" method="get">
        <input type="text" name="q">
        <input type="submit" value="">
    </form>

template:

{# Lists every hit on the current results page; shows "No result." when the page is empty. #}
{% block content %}

{% for result in page.object_list %}
   <p>{{ result.object.title }}</p>
   <p>{{ result.object.content }}</p>
   <p>{{ result.object.images }}</p>
   <p>{{ result.object.make }}</p>
   <p>{{ result.object.section }}</p>
   <p>{{ result.object.subcategory }}</p>
{% empty %}
   <p>No result.</p>
{% endfor %}

{% endblock %}

Running curl -XGET "http://localhost:9200/_search?q=fender+boss", I get all the documents containing "boss" and "fender".

However, when I type "boss fender" into the search box, I get "No result." From the search form I only get results for a single word, for example "boss". How can I make it possible to search for multiple words?


回答1:


I ran into this issue this month.

In order to perform the correct query you'll need to override some Haystack objects. I found the article "Extending Haystack's Elasticsearch backend" very helpful. It is quite complicated at the beginning, but once you understand how it works... it works :-)

The blog article teaches how to implement elasticsearch's nested query... well... I've implemented a basic multi_match query.

# -*- coding: utf-8 -*-
from __future__ import absolute_import

from django.conf import settings

from haystack.backends.elasticsearch_backend import (
    ElasticsearchSearchBackend, ElasticsearchSearchEngine, ElasticsearchSearchQuery)
from haystack.query import SearchQuerySet


class ElasticsearchEngineBackendCustom(ElasticsearchSearchBackend):
    """Elasticsearch backend with a configurable analyzer and ``multi_match`` support.

    Two optional Django settings are honoured:

    * ``ELASTICSEARCH_INDEX_SETTINGS`` replaces ``DEFAULT_SETTINGS`` when non-empty.
    * ``ELASTICSEARCH_DEFAULT_ANALYZER`` replaces ``DEFAULT_ANALYZER`` when non-empty.
    """

    DEFAULT_ANALYZER = "snowball"

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then apply the settings overrides."""
        super(ElasticsearchEngineBackendCustom, self).__init__(connection_alias, **connection_options)

        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', {})
        if user_settings:
            self.DEFAULT_SETTINGS = user_settings

        user_analyzer = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', '')
        if user_analyzer:
            self.DEFAULT_ANALYZER = user_analyzer

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None, multi_match=None):
        """Build the search body, optionally swapping in a ``multi_match`` query.

        All arguments except ``multi_match`` are forwarded unchanged to the
        parent implementation.

        :param multi_match: optional dict with the keys ``query``, ``fields``,
            ``tie_breaker`` and ``minimum_should_match``. When truthy, the
            ``query`` entry produced by the parent is replaced by a
            ``multi_match`` clause built from these values.
        :returns: the search kwargs dict.
        :raises KeyError: if ``multi_match`` is given but lacks one of the keys.
        """
        out = super(ElasticsearchEngineBackendCustom, self).build_search_kwargs(
            query_string, sort_by, start_offset, end_offset,
            fields, highlight, facets,
            date_facets, query_facets,
            narrow_queries, spelling_query,
            within, dwithin, distance_point,
            models, limit_to_registered_models,
            result_class)

        if multi_match:
            out['query'] = {
                'multi_match': {
                    'query': multi_match['query'],
                    'fields': multi_match['fields'],
                    'tie_breaker': multi_match['tie_breaker'],
                    'minimum_should_match': multi_match['minimum_should_match'],
                }
            }

        return out

    def build_schema(self, fields):
        """Build the mapping and set the analyzer on indexed string fields.

        A field gets ``analyzer`` set when its mapping type is ``'string'``, it
        is indexed, and it either has no ``facet_for`` attribute or is an
        ``ngram``/``edge_ngram`` field. The analyzer is the field's own
        ``analyzer`` attribute when present, otherwise ``DEFAULT_ANALYZER``.

        :returns: ``(content_field_name, mapping)`` as produced by the parent,
            with the analyzer entries added.
        """
        content_field_name, mapping = super(ElasticsearchEngineBackendCustom, self).build_schema(fields)

        for field_class in fields.values():
            # The mapping entry is mutated in place, so no re-assignment is needed.
            field_mapping = mapping[field_class.index_fieldname]

            if field_mapping['type'] == 'string' and field_class.indexed:
                if not hasattr(field_class, 'facet_for') or field_class.field_type in ('ngram', 'edge_ngram'):
                    field_mapping['analyzer'] = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

        return content_field_name, mapping

    def multi_match_run(self, query, fields, minimum_should_match, tie_breaker):
        """Execute a ``multi_match`` query and return the processed results.

        :param query: the text to search for.
        :param fields: field names to search (may carry ``^boost`` suffixes).
        :param minimum_should_match: passed through to ``MultiMatch``.
        :param tie_breaker: passed through to ``MultiMatch``.
        :returns: whatever ``self._process_results`` returns for the response.
        """
        from elasticsearch_dsl import Search
        from elasticsearch_dsl.query import MultiMatch

        # Restrict the search to this backend's own index; without ``index``
        # the request would run against every index reachable by the connection.
        search = Search(using=self.conn, index=self.index_name).query(
            MultiMatch(query=u'{}'.format(query), fields=fields,
                       minimum_should_match=minimum_should_match, tie_breaker=tie_breaker)
        )
        raw = search.execute()

        # NOTE(review): _process_results is inherited; confirm it accepts the
        # elasticsearch_dsl response object for the library versions in use.
        return self._process_results(raw)


class ElasticsearchSearchQueryCustom(ElasticsearchSearchQuery):
    """Search query that can carry and execute a ``multi_match`` query."""

    # Class-level default: build_params() reads this attribute on every query,
    # including ones that never called add_multi_match_query().
    multi_match_query = None

    def _clone(self, *args, **kwargs):
        """Clone the query, carrying the stored ``multi_match`` parameters along."""
        clone = super(ElasticsearchSearchQueryCustom, self)._clone(*args, **kwargs)
        clone.multi_match_query = self.multi_match_query
        return clone

    def multi_match(self, query, fields, minimum_should_match, tie_breaker):
        """Run the ``multi_match`` query on the backend and cache its results.

        Stores the ``results`` list and ``hits`` count of the backend response
        on ``_results`` and ``_hit_count`` (empty list / 0 when absent).
        """
        results = self.backend.multi_match_run(query, fields, minimum_should_match, tie_breaker)
        self._results = results.get('results', [])
        self._hit_count = results.get('hits', 0)

    def add_multi_match_query(self, query, fields, minimum_should_match, tie_breaker):
        """Remember the ``multi_match`` parameters for later ``build_params`` calls."""
        self.multi_match_query = {
            'query': query,
            'fields': fields,
            'minimum_should_match': minimum_should_match,
            'tie_breaker': tie_breaker
        }

    def build_params(self, spelling_query=None, **kwargs):
        """Return the parent's search kwargs, plus ``multi_match`` when one is set."""
        search_kwargs = super(ElasticsearchSearchQueryCustom, self).build_params(spelling_query, **kwargs)
        if self.multi_match_query:
            search_kwargs['multi_match'] = self.multi_match_query

        return search_kwargs


class ElasticsearchSearchQuerySetCustom(SearchQuerySet):
    """``SearchQuerySet`` exposing a ``multi_match`` entry point."""

    def multi_match(self, query, fields, minimum_should_match="35%", tie_breaker=0.3):
        """Return a clone of this queryset with a ``multi_match`` query applied.

        The parameters are first recorded on the clone's query object and the
        query is then executed through it.
        """
        params = (query, fields, minimum_should_match, tie_breaker)

        queryset = self._clone()
        search_query = queryset.query
        search_query.add_multi_match_query(*params)
        search_query.multi_match(*params)
        return queryset


class ElasticsearchEngineCustom(ElasticsearchSearchEngine):
    """Search engine wired to the custom backend and query classes."""
    backend = ElasticsearchEngineBackendCustom
    query = ElasticsearchSearchQueryCustom

As you can see, I used elasticsearch-dsl to perform the query (MultiMatch). To summarize the blog post in one sentence: the ElasticsearchSearchQuerySetCustom().multi_match(...) call depends on ElasticsearchSearchQueryCustom, which in turn depends on ElasticsearchEngineBackendCustom.

Then put in your settings the elasticsearch configuration, e.g:

# Optional settings read by ElasticsearchEngineBackendCustom.__init__.
ELASTICSEARCH_DEFAULT_ANALYZER = 'italian'
# NOTE(review): ``[...]`` looks like a placeholder for the real index settings -- fill in before use.
ELASTICSEARCH_INDEX_SETTINGS = {
    "settings": {[...]}
}

You can grab your language(s) for ELASTICSEARCH_INDEX_SETTINGS from Language Analyzers

You'll also need to override the SearchForm:

# -*- coding: utf-8 -*-
from __future__ import absolute_import

from haystack.forms import SearchForm

from .backend import ElasticsearchSearchQuerySetCustom


class SearchFormCustom(SearchForm):
    """Search form that answers queries with a boosted ``multi_match`` search."""

    def search(self):
        """Return the ``multi_match`` results for the submitted ``q`` value.

        Falls back to ``self.no_query_found()`` when the form is invalid or the
        query is empty. The search itself runs on a fresh
        ``ElasticsearchSearchQuerySetCustom`` over ``title`` (boost 8) and
        ``text`` (boost 0.5).
        """
        # Validate before touching cleaned_data: it is populated by validation.
        if not self.is_valid():
            return self.no_query_found()

        raw_query = self.cleaned_data.get('q')
        if not raw_query:
            # Nothing to clean or search for.
            return self.no_query_found()

        query = self.searchqueryset.query.clean(raw_query)
        if not query:
            return self.no_query_found()

        return ElasticsearchSearchQuerySetCustom().multi_match(query, ['title^8', 'text^0.5'])

The fields title and text must be in your index and the caret char is used to perform boost on fields.

You'll need to override the Haystack URL patterns in order to use the custom form:

# Route the search root URL to a haystack search view that uses SearchFormCustom.
urlpatterns = patterns(
    'search.views',
    url('^$', search_view_factory(form_class=SearchFormCustom), name='haystack-search'),
)

That's it, HTH :-)

Pay attention: don't use result.object.something; use the fields stored in your index instead, e.g. result.title, because result.object.title hits the database! See Haystack Best Practices.



来源:https://stackoverflow.com/questions/27802628/search-for-multiple-words-elasticsearch-haystack

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!