问题
I am using Django, Haystack and Elasticsearch.
My search_index.py:
from haystack import indexes
from models import Advertisement
class AdvertisementIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index covering published ``Advertisement`` records."""

    # Main document field; its content is rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    # These fields need a ``model_attr`` (or a ``prepare_<name>`` method);
    # without one nothing is ever read from the model and they are indexed
    # empty.  ``null=True`` keeps an advertisement with a missing relation
    # from aborting the indexing run.
    make = indexes.CharField(model_attr='make', null=True)
    section = indexes.CharField(model_attr='section', null=True)
    subcategory = indexes.CharField(model_attr='subcategory', null=True)
    content = indexes.CharField(model_attr='content')
    images = indexes.CharField(model_attr='images')

    def get_model(self):
        """Return the model class handled by this index."""
        return Advertisement

    def index_queryset(self, using=None):
        """Return the queryset of advertisements to index.

        Only rows with ``is_published=True`` are included.  The three
        relations read by the index fields are fetched in the same query
        through a single ``select_related`` call.
        """
        published = self.get_model().objects.filter(is_published=True)
        return published.select_related('make', 'section', 'subcategory')
search Form:
<!-- Search form: submits the typed text as the "q" GET parameter to /search. -->
<form action="/search" method="get">
<!-- NOTE(review): "text-search" is not a standard input type; presumably browsers fall back to a plain text field. Confirm this is intended. -->
<input type="text-search" name="q">
<input type="submit" value="">
</form>
template:
{% block content %}
{# Render every hit of the current page, or a fallback message when there are none. #}
{% for result in page.object_list %}
{# Each value below is read through result.object rather than from fields stored on the result itself. #}
<p>{{ result.object.title }}</p>
<p>{{ result.object.content }}</p>
<p>{{ result.object.images }}</p>
<p>{{ result.object.make }}</p>
<p>{{ result.object.section }}</p>
<p>{{ result.object.subcategory }}</p>
{% empty %}
<p>No result.</p>
{% endfor %}
{% endblock %}
Querying Elasticsearch directly, I get all the documents that contain "boss" and "fender":
curl -XGET "http://localhost:9200/_search?q=fender+boss"
But when I type "boss fender" into the search box, I get "No result." From the search form I only get results for a single word, for example "boss". How can I make it possible to search for multiple words?
回答1:
I ran into this issue this month.
In order to perform the correct query you'll need to override some Haystack objects. I found the article "Extending Haystack's Elasticsearch backend" very helpful. It is quite complicated at the beginning, but once you understand how it works... it works :-)
The blog article teaches how to implement Elasticsearch's nested query... well... I've implemented a basic multi_match query.
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from django.conf import settings
from haystack.backends.elasticsearch_backend import (
ElasticsearchSearchBackend, ElasticsearchSearchEngine, ElasticsearchSearchQuery)
from haystack.query import SearchQuerySet
class ElasticsearchEngineBackendCustom(ElasticsearchSearchBackend):
    """Elasticsearch backend with settings-driven analysis and ``multi_match`` support.

    Compared with the parent backend it:

    * lets Django settings override the index settings and default analyzer;
    * accepts an extra ``multi_match`` argument when building the search body;
    * applies the default analyzer to string fields in the schema;
    * can run a ``multi_match`` query directly through elasticsearch-dsl.
    """

    DEFAULT_ANALYZER = "snowball"

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then apply optional settings overrides.

        ``ELASTICSEARCH_INDEX_SETTINGS`` replaces ``DEFAULT_SETTINGS`` and
        ``ELASTICSEARCH_DEFAULT_ANALYZER`` replaces ``DEFAULT_ANALYZER``.
        Missing or empty values leave the defaults untouched.
        """
        super(ElasticsearchEngineBackendCustom, self).__init__(connection_alias, **connection_options)

        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', {})
        if user_settings:
            self.DEFAULT_SETTINGS = user_settings

        user_analyzer = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', '')
        if user_analyzer:
            self.DEFAULT_ANALYZER = user_analyzer

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None, multi_match=None):
        """Build the search body, optionally substituting a ``multi_match`` query.

        All arguments except ``multi_match`` are forwarded unchanged to the
        parent implementation.

        :param multi_match: optional dict with the keys ``query``, ``fields``,
            ``tie_breaker`` and ``minimum_should_match``.  When truthy, a
            ``multi_match`` clause built from it replaces the ``query`` entry
            produced by the parent.
        :returns: the search keyword dict produced by the parent, possibly
            with its ``query`` entry replaced.
        """
        # Forward by keyword so the call does not depend on the parent's
        # parameter order.
        out = super(ElasticsearchEngineBackendCustom, self).build_search_kwargs(
            query_string,
            sort_by=sort_by,
            start_offset=start_offset,
            end_offset=end_offset,
            fields=fields,
            highlight=highlight,
            facets=facets,
            date_facets=date_facets,
            query_facets=query_facets,
            narrow_queries=narrow_queries,
            spelling_query=spelling_query,
            within=within,
            dwithin=dwithin,
            distance_point=distance_point,
            models=models,
            limit_to_registered_models=limit_to_registered_models,
            result_class=result_class,
        )

        if multi_match:
            # NOTE(review): this discards everything the parent stored under
            # 'query', including any wrapper it may have added around the
            # query (e.g. for filters) -- confirm that is acceptable.
            out['query'] = {
                'multi_match': {
                    'query': multi_match['query'],
                    'fields': multi_match['fields'],
                    'tie_breaker': multi_match['tie_breaker'],
                    'minimum_should_match': multi_match['minimum_should_match'],
                }
            }

        return out

    def build_schema(self, fields):
        """Build the index mapping and set an analyzer on string fields.

        :param fields: mapping of field name to Haystack field instance.
        :returns: ``(content_field_name, mapping)`` as produced by the parent,
            with ``analyzer`` set on the selected fields.
        """
        content_field_name, mapping = super(ElasticsearchEngineBackendCustom, self).build_schema(fields)

        for field_class in fields.values():
            # ``field_mapping`` is the dict stored inside ``mapping``, so it
            # is updated in place and needs no write-back.
            field_mapping = mapping[field_class.index_fieldname]
            if field_mapping['type'] != 'string' or not field_class.indexed:
                continue
            # Selects every field lacking a ``facet_for`` attribute, plus
            # ngram/edge_ngram fields even when they have one.
            # NOTE(review): as written this also replaces the analyzer of
            # ngram/edge_ngram fields -- confirm that is intended.
            if not hasattr(field_class, 'facet_for') or field_class.field_type in ('ngram', 'edge_ngram'):
                # A per-field ``analyzer`` attribute wins over the default.
                field_mapping['analyzer'] = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

        return content_field_name, mapping

    def multi_match_run(self, query, fields, minimum_should_match, tie_breaker):
        """Execute a ``multi_match`` query and return the processed results.

        :param query: text to search for.
        :param fields: list of field names, optionally boosted (``'title^8'``).
        :param minimum_should_match: passed through to ``MultiMatch``.
        :param tie_breaker: passed through to ``MultiMatch``.
        :returns: whatever ``self._process_results`` returns for the response.
        """
        # Imported locally so elasticsearch-dsl is only needed when this
        # method is actually used.
        from elasticsearch_dsl import Search
        from elasticsearch_dsl.query import MultiMatch

        # Scope the search to this backend's index; without an index the
        # request runs against every index on the cluster.
        search = Search(using=self.conn, index=self.index_name).query(
            MultiMatch(
                query=u'{}'.format(query),
                fields=fields,
                minimum_should_match=minimum_should_match,
                tie_breaker=tie_breaker,
            )
        )
        # ``_process_results`` is written for the plain dict returned by the
        # low-level client, so unwrap the elasticsearch-dsl response object.
        raw = search.execute().to_dict()
        return self._process_results(raw)
class ElasticsearchSearchQueryCustom(ElasticsearchSearchQuery):
    """Search query that can carry and execute a ``multi_match`` clause."""

    # Class-level default: ``build_params`` runs for every search made
    # through this query class, including ones that never call
    # ``add_multi_match_query``; without it those raise AttributeError.
    multi_match_query = None

    def _clone(self, *args, **kwargs):
        """Clone the query, carrying the ``multi_match`` settings along.

        Arguments are passed through untouched to the parent ``_clone``.
        """
        clone = super(ElasticsearchSearchQueryCustom, self)._clone(*args, **kwargs)
        if self.multi_match_query is not None:
            # Copy so later changes on one query do not leak into the other.
            clone.multi_match_query = dict(self.multi_match_query)
        return clone

    def multi_match(self, query, fields, minimum_should_match, tie_breaker):
        """Run the ``multi_match`` query now and cache its results on ``self``.

        Stores the ``results`` entry of the backend's answer in
        ``self._results`` and its ``hits`` entry in ``self._hit_count``.
        """
        results = self.backend.multi_match_run(query, fields, minimum_should_match, tie_breaker)
        self._results = results.get('results', [])
        self._hit_count = results.get('hits', 0)

    def add_multi_match_query(self, query, fields, minimum_should_match, tie_breaker):
        """Remember the ``multi_match`` settings for later use by ``build_params``."""
        self.multi_match_query = {
            'query': query,
            'fields': fields,
            'minimum_should_match': minimum_should_match,
            'tie_breaker': tie_breaker,
        }

    def build_params(self, spelling_query=None, **kwargs):
        """Build the backend search arguments, adding ``multi_match`` when set.

        :returns: the parent's keyword dict, plus a ``multi_match`` entry when
            ``add_multi_match_query`` has been called on this query.
        """
        search_kwargs = super(ElasticsearchSearchQueryCustom, self).build_params(
            spelling_query=spelling_query, **kwargs)
        if self.multi_match_query:
            search_kwargs['multi_match'] = self.multi_match_query
        return search_kwargs
class ElasticsearchSearchQuerySetCustom(SearchQuerySet):
    """``SearchQuerySet`` that exposes a ``multi_match`` method."""

    def multi_match(self, query, fields, minimum_should_match="35%", tie_breaker=0.3):
        """Return a clone of this queryset set up for a ``multi_match`` search.

        The settings are first recorded on the clone's query object and the
        query is then executed on it straight away.

        :param query: text to search for.
        :param fields: list of field names, optionally boosted.
        :param minimum_should_match: forwarded to the query object.
        :param tie_breaker: forwarded to the query object.
        """
        sqs = self._clone()
        search_query = sqs.query
        # Order matters: record the settings, then run the query.
        search_query.add_multi_match_query(query, fields, minimum_should_match, tie_breaker)
        search_query.multi_match(query, fields, minimum_should_match, tie_breaker)
        return sqs
class ElasticsearchEngineCustom(ElasticsearchSearchEngine):
    """Search engine that plugs in the custom query and backend classes."""

    # Query class used by this engine.
    query = ElasticsearchSearchQueryCustom
    # Backend class used by this engine.
    backend = ElasticsearchEngineBackendCustom
As you can see, I used elasticsearch-dsl to perform the query (MultiMatch). To summarize the blog post in one sentence: the ElasticsearchSearchQuerySetCustom().multi_match(...) call depends on ElasticsearchSearchQueryCustom, which in turn depends on ElasticsearchEngineBackendCustom.
Then put in your settings the elasticsearch configuration, e.g:
# Analyzer name that ElasticsearchEngineBackendCustom uses in place of its
# built-in DEFAULT_ANALYZER.
ELASTICSEARCH_DEFAULT_ANALYZER = 'italian'
# Index settings that ElasticsearchEngineBackendCustom uses in place of
# DEFAULT_SETTINGS.
# NOTE: "[...]" below is an elision from the original post, not usable
# Python; replace it with the real index settings.
ELASTICSEARCH_INDEX_SETTINGS = {
"settings": {[...]}
}
You can find the analyzer for your language(s) to use in ELASTICSEARCH_INDEX_SETTINGS in the Elasticsearch "Language Analyzers" documentation.
You'll also need to override the SearchForm:
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from haystack.forms import SearchForm
from .backend import ElasticsearchSearchQuerySetCustom
class SearchFormCustom(SearchForm):
    """Search form that runs a ``multi_match`` query over ``title`` and ``text``."""

    def search(self):
        """Return the search results for the submitted query.

        Falls back to ``self.no_query_found()`` when the form is invalid or
        the query is empty, either before or after cleaning.
        """
        # Validate first: ``cleaned_data`` only exists once the form has been
        # cleaned, so reading it before ``is_valid()`` can raise.
        if not self.is_valid():
            return self.no_query_found()

        raw_query = self.cleaned_data.get('q')
        if not raw_query:
            # Also keeps ``None`` away from the backend's ``clean``.
            return self.no_query_found()

        query = self.searchqueryset.query.clean(raw_query)
        if not query:
            return self.no_query_found()

        # ``^`` boosts a field: title matches weigh far more than text ones.
        return ElasticsearchSearchQuerySetCustom().multi_match(query, ['title^8', 'text^0.5'])
The fields title and text must be in your index; the caret character (^) is used to boost a field.
You'll also need to override the Haystack URL patterns in order to use the custom form:
# Route the root of the search URLs to Haystack's search view, built around
# the custom form.  A plain list is used instead of ``patterns()``, which is
# deprecated and later removed from Django; its view prefix had no effect
# here because the view is passed as a callable.
urlpatterns = [
    url(r'^$', search_view_factory(form_class=SearchFormCustom), name='haystack-search'),
]
That's it, HTH :-)
Pay attention: don't use result.object.something in the template; use the fields stored in your index instead, e.g. result.title, because result.object.title hits the database! See Haystack Best Practices.
来源:https://stackoverflow.com/questions/27802628/search-for-multiple-words-elasticsearch-haystack