问题
I am writing a Flask app and I am using elasticsearch.
Here is search.py:
from flask import current_app
def query_object(index, fields, query, page, per_page, fuzziness=0):
    """Run a paginated ``multi_match`` search and return the matching ids.

    Args:
        index: Name of the Elasticsearch index to search.
        fields: Field names the query is matched against.
        query: Search expression; it is converted with ``str()`` before
            being sent.
        page: Page number; page 1 maps to offset 0.
        per_page: Number of hits requested per page.
        fuzziness: Fuzziness setting for the match. It is only included in
            the request when truthy (the default ``0`` is omitted).

    Returns:
        A ``(ids, total)`` tuple: the hit ``_id`` values converted to
        ``int`` in the order Elasticsearch returned them, and the total
        hit count reported in ``hits.total.value``.
    """
    multi_match = {'query': str(query), 'fields': fields, 'lenient': True}
    # Elasticsearch can only build fuzzy queries on keyword and text fields.
    # Sending ``fuzziness`` (even 0) makes numeric fields fail, and with
    # ``lenient`` that failure is swallowed and the field yields no hits.
    # Leaving the key out when no fuzziness is requested keeps exact
    # matching and lets numeric fields be searched.
    if fuzziness:
        multi_match['fuzziness'] = fuzziness
    search = current_app.elasticsearch.search(
        index=index,
        body={
            'query': {'multi_match': multi_match},
            'from': (page - 1) * per_page,
            'size': per_page,
        },
    )
    ids = [int(hit['_id']) for hit in search['hits']['hits']]
    return ids, search['hits']['total']['value']
The following model is indexed:
class WishList(db.Model, SearchableMixin):
    """Wish-list entry model that also inherits ``SearchableMixin``."""

    # Field names exposed for search. The clothing-size columns other than
    # shoe_sock_size, and the two foreign keys, are not listed here.
    # NOTE(review): presumably read by the indexing code, which is not
    # visible in this snippet - confirm.
    __searchable__ = [
        'first_name',
        'gender',
        'wants',
        'needs',
        'wear',
        'read',
        'shoe_size_category',
        'shoe_type',
        'sheet_size',
        'additional_comments',
        'time_chosen',
        'age',
        'shoe_sock_size',
        'program_number',
    ]

    id = db.Column(db.Integer, primary_key=True)

    # Descriptive fields; note that age and program_number are stored as
    # strings, not integers.
    program_number = db.Column(db.String(4))
    first_name = db.Column(db.String(20))
    age = db.Column(db.String(10))
    gender = db.Column(db.String(20))

    # Free-text wish categories.
    wants = db.Column(db.String(300))
    needs = db.Column(db.String(300))
    wear = db.Column(db.String(300))
    read = db.Column(db.String(300))

    # Size details; each falls back to the literal 'unspecified'.
    pant_dress_size = db.Column(db.String(20), default='unspecified')
    shirt_blouse_size = db.Column(db.String(20), default='unspecified')
    jacket_sweater_size = db.Column(db.String(20), default='unspecified')
    shoe_sock_size = db.Column(db.String(20), default='unspecified')
    shoe_size_category = db.Column(db.String(20), default='unspecified')
    shoe_type = db.Column(db.String(50), default='unspecified')
    sheet_size = db.Column(db.String(20), default='unspecified')

    # Optional fields that start out empty.
    additional_comments = db.Column(db.Text(), nullable=True, default=None)
    time_chosen = db.Column(db.String(40), nullable=True, default=None)

    # Foreign keys: sponsor_id may be NULL, drive_id may not.
    sponsor_id = db.Column(
        db.Integer, db.ForeignKey('user.id'), nullable=True, default=None)
    drive_id = db.Column(
        db.Integer, db.ForeignKey('holiday_cheer_drive.id'),
        nullable=False, default=None)
That model is made searchable by inheriting from the SearchableMixin class like so:
class SearchableMixin(object):
    """Mixin that adds an Elasticsearch-backed search classmethod."""

    @classmethod
    def search_object(cls, fields, expression, page, per_page, fuzziness=0):
        """Search the class's index and return the matching rows in hit order.

        Args:
            fields: Field names to search.
            expression: Search expression forwarded to ``query_object``.
            page: Page number forwarded to ``query_object``.
            per_page: Page size forwarded to ``query_object``.
            fuzziness: Fuzziness setting forwarded to ``query_object``.

        Returns:
            A ``(query, total)`` tuple: a query over ``cls`` restricted to
            the ids returned for this page and ordered by their position in
            the search result, and the total hit count.
        """
        ids, total = query_object(
            cls.__tablename__, fields, expression, page, per_page,
            fuzziness=fuzziness)
        # A page past the last hit gives total > 0 with no ids. Building the
        # ordering from an empty list would produce a CASE expression with
        # no WHEN clauses, which is not valid SQL, so return early and keep
        # the real total for the caller.
        if total == 0 or not ids:
            # NOTE(review): presumably no row has id 0, making this an
            # empty result - confirm.
            return cls.query.filter_by(id=0), total
        # Map each id to its rank so the database returns rows in the same
        # order as the search hits.
        when = [(hit_id, position) for position, hit_id in enumerate(ids)]
        return cls.query.filter(cls.id.in_(ids)).order_by(
            db.case(when, value=cls.id)), total
When I search it currently, all of the fields are searchable and return a valid result UNLESS I am searching with a numeric value.
Here is an example of output for a search that works when I tell the python to print values to the console:
Query: bob
Body of search:
{'from': 0,
'query': {'multi_match': {'fields': ['first_name',
'gender',
'wants',
'needs',
'wear',
'read',
'shoe_size_category',
'shoe_type',
'sheet_size',
'additional_comments',
'time_chosen',
'age',
'shoe_sock_size',
'program_number'],
'fuzziness': 0,
'lenient': True,
'query': 'bob'}},
'size': 10}
Python elasticsearch object:
{'took': 27, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': 1.6916759, 'hits': [{'_index': 'wish_list', '_type': '_doc', '_id': '1', '_score': 1.6916759, '_source': {'first_name': 'bob', 'gender': 'male', 'wants': 'bike', 'needs': 'calculator', 'wear': 'hat', 'read': 'book', 'shoe_size_category': "men's", 'shoe_type': 'sneaker', 'sheet_size': 'unspecified', 'additional_comments': 'Likes cheese', 'time_chosen': None, 'age': '5', 'shoe_sock_size': '4', 'program_number': '215', 'mappings': {'properties': {'first_name': {'type': 'text'}, 'gender': {'type':
'text'}, 'wants': {'type': 'text'}, 'needs': {'type': 'text'}, 'wear': {'type': 'text'}, 'read': {'type': 'text'}, 'shoe_size_category': {'type': 'text'}, 'shoe_type': {'type': 'text'}, 'sheet_size': {'type': 'text'}, 'additional_comments': {'type': 'text'}, 'time_chosen': {'type': 'text'}, 'age': {'type': 'text'}, 'shoe_sock_size': {'type': 'text'}, 'program_number': {'type': 'text'}}}}}, {'_index': 'wish_list', '_type': '_doc', '_id': '9', '_score': 1.6916759, '_source': {'first_name': 'bob', 'gender': 'male', 'wants': 'bike', 'needs': 'calculator', 'wear': 'hat', 'read': 'book', 'shoe_size_category': "men's", 'shoe_type': 'sneaker', 'sheet_size': 'unspecified', 'additional_comments': 'Likes cheese', 'time_chosen': None, 'age': 5, 'shoe_sock_size': 4, 'program_number': 215, 'mappings': {'properties': {'first_name': {'type': 'text'}, 'gender': {'type': 'text'}, 'wants': {'type': 'text'}, 'needs': {'type': 'text'}, 'wear': {'type': 'text'}, 'read': {'type': 'text'}, 'shoe_size_category': {'type': 'text'}, 'shoe_type': {'type': 'text'}, 'sheet_size': {'type': 'text'}, 'additional_comments': {'type': 'text'}, 'time_chosen': {'type': 'text'}, 'age': {'type': 'text'}, 'shoe_sock_size': {'type': 'text'}, 'program_number': {'type': 'text'}}}}}]}}
And here's the same exact query on the same exact object, but with a numeric string:
Query: 215
Body of search:
{'from': 0,
'query': {'multi_match': {'fields': ['first_name',
'gender',
'wants',
'needs',
'wear',
'read',
'shoe_size_category',
'shoe_type',
'sheet_size',
'additional_comments',
'time_chosen',
'age',
'shoe_sock_size',
'program_number'],
'fuzziness': 0,
'lenient': True,
'query': '215'}},
'size': 10}
Python elasticsearch object:
{'took': 18, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}}
A string is being passed into the function, and the data is all saved as strings, but there seems to be some kind of type error. Before I added lenient: True, it threw an error saying Elasticsearch couldn't build the query.
If I can understand how I would do it with the elasticsearch REST API, then I can probably figure out how to do it with python.
回答1:
The issue is caused by using the fuzziness param on a numeric data type, and then using lenient: true to make the query run; lenient causes format-based errors, such as providing a text query value for a numeric field, to be ignored, as mentioned in this link.
Below is the error you get when trying to use fuzziness on numeric data types:
"reason": "Can only use fuzzy queries on keyword and text fields - not on [age] which is of type [integer]"
And when you add "lenient": true, the above error goes away, but the query doesn't return any documents.
To make it work, simply remove the fuzziness and lenient params from your search query, as Elasticsearch automatically converts a valid string to a numeric value and vice versa, as explained in the coerce article.
Working example to show it using REST API
Index def
{
"mappings": {
"properties": {
"age" :{
"type" : "integer"
}
}
}
}
Index sample doc
{
"age" : "25" --> note use of `""`, sending it as string
}
{
"age" : 28 --> note sending a numeric value
}
A search query in string format
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "28", --> note string format
"fields": [
"age" --> note you can add more fields
]
}
}
]
}
}
}
Search result
"hits": [
{
"_index": "so_numberic",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"program_number": "123456789",
"age": "28"
}
}
]
Search query in numeric format
{
"query": {
"match" : { --> query on single field.
"age" : {
"query" : 28 --> note numeric format
}
}
}
}
Result
"hits": [
{
"_index": "so_numberic",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"program_number": "123456789",
"age": "28"
}
}
]
Showing that using your fuzziness and lenient params doesn't bring any results, as explained earlier.
Search query
{
"query": {
"match": {
"age": {
"query": 28,
"fuzziness": 2,
"lenient": true
}
}
}
}
Result
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": { --> note 0 results.
"total": {
"value": 0,
"relation": "eq"
},
"max_score": null,
"hits": []
}
}
来源:https://stackoverflow.com/questions/60860090/how-do-i-tell-an-elasticsearch-multi-match-query-that-i-want-numeric-fields-sto