Error when indexing data using elasticsearch dsl

二次信任 提交于 2019-12-13 03:39:02

问题


I have two models which are as follows:

class PostUser(models.Model):
    """Django model for a post author: a user id plus a reputation value.

    Both values are stored as optional character fields.
    """

    user_id = models.CharField(max_length=1000, blank=True, null=True)
    reputation = models.CharField(max_length=1000, blank=True, null=True)

    def __unicode__(self):
        """Return the stored ``user_id`` as the text representation."""
        return self.user_id

    def indexing(self):
        """Save this row to ``post-user-index`` and return it as a dict.

        A ``PostUserIndex`` document is built with this row's primary key
        as its meta id, saved, and then returned via
        ``to_dict(include_meta=True)``.
        """
        fields = {
            'user_id': self.user_id,
            'reputation': self.reputation,
        }
        doc = PostUserIndex(meta={'id': self.id}, **fields)
        doc.save(index='post-user-index')
        return doc.to_dict(include_meta=True)

class Posts(models.Model):
    """Django model for a post, linked to its owning ``PostUser``.

    This is the model whose ``indexing`` method raises the
    ``AttributeError`` shown in the traceback further down the page.
    """

    user_post_id = models.CharField(max_length = 1000 , blank = True , null = True)
    score = models.CharField(max_length = 1000 , blank = True , null = True)
    # Foreign key to PostUser. On an instance this attribute is a PostUser
    # object (the error message names a 'PostUser' object), not a dict.
    owner_user_id = models.ForeignKey(PostUser,default="-100")


    def __unicode__(self):
        """Return ``user_post_id`` as the text representation."""

        return self.user_post_id

    def indexing(self):
        """Build a ``PostsIndex`` document, save it, and return it as a dict.

        The document uses this row's primary key as its meta id and is
        saved to ``newuserposts-index``; the return value is
        ``to_dict(include_meta=True)``.
        """
        obj = PostsIndex(
            meta = {'id': self.id},
            user_post_id = self.user_post_id,
            score = self.score,
            # NOTE(review): this hands the PostUser model instance itself to
            # the Nested field. Per the traceback, field cleaning ends in
            # from_es() calling hit.copy(), which a model instance lacks.
            owner_user_id = self.owner_user_id,
        )
        # This is the call that raises in the traceback: save() runs
        # full_clean(), which cleans the nested owner_user_id field.
        obj.save(index = 'newuserposts-index')
        return obj.to_dict(include_meta=True)

The way I am trying to index my data is as follows:

class PostUserIndex(DocType):
    """Elasticsearch DSL document for a post author.

    Declares two text fields, ``user_id`` and ``reputation``, matching the
    keyword arguments that ``PostUser.indexing`` passes in.
    """
    user_id = Text()
    reputation = Text()


class PostsIndex(DocType):
    """Elasticsearch DSL document for a post with its owner nested inside.

    Declares text fields ``user_post_id`` and ``score`` plus a nested
    ``owner_user_id`` field typed as ``PostUserIndex``.
    """
    user_post_id = Text()
    score = Text()
    # Nested field wrapping PostUserIndex. Per the traceback, a value
    # assigned here is wrapped through PostUserIndex.from_es(), which calls
    # .copy() on it, so a non-dict value such as a model instance fails.
    owner_user_id = Nested(PostUserIndex)

Then I try to run the following function to index the data:

def posts_indexing():
    """Initialise ``PostsIndex`` and bulk-index every ``Posts`` row.

    Takes no arguments and returns nothing; any exception raised while an
    individual row is being converted propagates out of ``bulk``.
    """
    # Calls PostsIndex.init for 'newuserposts-index' (in elasticsearch-dsl
    # this is expected to set up the index mapping; confirm for the
    # installed version).
    PostsIndex.init(index='newuserposts-index')
    es = Elasticsearch()
    # The generator yields one action per Posts row. Each b.indexing() call
    # also runs obj.save() itself, so every document is written by save()
    # and then handed to bulk as well.
    bulk(client=es, actions=(b.indexing() for b in models.Posts.objects.all().iterator()))

I have tried different approaches, such as manually entering the nested properties and changing the PostUser document from a DocType to an InnerDoc, but I still get the following error.

ERROR:

AttributeError: 'PostUser' object has no attribute 'copy'

Traceback (most recent call last):
  File "<console>", line 1, in <module>
  File "/Users/ammarkhan/Desktop/danny/src/dataquerying/datatoindex.py", line 74, in new_user_posts_indexing
    bulk(client=es, actions=(b.indexing() for b in models.Posts.objects.all().iterator()))
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 257, in bulk
    for ok, item in streaming_bulk(client, actions, **kwargs):
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 180, in streaming_bulk
    client.transport.serializer):
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 58, in _chunk_actions
    for action, data in actions:
  File "/Users/ammarkhan/Desktop/danny/src/dataquerying/datatoindex.py", line 74, in <genexpr>
    bulk(client=es, actions=(b.indexing() for b in models.Posts.objects.all().iterator()))
  File "/Users/ammarkhan/Desktop/danny/src/dataquerying/models.py", line 167, in indexing
    obj.save(index = 'newuserposts-index')
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/document.py", line 405, in save
    self.full_clean()
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/utils.py", line 417, in full_clean
    self.clean_fields()
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/utils.py", line 403, in clean_fields
    data = field.clean(data)
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 179, in clean
    data = super(Object, self).clean(data)
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 90, in clean
    data = self.deserialize(data)
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 86, in deserialize
    return self._deserialize(data)
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 166, in _deserialize
    return self._wrap(data)
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 142, in _wrap
    return self._doc_class.from_es(data)
  File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/utils.py", line 342, in from_es
    meta = hit.copy()
AttributeError: 'PostUser' object has no attribute 'copy'

回答1:


You are calling .save() in your indexing methods, which saves the document to Elasticsearch, and then you are also passing the same document to bulk, which does the same thing. The .save() call is redundant.

You are also assigning an instance of PostUser to owner_user_id instead of properly serializing it by calling the indexing method on it (without the save inside):

  def indexing(self):
    """Return this post as a bulk-ready dict without saving it.

    The target index is carried in the document meta rather than passed
    to ``save()``, and the owner is serialized by calling its own
    ``indexing()`` method instead of passing the model instance.
    """
    doc_meta = {'id': self.id, 'index': 'newuserposts-index'}
    # Serialize the related PostUser through its indexing() method so the
    # nested field receives that method's result, not the model object.
    owner_payload = self.owner_user_id.indexing()
    doc = PostsIndex(
        meta=doc_meta,
        user_post_id=self.user_post_id,
        score=self.score,
        owner_user_id=owner_payload,
    )
    return doc.to_dict(include_meta=True)


来源:https://stackoverflow.com/questions/50252992/error-when-indexing-data-using-elasticsearch-dsl

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!