Updating indexes on placeholderfields in real time with django/haystack/solr

徘徊边缘 提交于 2019-12-11 03:57:32

问题


Hello I am integrating searches on my django-cms project. I have made a wiki app, where the content of each page is stored in a PlaceholderField. I can initially index the content of the PlaceholderField with the sudo ./manage.py rebuild_index or update_index, and the search works perfectly. The issue is when I modify the PlaceholderField, the search indexes are not updating, even though I have in my settings.py:

HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'

Here is my model.py:

from django.db import models
from django.utils.text import slugify
from djangocms_text_ckeditor.fields import HTMLField
from cms.models.fields import PlaceholderField

def my_placeholder_slotname(instance):
    """Return the placeholder slot name; it is the same for every instance."""
    slot_name = 'content_placeholder'
    return slot_name


class WikiPage(models.Model):
    """A wiki page whose primary key is a slug derived from its name."""
    # Primary key; overwritten from ``name`` on every save().
    slug = models.SlugField(max_length=50, primary_key=True)
    name = models.CharField(max_length=50)
    content = HTMLField(blank=True)
    # on_delete is spelled out: CASCADE is what older Django versions applied
    # implicitly, and Django 2.0+ refuses a ForeignKey without the argument.
    section = models.ForeignKey(
        'WikiSection',
        related_name='pages',
        db_index=True,
        on_delete=models.CASCADE,
    )
    content_placeholder = PlaceholderField(my_placeholder_slotname)

    def __str__(self):
        """Return the page name."""
        return self.name

    def save(self, *args, **kwargs):
        """Regenerate ``slug`` from ``name``, then save as usual."""
        self.slug = slugify(self.name)
        super(WikiPage, self).save(*args, **kwargs)

    def get_absolute_url(self):
        """Return the page's URL path, built from its slug."""
        return '/wiki/page/%s' % self.slug


class WikiSection(models.Model):
    """A wiki section whose primary key is a slug derived from its name."""
    # Primary key; overwritten from ``name`` on every save().
    slug = models.SlugField(max_length=50, primary_key=True)
    name = models.CharField(max_length=50)

    def __str__(self):
        """Return the section name."""
        return self.name

    def save(self, *args, **kwargs):
        """Regenerate ``slug`` from ``name``, then save as usual."""
        self.slug = slugify(self.name)
        super(WikiSection, self).save(*args, **kwargs)

    def get_absolute_url(self):
        """Return the section's URL path, built from its slug."""
        return '/wiki/section/%s' % self.slug

Here is my search_indexes.py:

import datetime
from haystack import indexes
from .models import WikiPage, WikiSection
from aldryn_search.helpers import get_cleaned_bits, get_request
from aldryn_search.utils import clean_join, get_index_base, strip_tags
from cms.models import CMSPlugin
from djangocms_text_ckeditor.cms_plugins import TextPlugin
from django.template import loader, Context


class WikiPageIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiPage``, including the text of its placeholder."""
    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    content_placeholder = indexes.CharField()
    url = indexes.CharField()
    _backend_alias = 'vcoe'

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiPage

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        model = self.get_model()
        return model.objects.order_by('name')

    def prepare_url(self, obj):
        """Return the URL stored in the index for ``obj``."""
        page_url = obj.get_absolute_url()
        return page_url

    def prepare_content_placeholder(self, obj):
        """Render every plugin of the page's placeholder into one string."""
        # The returned request is not used; the call is kept as written.
        get_request(obj)
        text_bits = [
            bit
            for plugin in obj.content_placeholder.get_plugins()
            for bit in get_cleaned_bits(plugin.render_plugin())
        ]
        return clean_join(' ', text_bits)

    def prepare(self, obj):
        """Build the index data and render the ``text`` document by hand.

        The template receives the object as ``object`` and the placeholder
        text as ``placeholder``.
        """
        prepared = super(WikiPageIndex, self).prepare(obj)

        template_names = ("search/indexes/wiki_app/wikipage_text.txt", )
        text_template = loader.select_template(template_names)
        context = Context({
            "object": obj,
            'placeholder': self.prepare_content_placeholder(obj),
        })
        prepared["text"] = text_template.render(context)
        return prepared



class WikiSectionIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiSection`` objects."""
    # Main document field; its content comes from a template.
    text = indexes.CharField(document=True, use_template=True)
    # Taken from the model's ``name`` attribute.
    title = indexes.CharField(model_attr='name')
    # Presumably filled through prepare_url() below (haystack's
    # prepare_<fieldname> hook) -- confirm against the haystack docs.
    url = indexes.CharField()

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiSection

    def index_queryset(self, using=None):
        "Used when the entire index for model is updated."
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the object's absolute URL."""
        return obj.get_absolute_url()

    # NOTE(review): nothing in this snippet reads this Meta class; confirm
    # that haystack actually uses it, otherwise it can be removed.
    class Meta:
        model_name = WikiSection
        app_label = 'wiki'

I am looking for help with an update method that I have no idea how to write. I have checked the docs and seen that there are update_object and update methods you can extend, but I don't know what they should return.

EDIT

I have kept tweaking and now use:

class WikiPageIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiPage``, including the text of its placeholder."""
    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    url = indexes.CharField()
    content_placeholder = indexes.CharField(model_attr='content_placeholder')
    _backend_alias = 'vcoe'

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiPage

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        model = self.get_model()
        return model.objects.order_by('name')

    def prepare_url(self, obj):
        """Return the URL stored in the index for ``obj``."""
        page_url = obj.get_absolute_url()
        return page_url

    def prepare_content_placeholder(self, obj):
        """Join the search text of every plugin in the page's placeholder."""
        # A request is obtained separately for each plugin, as before.
        text_bits = [
            self.get_plugin_search_text(plugin, get_request())
            for plugin in obj.content_placeholder.get_plugins()
        ]
        return clean_join(' ', text_bits)

    def get_plugin_search_text(self, base_plugin, request):
        """Return one plugin's index data joined into a single string."""
        return clean_join(' ', get_plugin_index_data(base_plugin, request))

This is my wikipage_text.txt:

{{ object.name }} - Wiki Page (Section {{ object.section }})
Content: {{ object.get_placeholder_text }}

EDIT2

For those trying the code in the answer below, here is what I did to get it fully working. All the code below is the same; I only added a few things. The solution below updates the index when a plugin is added to or removed from the placeholder, but not when a plugin is edited. In my case, I needed the index to update when the text in the djangocms-text-ckeditor plugin was changed. All that was needed was to import the text plugin model (from djangocms_text_ckeditor.models import Text) and connect another signal with it as the sender:

from djangocms_text_ckeditor.models import Text

# Also run update_wiki_page_index whenever a Text plugin instance is saved,
# so that editing the text (not only adding/removing plugins) is picked up.
signals.post_save.connect(
        update_wiki_page_index,
        sender=Text,
        dispatch_uid='post_save_update_wiki_page_index'
    )

The issue with this is that all pages in the website have placeholders and all of them probably contain text, which means this signal would trigger rather often. To prevent unnecessary calls to the DB from wiki.save(update_fields=['content_placeholder_data']), I just check whether the data has actually changed, like so:

def update_wiki_page_index(**kwargs):
    """Signal receiver: store fresh placeholder text on the owning wiki page.

    Reads the plugin from ``kwargs['instance']``. Returns without doing
    anything when the plugin is unsaved, has no placeholder, the placeholder
    does not belong to a ``WikiPage``, or the stored text is already current.
    """
    instance = kwargs['instance']
    if instance.pk is None:
        return

    placeholder = get_placeholder(plugin=instance)

    if placeholder is None:
        return

    try:
        wiki = WikiPage.objects.get(content_placeholder=placeholder)
    except WikiPage.DoesNotExist:
        return

    # Build the index text once and reuse it for both the comparison and the
    # assignment, instead of processing every plugin a second time.
    placeholder_data = get_placeholder_index_data(placeholder)

    # Make sure data has changed
    if wiki.content_placeholder_data == placeholder_data:
        return

    # DB based approach
    wiki.content_placeholder_data = placeholder_data

    # Saving here will trigger index update
    wiki.save(update_fields=['content_placeholder_data'])

回答1:


The search index is not updated when you add a plugin to or delete a plugin from the placeholder, because haystack signals only listen to models that are explicitly registered with the search. This means you would have to register a search index for each plugin that you want to listen to.

The better and less complicated approach is to do your own signal handling but specific to plugins, django-cms uses these signals internally for a few things so it's a common scenario.

I've added three files, models and search_indexes have some modifications to them and then helpers is a new one.

I changed your placeholder name function to return a more unique value, added some comments there.

Regarding your issue, there are two ways to solve it using signal handling.

One is to calculate the placeholder data every time a plugin is saved/deleted and store this data in a field on your model. Then, when we call save() with update_fields so that only that field is updated, haystack's signal listener will be triggered and thus fire an index update. When haystack updates the index, it just has to look at the data in the db instead of having to recalculate the plugin data again.

The second approach is to simply trigger the update manually from the signal handler, this would then tell haystack to update the wiki object in search engine just as if you would have saved the wiki object.

I've added both solutions there, with comments that separate them. If the wiki placeholders are modified constantly, I suggest using the db approach and plugging some async processing into haystack's update signals (this would need celery). Otherwise, you can just use the manual update.

DISCLAIMER I've not tested this personally, just wrote it based on previous experience :)

models.py

from django.db import models
from django.db.models import signals
from django.utils.text import slugify

from djangocms_text_ckeditor.fields import HTMLField

from cms.models import CMSPlugin
from cms.models.fields import PlaceholderField
from cms.signals.plugins import get_placeholder

from .helpers import (
    get_index_from_model,
    get_placeholder_index_data,
)


def get_wiki_placeholder_name(instance):
    """Return the placeholder slot name for a wiki, built from its pk.

    The slot name is not UNIQUE at the database level, but keeping it as
    distinctive as possible makes a wiki's placeholder easy to spot.
    """
    wiki_pk = instance.pk
    return 'wiki_%s_placeholder' % wiki_pk


def update_wiki_page_index(**kwargs):
    """Signal receiver: refresh the search data of the affected wiki page.

    Reads the plugin from ``kwargs['instance']``. Returns without doing
    anything when the plugin is unsaved, has no placeholder, or the
    placeholder does not belong to a ``WikiPage``.

    NOTE: the "DB based approach" and "Realtime" sections below are two
    alternatives shown one after the other -- keep only one of them.
    """
    instance = kwargs['instance']

    if instance.pk is None:
        return

    placeholder = get_placeholder(plugin=instance)

    if placeholder is None:
        return

    try:
        wiki = WikiPage.objects.get(content_placeholder=placeholder)
    except WikiPage.DoesNotExist:
        return

    # DB based approach
    # Store the placeholder text on the wiki itself.
    placeholder = wiki.content_placeholder
    placeholder_data = get_placeholder_index_data(placeholder)
    wiki.content_placeholder_data = placeholder_data
    # Saving here will trigger index update
    wiki.save(update_fields=['content_placeholder_data'])

    # OR

    # Realtime
    # Update the search index directly, without saving the wiki.
    wiki.update_object_index()


class WikiPage(models.Model):
    """A wiki page whose primary key is a slug derived from its name.

    ``content_placeholder_data`` holds the placeholder's text so it can be
    read from the database instead of being recomputed.
    """
    # Primary key; overwritten from ``name`` on every save().
    slug = models.SlugField(max_length=50, primary_key=True)
    name = models.CharField(max_length=50)
    content = HTMLField(blank=True)
    # on_delete is spelled out: CASCADE is what older Django versions applied
    # implicitly, and Django 2.0+ refuses a ForeignKey without the argument.
    section = models.ForeignKey(
        'WikiSection',
        related_name='pages',
        db_index=True,
        on_delete=models.CASCADE,
    )
    content_placeholder = PlaceholderField(get_wiki_placeholder_name)
    content_placeholder_data = models.TextField(editable=False)

    def __str__(self):
        """Return the page name."""
        return self.name

    def save(self, *args, **kwargs):
        """Regenerate ``slug`` from ``name``, then save as usual."""
        self.slug = slugify(self.name)
        super(WikiPage, self).save(*args, **kwargs)

    def update_object_index(self):
        """Push this page to the search index without saving the model."""
        # By default will update all cores associated with object
        index = get_index_from_model(self._meta.model)

        if index:
            # update_object takes a using='' parameter
            # if on a multi-language setup, you'll need to make sure
            # using reflects the language core
            # The object to index is this model instance itself; the model
            # has no ``model_instance`` attribute.
            index.update_object(instance=self)

    def get_absolute_url(self):
        """Return the page's URL path, built from its slug."""
        return '/wiki/page/%s' % self.slug


class WikiSection(models.Model):
    """A wiki section whose primary key is a slug derived from its name."""
    # Primary key; overwritten from ``name`` on every save().
    slug = models.SlugField(max_length=50, primary_key=True)
    name = models.CharField(max_length=50)

    def __str__(self):
        """Return the section name."""
        return self.name

    def save(self, *args, **kwargs):
        """Regenerate ``slug`` from ``name``, then save as usual."""
        self.slug = slugify(self.name)
        super(WikiSection, self).save(*args, **kwargs)

    def get_absolute_url(self):
        """Return the section's URL path, built from its slug."""
        return '/wiki/section/%s' % self.slug


# Run update_wiki_page_index before a CMSPlugin is deleted.
# NOTE(review): pre_delete fires before the row is removed -- verify that the
# plugin being deleted is not still included in the recomputed data.
signals.pre_delete.connect(
    update_wiki_page_index,
    sender=CMSPlugin,
    dispatch_uid='pre_delete_update_wiki_page_index'
)


# Run update_wiki_page_index after a CMSPlugin is saved.
signals.post_save.connect(
    update_wiki_page_index,
    sender=CMSPlugin,
    dispatch_uid='post_save_update_wiki_page_index'
)

search_indexes.py

from django.template import loader, Context

from haystack import indexes

from .helpers import get_placeholder_index_data
from .models import WikiPage, WikiSection


class WikiPageIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiPage``, including the text of its placeholder."""
    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    content_placeholder = indexes.CharField()
    url = indexes.CharField()
    _backend_alias = 'vcoe'

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiPage

    def index_queryset(self, using=None):
        "Used when the entire index for model is updated."
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the object's absolute URL."""
        return obj.get_absolute_url()

    def prepare_content_placeholder(self, obj):
        """Return the placeholder text to index for ``obj``.

        NOTE: the two approaches below are alternatives shown one after the
        other -- keep only one. As written, the realtime value overwrites
        the stored one.
        """
        # DB approach
        data = obj.content_placeholder_data

        # OR

        # Realtime approach
        placeholder = obj.content_placeholder
        data = get_placeholder_index_data(placeholder)
        return data

    def prepare(self, obj):
        """Build the index data and render the ``text`` document by hand.

        The template receives the object as ``object`` and the placeholder
        text as ``placeholder``.
        """
        data = super(WikiPageIndex, self).prepare(obj)

        template = loader.select_template(
            ("search/indexes/wiki_app/wikipage_text.txt", ),
        )
        data["text"] = template.render(Context({
            "object": obj,
            'placeholder': self.prepare_content_placeholder(obj),
        }))
        return data


class WikiSectionIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiSection`` objects."""
    # Main document field; its content comes from a template.
    text = indexes.CharField(document=True, use_template=True)
    # Taken from the model's ``name`` attribute.
    title = indexes.CharField(model_attr='name')
    url = indexes.CharField()

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiSection

    def index_queryset(self, using=None):
        "Used when the entire index for model is updated."
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the object's absolute URL."""
        return obj.get_absolute_url()

helpers.py

from haystack import connections
from haystack.constants import DEFAULT_ALIAS
from haystack.exceptions import NotHandled

from aldryn_search.helpers import get_plugin_index_data, get_request
from aldryn_search.utils import clean_join


def get_plugin_search_text(base_plugin, request):
    """Return one plugin's index data joined into a single string."""
    return clean_join(' ', get_plugin_index_data(base_plugin, request))


def get_placeholder_index_data(placeholder):
    """Return the search text of every plugin in ``placeholder``, joined."""
    # One request is obtained up front and shared by all plugins.
    request = get_request()
    plugin_texts = [
        get_plugin_search_text(plugin, request)
        for plugin in placeholder.get_plugins()
    ]
    return clean_join(' ', plugin_texts)


def get_index_from_model(model_class):
    """Return the search index registered for ``model_class``, or ``None``.

    The lookup uses DEFAULT_ALIAS explicitly; on a multi-language setup the
    alias would have to come from the current language instead.
    """
    registry = connections[DEFAULT_ALIAS].get_unified_index()

    try:
        found_index = registry.get_index(model_class)
    except NotHandled:
        # No index is registered for this model.
        return None

    # Tie the index to the same alias it was looked up with.
    found_index._backend_alias = DEFAULT_ALIAS
    return found_index


来源:https://stackoverflow.com/questions/31927545/updating-indexes-on-placeholderfields-in-real-time-with-django-haystack-solr

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!