How to search for special characters (+ ! \ ? :) in Lucene

前端 未结 2 377

I want to search for special characters in the index.

I escaped all the special characters in the query string, but when I search the Lucene index for +, it creates the query as +(

2条回答
  •  遇见更好的自我
    2021-02-04 11:28

    This may no longer be relevant for the author, but to be able to search for special characters you need to:

    1. Create custom analyzer
    2. Use it for indexing and searching

    Here is an example that works for me:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.custom.CustomAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.*;
    import org.apache.lucene.store.RAMDirectory;
    import org.junit.Test;
    
    import java.io.IOException;
    
    import static org.hamcrest.Matchers.equalTo;
    import static org.junit.Assert.assertThat;
    
    /**
     * Shows that Lucene's reserved query-syntax characters can be searched literally when the
     * same whitespace-based analyzer is used for indexing and for query parsing, and the query
     * text is escaped with {@link QueryParser#escape(String)}.
     */
    public class LuceneSpecialCharactersSearchTest {

        /**
         * Test that tries to search a string by some substring with each special character separately.
         */
        @Test
        public void testSpecialCharacterSearch() throws Exception {
            // GIVEN
            String[] luceneSpecialCharacters = new String[]{"+", "-", "&&", "||", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":", "\\"};

            // try-with-resources releases the index writer, directory and analyzer even if an assertion fails.
            try (LuceneSpecialCharactersSearch service = new LuceneSpecialCharactersSearch()) {
                // WHEN
                for (String specialCharacter : luceneSpecialCharacters) {
                    String actual = service.search("list's special-characters " + specialCharacter);

                    // THEN (the reason string identifies which character broke the search)
                    assertThat("search with special character '" + specialCharacter + "'",
                            actual, equalTo(LuceneSpecialCharactersSearch.TEXT_WITH_SPECIAL_CHARACTERS));
                }
            }
        }

        /**
         * Minimal in-memory index holding a single document whose {@code body} field contains every
         * special character, plus a search method that escapes the incoming query text.
         */
        private static class LuceneSpecialCharactersSearch implements AutoCloseable {
            private static final String TEXT_WITH_SPECIAL_CHARACTERS = "This is the list's of special-characters + - && || ! ( ) { } [ ] ^ \" ~ ? : \\ *";

            private static final String BODY_FIELD = "body";

            /** Single analyzer instance shared by indexing and query parsing so both tokenize identically. */
            private final Analyzer analyzer;
            private final RAMDirectory directory;
            private final IndexWriter writer;

            /**
             * Creates the in-memory index and commits the single sample document.
             *
             * @throws Exception if the analyzer cannot be built or the document cannot be indexed
             */
            public LuceneSpecialCharactersSearch() throws Exception {
                Document document = new Document();
                document.add(new TextField(BODY_FIELD, TEXT_WITH_SPECIAL_CHARACTERS, Field.Store.YES));

                analyzer = buildAnalyzer();
                directory = new RAMDirectory();
                writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
                writer.addDocument(document);
                writer.commit();
            }

            /**
             * Searches the {@code body} field for all terms of {@code queryString}, treating every
             * query-syntax character in it as literal text.
             *
             * @param queryString raw, unescaped text to look for
             * @return the stored {@code body} value of the best-scoring document
             * @throws IllegalStateException if no document matches the query
             * @throws Exception if the escaped query cannot be parsed or the index cannot be read
             */
            public String search(String queryString) throws Exception {
                try (IndexReader reader = DirectoryReader.open(writer, false)) {
                    IndexSearcher searcher = new IndexSearcher(reader);

                    // Escape first so the special characters are not interpreted as query operators;
                    // lower-case with a fixed locale so the result does not depend on the JVM default.
                    String escapedQueryString = QueryParser.escape(queryString).toLowerCase(java.util.Locale.ROOT);

                    QueryParser bodyQueryParser = new QueryParser(BODY_FIELD, analyzer);
                    bodyQueryParser.setDefaultOperator(QueryParser.Operator.AND);

                    Query bodyQuery = bodyQueryParser.parse(escapedQueryString);
                    TopDocs searchResult = searcher.search(bodyQuery, 1);

                    if (searchResult.scoreDocs.length == 0) {
                        throw new IllegalStateException("No document matched query: " + bodyQuery);
                    }
                    return searcher.doc(searchResult.scoreDocs[0].doc).get(BODY_FIELD);
                }
            }

            /**
             * Closes the index writer, the in-memory directory and the analyzer.
             *
             * @throws IOException if one of the resources fails to close
             */
            @Override
            public void close() throws IOException {
                // Nested try/finally so a failure closing one resource does not skip the others.
                try {
                    writer.close();
                } finally {
                    try {
                        directory.close();
                    } finally {
                        analyzer.close();
                    }
                }
            }

            /**
             * Builds analyzer that is used for indexing and searching.
             * Tokens are split on whitespace only, so special characters stay in the tokens.
             */
            private static Analyzer buildAnalyzer() throws IOException {
                // NOTE(review): the "standard" token filter may not exist in newer Lucene releases;
                // confirm against the Lucene version in use before upgrading.
                return CustomAnalyzer.builder()
                        .withTokenizer("whitespace")
                        .addTokenFilter("lowercase")
                        .addTokenFilter("standard")
                        .build();
            }
        }
    }
    

提交回复
热议问题