I want to search for special characters in a Lucene index.
I escaped all the special characters in the query string, but when I run a query for `+` against the Lucene index, it creates the query as `+(`
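This may no longer be relevant for the author, but to be able to search for special characters you need:
- an analyzer that keeps the special characters in the tokens (the example below builds a custom analyzer on the whitespace tokenizer), used for both indexing and searching;
- to escape the query string (with `QueryParser.escape`) before parsing it.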
Here is an example that works for me:
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Test;
import java.io.IOException;
import java.util.Locale;
import static org.hamcrest.Matchers.equalTo;
import static org.junit.Assert.assertThat;
/**
 * Demonstrates searching a Lucene index for text that contains the query-syntax special
 * characters, by escaping the query string and using the same custom analyzer for indexing
 * and for query parsing.
 */
public class LuceneSpecialCharactersSearchTest {

    /**
     * Searches the indexed text by a substring followed by each special character in turn and
     * expects the single indexed document to be returned every time.
     */
    @Test
    public void testSpecialCharacterSearch() throws Exception {
        // GIVEN
        String[] luceneSpecialCharacters = new String[]{"+", "-", "&&", "||", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":", "\\"};

        // try-with-resources releases the index writer, directory and analyzer after the test.
        try (LuceneSpecialCharactersSearch service = new LuceneSpecialCharactersSearch()) {
            for (String specialCharacter : luceneSpecialCharacters) {
                // WHEN
                String actual = service.search("list's special-characters " + specialCharacter);

                // THEN
                // The reason string identifies which special character failed.
                assertThat("search with special character '" + specialCharacter + "'",
                        actual, equalTo(LuceneSpecialCharactersSearch.TEXT_WITH_SPECIAL_CHARACTERS));
            }
        }
    }

    /**
     * Small in-memory index holding one document whose {@code body} field is
     * {@link #TEXT_WITH_SPECIAL_CHARACTERS}, plus a search method that escapes its input.
     */
    private static class LuceneSpecialCharactersSearch implements AutoCloseable {
        private static final String TEXT_WITH_SPECIAL_CHARACTERS = "This is the list's of special-characters + - && || ! ( ) { } [ ] ^ \" ~ ? : \\ *";
        private static final String BODY_FIELD = "body";

        /** Single analyzer instance shared by the index writer and the query parser. */
        private final Analyzer analyzer;
        private final RAMDirectory directory;
        private final IndexWriter writer;

        /**
         * Creates the in-memory index and commits the single test document to it.
         *
         * @throws Exception if the analyzer cannot be built or the document cannot be indexed
         */
        public LuceneSpecialCharactersSearch() throws Exception {
            Document document = new Document();
            document.add(new TextField(BODY_FIELD, TEXT_WITH_SPECIAL_CHARACTERS, Field.Store.YES));
            analyzer = buildAnalyzer();
            directory = new RAMDirectory();
            writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            writer.addDocument(document);
            writer.commit();
        }

        /**
         * Escapes and lower-cases {@code queryString}, parses it against the {@code body} field
         * with AND as the default operator, and returns the stored text of the best hit.
         *
         * @param queryString raw user query; special characters are escaped before parsing
         * @return stored {@code body} value of the top hit, or {@code null} if nothing matched
         * @throws Exception if the reader cannot be opened or the query cannot be parsed
         */
        public String search(String queryString) throws Exception {
            try (IndexReader reader = DirectoryReader.open(writer, false)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
                // NOTE(review): lower-casing the whole query presumably also stops the words
                // AND/OR/NOT from being read as operators - confirm this is intended.
                String escapedQueryString = QueryParser.escape(queryString).toLowerCase(Locale.ROOT);
                QueryParser bodyQueryParser = new QueryParser(BODY_FIELD, analyzer);
                bodyQueryParser.setDefaultOperator(QueryParser.Operator.AND);
                Query bodyQuery = bodyQueryParser.parse(escapedQueryString);
                BooleanQuery query = new BooleanQuery.Builder()
                        .add(new BooleanClause(bodyQuery, BooleanClause.Occur.SHOULD))
                        .build();
                TopDocs searchResult = searcher.search(query, 1);
                // Guard against an empty result instead of failing with an index error.
                if (searchResult.scoreDocs.length == 0) {
                    return null;
                }
                return searcher.doc(searchResult.scoreDocs[0].doc).getField(BODY_FIELD).stringValue();
            }
        }

        /**
         * Closes the index writer, then the directory and the analyzer.
         *
         * @throws IOException if closing the writer fails
         */
        @Override
        public void close() throws IOException {
            // Resources declared here are closed in reverse order after the writer is closed.
            try (Analyzer closingAnalyzer = analyzer; RAMDirectory closingDirectory = directory) {
                writer.close();
            }
        }

        /**
         * Builds analyzer that is used for indexing and searching: whitespace tokenizer followed
         * by the lowercase and standard token filters.
         */
        private static Analyzer buildAnalyzer() throws IOException {
            return CustomAnalyzer.builder()
                    .withTokenizer("whitespace")
                    .addTokenFilter("lowercase")
                    .addTokenFilter("standard")
                    .build();
        }
    }
}