【推荐】2019 Java 开发者跳槽指南.pdf(吐血整理) >>>
①拼写检查域与其他域不同:建立索引时需要分词,而检索时不做中文分词(查询端只按空白切分),因此要单独建立一个专用的域,供拼写检查使用:
在 schema.xml 文件里定义拼写检查域,并用 copyField 指定把哪些字段的内容复制到该域:
<!-- Field that feeds the spellchecker: indexed, not stored, multi-valued. -->
<field name="spell" type="text_spell"
       indexed="true" stored="false" multiValued="true" />

<!-- The contents of "name" and "content" are copied into "spell". -->
<copyField source="name" dest="spell"/>
<copyField source="content" dest="spell"/>

<!-- Field type whose index-time and query-time analysis differ. -->
<fieldType name="text_spell" class="solr.TextField" positionIncrementGap="100">
  <!-- Index time: IK tokenizer (useSmart="false"), then stop-word filtering. -->
  <analyzer type="index">
    <tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory"
               useSmart="false" conf="ik.conf"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
  </analyzer>
  <!-- Query time: the input is only split on whitespace. -->
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>
②在solrconfig.xml文件里设置:
<!-- Spell-check search component, registered under the name "spellerror". -->
<searchComponent name="spellerror" class="solr.SpellCheckComponent">
  <!-- Field type whose query analyzer is applied to the spell-check input. -->
  <str name="queryAnalyzerFieldType">text_spell</str>

  <!-- Dictionary "default": a spellchecker built from the "spell" field of the main index. -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">spell</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <str name="distanceMeasure">internal</str>
    <float name="accuracy">0.5</float>
    <int name="maxEdits">2</int>
    <int name="minPrefix">1</int>
    <int name="maxInspections">5</int>
    <int name="minQueryLength">2</int>
    <float name="maxQueryFrequency">0.01</float>
  </lst>

  <!-- Dictionary "file": a spellchecker whose source is the UTF-8 file spellings.txt. -->
  <lst name="spellchecker">
    <str name="name">file</str>
    <str name="classname">solr.FileBasedSpellChecker</str>
    <str name="sourceLocation">spellings.txt</str>
    <str name="characterEncoding">UTF-8</str>
    <str name="spellcheckIndexDir">spellcheckerFile</str>
  </lst>
</searchComponent>
<!-- Request handler "/spell": a SearchHandler that runs the "spellerror" component last. -->
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  <!-- Defaults applied when the request does not supply the parameter itself. -->
  <lst name="defaults">
    <str name="df">spell</str>
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck">on</str>
    <!-- Disabled options, kept for reference:
    <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.alternativeTermCount">5</str>
    <str name="spellcheck.maxResultsForSuggest">5</str>
    -->
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.collateExtendedResults">true</str>
    <!-- Disabled options, kept for reference:
    <str name="spellcheck.maxCollationTries">10</str>
    <str name="spellcheck.maxCollations">5</str>
    -->
  </lst>
  <!-- Components appended after the handler's standard component chain. -->
  <arr name="last-components">
    <str>spellerror</str>
  </arr>
</requestHandler>
③solrj里的代码
/**
 * Asks the Solr {@code /spell} request handler for a spelling suggestion.
 *
 * <p>The request carries {@code word} as {@code spellcheck.q} and asks for at most five
 * suggestions. {@code spellcheck.build} is deliberately NOT sent: it triggers a dictionary
 * build, which the Solr reference guide says should not be requested on every query. If the
 * configured dictionary needs building, do that once (for example with a one-off request or
 * a build-on-commit setting) rather than per lookup.
 *
 * @param word the term to check; may be {@code null}
 * @return the first suggestion reported for {@code word}, or {@code null} when {@code word}
 *         is null/blank, the response has no spell-check section, the response reports the
 *         input as correctly spelled, or the Solr request fails
 * @author ChenYW (originally written 2014-04-15)
 */
public String spellCheck(String word) {
    // Nothing to check: avoid a pointless round trip to Solr.
    if (word == null || word.trim().isEmpty()) {
        return null;
    }

    SolrQuery query = new SolrQuery();
    query.set("qt", "/spell");
    query.set("defType", "edismax"); // field weighting via qf below
    query.set("qf", "name^20.0");
    query.set("spellcheck", "true");
    query.set("spellcheck.q", word);
    query.set("spellcheck.count", 5);

    try {
        QueryResponse rsp = server.query(query);
        SpellCheckResponse spellCheck = rsp.getSpellCheckResponse();
        if (spellCheck == null || spellCheck.isCorrectlySpelled()) {
            return null;
        }
        // First suggestion returned for the input term.
        String suggestion = spellCheck.getFirstSuggestion(word);
        System.out.println("推荐词:" + suggestion);
        return suggestion;
    } catch (SolrServerException e) {
        // Best effort: a failed lookup is reported to the caller as "no suggestion".
        // NOTE(review): route this through the project's logger once one is available here.
        e.printStackTrace();
        return null;
    }
}
来源:oschina
链接:https://my.oschina.net/u/1473715/blog/261242