一.项目包含的功能
1.高亮搜索;
2.词语自动补全;
3.分页查询;
4.复合查询;
5.对索引的增、删、改、查;
6.对文档的增、删、改;
7.搜索结果排序;
8.其他功能:文件读取(word、txt、pdf)
二.依赖环境版本
1.Elasticsearch 6.6.1;
2.jdk 1.8
3.前端分页插件:网上找的
4.elasticsearch-rest-high-level-client 6.6.1
5.spring boot 2.1.3
6.kibana 6.6.0
三.访问路径
1.项目路径:http://localhost:8080/searchTest.html
2.kibana路径:http://localhost:5601
3.Elasticsearch启动检测路径:http://localhost:9200/
四.页面效果
五.代码
1.bean实体类
package com.demo.elasticsearch.bean;
import java.util.Date;
/**
* @Author: ln
* @Date: 2019/2/26 08:59
* @Description:
*/
/**
 * Document model that is serialized to JSON and indexed into Elasticsearch.
 *
 * <p>On top of the five stored properties the bean exposes four derived,
 * read-only properties. They carry no state of their own; they simply mirror
 * {@code name} and {@code author} under additional JSON keys:
 * <ul>
 *   <li>{@code keywordName} / {@code keywordAuthor} - exact (non-analyzed) matching</li>
 *   <li>{@code suggestName} / {@code suggestAuthor} - completion suggestions</li>
 * </ul>
 *
 * <p>The highlight search in this project enumerates the declared fields of this
 * class through reflection, so the set of fields below is part of its behavior.
 */
public class FileBean {

    /** Primary key; used as the document id when the bean is indexed. */
    private String id;

    /** File name. */
    private String name;

    /** Author name. */
    private String author;

    /** Extracted text content of the file. */
    private String content;

    /** Path of the file on disk. */
    private String filePath;

    // ---------------------------------------------------------------- id

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    // -------------------------------------------------------------- name

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    // ------------------------------------------------------------ author

    public String getAuthor() {
        return this.author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    // ----------------------------------------------------------- content

    public String getContent() {
        return this.content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    // ---------------------------------------------------------- filePath

    public String getFilePath() {
        return this.filePath;
    }

    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

    // ------------------------------------ derived: exact-match properties

    /** @return the file name, exposed under the {@code keywordName} property */
    public String getKeywordName() {
        return getName();
    }

    /** @return the author, exposed under the {@code keywordAuthor} property */
    public String getKeywordAuthor() {
        return getAuthor();
    }

    // ------------------------------------- derived: completion properties

    /** @return the file name, exposed under the {@code suggestName} property */
    public String getSuggestName() {
        return getName();
    }

    /** @return the author, exposed under the {@code suggestAuthor} property */
    public String getSuggestAuthor() {
        return getAuthor();
    }
}
package com.demo.elasticsearch.bean;
/**
* @Author: ln
* @Date: 2019/2/26 08:59
* @Description:
*/
/**
 * Search criteria for the compound (multi) search.
 *
 * <p>Every property is optional. The compound search in this project walks the
 * declared fields of this class through reflection and uses each field name as
 * the Elasticsearch field to match, so the field names below are significant.
 */
public class FileBeanQuery {

    /** File name to match. */
    private String name;

    /** Author name to match. */
    private String author;

    /** File content to match. */
    private String content;

    /** File path to match. */
    private String filePath;

    public String getName() {
        return this.name;
    }

    public String getAuthor() {
        return this.author;
    }

    public String getContent() {
        return this.content;
    }

    public String getFilePath() {
        return this.filePath;
    }

    public void setName(String name) {
        this.name = name;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }
}
2.controller控制层
package com.demo.elasticsearch.controller;
import com.demo.elasticsearch.bean.FileBean;
import com.demo.elasticsearch.bean.FileBeanQuery;
import com.demo.elasticsearch.bean.FileMapping;
import com.demo.elasticsearch.service.ElasticsearchService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import java.io.IOException;
/**
* @Author: ln
* @Date: 2019/2/22 14:04
* @Description: elasticsearch demo
*/
/**
 * Web endpoints of the Elasticsearch demo, all mounted under {@code /demo}.
 *
 * <p>Each handler forwards its request parameters unchanged to
 * {@link ElasticsearchService} and writes the returned string as the response body.
 */
@Controller
@RequestMapping("/demo")
public class ElasticsearchController {

    @Autowired
    private ElasticsearchService elasticsearchService;

    /** Creates an index. */
    @RequestMapping("/createIndex")
    @ResponseBody
    public String createIndex(String index, FileMapping mapping) throws IOException {
        return elasticsearchService.createIndex(index, mapping);
    }

    /**
     * Deletes an index.
     *
     * <p>Original author's note: the Elasticsearch server version must match the
     * client version; debugging failed with the Spring starter at 6.4.3 and
     * succeeded with 6.6.1.
     */
    @RequestMapping("/delIndex")
    @ResponseBody
    public String delIndex(String index) throws IOException {
        return elasticsearchService.delIndex(index);
    }

    /** Creates a document (the index is created when it does not exist yet). */
    @RequestMapping("/putDocument")
    @ResponseBody
    public String putDocument(String index, FileBean fileBean) throws IOException {
        return elasticsearchService.putDocument(index, fileBean);
    }

    /** Deletes a document. */
    @RequestMapping("/delDocument")
    @ResponseBody
    public String delDocument(String index, String id) throws IOException {
        return elasticsearchService.delDocument(index, id);
    }

    /** Fetches a document. */
    @RequestMapping("/getDocument")
    @ResponseBody
    public String getDocument(String index, String id) throws IOException {
        return elasticsearchService.getDocument(index, id);
    }

    /** Keyword search. */
    @RequestMapping("/keywordSearch")
    @ResponseBody
    public String keywordSearch(String index, String value,
                                int current, int size) throws IOException {
        return elasticsearchService.keywordSearch(index, value, current, size);
    }

    /** Compound search. TODO: not yet debugged successfully. */
    @RequestMapping("/multiSearch")
    @ResponseBody
    public String multiSearch(String index, FileBeanQuery query,
                              int current, int size) throws IOException, IllegalAccessException {
        return elasticsearchService.multiSearch(index, query, current, size);
    }

    /** Highlight search (mind which QueryBuilders query method is used). */
    @RequestMapping("/highlightSearch")
    @ResponseBody
    public String highlightSearch(String index, String value, int current, int size) throws IOException {
        return elasticsearchService.highlightSearch(index, value, current, size);
    }

    /** Word completion (prefix-based completion only). */
    @RequestMapping("/suggestSearch")
    @ResponseBody
    public String suggestSearch(String index, String value) throws IOException {
        return elasticsearchService.suggestSearch(index, value);
    }

    /** Lists all documents of an index. */
    @RequestMapping("/searchAll")
    @ResponseBody
    public String searchAll(String index, int current, int size) throws IOException {
        return elasticsearchService.searchAll(index, current, size);
    }

    /** Returns the total number of documents. */
    @RequestMapping("/countQuery")
    @ResponseBody
    public String countQuery(String index) throws IOException {
        return elasticsearchService.countQuery(index);
    }
}
3.service业务逻辑层
package com.demo.elasticsearch.service;
import com.demo.elasticsearch.bean.FileBean;
import com.demo.elasticsearch.bean.FileBeanQuery;
import com.demo.elasticsearch.bean.FileMapping;
import java.io.IOException;
/**
* @Author: ln
* @Date: 2019/2/26 08:59
* @Description:
*/
public interface ElasticsearchService {
/**
* Creates a new index.
* Author: ln, 2019/3/1 16:51
*
* @param index   name of the index to create
* @param mapping mapping settings supplied by the caller.
*                NOTE(review): the implementation in this file does not read it - confirm the intended use
* @return JSON text describing the create-index response
* @throws IOException if the request to Elasticsearch fails
**/
String createIndex(String index, FileMapping mapping) throws IOException;
/**
* Deletes an index.
*
* @param index name of the index to delete
* @return JSON text of the "acknowledged" flag of the delete response
* @throws IOException if the request to Elasticsearch fails
**/
String delIndex(String index) throws IOException;
/**
* Indexes a file as a document. The implementation in this file reads the file at
* {@code fileBean.getFilePath()}, stores the extracted text and the file name on the bean,
* and indexes the bean under {@code fileBean.getId()}.
*
* @param index    name of the target index
* @param fileBean document to index; its file path selects the file to read
* @return JSON text of the REST status of the index response
* @throws IOException if the request to Elasticsearch fails
**/
String putDocument(String index, FileBean fileBean) throws IOException;
/**
* Deletes a document.
*
* @param index name of the index holding the document
* @param id    id of the document to delete
* @return JSON text of the REST status of the delete response
* @throws IOException if the request to Elasticsearch fails
**/
String delDocument(String index, String id) throws IOException;
/**
* Fetches a document by id.
*
* @param index name of the index holding the document
* @param id    id of the document to fetch
* @return JSON text of the get response
* @throws IOException if the request to Elasticsearch fails
**/
String getDocument(String index, String id) throws IOException;
/**
* Searches the non-analyzed fields {@code keywordName} and {@code keywordAuthor} for a value.
*
* @param index   name of the index to search
* @param value   text to search for
* @param current passed to Elasticsearch as the {@code from} offset of the first hit
* @param size    maximum number of hits to return
* @return JSON array with the source of every hit
* @throws IOException if the request to Elasticsearch fails
**/
String keywordSearch(String index, String value, int current, int size) throws IOException;
/**
* Compound search: the implementation in this file issues one match query per non-null
* property of {@code query} and merges the sources of all hits without duplicates.
*
* @param index   name of the index to search
* @param query   search criteria; null properties are skipped
* @param current intended paging offset (check the implementation for whether it is honoured)
* @param size    intended page size (check the implementation for whether it is honoured)
* @return JSON array with the merged hit sources
* @throws IOException            if the request to Elasticsearch fails
* @throws IllegalAccessException if a property of {@code query} cannot be read reflectively
**/
String multiSearch(String index, FileBeanQuery query, int current, int size) throws IOException, IllegalAccessException;
/**
* Searches the fields of {@link FileBean} for a value and returns the hits with the matching
* field values replaced by their highlighted fragment.
*
* @param index   name of the index to search
* @param value   text to search for
* @param current intended paging offset (check the implementation for whether it is honoured)
* @param size    intended page size (check the implementation for whether it is honoured)
* @return JSON array with the sources of the hits that produced a highlight
* @throws IOException if the request to Elasticsearch fails
**/
String highlightSearch(String index, String value, int current, int size) throws IOException;
/**
* Looks up completion suggestions in {@code suggestName} and {@code suggestAuthor}.
*
* @param index name of the index to query
* @param value text to complete
* @return JSON array of distinct suggested texts
* @throws IOException if the request to Elasticsearch fails
**/
String suggestSearch(String index, String value) throws IOException;
/**
* Lists the documents of an index with a match-all query.
*
* @param index   name of the index to search
* @param current passed to Elasticsearch as the {@code from} offset of the first hit
* @param size    maximum number of hits to return
* @return JSON text of the search hits
* @throws IOException if the request to Elasticsearch fails
**/
String searchAll(String index, int current, int size) throws IOException;
/**
* Counts documents in an index.
*
* @param index name of the index to count
* @return the count as a decimal string
* @throws IOException if the request to Elasticsearch fails
**/
String countQuery(String index) throws IOException;
}
package com.demo.elasticsearch.service;
import com.alibaba.fastjson.JSON;
import com.demo.elasticsearch.bean.FileBean;
import com.demo.elasticsearch.bean.FileBeanQuery;
import com.demo.elasticsearch.bean.FileMapping;
import com.demo.elasticsearch.util.AttachmentReader;
import org.apache.http.HttpHost;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsRequest;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.MultiSearchRequest;
import org.elasticsearch.action.search.MultiSearchResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.core.CountRequest;
import org.elasticsearch.client.core.CountResponse;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.elasticsearch.search.suggest.SuggestionBuilder;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.springframework.stereotype.Service;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* @Author: ln
* @Date: 2019/2/22 15:37
* @Description:
*/
/**
 * {@link ElasticsearchService} backed by the Elasticsearch high-level REST client.
 *
 * <p>Every operation opens its own client and closes it with try-with-resources, so the
 * client is released even when the call throws, and concurrent requests on this singleton
 * bean never share (or close) each other's client.
 *
 * <p>Paging: {@code current} is handed to Elasticsearch as the {@code from} offset and
 * {@code size} as the maximum number of hits.
 */
@Service
public class ElasticsearchServiceImpl implements ElasticsearchService {

    private static final String ES_HOST = "localhost";
    private static final int ES_PORT = 9200;
    private static final String ES_SCHEME = "http";

    /** Mapping type used for every document of this demo. */
    private static final String DOC_TYPE = "doc";

    private static final String SUGGEST_NAME = "suggestName";
    private static final String SUGGEST_AUTHOR = "suggestAuthor";

    /** Opens a new client; the caller owns it and must close it. */
    private static RestHighLevelClient newClient() {
        return new RestHighLevelClient(
                RestClient.builder(new HttpHost(ES_HOST, ES_PORT, ES_SCHEME)));
    }

    /**
     * Creates an index whose mapping declares the keyword and completion fields.
     *
     * @param index   name of the index to create
     * @param mapping currently unused; the mapping is fixed in code
     * @return JSON text of the create-index response
     * @throws IOException if the request fails
     */
    @Override
    public String createIndex(String index, FileMapping mapping) throws IOException {
        try (RestHighLevelClient client = newClient()) {
            CreateIndexRequest request = new CreateIndexRequest(index);
            // Index mapping: exact-match fields and completion fields.
            request.mapping(DOC_TYPE,
                    "keywordName", "type=keyword", "keywordAuthor", "type=keyword",
                    SUGGEST_NAME, "type=completion", SUGGEST_AUTHOR, "type=completion");
            CreateIndexResponse createIndexResponse = client.indices().create(request, RequestOptions.DEFAULT);
            return JSON.toJSONString(createIndexResponse);
        }
    }

    /**
     * Deletes an index.
     *
     * @param index name of the index to delete
     * @return JSON text of the "acknowledged" flag
     * @throws IOException if the request fails
     */
    @Override
    public String delIndex(String index) throws IOException {
        try (RestHighLevelClient client = newClient()) {
            DeleteIndexRequest request = new DeleteIndexRequest(index);
            AcknowledgedResponse deleteIndexResponse = client.indices().delete(request, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(deleteIndexResponse));
            return JSON.toJSONString(deleteIndexResponse.isAcknowledged());
        }
    }

    /**
     * Reads the file named by {@code fileBean.getFilePath()}, stores its text and file name
     * on the bean and indexes the bean under {@code fileBean.getId()}.
     *
     * <p>NOTE(security): the path comes straight from the request, so any file readable by
     * the server process can be indexed. Restrict it to an allowed base directory before
     * exposing this endpoint outside a demo.
     *
     * @param index    name of the target index
     * @param fileBean document to index
     * @return JSON text of the REST status of the index response
     * @throws IllegalArgumentException if no file path is supplied
     * @throws IOException              if the request fails
     */
    @Override
    public String putDocument(String index, FileBean fileBean) throws IOException {
        if (fileBean == null || fileBean.getFilePath() == null) {
            throw new IllegalArgumentException("fileBean.filePath must not be null");
        }
        File file = new File(fileBean.getFilePath());
        fileBean.setContent(AttachmentReader.reader(fileBean.getFilePath()));
        fileBean.setName(file.getName());

        try (RestHighLevelClient client = newClient()) {
            IndexRequest indexRequest = new IndexRequest(index, DOC_TYPE, fileBean.getId());
            indexRequest.source(JSON.toJSONString(fileBean), XContentType.JSON);
            IndexResponse response = client.index(indexRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(response));
            return JSON.toJSONString(response.status());
        }
    }

    /**
     * Deletes a document.
     *
     * @param index name of the index holding the document
     * @param id    id of the document
     * @return JSON text of the REST status of the delete response
     * @throws IOException if the request fails
     */
    @Override
    public String delDocument(String index, String id) throws IOException {
        try (RestHighLevelClient client = newClient()) {
            DeleteRequest request = new DeleteRequest(index, DOC_TYPE, id);
            DeleteResponse deleteResponse = client.delete(request, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(deleteResponse));
            return JSON.toJSONString(deleteResponse.status());
        }
    }

    /**
     * Fetches a document by id.
     *
     * @param index name of the index holding the document
     * @param id    id of the document
     * @return JSON text of the get response
     * @throws IOException if the request fails
     */
    @Override
    public String getDocument(String index, String id) throws IOException {
        try (RestHighLevelClient client = newClient()) {
            GetRequest getRequest = new GetRequest(index, DOC_TYPE, id);
            GetResponse getResponse = client.get(getRequest, RequestOptions.DEFAULT);
            String json = JSON.toJSONString(getResponse);
            System.out.println(json);
            return json;
        }
    }

    /**
     * Searches the exact-match fields {@code keywordName} and {@code keywordAuthor}.
     *
     * @param index   name of the index to search
     * @param value   text to search for
     * @param current {@code from} offset of the first hit
     * @param size    maximum number of hits
     * @return JSON array with the source of every hit
     * @throws IOException if the request fails
     */
    @Override
    public String keywordSearch(String index, String value,
                                int current, int size) throws IOException {
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Whole-word search is supported on keywordName and keywordAuthor.
        searchSourceBuilder.query(QueryBuilders.multiMatchQuery(value, "keywordName", "keywordAuthor"));
        searchSourceBuilder.from(current);
        searchSourceBuilder.size(size);

        SearchRequest searchRequest = new SearchRequest();
        searchRequest.indices(index);
        searchRequest.source(searchSourceBuilder);

        try (RestHighLevelClient client = newClient()) {
            SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(searchResponse));
            return JSON.toJSONString(dealResult(searchResponse.getHits()));
        }
    }

    /**
     * Compound search: one match query per non-null property of {@code query}; the sources
     * of all hits are merged without duplicates. Sub-searches that fail are skipped.
     *
     * <p>NOTE(review): {@code current} and {@code size} are not applied here. How paging
     * should work across several merged sub-searches is an open design question (the
     * endpoint is still marked as not fully debugged by its author).
     *
     * @param index   name of the index to search
     * @param query   search criteria; null properties are skipped
     * @param current not applied, see note
     * @param size    not applied, see note
     * @return JSON array with the merged hit sources; {@code []} when no criterion is set
     * @throws IOException            if the request fails
     * @throws IllegalAccessException if a property of {@code query} cannot be read
     */
    @Override
    public String multiSearch(String index, FileBeanQuery query,
                              int current, int size) throws IOException, IllegalAccessException {
        List<Map<String, Object>> result = new ArrayList<>();

        // Build the request before opening the client so a reflection failure cannot leak it.
        MultiSearchRequest request = new MultiSearchRequest();
        boolean hasCriteria = false;
        for (Field field : query.getClass().getDeclaredFields()) {
            field.setAccessible(true);
            Object criterion = field.get(query);
            if (criterion == null) {
                continue;
            }
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            searchSourceBuilder.query(QueryBuilders.boolQuery().must(
                    QueryBuilders.matchQuery(field.getName(), criterion)));
            SearchRequest searchRequest = new SearchRequest(index);
            searchRequest.source(searchSourceBuilder);
            request.add(searchRequest);
            hasCriteria = true;
        }
        if (!hasCriteria) {
            // Elasticsearch rejects a multi-search without any sub-request.
            return JSON.toJSONString(result);
        }

        try (RestHighLevelClient client = newClient()) {
            MultiSearchResponse response = client.msearch(request, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(response));
            for (MultiSearchResponse.Item item : response.getResponses()) {
                if (item.isFailure() || item.getResponse() == null) {
                    // A failed item carries no response; ignore it instead of failing with an NPE.
                    continue;
                }
                for (SearchHit hit : item.getResponse().getHits().getHits()) {
                    Map<String, Object> source = hit.getSourceAsMap();
                    if (!result.contains(source)) {
                        result.add(source);
                    }
                }
            }
            return JSON.toJSONString(result);
        }
    }

    /**
     * Searches every declared field of {@link FileBean} for {@code value} and returns the
     * hits with each matching field value replaced by its first highlighted fragment.
     * Hits without any highlight are not returned.
     *
     * @param index   name of the index to search
     * @param value   text to search for
     * @param current {@code from} offset of the first hit
     * @param size    maximum number of hits
     * @return JSON array with the highlighted hit sources
     * @throws IOException if the request fails
     */
    @Override
    public String highlightSearch(String index, String value, int current, int size) throws IOException {
        // Highlighting (and searching) covers all fields declared on FileBean.
        Field[] beanFields = FileBean.class.getDeclaredFields();
        String[] fieldNames = new String[beanFields.length];
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        for (int i = 0; i < beanFields.length; i++) {
            fieldNames[i] = beanFields[i].getName();
            HighlightBuilder.Field highlight = new HighlightBuilder.Field(fieldNames[i]);
            highlight.highlighterType("unified");
            highlightBuilder.field(highlight);
        }
        // Highlight markup.
        highlightBuilder.preTags("<label style=\"color: red\">");
        highlightBuilder.postTags("</label>");

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.highlighter(highlightBuilder);
        searchSourceBuilder.query(QueryBuilders.multiMatchQuery(value, fieldNames));
        // Apply the requested page, consistent with keywordSearch and searchAll.
        searchSourceBuilder.from(current);
        searchSourceBuilder.size(size);

        SearchRequest searchRequest = new SearchRequest();
        searchRequest.indices(index);
        searchRequest.source(searchSourceBuilder);

        try (RestHighLevelClient client = newClient()) {
            SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(searchResponse));

            List<Map<String, Object>> result = new ArrayList<>();
            for (SearchHit hit : searchResponse.getHits().getHits()) {
                Map<String, Object> source = hit.getSourceAsMap();
                if (source == null) {
                    continue;
                }
                Map<String, HighlightField> highlightFields = hit.getHighlightFields();
                boolean highlighted = false;
                for (String fieldName : fieldNames) {
                    System.out.println(fieldName);
                    HighlightField highlight = highlightFields.get(fieldName);
                    if (highlight == null) {
                        continue;
                    }
                    Text[] fragments = highlight.fragments();
                    if (fragments == null || fragments.length == 0) {
                        continue;
                    }
                    String fragmentString = fragments[0].string();
                    System.out.println("高亮值:" + fragmentString);
                    source.put(fieldName, fragmentString);
                    highlighted = true;
                }
                if (highlighted && !result.contains(source)) {
                    result.add(source);
                }
            }
            return JSON.toJSONString(result);
        }
    }

    /**
     * Looks up completion suggestions for {@code value} in {@code suggestName} and
     * {@code suggestAuthor} (prefix completion).
     *
     * @param index name of the index to query
     * @param value text to complete
     * @return JSON array of distinct suggested texts, name suggestions first
     * @throws IOException if the request fails
     */
    @Override
    public String suggestSearch(String index, String value) throws IOException {
        SuggestionBuilder<?> completionName = SuggestBuilders.completionSuggestion(SUGGEST_NAME).text(value);
        SuggestionBuilder<?> completionAuthor = SuggestBuilders.completionSuggestion(SUGGEST_AUTHOR).text(value);
        SuggestBuilder suggestBuilder = new SuggestBuilder();
        suggestBuilder.addSuggestion(SUGGEST_NAME, completionName);
        suggestBuilder.addSuggestion(SUGGEST_AUTHOR, completionAuthor);

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.suggest(suggestBuilder);
        SearchRequest searchRequest = new SearchRequest(index);
        searchRequest.source(searchSourceBuilder);

        try (RestHighLevelClient client = newClient()) {
            SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(searchResponse));

            List<String> suggestList = new ArrayList<>();
            Suggest suggest = searchResponse.getSuggest();
            if (suggest != null) {
                // Collected into our own list; the lists owned by the response are not modified.
                collectSuggestions(suggest, SUGGEST_NAME, suggestList);
                collectSuggestions(suggest, SUGGEST_AUTHOR, suggestList);
            }
            return JSON.toJSONString(suggestList);
        }
    }

    /** Appends the option texts of one named completion suggestion to {@code target}, skipping duplicates. */
    private static void collectSuggestions(Suggest suggest, String suggestionName, List<String> target) {
        CompletionSuggestion suggestion = suggest.getSuggestion(suggestionName);
        if (suggestion == null) {
            return;
        }
        for (CompletionSuggestion.Entry entry : suggestion.getEntries()) {
            for (CompletionSuggestion.Entry.Option option : entry) {
                String suggestText = option.getText().string();
                System.out.println("补全的词语:" + suggestText);
                if (!target.contains(suggestText)) {
                    target.add(suggestText);
                }
            }
        }
    }

    /**
     * Lists the documents of an index with a match-all query.
     *
     * @param index   name of the index to search
     * @param current {@code from} offset of the first hit
     * @param size    maximum number of hits
     * @return JSON text of the search hits
     * @throws IOException if the request fails
     */
    @Override
    public String searchAll(String index, int current, int size) throws IOException {
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.query(QueryBuilders.matchAllQuery());
        searchSourceBuilder.from(current);
        searchSourceBuilder.size(size);
        SearchRequest searchRequest = new SearchRequest(index);
        searchRequest.source(searchSourceBuilder);

        try (RestHighLevelClient client = newClient()) {
            SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(searchResponse));
            SearchHits hits = searchResponse.getHits();
            return JSON.toJSONString(hits);
        }
    }

    /**
     * Counts all documents of an index.
     *
     * @param index name of the index to count
     * @return the total number of documents as a decimal string
     * @throws IOException if the request fails
     */
    @Override
    public String countQuery(String index) throws IOException {
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Match-all only: a leftover sample term query (user = kimchy) used to replace this
        // source and made the endpoint count the wrong thing.
        searchSourceBuilder.query(QueryBuilders.matchAllQuery());
        CountRequest countRequest = new CountRequest(index);
        countRequest.source(searchSourceBuilder);

        try (RestHighLevelClient client = newClient()) {
            CountResponse countResponse = client.count(countRequest, RequestOptions.DEFAULT);
            return String.valueOf(countResponse.getCount());
        }
    }

    /** Extracts the source map of every hit, in hit order. */
    private List<Map<String, Object>> dealResult(SearchHits hits) {
        List<Map<String, Object>> result = new ArrayList<>();
        for (SearchHit hit : hits.getHits()) {
            result.add(hit.getSourceAsMap());
        }
        return result;
    }
}
4.pom.xml文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.3.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.demo</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>elasticsearch</name>
    <description>Demo project for Spring Boot</description>
    <properties>
        <java.version>1.8</java.version>
        <poi.version>3.16</poi.version>
        <commonsio.version>2.4</commonsio.version>
        <icepdf>6.2.3</icepdf>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- fastjson: early 1.2.x releases (including the previously pinned 1.2.31) are affected by
             the autoType deserialization remote-code-execution issues; 1.2.83 is the release that
             carries the fixes for the 1.x line. Do not downgrade. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.9</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>6.6.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.elasticsearch/elasticsearch -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>6.6.1</version>
        </dependency>
        <!-- commons -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>${commonsio.version}</version>
        </dependency>
        <dependency>
            <groupId>org.icepdf.os</groupId>
            <artifactId>icepdf-pro-intl</artifactId>
            <version>${icepdf}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/net.sourceforge.jchardet/jchardet -->
        <dependency>
            <groupId>net.sourceforge.jchardet</groupId>
            <artifactId>jchardet</artifactId>
            <version>1.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/commons-beanutils/commons-beanutils -->
        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.9.2</version>
        </dependency>
        <!-- POI -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>${poi.version}</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
5.其他工具类
package com.demo.elasticsearch.util;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.extractor.ExtractorFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
/**
* 文档读取工具,支持word,pdf,txt
* 2017年9月15日
*/
public class AttachmentReader {
private static final String[] WORD= {"doc","docx","xls","xlsx","ppt","pptx"};
private static final String PDF ="pdf";
private static final String TXT="txt";
public static String reader(String path) {
String text = "";
String type = path.substring(path.lastIndexOf(".")+1).toLowerCase();
try {
if(TXT.equals(type)) {
text= txtReader(path);
} else if(PDF.equals(type)) {
text = pdfReader(path);
} else {
for (int i = 0; i < WORD.length; i++) {
if(WORD[i].equals(type)){
text = wordReader(path);
}
}
}
} catch (Exception e) {
e.getMessage();
}
return text;
}
public static String wordReader(String path) {
try {
return ExtractorFactory.createExtractor(new File(path)).getText();
} catch (Exception e) {
System.out.println(path);
throw new RuntimeException(e);
}
}
public static String txtReader(String path) {
try {
File file = new File(path);
//文本编码探测
FileCharsetDetector detector = new FileCharsetDetector();
String charset = detector.guessFileEncoding(file, 2);
String str = FileUtils.readFileToString(file,charset);
return str;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public static String pdfReader(String path) {
String text = "";
FileInputStream is = null;
PDDocument document = null;
try {
is = new FileInputStream(path);
PDFParser parser = new PDFParser(new RandomAccessBuffer(is));
parser.parse();
document = parser.getPDDocument();
PDFTextStripper stripper = new PDFTextStripper();
text = stripper.getText(document);
} catch (Exception e) {
throw new RuntimeException(e);
}finally {
if(null!=is){
}
try {
is.close();
} catch (IOException e) {
}
}
return text;
}
}
package com.demo.elasticsearch.util;
import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
import java.io.*;
/**
* 字符集探测
* 2017年9月22日
*/
public class FileCharsetDetector {
private boolean found = false;
private String encoding = null;
public String guessFileEncoding(File file) throws FileNotFoundException, IOException {
return guessFileEncoding(file, new nsDetector());
}
public String guessFileEncoding(File file, int languageHint) throws FileNotFoundException, IOException {
return guessFileEncoding(file, new nsDetector(languageHint));
}
private String guessFileEncoding(File file, nsDetector det) throws FileNotFoundException, IOException {
// Set an observer...
// The Notify() will be called when a matching charset is found.
det.Init(new nsICharsetDetectionObserver() {
public void Notify(String charset) {
encoding = charset;
found = true;
}
});
BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file));
byte[] buf = new byte[1024];
int len;
boolean done = false;
boolean isAscii = false;
while ((len = imp.read(buf, 0, buf.length)) != -1) {
// Check if the stream is only ascii.
isAscii = det.isAscii(buf, len);
if (isAscii) {
break;
}
// DoIt if non-ascii and not done yet.
done = det.DoIt(buf, len, false);
if (done) {
break;
}
}
imp.close();
det.DataEnd();
if (isAscii) {
encoding = "ASCII";
found = true;
}
if (!found) {
String[] prob = det.getProbableCharsets();
//这里将可能的字符集组合起来返回
for (int i = 0; i < prob.length; i++) {
if (i == 0) {
encoding = prob[i];
} else {
encoding += "," + prob[i];
}
}
if (prob.length > 0) {
// 在没有发现情况下,也可以只取第一个可能的编码,这里返回的是一个可能的序列
return encoding;
} else {
return null;
}
}
return encoding;
}
}
6.springboot启动类
package com.demo.elasticsearch;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point of the Elasticsearch demo.
 */
@SpringBootApplication
public class ElasticsearchApplication {

    /**
     * Boots the application context with this class as the primary configuration source.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(ElasticsearchApplication.class);
        application.run(args);
    }
}
待完善……
来源:oschina
链接:https://my.oschina.net/u/4394131/blog/4304982