在Java项目中使用Elasticsearch 6.x (一)对索引的增删改查

ぃ、小莉子 提交于 2020-08-07 04:08:01
一.项目包含的功能
1.高亮搜索;
2.词语自动补全;
3.分页查询;
4.复合查询;
5.对索引的增、删、改、查;
6.对文档的增、删、改;
7.搜索结果排序;
8.其他功能:文件读取(支持 word、txt、pdf)

二.依赖环境版本
1.Elasticsearch 6.6.1;
2.jdk 1.8
3.前端分页插件:使用网上找到的第三方插件
4.elasticsearch-rest-high-level-client 6.6.1
5.spring boot 2.1.3
6.kibana 6.6.0

三.访问路径
1.项目路径:http://localhost:8080/searchTest.html
2.kibana路径:http://localhost:5601
3.Elasticsearch启动检测路径:http://localhost:9200/

四.页面效果






















 

五.代码
1.bean实体类
package com.demo.elasticsearch.bean;

import java.util.Date;

/**
 * Document model describing one file: its id, name, author, extracted
 * content and path on disk.
 *
 * <p>In addition to the five stored properties the class exposes four
 * read-only aliases ({@code keywordName}, {@code keywordAuthor},
 * {@code suggestName}, {@code suggestAuthor}) that simply mirror
 * {@code name} and {@code author}.
 *
 * @author ln
 * @since 2019/2/26
 */
public class FileBean {
    // Fields searched as analysed text: name, author, content, filePath.
    // Fields searched as exact keywords: name, author.
    // Fields offered for auto-completion: name, author.

    /** Primary key. */
    private String id;
    /** File name. */
    private String name;
    /** Author name. */
    private String author;
    /** File content. */
    private String content;
    /** File path. */
    private String filePath;

    /** @return the primary key */
    public String getId() {
        return this.id;
    }

    /** @param id the primary key */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the file name */
    public String getName() {
        return this.name;
    }

    /** @param name the file name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the author name */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the author name */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the file content */
    public String getContent() {
        return this.content;
    }

    /** @param content the file content */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the file path */
    public String getFilePath() {
        return this.filePath;
    }

    /** @param filePath the file path */
    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

    // ---- exact-match (keyword) aliases ----

    /** @return the same value as {@link #getName()} */
    public String getKeywordName() {
        return getName();
    }

    /** @return the same value as {@link #getAuthor()} */
    public String getKeywordAuthor() {
        return getAuthor();
    }

    // ---- auto-completion aliases ----

    /** @return the same value as {@link #getName()} */
    public String getSuggestName() {
        return getName();
    }

    /** @return the same value as {@link #getAuthor()} */
    public String getSuggestAuthor() {
        return getAuthor();
    }
}
package com.demo.elasticsearch.bean;

/**
 * Search criteria holder with one optional value per searchable property:
 * name, author, content and file path. A property left {@code null} has not
 * been set.
 *
 * @author ln
 * @since 2019/2/26
 */
public class FileBeanQuery {
    /** File name to search for. */
    private String name;
    /** Author name to search for. */
    private String author;
    /** File content to search for. */
    private String content;
    /** File path to search for. */
    private String filePath;

    /** @return the file name criterion */
    public String getName() {
        return this.name;
    }

    /** @param name the file name criterion */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the author criterion */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the author criterion */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the content criterion */
    public String getContent() {
        return this.content;
    }

    /** @param content the content criterion */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the file path criterion */
    public String getFilePath() {
        return this.filePath;
    }

    /** @param filePath the file path criterion */
    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }
}

  2.controller控制层

package com.demo.elasticsearch.controller;

import com.demo.elasticsearch.bean.FileBean;
import com.demo.elasticsearch.bean.FileBeanQuery;
import com.demo.elasticsearch.bean.FileMapping;
import com.demo.elasticsearch.service.ElasticsearchService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;

import java.io.IOException;

/**
 * Elasticsearch demo endpoints, all mounted under {@code /demo}.
 *
 * <p>Every handler forwards its request parameters unchanged to
 * {@link ElasticsearchService} and writes the string it gets back as the
 * response body.
 *
 * @author ln
 * @since 2019/2/22
 */
@Controller
@RequestMapping("/demo")
public class ElasticsearchController {

    @Autowired
    private ElasticsearchService elasticsearchService;

    /** Creates an index. */
    @RequestMapping("/createIndex")
    @ResponseBody
    public String createIndex(String index, FileMapping mapping) throws IOException {
        return elasticsearchService.createIndex(index, mapping);
    }

    /**
     * Deletes an index.
     *
     * <p>Author's note: the Elasticsearch server version must match the client
     * version; debugging failed with the 6.4.3 Spring starter and succeeded
     * with 6.6.1.
     */
    @RequestMapping("/delIndex")
    @ResponseBody
    public String delIndex(String index) throws IOException {
        return elasticsearchService.delIndex(index);
    }

    /** Creates a document (author's note: the index is created when it does not exist yet). */
    @RequestMapping("/putDocument")
    @ResponseBody
    public String putDocument(String index, FileBean fileBean) throws IOException {
        return elasticsearchService.putDocument(index, fileBean);
    }

    /** Deletes a document. */
    @RequestMapping("/delDocument")
    @ResponseBody
    public String delDocument(String index, String id) throws IOException {
        return elasticsearchService.delDocument(index, id);
    }

    /** Fetches a document. */
    @RequestMapping("/getDocument")
    @ResponseBody
    public String getDocument(String index, String id) throws IOException {
        return elasticsearchService.getDocument(index, id);
    }

    /** Keyword search (labelled "global search" by the original author). */
    @RequestMapping("/keywordSearch")
    @ResponseBody
    public String keywordSearch(String index, String value,
                                int current, int size) throws IOException {
        return elasticsearchService.keywordSearch(index, value, current, size);
    }

    /** Compound search. TODO: not yet debugged successfully. */
    @RequestMapping("/multiSearch")
    @ResponseBody
    public String multiSearch(String index, FileBeanQuery query,
                              int current, int size) throws IOException, IllegalAccessException {
        return elasticsearchService.multiSearch(index, query, current, size);
    }

    /** Highlight search (author's note: mind which QueryBuilders query method is used). */
    @RequestMapping("/highlightSearch")
    @ResponseBody
    public String highlightSearch(String index, String value, int current, int size) throws IOException {
        return elasticsearchService.highlightSearch(index, value, current, size);
    }

    /** Word completion (author's note: completes by prefix only). */
    @RequestMapping("/suggestSearch")
    @ResponseBody
    public String suggestSearch(String index, String value) throws IOException {
        return elasticsearchService.suggestSearch(index, value);
    }

    /** Global search over the whole index. */
    @RequestMapping("/searchAll")
    @ResponseBody
    public String searchAll(String index, int current, int size) throws IOException {
        return elasticsearchService.searchAll(index, current, size);
    }

    /** Queries the total number of documents. */
    @RequestMapping("/countQuery")
    @ResponseBody
    public String countQuery(String index) throws IOException {
        return elasticsearchService.countQuery(index);
    }
}

  3.service业务逻辑层

package com.demo.elasticsearch.service;

import com.demo.elasticsearch.bean.FileBean;
import com.demo.elasticsearch.bean.FileBeanQuery;
import com.demo.elasticsearch.bean.FileMapping;

import java.io.IOException;

/**
 * Operations the demo performs against Elasticsearch. Every method returns
 * its outcome as a string.
 *
 * @author ln
 * @since 2019/2/26
 */
public interface ElasticsearchService {

    /**
     * Creates a new index.
     *
     * @param index   name of the index
     * @param mapping mapping description supplied by the caller
     * @author ln 2019/3/1 16:51
     */
    String createIndex(String index, FileMapping mapping) throws IOException;

    /**
     * Deletes an index.
     *
     * @param index name of the index
     */
    String delIndex(String index) throws IOException;

    /**
     * Stores a document in an index.
     *
     * @param index    name of the index
     * @param fileBean document to store
     */
    String putDocument(String index, FileBean fileBean) throws IOException;

    /**
     * Deletes a document.
     *
     * @param index name of the index
     * @param id    id of the document
     */
    String delDocument(String index, String id) throws IOException;

    /**
     * Fetches a document.
     *
     * @param index name of the index
     * @param id    id of the document
     */
    String getDocument(String index, String id) throws IOException;

    /**
     * Keyword search.
     *
     * @param index   name of the index
     * @param value   text to search for
     * @param current paging position
     * @param size    page size
     */
    String keywordSearch(String index, String value, int current, int size) throws IOException;

    /**
     * Compound search driven by the properties of {@code query}.
     *
     * @param index   name of the index
     * @param query   search criteria
     * @param current paging position
     * @param size    page size
     */
    String multiSearch(String index, FileBeanQuery query, int current, int size)
            throws IOException, IllegalAccessException;

    /**
     * Search with highlighted matches.
     *
     * @param index   name of the index
     * @param value   text to search for
     * @param current paging position
     * @param size    page size
     */
    String highlightSearch(String index, String value, int current, int size) throws IOException;

    /**
     * Auto-completion lookup.
     *
     * @param index name of the index
     * @param value text to complete
     */
    String suggestSearch(String index, String value) throws IOException;

    /**
     * Lists the documents of an index.
     *
     * @param index   name of the index
     * @param current paging position
     * @param size    page size
     */
    String searchAll(String index, int current, int size) throws IOException;

    /**
     * Counts the documents of an index.
     *
     * @param index name of the index
     */
    String countQuery(String index) throws IOException;
}
package com.demo.elasticsearch.service;

import com.alibaba.fastjson.JSON;
import com.demo.elasticsearch.bean.FileBean;
import com.demo.elasticsearch.bean.FileBeanQuery;
import com.demo.elasticsearch.bean.FileMapping;
import com.demo.elasticsearch.util.AttachmentReader;
import org.apache.http.HttpHost;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsRequest;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.MultiSearchRequest;
import org.elasticsearch.action.search.MultiSearchResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.core.CountRequest;
import org.elasticsearch.client.core.CountResponse;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.elasticsearch.search.suggest.SuggestionBuilder;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.springframework.stereotype.Service;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * {@link ElasticsearchService} implementation built on the Elasticsearch
 * high-level REST client.
 *
 * <p>Each operation opens its own client against {@code http://localhost:9200}
 * and closes it through try-with-resources, so the client is released even
 * when the request throws and concurrent calls never share a client.
 *
 * <p>NOTE(review): {@link #highlightSearch} and {@link #multiSearch} accept
 * {@code current}/{@code size} but do not apply them to the request; whether
 * {@code current} is an offset or a page number should be confirmed against
 * the front end before wiring them in.
 *
 * @author ln
 * @since 2019/2/22
 */
@Service
public class ElasticsearchServiceImpl implements ElasticsearchService {

    /** Host of the Elasticsearch node. */
    private static final String ES_HOST = "localhost";
    /** HTTP port of the Elasticsearch node. */
    private static final int ES_PORT = 9200;
    /** Scheme used to reach the Elasticsearch node. */
    private static final String ES_SCHEME = "http";
    /** Mapping type used for every document handled by this service. */
    private static final String DOC_TYPE = "doc";

    /**
     * Kept only so existing references to the field keep compiling. The
     * operations below use a method-local client instead, because a shared
     * field that is reassigned per call is unsafe in a singleton service.
     */
    RestHighLevelClient client;

    /**
     * Creates the index with the keyword and completion sub-fields.
     *
     * @param index   name of the index to create
     * @param mapping currently not consulted; the mapping is fixed in code
     * @return the create-index response serialised as JSON
     */
    @Override
    public String createIndex(String index, FileMapping mapping) throws IOException {
        try (RestHighLevelClient es = newClient()) {
            CreateIndexRequest request = new CreateIndexRequest(index);

            // Index mapping: exact-match and auto-completion fields.
            request.mapping(DOC_TYPE,
                    "keywordName", "type=keyword", "keywordAuthor", "type=keyword",
                    "suggestName", "type=completion", "suggestAuthor", "type=completion");

            CreateIndexResponse createIndexResponse = es.indices().create(request, RequestOptions.DEFAULT);
            return JSON.toJSONString(createIndexResponse);
        }
    }

    /**
     * Deletes the index.
     *
     * @param index name of the index to delete
     * @return JSON boolean telling whether the deletion was acknowledged
     */
    @Override
    public String delIndex(String index) throws IOException {
        try (RestHighLevelClient es = newClient()) {
            DeleteIndexRequest request = new DeleteIndexRequest(index);

            AcknowledgedResponse deleteIndexResponse = es.indices().delete(request, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(deleteIndexResponse));
            return JSON.toJSONString(deleteIndexResponse.isAcknowledged());
        }
    }

    /**
     * Reads the file at {@code fileBean.getFilePath()}, stores its text and
     * file name on the bean and indexes the bean under its id.
     *
     * <p>NOTE(review): the path is read from the server's file system exactly
     * as supplied; restrict it before exposing this to untrusted callers.
     *
     * @param index    name of the target index
     * @param fileBean document to index; its file path must be set
     * @return the REST status of the index response serialised as JSON
     */
    @Override
    public String putDocument(String index, FileBean fileBean) throws IOException {
        // Extract the file content before opening a connection.
        File file = new File(fileBean.getFilePath());
        String content = AttachmentReader.reader(fileBean.getFilePath());
        fileBean.setContent(content);
        fileBean.setName(file.getName());

        try (RestHighLevelClient es = newClient()) {
            IndexRequest indexRequest = new IndexRequest(index, DOC_TYPE, fileBean.getId());
            indexRequest.source(JSON.toJSONString(fileBean), XContentType.JSON);

            IndexResponse response = es.index(indexRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(response));
            return JSON.toJSONString(response.status());
        }
    }

    /**
     * Deletes one document.
     *
     * @param index name of the index
     * @param id    id of the document
     * @return the REST status of the delete response serialised as JSON
     */
    @Override
    public String delDocument(String index, String id) throws IOException {
        try (RestHighLevelClient es = newClient()) {
            DeleteRequest request = new DeleteRequest(index, DOC_TYPE, id);
            DeleteResponse deleteResponse = es.delete(request, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(deleteResponse));
            return JSON.toJSONString(deleteResponse.status());
        }
    }

    /**
     * Fetches one document.
     *
     * @param index name of the index
     * @param id    id of the document
     * @return the get response serialised as JSON
     */
    @Override
    public String getDocument(String index, String id) throws IOException {
        try (RestHighLevelClient es = newClient()) {
            GetRequest getRequest = new GetRequest(index, DOC_TYPE, id);
            GetResponse getResponse = es.get(getRequest, RequestOptions.DEFAULT);
            String json = JSON.toJSONString(getResponse);
            System.out.println(json);
            return json;
        }
    }

    /**
     * Matches {@code value} against the exact-match fields
     * {@code keywordName} and {@code keywordAuthor}.
     *
     * @param index   name of the index
     * @param value   text to search for
     * @param current passed to the request as its {@code from} offset
     * @param size    maximum number of hits
     * @return the sources of the hits as a JSON array
     */
    @Override
    public String keywordSearch(String index, String value,
                            int current, int size) throws IOException {
        try (RestHighLevelClient es = newClient()) {
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            // Whole-word search is supported on keywordName and keywordAuthor.
            searchSourceBuilder.query(QueryBuilders.multiMatchQuery(value, "keywordName", "keywordAuthor"));
            searchSourceBuilder.from(current);
            searchSourceBuilder.size(size);

            SearchRequest searchRequest = new SearchRequest(index);
            searchRequest.source(searchSourceBuilder);
            SearchResponse searchResponse = es.search(searchRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(searchResponse));
            return JSON.toJSONString(dealResult(searchResponse.getHits()));
        }
    }

    /**
     * Runs one match query per non-null property of {@code query} in a single
     * multi-search and merges the distinct hit sources.
     *
     * @param index   name of the index
     * @param query   criteria; properties left {@code null} are skipped
     * @param current not applied yet (see class note)
     * @param size    not applied yet (see class note)
     * @return the distinct sources of all hits as a JSON array; an empty
     *         array when no criterion is set
     */
    @Override
    public String multiSearch(String index, FileBeanQuery query,
                              int current, int size) throws IOException, IllegalAccessException {
        MultiSearchRequest request = new MultiSearchRequest();
        int queued = 0;
        for (Field field : query.getClass().getDeclaredFields()) {
            if (field.isSynthetic()) {
                continue; // compiler/agent-generated members are not criteria
            }
            field.setAccessible(true);
            Object criterion = field.get(query);
            if (criterion == null) {
                continue;
            }
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            searchSourceBuilder.query(QueryBuilders.boolQuery().must(
                    QueryBuilders.matchQuery(field.getName(), criterion)));
            SearchRequest searchRequest = new SearchRequest(index);
            searchRequest.source(searchSourceBuilder);
            request.add(searchRequest);
            queued++;
        }

        List<Map<String, Object>> result = new ArrayList<>();
        if (queued == 0) {
            // Nothing to ask: avoid sending a multi-search without requests.
            return JSON.toJSONString(result);
        }

        try (RestHighLevelClient es = newClient()) {
            MultiSearchResponse response = es.msearch(request, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(response));
            // Merge the hits of every sub-search, dropping duplicates.
            for (MultiSearchResponse.Item item : response.getResponses()) {
                SearchResponse itemResponse = item.getResponse();
                if (itemResponse == null) {
                    continue; // this sub-search failed and carries no hits
                }
                for (SearchHit hit : itemResponse.getHits().getHits()) {
                    Map<String, Object> map = hit.getSourceAsMap();
                    if (!result.contains(map)) {
                        result.add(map);
                    }
                }
            }
            return JSON.toJSONString(result);
        }
    }

    /**
     * Searches {@code value} in every declared field of {@link FileBean} and
     * replaces matched field values with their first highlighted fragment.
     *
     * @param index   name of the index
     * @param value   text to search for
     * @param current not applied yet (see class note)
     * @param size    not applied yet (see class note)
     * @return the sources of the hits that have at least one highlighted
     *         field, as a JSON array
     */
    @Override
    public String highlightSearch(String index, String value, int current, int size) throws IOException {
        // Highlighting covers every declared field of FileBean.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        List<String> names = new ArrayList<>();
        for (Field f : FileBean.class.getDeclaredFields()) {
            if (f.isSynthetic()) {
                continue;
            }
            HighlightBuilder.Field highlight = new HighlightBuilder.Field(f.getName());
            highlight.highlighterType("unified");
            highlightBuilder.field(highlight);
            names.add(f.getName());
        }
        String[] fieldNames = names.toArray(new String[0]);
        // Highlight style.
        highlightBuilder.preTags("<label style=\"color: red\">");
        highlightBuilder.postTags("</label>");

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.highlighter(highlightBuilder);
        // The query also spans every declared field of FileBean.
        searchSourceBuilder.query(QueryBuilders.multiMatchQuery(value, fieldNames));
        SearchRequest searchRequest = new SearchRequest(index);
        searchRequest.source(searchSourceBuilder);

        try (RestHighLevelClient es = newClient()) {
            SearchResponse searchResponse = es.search(searchRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(searchResponse));
            // Copy the highlighted fragments into the hit sources.
            List<Map<String, Object>> result = new ArrayList<>();
            for (SearchHit hit : searchResponse.getHits().getHits()) {
                Map<String, HighlightField> highlightFields = hit.getHighlightFields();
                for (String fieldName : fieldNames) {
                    HighlightField highlight = highlightFields.get(fieldName);
                    System.out.println(fieldName);
                    if (highlight == null) {
                        continue;
                    }
                    Text[] fragments = highlight.fragments();
                    if (fragments == null || fragments.length == 0) {
                        continue;
                    }
                    String fragmentString = fragments[0].string();
                    System.out.println("高亮值:" + fragmentString);
                    Map<String, Object> map = hit.getSourceAsMap();
                    map.put(fieldName, fragmentString);
                    if (!result.contains(map)) {
                        result.add(map);
                    }
                }
            }
            return JSON.toJSONString(result);
        }
    }

    /**
     * Asks the completion suggesters on {@code suggestName} and
     * {@code suggestAuthor} to complete {@code value}.
     *
     * @param index name of the index
     * @param value text to complete
     * @return the distinct suggested texts as a JSON array, name suggestions
     *         first
     */
    @Override
    public String suggestSearch(String index, String value) throws IOException {
        // Completion is available on suggestName and suggestAuthor.
        SuggestionBuilder completionName = SuggestBuilders.completionSuggestion("suggestName").text(value);
        SuggestionBuilder completionAuthor = SuggestBuilders.completionSuggestion("suggestAuthor").text(value);
        SuggestBuilder suggestBuilder = new SuggestBuilder();
        suggestBuilder.addSuggestion("suggestName", completionName);
        suggestBuilder.addSuggestion("suggestAuthor", completionAuthor);

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.suggest(suggestBuilder);
        SearchRequest searchRequest = new SearchRequest(index);
        searchRequest.source(searchSourceBuilder);

        try (RestHighLevelClient es = newClient()) {
            SearchResponse searchResponse = es.search(searchRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(searchResponse));

            List<String> suggestList = new ArrayList<>();
            Suggest suggest = searchResponse.getSuggest();
            if (suggest != null) {
                CompletionSuggestion byName = suggest.getSuggestion("suggestName");
                CompletionSuggestion byAuthor = suggest.getSuggestion("suggestAuthor");
                // Read both suggestions separately instead of appending one
                // response list to the other.
                collectSuggestions(byName, suggestList);
                collectSuggestions(byAuthor, suggestList);
            }
            return JSON.toJSONString(suggestList);
        }
    }

    /**
     * Lists the documents of the index with a match-all query.
     *
     * @param index   name of the index
     * @param current passed to the request as its {@code from} offset
     * @param size    maximum number of hits
     * @return the search hits serialised as JSON
     */
    @Override
    public String searchAll(String index, int current, int size) throws IOException {
        try (RestHighLevelClient es = newClient()) {
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            searchSourceBuilder.query(QueryBuilders.matchAllQuery());
            searchSourceBuilder.from(current);
            searchSourceBuilder.size(size);

            SearchRequest searchRequest = new SearchRequest(index);
            searchRequest.source(searchSourceBuilder);
            SearchResponse searchResponse = es.search(searchRequest, RequestOptions.DEFAULT);
            System.out.println(JSON.toJSONString(searchResponse));
            return JSON.toJSONString(searchResponse.getHits());
        }
    }

    /**
     * Counts every document of the index.
     *
     * @param index name of the index
     * @return the document count as decimal text
     */
    @Override
    public String countQuery(String index) throws IOException {
        try (RestHighLevelClient es = newClient()) {
            // Count with match_all only. The previous code overwrote this
            // source with a sample term query on "user", so it did not count
            // all documents.
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            searchSourceBuilder.query(QueryBuilders.matchAllQuery());
            CountRequest countRequest = new CountRequest(index);
            countRequest.source(searchSourceBuilder);

            CountResponse countResponse = es.count(countRequest, RequestOptions.DEFAULT);
            return String.valueOf(countResponse.getCount());
        }
    }

    /** Opens a new client for the configured node; the caller must close it. */
    private static RestHighLevelClient newClient() {
        return new RestHighLevelClient(
                RestClient.builder(new HttpHost(ES_HOST, ES_PORT, ES_SCHEME)));
    }

    /** Appends the option texts of {@code suggestion} that {@code target} does not hold yet. */
    private static void collectSuggestions(CompletionSuggestion suggestion, List<String> target) {
        if (suggestion == null) {
            return;
        }
        for (CompletionSuggestion.Entry entry : suggestion.getEntries()) {
            for (CompletionSuggestion.Entry.Option option : entry) {
                String suggestText = option.getText().string();
                System.out.println("补全的词语:" + suggestText);
                if (!target.contains(suggestText)) {
                    target.add(suggestText);
                }
            }
        }
    }

    /** Collects the source map of every hit, in hit order. */
    private List<Map<String, Object>> dealResult(SearchHits hits) {
        List<Map<String, Object>> result = new ArrayList<>();
        for (SearchHit hit : hits.getHits()) {
            result.add(hit.getSourceAsMap());
        }
        return result;
    }
}

  4.pom.xml文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Spring Boot + Elasticsearch 6.6.1 demo. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.3.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.demo</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>elasticsearch</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
        <poi.version>3.16</poi.version>
        <commonsio.version>2.4</commonsio.version>
        <icepdf>6.2.3</icepdf>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- fastjson: 1.x releases before 1.2.83 are affected by autoType
             deserialization vulnerabilities (e.g. CVE-2022-25845), so stay
             on 1.2.83 or later. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.9</version>
        </dependency>
        <!-- The client version must match the Elasticsearch server version. -->
        <!-- https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>6.6.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.elasticsearch/elasticsearch -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>6.6.1</version>
        </dependency>
        <!-- commons -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>${commonsio.version}</version>
        </dependency>
        <dependency>
            <groupId>org.icepdf.os</groupId>
            <artifactId>icepdf-pro-intl</artifactId>
            <version>${icepdf}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/net.sourceforge.jchardet/jchardet -->
        <dependency>
            <groupId>net.sourceforge.jchardet</groupId>
            <artifactId>jchardet</artifactId>
            <version>1.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/commons-beanutils/commons-beanutils -->
        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.9.2</version>
        </dependency>
        <!-- POI -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>${poi.version}</version>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

</project>

  5.其他工具类

package com.demo.elasticsearch.util;

import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.extractor.ExtractorFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * 文档读取工具,支持word,pdf,txt
 * 2017年9月15日
 */
public class AttachmentReader {
    private static final String[] WORD= {"doc","docx","xls","xlsx","ppt","pptx"};
    private static final String PDF ="pdf";
    private static final String TXT="txt";
    
    public static String reader(String path) {
        String text = "";
        String type =  path.substring(path.lastIndexOf(".")+1).toLowerCase();
        try {
            if(TXT.equals(type)) {
                text= txtReader(path);
            } else if(PDF.equals(type)) {
                text = pdfReader(path);
            } else {
                for (int i = 0; i < WORD.length; i++) {
                    if(WORD[i].equals(type)){
                        text = wordReader(path);
                    }
                }
            }
        } catch (Exception e) {
            e.getMessage();
        }
        return text;
    }


public static String wordReader(String path) { try { return ExtractorFactory.createExtractor(new File(path)).getText(); } catch (Exception e) { System.out.println(path); throw new RuntimeException(e); } } public static String txtReader(String path) { try { File file = new File(path); //文本编码探测 FileCharsetDetector detector = new FileCharsetDetector(); String charset = detector.guessFileEncoding(file, 2); String str = FileUtils.readFileToString(file,charset); return str; } catch (Exception e) { throw new RuntimeException(e); } }


public static String pdfReader(String path) { String text = ""; FileInputStream is = null; PDDocument document = null; try { is = new FileInputStream(path); PDFParser parser = new PDFParser(new RandomAccessBuffer(is)); parser.parse(); document = parser.getPDDocument(); PDFTextStripper stripper = new PDFTextStripper(); text = stripper.getText(document); } catch (Exception e) { throw new RuntimeException(e); }finally { if(null!=is){ } try { is.close(); } catch (IOException e) { } } return text; } }
package com.demo.elasticsearch.util;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;

import java.io.*;

/**
 * 字符集探测
 * 2017年9月22日
 */
public class FileCharsetDetector {

    private boolean found = false;
    private String encoding = null;
  
    public String guessFileEncoding(File file) throws FileNotFoundException, IOException {
        return guessFileEncoding(file, new nsDetector());
    }


public String guessFileEncoding(File file, int languageHint) throws FileNotFoundException, IOException { return guessFileEncoding(file, new nsDetector(languageHint)); }
private String guessFileEncoding(File file, nsDetector det) throws FileNotFoundException, IOException { // Set an observer... // The Notify() will be called when a matching charset is found. det.Init(new nsICharsetDetectionObserver() { public void Notify(String charset) { encoding = charset; found = true; } }); BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file)); byte[] buf = new byte[1024]; int len; boolean done = false; boolean isAscii = false; while ((len = imp.read(buf, 0, buf.length)) != -1) { // Check if the stream is only ascii. isAscii = det.isAscii(buf, len); if (isAscii) { break; } // DoIt if non-ascii and not done yet. done = det.DoIt(buf, len, false); if (done) { break; } } imp.close(); det.DataEnd(); if (isAscii) { encoding = "ASCII"; found = true; } if (!found) { String[] prob = det.getProbableCharsets(); //这里将可能的字符集组合起来返回 for (int i = 0; i < prob.length; i++) { if (i == 0) { encoding = prob[i]; } else { encoding += "," + prob[i]; } } if (prob.length > 0) { // 在没有发现情况下,也可以只取第一个可能的编码,这里返回的是一个可能的序列 return encoding; } else { return null; } } return encoding; } }

   6.springboot启动类

package com.demo.elasticsearch;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Spring Boot entry point of the Elasticsearch demo.
 */
@SpringBootApplication
public class ElasticsearchApplication {

    /**
     * Starts the application.
     *
     * @param args command-line arguments handed on to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(ElasticsearchApplication.class);
        application.run(args);
    }

}

 

  待完善……

  

 

 

 


  
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!