package com.nj.obj.ysma;
import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.util.*; import java.util.concurrent.*; /** * @author ysma 2019-11-24 */ public class FetchWords { private static final Logger log = LoggerFactory.getLogger(FetchWords.class); private static final String dir = "D:\\ysma\\test"; private static final int processors = Runtime.getRuntime().availableProcessors(); //智能线程 cpu数目 private static ExecutorService executorService = Executors.newWorkStealingPool(processors); //线程安全 private static ConcurrentHashMap<String, Integer> calcMap = new ConcurrentHashMap<>(); public static void main(String[] args) { //1.获取文件目录 File directory = new File(dir); //2.校验文件目录 if(directory.exists()){ //2-1.获取文件列表 File[] files = directory.listFiles(); if(files == null || files.length == 0){ log.error("文件夹为空,path:{}", dir); } else { List<File> fileList = Arrays.asList(files); //多线程读取文件 Future[] futures = new Future[processors]; for(int i = 0; i < processors; i++){ futures[i] = executorService.submit(new SubFetchWords(i, fileList)); } //关闭任务 executorService.shutdown(); for(Future future : futures){ while (!future.isDone() || future.isCancelled()){ //等待线程执行完成 try { TimeUnit.MILLISECONDS.sleep(10); } catch (InterruptedException ignored) { } } } Integer max = calcMap.values().stream().max(Integer::compareTo).orElse(0); for(Map.Entry<String, Integer> entry : calcMap.entrySet()){ if(entry.getValue().equals(max)){ log.info("FetchWords 多文件出现频率最高的词汇为:{},次数:{}", entry.getKey(), max); } System.out.println(entry.getKey()+":"+entry.getValue()); } } } else { log.error("文件夹不存在,path:{}", dir); } } static class SubFetchWords implements Runnable{ private int index; private List<File> fileList; SubFetchWords(int index, List<File> fileList){ this.index = index; this.fileList = fileList; } @Override public void run() { //1.求余processors 决定文件归属线程 BufferedReader br = null; try { for (int i = 0; i < fileList.size(); i++) { if (i % processors == index) { File file = fileList.get(i); 
//2.读取文件 readline 小段读取 TODO 如果单行太长则采取另外方案 br = new BufferedReader(new FileReader(file)); String temp; while ((temp = br.readLine()) != null){ String[] words = temp.split(" "); //空格分隔符 for(String word : words){ calcMap.compute(word, (k, v) -> v == null? 1 : v +1); } } } } } catch (FileNotFoundException ex) { log.error("SubFetchWords FileNotFoundException 文件不存在", ex); } catch (IOException ex) { log.error("SubFetchWords IOException 文件解析异常", ex); } finally { if(br != null){ try { br.close(); } catch (IOException ignored) { } } } } } }
// Source: oschina
// Link: https://my.oschina.net/ysma1987/blog/3133361