How to find the location of an image or picture while parsing an MS Word document in Java using Apache POI

前端 未结 2 1918
暗喜
暗喜 2021-01-16 23:57
HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(fileName));
List picturesList = wordDoc.getPicturesTable().getAllPictures();
2条回答
  •  粉色の甜心
    2021-01-17 00:26

    You should add a PicturesSource class:

    /**
     * Indexes the pictures of an {@link HWPFDocument} by their start offset so
     * that the picture attached to a given {@link CharacterRun} can be found
     * while walking the document's character runs.
     *
     * <p>Pictures that no character run claims are kept in a separate list and
     * can be handed out one at a time through {@code nextUnclaimed()}.
     */
    public class PicturesSource {

        private final PicturesTable picturesTable;
        /** Pictures already recorded through {@code recordOutput}. */
        private final Set<Picture> output = new HashSet<Picture>();
        /** Picture start offset -> picture. */
        private final Map<Integer, Picture> lookup;
        /**
         * Copy of {@code all} in which every picture claimed by a character run
         * has been replaced by {@code null}; the remaining entries are unclaimed.
         */
        private final List<Picture> nonU1based;
        /** Every picture reported by the document's pictures table. */
        private final List<Picture> all;
        /** Cursor into {@code nonU1based} used by {@code nextUnclaimed()}. */
        private int pn = 0;

        /**
         * Builds the offset lookup and works out which pictures are not
         * referenced by any character run of the document.
         *
         * @param doc the Word document whose pictures are indexed
         */
        public PicturesSource(HWPFDocument doc) {
            picturesTable = doc.getPicturesTable();
            all = picturesTable.getAllPictures();

            lookup = new HashMap<Integer, Picture>();
            for (Picture p : all) {
                lookup.put(p.getStartOffset(), p);
            }

            nonU1based = new ArrayList<Picture>(all);
            Range r = doc.getRange();
            for (int i = 0; i < r.numCharacterRuns(); i++) {
                CharacterRun cr = r.getCharacterRun(i);
                if (!picturesTable.hasPicture(cr)) {
                    continue;
                }
                Picture p = getFor(cr);
                if (p == null) {
                    // The run's offset has no entry in the lookup; nothing to claim.
                    continue;
                }
                int at = nonU1based.indexOf(p);
                // indexOf yields -1 when the picture was already claimed by an
                // earlier run; set(-1, ...) would throw IndexOutOfBoundsException.
                if (at >= 0) {
                    nonU1based.set(at, null);
                }
            }
        }

        /** Returns whether the pictures table reports a picture for the run. */
        private boolean hasPicture(CharacterRun cr) {
            return picturesTable.hasPicture(cr);
        }

        /** Remembers that the given picture has been output. */
        private void recordOutput(Picture picture) {
            output.add(picture);
        }

        /** Returns whether the given picture was previously recorded as output. */
        private boolean hasOutput(Picture picture) {
            return output.contains(picture);
        }

        /**
         * Returns the 1-based position of the picture in the document's picture
         * list, or 0 if the picture is not in that list.
         */
        private int pictureNumber(Picture picture) {
            return all.indexOf(picture) + 1;
        }

        /**
         * Looks up the picture whose start offset equals the run's picture offset.
         *
         * @param cr the character run to resolve
         * @return the matching picture, or {@code null} if no picture starts at
         *         that offset
         */
        public Picture getFor(CharacterRun cr) {
            return lookup.get(cr.getPicOffset());
        }

        /**
         * Advances the cursor to the next picture that no character run claimed.
         *
         * @return the next unclaimed picture, or {@code null} once the list is
         *         exhausted
         */
        private Picture nextUnclaimed() {
            while (pn < nonU1based.size()) {
                Picture p = nonU1based.get(pn);
                pn++;
                if (p != null) {
                    return p;
                }
            }
            return null;
        }

    }

提交回复
热议问题