问题
So I am creating a HSSFSheet
having a background bitmap set using apache poi
and own low level code. The https://www.openoffice.org/sc/excelfileformat.pdf declares for the Record BITMAP, BIFF8
:
Pixel data (array of height lines of the bitmap, from bottom line to top line, see below)
...
In each line all pixels are written from left to right. Each pixel is stored as 3-byte array: the red, green, and blue component of the colour of the pixel, in this order. The size of each line is aligned to multiples of 4 by inserting zero bytes after the last pixel.
See picture of the PDF for complete declaration:
For fulfilling this my approach is using java.awt.image.BufferedImage
having type BufferedImage.TYPE_3BYTE_BGR
. Then getting all bytes R G B from that BufferedImage's raster in correct order (from bottom line to top line) and filled up up to multiple of 4 in width (x direction).
See code:
import java.io.FileOutputStream;
import java.io.FileInputStream;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.hssf.record.RecordBase;
import org.apache.poi.hssf.record.StandardRecord;
import org.apache.poi.hssf.model.InternalSheet;
import org.apache.poi.util.LittleEndianOutput;
import java.lang.reflect.Field;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.awt.image.BufferedImage;
import java.awt.Graphics2D;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import javax.imageio.ImageIO;
public class CreateExcelHSSFSheetBackgroundBitmap {
static List<Byte> getBackgroundBitmapData(String filePath) throws Exception {
//see https://www.openoffice.org/sc/excelfileformat.pdf - BITMAP
List<Byte> data = new ArrayList<Byte>();
// get file byte data in type BufferedImage.TYPE_3BYTE_BGR
BufferedImage in = ImageIO.read(new FileInputStream(filePath));
BufferedImage image = new BufferedImage(in.getWidth(), in.getHeight(), BufferedImage.TYPE_3BYTE_BGR);
Graphics2D graphics = image.createGraphics();
graphics.drawImage(in, null, 0, 0);
graphics.dispose();
short width = (short)image.getWidth();
short height = (short)image.getHeight();
// each pixel has 3 bytes but the width bytes must be filled up to multiple of 4
int widthBytesMultOf4 = (int)((width * 3 + 3) / 4 * 4);
// --- this part takes much time but I have not found any better possibility
// put the bytes R G B into the data; lines of the bitmap must be from bottom line to top line
int bytes = 0;
for (short y = (short)(height - 1); y >= 0; y--) {
for (short x = 0; x < width; x++) {
int r = image.getData().getSample(x, y, 2);
data.add(Byte.valueOf((byte)r));
bytes++;
int g = image.getData().getSample(x, y, 1);
data.add(Byte.valueOf((byte)g));
bytes++;
int b = image.getData().getSample(x, y, 0);
data.add(Byte.valueOf((byte)b));
bytes++;
}
// fill up x with 0 bytes up to multiple of 4
for (int x = width * 3; x < widthBytesMultOf4; x++) {
data.add(Byte.valueOf((byte)0));
bytes++;
}
}
// ---
// size 12 bytes (additional headers, see below) + picture bytes
int size = 12 + bytes;
// get size int as LITTLE_ENDIAN bytes
ByteBuffer bSize = ByteBuffer.allocate(4);
bSize.order(ByteOrder.LITTLE_ENDIAN);
bSize.putInt(size);
// get width short as LITTLE_ENDIAN bytes
ByteBuffer bWidth = ByteBuffer.allocate(2);
bWidth.order(ByteOrder.LITTLE_ENDIAN);
bWidth.putShort(width);
// get height short as LITTLE_ENDIAN bytes
ByteBuffer bHeight = ByteBuffer.allocate(2);
bHeight.order(ByteOrder.LITTLE_ENDIAN);
bHeight.putShort(height);
// put the record headers into the data
Byte[] dataPart = new Byte[] { 0x09, 0x00, 0x01, 0x00,
bSize.array()[0], bSize.array()[1], bSize.array()[2], bSize.array()[3], // size
//now 12 bytes follow
0x0C, 0x00, 0x00, 0x00,
bWidth.array()[0], bWidth.array()[1], // width
bHeight.array()[0], bHeight.array()[1], // height
0x01, 0x00, 0x18, 0x00
};
data.addAll(0, Arrays.asList(dataPart));
return data;
}
public static void main(String[] args) throws Exception {
HSSFWorkbook workbook = new HSSFWorkbook();
HSSFSheet sheet = workbook.createSheet("Sheet1");
sheet = workbook.createSheet("Sheet2"); // this sheet gets the background image set
// we need the binary records of the sheet
// get InternalSheet
Field _sheet = HSSFSheet.class.getDeclaredField("_sheet");
_sheet.setAccessible(true);
InternalSheet internalsheet = (InternalSheet)_sheet.get(sheet);
// get List of RecordBase
Field _records = InternalSheet.class.getDeclaredField("_records");
_records.setAccessible(true);
@SuppressWarnings("unchecked")
List<RecordBase> records = (List<RecordBase>)_records.get(internalsheet);
// get bytes of the image file
List<Byte> data = getBackgroundBitmapData("dummyText.png"); //PNG must not have transparency
// do creating BitmapRecord and ContinueRecords from the data in parts of 8220 bytes
BitmapRecord bitmapRecord = null;
List<ContinueRecord> continueRecords = new ArrayList<ContinueRecord>();
int bytes = 0;
if (data.size() > 8220) {
bitmapRecord = new BitmapRecord(data.subList(0, 8220));
bytes = 8220;
while (bytes < data.size()) {
if ((bytes + 8220) < data.size()) {
continueRecords.add(new ContinueRecord(data.subList(bytes, bytes + 8220)));
bytes += 8220;
} else {
continueRecords.add(new ContinueRecord(data.subList(bytes, data.size())));
break;
}
}
} else {
bitmapRecord = new BitmapRecord(data);
}
// add the records after PageSettingsBlock
int i = 0;
for (RecordBase r : records) {
if (r instanceof org.apache.poi.hssf.record.aggregates.PageSettingsBlock) {
break;
}
i++;
}
records.add(++i, bitmapRecord);
for (ContinueRecord continueRecord : continueRecords) {
records.add(++i, continueRecord);
}
// debug output
for (RecordBase r : internalsheet.getRecords()) {
System.out.println(r);
}
// write out workbook
workbook.write(new FileOutputStream("CreateExcelHSSFSheetBackgroundBitmap.xls"));
workbook.close();
}
static class BitmapRecord extends StandardRecord {
//see https://www.openoffice.org/sc/excelfileformat.pdf - BITMAP
List<Byte> data = new ArrayList<Byte>();
BitmapRecord(List<Byte> data) {
this.data = data;
}
public int getDataSize() {
return data.size();
}
public short getSid() {
return (short)0x00E9;
}
public void serialize(LittleEndianOutput out) {
for (Byte b : data) {
out.writeByte(b);
}
}
}
static class ContinueRecord extends StandardRecord {
//see https://www.openoffice.org/sc/excelfileformat.pdf - CONTINUE
List<Byte> data = new ArrayList<Byte>();
ContinueRecord(List<Byte> data) {
this.data = data;
}
public int getDataSize() {
return data.size();
}
public short getSid() {
return (short)0x003C;
}
public void serialize(LittleEndianOutput out) {
for (Byte b : data) {
out.writeByte(b);
}
}
}
}
The code works but the part between
// --- this part takes much time but I have not found any better possibility
and
// ---
takes much time since 3 bytes R G B for each single pixel needs to be got for getting them according to the above strange format.
Does anyone knows of a better approach? Maybe the above strange format is not as strange as I think it is and there are already other usages of it?
回答1:
Here's a modified version of your code that works for me, AND is pretty fast.
- I'm using
byte[]
(andByteArrayOutputStream
) all around, no moreList<Byte>
. - As we already have a
BufferedImage
ofTYPE_3BYTE_BGR
, we can use that almost directly as the BMP output. We just need to a) prepend a valid BMP header and b) write bottom-up, c) pad each scanline (row) to a 32 bit boundary and d) switch BGR -> RGB order. - I'm using the
Raster
to copy (padded) rows of data into the output, as copying larger chunks is faster than copying single bytes.
As already noted in the comments, the structure is a standard BMP with BITMAPCOREHEADER
(and no file header). Unfortunately, the ImageIO
BMPImageWriter
always write the file header and uses the BITMAPINFOHEADER
which is 40 bytes. You could probably get around these things, and use the standard writer, by massaging the data a little (hint: the file header contains an offset to the pixel data at offset 10), but as the core BMP format is trivial to implement, it might be just as easy to do as below.
While the documentation certainly implies that using other formats like PNG and JPEG directly, I haven't managed to do this properly.
There's probably still room for improvement if you like, to avoid some byte array copying (ie. use offset/length and pass the entire data array to the Bitmap/ContinueRecord
s instead of Arrays.copyOfRange()
).
Code:
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.awt.image.Raster;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.lang.reflect.Field;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.imageio.ImageIO;
import org.apache.poi.hssf.model.InternalSheet;
import org.apache.poi.hssf.record.RecordBase;
import org.apache.poi.hssf.record.StandardRecord;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.util.LittleEndianOutput;
public class CreateExcelHSSFSheetBackgroundBitmap {
static byte[] getBackgroundBitmapData(String filePath) throws Exception {
//see https://www.openoffice.org/sc/excelfileformat.pdf - BITMAP
// get file byte data in type BufferedImage.TYPE_3BYTE_BGR
BufferedImage in = ImageIO.read(new FileInputStream(filePath));
BufferedImage image = new BufferedImage(in.getWidth(), in.getHeight(), BufferedImage.TYPE_3BYTE_BGR);
Graphics2D graphics = image.createGraphics();
graphics.drawImage(in, null, 0, 0);
graphics.dispose();
// calculate row size (c)
int rowSize = ((24 * image.getWidth() + 31) / 32) * 4;
ByteArrayOutputStream output = new ByteArrayOutputStream(image.getHeight() * rowSize * 3 + 1024);
// put the record headers into the data
ByteBuffer header = ByteBuffer.allocate(8 + 12);
header.order(ByteOrder.LITTLE_ENDIAN);
// Undocumented XLS stuff
header.putShort((short) 0x09);
header.putShort((short) 0x01);
header.putInt(image.getHeight() * rowSize + 12); // Size of image stream
// BITMAPCOREHEADER (a)
header.putInt(12);
header.putShort((short) image.getWidth());
header.putShort((short) image.getHeight()); // Use -height if writing top-down
header.putShort((short) 1); // planes, always 1
header.putShort((short) 24); // bitcount
output.write(header.array());
// Output rows bottom-up (b)
Raster raster = image.getRaster()
.createChild(0, 0, image.getWidth(), image.getHeight(), 0, 0, new int[]{2, 1, 0}); // Reverse BGR -> RGB (d)
byte[] row = new byte[rowSize]; // padded (c)
for (int i = image.getHeight() - 1; i >= 0; i--) {
row = (byte[]) raster.getDataElements(0, i, image.getWidth(), 1, row);
output.write(row);
}
return output.toByteArray();
}
public static void main(String[] args) throws Exception {
HSSFWorkbook workbook = new HSSFWorkbook();
HSSFSheet sheet = workbook.createSheet("Sheet2"); // this sheet gets the background image set
// we need the binary records of the sheet
// get InternalSheet
Field _sheet = HSSFSheet.class.getDeclaredField("_sheet");
_sheet.setAccessible(true);
InternalSheet internalsheet = (InternalSheet)_sheet.get(sheet);
// get List of RecordBase
Field _records = InternalSheet.class.getDeclaredField("_records");
_records.setAccessible(true);
@SuppressWarnings("unchecked")
List<RecordBase> records = (List<RecordBase>)_records.get(internalsheet);
// get bytes of the image file
byte[] data = getBackgroundBitmapData("dummy.png"); //PNG must not have transparency
// do creating BitmapRecord and ContinueRecords from the data in parts of 8220 bytes
BitmapRecord bitmapRecord;
List<ContinueRecord> continueRecords = new ArrayList<>();
int bytes;
if (data.length > 8220) {
bitmapRecord = new BitmapRecord(Arrays.copyOfRange(data, 0, 8220));
bytes = 8220;
while (bytes < data.length) {
if ((bytes + 8220) < data.length) {
continueRecords.add(new ContinueRecord(Arrays.copyOfRange(data, bytes, bytes + 8220)));
bytes += 8220;
} else {
continueRecords.add(new ContinueRecord(Arrays.copyOfRange(data, bytes, data.length)));
break;
}
}
} else {
bitmapRecord = new BitmapRecord(data);
}
// add the records after PageSettingsBlock
int i = 0;
for (RecordBase r : records) {
if (r instanceof org.apache.poi.hssf.record.aggregates.PageSettingsBlock) {
break;
}
i++;
}
records.add(++i, bitmapRecord);
for (ContinueRecord continueRecord : continueRecords) {
records.add(++i, continueRecord);
}
// debug output
for (RecordBase r : internalsheet.getRecords()) {
System.out.println(r);
}
// write out workbook
workbook.write(new FileOutputStream("backgroundImage.xls"));
workbook.close();
}
static class BitmapRecord extends StandardRecord {
//see https://www.openoffice.org/sc/excelfileformat.pdf - BITMAP
byte[] data;
BitmapRecord(byte[] data) {
this.data = data;
}
public int getDataSize() {
return data.length;
}
public short getSid() {
return (short)0x00E9;
}
public void serialize(LittleEndianOutput out) {
out.write(data);
}
}
static class ContinueRecord extends StandardRecord {
//see https://www.openoffice.org/sc/excelfileformat.pdf - CONTINUE
byte[] data;
ContinueRecord(byte[] data) {
this.data = data;
}
public int getDataSize() {
return data.length;
}
public short getSid() {
return (short)0x003C;
}
public void serialize(LittleEndianOutput out) {
out.write(data);
}
}
}
回答2:
As often it is simply stupidity of the programmer himself ;-). In German there is a saying: "Can't see the forest because of the trees.".
Simply getting the Raster
of the BufferedImage
once outside the loops instead of getting it inside the loops over and over again increases the speed enormously:
...
// put the bytes R G B into the data; lines of the bitmap must be from bottom line to top line
int bytes = 0;
Raster raster = image.getData();
for (short y = (short)(height - 1); y >= 0; y--) {
for (short x = 0; x < width; x++) {
int r = raster.getSample(x, y, 2);
data.add(Byte.valueOf((byte)r));
bytes++;
int g = raster.getSample(x, y, 1);
data.add(Byte.valueOf((byte)g));
bytes++;
int b = raster.getSample(x, y, 0);
data.add(Byte.valueOf((byte)b));
bytes++;
}
// fill up x with 0 bytes up to multiple of 4
for (int x = width * 3; x < widthBytesMultOf4; x++) {
data.add(Byte.valueOf((byte)0));
bytes++;
}
}
...
But the hint from @Gagravarr in his comment looks interesting also.
2.4.19 BkHim
The BkHim record specifies image data for a sheet (1) background
cf (2 bytes): A signed integer that specifies the image format. MUST be a value from the following:
Value: 0x0009
Meaning: Bitmap format. The image data is stored in a bitmap format as described in [MSDN-BMP]
Value: 0x000E
Meaning: Native format. The image data is stored in the native format of another application and cannot be directly processed.
This sounds as if if being 0x000E
the first bytes instead of 0x0009
then native picture bytes (PNG, JPG, BMP, ...) could be stored directly. Will try this tomorrow.
Well, as often only futile effort with Microsoft's "documentations". The https://interoperability.blob.core.windows.net/files/MS-XLS/[MS-XLS].pdf seems correct but incomplete, at least on page 211: 2.4.19
BkHim. So using 0x000E
instead of 0x0009
will not simply allowing storing a native imageBlob
.
But also the description of 0x0009
:
Bitmap format. The image data is stored in a bitmap format as described in [MSDN-BMP]
only links to an incomplete description of what kind of bitmap must be used here. Nothing about the header structure at the beginning and pixel bytes from bottom line to top line. And also nothing about the need aligning the size of each line to multiples of 4 by inserting zero bytes after the last pixel. But without knowing this all even using a bitmap 0x0009
will not work.
When I put any kind of background image into a worksheet in a *.xls
file using Excel
's GUI and then have a look into that file using a hex dump, then this always looks like:
0xe900SSSS09000100SSSSSSSS0c000000WWWWHHHH01001800PPP...
where S means size, W means width, H means height and P means pixel bytes.
This is independent of whether I put a BMP, JPG or PNG as background image into a worksheet. Always this special kind of BMP data is used.
So what OpenOffice has recognized using reverse engineering in https://www.openoffice.org/sc/excelfileformat.pdf is more useful than the "documentation" from Microsoft.
So following code for putting background image in HSSFSheet
works for me tested with multiple different kinds of image files (BMP, PNG, JPG).
Also changed using List<Byte> data
to byte[] data
. So org.apache.poi.hssf.record.ContinueRecord can be used directly and must not be recreated.
import java.io.FileOutputStream;
import java.io.FileInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.hssf.record.RecordBase;
import org.apache.poi.hssf.record.StandardRecord;
import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.hssf.model.InternalSheet;
import org.apache.poi.util.LittleEndianOutput;
import java.lang.reflect.Field;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.awt.image.BufferedImage;
import java.awt.image.Raster;
import java.awt.Graphics2D;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import javax.imageio.ImageIO;
public class CreateExcelHSSFSheetBackgroundBMP {
static byte[] getBackgroundBitmapData(String filePath) throws Exception {
// see https://www.openoffice.org/sc/excelfileformat.pdf - 5.6 BITMAP
// and https://interoperability.blob.core.windows.net/files/MS-XLS/[MS-XLS].pdf - 2.4.19 BkHim
// get file byte data in type BufferedImage.TYPE_3BYTE_BGR
BufferedImage in = ImageIO.read(new FileInputStream(filePath));
BufferedImage image = new BufferedImage(in.getWidth(), in.getHeight(), BufferedImage.TYPE_3BYTE_BGR);
Graphics2D graphics = image.createGraphics();
graphics.drawImage(in, null, 0, 0);
graphics.dispose();
short width = (short)image.getWidth();
short height = (short)image.getHeight();
// each pixel has 3 bytes but the width bytes must be filled up to multiple of 4
int widthBytesMultOf4 = (int)((width * 3 + 3) / 4 * 4);
// size 12 bytes (additional headers, see below) + picture bytes
int size = 12 + height * widthBytesMultOf4;
// create the header section
ByteBuffer headers = ByteBuffer.allocate(20);
headers.order(ByteOrder.LITTLE_ENDIAN);
headers.putShort((short)0x09); // 0x0009 = signed integer that specifies the image format BMP
headers.putShort((short)0x01); // reserved (2 bytes): MUST be 0x0001
headers.putInt(size); // signed integer that specifies the size of imageBlob in bytes
// BMP header section:
headers.putInt(0x0C); // length 0x0C = 12 bytes
headers.putShort(width); // pixels width
headers.putShort(height); // pixels heigth
headers.putShort((short)0x01); // number of planes: always 1
headers.putShort((short)0x18); // color depth 0x018 = 24 bit
//create data ByteArrayOutputStream
ByteArrayOutputStream data = new ByteArrayOutputStream();
// write headers section
data.write(headers.array());
// put the bytes R G B into the data; lines of the bitmap must be from bottom line to top line
Raster raster = image.getData();
for (short y = (short)(height - 1); y >= 0; y--) {
for (short x = 0; x < width; x++) {
int r = raster.getSample(x, y, 2);
data.write((byte)r);
int g = raster.getSample(x, y, 1);
data.write((byte)g);
int b = raster.getSample(x, y, 0);
data.write((byte)b);
}
// fill up x with 0 bytes up to multiple of 4
for (int x = width * 3; x < widthBytesMultOf4; x++) {
data.write((byte)0);
}
}
return data.toByteArray();
}
public static void main(String[] args) throws Exception {
HSSFWorkbook workbook = new HSSFWorkbook();
HSSFSheet sheet = workbook.createSheet("Sheet1");
sheet = workbook.createSheet("Sheet2"); // this sheet gets the background image set
// we need the binary records of the sheet
// get InternalSheet
Field _sheet = HSSFSheet.class.getDeclaredField("_sheet");
_sheet.setAccessible(true);
InternalSheet internalsheet = (InternalSheet)_sheet.get(sheet);
// get List of RecordBase
Field _records = InternalSheet.class.getDeclaredField("_records");
_records.setAccessible(true);
@SuppressWarnings("unchecked")
List<RecordBase> records = (List<RecordBase>)_records.get(internalsheet);
// get bytes of the image file
byte[] data = getBackgroundBitmapData("dummyText.png"); //PNG must not have transparency
// do creating BitmapRecord and ContinueRecords from the data in parts of 8220 bytes
BitmapRecord bitmapRecord = null;
List<ContinueRecord> continueRecords = new ArrayList<ContinueRecord>();
int bytes = 0;
if (data.length > 8220) {
bitmapRecord = new BitmapRecord(Arrays.copyOfRange(data, 0, 8220));
bytes = 8220;
while (bytes < data.length) {
if ((bytes + 8220) < data.length) {
continueRecords.add(new ContinueRecord(Arrays.copyOfRange(data, bytes, bytes + 8220)));
bytes += 8220;
} else {
continueRecords.add(new ContinueRecord(Arrays.copyOfRange(data, bytes, data.length)));
break;
}
}
} else {
bitmapRecord = new BitmapRecord(data);
}
// add the records after PageSettingsBlock
int i = 0;
for (RecordBase r : records) {
if (r instanceof org.apache.poi.hssf.record.aggregates.PageSettingsBlock) {
break;
}
i++;
}
records.add(++i, bitmapRecord);
for (ContinueRecord continueRecord : continueRecords) {
records.add(++i, continueRecord);
}
// debug output
for (RecordBase r : internalsheet.getRecords()) {
System.out.println(r.getClass());
}
// write out workbook
FileOutputStream out = new FileOutputStream("CreateExcelHSSFSheetBackgroundBMP.xls");
workbook.write(out);
workbook.close();
out.close();
}
static class BitmapRecord extends StandardRecord {
// see https://www.openoffice.org/sc/excelfileformat.pdf - 5.6 BITMAP
// and https://interoperability.blob.core.windows.net/files/MS-XLS/[MS-XLS].pdf - 2.4.19 BkHim
byte[] data;
BitmapRecord(byte[] data) {
this.data = data;
}
public int getDataSize() {
return data.length;
}
public short getSid() {
return (short)0x00E9;
}
public void serialize(LittleEndianOutput out) {
out.write(data);
}
}
}
来源:https://stackoverflow.com/questions/52419600/using-java-awt-image-bufferedimage-for-creating-biff8-bitmap-record-takes-much-t