My mission is pretty simple: converting every single page of a pdf file into images. I tried using icepdf open source version to generate the images but they don\'t generate
Convert PDF file 04-Request-Headers.pdf to image using pdfbox.
Download this file and paste it in Documents
folder.
Example:
package com.pdf.pdfbox.test;
import java.awt.HeadlessException;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.util.PDFImageWriter;
public class ConvertPDFPageToImageWithoutText {
public static void main(String[] args) {
try {
String oldPath = "C:/Documents/04-Request-Headers.pdf";
File oldFile = new File(oldPath);
if (oldFile.exists()) {
PDDocument document = PDDocument.load(oldPath);
@SuppressWarnings("unchecked")
List<PDPage> list = document.getDocumentCatalog().getAllPages();
String fileName = oldFile.getName().replace(".pdf", "");
String imageFormat = "png";
String password = "";
int startPage = 1;
int endPage = list.size();
String outputPrefix = "C:/Documents/PDFCopy/";//converted images saved here
File file = new File(outputPrefix);
if (!file.exists()) {
file.mkdirs();
}
int imageType = 24;
String color = "rgb";
int resolution;
try {
resolution = Toolkit.getDefaultToolkit().getScreenResolution();
} catch (HeadlessException e) {
resolution = 96;
}
if ("bilevel".equalsIgnoreCase(color)) {
imageType = BufferedImage.TYPE_BYTE_BINARY;
} else if ("indexed".equalsIgnoreCase(color)) {
imageType = BufferedImage.TYPE_BYTE_INDEXED;
} else if ("gray".equalsIgnoreCase(color)) {
imageType = BufferedImage.TYPE_BYTE_GRAY;
} else if ("rgb".equalsIgnoreCase(color)) {
imageType = BufferedImage.TYPE_INT_RGB;
} else if ("rgba".equalsIgnoreCase(color)) {
imageType = BufferedImage.TYPE_INT_ARGB;
} else {
System.err.println("Error: the number of bits per pixel must be 1, 8 or 24.");
}
PDFImageWriter pdfImageWriter = new PDFImageWriter();
boolean imageWriter = pdfImageWriter.writeImage(document, imageFormat, password, startPage, endPage, outputPrefix + fileName, imageType, resolution);
if (!imageWriter) {
throw new Exception("No writer found for format '" + imageFormat + "'");
}
document.close();
} else {
System.err.println(oldPath +" File Can't be found");
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
OR
Try the below solution for convert pdf files to image format.
How to Convert PDF to image with resolution in java Using PDF Renderer
I ended up trying different pdf libraries out there. The best solution is to use "JPedal", but you can only get a trial version for free. You can also try icepdf for free, but it might not generate the correct font.
Use the following code for conversions it works fine!
import java.awt.HeadlessException;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import javax.imageio.ImageIO;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFImageWriter;
/**
* Convert a PDF document to an image.
*
* @author <a href="ben@benlitchfield.com">Ben Litchfield</a>
* @version $Revision: 1.6 $
*/
public class PDFToImage
{
private static final String PASSWORD = "-password";
private static final String START_PAGE = "-startPage";
private static final String END_PAGE = "-endPage";
private static final String IMAGE_FORMAT = "-imageType";
private static final String OUTPUT_PREFIX = "-outputPrefix";
private static final String COLOR = "-color";
private static final String RESOLUTION = "-resolution";
/**
* private constructor.
*/
private PDFToImage()
{
//static class
}
/**
* Infamous main method.
*
* @param args Command line arguments, should be one and a reference to a file.
*
* @throws Exception If there is an error parsing the document.
*/
public static void main( String[] args ) throws Exception
{
String password = "";
String pdfFile = "D:/docoverview.pdf";
String outputPrefix = "D:/printdata/pdfimages/";
String imageFormat = "jpg";
int startPage = 1;
int endPage = Integer.MAX_VALUE;
String color = "rgb";
int resolution;
try
{
resolution = Toolkit.getDefaultToolkit().getScreenResolution();
}
catch( HeadlessException e )
{
resolution = 96;
}
for( int i = 0; i < args.length; i++ )
{
if( args[i].equals( PASSWORD ) )
{
i++;
if( i >= args.length )
{
usage();
}
password = args[i];
}
else if( args[i].equals( START_PAGE ) )
{
i++;
if( i >= args.length )
{
usage();
}
startPage = Integer.parseInt( args[i] );
}
else if( args[i].equals( END_PAGE ) )
{
i++;
if( i >= args.length )
{
usage();
}
endPage = Integer.parseInt( args[i] );
}
else if( args[i].equals( IMAGE_FORMAT ) )
{
i++;
imageFormat = args[i];
}
else if( args[i].equals( OUTPUT_PREFIX ) )
{
i++;
outputPrefix = args[i];
}
else if( args[i].equals( COLOR ) )
{
i++;
color = args[i];
}
else if( args[i].equals( RESOLUTION ) )
{
i++;
resolution = Integer.parseInt(args[i]);
}
else
{
if( pdfFile == null )
{
pdfFile = args[i];
}
}
}
if( pdfFile == null )
{
usage();
}
else
{
if(outputPrefix == null)
{
outputPrefix = pdfFile.substring( 0, pdfFile.lastIndexOf( '.' ));
}
PDDocument document = null;
try
{
document = PDDocument.load( pdfFile );
//document.print();
if( document.isEncrypted() )
{
try
{
document.decrypt( password );
}
catch( InvalidPasswordException e )
{
if( args.length == 4 )//they supplied the wrong password
{
System.err.println( "Error: The supplied password is incorrect." );
System.exit( 2 );
}
else
{
//they didn't supply a password and the default of "" was wrong.
System.err.println( "Error: The document is encrypted." );
usage();
}
}
}
int imageType = 24;
if ("bilevel".equalsIgnoreCase(color))
{
imageType = BufferedImage.TYPE_BYTE_BINARY;
}
else if ("indexed".equalsIgnoreCase(color))
{
imageType = BufferedImage.TYPE_BYTE_INDEXED;
}
else if ("gray".equalsIgnoreCase(color))
{
imageType = BufferedImage.TYPE_BYTE_GRAY;
}
else if ("rgb".equalsIgnoreCase(color))
{
imageType = BufferedImage.TYPE_INT_RGB;
}
else if ("rgba".equalsIgnoreCase(color))
{
imageType = BufferedImage.TYPE_INT_ARGB;
}
else
{
System.err.println( "Error: the number of bits per pixel must be 1, 8 or 24." );
System.exit( 2 );
}
//Make the call
PDFImageWriter imageWriter = new PDFImageWriter();
boolean success = imageWriter.writeImage(document, imageFormat, password,
startPage, endPage, outputPrefix, imageType, resolution);
if (!success)
{
System.err.println( "Error: no writer found for image format '"
+ imageFormat + "'" );
System.exit(1);
}
}
catch (Exception e)
{
System.err.println(e);
}
finally
{
if( document != null )
{
document.close();
}
}
}
}
/**
* This will print the usage requirements and exit.
*/
private static void usage()
{
System.err.println( "Usage: java org.apache.pdfbox.PDFToImage [OPTIONS] <PDF file>\n" +
" -password <password> Password to decrypt document\n" +
" -imageType <image type> (" + getImageFormats() + ")\n" +
" -outputPrefix <output prefix> Filename prefix for image files\n" +
" -startPage <number> The first page to start extraction(1 based)\n" +
" -endPage <number> The last page to extract(inclusive)\n" +
" -color <string> The color depth (valid: bilevel, indexed, gray, rgb, rgba)\n" +
" -resolution <number> The bitmap resolution in dpi\n" +
" <PDF file> The PDF document to use\n"
);
System.exit(1);
}
private static String getImageFormats()
{
StringBuffer retval = new StringBuffer();
String[] formats = ImageIO.getReaderFormatNames();
for( int i = 0; i < formats.length; i++ )
{
retval.append( formats[i] );
if( i + 1 < formats.length )
{
retval.append( "," );
}
}
return retval.toString();
}
}