I am trying to extract all the images from a pdf using itextsharp but can\'t seem to overcome this one hurdle.
The error occures on the line System.Drawing.I
In newer version of iTextSharp, the 1st parameter of ImageRenderInfo.CreateForXObject
is not Matrix
anymore but GraphicsState
. @der_chirurg's approach should work. I tested myself with the information from the following link and it worked beautifully:
http://www.thevalvepage.com/swmonkey/2014/11/26/extract-images-from-pdf-files-using-itextsharp/
Works for me like this, using these two methods:
public static List<System.Drawing.Image> ExtractImagesFromPDF(byte[] bytes)
{
var imgs = new List<System.Drawing.Image>();
var pdf = new PdfReader(bytes);
try
{
for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
{
PdfDictionary pg = pdf.GetPageN(pageNumber);
List<PdfObject> objs = FindImageInPDFDictionary(pg);
foreach (var obj in objs)
{
if (obj != null)
{
int XrefIndex = Convert.ToInt32(((PRIndirectReference)obj).Number.ToString(System.Globalization.CultureInfo.InvariantCulture));
PdfObject pdfObj = pdf.GetPdfObject(XrefIndex);
PdfStream pdfStrem = (PdfStream)pdfObj;
var pdfImage = new PdfImageObject((PRStream)pdfStrem);
var img = pdfImage.GetDrawingImage();
imgs.Add(img);
}
}
}
}
finally
{
pdf.Close();
}
return imgs;
}
private static List<PdfObject> FindImageInPDFDictionary(PdfDictionary pg)
{
var res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
var xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
var pdfObgs = new List<PdfObject>();
if (xobj != null)
{
foreach (PdfName name in xobj.Keys)
{
PdfObject obj = xobj.Get(name);
if (obj.IsIndirect())
{
var tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
var type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
if (PdfName.IMAGE.Equals(type)) // image at the root of the pdf
{
pdfObgs.Add(obj);
}
else if (PdfName.FORM.Equals(type)) // image inside a form
{
FindImageInPDFDictionary(tg).ForEach(o => pdfObgs.Add(o));
}
else if (PdfName.GROUP.Equals(type)) // image inside a group
{
FindImageInPDFDictionary(tg).ForEach(o => pdfObgs.Add(o));
}
}
}
}
return pdfObgs;
}