Question: How do I download a PDF file which is embedded in Excel?
This question has been asked so many times but I have not seen a single working a
Note: This will only work for pdf files. If there is a mix of embedded files then this will not work.
Basic Preparations:
Let's say our Excel File C:\Users\routs\Desktop\Sample.xlsx
has 2 Pdf Files embedded as shown below.
For testing purpose, we will create a temp folder on our desktop C:\Users\routs\Desktop\Temp
.
Logic:
Excel saves the oleObjects
in the \xl\embeddings\
folder. If you rename the Excel file to zip and open it in say Winzip, you can see the following
If you extract the bin files and rename it to pdf then you will be able to open the pdf in Microsoft Edge
but not in any other pdf viewer. To make it compatible with any other pdf viewer, we will have to do some Binary
reading and editing.
If you open the bin file in any Hex Editor, you will see the below. I used the online hex editor https://hexed.it/
We have to delete everything before the word %PDF
We will try and find the 8 bit unsigned values of %PDF
... Or more specifically of %
, P
, D
and F
If you scroll down in the hex editor, you will get those four values
Value of %
Value of P
Value of D
Value of F
Now all we have to do is read the binary file and delete everything before %PDF
and save the file with a .Pdf
extention.
Code:
Option Explicit
Const TmpPath As String = "C:\Users\routs\Desktop\Temp"
Const ExcelFile As String = "C:\Users\routs\Desktop\Sample.xlsx"
Const ZipName As String = "C:\Users\routs\Desktop\Sample.zip"
Sub ExtractPDF()
Dim tmpPdf As String
Dim oApp As Object
Dim i As Long
'~~> Deleting any previously created files. This is
'~~> usually helpful from 2nd run onwards
On Error Resume Next
Kill ZipName
Kill TmpPath & "\*.*"
On Error GoTo 0
'~~> Copy and rename the Excel file as zip file
FileCopy ExcelFile, ZipName
Set oApp = CreateObject("Shell.Application")
'~~> Extract the bin file from xl\embeddings\
For i = 1 To oApp.Namespace(ZipName).items.Count
oApp.Namespace(TmpPath).CopyHere oApp.Namespace(ZipName).items.Item("xl\embeddings\oleObject" & i & ".bin")
tmpPdf = TmpPath & "\oleObject" & i & ".bin"
'~~> Read and Edit the Bin File
If Dir(tmpPdf) <> "" Then ReadAndWriteExtractedBinFile tmpPdf
Next i
MsgBox "Done"
End Sub
'~~> Read and ReWrite Bin File
Sub ReadAndWriteExtractedBinFile(s As String)
Dim intFileNum As Long, bytTemp As Byte
Dim MyAr() As Long, NewAr() As Long
Dim fileName As String
Dim i As Long, j As Long, k As Long
j = 1
intFileNum = FreeFile
'~~> Open the bing file
Open s For Binary Access Read As intFileNum
'~~> Get the number of lines in the bin file
Do While Not EOF(intFileNum)
Get intFileNum, , bytTemp
j = j + 1
Loop
'~~> Create an array to store the filtered results of the bin file
'~~> We will use this to recreate the bin file
ReDim MyAr(1 To j)
j = 1
'~~> Go to first record
If EOF(intFileNum) Then Seek intFileNum, 1
'~~> Store the contents of bin file in an array
Do While Not EOF(intFileNum)
Get intFileNum, , bytTemp
MyAr(j) = bytTemp
j = j + 1
Loop
Close intFileNum
'~~> Check for the #PDF and Filter out rest of the data
For i = LBound(MyAr) To UBound(MyAr)
If i = UBound(MyAr) - 4 Then Exit For
If Val(MyAr(i)) = 37 And Val(MyAr(i + 1)) = 80 And _
Val(MyAr(i + 2)) = 68 And Val(MyAr(i + 3)) = 70 Then
ReDim NewAr(1 To j - i + 2)
k = 1
For j = i To UBound(MyAr)
NewAr(k) = MyAr(j)
k = k + 1
Next j
Exit For
End If
Next i
intFileNum = FreeFile
'~~> Decide on the new name of the pdf file
'~~> Format(Now, "ddmmyyhhmmss") This method will awlays ensure that
'~~> you will get a unique filename
fileName = TmpPath & "\" & Format(Now, "ddmmyyhhmmss") & ".pdf"
'~~> Write the new binary file
Open fileName For Binary Lock Read Write As #intFileNum
For i = LBound(NewAr) To UBound(NewAr)
Put #intFileNum, , CByte(NewAr(i))
Next i
Close #intFileNum
End Sub
Output