Wednesday, June 24, 2020

Get text data from pdf to excel range using acrobat library in VBA


First we need to add the acrobat reference and see if Adobe Acrobat 10.0 Type Library is enabled

Option Explicit
Public Const pdf_file  As String = "C:\Users\allso\Desktop\Business Loan Application Form.pdf"



Sub pdftoexcel()
Dim eapp As Acrobat.AcroApp
Dim av_doc As CAcroAVDoc
Dim pdf_doc As CAcroPDDoc
Dim sel_text As CAcroPDTextSelect
Dim i, j As Long
Dim pagenumber, pagecontent, content
Set eapp = CreateObject("AcroExch.App")
Set av_doc = CreateObject("AcroExch.AVDoc")
If av_doc.Open(pdf_file, vbNull) <> True Then Exit Sub
While av_doc Is Nothing
Set av_doc = eapp.GetActiveDoc
Wend
Set pdf_doc = av_doc.GetPDDoc
For i = 0 To pdf_doc.GetNumPages - 1
Set pagenumber = pdf_doc.AcquirePage(i)
Set pagecontent = CreateObject("AcroExch.HiliteList")
On Error Resume Next
If pagecontent.Add(0, 9000) <> True Then Exit Sub
Set sel_text = pagenumber.CreatePageHilite(pagecontent)
On Error GoTo 0

For j = 0 To sel_text.GetNumText - 1
'Debug.Print sel_text.GetText(j)
Range("A" & Rows.Count).End(xlUp).Offset(1, 0).Value = sel_text.GetText(j)
Next j

Next i

av_doc.Close False
eapp.Exit
Set sel_text = Nothing
Set pagenumber = Nothing
Set eapp = Nothing
Set av_doc = Nothing
Set pdf_doc = Nothing
End Sub

No comments:

Post a Comment