Friday, June 26, 2020

Read Table from pdf using acrobat pro and vba

Option Explicit
Public Const pdf_file  As String = "C:\Users\allso\Desktop\table.pdf"



Sub pdftoexcel()
Dim eapp As Acrobat.AcroApp
Dim av_doc As CAcroAVDoc
Dim pdf_doc As CAcroPDDoc
Dim sel_text As CAcroPDTextSelect
Dim i, j As Long
Dim pagenumber, pagecontent, content
Dim data_print As Boolean
Dim cnt As Long
Dim currow As Long
currow = 1
Sheets("PDF_To_Excel").Select
Cells.Clear


Set eapp = CreateObject("AcroExch.App")
Set av_doc = CreateObject("AcroExch.AVDoc")
If av_doc.Open(pdf_file, vbNull) <> True Then Exit Sub
While av_doc Is Nothing
Set av_doc = eapp.GetActiveDoc
Wend
Set pdf_doc = av_doc.GetPDDoc
For i = 0 To pdf_doc.GetNumPages - 1
Set pagenumber = pdf_doc.AcquirePage(i)
Set pagecontent = CreateObject("AcroExch.HiliteList")
On Error Resume Next
If pagecontent.Add(0, 9000) <> True Then Exit Sub
Set sel_text = pagenumber.CreatePageHilite(pagecontent)
On Error GoTo 0

For j = 0 To sel_text.GetNumText - 1
'Debug.Print sel_text.GetText(j)
'content = sel_text.GetNumText(j)
content = sel_text.GetText(j)
If content Like "*Disability*" Then
data_print = True
ElseIf content Like "*Postal*" Then
data_print = False
Exit For
End If

If data_print = True Then
cnt = cnt + 1
Cells(currow, cnt) = Application.WorksheetFunction.Clean(Trim(content))
'Debug.Print content
End If

If cnt = 6 Then
cnt = 0
currow = currow + 1
End If

'Range("A" & Rows.Count).End(xlUp).Offset(1, 0).Value = sel_text.GetText(j)
Next j

Next i

av_doc.Close False
eapp.Exit
Set sel_text = Nothing
Set pagenumber = Nothing
Set eapp = Nothing
Set av_doc = Nothing
Set pdf_doc = Nothing
End Sub


No comments:

Post a Comment