Showing posts with label Read Table from pdf using acrobat pro and vba. Show all posts
Showing posts with label Read Table from pdf using acrobat pro and vba. Show all posts

Friday, June 26, 2020

Read a Table from a PDF Using Acrobat Pro and VBA

' Require every variable in this module to be declared before use.
Option Explicit
' Full path of the PDF file that pdftoexcel opens and reads.
' NOTE(review): hard-coded to one user's desktop - change it before running elsewhere.
Public Const pdf_file  As String = "C:\Users\allso\Desktop\table.pdf"



Sub pdftoexcel()
    ' Copies table text from the PDF named by pdf_file into the worksheet
    ' "PDF_To_Excel", using the Acrobat Pro automation (IAC) objects.
    '
    ' For every page, the text elements are read in order. Copying starts at
    ' the first element containing "Disability" (that element is included) and
    ' stops for the page at the first element containing "Postal". Copied
    ' elements are written left to right, TABLE_COLUMNS cells per row.
    '
    ' Requires: a project reference to the Acrobat type library and a sheet
    ' named "PDF_To_Excel". The sheet is cleared before writing.
    ' Errors: any runtime error is re-raised to the caller after the Acrobat
    ' document and application have been closed.

    Const MAX_HILITE_ITEMS As Long = 9000   ' upper bound of text elements selected per page
    Const TABLE_COLUMNS As Long = 6         ' cells written before wrapping to the next row

    Dim eapp As Acrobat.AcroApp
    Dim av_doc As CAcroAVDoc
    Dim pdf_doc As CAcroPDDoc
    Dim sel_text As CAcroPDTextSelect
    Dim page_obj As Object                  ' PDPage returned by AcquirePage
    Dim hilite As Object                    ' AcroExch.HiliteList
    Dim ws As Worksheet
    Dim i As Long, j As Long                ' declared separately so both are Long
    Dim content As String
    Dim data_print As Boolean
    Dim doc_opened As Boolean
    Dim added As Boolean
    Dim cnt As Long
    Dim currow As Long
    Dim err_num As Long, err_src As String, err_desc As String

    currow = 1

    ' Qualify all cell access with the target sheet instead of relying on
    ' whichever sheet happens to be active.
    Set ws = Sheets("PDF_To_Excel")
    ws.Select
    ws.Cells.Clear

    On Error GoTo Fail

    Set eapp = CreateObject("AcroExch.App")
    Set av_doc = CreateObject("AcroExch.AVDoc")

    ' The second argument is the window title; an empty string lets Acrobat
    ' use the file name (vbNull is the numeric VbVarType constant 1, not "").
    If av_doc.Open(pdf_file, "") <> True Then GoTo CleanUp
    doc_opened = True

    Set pdf_doc = av_doc.GetPDDoc

    For i = 0 To pdf_doc.GetNumPages - 1
        ' Reset per page so a failed selection can never reuse the previous
        ' page's text.
        Set sel_text = Nothing
        added = False

        Set page_obj = pdf_doc.AcquirePage(i)
        Set hilite = CreateObject("AcroExch.HiliteList")

        ' Selection can fail on pages without text; tolerate that here only.
        On Error Resume Next
        added = (hilite.Add(0, MAX_HILITE_ITEMS) = True)
        If added Then Set sel_text = page_obj.CreatePageHilite(hilite)
        On Error GoTo Fail

        ' Could not build the highlight list: stop, but still release Acrobat.
        If Not added Then GoTo CleanUp

        If Not sel_text Is Nothing Then
            For j = 0 To sel_text.GetNumText - 1
                content = sel_text.GetText(j)

                If content Like "*Disability*" Then
                    data_print = True
                ElseIf content Like "*Postal*" Then
                    data_print = False
                    Exit For
                End If

                If data_print Then
                    cnt = cnt + 1
                    ws.Cells(currow, cnt).Value = _
                        Application.WorksheetFunction.Clean(Trim(content))

                    ' Row is full: wrap to the first column of the next row.
                    If cnt = TABLE_COLUMNS Then
                        cnt = 0
                        currow = currow + 1
                    End If
                End If
            Next j
        End If
    Next i

CleanUp:
    ' Best-effort shutdown; runs on success, early exit, and after errors.
    On Error Resume Next
    If doc_opened Then av_doc.Close False
    If Not eapp Is Nothing Then eapp.Exit
    On Error GoTo 0

    Set sel_text = Nothing
    Set hilite = Nothing
    Set page_obj = Nothing
    Set pdf_doc = Nothing
    Set av_doc = Nothing
    Set eapp = Nothing

    ' Surface the original error (if any) now that Acrobat is released.
    If err_num <> 0 Then Err.Raise err_num, err_src, err_desc
    Exit Sub

Fail:
    ' Save the error details before any On Error statement clears Err.
    err_num = Err.Number
    err_src = Err.Source
    err_desc = Err.Description
    Resume CleanUp
End Sub