Extract Text From Coordinates from PDF - Microsoft Excel
PDF Extractor SDK sample in Microsoft Excel demonstrating ‘Extract Text From Coordinates from PDF’
ExtractTextFromPDF_VBA.txt
' IMPORTANT: This demo uses VBA so if you have it disabled please temporary enable
' by going to Tools - Macro - Security.. and changing the security mode to ""Medium""
' to Ask if you want enable macro or not. Then close and reopen this Excel document
' You should have evaluation version of the ByteScout SDK installed to get it working - get it from https://bytescout.com
' If you are getting error message like
' "File or assembly named Bytescout SDK, or one of its dependencies, was not found"
' then please try the following:
'
' - Close Excel
' - (for Office 2003 only) download and install this hotfix from Microsoft:
' http://www.microsoft.com/downloads/details.aspx?FamilyId=1B0BFB35-C252-43CC-8A2A-6A64D6AC4670&displaylang=en
'
' and then try again!
'
' If you have any questions please contact us at http://bytescout.com/support/ or at support@bytescout.com
'==============================================
'References used
'=================
'Bytescout PDF Extractor SDK
'
' IMPORTANT:
' ==============================================================
'1) Add the ActiveX reference in Tools -> References
'
'==================================================================
Private Sub CommandButton1_Click()
' Create TextExtractor object
' Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
Dim extractor As New Bytescout_PDFExtractor.TextExtractor
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"
' Load sample PDF document
extractor.LoadDocumentFromFile ("c:\sample1.pdf")
' Get page count
pageCount = extractor.GetPageCount()
Dim wb As Workbook
Dim ws As Worksheet
Dim TxtRng As Range
Set wb = ActiveWorkbook
Set ws = wb.Sheets("Sheet1")
For i = 0 To pageCount - 1
RectLeft = 10
RectTop = 10
RectWidth = 100
RectHeight = 100
' check the same text is extracted from returned coordinates
extractor.SetExtractionArea RectLeft, RectTop, RectWidth, RectHeight
' extract text from given area
extractedText = extractor.GetTextFromPage(i)
' insert rows
' Rows(1).Insert shift:=xlShiftDown
' write cell value
Set TxtRng = ws.Range("A" & CStr(i + 2))
TxtRng.Value = extractedText
Next
Set extractor = Nothing
End Sub