Link Search Menu Expand Document

Extract Text From Coordinates from PDF - Microsoft Excel

PDF Extractor SDK sample in Microsoft Excel demonstrating ‘Extract Text From Coordinates from PDF’

ExtractTextFromPDF_VBA.txt
' IMPORTANT: This demo uses VBA so if you have it disabled please temporary enable
' by going to Tools - Macro - Security.. and changing the security mode to ""Medium""
' to Ask if you want enable macro or not. Then close and reopen this Excel document

' You should have evaluation version of the ByteScout SDK installed to get it working - get it from https://bytescout.com

' If you are getting error message like
' "File or assembly named Bytescout SDK, or one of its dependencies, was not found"
' then please try the following:
'
' - Close Excel
' - (for Office 2003 only) download and install this hotfix from Microsoft:
' http://www.microsoft.com/downloads/details.aspx?FamilyId=1B0BFB35-C252-43CC-8A2A-6A64D6AC4670&displaylang=en
'
' and then try again!
'
' If you have any questions please contact us at http://bytescout.com/support/ or at support@bytescout.com
                            

'==============================================
'References used
'=================
'Bytescout PDF Extractor SDK
'
' IMPORTANT:
' ==============================================================
'1) Add the ActiveX reference in Tools -> References
'
'==================================================================


Private Sub CommandButton1_Click()

' Create TextExtractor object
' Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
Dim extractor As New Bytescout_PDFExtractor.TextExtractor

extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile ("c:\sample1.pdf")

' Get page count
pageCount = extractor.GetPageCount()

Dim wb As Workbook
Dim ws As Worksheet
Dim TxtRng  As Range

Set wb = ActiveWorkbook
Set ws = wb.Sheets("Sheet1")

For i = 0 To pageCount - 1
            RectLeft = 10
            RectTop = 10
            RectWidth = 100
            RectHeight = 100

            ' check the same text is extracted from returned coordinates
            extractor.SetExtractionArea RectLeft, RectTop, RectWidth, RectHeight
            ' extract text from given area
            extractedText = extractor.GetTextFromPage(i)
                    
            ' insert rows
            ' Rows(1).Insert shift:=xlShiftDown
            ' write cell value
             Set TxtRng = ws.Range("A" & CStr(i + 2))
             TxtRng.Value = extractedText
    
Next

Set extractor = Nothing


End Sub

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK