Find Text in PDF Using Regex - VBScript
PDF Extractor SDK sample in VBScript demonstrating ‘Find Text in PDF Using Regex’
FindTextUsingRegex.vbs
' Searches every page of Invoice.pdf for date-like strings using a regular
' expression and shows each match in a message box.

' Instantiate the text extractor COM object and apply the registration details
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Open the sample PDF document
extractor.LoadDocumentFromFile "Invoice.pdf"

' Treat the search string as a regular expression
extractor.RegexSearch = True

' Two digits / two digits / four digits, e.g. dates written as 'mm/dd/yyyy'
datePattern = "[0-9]{2}/[0-9]{2}/[0-9]{4}"

' Pages are addressed by index, starting at 0
lastPageIndex = extractor.GetPageCount() - 1

For pageIndex = 0 To lastPageIndex
    ' Find arguments: page index, string to find, case sensitivity
    hasMatch = extractor.Find(pageIndex, datePattern, False)

    Do While hasMatch
        matchText = extractor.FoundText.Text

        ' The number shown is the page index used for the search
        MsgBox "Found match on page #" & CStr(pageIndex) & ": " & matchText

        ' Reset the extraction area before moving on (this script never sets one)
        extractor.ResetExtractionArea

        ' Advance to the next match; the loop ends when there is none
        hasMatch = extractor.FindNext
    Loop
Next

MsgBox "Done"

' Release the COM object
Set extractor = Nothing