Find Text in PDF with Regex - VB.NET
PDF Extractor SDK sample in VB.NET demonstrating ‘Find Text in PDF with Regex’
Program.vb
Imports System.Drawing
Imports Bytescout.PDFExtractor
' Note: if you are looking for a more higher level API to extract data from invoices, reports, statements
' then please check Document Parser SDK and Web API at https://bytescout.com/products/developer/documentparsersdk/index.html
' and https://pdf.co for secure and scalable web api
Class Program
Friend Shared Sub Main(args As String())
' Create Bytescout.PDFExtractor.TextExtractor instance
Dim extractor As New TextExtractor()
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"
' Load sample PDF document
extractor.LoadDocumentFromFile(".\Invoice.pdf")
extractor.RegexSearch = True ' Enable the regular expressions
Dim pageCount As Integer = extractor.GetPageCount()
' Search through pages
For i As Integer = 0 To pageCount - 1
' Search dates in format 12/31/1999
Dim regexPattern As String = "[0-9]{2}/[0-9]{2}/[0-9]{4}"
' See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
' Search each page for the pattern
If extractor.Find(i, regexPattern, False) Then
Do
Console.WriteLine("")
Console.WriteLine(("Found on page " & i & " at location ") + extractor.FoundText.Bounds.ToString())
Console.WriteLine("")
' Iterate through each element in the found text
For Each element As ISearchResultElement In extractor.FoundText.Elements
Console.WriteLine(" Text: " + element.Text)
Console.WriteLine(" Font is bold: " + element.FontIsBold.ToString())
Console.WriteLine(" Font is italic:" + element.FontIsItalic.ToString())
Console.WriteLine(" Font name: " + element.FontName)
Console.WriteLine(" Font size:" + element.FontSize.ToString())
Console.WriteLine(" Font color:" + element.FontColor.ToString())
Console.WriteLine()
Next
Loop While extractor.FindNext()
End If
Next
' Cleanup
extractor.Dispose()
Console.WriteLine()
Console.WriteLine("Press any key to continue...")
Console.ReadLine()
End Sub
End Class