Get Word Coordinates in XML - VB.NET
PDF Extractor SDK sample in VB.NET demonstrating ‘Get Word Coordinates in XML’
Module1.vb
Imports Bytescout.PDFExtractor
Namespace PDF2XML
Class Program
Shared Sub Main(ByVal args As String())
' Create Bytescout.PDFExtractor.XMLExtractor instance
Dim extractor As New XMLExtractor()
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"
' Load sample PDF document
extractor.LoadDocumentFromFile("sample3.pdf")
' Add the following params to get clean data with word nodes only
extractor.DetectNewColumnBySpacesRatio = 0.1 ' this splits all text into words
extractor.PreserveFormattingOnTextExtraction = False ' Get rid Of empty nodes
extractor.SaveXMLToFile("output.XML")
' Cleanup
extractor.Dispose()
Console.WriteLine()
Console.WriteLine("Data has been extracted to 'output.XML' file.")
Console.WriteLine()
Console.WriteLine("Press any key...")
Console.ReadKey()
End Sub
End Class
End Namespace
Resources.Designer.vb
��' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
' <