Find Table in PDF And Extract As XML - VB.NET
PDF Extractor SDK sample in VB.NET demonstrating ‘Find Table in PDF And Extract As XML’
Program.vb
Imports Bytescout.PDFExtractor
''' <summary>
''' Console sample: detects tables on every page of a PDF document with
''' TableDetector and exports each detected table region to its own XML
''' file with XMLExtractor.
''' </summary>
Class Program
    ''' <summary>
    ''' Loads ".\sample3.pdf", finds tables having at least 3 columns and 3 rows,
    ''' writes each one to "page-{pageIndex}-table-{tableNumber}.xml" in the
    ''' current directory, and finally opens the first file that was written.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Friend Shared Sub Main(args As String())
        ' Single definition of the input path so both SDK objects load the same file.
        Const InputFile As String = ".\sample3.pdf"

        ' Name of the first XML file actually written; stays Nothing when no table is found.
        Dim firstOutputFile As String = Nothing

        ' Create Bytescout.PDFExtractor.XMLExtractor instance
        Dim xmlExtractor As New XMLExtractor()
        Try
            xmlExtractor.RegistrationName = "demo"
            xmlExtractor.RegistrationKey = "demo"

            ' Create Bytescout.PDFExtractor.TableDetector instance
            Dim tableDetector As New TableDetector()
            Try
                tableDetector.RegistrationName = "demo"
                tableDetector.RegistrationKey = "demo"

                ' Define what kind of tables should be detected:
                ' at least 3 columns ...
                tableDetector.DetectionMinNumberOfColumns = 3
                ' ... and at least 3 rows.
                tableDetector.DetectionMinNumberOfRows = 3

                ' Load sample PDF document into both objects
                xmlExtractor.LoadDocumentFromFile(InputFile)
                tableDetector.LoadDocumentFromFile(InputFile)

                ' Get page count
                Dim pageCount As Integer = tableDetector.GetPageCount()

                For i As Integer = 0 To pageCount - 1
                    ' 1-based table counter, restarted for every page
                    Dim t As Integer = 1

                    ' Find first table and continue if found
                    If tableDetector.FindTable(i) Then
                        Do
                            ' Restrict the XML extractor to the rectangle reported by the table detector
                            xmlExtractor.SetExtractionArea(tableDetector.FoundTableLocation)

                            ' Export the table to XML file
                            Dim outputFile As String = "page-" & i.ToString() & "-table-" & t.ToString() & ".xml"
                            xmlExtractor.SavePageXMLToFile(i, outputFile)

                            ' Remember the first result so the demo can open a file that really exists
                            If firstOutputFile Is Nothing Then
                                firstOutputFile = outputFile
                            End If

                            t = t + 1
                        Loop While tableDetector.FindNextTable()
                    End If
                Next
            Finally
                ' Cleanup runs even if loading, detection or export throws
                tableDetector.Dispose()
            End Try
        Finally
            ' Cleanup runs even if loading, detection or export throws
            xmlExtractor.Dispose()
        End Try

        ' Open first output file in default associated application (for demo purposes).
        ' Guarded: the original hard-coded "page-0-table-1.xml" does not exist when
        ' page 0 contains no table matching the detection thresholds.
        If firstOutputFile IsNot Nothing AndAlso System.IO.File.Exists(firstOutputFile) Then
            System.Diagnostics.Process.Start(firstOutputFile)
        Else
            System.Console.WriteLine("No tables were found; nothing to open.")
        End If
    End Sub
End Class