Link Search Menu Expand Document

Find Borderless Table in PDF And Extract As CSV - VB.NET

PDF Extractor SDK sample in VB.NET demonstrating ‘Find Borderless Table in PDF And Extract As CSV’

Program.vb
Imports System.Collections.Generic
Imports Bytescout.PDFExtractor

''' <summary>
''' Sample: detects tables on every page of a PDF with TableDetector2 and
''' exports each detected table to its own CSV file with CSVExtractor.
''' </summary>
Class Program
    ''' <summary>
    ''' Loads ".\sample_borderless.pdf", writes one CSV file per detected table
    ''' (named "page-{page}-table-{table}.csv", both numbers 1-based), then prints
    ''' a summary of the files written and waits for Enter.
    ''' </summary>
    ''' <param name="args">Command-line arguments; not used.</param>
    Friend Shared Sub Main(args As String())

        ' Names of the CSV files written, in the order they were produced
        Dim extractedCsvFiles As New List(Of String)()

        ' Declared outside the Try block so the Finally block can release
        ' whichever of the two was actually created.
        Dim csvExtractor As CSVExtractor = Nothing
        Dim tableDetector As TableDetector2 = Nothing

        Try
            ' Create Bytescout.PDFExtractor.CSVExtractor instance
            csvExtractor = New CSVExtractor()
            csvExtractor.RegistrationName = "demo"
            csvExtractor.RegistrationKey = "demo"

            ' Create Bytescout.PDFExtractor.TableDetector2 instance
            tableDetector = New TableDetector2()
            tableDetector.RegistrationName = "demo"
            tableDetector.RegistrationKey = "demo"

            ' Load the same sample PDF document into both objects
            csvExtractor.LoadDocumentFromFile(".\sample_borderless.pdf")
            tableDetector.LoadDocumentFromFile(".\sample_borderless.pdf")

            ' Get page count
            Dim pageCount As Integer = tableDetector.GetPageCount()

            For pageIndex As Integer = 0 To pageCount - 1
                Dim foundTables As DetectedTable() = tableDetector.FindTables(pageIndex).ToArray()

                ' Export every table found on this page. No explicit "any tables?" check
                ' is needed: the loop body simply does not run for an empty array.
                For indexTable As Integer = 0 To foundTables.Length - 1
                    ' Set extraction area for CSV extractor to rectangle received from the table detector
                    csvExtractor.SetExtractionArea(foundTables(indexTable).Bounds)

                    ' Result CSV file name (page and table numbers are 1-based)
                    Dim outputCsvName As String = $"page-{pageIndex + 1}-table-{indexTable + 1}.csv"

                    ' Export the table to CSV file
                    csvExtractor.SavePageCSVToFile(pageIndex, outputCsvName)
                    extractedCsvFiles.Add(outputCsvName)
                Next
            Next
        Finally
            ' Cleanup: runs even when loading, detection or export throws,
            ' so the SDK objects are always released.
            If csvExtractor IsNot Nothing Then
                csvExtractor.Dispose()
            End If
            If tableDetector IsNot Nothing Then
                tableDetector.Dispose()
            End If
        End Try

        ' Show Summary
        Console.Clear()
        If extractedCsvFiles.Count > 0 Then
            Console.WriteLine($"Total {extractedCsvFiles.Count} tables found!")
            Console.WriteLine("--------------------------")
            Console.WriteLine(String.Join(Environment.NewLine, extractedCsvFiles))
        Else
            Console.WriteLine("No Table Found!")
        End If

        ' Keep the console window open until the user presses Enter
        Console.ReadLine()

    End Sub
End Class

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK