Return to previous page Explore PDF Extractor SDK

Find PDF Table And Extract As XML - C#

PDF Extractor SDK sample in C# demonstrating ‘Find PDF Table And Extract As XML’

View on GitHub Download Source Code (.zip)

Program.cs

using System.Diagnostics;
using Bytescout.PDFExtractor;

namespace FindTableAndExtractAsXml
{
    class Program
    {
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.XMLExtractor instance
            XMLExtractor xmlExtractor = new XMLExtractor();
            xmlExtractor.RegistrationName = "demo";
            xmlExtractor.RegistrationKey = "demo";

            // Create Bytescout.PDFExtractor.TableDetector instance
            TableDetector tableDetector = new TableDetector();
            tableDetector.RegistrationKey = "demo";
            tableDetector.RegistrationName = "demo";

            // We should define what kind of tables we should detect.
            // So we set min required number of columns to 3 ...
            tableDetector.DetectionMinNumberOfColumns = 3;
            // ... and we set min required number of rows to 3
            tableDetector.DetectionMinNumberOfRows = 3;

            // Load sample PDF document
            xmlExtractor.LoadDocumentFromFile(@".\sample3.pdf");
            tableDetector.LoadDocumentFromFile(@".\sample3.pdf");

            // Get page count
            int pageCount = tableDetector.GetPageCount();

            for (int i = 0; i < pageCount; i++)
            {
                int t = 1;
                // Find first table and continue if found
                if (tableDetector.FindTable(i))
                {
                    do
                    {
                        // Set extraction area for XML extractor to rectangle received from the table detector
                        xmlExtractor.SetExtractionArea(tableDetector.FoundTableLocation);
                        // Export the table to XML file
                        xmlExtractor.SavePageXMLToFile(i, "page-" + i + "-table-" + t + ".xml");
                        t++;
                    } 
                    while (tableDetector.FindNextTable()); // search next table
                }
            }

            // Cleanup
            xmlExtractor.Dispose();
            tableDetector.Dispose();

            // Open first output file in default associated application (for demo purposes)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("page-0-table-1.xml");
            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
    }
}

Find PDF Table And Extract As XML - C#

Program.cs

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK