Link Search Menu Expand Document

Extract Table Structure from PDF - C#

PDF Extractor SDK sample in C# demonstrating ‘Extract Table Structure from PDF’

Program.cs
using Bytescout.PDFExtractor;
using System.Diagnostics;
using System;

namespace TableStructure
{
	class Program
	{
		static void Main(string[] args)
		{
			// Create Bytescout.PDFExtractor.StructuredExtractor instance
			StructuredExtractor extractor = new StructuredExtractor();
			extractor.RegistrationName = "demo";
			extractor.RegistrationKey = "demo";

			// Load sample PDF document
			extractor.LoadDocumentFromFile(@".\sample3.pdf");

            for (int pageIndex = 0; pageIndex < extractor.GetPageCount(); pageIndex++)
            {
                Console.WriteLine("Starting extraction from page #" + pageIndex);
                Console.WriteLine();

                extractor.PrepareStructure(pageIndex);

                int rowCount = extractor.GetRowCount(pageIndex);

                for (int row = 0; row < rowCount; row++)
                {
                    int columnCount = extractor.GetColumnCount(pageIndex, row);

                    for (int col = 0; col < columnCount; col++)
                    {
                        Console.WriteLine(extractor.GetCellValue(pageIndex, row, col));
                    }
                }
            }

            // Cleanup
			extractor.Dispose();

            Console.WriteLine();
            Console.WriteLine("Press any key..");
            Console.ReadKey();
		}
	}
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK


Copyright © 2016 - 2021 ByteScout