Link Search Menu Expand Document

Get Word Coordinates in XML - C#

PDF Extractor SDK sample in C# demonstrating ‘Get Word Coordinates in XML’

Program.cs
using System.IO;
using Bytescout.PDFExtractor;
using System.Diagnostics;

namespace PDF2XLS
{

    /// <summary>
    /// Console sample: loads "sample3.pdf" with the Bytescout XLSExtractor,
    /// saves it as "output.xls", and opens the result with the shell's
    /// default associated application.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Configures the extractor, writes "output.xls" and then
        /// launches it. The extractor is disposed even when loading or saving
        /// throws.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.XLSExtractor instance
            XLSExtractor extractor = new XLSExtractor();

            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Remove any output left over from a previous run.
                File.Delete("output.xls");

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Uncomment this line if you need all pages converted into a single worksheet:
                //extractor.PageToWorksheet = false;

                // Splits all text into words
                extractor.DetectNewColumnBySpacesRatio = 0.1f;

                // Add the following params to get clean data with word nodes only:
                extractor.PreserveFormattingOnTextExtraction = false; // Get rid of empty nodes
                extractor.OutputFormat = SpreadseetOutputFormat.XLS; // Set the output format to XLS

                // Save the spreadsheet to file
                extractor.SaveToXLSFile("output.xls");
            }
            finally
            {
                // Cleanup: runs on both the success and the failure path so the
                // extractor is never left undisposed when an exception escapes.
                extractor.Dispose();
            }

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.xls");
            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK