Link Search Menu Expand Document

Get Word Coordinates in XML - C#

PDF Extractor SDK sample in C# demonstrating ‘Get Word Coordinates in XML’

using System.IO;
using Bytescout.PDFExtractor;
using System.Diagnostics;

namespace PDF2XLS
{
    /// <summary>
    /// Console sample that converts a PDF document into an XLS spreadsheet in which
    /// the text is split into individual words, using Bytescout's <c>XLSExtractor</c>.
    /// </summary>
    class Program
    {
        // NOTE(review): the input file name is not present in the published sample text;
        // "sample.pdf" is a placeholder default - confirm against the original sample project.
        private const string DefaultInputFile = "sample.pdf";

        // Name of the spreadsheet that is written and then opened for the demo.
        private const string OutputFile = "output.xls";

        /// <summary>
        /// Entry point: loads a PDF, configures word-level extraction, saves the
        /// result as an XLS file, and opens it in the associated application.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments; when present, the first one is used as the
        /// path of the PDF to convert instead of the default input file.
        /// </param>
        static void Main(string[] args)
        {
            string inputFile = args.Length > 0 ? args[0] : DefaultInputFile;

            // Create Bytescout.PDFExtractor.XLSExtractor instance
            XLSExtractor extractor = new XLSExtractor();
            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Uncomment this line if you need all pages converted into a single worksheet:
                //extractor.PageToWorksheet = false;

                // Splits all text into words
                extractor.DetectNewColumnBySpacesRatio = 0.1f;

                // Add the following params to get clean data with word nodes only:
                extractor.PreserveFormattingOnTextExtraction = false; // Get rid of empty nodes

                // Set the output format to XLS.
                // NOTE(review): "SpreadseetOutputFormat" is kept exactly as spelled in the sample;
                // it is presumably the SDK's own identifier spelling - confirm before "fixing" it.
                extractor.OutputFormat = SpreadseetOutputFormat.XLS;

                // Save the spreadsheet to file
                extractor.SaveToXLSFile(OutputFile);
            }
            finally
            {
                // Cleanup: release the extractor even if loading or saving throws.
                extractor.Dispose();
            }

            // Open result document in default associated application (for demo purpose)
            ProcessStartInfo processStartInfo = new ProcessStartInfo(OutputFile);
            processStartInfo.UseShellExecute = true; // let the OS shell pick the application registered for .xls
            Process.Start(processStartInfo);
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK