Link Search Menu Expand Document

Get Word Coordinates in JSON - C#

PDF Extractor SDK sample in C# demonstrating ‘Get Word Coordinates in JSON’

Program.cs
using Bytescout.PDFExtractor;
using System;
using System.Diagnostics;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample: extracts the content of a PDF document to a JSON file
    /// using <c>Bytescout.PDFExtractor.JSONExtractor</c>, then opens the result.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample3.pdf", saves its extracted content to "output.json",
        /// waits for a key press, and opens the JSON file via the OS shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create the Bytescout.PDFExtractor.JSONExtractor instance inside a
            // `using` statement so it is disposed even if loading or saving throws.
            using (JSONExtractor extractor = new JSONExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Add the following params to get clean data with word nodes only:
                extractor.DetectNewColumnBySpacesRatio = 0.1f;  // this splits all text into words
                extractor.PreserveFormattingOnTextExtraction = false;  // get rid of empty nodes

                extractor.SaveJSONToFile("output.json");
            }

            Console.WriteLine();
            Console.WriteLine("Data has been extracted to 'output.json' file.");
            Console.WriteLine();
            Console.WriteLine("Press any key to continue and open JSON in default viewer...");
            Console.ReadKey();

            // UseShellExecute = true asks the OS shell to open the file rather than
            // trying to run "output.json" as an executable.
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.json");
            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK