Get Word Coordinates in XML - C#
PDF Extractor SDK sample in C# demonstrating ‘Get Word Coordinates in XML’
Program.cs
using System.IO;
using Bytescout.PDFExtractor;
using System.Diagnostics;
namespace PDF2XLS
{
    /// <summary>
    /// Console sample that loads a PDF with <c>XLSExtractor</c>, saves it as an
    /// XLS spreadsheet and opens the result in the associated application.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Converts "sample3.pdf" into "output.xls" and launches the result.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            const string inputFile = "sample3.pdf";
            const string outputFile = "output.xls";

            // Remove any result left over from a previous run.
            // File.Delete does not throw when the file does not exist.
            File.Delete(outputFile);

            // The using statement guarantees the extractor is disposed even when
            // loading or saving throws (e.g. the input PDF is missing).
            using (XLSExtractor extractor = new XLSExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Uncomment this line if you need all pages converted into a single worksheet:
                //extractor.PageToWorksheet = false;

                // Small spaces ratio so that text is split into separate words
                extractor.DetectNewColumnBySpacesRatio = 0.1f;

                // Extra settings intended to produce clean, word-only output:
                extractor.PreserveFormattingOnTextExtraction = false; // Get rid of empty nodes
                extractor.OutputFormat = SpreadseetOutputFormat.XLS; // Set the output format to XLS

                // Save the spreadsheet to file
                extractor.SaveToXLSFile(outputFile);
            }

            // Open result document in default associated application (for demo purpose).
            // UseShellExecute = true lets the OS shell pick the handler for the file type.
            ProcessStartInfo processStartInfo = new ProcessStartInfo(outputFile);
            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
    }
}