Save OCR Objects As XML - C#
Text Recognition SDK sample in C# demonstrating ‘Save OCR Objects As XML’
Program.cs
using System;
using System.Diagnostics;
using ByteScout.TextRecognition;
namespace SaveOcrObjectAsXml
{
    /// <summary>
    /// Console sample that runs OCR on a document and saves the recognized
    /// word objects to an XML file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads <c>.\ocr-sample.pdf</c>, saves its OCR word objects to <c>.\result.xml</c>,
        /// then opens the result file. Any exception is written to the console;
        /// the program always waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument = @".\ocr-sample.pdf";
            string outputDocument = @".\result.xml";

            try
            {
                // Create and activate TextRecognizer instance.
                // The constructor is inside the try block so that a failure while creating
                // the recognizer is reported on the console instead of crashing the sample.
                // NOTE(review): the two "demo" arguments are presumably registration name/key - confirm against SDK docs.
                using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
                {
                    // Load document (image or PDF)
                    textRecognizer.LoadDocument(inputDocument);

                    // Set the location of OCR language data files
                    textRecognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\ocrdata_best\";

                    // Set OCR language.
                    // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish, etc. - according to files in "ocrdata" folder
                    // Find more language files at https://github.com/bytescout/ocrdata
                    textRecognizer.OCRLanguage = "eng";

                    // Recognize text from page and save each OCR word object to XML.
                    // NOTE(review): the second argument (0) is presumably the page index - confirm against SDK docs.
                    textRecognizer.SaveOCRObjectsAsXML(outputDocument, 0, OCRObjectType.Word);

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute must be true to open a document rather than an executable;
                    // it defaults to false on .NET Core / .NET 5+, where the bare
                    // Process.Start(path) overload fails for non-executable files.
                    ProcessStartInfo startInfo = new ProcessStartInfo(outputDocument)
                    {
                        UseShellExecute = true
                    };
                    Process.Start(startInfo);
                }
            }
            catch (Exception exception)
            {
                // Demo-level handling: print the full exception and fall through to the prompt.
                Console.WriteLine(exception);
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }
    }
}