Link Search Menu Expand Document

Save OCR Objects As XML - C#

Text Recognition SDK sample in C# demonstrating ‘Save OCR Objects As XML’

using System;
using System.Diagnostics;
using ByteScout.TextRecognition;

namespace SaveOcrObjectAsXml
{
    /// <summary>
    /// Console sample: runs OCR on a document with the ByteScout Text Recognition SDK
    /// and saves the recognized word objects to an XML file.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Loads the sample document, configures the OCR engine, writes the recognized
        /// word objects to <c>result.xml</c>, and opens that file for inspection.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            string inputDocument = @".\ocr-sample.pdf";
            string outputDocument = @".\result.xml";

            // Create and activate TextRecognizer instance ("demo"/"demo" are the sample registration values)
            using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
            {
                try
                {
                    // Load document (image or PDF)
                    // NOTE(review): LoadDocument is the SDK call this sample is expected to use here - confirm against the SDK reference.
                    textRecognizer.LoadDocument(inputDocument);

                    // Set the location of OCR language data files
                    textRecognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\ocrdata_best\";

                    // Set OCR language.
                    // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish, etc. - according to files in "ocrdata" folder
                    // NOTE(review): the original comment pointed to a location with more language files, but the URL is missing from this copy.
                    textRecognizer.OCRLanguage = "eng";

                    // Recognize text from page and save each OCR word object to XML
                    // (the second argument, 0, is presumably the page index - confirm against the SDK reference)
                    textRecognizer.SaveOCRObjectsAsXML(outputDocument, 0, OCRObjectType.Word);

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute is set explicitly so the file is opened through the OS shell
                    // regardless of the runtime's default for that property.
                    Process.Start(new ProcessStartInfo(outputDocument) { UseShellExecute = true });
                }
                catch (Exception exception)
                {
                    // Demo-level handling: report the failure and fall through to the key prompt.
                    Console.WriteLine(exception);
                }
            }

            Console.WriteLine("Press any key...");
            // Keep the console window open until the user responds to the prompt.
            Console.ReadKey();
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore Text Recognition SDK

Copyright © 2016 - 2021 ByteScout