Link Search Menu Expand Document

OCR Modes - C#

PDF Extractor SDK sample in C# demonstrating ‘OCR Modes’

using Bytescout.PDFExtractor;
using System;

// To make OCR work you should add the following references to your project:
// 'Bytescout.PDFExtractor.dll', 'Bytescout.PDFExtractor.OCRExtension.dll'.

namespace OCRModes
{
    /// <summary>
    /// Console sample that extracts text from one PDF several times,
    /// once per OCR mode, and prints each result under a banner.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Input document containing vector, image and font
            string inputDocument = @".\SampleWith_Vector_Image_Font.pdf";

            // Extracting text with different OCRModes
            // 1. TextFromImagesOnly (Plain Mode)
            Console.WriteLine("---------------------------------\nExtraction Mode: TextFromImagesOnly \n---------------------------------");
            var resultText = _ExtractTextWithSpecificOCRMode(inputDocument, OCRMode.TextFromImagesOnly);
            Console.WriteLine(resultText);

            // 2. TextFromVectorOnly (Plain Mode)
            Console.WriteLine("---------------------------------\nExtraction Mode: TextFromVectorOnly \n---------------------------------");
            resultText = _ExtractTextWithSpecificOCRMode(inputDocument, OCRMode.TextFromVectorsOnly);
            Console.WriteLine(resultText);

            // 3. TextFromImagesAndFonts (Combined Mode)
            Console.WriteLine("---------------------------------\nExtraction Mode: TextFromImagesAndFonts \n---------------------------------");
            resultText = _ExtractTextWithSpecificOCRMode(inputDocument, OCRMode.TextFromImagesAndFonts);
            Console.WriteLine(resultText);

            // 4. TextFromImagesAndVectorsAndFonts (Combined Mode)
            Console.WriteLine("---------------------------------\nExtraction Mode: TextFromImagesAndVectorsAndFonts \n---------------------------------");
            resultText = _ExtractTextWithSpecificOCRMode(inputDocument, OCRMode.TextFromImagesAndVectorsAndFonts);
            Console.WriteLine(resultText);
        }

        /// <summary>
        /// Extracts text from a document using the specified OCR mode.
        /// </summary>
        /// <param name="inputDocument">Path of the PDF file to load into the extractor.</param>
        /// <param name="ocrMode">OCR mode assigned to the extractor before the text is read.</param>
        /// <returns>The text returned by <c>TextExtractor.GetText()</c> for the loaded document.</returns>
        private static string _ExtractTextWithSpecificOCRMode(string inputDocument, OCRMode ocrMode)
        {
            // Location of language data files
            string ocrLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

            // OCR language: "eng" for English, "deu" for German, "fra" for French,
            // "spa" for Spanish etc. - according to files in the "ocrdata" folder.
            // NOTE(review): the original comment pointed to a download location for
            // more language files, but the link is missing from this copy of the sample.
            string ocrLanguage = "eng";

            // Create TextExtractor instance; the using statement disposes it
            // once the text has been read.
            using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
            {
                // Load document to TextExtractor
                textExtractor.LoadDocumentFromFile(inputDocument);

                // Specify Ocr Mode
                textExtractor.OCRMode = ocrMode;

                // Ocr language data folder path and language
                textExtractor.OCRLanguageDataFolder = ocrLanguageDataFolder;
                textExtractor.OCRLanguage = ocrLanguage;

                // Return extracted text
                return textExtractor.GetText();
            }
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK