Link Search Menu Expand Document

Check If OCR Is Required for PDF - C#

PDF Extractor SDK sample in C# demonstrating ‘Check If OCR Is Required for PDF’

using Bytescout.PDFExtractor;
using System;

namespace CheckIfOCRIsRequired
{
    /// <summary>
    /// Console sample that walks the "InputFiles" directory and, for each file,
    /// reports whether the SDK recommends OCR before extracting the text.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: checks every file in the "InputFiles" directory and then
        /// waits for the user to press Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            try
            {
                // Loop through all files in directory and check whether OCR operation is required
                foreach (string filePath in System.IO.Directory.GetFiles("InputFiles"))
                {
                    _CheckOCRRequired(filePath);
                }
            }
            catch (Exception ex)
            {
                // Top-level handler for the sample: report the failure instead of crashing.
                Console.WriteLine("Error: " + ex.Message);
            }

            Console.WriteLine("Press enter key to exit...");
            // Keep the console window open until the user acknowledges the prompt.
            Console.ReadLine();
        }

        /// <summary>
        /// Checks whether OCR is recommended for the first page of the given file,
        /// configures OCR on the extractor when it is, and prints the extracted text.
        /// </summary>
        /// <param name="filePath">Path of the PDF file to inspect.</param>
        private static void _CheckOCRRequired(string filePath)
        {
            // The extractor is disposed at the end of the using block.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationKey = "demo";
                extractor.RegistrationName = "demo";

                // Load document: it must be loaded before the page can be inspected or read.
                extractor.LoadDocumentFromFile(filePath);
                Console.WriteLine("\n*******************\n\nFilePath: {0}", filePath);

                // Only the first page is inspected.
                int pageIndex = 0;

                // Identify whether OCR operation is recommended for the page
                if (extractor.IsOCRRecommendedForPage(pageIndex))
                {
                    Console.WriteLine("\nOCR Recommended: True");

                    // Enable Optical Character Recognition (OCR)
                    // in .Auto mode (SDK automatically checks if needs to use OCR or not)
                    extractor.OCRMode = OCRMode.Auto;

                    // Set the location of language data files
                    // NOTE(review): hard-coded default install path; adjust if the SDK is installed elsewhere.
                    extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

                    // Set OCR language
                    // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish etc. -
                    // according to the files present in the language data folder.
                    extractor.OCRLanguage = "eng";

                    // Set PDF document rendering resolution
                    extractor.OCRResolution = 300;
                }
                else
                {
                    Console.WriteLine("\nOCR Recommended: False");
                }

                // Read all text
                var allExtractedText = extractor.GetText();
                Console.WriteLine("\nExtracted Text:\n{0}\n\n", allExtractedText);
            }
        }
    }
}



Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK