Link Search Menu Expand Document

Read Text From Noisy Image - C#

PDF Extractor SDK sample in C# demonstrating ‘Read Text From Noisy Image’

using Bytescout.PDFExtractor;
using System;

namespace ReadTextFromNoisyImage
{
    /// <summary>
    /// Console sample that runs OCR over a noisy image with PDF Extractor SDK
    /// and prints the recognized text.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Configures a <c>TextExtractor</c> for OCR, reads all text
        /// from the image given on the command line and writes it to the console.
        /// Any exception raised while processing is reported as a single
        /// "Exception: ..." line instead of terminating the process abnormally.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; the first one is the path of the noisy image to read.
        /// </param>
        static void Main(string[] args)
        {
            try
            {
                // The listing this sample was recovered from does not show which file
                // it loaded, so the input path is taken from the command line.
                if (args == null || args.Length == 0)
                {
                    Console.WriteLine("Usage: ReadTextFromNoisyImage <path-to-noisy-image>");
                    return;
                }

                //Read all text from noisy image file
                using (TextExtractor extractor = new TextExtractor())
                {
                    // Load noisy image document
                    extractor.LoadDocumentFromFile(args[0]);

                    // Set the font repairing OCR mode 
                    extractor.OCRMode = OCRMode.TextFromImagesAndVectorsAndRepairedFonts;

                    // Set the location of OCR language data files
                    extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

                    // Set OCR language:
                    // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish etc -
                    // according to the files present in the OCR language data folder.
                    // NOTE(review): the original comment pointed to a download location for more
                    // language files, but the link is missing from this listing.
                    extractor.OCRLanguage = "eng";

                    // Set document rendering resolution
                    extractor.OCRResolution = 300;

                    Console.WriteLine("Please wait while PDF Extractor SDK is processing noisy image to read data...");

                    // You can also apply various preprocessing filters
                    // to improve the recognition on low-quality scans.
                    // NOTE(review): the filter calls that belonged to the comments below are not
                    // present in this listing; add them from the SDK documentation as needed.
                    //
                    //  - Automatically deskew skewed scans
                    //  - Remove vertical or horizontal lines (sometimes helps to avoid
                    //    OCR engine's page segmentation errors)
                    //  - Repair broken letters
                    //  - Remove noise
                    //  - Apply Gamma Correction
                    //  - Add Contrast

                    // (!) You can use new OCRAnalyser class to find an optimal set of image preprocessing 
                    // filters for your specific document.
                    // See "OCR Analyser" example.

                    //Read all text
                    string allText = extractor.GetText();

                    Console.WriteLine("Extracted Text: \n\n" + allText);
                }
            }
            catch (Exception ex)
            {
                // Report the failure message only; the sample is meant to stay readable
                // for end users rather than dump a stack trace.
                Console.WriteLine("Exception: " + ex.Message);
            }
        }
    }
}


Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK