Link Search Menu Expand Document

Repair Text in PDF - C#

This PDF Extractor SDK sample in C# demonstrates how to repair text in a PDF.

using Bytescout.PDFExtractor;
using System;

namespace RepairText
{
    /// <summary>
    /// Console sample: extracts all text from a PDF using the extractor's
    /// font-repairing OCR mode and prints the result.
    /// </summary>
    class Program
    {
        // NOTE(review): the original input file name was lost from this sample;
        // "sample.pdf" is a placeholder - replace with the document to repair.
        private const string DefaultInputFile = "sample.pdf";

        /// <summary>
        /// Entry point. Loads a PDF, configures OCR-based font repair, and writes
        /// the extracted text to the console.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments; when present, the first one is used as
        /// the path of the PDF to process instead of <see cref="DefaultInputFile"/>.
        /// </param>
        static void Main(string[] args)
        {
            string inputFile = (args != null && args.Length > 0) ? args[0] : DefaultInputFile;

            try
            {
                // Read all text from the PDF file
                using (TextExtractor extractor = new TextExtractor())
                {
                    // Load PDF document (must happen before GetText, otherwise there is nothing to extract)
                    extractor.LoadDocumentFromFile(inputFile);

                    // Set the font repairing OCR mode
                    extractor.OCRMode = OCRMode.TextFromImagesAndVectorsAndRepairedFonts;

                    // Set the location of OCR language data files
                    extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

                    // Set OCR language:
                    // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish, etc. -
                    // according to the files present in the OCR language data folder.
                    // NOTE(review): the original comment pointed to a download location for more
                    // language files, but the URL was lost from this sample.
                    extractor.OCRLanguage = "eng";

                    // Set PDF document rendering resolution
                    extractor.OCRResolution = 300;

                    // Read all text
                    string allText = extractor.GetText();

                    Console.WriteLine("Extracted Text: \n\n" + allText);
                }
            }
            catch (Exception ex)
            {
                // NOTE(review): the original handler body was lost; reporting the message
                // keeps the failure visible instead of silently swallowing it.
                Console.WriteLine("Error: " + ex.Message);
            }
        }
    }
}


Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK

Copyright © 2016 - 2021 ByteScout