Link Search Menu Expand Document

Make Searchable PDF Discarding Existing Content - C#

PDF Extractor SDK sample in C# demonstrating ‘Make Searchable PDF Discarding Existing Content’

Program.cs
using System.Diagnostics;
using Bytescout.PDFExtractor;

// To make OCR work you should reference "Bytescout.PDFExtractor.dll" and "Bytescout.PDFExtractor.OCRExtension.dll" from your project.

namespace MakeSearchablePDFDiscardingExistingContent
{
    /// <summary>
    /// Console sample: runs OCR over "sample_ocr_withText.pdf" with
    /// <c>SearchablePDFMaker</c>, discarding the text already present in the
    /// document, writes the result to "output.pdf" and opens it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Configures the maker, produces the searchable PDF and
        /// then opens the output file with its associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            /*
            By default, "SearchablePDFMaker" uses one of the standard PDF fonts to apply 
            recognized text over the scanned document. Such fonts contain only basic characters 
            from ISO-8859-1 charset. 
            If you run OCR for one of the languages with characters that are not present in the default
            encoding, you should explicitly specify the font that contains the required characters
            using ".LabelingFont" property.
            If you run the application in Windows with a selected locale that matches OCR language, 
            it will be enough to specify the usual font "Arial". But if your app will run in an unknown
            environment (for example, in some virtual machine) you will need to install some full Unicode 
            font (e.g. "Arial Unicode MS") and then use it with SearchablePDFMaker:

            //searchablePDFMaker.LabelingFont = "Arial Unicode MS";
            */

            // Create Bytescout.PDFExtractor.SearchablePDFMaker instance
            SearchablePDFMaker searchablePDFMaker = new SearchablePDFMaker();

            // try/finally guarantees Dispose() runs even when loading or OCR throws
            // (e.g. the input file or the language data folder is missing).
            try
            {
                searchablePDFMaker.RegistrationName = "demo";
                searchablePDFMaker.RegistrationKey = "demo";

                // Load sample PDF document
                searchablePDFMaker.LoadDocumentFromFile("sample_ocr_withText.pdf");

                // Set the location of language data files
                searchablePDFMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

                // Set OCR language
                searchablePDFMaker.OCRLanguage = "eng"; // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "ocrdata" folder

                // Set PDF document rendering resolution
                searchablePDFMaker.OCRResolution = 300;

                // Discard existing text in the document
                searchablePDFMaker.DiscardExistingDocumentText = true;

                // Process document
                searchablePDFMaker.MakePDFSearchable("output.pdf");
            }
            finally
            {
                // Cleanup
                searchablePDFMaker.Dispose();
            }

            // Open output file in default associated application.
            // Only reached when processing succeeded, so "output.pdf" has been written.
            ProcessStartInfo processStartInfo = new ProcessStartInfo("output.pdf");
            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK