Make Searchable PDF Discarding Existing Content - C#
PDF Extractor SDK sample in C# demonstrating ‘Make Searchable PDF Discarding Existing Content’
Program.cs
using System.Diagnostics;
using Bytescout.PDFExtractor;
// To make OCR work you should references "Bytescout.PDFExtractor.dll" and "Bytescout.PDFExtractor.OCRExtension.dll" from your project.
namespace MakeSearchablePDFDiscardingExistingContent
{
class Program
{
static void Main(string[] args)
{
/*
By default, "SearchablePDFMaker" uses one of the standard PDF fonts to apply
recognized text over the scanned document. Such fonts contain only basic characters
from ISO-8859-1 charset.
If you run OCR for one of the languages with characters that are not present in the default
encoding, you should explicitly specify the font that contains the required characters
using ".LabelingFont" property.
If you run the application in Windows with a selected locale that matches OCR language,
it will be enough to specify the usual font "Arial". But if your app will run in an unknown
environment (for example, in some virtual machine) you will need to install some full Unicode
font (e.g. "Arial Unicode MS") and then use it with SearchablePDFMaker:
//searchablePDFMaker.LabelingFont = "Arial Unicode MS";
*/
// Create Bytescout.PDFExtractor.TextExtractor instance
SearchablePDFMaker searchablePDFMaker = new SearchablePDFMaker();
searchablePDFMaker.RegistrationName = "demo";
searchablePDFMaker.RegistrationKey = "demo";
// Load sample PDF document
searchablePDFMaker.LoadDocumentFromFile("sample_ocr_withText.pdf");
// Set the location of language data files
searchablePDFMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";
// Set OCR language
searchablePDFMaker.OCRLanguage = "eng"; // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "ocrdata" folder
// Set PDF document rendering resolution
searchablePDFMaker.OCRResolution = 300;
// Discard Existing Text in document
searchablePDFMaker.DiscardExistingDocumentText = true;
// Process document
searchablePDFMaker.MakePDFSearchable("output.pdf");
// Cleanup
searchablePDFMaker.Dispose();
// Open output file in default associated application
ProcessStartInfo processStartInfo = new ProcessStartInfo("output.pdf");
processStartInfo.UseShellExecute = true;
Process.Start(processStartInfo);
}
}
}