PDF and OCR (Optical Character Recognition) - C++
PDF Extractor SDK sample in C++ demonstrating ‘PDF and OCR (Optical Character Recognition)’
CPPExample.cpp
	#include "stdafx.h"
	#include "comip.h"
	#import "c:\\Program Files\\Bytescout PDF Extractor SDK\\net4.00\\Bytescout.PDFExtractor.tlb" raw_interfaces_only
	using namespace Bytescout_PDFExtractor;
	int _tmain(int argc, _TCHAR* argv[])
	{
		// Initialize COM.
		HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
		// Create the interface pointer.
		_TextExtractorPtr pITextExtractor(__uuidof(TextExtractor));
		// Set the registration name and key
		// Note: You should use _bstr_t or BSTR to pass string to the library because of COM requirements
		_bstr_t bstrRegName(L"DEMO"); 
		pITextExtractor->put_RegistrationName(bstrRegName);
		
		_bstr_t bstrRegKey(L"DEMO");
		pITextExtractor->put_RegistrationKey(bstrRegKey);
		// Load sample PDF document
		_bstr_t bstrPath(L"..\\..\\sample_ocr.pdf");
		pITextExtractor->LoadDocumentFromFile(bstrPath);
		// Enable Optical Character Recognition (OCR)
		// in .Auto mode (SDK automatically checks if needs to use OCR or not)
		pITextExtractor->put_OCRMode(OCRMode_Auto);
		
		// Set the location of OCR language data files
		_bstr_t bstrOCRLangDataPath(L"c:\\Program Files\\Bytescout PDF Extractor SDK\\ocrdata");
		pITextExtractor->put_OCRLanguageDataFolder(bstrOCRLangDataPath);
		// Set OCR language
		_bstr_t bstrOCRLanguage(L"eng");
		pITextExtractor->put_OCRLanguage(bstrOCRLanguage);
		// Set PDF document rendering resolution
		pITextExtractor->put_OCRResolution(300);
		// You can also apply various preprocessing filters
		// to improve the recognition on low-quality scans.
		_ImagePreprocessingFiltersCollection* pIImagePreprocessingFilters;
		pITextExtractor->get_OCRImagePreprocessingFilters(&pIImagePreprocessingFilters);
		// Automatically deskew skewed scans
		//pIImagePreprocessingFilters->AddDeskew();
		// Remove vertical or horizontal lines (sometimes helps to avoid OCR engine's page segmentation errors)
		//pIImagePreprocessingFilters->AddVerticalLinesRemover();
		//pIImagePreprocessingFilters->AddHorizontalLinesRemover();
		// Repair broken letters
		//pIImagePreprocessingFilters->AddDilate();
		// Remove noise
		//pIImagePreprocessingFilters->AddMedian();
		// Apply Gamma Correction
		//pIImagePreprocessingFilters->AddGammaCorrection();
		// Add Contrast
		//pIImagePreprocessingFilters->AddContrast(20);
		// (!) You can use new OCRAnalyser class to find an optimal set of image preprocessing 
		// filters for your specific document.
		// See "OCR Analyser" example.
		// Save extracted text to file
		_bstr_t bstrOutputFile(L"output.txt");
		pITextExtractor->SaveTextToFile(bstrOutputFile);
		pITextExtractor->Release();
		CoUninitialize();
		return 0;
	}
stdafx.cpp
// stdafx.cpp : source file that includes just the standard includes
// CPPExample.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"
// TODO: reference any additional headers you need in STDAFX.H
// and not in this file
stdafx.h
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once
#include "targetver.h"
#include <stdio.h>
#include <tchar.h>
// TODO: reference additional headers your program requires here
targetver.h
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>