Link Search Menu Expand Document

Check If OCR Is Required for PDF - VB.NET

PDF Extractor SDK sample in VB.NET demonstrating ‘Check If OCR Is Required for PDF’

Imports Bytescout.PDFExtractor

Module Program

    Sub Main()


            ' Loop through all files in directory and check whether OCR operation is required
            For Each filePath As String In System.IO.Directory.GetFiles("InputFiles")

        Catch ex As Exception
            Console.WriteLine("Error: " + ex.Message)
        End Try

        Console.WriteLine("Press enter key to exit...")

    End Sub

    ''' <summary>
    ''' Check whether OCR Operation is required
    ''' </summary>
    ''' <param name="filePath"></param>
    Private Sub _CheckOCRRequired(ByVal filePath As String)

        ' Read all file content...
        Using extractor As TextExtractor = New TextExtractor()

            extractor.RegistrationKey = "demo"
            extractor.RegistrationName = "demo"

            ' Load document
            Console.WriteLine("{1}*******************{1}{1}FilePath: {0}", filePath, vbLf)

            Dim pageIndex As Int32 = 0

            ' Identify OCR operation is recommended for page
            If (extractor.IsOCRRecommendedForPage(pageIndex)) Then

                Console.WriteLine("{0}OCR Recommended: True", vbLf)

                ' Enable Optical Character Recognition (OCR)
                ' in .Auto mode (SDK automatically checks if needs to use OCR or not)
                extractor.OCRMode = OCRMode.Auto

                ' Set the location of OCR language data files
                extractor.OCRLanguageDataFolder = "c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\"

                ' Set OCR language
                extractor.OCRLanguage = "eng" ' "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "ocrdata" folder
                ' Find more language files at

                ' Set PDF document rendering resolution
                extractor.OCRResolution = 300

                Console.WriteLine("{0}OCR Recommended: False", vbLf)
            End If

            ' Read all text
            Dim allExtractedText = extractor.GetText()
            Console.WriteLine("{1}Extracted Text:{1}{0}{1}{1}", allExtractedText, vbLf)

        End Using

    End Sub

End Module

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK