Extract Text By Columns from PDF - PowerShell
PDF Extractor SDK sample in PowerShell demonstrating ‘Extract Text By Columns from PDF’
ExtractTextByColumns.bat
@echo off
rem ---------------------------------------------------------------------------
rem Launcher for ExtractTextByColumns.ps1.
rem
rem Usage:   ExtractTextByColumns.bat file_name
rem Example: ExtractTextByColumns.bat "columns.pdf"
rem
rem Set NOPAUSE=1 in the environment to skip every "press any key" prompt,
rem which allows the launcher to be used from unattended scripts.
rem The exit code of the PowerShell script is returned to the caller.
rem ---------------------------------------------------------------------------
setlocal

rem %~1 is the first argument with surrounding quotes removed, so the test
rem works for both quoted and unquoted file names.
if "%~1"=="" (
    echo -----------------------------------------------------
    echo Invalid parameter!
    echo -----------------------------------------------------
    echo Usage: ExtractTextByColumns.bat file_name
    echo Example: ExtractTextByColumns.bat "columns.pdf"
    echo -----------------------------------------------------
    if not "%NOPAUSE%"=="1" pause
    exit /b 1
)

rem -File passes the argument to the script verbatim (no re-parsing as
rem PowerShell code, so paths with spaces stay intact). %~dp0 is the folder of
rem this batch file, so the .ps1 is found regardless of the current directory.
rem "%~1" strips the caller's quotes before re-quoting to avoid ""name"".
powershell -NoProfile -ExecutionPolicy Bypass -File "%~dp0ExtractTextByColumns.ps1" "%~1"

rem Capture the result immediately; later commands would overwrite errorlevel.
set "SCRIPT_EXIT=%errorlevel%"
echo Script finished with errorlevel=%SCRIPT_EXIT%
if not "%NOPAUSE%"=="1" pause
exit /b %SCRIPT_EXIT%
ExtractTextByColumns.ps1
#*******************************************************************************************#
# #
# Download Free Evaluation Version From: https://bytescout.com/download/web-installer #
# #
# Also available as Web API! Get Your Free API Key: https://app.pdf.co/signup #
# #
# Copyright © 2017-2020 ByteScout, Inc. All rights reserved. #
# https://www.bytescout.com #
# https://pdf.co #
# #
#*******************************************************************************************#
# Extracts the text of a PDF document column by column and saves it as a .txt
# file with the same base name, next to the input document.
#
# Parameters:
#   InputFileName - path of the PDF document to process (required).
#
# Exit codes:
#   0 - text was extracted and saved
#   1 - the input file does not exist, or loading/extracting/saving failed
Param (
    [Parameter(Mandatory = $true)]
    [string] $InputFileName
)

#Add reference to Bytescout.PDFExtractor.dll assembly
Add-Type -Path "C:\Program Files\Bytescout PDF Extractor SDK\net4.00\Bytescout.PDFExtractor.dll"

# Check input file exists.
# -LiteralPath keeps characters such as [ and ] in the file name from being
# treated as wildcards; -PathType Leaf rejects directories.
if (-not (Test-Path -LiteralPath $InputFileName -PathType Leaf)) {
    Write-Host "Input file does not exist." -ForegroundColor Red
    # Non-zero so that the calling batch file reports the failure.
    exit 1
}

# Resolve to an absolute file-system path: the .NET methods called below
# resolve relative paths against the process working directory, which can
# differ from the current PowerShell location.
$InputFilePath = (Resolve-Path -LiteralPath $InputFileName).ProviderPath

$Extractor = $null
$ExitCode = 0

try {
    # Create and activate Bytescout.PDFExtractor.TextExtractor instance.
    # Done inside the try block so that any terminating error raised while
    # creating or using the extractor reaches the catch block below.
    $Extractor = New-Object Bytescout.PDFExtractor.TextExtractor
    $Extractor.RegistrationName = "demo"
    $Extractor.RegistrationKey = "demo"

    # Load sample PDF document
    $Extractor.LoadDocumentFromFile($InputFilePath)

    # Extract text by columns (useful if PDF document is designed in column layout like a newspaper)
    $Extractor.ExtractColumnByColumn = $true

    # Construct output file name: same path as the input, extension replaced by .txt
    $OutputFileName = [System.IO.Path]::ChangeExtension($InputFilePath, "txt")

    # Save extracted text to file
    $Extractor.SaveTextToFile($OutputFileName)

    Write-Host "Data has been extracted to $OutputFileName file."
} catch {
    # Report the problem and remember that the run failed.
    Write-Host $_.Exception.Message -ForegroundColor Red
    $ExitCode = 1
} finally {
    # Always release the extractor; it is still $null if creation failed.
    if ($null -ne $Extractor) {
        $Extractor.Dispose()
    }
}

exit $ExitCode