Find PDF Table and Extract as CSV - PowerShell
PDF Extractor SDK sample in PowerShell demonstrating "Find PDF Table and Extract as CSV".
@echo off
REM Wrapper that runs FindTableAndExtractAsCsv.ps1 (expected in the current
REM directory) against the PDF file named by the first argument.
REM
REM Usage:   FindTableAndExtractAsCsv.bat file_name
REM Example: FindTableAndExtractAsCsv.bat "sample3.pdf"
REM
REM Set NOPAUSE=1 to skip the interactive pause shown with the usage message.

REM %~1 strips surrounding quotes, so a missing or empty ("") argument is caught.
if "%~1"=="" (
    echo -----------------------------------------------------
    echo Invalid parameter!
    echo -----------------------------------------------------
    echo Usage: FindTableAndExtractAsCsv.bat file_name
    echo Example: FindTableAndExtractAsCsv.bat "sample3.pdf"
    echo -----------------------------------------------------
    if not "%NOPAUSE%"=="1" pause
    exit /b 1
)

REM -File hands the argument to the script as a literal string, so file names
REM containing spaces or PowerShell metacharacters are not re-parsed as code
REM (which is what -Command would do). "%~1" re-quotes the unquoted argument
REM exactly once, avoiding the doubled quotes that "%1" produces for quoted input.
powershell -NoProfile -ExecutionPolicy Bypass -File ".\FindTableAndExtractAsCsv.ps1" "%~1"
echo Script finished with errorlevel=%errorlevel%
Explore the SDK documentation here.
# Finds tables on every page of a PDF document and saves each one to its own
# CSV file named "page-<page number>-table-<table index>.csv" in the current
# directory.
#
# Parameters:
#   InputFileName - path of the PDF file to process (required).
#
# Exit code: 1 when the input file does not exist.
Param (
    # No default value: the parameter is mandatory, so a default would never be used.
    [Parameter(Mandatory = $true)]
    [string] $InputFileName
)

# Add reference to Bytescout.PDFExtractor.dll assembly
Add-Type -Path "C:\Program Files\Bytescout PDF Extractor SDK\net4.00\Bytescout.PDFExtractor.dll"

# Check input file exists.
# -LiteralPath keeps characters such as [ and ] in the name from being treated
# as wildcards; -PathType Leaf rejects directories.
if (-not (Test-Path -LiteralPath $InputFileName -PathType Leaf)) {
    Write-Host "Input file does not exist." -ForegroundColor Red
    # Non-zero exit code so callers can tell the run failed.
    Exit 1
}

# Create and activate Bytescout.PDFExtractor.CSVExtractor instance
$Extractor = New-Object Bytescout.PDFExtractor.CSVExtractor
$Extractor.RegistrationName = "demo"
$Extractor.RegistrationKey = "demo"

# Create and activate Bytescout.PDFExtractor.TableDetector instance
$Detector = New-Object Bytescout.PDFExtractor.TableDetector
$Detector.RegistrationName = "demo"
$Detector.RegistrationKey = "demo"

# Set table detection mode to "bordered tables" - best for tables with closed solid borders.
# 0 = ColumnDetectionMode_ContentGroupsAndBorders
# 1 = ColumnDetectionMode_ContentGroups
# 2 = ColumnDetectionMode_Borders
# 3 = ColumnDetectionMode_BorderedTables
$Detector.ColumnDetectionMode = 3

# We should define what kind of tables we should detect.
# So we set min required number of columns to 3 ...
$Detector.DetectionMinNumberOfColumns = 3
# ... and we set min required number of rows to 3
$Detector.DetectionMinNumberOfRows = 3

try {
    # Load the PDF document into both objects: the detector locates the tables,
    # the extractor exports them.
    $Extractor.LoadDocumentFromFile($InputFileName)
    $Detector.LoadDocumentFromFile($InputFileName)

    # Get page count
    $PageCount = $Extractor.GetPageCount()
    Write-Host "Pages: $PageCount"

    # Iterate through pages (page indexes are zero-based)
    for ($i = 0; $i -lt $PageCount; $i++) {
        # Table numbering restarts at 1 on every page
        $TableIndex = 1

        # Find first table and continue if found
        if ($Detector.FindTable($i) -eq $true) {
            do {
                # Set extraction area for CSV extractor to rectangle received from the table detector
                $Extractor.SetExtractionArea($Detector.FoundTableLocation)

                # Export the table to a CSV file; file names use a 1-based page number
                $PageNo = $i + 1
                # Construct output file name
                $OutputFileName = "page-$PageNo-table-$TableIndex.csv"
                $Extractor.SavePageCSVToFile($i, $OutputFileName)

                # Advance the index so the next table on this page gets its own
                # file instead of overwriting the previous one.
                $TableIndex++
            } while ($Detector.FindNextTable() -eq $true) # search next table
        }
    }
}
catch {
    Write-Host $_.Exception.Message
}
finally {
    # Cleanup: dispose the SDK objects whether or not extraction succeeded.
    if ($Extractor) { $Extractor.Dispose() }
    if ($Detector) { $Detector.Dispose() }
}
