Find Table And Extract As CSV from PDF - C++
PDF Extractor SDK sample in C++ demonstrating ‘Find Table And Extract As CSV from PDF’
FindTableAndExtractAsCsv.cpp
#include "stdafx.h"
#include "comip.h"
#include <comdef.h>
#import "c:\\Program Files\\Bytescout PDF Extractor SDK\\net4.00\\Bytescout.PDFExtractor.tlb" raw_interfaces_only
using namespace Bytescout_PDFExtractor;
int _tmain(int argc, _TCHAR* argv[])
{
// Initialize COM.
HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
// Create CSVExtractor instance
_CSVExtractorPtr pICSVExtractor(__uuidof(CSVExtractor));
pICSVExtractor->put_RegistrationName(_bstr_t(L"DEMO"));
pICSVExtractor->put_RegistrationKey(_bstr_t(L"DEMO"));
// Create TableDetector instance
_TableDetectorPtr pITableDetector(__uuidof(TableDetector));
pITableDetector->put_RegistrationName(_bstr_t(L"DEMO"));
pITableDetector->put_RegistrationKey(_bstr_t(L"DEMO"));
// Set table detection mode to "bordered tables" - best for tables with closed solid borders.
pITableDetector->put_ColumnDetectionMode(ColumnDetectionMode_BorderedTables);
// We should define what kind of tables we should detect.
// So we set min required number of columns to 2 ...
pITableDetector->put_DetectionMinNumberOfColumns(2);
// ... and we set min required number of rows to 2
pITableDetector->put_DetectionMinNumberOfRows(2);
// Load sample PDF document
_bstr_t inputFile(L"..\\..\\sample3.pdf");
pICSVExtractor->LoadDocumentFromFile(inputFile);
pITableDetector->LoadDocumentFromFile(inputFile);
// Get page count
long pageCount;
pITableDetector->GetPageCount(&pageCount);
for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
{
int t = 1;
VARIANT_BOOL vbResult;
// Find first table and continue if found
pITableDetector->FindTable(pageIndex, &vbResult);
if (vbResult == VARIANT_TRUE)
{
do
{
float left, top, width, height;
pITableDetector->GetFoundTableRectangle_Left(&left);
pITableDetector->GetFoundTableRectangle_Top(&top);
pITableDetector->GetFoundTableRectangle_Width(&width);
pITableDetector->GetFoundTableRectangle_Height(&height);
// Set extraction area for CSV extractor to rectangle received from the table detector
pICSVExtractor->SetExtractionArea(left, top, width, height);
// Export the table to CSV file
CString fileName;
fileName.Format(L"page-%d-table-%d.csv", pageIndex, t);
pICSVExtractor->SavePageCSVToFile(pageIndex, _bstr_t(fileName));
t++;
pITableDetector->FindNextTable(&vbResult);
} while (vbResult == VARIANT_TRUE);
}
}
pICSVExtractor->Release();
pITableDetector->Release();
CoUninitialize();
return 0;
}
stdafx.cpp
// stdafx.cpp : source file that includes just the standard includes
// CPPExample.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"
// TODO: reference any additional headers you need in STDAFX.H
// and not in this file
stdafx.h
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#pragma once
#include "targetver.h"
#include <stdio.h>
#include <tchar.h>
// TODO: reference additional headers your program requires here
#include <atlstr.h>
targetver.h
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>