Find PDF Table And Extract As CSV - C#
PDF Extractor SDK sample in C# demonstrating ‘Find PDF Table And Extract As CSV’
Program.cs
using System.Diagnostics;
using Bytescout.PDFExtractor;
namespace FindTableAndExtractAsCsv
{
class Program
{
static void Main(string[] args)
{
// Create Bytescout.PDFExtractor.CSVExtractor instance
CSVExtractor csvExtractor = new CSVExtractor();
csvExtractor.RegistrationName = "demo";
csvExtractor.RegistrationKey = "demo";
// Create Bytescout.PDFExtractor.TableDetector instance
TableDetector tableDetector = new TableDetector();
tableDetector.RegistrationKey = "demo";
tableDetector.RegistrationName = "demo";
// Set table detection mode to "bordered tables" - best for tables with closed solid borders.
tableDetector.ColumnDetectionMode = ColumnDetectionMode.BorderedTables;
// We should define what kind of tables we should detect.
// So we set min required number of columns to 3 ...
tableDetector.DetectionMinNumberOfColumns = 3;
// ... and we set min required number of rows to 3
tableDetector.DetectionMinNumberOfRows = 3;
// Load sample PDF document
csvExtractor.LoadDocumentFromFile(@".\sample3.pdf");
tableDetector.LoadDocumentFromFile(@".\sample3.pdf");
// Get page count
int pageCount = tableDetector.GetPageCount();
for (int i = 0; i < pageCount; i++)
{
int t = 1;
// Find first table and continue if found
if (tableDetector.FindTable(i))
{
do
{
// Set extraction area for CSV extractor to rectangle received from the table detector
csvExtractor.SetExtractionArea(tableDetector.FoundTableLocation);
// Export the table to CSV file
csvExtractor.SavePageCSVToFile(i, "page-" + i + "-table-" + t + ".csv");
t++;
}
while (tableDetector.FindNextTable()); // search next table
}
}
// Cleanup
csvExtractor.Dispose();
tableDetector.Dispose();
// Open first output file in default associated application (for demo purposes)
ProcessStartInfo processStartInfo = new ProcessStartInfo("page-0-table-1.csv");
processStartInfo.UseShellExecute = true;
Process.Start(processStartInfo);
}
}
}