Return to previous page Explore PDF Extractor SDK

Load Unicode CSV to DataTable using OLEDB - C#

PDF Extractor SDK sample in C# demonstrating ‘Load Unicode CSV to DataTable using OLEDB’

View on GitHub Download Source Code (.zip)

Program.cs

using Bytescout.PDFExtractor;
using System;
using System.Data;
using System.Data.OleDb;
using System.Globalization;
using System.IO;

namespace ConsoleApplication1
{
    class Program
    {
        static void Main(string[] args)
        {
            string inputDocument = Path.GetFullPath(@".\UnicodeSample.pdf");
            string csvFilePath = Path.ChangeExtension(inputDocument, ".csv");
            string csvFileName = Path.GetFileName(csvFilePath);
            string csvDirectory = Path.GetDirectoryName(Path.GetFullPath(csvFilePath));

            // Create Bytescout.PDFExtractor.CSVExtractor instance
            using (CSVExtractor extractor = new CSVExtractor("demo", "demo"))
            {
                extractor.LoadDocumentFromFile(inputDocument);

                extractor.CSVSeparatorSymbol = ",";

                string csvText = extractor.GetCSV();
            
                // Save csv text in UTF-8 encoding without BOM (byte order mark):
                File.WriteAllText(csvFilePath, csvText);
            }

            // Please Note: Target the project to x86 because Microsoft.Jet.OLEDB.4.0 driver is 32-bit only.
            using (OleDbConnection connection = new OleDbConnection($@"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=""{csvDirectory}"";Extended Properties=""Text;FMT=$;HDR=No;CharacterSet=65001"""))
            {
                using (OleDbCommand command = new OleDbCommand($"select * from [{csvFileName}]", connection))
                {
                    using (OleDbDataAdapter adapter = new OleDbDataAdapter(command))
                    {
                        DataTable table = new DataTable();
                        table.Locale = CultureInfo.CurrentCulture;
                        adapter.Fill(table);

                        Console.WriteLine($"Loaded {table.Rows.Count} lines.");
                    }
                }
            }

            Console.ReadKey();
        }
    }
}

Load Unicode CSV to DataTable using OLEDB - C#

Program.cs

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK