Link Search Menu Expand Document

PDF files Parallel Processing - C#

This PDF Extractor SDK sample in C# demonstrates how to process multiple PDF files in parallel.

Program.cs
using System;
using System.IO;
using System.Threading;
using Bytescout.PDFExtractor;

namespace Parallel_Processing
{
    /// <summary>
    /// Console sample that converts every PDF file found in a folder to a text file,
    /// running a bounded number of conversions at the same time on thread-pool threads.
    /// </summary>
    class Program
    {
        // Maximum number of conversions allowed to run at once.
        // Set this value to number of your processor cores for max performance.
        private const int MaxParallelConversions = 4;

        // Each queued conversion holds one slot of this semaphore until it finishes.
        private static readonly Semaphore ThreadLimiter =
            new Semaphore(MaxParallelConversions, MaxParallelConversions);

        /// <summary>
        /// Queues one conversion per PDF file, waits for all of them to complete,
        /// then waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Get all PDF files in a folder
            string[] files = Directory.GetFiles(@"..\..\..\..\", "*.pdf");

            // A single countdown is used instead of one wait handle per file:
            // WaitHandle.WaitAll rejects an empty array and arrays with more than
            // 64 handles, so it fails for folders with no PDFs or with many PDFs.
            // A countdown created with zero is already signaled, so Wait() returns at once.
            using (CountdownEvent pending = new CountdownEvent(files.Length))
            {
                foreach (string file in files)
                {
                    // Wait for a free slot before queuing the next file
                    ThreadLimiter.WaitOne();

                    // Start work item with filename and countdown in params
                    object[] threadData = new object[] { file, pending };
                    ThreadPool.QueueUserWorkItem(ConvertPdfToTxt, threadData);
                }

                // Wait until all work items finish
                pending.Wait();
            }

            Console.WriteLine();
            Console.WriteLine("All is done.");
            Console.WriteLine();
            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }

        /// <summary>
        /// Thread-pool callback: extracts the text of one PDF file and saves it as
        /// "&lt;pdf file name&gt;.txt" (relative path, so it lands in the current directory).
        /// </summary>
        /// <param name="state">
        /// An <c>object[]</c> with two elements: the PDF file path (<see cref="string"/>)
        /// and the <see cref="CountdownEvent"/> to signal when this file is done.
        /// </param>
        private static void ConvertPdfToTxt(object state)
        {
            // Get filename and countdown from params
            object[] threadData = (object[]) state;
            string file = (string) threadData[0];
            CountdownEvent pending = (CountdownEvent) threadData[1];

            string resultFileName = Path.GetFileName(file) + ".txt";

            try
            {
                Console.WriteLine("Converting " + file);

                using (TextExtractor extractor = new TextExtractor("demo", "demo"))
                {
                    extractor.LoadDocumentFromFile(file);
                    extractor.SaveTextToFile(resultFileName);
                }

                Console.WriteLine("Finished " + resultFileName);
            }
            catch (Exception ex)
            {
                // An exception escaping a thread-pool work item terminates the process,
                // so report the failed file and let the remaining files be processed.
                Console.Error.WriteLine("Failed to convert " + file + ": " + ex.Message);
            }
            finally
            {
                // Release semaphore slot so the next file can be queued
                ThreadLimiter.Release();
                // Signal that this file is finished; done last because Main disposes
                // the countdown as soon as the final signal arrives.
                pending.Signal();
            }
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK