Link Search Menu Expand Document

Extract Text from Foldable Brochure Booklet - C#

PDF Extractor SDK sample in C# demonstrating ‘Extract Text from Foldable Brochure Booklet’

Program.cs
using Bytescout.PDFExtractor;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Text;

namespace ExtractTextFromFoldableBrochure
{
    /// <summary>
    /// Sample that extracts text from several rectangular regions of the first page
    /// of a PDF document, writes the combined text to a file, and opens that file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: extracts text from each configured region of the sample PDF,
        /// writes the result to a text file, and opens it with the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Set extraction regions.
            // Use Bytescout Template Editor / Bytescout PDF Multitool or another tool to find region coordinates.
            var extractionRegions = new List<RectangleF>
            {
                new RectangleF(7.5f, 33.8f, 244.5f, 353.3f),
                new RectangleF(273.8f, 201.8f, 247.5f, 198.0f),
                new RectangleF(537.8f, 27.0f, 246.0f, 268.5f)
            };

            // Output file
            var resFile = "result.txt";
            var result = new StringBuilder();

            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();
            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@".\SampleFoldable.pdf");

                // Loop through all extraction regions and extract text, one region per line block
                foreach (var region in extractionRegions)
                {
                    result.AppendLine(GetTextFromRegion(extractor, region));
                }
            }
            finally
            {
                // Cleanup: release the extractor even if loading or extraction throws
                extractor.Dispose();
            }

            // Write the whole result to the output file
            File.WriteAllText(resFile, result.ToString());

            // Open the result file in the default associated application.
            // The path is derived from resFile so the file written and the file opened cannot drift apart.
            ProcessStartInfo processStartInfo = new ProcessStartInfo(Path.GetFullPath(resFile));
            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }

        /// <summary>
        /// Gets the text from a particular region of a page.
        /// </summary>
        /// <param name="textExtractor">Extractor with a document already loaded; its extraction area is set by this call.</param>
        /// <param name="extractionRegion">Region of the page to extract text from.</param>
        /// <param name="pageIndex">Index of the page passed to the extractor; defaults to 0.</param>
        /// <returns>The text returned by the extractor for the given page and region.</returns>
        private static string GetTextFromRegion(TextExtractor textExtractor, RectangleF extractionRegion, int pageIndex = 0)
        {
            // Set extraction area
            textExtractor.SetExtractionArea(extractionRegion);

            // Get text from that region
            return textExtractor.GetTextFromPage(pageIndex);
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK


Copyright © 2016 - 2021 ByteScout