Link Search Menu Expand Document

Find US Address in PDF with Regex - C#

PDF Extractor SDK sample in C# demonstrating ‘Find US Address in PDF with Regex’

Program.cs
using Bytescout.PDFExtractor;
using System;

namespace FindUsAddressRegex
{

    // Note: if you are looking for a higher-level API to extract data from invoices, reports, or statements,
    // then please check the Document Parser SDK and Web API at https://bytescout.com/products/developer/documentparsersdk/index.html
    // and https://pdf.co for a secure and scalable web API.

    // Console sample: loads a PDF, runs a regular-expression search on every
    // page, and writes each found text element to the console.
    class Program
    {
        // Pattern handed to the extractor's regex search on every page.
        // Regular expression syntax reference:
        // https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
        private const string AddressPattern = @"((\w+[ ,])+ ){2}([a-zA-Z]){2}[ , ] (\d+)";

        // Document that is loaded and searched.
        private const string InputFile = "samplePDF_Address.pdf";

        // Entry point: runs the search, reports any exception message on the
        // console, then waits for Enter so the output stays visible.
        static void Main(string[] args)
        {
            try
            {
                SearchDocument();
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error: " + ex.Message);
            }

            Console.WriteLine();
            Console.WriteLine("Press enter key to continue...");
            Console.ReadLine();
        }

        // Creates the extractor, loads the input file, switches on regex
        // searching, and visits each page index from 0 up to the page count.
        private static void SearchDocument()
        {
            using (TextExtractor pdf = new TextExtractor())
            {
                pdf.RegistrationName = "demo";
                pdf.RegistrationKey = "demo";

                pdf.LoadDocumentFromFile(InputFile);

                // Treat the search string as a regular expression.
                pdf.RegexSearch = true;

                // Page count is read once, before the loop starts.
                int totalPages = pdf.GetPageCount();

                int pageIndex = 0;
                while (pageIndex < totalPages)
                {
                    PrintMatchesOnPage(pdf, pageIndex);
                    pageIndex++;
                }
            }
        }

        // Prints every element of every match found on the given page.
        // Find() starts the search; FindNext() advances to the next match and
        // its return value decides whether the loop continues.
        private static void PrintMatchesOnPage(TextExtractor pdf, int pageIndex)
        {
            // NOTE(review): the meaning of the third Find() argument (false) is
            // not visible in this file - confirm against the SDK reference.
            bool hasMatch = pdf.Find(pageIndex, AddressPattern, false);

            while (hasMatch)
            {
                foreach (ISearchResultElement part in pdf.FoundText.Elements)
                {
                    Console.WriteLine("Found Address: " + part.Text);
                }

                hasMatch = pdf.FindNext();
            }
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK