Link Search Menu Expand Document

Find Keyword And Extract Text in PDF - C#

PDF Extractor SDK sample in C# demonstrating ‘Find Keyword And Extract Text in PDF’

Program.cs
using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
	/// <summary>
	/// Console sample: searches every page of a PDF document for a keyword and
	/// prints the text found in the top part of each page that contains it.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Entry point. Loads <c>.\sample2.pdf</c>, looks for the keyword
		/// "References" on each page and writes the text of the top region of
		/// every matching page to the console.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			// The using statement guarantees the extractor is disposed even when
			// loading or extraction throws (e.g. the sample file is missing).
			using (TextExtractor extractor = new TextExtractor())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Load sample PDF document
				extractor.LoadDocumentFromFile(@".\sample2.pdf");

				int pageCount = extractor.GetPageCount();

				// Search each page for the keyword
				for (int i = 0; i < pageCount; i++)
				{
					if (extractor.Find(i, "References", false))
					{
						// The page contains the keyword: extract text from it.
						// For demonstration only the top part of the page is extracted.
						extractor.SetExtractionArea(0, 0, 600, 200);
						string text = extractor.GetTextFromPage(i);
						Console.WriteLine(text);

						// Clear the area again so it is not left active for the
						// following Find calls.
						// NOTE(review): presumably the extraction area also limits
						// Find to that rectangle - confirm against the SDK docs.
						extractor.ResetExtractionArea();
					}
				}
			}

			Console.WriteLine();
			Console.WriteLine("Press any key to continue...");
			Console.ReadLine();
		}
	}
}

Download Source Code (.zip)

Return to the previous page Explore PDF Extractor SDK