PDF To XML With Images - C#
PDF Extractor SDK sample in C# demonstrating ‘PDF To XML With Images’
Program.cs
using System;
using Bytescout.PDFExtractor;
namespace PDF2XML
{
/// <summary>
/// Console sample that converts a PDF document to XML twice: once with the
/// images saved as external files and once with the images embedded in the XML.
/// </summary>
internal class Program
{
    /// <summary>
    /// Loads "sample1.pdf" and writes "result_with_external_images.xml" and
    /// "result_with_embedded_images.xml".
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    private static void Main(string[] args)
    {
        // Create Bytescout.PDFExtractor.XMLExtractor instance
        XMLExtractor extractor = new XMLExtractor();
        try
        {
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Load sample PDF document
            extractor.LoadDocumentFromFile("sample1.pdf");

            // Uncomment this line to get rid of empty nodes in XML
            //extractor.PreserveFormattingOnTextExtraction = false;

            // Set output image format
            extractor.ImageFormat = OutputImageFormat.PNG;

            // Save images to external files
            extractor.SaveImages = ImageHandling.OuterFile;
            extractor.ImageFolder = "images"; // Folder for external images
            extractor.SaveXMLToFile("result_with_external_images.xml");

            // Embed images into XML as Base64 encoded string
            extractor.SaveImages = ImageHandling.Embed;
            extractor.SaveXMLToFile("result_with_embedded_images.xml");
        }
        finally
        {
            // Cleanup: runs even when loading or saving throws, so the
            // extractor is always disposed.
            extractor.Dispose();
        }
    }
}
}