Link Search Menu Expand Document

Data Masking in PDF - C#

PDF Extractor SDK sample in C# demonstrating ‘Data Masking in PDF’

using Bytescout.PDFExtractor;
using System;
using System.Diagnostics;
using System.IO;
using System.Text.RegularExpressions;

namespace RepairText
{
    /// <summary>
    /// Console sample: extracts the content of a PDF as CSV text with the
    /// ByteScout PDF Extractor SDK, masks SSN-like and phone-like digit
    /// sequences, writes the result to "output.csv", and opens that file.
    /// </summary>
    class Program
    {
        // Input used when no path is given on the command line.
        // NOTE(review): the original listing lost its load call, so the intended
        // sample file name is unknown - confirm and adjust this placeholder.
        private const string DefaultInputPath = "sample.pdf";

        // Destination of the masked CSV text.
        private const string OutputPath = "output.csv";

        // Replacement text for each kind of match.
        private const string SsnReplace = "***-**-****";
        private const string PhoneReplace = "***-***-****";

        // 3-2-4 digits with optional dashes. The (?<!\d) / (?!\d) guards stop the
        // pattern from matching inside a longer digit run; without them the first
        // nine digits of an undashed ten-digit phone number were masked as an SSN,
        // leaving a stray trailing digit that the phone pattern could not match.
        private static readonly Regex SsnRegex =
            new Regex(@"(?<!\d)\d{3}[-]?\d{2}[-]?\d{4}(?!\d)");

        // 3-3-4 digits with optional dashes, guarded the same way.
        private static readonly Regex PhoneRegex =
            new Regex(@"(?<!\d)\d{3}[-]?\d{3}[-]?\d{4}(?!\d)");

        /// <summary>
        /// Entry point. Runs the extract / mask / write / open sequence and
        /// reports any failure on the console instead of crashing.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the path of the PDF to process. When it is
        /// absent, <see cref="DefaultInputPath"/> is used.
        /// </param>
        static void Main(string[] args)
        {
            try
            {
                string inputPath = (args != null && args.Length > 0) ? args[0] : DefaultInputPath;

                // Generate CSVExtractor instance
                using (CSVExtractor extractor = new CSVExtractor("demo", "demo"))
                {
                    // Load PDF document
                    extractor.LoadDocumentFromFile(inputPath);

                    // Get all data
                    string allData = extractor.GetCSV();

                    // Find and mask SSN and phone numbers
                    allData = MaskSensitiveData(allData);

                    // Write as CSV
                    File.WriteAllText(OutputPath, allData);
                }

                // Open file with the application registered for .csv files
                ProcessStartInfo processStartInfo = new ProcessStartInfo(OutputPath);
                processStartInfo.UseShellExecute = true;
                Process.Start(processStartInfo);
            }
            catch (Exception ex)
            {
                // Surface the failure rather than swallowing it silently.
                Console.WriteLine("Error: " + ex.Message);
            }

            Console.WriteLine("Press enter key to close...");
            // Keep the console window open until the user confirms.
            Console.ReadLine();
        }

        /// <summary>
        /// Replaces SSN-like and phone-like digit sequences in
        /// <paramref name="text"/> with fixed asterisk masks.
        /// </summary>
        /// <param name="text">Text to scan; may be null or empty.</param>
        /// <returns>
        /// The masked text, or <paramref name="text"/> unchanged when it is null or empty.
        /// </returns>
        private static string MaskSensitiveData(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return text;
            }

            string masked = SsnRegex.Replace(text, SsnReplace);
            return PhoneRegex.Replace(masked, PhoneReplace);
        }
    }
}

Explore the SDK documentation here.

Download Source Code (.zip)

Return to the previous page | Explore PDF Extractor SDK