Link Search Menu Expand Document

Extract line items from tables on multiple pages - C#

Document Parser SDK sample in C# demonstrating ‘Extract line items from tables on multiple pages’

MultiPageTable-template1.yml
# Template 1: extracts the table rows with a single row-level regular expression.
# The {{...}} placeholders (Spaces, Number, Digits, ...) appear to be macros
# expanded by the parser before matching - confirm against the SDK docs.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
# Keyword(s) presumably used to decide whether this template applies to a document.
detectionRules:
  keywords:
  - Sample document with multi-page table
objects:
# Grand total: the number that follows the word "TOTAL", typed as decimal.
- name: total
  objectType: field
  fieldProperties:
    fieldType: macros
    expression: TOTAL{{Spaces}}({{Number}})
    regex: true
    dataType: decimal
# Line-items table.
- name: table1
  objectType: table
  tableProperties:
    # The table begins at the column header line "Item  Description  Price".
    start:
      expression: Item{{Spaces}}Description{{Spaces}}Price
      regex: true
    # The table ends at the "TOTAL <number>" line.
    end:
      expression: TOTAL{{Spaces}}{{Number}}
      regex: true
    # One expression describes a whole row. The named groups (itemNo, description,
    # price, qty, extPrice) use the same names as the entries under "columns" below.
    row:
      expression: '{{LineStart}}{{Spaces}}(?<itemNo>{{Digits}}){{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<price>{{Number}}){{Spaces}}(?<qty>{{Digits}}){{Spaces}}(?<extPrice>{{Number}})'
      regex: true
    # Output columns and the data type each value is converted to.
    columns:
    - name: itemNo
      dataType: integer
    - name: description
      dataType: string
    - name: price
      dataType: decimal
    - name: qty
      dataType: integer
    - name: extPrice
      dataType: decimal
    # Presumably tells the parser to keep collecting rows on following pages
    # until the "end" expression is found - confirm against the SDK docs.
    multipage: true


MultiPageTable-template2.yml
# Template 2: extracts the table rows by column position (x values) instead of
# a row-level regular expression; there is no "row" expression in this template.
# The {{...}} placeholders (Spaces, Number, Digits) appear to be macros
# expanded by the parser before matching - confirm against the SDK docs.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
# Keyword(s) presumably used to decide whether this template applies to a document.
detectionRules:
  keywords:
  - Sample document with multi-page table
objects:
# Grand total: the number that follows the word "TOTAL", typed as decimal.
# NOTE(review): template 1 declares the same expression with "fieldType: macros";
# confirm whether "regex" here is intentional.
- name: total
  objectType: field
  fieldProperties:
    fieldType: regex
    expression: TOTAL{{Spaces}}({{Number}})
    regex: true
    dataType: decimal
# Line-items table.
- name: table1
  objectType: table
  tableProperties:
    # The table begins at the column header line "Item  Description  Price".
    start:
      expression: Item{{Spaces}}Description{{Spaces}}Price
      regex: true
    # The table ends at either a "Page N of M" line or the "TOTAL <number>" line.
    end:
      expression: (Page {{Digits}} of {{Digits}})|(TOTAL{{Spaces}}{{Number}})
      regex: true
    # Horizontal bounds of the table; presumably page coordinates in the
    # parser's units - TODO confirm.
    left: 51
    right: 528
    # Each column is given by an x position (presumably its left edge), a name,
    # and the data type its value is converted to.
    columns:
    - x: 51
      name: itemNo
      dataType: integer
    - x: 102
      name: description
      dataType: string
    - x: 324
      name: price
      dataType: decimal
    - x: 396
      name: qty
      dataType: integer
    - x: 441
      name: extPrice
      dataType: decimal
    # Presumably tells the parser to continue the table on following pages
    # - confirm against the SDK docs.
    multipage: true


Program.cs
using System;
using ByteScout.DocumentParser;

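// This example demonstrates two different approaches to extracting line items from a table
// that spans multiple pages: template 1 matches each row with a regular expression, while
// template 2 splits rows by column X coordinates. See the template files for details.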

namespace ExtractLineItemFromTableOnMultiplePages
{
    /// <summary>
    /// Console sample that parses the same PDF twice, once with each of the two
    /// YAML templates, and writes each result to its own JSON file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: runs the parser with template 1 and then template 2
        /// against the same input document, then waits for the user.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument = @".\MultiPageTable.pdf";
            string template1 = @".\MultiPageTable-template1.yml";
            string template2 = @".\MultiPageTable-template2.yml";

            // Process using template-1
            ParseWithTemplate(inputDocument, 1, template1, "result1.json");

            // Process using template-2
            ParseWithTemplate(inputDocument, 2, template2, "result2.json");

            Console.WriteLine();
            // Console.ReadLine() returns only after Enter is pressed, so the prompt says so.
            Console.WriteLine("Press Enter to continue...");
            Console.ReadLine();
        }

        /// <summary>
        /// Loads a single template into a fresh <c>DocumentParser</c>, parses the
        /// input document, and saves the result as JSON, reporting progress on the console.
        /// </summary>
        /// <param name="inputDocument">Path of the document to parse.</param>
        /// <param name="templateNumber">Number shown in the progress messages.</param>
        /// <param name="templatePath">Path of the YAML template to load.</param>
        /// <param name="outputFile">Path of the JSON file the result is written to.</param>
        private static void ParseWithTemplate(string inputDocument, int templateNumber, string templatePath, string outputFile)
        {
            // A separate parser instance per template, so only one template is loaded at a time.
            // The two "demo" arguments are presumably the registration name and key - confirm.
            using (DocumentParser documentParser = new DocumentParser("demo", "demo"))
            {
                Console.WriteLine($"Loading template {templateNumber}...");
                documentParser.AddTemplate(templatePath);

                Console.WriteLine($"Template {templateNumber} loaded.");
                Console.WriteLine();

                Console.WriteLine($"Parsing \"{inputDocument}\"...");
                Console.WriteLine();

                // Parse document data in JSON format
                documentParser.ParseDocument(inputDocument, outputFile, OutputFormat.JSON);

                Console.WriteLine($"Parsing results saved to `{outputFile}`.");
                Console.WriteLine();
            }
        }
    }
}

Download Source Code (.zip)

Return to the previous page Explore Document Parser SDK