Link Search Menu Expand Document

Extract line items from tables on multiple pages - VB.NET

Document Parser SDK sample in VB.NET demonstrating how to extract line items from tables that span multiple pages.

Module1.vb
Imports ByteScout.DocumentParser

' This example demonstrates two different approaches to extracting line items from tables that span multiple pages.
' See the comments in each template file for details.

''' <summary>
''' Console sample that parses the same PDF twice, once with each of two YAML
''' templates, and writes each parsing result to its own JSON file.
''' </summary>
Module Module1

    ''' <summary>
    ''' Entry point. Runs the parser over the input document with template 1 and
    ''' then with template 2, and finally waits for the user before exiting.
    ''' </summary>
    Sub Main()

        Dim inputDocument As String = ".\MultiPageTable.pdf"
        Dim template1 As String = ".\MultiPageTable-template1.yml"
        Dim template2 As String = ".\MultiPageTable-template2.yml"

        ' Process using template-1
        ParseWithTemplate(inputDocument, template1, 1, "result1.json")

        ' Process using template-2
        ParseWithTemplate(inputDocument, template2, 2, "result2.json")

        Console.WriteLine()
        ' Console.ReadLine() returns only after Enter is pressed, so the prompt says so.
        Console.WriteLine("Press Enter to continue...")
        Console.ReadLine()

    End Sub

    ''' <summary>
    ''' Loads a single template into a fresh parser instance, parses the input
    ''' document with it and saves the result as JSON, reporting progress on the console.
    ''' </summary>
    ''' <param name="inputDocument">Path of the document to parse.</param>
    ''' <param name="templatePath">Path of the YAML template to load.</param>
    ''' <param name="templateNumber">Ordinal of the template, used only in console messages.</param>
    ''' <param name="outputFile">Path of the JSON file that receives the parsing result.</param>
    Private Sub ParseWithTemplate(inputDocument As String,
                                  templatePath As String,
                                  templateNumber As Integer,
                                  outputFile As String)

        ' A separate parser is created per template so that only one template is
        ' loaded at a time; Using disposes the parser when the run is finished.
        ' NOTE(review): "demo"/"demo" are presumably the registration name and key - confirm.
        Using documentParser As New DocumentParser("demo", "demo")

            Console.WriteLine($"Loading template {templateNumber}...")
            documentParser.AddTemplate(templatePath)

            Console.WriteLine($"Template {templateNumber} loaded.")
            Console.WriteLine()

            Console.WriteLine($"Parsing ""{inputDocument}""...")
            Console.WriteLine()

            ' Parse document data in JSON format
            documentParser.ParseDocument(inputDocument, outputFile, OutputFormat.JSON)

            Console.WriteLine($"Parsing results saved to `{outputFile}`.")
            Console.WriteLine()

        End Using

    End Sub

End Module

MultiPageTable-template1.yml
# Template 1: defines a "total" field and a table "table1" whose rows are matched
# by a single regular expression with named capture groups.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
# Presumably the template is applied to documents that contain this keyword - confirm
# against the SDK documentation.
detectionRules:
  keywords:
  - Sample document with multi-page table
objects:
# Field holding the number that follows the word "TOTAL".
# The {{...}} tokens are SDK macros; presumably they expand to regex fragments.
- name: total
  objectType: field
  fieldProperties:
    fieldType: macros
    expression: TOTAL{{Spaces}}({{Number}})
    regex: true
    dataType: decimal
- name: table1
  objectType: table
  tableProperties:
    # The table starts at the header line containing the column titles.
    start:
      expression: Item{{Spaces}}Description{{Spaces}}Price
      regex: true
    # The table ends at the "TOTAL <number>" line.
    end:
      expression: TOTAL{{Spaces}}{{Number}}
      regex: true
    # Row pattern: each named group (itemNo, description, price, qty, extPrice)
    # corresponds to the column of the same name declared under "columns".
    row:
      expression: '{{LineStart}}{{Spaces}}(?<itemNo>{{Digits}}){{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<price>{{Number}}){{Spaces}}(?<qty>{{Digits}}){{Spaces}}(?<extPrice>{{Number}})'
      regex: true
    # Data type of each captured column.
    columns:
    - name: itemNo
      dataType: integer
    - name: description
      dataType: string
    - name: price
      dataType: decimal
    - name: qty
      dataType: integer
    - name: extPrice
      dataType: decimal
    # Presumably lets the table continue across page boundaries - confirm.
    multipage: true


MultiPageTable-template2.yml
# Template 2: defines a "total" field and a table "table1" whose columns are
# declared by x positions instead of a row regular expression.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
# Presumably the template is applied to documents that contain this keyword - confirm
# against the SDK documentation.
detectionRules:
  keywords:
  - Sample document with multi-page table
objects:
# Field holding the number that follows the word "TOTAL".
# NOTE(review): template 1 declares this same field with "fieldType: macros";
# confirm that both values are accepted and behave the same.
- name: total
  objectType: field
  fieldProperties:
    fieldType: regex
    expression: TOTAL{{Spaces}}({{Number}})
    regex: true
    dataType: decimal
- name: table1
  objectType: table
  tableProperties:
    # The table starts at the header line containing the column titles.
    start:
      expression: Item{{Spaces}}Description{{Spaces}}Price
      regex: true
    # The end marker matches either a "Page N of M" line or the "TOTAL <number>" line.
    end:
      expression: (Page {{Digits}} of {{Digits}})|(TOTAL{{Spaces}}{{Number}})
      regex: true
    # Horizontal bounds of the table.
    # NOTE(review): units of left/right/x are not stated here (presumably PDF points) - confirm.
    left: 51
    right: 528
    # Each column is given an x position, a name and a data type.
    columns:
    - x: 51
      name: itemNo
      dataType: integer
    - x: 102
      name: description
      dataType: string
    - x: 324
      name: price
      dataType: decimal
    - x: 396
      name: qty
      dataType: integer
    - x: 441
      name: extPrice
      dataType: decimal
    # Presumably lets the table continue across page boundaries - confirm.
    multipage: true


Download Source Code (.zip)

Return to the previous page Explore Document Parser SDK