Extract line items from tables on multiple pages - VBScript and VB6
Document Parser SDK sample for VBScript and VB6 that demonstrates how to extract line items from tables spanning multiple pages.
ExtractLineItemFromTableOnMultiplePages.vbs
' Extracts line items from a table that spans multiple pages.
' The same input PDF is parsed twice, each time with a different template;
' the two templates describe the table using two different approaches.
' The approaches themselves are defined in the template files.

sourcePdf = ".\MultiPageTable.pdf"
firstTemplate = ".\MultiPageTable-template1.yml"
secondTemplate = ".\MultiPageTable-template2.yml"

' --- Pass 1: parse with the first template and save the data in JSON format ---
Set parser = NewDemoParser(firstTemplate)
parser.ParseDocument sourcePdf, "result1.json", 0
Set parser = Nothing

' --- Pass 2: fresh parser instance, second template, separate output file ---
Set parser = NewDemoParser(secondTemplate)
parser.ParseDocument sourcePdf, "result2.json", 0

WScript.Echo "Parsed data saved as 'result1.json' and 'result2.json'."

' Release the second parser instance.
Set parser = Nothing

' Creates a DocumentParser COM object, applies the demo registration
' name/key and loads the single template found at templatePath.
' Returns the configured parser object; the caller releases it.
Function NewDemoParser(templatePath)
    Dim instance
    Set instance = CreateObject("Bytescout.DocumentParser.DocumentParser")
    instance.RegistrationName = "demo"
    instance.RegistrationKey = "demo"
    ' Loading template...
    instance.AddTemplate(templatePath)
    Set NewDemoParser = instance
End Function
MultiPageTable-template1.yml
# Template 1: the table is described by a regular expression that matches
# one whole row; named capture groups in that expression supply the columns.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
# Keyword phrase listed under the template's detection rules.
detectionRules:
  keywords:
    - Sample document with multi-page table
objects:
  # Single field: the number captured after the word "TOTAL".
  - name: total
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: TOTAL{{Spaces}}({{Number}})
      regex: true
      dataType: decimal
  # Line-items table.
  - name: table1
    objectType: table
    tableProperties:
      # Start marker: the table header line (Item / Description / Price).
      start:
        expression: Item{{Spaces}}Description{{Spaces}}Price
        regex: true
      # End marker: the "TOTAL <number>" line.
      end:
        expression: TOTAL{{Spaces}}{{Number}}
        regex: true
      # Row pattern. The named groups (itemNo, description, price, qty,
      # extPrice) use the same names as the entries under `columns`.
      row:
        expression: '{{LineStart}}{{Spaces}}(?<itemNo>{{Digits}}){{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<price>{{Number}}){{Spaces}}(?<qty>{{Digits}}){{Spaces}}(?<extPrice>{{Number}})'
        regex: true
      # Output columns and the data type each one is converted to.
      columns:
        - name: itemNo
          dataType: integer
        - name: description
          dataType: string
        - name: price
          dataType: decimal
        - name: qty
          dataType: integer
        - name: extPrice
          dataType: decimal
      # NOTE(review): presumably lets the table continue across page
      # breaks until the end marker is found - confirm against the SDK docs.
      multipage: true
MultiPageTable-template2.yml
# Template 2: the table is described by explicit left/right bounds and a
# list of column x positions instead of a row regular expression.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
# Keyword phrase listed under the template's detection rules.
detectionRules:
  keywords:
    - Sample document with multi-page table
objects:
  # Single field: the number captured after the word "TOTAL".
  # NOTE(review): MultiPageTable-template1.yml declares `fieldType: macros`
  # for this same expression - confirm that `regex` is intended here.
  - name: total
    objectType: field
    fieldProperties:
      fieldType: regex
      expression: TOTAL{{Spaces}}({{Number}})
      regex: true
      dataType: decimal
  # Line-items table.
  - name: table1
    objectType: table
    tableProperties:
      # Start marker: the table header line (Item / Description / Price).
      start:
        expression: Item{{Spaces}}Description{{Spaces}}Price
        regex: true
      # End marker: either a "Page N of M" line or the "TOTAL <number>" line.
      end:
        expression: (Page {{Digits}} of {{Digits}})|(TOTAL{{Spaces}}{{Number}})
        regex: true
      # Horizontal bounds of the table.
      # NOTE(review): units are not stated here (presumably PDF points) - confirm.
      left: 51
      right: 528
      # Columns, each with its x position, name and target data type.
      # The first column's x equals `left`.
      columns:
        - x: 51
          name: itemNo
          dataType: integer
        - x: 102
          name: description
          dataType: string
        - x: 324
          name: price
          dataType: decimal
        - x: 396
          name: qty
          dataType: integer
        - x: 441
          name: extPrice
          dataType: decimal
      # NOTE(review): presumably lets the table resume on following pages
      # after a "Page N of M" end marker - confirm against the SDK docs.
      multipage: true