Parse Simple Document - VB.NET
Document Parser SDK sample in VB.NET demonstrating ‘Parse Simple Document’
AmazonAWS.yml
# Document Parser template for Amazon Web Services invoices.
# Nesting restored: YAML structure is defined by indentation, so the
# flattened form of this template could not be parsed as intended.
templateName: Amazon Web Services Invoice
templateVersion: 4
templatePriority: 0

# Keywords used to recognise a document as belonging to this template.
detectionRules:
  keywords:
    - Amazon Web Services
    - ATTN
    - Invoice

# Data objects (single fields and one table) extracted from the document.
objects:
  # Total amount due; the parenthesised group captures the number after the dollar sign.
  - name: total
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: TOTAL AMOUNT DUE ON{{Anything}}{{Dollar}}({{Number}})
      regex: true
      dataType: decimal

  # Sub-total taken from the line that starts with "Charges".
  - name: subTotal
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: '{{LineStart}}{{Spaces}}Charges{{Spaces}}{{Dollar}}({{Number}})'
      regex: true
      dataType: decimal

  # Invoice date; captured text runs to the end of the line and is parsed with dateFormat.
  - name: dateIssued
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: Invoice Date:{{Spaces}}({{Anything}}){{LineEnd}}
      regex: true
      dataType: date
      dateFormat: MMMM d , yyyy

  # Invoice number (digits only).
  - name: invoiceId
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: Invoice Number:{{Spaces}}({{Digits}})
      regex: true

  # Static fields: the value is the literal expression text.
  - name: companyName
    objectType: field
    fieldProperties:
      fieldType: static
      expression: Amazon Web Services, Inc.
      regex: true

  - name: companyWebsite
    objectType: field
    fieldProperties:
      fieldType: static
      expression: aws.amazon.com
      regex: true

  # Billing address read from a fixed rectangle on the first page (pageIndex 0).
  # NOTE(review): rectangle values are presumably x, y, width, height in
  # document units - confirm against the SDK template reference.
  - name: billTo
    objectType: field
    fieldProperties:
      fieldType: rectangle
      expression: Bill to Address:{{ToggleSingleLineMode}}({{AnythingGreedy}})
      regex: true
      rectangle:
        - 33
        - 115.5
        - 213.75
        - 72.75
      pageIndex: 0

  - name: currency
    objectType: field
    fieldProperties:
      fieldType: static
      expression: USD
      regex: true

  # Line-item table: begins at the "Detail" line and runs to the end of the page.
  - name: table1
    objectType: table
    tableProperties:
      start:
        expression: '{{LineStart}}{{Spaces}}Detail{{LineEnd}}'
        regex: true
      end:
        expression: '{{EndOfPage}}'
        regex: true
      # Named groups in the row expression define the table columns.
      row:
        expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}}){{LineEnd}}'
        regex: true
      # Columns listed here get an explicit data type.
      columns:
        - name: unitPrice
          dataType: decimal
DigitalOcean.yml
# Document Parser template for DigitalOcean invoices.
# Nesting restored: YAML structure is defined by indentation, so the
# flattened form of this template could not be parsed as intended.
templateName: DigitalOcean Invoice
templateVersion: 4
templatePriority: 0

# Keywords used to recognise a document as belonging to this template.
detectionRules:
  keywords:
    - DigitalOcean
    - 101 Avenue of the Americas
    - Invoice Number

# Data objects (single fields and one table) extracted from the document.
objects:
  # Static field: the value is the literal expression text.
  - name: companyName
    objectType: field
    fieldProperties:
      fieldType: static
      expression: DigitalOcean
      regex: true

  # Invoice number (digits only). Quoted because the text contains ": ".
  - name: invoiceId
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: 'Invoice Number: ({{Digits}})'
      regex: true

  # Issue date, captured by the SmartDate macro.
  # NOTE(review): "auto-mdy" presumably means auto-detect with month-day-year
  # preference - confirm against the SDK template reference.
  - name: dateIssued
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: 'Date Issued: ({{SmartDate}})'
      regex: true
      dataType: date
      dateFormat: auto-mdy

  # Invoice total; the group captures the number after the dollar sign.
  - name: total
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: 'Total: {{Dollar}}({{Number}})'
      regex: true
      dataType: decimal

  - name: currency
    objectType: field
    fieldProperties:
      fieldType: static
      expression: USD
      regex: true

  # Line-item table between the "Description ... Hours" header and the "Total:" line.
  - name: table1
    objectType: table
    tableProperties:
      start:
        expression: Description{{Spaces}}Hours
        regex: true
      end:
        expression: 'Total:'
        regex: true
      # Named groups define the columns: description, hours, start, end, unitPrice.
      row:
        expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<hours>{{Digits}}){{Spaces}}(?<start>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}(?<end>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}})'
        regex: true
      # Columns listed here get an explicit data type.
      columns:
        - name: hours
          dataType: integer
        - name: unitPrice
          dataType: decimal
Google.yml
# Document Parser template for Google invoices.
# Nesting restored: YAML structure is defined by indentation, so the
# flattened form of this template could not be parsed as intended.
# NOTE(review): several fields below omit "fieldType"; presumably the SDK
# applies a default - confirm against the SDK template reference.
templateName: Google Invoice
templateVersion: 4
templatePriority: 0

# Keywords used to recognise a document as belonging to this template.
detectionRules:
  keywords:
    - Google
    - 77-0493581
    - Invoice

# Data objects (single fields and one table) extracted from the document.
objects:
  # Invoice number (digits only).
  - name: invoiceId
    objectType: field
    fieldProperties:
      expression: Invoice number:{{Spaces}}({{Digits}})
      regex: true

  # Issue date, captured by the SmartDate macro and parsed with dateFormat.
  - name: dateIssued
    objectType: field
    fieldProperties:
      expression: Issue date:{{Spaces}}({{SmartDate}})
      regex: true
      dataType: date
      dateFormat: MMM d, yyyy

  # NOTE(review): total, subTotal, taxRate and tax have no capturing group,
  # unlike invoiceId/dateIssued above - verify that the SDK still extracts
  # only the numeric part for these typed fields.
  - name: total
    objectType: field
    fieldProperties:
      expression: Amount due in USD:{{Spaces}}{{Number}}
      regex: true
      dataType: decimal

  - name: subTotal
    objectType: field
    fieldProperties:
      expression: Subtotal in USD:{{Spaces}}{{Number}}
      regex: true
      dataType: decimal

  # Tax rate written as "(N%)" after "State sales tax".
  - name: taxRate
    objectType: field
    fieldProperties:
      expression: State sales tax {{OpeningParenthesis}}{{Digits}}{{Percent}}{{ClosingParenthesis}}
      regex: true
      dataType: integer

  # Tax amount: the number at the end of the "State sales tax" line.
  - name: tax
    objectType: field
    fieldProperties:
      expression: State sales tax{{Anything}}{{Number}}{{LineEnd}}
      regex: true
      dataType: decimal

  # Static field: the value is the literal expression text.
  - name: companyName
    objectType: field
    fieldProperties:
      fieldType: static
      expression: Google LLC
      regex: true

  # Billing address read from a fixed rectangle on the first page (pageIndex 0).
  # NOTE(review): rectangle values are presumably x, y, width, height in
  # document units - confirm against the SDK template reference.
  - name: billTo
    objectType: field
    fieldProperties:
      fieldType: rectangle
      regex: true
      rectangle:
        - 0
        - 152
        - 280
        - 72
      pageIndex: 0

  - name: billingId
    objectType: field
    fieldProperties:
      expression: Billing ID:{{Spaces}}({{DigitsOrSymbols}})
      regex: true

  - name: currency
    objectType: field
    fieldProperties:
      fieldType: static
      expression: USD
      regex: true

  # Line-item table between the column header line and "Subtotal in USD".
  - name: table1
    objectType: table
    tableProperties:
      start:
        expression: Description{{Spaces}}Interval{{Spaces}}Quantity{{Spaces}}Amount
        regex: true
      end:
        expression: Subtotal in USD
        regex: true
      # Named groups define the columns: description, interval, quantity, amount.
      row:
        expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<interval>{{3Letters}}{{Space}}{{Digits}}{{Space}}{{Minus}}{{Space}}{{3Letters}}{{Space}}{{Digits}}){{Spaces}}(?<quantity>{{Digits}}){{Spaces}}(?<amount>{{Number}})'
        regex: true
      # Columns listed here get an explicit data type.
      columns:
        - name: quantity
          dataType: integer
        - name: amount
          dataType: decimal
Module1.vb
Imports ByteScout.DocumentParser
' This example demonstrates document data parsing to JSON, YAML and CSV formats.
Module Module1

    ''' <summary>
    ''' Registers the three sample templates with a DocumentParser instance,
    ''' then parses one sample PDF per output format (JSON, YAML, CSV) and
    ''' writes each result to the console.
    ''' </summary>
    Sub Main()

        ' Template files registered with the parser before any document is parsed.
        Dim templates As String() = New String() {"DigitalOcean.yml", "AmazonAWS.yml", "Google.yml"}

        ' Sample input documents, one per output format demonstrated below.
        Dim inputDocument1 As String = ".\DigitalOcean.pdf"
        Dim inputDocument2 As String = ".\AmazonAWS.pdf"
        Dim inputDocument3 As String = ".\Google.pdf"

        ' Create DocumentParser instance.
        ' NOTE(review): the two "demo" arguments are presumably the registration
        ' name and key - replace with real credentials for production use.
        Using documentParser As New DocumentParser("demo", "demo")

            Console.WriteLine("Loading templates...")

            For Each template In templates
                documentParser.AddTemplate(template)
            Next

            Console.WriteLine("Templates loaded.")
            Console.WriteLine()

            ' --- JSON ---
            Console.WriteLine($"Parsing ""{inputDocument1}""...")
            Console.WriteLine()

            ' Parse document data in JSON format
            Dim jsonString As String = documentParser.ParseDocument(inputDocument1, OutputFormat.JSON)

            ' Display parsed data in console
            Console.WriteLine("Parsing results in JSON format:")
            Console.WriteLine()
            Console.WriteLine(jsonString)
            Console.WriteLine()

            ' --- YAML ---
            Console.WriteLine($"Parsing ""{inputDocument2}""...")
            Console.WriteLine()

            ' Parse document data in YAML format
            Dim yamlString As String = documentParser.ParseDocument(inputDocument2, OutputFormat.YAML)

            ' Display parsed data in console
            Console.WriteLine("Parsing results in YAML format:")
            Console.WriteLine()
            Console.WriteLine(yamlString)
            Console.WriteLine()

            ' --- CSV ---
            Console.WriteLine($"Parsing ""{inputDocument3}""...")
            Console.WriteLine()

            ' Parse document data in CSV format: header row, comma separator,
            ' double-quote quoting, UTF-8 encoding.
            Dim csvOptions = New CSVOptions()
            csvOptions.GenerateColumnHeaders = True
            csvOptions.SeparatorCharacter = ","
            csvOptions.QuotationCharacter = """"
            csvOptions.EncodingName = "utf-8"

            Dim csvString As String = documentParser.ParseDocument(inputDocument3, OutputFormat.CSV, csvOptions)

            ' Display parsed data in console (label corrected: this output is CSV, not XML)
            Console.WriteLine("Parsing results in CSV format:")
            Console.WriteLine()
            Console.WriteLine(csvString)

        End Using

        Console.WriteLine()
        ' ReadLine waits for Enter, so the prompt says so.
        Console.WriteLine("Press Enter to continue...")
        Console.ReadLine()

    End Sub

End Module