Parse Invoice to CSV - SharePoint
Document Parser SDK sample in SharePoint demonstrating ‘Parse Invoice to CSV’
VisualWebPart1.cs
using Microsoft.SharePoint;
using Microsoft.SharePoint.WebControls;
using System;
using System.ComponentModel;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
namespace ExtractDataWebPart.VisualWebPart1
{
/// <summary>
/// Web part shell that hosts <see cref="VisualWebPart1UserControl"/> and hands it
/// the web of the current SharePoint context.
/// </summary>
[ToolboxItemAttribute(false)]
public class VisualWebPart1 : WebPart
{
    // Virtual path of the user control's .ascx file.
    // Visual Studio might automatically update this path when you change the Visual Web Part project item.
    private const string _ascxPath = @"~/_CONTROLTEMPLATES/15/ExtractDataWebPart/VisualWebPart1/VisualWebPart1UserControl.ascx";

    /// <summary>
    /// Loads the user control from <c>_ascxPath</c>, adds it to this web part's
    /// control tree and then assigns its <c>CurrentWeb</c> property.
    /// </summary>
    protected override void CreateChildControls()
    {
        Control loaded = Page.LoadControl(_ascxPath);
        VisualWebPart1UserControl userControl = (VisualWebPart1UserControl)loaded;

        Controls.Add(userControl);

        // Assigned after the control joins the tree, same order as before.
        userControl.CurrentWeb = SPContext.Current.Web;
    }
}
}
VisualWebPart1UserControl.ascx.cs
using Microsoft.SharePoint;
using Newtonsoft.Json.Linq;
using System;
using System.Globalization;
using System.IO;
using System.Net;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using ByteScout.DocumentParser;
using System.Text;
namespace ExtractDataWebPart.VisualWebPart1
{
/// <summary>
/// Extracts data from the documents stored in a SharePoint document library
/// using the ByteScout Document Parser SDK (with its built-in generic invoice
/// template) and stores each result as a CSV file in the same library.
/// </summary>
public partial class VisualWebPart1UserControl : UserControl
{
    /// <summary>
    /// Web whose document library is processed. Assigned by the hosting web part;
    /// when it is not set, the web of the current SharePoint context is used.
    /// </summary>
    public SPWeb CurrentWeb { get; set; }

    // Name of the document library that is both the source of the invoices
    // and the destination of the generated CSV files.
    private const string DestinationLibName = "Shared Documents";

    // Extension of the generated files. Files carrying it are skipped as input
    // so that the output of a previous run is not parsed as an invoice.
    private const string CsvExtension = ".csv";

    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Click handler: converts every document of the library under elevated
    /// privileges and appends a completion marker to the log.
    /// </summary>
    protected void StartButton_Click(object sender, EventArgs e)
    {
        // Only the IDs are carried into the delegate; the SPSite/SPWeb used for
        // the actual work are opened inside it so they are the elevated ones.
        Guid siteId = SPContext.Current.Site.ID;
        Guid webId = (CurrentWeb ?? SPContext.Current.Web).ID;

        SPSecurity.RunWithElevatedPrivileges(delegate ()
        {
            using (SPSite elevatedSite = new SPSite(siteId))
            using (SPWeb elevatedWeb = elevatedSite.OpenWeb(webId))
            {
                ConvertDocuments(elevatedWeb);
            }
        });

        LogTextBox.Text += "\n";
        LogTextBox.Text += "Done...\n";
    }

    /// <summary>
    /// Parses every file of the <c>DestinationLibName</c> library of
    /// <paramref name="web"/> and saves the results next to the inputs.
    /// </summary>
    /// <param name="web">Web that contains the document library.</param>
    private void ConvertDocuments(SPWeb web)
    {
        try
        {
            SPFolder library = web.Folders[DestinationLibName];

            // Take a snapshot first: saving results adds files to this very
            // library, which must not happen while its collection is enumerated.
            var pending = new System.Collections.Generic.List<SPFile>();
            foreach (SPFile candidate in library.Files)
            {
                if (!IsGeneratedCsv(candidate.Name))
                {
                    pending.Add(candidate);
                }
            }

            foreach (SPFile file in pending)
            {
                ConvertDocument(library, file);
            }
        }
        catch (Exception ex)
        {
            // Failures outside a single document (e.g. the library lookup).
            LogTextBox.Text += ex.ToString() + " \n";
        }
    }

    /// <summary>
    /// Parses one file and stores the CSV result in <paramref name="library"/>.
    /// A failure is written to the log and does not stop the remaining files.
    /// </summary>
    /// <param name="library">Folder that receives the generated CSV file.</param>
    /// <param name="file">Document to parse.</param>
    private void ConvertDocument(SPFolder library, SPFile file)
    {
        string inputDocument = file.Name;
        try
        {
            // Create the parser instance ("demo"/"demo" are the SDK's demo registration values).
            using (DocumentParser documentParser = new DocumentParser("demo", "demo"))
            using (Stream input = file.OpenBinaryStream())
            {
                // Add an internal generic template for typical invoices.
                // Note, if it does not parse all required fields, you should create
                // own template using Template Editor application.
                documentParser.AddTemplate("internal://invoice");

                LogTextBox.Text += $"Parsing \"{inputDocument}\"...";

                // Parse document data in CSV format
                string csv = documentParser.ParseDocument(input, OutputFormat.CSV);

                LogTextBox.Text += "Parsing results in CSV format:";
                LogTextBox.Text += csv;

                // Replace only the last extension so "inv.2020.pdf" becomes "inv.2020.csv".
                string destinationFile = Path.ChangeExtension(inputDocument, CsvExtension);
                SaveToSharePoint(library, csv, destinationFile);

                LogTextBox.Text += $"Generated CSV file saved as \"{DestinationLibName}\\{destinationFile}\" file. \n";
            }
        }
        catch (Exception ex)
        {
            LogTextBox.Text += ex.ToString() + " \n";
        }
    }

    /// <summary>
    /// Tells whether <paramref name="fileName"/> carries the extension of the
    /// files this control generates (compared case-insensitively).
    /// </summary>
    private static bool IsGeneratedCsv(string fileName)
    {
        return string.Equals(Path.GetExtension(fileName), CsvExtension, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Uploads <paramref name="data"/> as a UTF-8 encoded file to
    /// <paramref name="library"/>, replacing an existing file of the same name.
    /// </summary>
    /// <param name="library">Destination folder; the caller passes the one obtained from the elevated web.</param>
    /// <param name="data">Text content of the file; <c>null</c> is stored as an empty file.</param>
    /// <param name="destinationFile">Name of the file to create.</param>
    private void SaveToSharePoint(SPFolder library, string data, string destinationFile)
    {
        // UTF-8 keeps non-ASCII characters that ASCII encoding would turn into '?'.
        byte[] bytes = Encoding.UTF8.GetBytes(data ?? string.Empty);

        using (MemoryStream stream = new MemoryStream(bytes))
        {
            // Third argument: overwrite an existing file of the same name.
            library.Files.Add(destinationFile, stream, true);
            library.Update();
        }
    }
}
}
VisualWebPart1UserControl.ascx.designer.cs
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace ExtractDataWebPart.VisualWebPart1
{
// Designer half of the user control: declares the fields for the server controls.
// NOTE(review): presumably these fields are bound to the controls declared in the
// .ascx markup, which is not part of this listing - confirm there.
public partial class VisualWebPart1UserControl
{
/// <summary>
/// FolderTextBox control.
/// </summary>
/// <remarks>
/// Auto-generated field.
/// To modify move field declaration from designer file to code-behind file.
/// The code-behind in this listing does not read this text box; it works with a fixed library name.
/// </remarks>
protected global::System.Web.UI.WebControls.TextBox FolderTextBox;
/// <summary>
/// StartButton control.
/// </summary>
/// <remarks>
/// Auto-generated field.
/// To modify move field declaration from designer file to code-behind file.
/// NOTE(review): the code-behind defines a StartButton_Click handler; presumably it is wired to this button in the .ascx markup - confirm.
/// </remarks>
protected global::System.Web.UI.WebControls.Button StartButton;
/// <summary>
/// LogTextBox control.
/// </summary>
/// <remarks>
/// Auto-generated field.
/// To modify move field declaration from designer file to code-behind file.
/// The code-behind appends progress messages, parsing results and exception text to its Text property.
/// </remarks>
protected global::System.Web.UI.WebControls.TextBox LogTextBox;
}
}
readme.txt
This folder contains language files for Optical Character Recognition (OCR).
Document Parser SDK is shipped with language files for English, French, German and Spanish.
You can download more languages at https://github.com/bytescout/ocrdata