From c3c6a50992a09d9a0c99d65d5190cb140cb65f70 Mon Sep 17 00:00:00 2001
From: Jonathan Jenne
Date: Wed, 4 Mar 2020 16:42:06 +0100
Subject: [PATCH] WIP: PDF Attachment Extractor
---
GUIs.Test.ZUGFeRDTest/Form1.Designer.vb | 22 +++--
GUIs.Test.ZUGFeRDTest/Form1.vb | 14 +++-
.../EDMI/ZUGFeRD/ImportZUGFeRDFiles.vb | 14 +++-
Modules.Jobs/EDMI/ZUGFeRD/PDFAttachments.vb | 83 ++++++++++++++++++-
Modules.Jobs/EDMI/ZUGFeRD/WorkerArgs.vb | 2 +
Modules.Jobs/Jobs.vbproj | 3 +
6 files changed, 126 insertions(+), 12 deletions(-)
diff --git a/GUIs.Test.ZUGFeRDTest/Form1.Designer.vb b/GUIs.Test.ZUGFeRDTest/Form1.Designer.vb
index adc6be79..ea45550e 100644
--- a/GUIs.Test.ZUGFeRDTest/Form1.Designer.vb
+++ b/GUIs.Test.ZUGFeRDTest/Form1.Designer.vb
@@ -32,6 +32,7 @@ Partial Class Form1
Me.Button5 = New System.Windows.Forms.Button()
Me.GroupBox1 = New System.Windows.Forms.GroupBox()
Me.GroupBox2 = New System.Windows.Forms.GroupBox()
+ Me.Button7 = New System.Windows.Forms.Button()
Me.GroupBox3 = New System.Windows.Forms.GroupBox()
Me.txtPropName = New System.Windows.Forms.TextBox()
Me.GroupBox4 = New System.Windows.Forms.GroupBox()
@@ -60,7 +61,7 @@ Partial Class Form1
Me.ListBox1.FormattingEnabled = True
Me.ListBox1.Location = New System.Drawing.Point(378, 12)
Me.ListBox1.Name = "ListBox1"
- Me.ListBox1.Size = New System.Drawing.Size(526, 407)
+ Me.ListBox1.Size = New System.Drawing.Size(526, 472)
Me.ListBox1.TabIndex = 1
'
'Button2
@@ -122,19 +123,29 @@ Partial Class Form1
'GroupBox2
'
Me.GroupBox2.Controls.Add(Me.Button2)
+ Me.GroupBox2.Controls.Add(Me.Button7)
Me.GroupBox2.Controls.Add(Me.Button3)
Me.GroupBox2.Location = New System.Drawing.Point(12, 110)
Me.GroupBox2.Name = "GroupBox2"
- Me.GroupBox2.Size = New System.Drawing.Size(360, 90)
+ Me.GroupBox2.Size = New System.Drawing.Size(360, 155)
Me.GroupBox2.TabIndex = 8
Me.GroupBox2.TabStop = False
Me.GroupBox2.Text = "Run Functions on a single file (needs Breakpoint)"
'
+ 'Button7
+ '
+ Me.Button7.Location = New System.Drawing.Point(6, 77)
+ Me.Button7.Name = "Button7"
+ Me.Button7.Size = New System.Drawing.Size(221, 23)
+ Me.Button7.TabIndex = 3
+ Me.Button7.Text = "Extract PDF Attachments"
+ Me.Button7.UseVisualStyleBackColor = True
+ '
'GroupBox3
'
Me.GroupBox3.Controls.Add(Me.Button4)
Me.GroupBox3.Controls.Add(Me.txtMD5Checksum)
- Me.GroupBox3.Location = New System.Drawing.Point(12, 206)
+ Me.GroupBox3.Location = New System.Drawing.Point(12, 271)
Me.GroupBox3.Name = "GroupBox3"
Me.GroupBox3.Size = New System.Drawing.Size(360, 85)
Me.GroupBox3.TabIndex = 9
@@ -152,7 +163,7 @@ Partial Class Form1
'
Me.GroupBox4.Controls.Add(Me.Button6)
Me.GroupBox4.Controls.Add(Me.txtPropName)
- Me.GroupBox4.Location = New System.Drawing.Point(12, 297)
+ Me.GroupBox4.Location = New System.Drawing.Point(12, 362)
Me.GroupBox4.Name = "GroupBox4"
Me.GroupBox4.Size = New System.Drawing.Size(360, 122)
Me.GroupBox4.TabIndex = 11
@@ -172,7 +183,7 @@ Partial Class Form1
'
Me.AutoScaleDimensions = New System.Drawing.SizeF(6.0!, 13.0!)
Me.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font
- Me.ClientSize = New System.Drawing.Size(916, 435)
+ Me.ClientSize = New System.Drawing.Size(916, 492)
Me.Controls.Add(Me.GroupBox4)
Me.Controls.Add(Me.GroupBox3)
Me.Controls.Add(Me.GroupBox2)
@@ -204,4 +215,5 @@ Partial Class Form1
Friend WithEvents txtPropName As TextBox
Friend WithEvents GroupBox4 As GroupBox
Friend WithEvents Button6 As Button
+ Friend WithEvents Button7 As Button
End Class
diff --git a/GUIs.Test.ZUGFeRDTest/Form1.vb b/GUIs.Test.ZUGFeRDTest/Form1.vb
index ec510ee9..0fc13b40 100644
--- a/GUIs.Test.ZUGFeRDTest/Form1.vb
+++ b/GUIs.Test.ZUGFeRDTest/Form1.vb
@@ -85,6 +85,7 @@ Public Class Form1
Dim args As New WorkerArgs()
args = LoadFolderConfig(args)
args = LoadPropertyMapFor(args, "DEFAULT")
+ args.GDPictureKey = "21182889975216572111813147150675976632"
Dim job As New Jobs.ImportZUGFeRDFiles(_logConfig, _firebird)
@@ -138,7 +139,7 @@ Public Class Form1
End Function
Private Sub Button5_Click(sender As Object, e As EventArgs) Handles Button5.Click
- Process.Start("\\dd-sto01\DD-STO01-A2\SharedObjects\Public\Projekte\Test\Import\ZUGFerD\Email_in")
+ Process.Start("\\dd-sto01\DD-DFSR01\SharedObjects\Public\Projekte\Test\Import\ZUGFerD\Email_in")
End Sub
Private Sub Button6_Click(sender As Object, e As EventArgs) Handles Button6.Click
@@ -160,4 +161,15 @@ Public Class Form1
End Try
End If
End Sub
+
+    ''' <summary>
+    ''' Test helper: lets the user pick a file via OpenFileDialog1 and runs the PDF
+    ''' attachment extractor on it. The list returned by Extract is not used here.
+    ''' </summary>
+    ''' <remarks>
+    ''' NOTE(review): the GdPicture key is hard-coded in this handler; consider reading
+    ''' it from configuration instead of keeping it in source.
+    ''' </remarks>
+    Private Sub Button7_Click(sender As Object, e As EventArgs) Handles Button7.Click
+        Dim oAttachmentExtractor As New Jobs.PDFAttachments(_logConfig, "21182889975216572111813147150675976632")
+
+        ' Nothing to do unless the user confirmed a file.
+        If OpenFileDialog1.ShowDialog() <> DialogResult.OK Then
+            Exit Sub
+        End If
+
+        Dim oAllowedExtensions As New List(Of String) From {
+            "docx", "doc", "pdf", "xls", "xlsx", "ppt", "pptx", "txt"
+        }
+
+        oAttachmentExtractor.Extract(OpenFileDialog1.FileName, oAllowedExtensions)
+    End Sub
End Class
diff --git a/Modules.Jobs/EDMI/ZUGFeRD/ImportZUGFeRDFiles.vb b/Modules.Jobs/EDMI/ZUGFeRD/ImportZUGFeRDFiles.vb
index 67adb1d7..80ab60d1 100644
--- a/Modules.Jobs/EDMI/ZUGFeRD/ImportZUGFeRDFiles.vb
+++ b/Modules.Jobs/EDMI/ZUGFeRD/ImportZUGFeRDFiles.vb
@@ -42,6 +42,9 @@ Public Class ImportZUGFeRDFiles
"""
+    ' Extensions (without leading dot) of embedded PDF/A attachments that may be extracted.
+    ' Declared with an explicit type: a field without an As clause is typed Object, which
+    ' would force a run-time conversion every time it is passed to PDFAttachments.Extract.
+    Private AllowedExtensions As New List(Of String) From {"docx", "doc", "pdf", "xls", "xlsx", "ppt", "pptx", "txt"}
+
Private _logger As Logger
Private _logConfig As LogConfig
Private _zugferd As ZUGFeRDInterface
@@ -318,6 +321,7 @@ Public Class ImportZUGFeRDFiles
Public Sub Start(Arguments As Object) Implements IJob.Start
Dim oArgs As WorkerArgs = Arguments
Dim oPropertyExtractor = New PropertyValues(_logConfig)
+ Dim oAttachmentExtractor = New PDFAttachments(_logConfig, oArgs.GDPictureKey)
_logger.Debug("Starting Job {0}", [GetType].Name)
@@ -404,10 +408,16 @@ Public Class ImportZUGFeRDFiles
_logger.Warn("Unexpected Error occurred while extracting ZUGFeRD Information from file {0}", oFile.FullName)
Throw ex
End Select
-
-
End Try
+ Dim oAttachments = oAttachmentExtractor.Extract(oFile.FullName, AllowedExtensions)
+ If oAttachments Is Nothing Then
+ _logger.Warn("Attachments for file [{0}] could not be extracted", oFile.FullName)
+ Else
+ oFileAttachmentFiles.AddRange(oFileGroupFiles)
+ oFileAttachmentFiles.AddRange(oAttachments)
+ End If
+
oMD5CheckSum = CreateMD5(oFile.FullName)
If oMD5CheckSum <> String.Empty Then
Dim oCheckCommand = $"SELECT * FROM TBEDM_ZUGFERD_HISTORY_IN WHERE GUID = (SELECT MAX(GUID) FROM TBEDM_ZUGFERD_HISTORY_IN WHERE UPPER(MD5HASH) = UPPER('{oMD5CheckSum}'))"
diff --git a/Modules.Jobs/EDMI/ZUGFeRD/PDFAttachments.vb b/Modules.Jobs/EDMI/ZUGFeRD/PDFAttachments.vb
index 78708998..7c4f8919 100644
--- a/Modules.Jobs/EDMI/ZUGFeRD/PDFAttachments.vb
+++ b/Modules.Jobs/EDMI/ZUGFeRD/PDFAttachments.vb
@@ -1,9 +1,84 @@
-Public Class PDFAttachments
- Public Sub New(GdPictureKey As String)
+Imports System.Collections.Generic
+Imports System.IO
+Imports DigitalData.Modules.Logging
+Imports GdPicture14
+Public Class PDFAttachments
+ Private Logger As Logger
+
+ Private Const ZUGFERD_XML_FILENAME = "ZUGFeRD-invoice.xml"
+
+    ''' <summary>
+    ''' Creates the extractor, obtains its logger and registers the GdPicture licence key.
+    ''' </summary>
+    ''' <param name="LogConfig">Logging configuration the logger is taken from.</param>
+    ''' <param name="GdPictureKey">GdPicture licence key; if empty, no key is registered and a warning is logged.</param>
+    Public Sub New(LogConfig As LogConfig, GdPictureKey As String)
+        Logger = LogConfig.GetLogger
+
+        ' The key used to be accepted and then ignored, so GdPicture was never licensed by
+        ' this class. Register it here so that Extract can load documents.
+        If String.IsNullOrEmpty(GdPictureKey) Then
+            Logger.Warn("No GdPicture license key was supplied, PDF attachments may not be extractable.")
+        Else
+            Dim oLicenseManager As New GdPicture14.LicenseManager()
+            oLicenseManager.RegisterKEY(GdPictureKey)
+        End If
     End Sub
- Public Shared Function Extract(FileName As String)
- Using oGDPicturePDF As New GDPicturePDF
+    ''' <summary>
+    ''' Extracts the files embedded in a PDF document into the current user's temp directory.
+    ''' </summary>
+    ''' <remarks>
+    ''' The embedded invoice data file (ZUGFERD_XML_FILENAME) is never extracted. Embedded files
+    ''' whose extension is not contained in AllowedExtensions are skipped with a warning. If
+    ''' GdPicture reports an error status for a single embedded file, or its name cannot be used
+    ''' as a file name, that file is logged and skipped and the remaining files are still processed.
+    ''' </remarks>
+    ''' <param name="FileName">Path of the PDF file whose attachments should be extracted.</param>
+    ''' <param name="AllowedExtensions">
+    ''' Extensions that may be extracted, compared case-insensitively; a leading dot is ignored.
+    ''' Nothing is treated like an empty list, i.e. no attachment is extracted.
+    ''' </param>
+    ''' <returns>
+    ''' The extracted files (possibly an empty list), or Nothing if the PDF could not be loaded, its
+    ''' embedded file count could not be read, or an unexpected exception occurred.
+    ''' </returns>
+    Public Function Extract(FileName As String, AllowedExtensions As List(Of String)) As List(Of FileInfo)
+        Dim oResults As New List(Of FileInfo)
+
+        Try
+            ' Build the lookup once; the comparer makes the per-file check case-insensitive.
+            Dim oExtensions As New HashSet(Of String)(StringComparer.OrdinalIgnoreCase)
+            If AllowedExtensions IsNot Nothing Then
+                For Each oAllowed As String In AllowedExtensions
+                    If Not String.IsNullOrWhiteSpace(oAllowed) Then
+                        oExtensions.Add(oAllowed.Trim().TrimStart("."c))
+                    End If
+                Next
+            End If
+
+            Using oGDPicturePDF As New GdPicturePDF()
+                If oGDPicturePDF.LoadFromFile(FileName, False) <> GdPictureStatus.OK Then
+                    Logger.Error("The file [{0}] can't be loaded.", FileName)
+                    Return Nothing
+                End If
+
+                Dim oEmbeddedFileCount As Integer = oGDPicturePDF.GetEmbeddedFileCount()
+                If oGDPicturePDF.GetStat() <> GdPictureStatus.OK Then
+                    Logger.Error("An error occurred getting the number of embedded files. Status: {0}", oGDPicturePDF.GetStat().ToString())
+                    Return Nothing
+                End If
+
+                ' No "more than one file" shortcut: a PDF whose only embedded file is a regular
+                ' attachment must be handled too, and the invoice file is filtered by name below.
+                For oIndex As Integer = 0 To oEmbeddedFileCount - 1
+                    Dim oFileName As String = oGDPicturePDF.GetEmbeddedFileName(oIndex)
+                    If oGDPicturePDF.GetStat() <> GdPictureStatus.OK Then
+                        ' The name is not available in this branch, so identify the file by its index.
+                        Logger.Error("An error occurred getting the file name for the embedded file at index [{0}]. Status: {1}", oIndex, oGDPicturePDF.GetStat().ToString())
+                        Continue For
+                    End If
+
+                    ' The name comes from the PDF and is untrusted. Reject names the Path API
+                    ' cannot process, then keep only the last path segment so that rooted paths
+                    ' or "..\" segments cannot redirect the write outside the temp directory.
+                    If String.IsNullOrWhiteSpace(oFileName) OrElse oFileName.IndexOfAny(Path.GetInvalidPathChars()) >= 0 Then
+                        Logger.Warn("The embedded file at index [{0}] was skipped because its name [{1}] is not a valid file name.", oIndex, oFileName)
+                        Continue For
+                    End If
+
+                    Dim oSafeName As String = Path.GetFileName(oFileName)
+                    If oSafeName.Length = 0 OrElse oSafeName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0 Then
+                        Logger.Warn("The embedded file at index [{0}] was skipped because its name [{1}] is not a valid file name.", oIndex, oFileName)
+                        Continue For
+                    End If
+
+                    If String.Equals(oSafeName, ZUGFERD_XML_FILENAME, StringComparison.OrdinalIgnoreCase) Then
+                        Logger.Debug("File [{0}] was skipped because its name indicates the invoice data file.", oFileName)
+                        Continue For
+                    End If
+
+                    ' Path.GetExtension returns an empty string for names without an extension,
+                    ' so such files are simply "not allowed" instead of raising an exception.
+                    Dim oExtension As String = Path.GetExtension(oSafeName).TrimStart("."c)
+                    If Not oExtensions.Contains(oExtension) Then
+                        Logger.Warn("File [{0}] was skipped because its extension [{1}] is not allowed.", oFileName, oExtension)
+                        Continue For
+                    End If
+
+                    Dim oFileSize As Integer = oGDPicturePDF.GetEmbeddedFileSize(oIndex)
+                    If oGDPicturePDF.GetStat() <> GdPictureStatus.OK Then
+                        Logger.Error("An error occurred getting the file size for [{0}]. Status: {1}", oFileName, oGDPicturePDF.GetStat().ToString())
+                        Continue For
+                    End If
+
+                    Dim oFileData As Byte() = New Byte(oFileSize) {}
+                    Dim oStatus As GdPictureStatus = oGDPicturePDF.ExtractEmbeddedFile(oIndex, oFileData)
+                    If oStatus <> GdPictureStatus.OK OrElse oFileData Is Nothing Then
+                        Logger.Error("The embedded file [{0}] has failed to extract. Status: {1}", oFileName, oStatus.ToString())
+                        Continue For
+                    End If
+
+                    ' New Byte(n) {} allocates n + 1 elements. If the library filled that buffer in
+                    ' place instead of replacing it through the ByRef parameter, the last element
+                    ' is padding and must not be written to the file.
+                    ' NOTE(review): confirm the buffer semantics against the GdPicture documentation.
+                    Dim oLength As Integer = oFileData.Length
+                    If oLength = oFileSize + 1 Then
+                        oLength = oFileSize
+                    End If
+
+                    ' FileMode.Create truncates an existing temp file of the same name;
+                    ' OpenOrCreate would leave the tail of a longer, older file in place.
+                    Dim oTempName As String = Path.Combine(Path.GetTempPath(), oSafeName)
+                    Using oFileStream As New FileStream(oTempName, FileMode.Create, FileAccess.Write)
+                        oFileStream.Write(oFileData, 0, oLength)
+                    End Using
+
+                    oResults.Add(New FileInfo(oTempName))
+                Next
+            End Using
+
+            Return oResults
+        Catch ex As Exception
+            Logger.Warn("Unexpected Error while Extracting attachments from File [{0}]", FileName)
+            Logger.Error(ex)
+            Return Nothing
+        End Try
     End Function
End Class
diff --git a/Modules.Jobs/EDMI/ZUGFeRD/WorkerArgs.vb b/Modules.Jobs/EDMI/ZUGFeRD/WorkerArgs.vb
index 71b3f456..8785c5b6 100644
--- a/Modules.Jobs/EDMI/ZUGFeRD/WorkerArgs.vb
+++ b/Modules.Jobs/EDMI/ZUGFeRD/WorkerArgs.vb
@@ -9,6 +9,7 @@ Public Class WorkerArgs
Public AttachmentsSubDirectory As String
Public PropertyMap As Dictionary(Of String, XmlItemProperty)
Public InsertIntoSQLServer As Boolean
+ Public GDPictureKey As String
Public Sub New()
WatchDirectories = New List(Of String)
@@ -19,5 +20,6 @@ Public Class WorkerArgs
AttachmentsSubDirectory = Nothing
PropertyMap = New Dictionary(Of String, XmlItemProperty)
InsertIntoSQLServer = False
+ GDPictureKey = String.Empty
End Sub
End Class
\ No newline at end of file
diff --git a/Modules.Jobs/Jobs.vbproj b/Modules.Jobs/Jobs.vbproj
index 8258ced9..3a28c163 100644
--- a/Modules.Jobs/Jobs.vbproj
+++ b/Modules.Jobs/Jobs.vbproj
@@ -109,6 +109,9 @@
..\packages\FirebirdSql.Data.FirebirdClient.6.4.0\lib\net452\FirebirdSql.Data.FirebirdClient.dll
+
+ D:\ProgramFiles\GdPicture.NET 14\Redist\GdPicture.NET (.NET Framework 4.5)\GdPicture.NET.14.dll
+
..\packages\NLog.4.6.8\lib\net45\NLog.dll