WIP: PDF Attachment Extractor

This commit is contained in:
Jonathan Jenne 2020-03-04 16:42:06 +01:00
parent b995304ee3
commit c3c6a50992
6 changed files with 126 additions and 12 deletions

View File

@ -32,6 +32,7 @@ Partial Class Form1
Me.Button5 = New System.Windows.Forms.Button() Me.Button5 = New System.Windows.Forms.Button()
Me.GroupBox1 = New System.Windows.Forms.GroupBox() Me.GroupBox1 = New System.Windows.Forms.GroupBox()
Me.GroupBox2 = New System.Windows.Forms.GroupBox() Me.GroupBox2 = New System.Windows.Forms.GroupBox()
Me.Button7 = New System.Windows.Forms.Button()
Me.GroupBox3 = New System.Windows.Forms.GroupBox() Me.GroupBox3 = New System.Windows.Forms.GroupBox()
Me.txtPropName = New System.Windows.Forms.TextBox() Me.txtPropName = New System.Windows.Forms.TextBox()
Me.GroupBox4 = New System.Windows.Forms.GroupBox() Me.GroupBox4 = New System.Windows.Forms.GroupBox()
@ -60,7 +61,7 @@ Partial Class Form1
Me.ListBox1.FormattingEnabled = True Me.ListBox1.FormattingEnabled = True
Me.ListBox1.Location = New System.Drawing.Point(378, 12) Me.ListBox1.Location = New System.Drawing.Point(378, 12)
Me.ListBox1.Name = "ListBox1" Me.ListBox1.Name = "ListBox1"
Me.ListBox1.Size = New System.Drawing.Size(526, 407) Me.ListBox1.Size = New System.Drawing.Size(526, 472)
Me.ListBox1.TabIndex = 1 Me.ListBox1.TabIndex = 1
' '
'Button2 'Button2
@ -122,19 +123,29 @@ Partial Class Form1
'GroupBox2 'GroupBox2
' '
Me.GroupBox2.Controls.Add(Me.Button2) Me.GroupBox2.Controls.Add(Me.Button2)
Me.GroupBox2.Controls.Add(Me.Button7)
Me.GroupBox2.Controls.Add(Me.Button3) Me.GroupBox2.Controls.Add(Me.Button3)
Me.GroupBox2.Location = New System.Drawing.Point(12, 110) Me.GroupBox2.Location = New System.Drawing.Point(12, 110)
Me.GroupBox2.Name = "GroupBox2" Me.GroupBox2.Name = "GroupBox2"
Me.GroupBox2.Size = New System.Drawing.Size(360, 90) Me.GroupBox2.Size = New System.Drawing.Size(360, 155)
Me.GroupBox2.TabIndex = 8 Me.GroupBox2.TabIndex = 8
Me.GroupBox2.TabStop = False Me.GroupBox2.TabStop = False
Me.GroupBox2.Text = "Run Functions on a single file (needs Breakpoint)" Me.GroupBox2.Text = "Run Functions on a single file (needs Breakpoint)"
' '
'Button7
'
Me.Button7.Location = New System.Drawing.Point(6, 77)
Me.Button7.Name = "Button7"
Me.Button7.Size = New System.Drawing.Size(221, 23)
Me.Button7.TabIndex = 3
Me.Button7.Text = "Extract PDF Attachments"
Me.Button7.UseVisualStyleBackColor = True
'
'GroupBox3 'GroupBox3
' '
Me.GroupBox3.Controls.Add(Me.Button4) Me.GroupBox3.Controls.Add(Me.Button4)
Me.GroupBox3.Controls.Add(Me.txtMD5Checksum) Me.GroupBox3.Controls.Add(Me.txtMD5Checksum)
Me.GroupBox3.Location = New System.Drawing.Point(12, 206) Me.GroupBox3.Location = New System.Drawing.Point(12, 271)
Me.GroupBox3.Name = "GroupBox3" Me.GroupBox3.Name = "GroupBox3"
Me.GroupBox3.Size = New System.Drawing.Size(360, 85) Me.GroupBox3.Size = New System.Drawing.Size(360, 85)
Me.GroupBox3.TabIndex = 9 Me.GroupBox3.TabIndex = 9
@ -152,7 +163,7 @@ Partial Class Form1
' '
Me.GroupBox4.Controls.Add(Me.Button6) Me.GroupBox4.Controls.Add(Me.Button6)
Me.GroupBox4.Controls.Add(Me.txtPropName) Me.GroupBox4.Controls.Add(Me.txtPropName)
Me.GroupBox4.Location = New System.Drawing.Point(12, 297) Me.GroupBox4.Location = New System.Drawing.Point(12, 362)
Me.GroupBox4.Name = "GroupBox4" Me.GroupBox4.Name = "GroupBox4"
Me.GroupBox4.Size = New System.Drawing.Size(360, 122) Me.GroupBox4.Size = New System.Drawing.Size(360, 122)
Me.GroupBox4.TabIndex = 11 Me.GroupBox4.TabIndex = 11
@ -172,7 +183,7 @@ Partial Class Form1
' '
Me.AutoScaleDimensions = New System.Drawing.SizeF(6.0!, 13.0!) Me.AutoScaleDimensions = New System.Drawing.SizeF(6.0!, 13.0!)
Me.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font Me.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font
Me.ClientSize = New System.Drawing.Size(916, 435) Me.ClientSize = New System.Drawing.Size(916, 492)
Me.Controls.Add(Me.GroupBox4) Me.Controls.Add(Me.GroupBox4)
Me.Controls.Add(Me.GroupBox3) Me.Controls.Add(Me.GroupBox3)
Me.Controls.Add(Me.GroupBox2) Me.Controls.Add(Me.GroupBox2)
@ -204,4 +215,5 @@ Partial Class Form1
Friend WithEvents txtPropName As TextBox Friend WithEvents txtPropName As TextBox
Friend WithEvents GroupBox4 As GroupBox Friend WithEvents GroupBox4 As GroupBox
Friend WithEvents Button6 As Button Friend WithEvents Button6 As Button
Friend WithEvents Button7 As Button
End Class End Class

View File

@ -85,6 +85,7 @@ Public Class Form1
Dim args As New WorkerArgs() Dim args As New WorkerArgs()
args = LoadFolderConfig(args) args = LoadFolderConfig(args)
args = LoadPropertyMapFor(args, "DEFAULT") args = LoadPropertyMapFor(args, "DEFAULT")
args.GDPictureKey = "21182889975216572111813147150675976632"
Dim job As New Jobs.ImportZUGFeRDFiles(_logConfig, _firebird) Dim job As New Jobs.ImportZUGFeRDFiles(_logConfig, _firebird)
@ -138,7 +139,7 @@ Public Class Form1
End Function End Function
Private Sub Button5_Click(sender As Object, e As EventArgs) Handles Button5.Click Private Sub Button5_Click(sender As Object, e As EventArgs) Handles Button5.Click
Process.Start("\\dd-sto01\DD-STO01-A2\SharedObjects\Public\Projekte\Test\Import\ZUGFerD\Email_in") Process.Start("\\dd-sto01\DD-DFSR01\SharedObjects\Public\Projekte\Test\Import\ZUGFerD\Email_in")
End Sub End Sub
Private Sub Button6_Click(sender As Object, e As EventArgs) Handles Button6.Click Private Sub Button6_Click(sender As Object, e As EventArgs) Handles Button6.Click
@ -160,4 +161,15 @@ Public Class Form1
End Try End Try
End If End If
End Sub End Sub
''' <summary>
''' Click handler for Button7: lets the user pick a single file and runs the
''' PDF attachment extractor on it.
''' </summary>
''' <remarks>
''' The list returned by Extract is not used here; the call is made only for its effects.
''' NOTE(review): the GdPicture key is a literal in source - consider loading it from configuration.
''' </remarks>
Private Sub Button7_Click(sender As Object, e As EventArgs) Handles Button7.Click
    Dim oAttachmentExtractor As New Jobs.PDFAttachments(_logConfig, "21182889975216572111813147150675976632")

    ' Nothing to do unless the user confirmed a file in the dialog.
    If OpenFileDialog1.ShowDialog() <> DialogResult.OK Then
        Return
    End If

    ' Extensions of embedded files that may be extracted.
    Dim oAllowedExtensions As New List(Of String) From {
        "docx", "doc", "pdf", "xls", "xlsx", "ppt", "pptx", "txt"
    }

    oAttachmentExtractor.Extract(OpenFileDialog1.FileName, AllowedExtensions:=oAllowedExtensions)
End Sub
End Class End Class

View File

@ -42,6 +42,9 @@ Public Class ImportZUGFeRDFiles
</ul></p> </ul></p>
""" """
' List of allowed extensions for PDF/A Attachments
Private AllowedExtensions = New List(Of String) From {"docx", "doc", "pdf", "xls", "xlsx", "ppt", "pptx", "txt"}
Private _logger As Logger Private _logger As Logger
Private _logConfig As LogConfig Private _logConfig As LogConfig
Private _zugferd As ZUGFeRDInterface Private _zugferd As ZUGFeRDInterface
@ -318,6 +321,7 @@ Public Class ImportZUGFeRDFiles
Public Sub Start(Arguments As Object) Implements IJob.Start Public Sub Start(Arguments As Object) Implements IJob.Start
Dim oArgs As WorkerArgs = Arguments Dim oArgs As WorkerArgs = Arguments
Dim oPropertyExtractor = New PropertyValues(_logConfig) Dim oPropertyExtractor = New PropertyValues(_logConfig)
Dim oAttachmentExtractor = New PDFAttachments(_logConfig, oArgs.GDPictureKey)
_logger.Debug("Starting Job {0}", [GetType].Name) _logger.Debug("Starting Job {0}", [GetType].Name)
@ -404,10 +408,16 @@ Public Class ImportZUGFeRDFiles
_logger.Warn("Unexpected Error occurred while extracting ZUGFeRD Information from file {0}", oFile.FullName) _logger.Warn("Unexpected Error occurred while extracting ZUGFeRD Information from file {0}", oFile.FullName)
Throw ex Throw ex
End Select End Select
End Try End Try
Dim oAttachments = oAttachmentExtractor.Extract(oFile.FullName, AllowedExtensions)
If oAttachments Is Nothing Then
_logger.Warn("Attachments for file [{0}] could not be extracted", oFile.FullName)
Else
oFileAttachmentFiles.AddRange(oFileGroupFiles)
oFileAttachmentFiles.AddRange(oAttachments)
End If
oMD5CheckSum = CreateMD5(oFile.FullName) oMD5CheckSum = CreateMD5(oFile.FullName)
If oMD5CheckSum <> String.Empty Then If oMD5CheckSum <> String.Empty Then
Dim oCheckCommand = $"SELECT * FROM TBEDM_ZUGFERD_HISTORY_IN WHERE GUID = (SELECT MAX(GUID) FROM TBEDM_ZUGFERD_HISTORY_IN WHERE UPPER(MD5HASH) = UPPER('{oMD5CheckSum}'))" Dim oCheckCommand = $"SELECT * FROM TBEDM_ZUGFERD_HISTORY_IN WHERE GUID = (SELECT MAX(GUID) FROM TBEDM_ZUGFERD_HISTORY_IN WHERE UPPER(MD5HASH) = UPPER('{oMD5CheckSum}'))"

View File

@ -1,9 +1,84 @@
Public Class PDFAttachments Imports System.Collections.Generic
Public Sub New(GdPictureKey As String) Imports System.IO
Imports DigitalData.Modules.Logging
Imports GdPicture14
''' <summary>
''' Extracts the files embedded in a PDF document into the user's temp directory.
''' The embedded ZUGFeRD invoice XML and files whose extension is not allowed are skipped.
''' </summary>
Public Class PDFAttachments
    Private ReadOnly Logger As Logger

    ' Name of the embedded invoice data file; it is never returned as an attachment.
    Private Const ZUGFERD_XML_FILENAME As String = "ZUGFeRD-invoice.xml"

    ''' <summary>
    ''' Creates a new extractor.
    ''' </summary>
    ''' <param name="LogConfig">Logging configuration used to obtain the logger.</param>
    ''' <param name="GdPictureKey">GdPicture license key. It is registered when not empty.</param>
    Public Sub New(LogConfig As LogConfig, GdPictureKey As String)
        Logger = LogConfig.GetLogger

        ' The key was previously accepted but never used, so the toolkit was never licensed from here.
        If Not String.IsNullOrEmpty(GdPictureKey) Then
            Dim oLicenseManager As New GdPicture14.LicenseManager()
            oLicenseManager.RegisterKEY(GdPictureKey)
        End If
    End Sub

    ''' <summary>
    ''' Extracts all embedded files with an allowed extension from the given PDF.
    ''' </summary>
    ''' <param name="FileName">Full path of the PDF file to inspect.</param>
    ''' <param name="AllowedExtensions">
    ''' Extensions (without leading dot, compared case-insensitively) that may be extracted.
    ''' Nothing is treated like an empty list, i.e. no file is extracted.
    ''' </param>
    ''' <returns>
    ''' The extracted files (possibly an empty list), or Nothing when the PDF could not be
    ''' loaded, its embedded file count could not be read, or an unexpected error occurred.
    ''' </returns>
    Public Function Extract(FileName As String, AllowedExtensions As List(Of String)) As List(Of FileInfo)
        Dim oResults As New List(Of FileInfo)

        ' Case-insensitive lookup set; avoids culture-sensitive ToUpper comparisons.
        Dim oExtensions As New HashSet(Of String)(StringComparer.OrdinalIgnoreCase)
        If AllowedExtensions IsNot Nothing Then
            For Each oAllowed As String In AllowedExtensions
                If Not String.IsNullOrEmpty(oAllowed) Then
                    oExtensions.Add(oAllowed.TrimStart("."c))
                End If
            Next
        End If

        Try
            Using oGDPicturePDF As New GdPicturePDF()
                If oGDPicturePDF.LoadFromFile(FileName, False) <> GdPictureStatus.OK Then
                    Logger.Error("The file [{0}] can't be loaded.", FileName)
                    Return Nothing
                End If

                Dim oEmbeddedFileCount As Integer = oGDPicturePDF.GetEmbeddedFileCount()
                If oGDPicturePDF.GetStat() <> GdPictureStatus.OK Then
                    Logger.Error("An error occurred getting the number of embedded files. Status: {0}", oGDPicturePDF.GetStat().ToString())
                    Return Nothing
                End If

                ' Every embedded file is inspected, including the case of exactly one file;
                ' the invoice XML is filtered out per file below.
                For oIndex As Integer = 0 To oEmbeddedFileCount - 1
                    Dim oExtracted As FileInfo = ExtractEmbeddedFile(oGDPicturePDF, oIndex, oExtensions)
                    If oExtracted IsNot Nothing Then
                        oResults.Add(oExtracted)
                    End If
                Next
            End Using

            Return oResults
        Catch ex As Exception
            Logger.Warn("Unexpected Error while Extracting attachments from File [{0}]", FileName)
            Logger.Error(ex)
            Return Nothing
        End Try
    End Function

    ''' <summary>
    ''' Extracts a single embedded file to the temp directory.
    ''' </summary>
    ''' <returns>The written temp file, or Nothing when the file was skipped or failed to extract.</returns>
    Private Function ExtractEmbeddedFile(Pdf As GdPicturePDF, Index As Integer, Extensions As HashSet(Of String)) As FileInfo
        Dim oEmbeddedName As String = Pdf.GetEmbeddedFileName(Index)
        Dim oStatus As GdPictureStatus = Pdf.GetStat()

        If oStatus <> GdPictureStatus.OK OrElse String.IsNullOrEmpty(oEmbeddedName) Then
            ' The name is not available here, so the index identifies the file.
            Logger.Error("An error occurred getting the file name for the embedded file at index [{0}]. Status: {1}", Index, oStatus.ToString())
            Return Nothing
        End If

        ' The embedded name comes from the document and is untrusted: keep only the last
        ' path segment so that the file can never be written outside the temp directory.
        Dim oSeparatorIndex As Integer = oEmbeddedName.LastIndexOfAny(New Char() {"/"c, "\"c})
        Dim oFileName As String = oEmbeddedName.Substring(oSeparatorIndex + 1)

        If oFileName.Length = 0 OrElse oFileName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0 Then
            Logger.Warn("File [{0}] was skipped because its name is not a valid file name.", oEmbeddedName)
            Return Nothing
        End If

        If String.Equals(oFileName, ZUGFERD_XML_FILENAME, StringComparison.OrdinalIgnoreCase) Then
            Logger.Debug("File [{0}] was skipped because its name indicates the invoice data file.", oFileName)
            Return Nothing
        End If

        ' Yields an empty string for names without extension instead of throwing.
        Dim oExtension As String = Path.GetExtension(oFileName).TrimStart("."c)
        If Not Extensions.Contains(oExtension) Then
            Logger.Warn("File [{0}] was skipped because its extension [{1}] is not allowed.", oFileName, oExtension)
            Return Nothing
        End If

        Dim oFileSize As Integer = Pdf.GetEmbeddedFileSize(Index)
        oStatus = Pdf.GetStat()
        If oStatus <> GdPictureStatus.OK OrElse oFileSize < 0 Then
            Logger.Error("An error occurred getting the file size for [{0}]. Status: {1}", oFileName, oStatus.ToString())
            Return Nothing
        End If

        ' Buffer allocation is kept as before (upper bound = size, i.e. size + 1 elements).
        ' NOTE(review): whether the toolkit fills this buffer or replaces it is not visible here - confirm.
        Dim oFileData As Byte() = New Byte(oFileSize) {}
        oStatus = Pdf.ExtractEmbeddedFile(Index, oFileData)
        If oStatus <> GdPictureStatus.OK OrElse oFileData Is Nothing Then
            Logger.Error("The embedded file [{0}] has failed to extract. Status: {1}", oFileName, oStatus.ToString())
            Return Nothing
        End If

        ' Never write more than the reported size, so no padding byte ends up in the output.
        Dim oLength As Integer = Math.Min(oFileSize, oFileData.Length)
        Dim oTempName As String = Path.Combine(Path.GetTempPath(), oFileName)

        ' FileMode.Create truncates an existing file; OpenOrCreate would keep stale trailing bytes.
        Using oFileStream As New FileStream(oTempName, FileMode.Create, FileAccess.Write)
            oFileStream.Write(oFileData, 0, oLength)
        End Using

        Return New FileInfo(oTempName)
    End Function
End Class

View File

@ -9,6 +9,7 @@ Public Class WorkerArgs
Public AttachmentsSubDirectory As String Public AttachmentsSubDirectory As String
Public PropertyMap As Dictionary(Of String, XmlItemProperty) Public PropertyMap As Dictionary(Of String, XmlItemProperty)
Public InsertIntoSQLServer As Boolean Public InsertIntoSQLServer As Boolean
Public GDPictureKey As String
Public Sub New() Public Sub New()
WatchDirectories = New List(Of String) WatchDirectories = New List(Of String)
@ -19,5 +20,6 @@ Public Class WorkerArgs
AttachmentsSubDirectory = Nothing AttachmentsSubDirectory = Nothing
PropertyMap = New Dictionary(Of String, XmlItemProperty) PropertyMap = New Dictionary(Of String, XmlItemProperty)
InsertIntoSQLServer = False InsertIntoSQLServer = False
GDPictureKey = String.Empty
End Sub End Sub
End Class End Class

View File

@ -109,6 +109,9 @@
<Reference Include="FirebirdSql.Data.FirebirdClient, Version=6.4.0.0, Culture=neutral, PublicKeyToken=3750abcc3150b00c, processorArchitecture=MSIL"> <Reference Include="FirebirdSql.Data.FirebirdClient, Version=6.4.0.0, Culture=neutral, PublicKeyToken=3750abcc3150b00c, processorArchitecture=MSIL">
<HintPath>..\packages\FirebirdSql.Data.FirebirdClient.6.4.0\lib\net452\FirebirdSql.Data.FirebirdClient.dll</HintPath> <HintPath>..\packages\FirebirdSql.Data.FirebirdClient.6.4.0\lib\net452\FirebirdSql.Data.FirebirdClient.dll</HintPath>
</Reference> </Reference>
<Reference Include="GdPicture.NET.14">
<HintPath>D:\ProgramFiles\GdPicture.NET 14\Redist\GdPicture.NET (.NET Framework 4.5)\GdPicture.NET.14.dll</HintPath>
</Reference>
<Reference Include="Microsoft.CSharp" /> <Reference Include="Microsoft.CSharp" />
<Reference Include="NLog, Version=4.0.0.0, Culture=neutral, PublicKeyToken=5120e14c03d0593c, processorArchitecture=MSIL"> <Reference Include="NLog, Version=4.0.0.0, Culture=neutral, PublicKeyToken=5120e14c03d0593c, processorArchitecture=MSIL">
<HintPath>..\packages\NLog.4.6.8\lib\net45\NLog.dll</HintPath> <HintPath>..\packages\NLog.4.6.8\lib\net45\NLog.dll</HintPath>