Auslesen von embedded Files aus einer XML-Datei

This commit is contained in:
PitzM 2025-06-02 11:47:15 +02:00
parent 0261d237b6
commit 7e70c059b6
3 changed files with 297 additions and 31 deletions

View File

@ -54,7 +54,7 @@ Public Class PropertyValues
ToDictionary(Function(Item) Item.Key, ToDictionary(Function(Item) Item.Key,
Function(Item) Item.Value) Function(Item) Item.Value)
_logger.Debug("Found {0} default properties.", oDefaultProperties.Count) _logger.Debug("Found {0} ungrouped properties.", oDefaultProperties.Count)
' PropertyMap items with `IsGrouped = True` are grouped by group scope ' PropertyMap items with `IsGrouped = True` are grouped by group scope
Dim oGroupedProperties = PropertyMap. Dim oGroupedProperties = PropertyMap.
@ -118,7 +118,7 @@ Public Class PropertyValues
' Returns nothing if oColumn.Value contains an empty list ' Returns nothing if oColumn.Value contains an empty list
Dim oPropertyValue = oColumn.Value.ElementAtOrDefault(oRowIndex) Dim oPropertyValue = oColumn.Value.ElementAtOrDefault(oRowIndex)
_logger.Debug("Processing itemSpecification *TableColumn* [{0}].", oTableColumn) _logger.Debug("Processing itemColumn *TableColumn* [{0}].", oTableColumn)
If oTableColumn = "INVOICE_SELLER_EMAIL" Then If oTableColumn = "INVOICE_SELLER_EMAIL" Then
Console.WriteLine("INVOICE_SELLER_EMAIL") Console.WriteLine("INVOICE_SELLER_EMAIL")
ElseIf oTableColumn = "INVOICE_POSITION_ARTICLE" Then ElseIf oTableColumn = "INVOICE_POSITION_ARTICLE" Then
@ -126,20 +126,28 @@ Public Class PropertyValues
End If End If
If IsNothing(oPropertyValue) OrElse String.IsNullOrEmpty(oPropertyValue) Then If IsNothing(oPropertyValue) OrElse String.IsNullOrEmpty(oPropertyValue) Then
If oColumn.Key.IsRequired Then If oColumn.Key.IsRequired Then
_logger.Warn($"{MessageId} # oPropertyValue for specification [{oTableColumn}] is empty or not found but is required. Continuing with Empty String.") _logger.Warn($"{MessageId} # oPropertyValue for column [{oTableColumn}] is empty or not found but is required. Continuing with Empty String.")
Dim oMissingProperty = New MissingProperty() With { Dim oMissingProperty = New MissingProperty() With {
.Description = oPropertyDescription, .Description = oPropertyDescription,
.XMLPath = oPropertyPath .XMLPath = oPropertyPath
} }
oResult.MissingProperties.Add(oMissingProperty) oResult.MissingProperties.Add(oMissingProperty)
Else Else
_logger.Debug($"{MessageId} # oPropertyValue for specification [{oTableColumn}] is empty or not found. Continuing with Empty String.") _logger.Debug($"{MessageId} # oPropertyValue for column [{oTableColumn}] is empty or not found. Continuing with Empty String.")
End If End If
oPropertyValue = String.Empty oPropertyValue = String.Empty
End If End If
_logger.Debug("ItemSpecification [{0}] has value '{1}'", oTableColumn, oPropertyValue) If (oPropertyValue IsNot Nothing) Then
Dim logValue As String = oPropertyValue.ToString()
If logValue.Length > 50 Then
_logger.Debug("Item [{0}] has value '{1}...'", oTableColumn, logValue.Substring(1, 50))
Else
_logger.Debug("Item [{0}] has value '{1}'", oTableColumn, oPropertyValue)
End If
End If
oResult.ValidProperties.Add(New ValidProperty() With { oResult.ValidProperties.Add(New ValidProperty() With {
.MessageId = MessageId, .MessageId = MessageId,
@ -290,7 +298,7 @@ Public Class PropertyValues
Obj = Obj(0) Obj = Obj(0)
End If End If
If IsArray(Obj) And Not oHasIndex Then If IsArray(Obj) And Not oHasIndex And oPart <> "Value" Then
Dim oCurrentPart As String = oPart Dim oCurrentPart As String = oPart
Dim oSplitString As String() = New String() {oCurrentPart & "."} Dim oSplitString As String() = New String() {oCurrentPart & "."}
Dim oPathFragments = PropertyName.Split(oSplitString, StringSplitOptions.None) Dim oPathFragments = PropertyName.Split(oSplitString, StringSplitOptions.None)
@ -339,8 +347,20 @@ Public Class PropertyValues
Select Case oCount Select Case oCount
Case 0 Case 0
Return Nothing Return Nothing
Case 1
Dim firstElement As Object
firstElement = oList.FirstOrDefault()
If firstElement IsNot Nothing AndAlso IsArray(firstElement) Then
' Attachments sind Byte-Arrays und müssen umgewandelt werden
Return Convert.ToBase64String(firstElement)
Else
Return DoGetFinalPropValue(oList.First())
End If
Case Else Case Else
Return DoGetFinalPropValue(oList.First()) Return DoGetFinalPropValue(oList.First())
End Select End Select
Return DoGetFinalPropValue(Value) Return DoGetFinalPropValue(Value)

View File

@ -33,11 +33,6 @@ Public Class HashFunctions
End If End If
' Check if Checksum exists in History Table ' Check if Checksum exists in History Table
'Dim oCheckCommand = $"SELECT * FROM TBEDM_ZUGFERD_HISTORY_IN WHERE GUID = (SELECT MAX(GUID) FROM TBEDM_ZUGFERD_HISTORY_IN WHERE UPPER(MD5HASH) = UPPER('{oMD5CheckSum}'))"
'Dim oTable As DataTable = _firebird.GetDatatable(oCheckCommand, Firebird.TransactionMode.NoTransaction)
' Check if Checksum exists in History Table
' TODO: WHAT THE FUCK IS THIS
Dim oCheckCommand = $"SELECT * FROM TBEMLP_HISTORY WHERE GUID = (SELECT MAX(GUID) FROM TBEMLP_HISTORY WHERE UPPER(MD5HASH) = UPPER('{oMD5CheckSum}'))" Dim oCheckCommand = $"SELECT * FROM TBEMLP_HISTORY WHERE GUID = (SELECT MAX(GUID) FROM TBEMLP_HISTORY WHERE UPPER(MD5HASH) = UPPER('{oMD5CheckSum}'))"
Dim oTable As DataTable = Database.GetDatatable(oCheckCommand, MSSQLServer.TransactionMode.NoTransaction) Dim oTable As DataTable = Database.GetDatatable(oCheckCommand, MSSQLServer.TransactionMode.NoTransaction)

View File

@ -11,6 +11,7 @@ Imports DigitalData.Modules.Interfaces.Exceptions
Imports DigitalData.Modules.Interfaces.PropertyValues Imports DigitalData.Modules.Interfaces.PropertyValues
Imports DigitalData.Modules.Jobs.Exceptions Imports DigitalData.Modules.Jobs.Exceptions
Imports DigitalData.Modules.Logging Imports DigitalData.Modules.Logging
Imports GdPicture14
Public Class ImportZUGFeRDFiles Public Class ImportZUGFeRDFiles
Implements IJob Implements IJob
@ -31,6 +32,11 @@ Public Class ImportZUGFeRDFiles
' This list should not contain xml so the zugferd xml file will be filtered out ' This list should not contain xml so the zugferd xml file will be filtered out
Private ReadOnly AllowedExtensions As New List(Of String) From {"docx", "doc", "pdf", "xls", "xlsx", "ppt", "pptx", "txt"} Private ReadOnly AllowedExtensions As New List(Of String) From {"docx", "doc", "pdf", "xls", "xlsx", "ppt", "pptx", "txt"}
' List of the Columns we need to store embedded files on disk and database
Private ReadOnly EmbeddedFilesColumnNames As List(Of String) = New List(Of String) From {
"ATTACHMENT_FILE_FILENAME", "ATTACHMENT_FILE_VALUE", "ATTACHMENT_FILE_MIMECODE"
}
Private ReadOnly _logger As Logger Private ReadOnly _logger As Logger
Private ReadOnly _logConfig As LogConfig Private ReadOnly _logConfig As LogConfig
Private ReadOnly _filesystem As FilesystemEx Private ReadOnly _filesystem As FilesystemEx
@ -563,7 +569,7 @@ Public Class ImportZUGFeRDFiles
Try Try
oDocument.ReceiptFileType = ZUGFeRDInterface.RECEIPT_TYPE_XML oDocument.ReceiptFileType = ZUGFeRDInterface.RECEIPT_TYPE_XML
Dim sqlResult As Boolean = StoreXMLItemsInDatabase(pMessageId, oDocument, pFile, pConnections, pArgs) Dim sqlResult As Boolean = StoreXMLItemsInDatabase(pMessageId, oDocument, pFile, pConnections, pArgs, oResult)
Catch ex As Exception Catch ex As Exception
Throw ex Throw ex
End Try End Try
@ -653,7 +659,7 @@ Public Class ImportZUGFeRDFiles
Try Try
oDocument.ReceiptFileType = ZUGFeRDInterface.RECEIPT_TYPE_PDF oDocument.ReceiptFileType = ZUGFeRDInterface.RECEIPT_TYPE_PDF
Dim sqlResult As Boolean = StoreXMLItemsInDatabase(pMessageId, oDocument, pFile, pConnections, pArgs) Dim sqlResult As Boolean = StoreXMLItemsInDatabase(pMessageId, oDocument, pFile, pConnections, pArgs, oResult)
Catch ex As Exception Catch ex As Exception
Throw ex Throw ex
End Try End Try
@ -670,7 +676,7 @@ Public Class ImportZUGFeRDFiles
End Function End Function
Private Function StoreXMLItemsInDatabase(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pFile As FileInfo, pConnections As DatabaseConnections, pArgs As WorkerArgs) As Boolean Private Function StoreXMLItemsInDatabase(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pFile As FileInfo, pConnections As DatabaseConnections, pArgs As WorkerArgs, pProcessFileResult As ProcessFileResult) As Boolean
' Check the document against the configured property map and return: ' Check the document against the configured property map and return:
' - a List of valid properties ' - a List of valid properties
' - a List of missing properties ' - a List of missing properties
@ -693,41 +699,286 @@ Public Class ImportZUGFeRDFiles
Throw New Exception("Bulk Insert failed! Exiting.") Throw New Exception("Bulk Insert failed! Exiting.")
End If End If
' TODO hier BAUSTELLE
' Eingebettete Dateien speichern ' Eingebettete Dateien speichern
'If CreateEmbeddedFilesOnDisk(pMessageId, pDocument, pConnections, oCheckResult) = False Then If HandleEmbeddedAttachments(pMessageId, pDocument, pConnections, oCheckResult, pArgs, pProcessFileResult) = False Then
' _logger.Debug("Files saving for MessageId [{0}] failed!", pMessageId) _logger.Debug("Files saving for MessageId [{0}] failed!", pMessageId)
'End If End If
Return True Return True
End Function End Function
Private Function CreateEmbeddedFilesOnDisk(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pConnections As DatabaseConnections, pCheckResult As CheckPropertyValuesResult) As Boolean ''' <summary>
''' Hier werden die Dateianhänge behandelt, die im XML als base64 gespeichert wurden
''' Die Knotendefinition muss ITEM_TYPE = 3 für den Dateiinhalt haben!
''' Die zusammengehörigen Knoten müssen über "FILES" gruppiert werden!
''' </summary>
Private Function HandleEmbeddedAttachments(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pConnections As DatabaseConnections, pCheckResult As CheckPropertyValuesResult, pArgs As WorkerArgs, pProcessFileResult As ProcessFileResult) As Boolean
' Finde alle Eintraege in pCheckResult mit Item_Type=3 ' TODO: klären!!! - Fehlerhandling? Ab wann liegt ein Fehler und damit eine Ablehnung vor?
' Finde Dateinamen (Index nach ~attm) und Dateityp. Wir speichern nur PDF.
' TODO Funktion aufrufen If (pCheckResult Is Nothing) Then
'SaveBase64ToDisk("", "") = False Then _logger.Debug("pCheckResult is empty!")
Return True
End If
If (CheckEmbeddedAttachmentEntries(pCheckResult) = False) Then
_logger.Debug("No embedded Files in XML found!")
Return True
End If
Dim embAttachmentList As List(Of ValidProperty) = pCheckResult.ValidProperties.Where(
Function(z)
Return EmbeddedFilesColumnNames.Contains(z.TableColumn)
End Function
).ToList()
If embAttachmentList Is Nothing OrElse embAttachmentList.Count <= 0 Then
_logger.Debug("No Fields for Embedded Files configured!")
Return True
End If
Dim oIndexList As HashSet(Of Integer) = New HashSet(Of Integer)
For Each resultItem In embAttachmentList
oIndexList.Add(resultItem.GroupCounter)
Next
Dim oOutputPath As String = GetOutputPathForEmbeddedAttachments(pArgs)
Dim nextAttachmentIndex As Integer = 0
nextAttachmentIndex = GetNextAttachmentIndex(pMessageId)
If nextAttachmentIndex <= 0 Then
nextAttachmentIndex = 1
End If
For Each groupIndex In oIndexList
Dim oMimeCodeString As String = String.Empty
Dim oOrgFilename As String = String.Empty
Dim oBase64String As String = String.Empty
Dim oMimeTypeProperty As ValidProperty = GetIndexProperty(embAttachmentList, groupIndex, "ATTACHMENT_FILE_MIMECODE")
If oMimeTypeProperty IsNot Nothing AndAlso oMimeTypeProperty.Value IsNot Nothing Then
oMimeCodeString = oMimeTypeProperty.Value
Else
_logger.Info("Empty MIME-Code! File can not be stored!")
Continue For
End If
If Not oMimeCodeString.Equals("application/pdf", StringComparison.InvariantCultureIgnoreCase) = True Then
_logger.Info("Not allowed MIME-Code! File will not be stored!")
Continue For
End If
Dim oFilenameProperty As ValidProperty = GetIndexProperty(embAttachmentList, groupIndex, "ATTACHMENT_FILE_FILENAME")
If oFilenameProperty IsNot Nothing Then
oOrgFilename = oFilenameProperty.Value
End If
Dim oBase64ValueProperty As ValidProperty = GetIndexProperty(embAttachmentList, groupIndex, "ATTACHMENT_FILE_VALUE")
If oBase64ValueProperty IsNot Nothing Then
oBase64String = oBase64ValueProperty.Value
Else
_logger.Warn("Empty base64 String")
Continue For
End If
Dim newAttachmentFilename = pMessageId + "~attm" + nextAttachmentIndex.ToString + ".pdf"
Dim embeddedFilePath = Path.Combine(oOutputPath, newAttachmentFilename)
If SaveBase64ToDisk(embeddedFilePath, oBase64String) = True Then
_logger.Debug("Saved file [{0}] to disk", embeddedFilePath)
pProcessFileResult.EmailAttachmentFiles.Add(New FileInfo(embeddedFilePath))
Else
_logger.Error("Could not save File to Disk!")
Return False
End If
If InsertAttachmentHistoryEntry(pMessageId, oOrgFilename, embeddedFilePath) = False Then
_logger.Error("Could not save attachment Data to DB!")
Return False
End If
If InsertEmbeddedFileData(pMessageId, oBase64String, oOrgFilename, oMimeCodeString, groupIndex) = False Then
_logger.Error("Could not save attachment Data to DB!")
Return False
End If
nextAttachmentIndex += 1
Next
Return True Return True
End Function End Function
Private Shared Function GetIndexProperty(pListResult As List(Of ValidProperty), pGroupIndex As Integer, pTableColumn As String) As ValidProperty
Return pListResult.Where(
Function(z)
Return z.GroupCounter = pGroupIndex AndAlso z.TableColumn = pTableColumn
End Function
).FirstOrDefault
End Function
''' <summary>
''' Speichert die Daten inkl. base64-String in die Datenbank
''' </summary>
Private Function InsertEmbeddedFileData(pMessageId As String, pItemValue As String, pOrgFilename As String, pMimeType As String, pGroupIndex As Integer) As Boolean
Try
Dim oCommand = New SqlCommand(
"INSERT INTO TBEDMI_ITEM_FILES (
REFERENCE_GUID,
ITEM_VALUE,
ORG_FILENAME,
MIME_TYPE,
GROUP_INDEX,
CREATED_WHO
) VALUES (
@MESSAGE_ID,
@ITEM_VALUE,
@ORG_FILENAME,
@MIME_TYPE,
@GROUP_INDEX,
@CREATED_WHO
)")
oCommand.Parameters.Add("MESSAGE_ID", SqlDbType.VarChar, 250).Value = pMessageId
oCommand.Parameters.Add("ITEM_VALUE", SqlDbType.VarChar).Value = pItemValue
oCommand.Parameters.Add("ORG_FILENAME", SqlDbType.VarChar, 256).Value = pOrgFilename
oCommand.Parameters.Add("MIME_TYPE", SqlDbType.VarChar, 256).Value = pMimeType
oCommand.Parameters.Add("GROUP_INDEX", SqlDbType.Int).Value = pGroupIndex
oCommand.Parameters.Add("CREATED_WHO", SqlDbType.VarChar, 100).Value = "eInvoice Parser"
_mssql.ExecuteNonQuery(oCommand)
Return True
Catch ex As Exception
_logger.Error(ex)
Return False
End Try
End Function
''' <summary>
''' Ermittelt den Ausgabepfad für die eingebetteten Anhänge
''' </summary>
Private Function GetOutputPathForEmbeddedAttachments(pArgs As WorkerArgs) As String
Return pArgs.WatchDirectory
End Function
''' <summary>
''' Prüft, ob Embedded Attachments in den XML-Ergebnissen enthalten sind
''' </summary>
''' <param name="pCheckResult"></param>
''' <returns></returns>
Private Function CheckEmbeddedAttachmentEntries(pCheckResult As CheckPropertyValuesResult) As Boolean
Try
Dim resultList = pCheckResult.ValidProperties.Where(
Function(z)
Return z.TableColumn = "ATTACHMENT_FILE_VALUE"
End Function
).ToList()
If resultList.Count > 0 Then
_logger.Info("Found [{0}] embedded XML-Attachments.", resultList.Count)
Return True
Else
_logger.Info("No embedded XML-Attachments found.")
Return False
End If
Catch ex As Exception
_logger.Error("Error searching pCheckResult! {0}", ex.Message)
Return False
End Try
End Function
''' <summary>
''' Speichere base64 als Datei auf der Platte ab
''' </summary>
Private Function SaveBase64ToDisk(pExportFilePath As String, pBase64String As String) As Boolean Private Function SaveBase64ToDisk(pExportFilePath As String, pBase64String As String) As Boolean
Try Try
Dim base64BinaryDataString As String = pBase64String ' Hier Base64-String einfügen Dim base64BinaryDataString As String = pBase64String ' Hier Base64-String einfügen
Dim binaryDataString As Byte() = System.Convert.FromBase64String(base64BinaryDataString) Dim binaryDataString As Byte() = Convert.FromBase64String(base64BinaryDataString)
Dim oFilename As String = pExportFilePath Dim oFilename As String = pExportFilePath
Dim Stream As System.IO.FileStream = New System.IO.FileStream(oFilename, System.IO.FileMode.Create) ' Using verwenden, um blockieren des PDF zu verhindern
Using Stream As FileStream = New FileStream(oFilename, FileMode.Create)
Stream.Write(binaryDataString, 0, binaryDataString.Length) Stream.Write(binaryDataString, 0, binaryDataString.Length)
Stream.Close() Stream.Close()
Catch ex As Exception End Using
_logger.Error("Could NOT save File to Disk for MessageId [{0}] !", pExportFilePath)
Dim oGdPicturePDF As New GdPicturePDF
Dim oStatus As GdPictureStatus = oGdPicturePDF.LoadFromFile(pExportFilePath, True)
If oStatus <> GdPictureStatus.OK Then
_logger.Error("File [{0}] has no proper state!", pExportFilePath)
Return False
End If
Catch ex As Exception
_logger.Error("Could NOT save File [{0}] to Disk! Exception: [{1}]", pExportFilePath, ex.Message)
Return False
End Try End Try
Return True Return True
End Function End Function
''' <summary>
''' Die Methode lädt die bisherigen Dateinamen zu einer MessageID
''' Die Datei mit dem höchsten Index gibt den folgenden Index vor.
''' </summary>
''' <returns>Nächster Attachment Index</returns>
Private Function GetNextAttachmentIndex(pMessageId As String) As Integer
Try
Dim oSQL = $"SELECT count(*) FROM TBEMLP_HISTORY_ATTACHMENT WHERE EMAIL_MSGID = '{pMessageId}'"
Dim sqlResult = _mssql.GetScalarValue(oSQL)
If sqlResult = 0 Then
sqlResult = 1 ' Kleinster Index = 1
End If
Return sqlResult
Catch ex As Exception
_logger.Error(ex)
Throw ex
End Try
End Function
''' <summary>
''' Speichert die Infos zu einem embedded Dateianhang in die DB.
''' Mangels EMail-Daten werden die EMail-Felder (FROM, BODY, usw.) nicht gefüllt
''' </summary>
''' <returns>true, wenn erfolgreich</returns>
Private Function InsertAttachmentHistoryEntry(pMessageId As String, pFileName As String, pNewFileName As String) As Boolean
Try
Dim oCommand = New SqlCommand(
"INSERT INTO TBEMLP_HISTORY_ATTACHMENT (
WORK_PROCESS,
EMAIL_MSGID,
EMAIL_ATTMT,
EMAIL_ATTMT_INDEX,
EMAIL_FROM,
EMAIL_BODY
) VALUES (
@WORK_PROCESS,
@MESSAGE_ID,
@ATTACHMENT,
@ATTACHMENT_INDEX,
'-',
'-'
)")
oCommand.Parameters.Add("WORK_PROCESS", SqlDbType.VarChar, 100).Value = "Attachment Sniffer (Embedded Files)"
oCommand.Parameters.Add("MESSAGE_ID", SqlDbType.VarChar, 500).Value = pMessageId
oCommand.Parameters.Add("ATTACHMENT", SqlDbType.VarChar, 500).Value = pFileName
oCommand.Parameters.Add("ATTACHMENT_INDEX", SqlDbType.VarChar, 500).Value = pNewFileName
_mssql.ExecuteNonQuery(oCommand)
Return True
Catch ex As Exception
_logger.Error(ex)
Return False
End Try
End Function
Private Function BulkInsertDataToDatabase(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pConnections As DatabaseConnections, pCheckResults As CheckPropertyValuesResult) As Boolean Private Function BulkInsertDataToDatabase(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pConnections As DatabaseConnections, pCheckResults As CheckPropertyValuesResult) As Boolean
If DeleteExistingPropertyValues(pMessageId, pConnections) = False Then If DeleteExistingPropertyValues(pMessageId, pConnections) = False Then
Throw New Exception("Could not cleanup data. Exiting.") Throw New Exception("Could not cleanup data. Exiting.")