Auslesen von embedded Files aus einer XML-Datei

This commit is contained in:
PitzM 2025-06-02 11:47:15 +02:00
parent 0261d237b6
commit 7e70c059b6
3 changed files with 297 additions and 31 deletions

View File

@ -54,7 +54,7 @@ Public Class PropertyValues
ToDictionary(Function(Item) Item.Key,
Function(Item) Item.Value)
_logger.Debug("Found {0} default properties.", oDefaultProperties.Count)
_logger.Debug("Found {0} ungrouped properties.", oDefaultProperties.Count)
' PropertyMap items with `IsGrouped = True` are grouped by group scope
Dim oGroupedProperties = PropertyMap.
@ -118,7 +118,7 @@ Public Class PropertyValues
' Returns nothing if oColumn.Value contains an empty list
Dim oPropertyValue = oColumn.Value.ElementAtOrDefault(oRowIndex)
_logger.Debug("Processing itemSpecification *TableColumn* [{0}].", oTableColumn)
_logger.Debug("Processing itemColumn *TableColumn* [{0}].", oTableColumn)
If oTableColumn = "INVOICE_SELLER_EMAIL" Then
Console.WriteLine("INVOICE_SELLER_EMAIL")
ElseIf oTableColumn = "INVOICE_POSITION_ARTICLE" Then
@ -126,20 +126,28 @@ Public Class PropertyValues
End If
If IsNothing(oPropertyValue) OrElse String.IsNullOrEmpty(oPropertyValue) Then
If oColumn.Key.IsRequired Then
_logger.Warn($"{MessageId} # oPropertyValue for specification [{oTableColumn}] is empty or not found but is required. Continuing with Empty String.")
_logger.Warn($"{MessageId} # oPropertyValue for column [{oTableColumn}] is empty or not found but is required. Continuing with Empty String.")
Dim oMissingProperty = New MissingProperty() With {
.Description = oPropertyDescription,
.XMLPath = oPropertyPath
}
oResult.MissingProperties.Add(oMissingProperty)
Else
_logger.Debug($"{MessageId} # oPropertyValue for specification [{oTableColumn}] is empty or not found. Continuing with Empty String.")
_logger.Debug($"{MessageId} # oPropertyValue for column [{oTableColumn}] is empty or not found. Continuing with Empty String.")
End If
oPropertyValue = String.Empty
End If
_logger.Debug("ItemSpecification [{0}] has value '{1}'", oTableColumn, oPropertyValue)
If (oPropertyValue IsNot Nothing) Then
Dim logValue As String = oPropertyValue.ToString()
If logValue.Length > 50 Then
_logger.Debug("Item [{0}] has value '{1}...'", oTableColumn, logValue.Substring(1, 50))
Else
_logger.Debug("Item [{0}] has value '{1}'", oTableColumn, oPropertyValue)
End If
End If
oResult.ValidProperties.Add(New ValidProperty() With {
.MessageId = MessageId,
@ -290,7 +298,7 @@ Public Class PropertyValues
Obj = Obj(0)
End If
If IsArray(Obj) And Not oHasIndex Then
If IsArray(Obj) And Not oHasIndex And oPart <> "Value" Then
Dim oCurrentPart As String = oPart
Dim oSplitString As String() = New String() {oCurrentPart & "."}
Dim oPathFragments = PropertyName.Split(oSplitString, StringSplitOptions.None)
@ -339,8 +347,20 @@ Public Class PropertyValues
Select Case oCount
Case 0
Return Nothing
Case 1
Dim firstElement As Object
firstElement = oList.FirstOrDefault()
If firstElement IsNot Nothing AndAlso IsArray(firstElement) Then
' Attachments sind Byte-Arrays und müssen umgewandelt werden
Return Convert.ToBase64String(firstElement)
Else
Return DoGetFinalPropValue(oList.First())
End If
Case Else
Return DoGetFinalPropValue(oList.First())
End Select
Return DoGetFinalPropValue(Value)

View File

@ -33,11 +33,6 @@ Public Class HashFunctions
End If
' Check if Checksum exists in History Table
'Dim oCheckCommand = $"SELECT * FROM TBEDM_ZUGFERD_HISTORY_IN WHERE GUID = (SELECT MAX(GUID) FROM TBEDM_ZUGFERD_HISTORY_IN WHERE UPPER(MD5HASH) = UPPER('{oMD5CheckSum}'))"
'Dim oTable As DataTable = _firebird.GetDatatable(oCheckCommand, Firebird.TransactionMode.NoTransaction)
' Check if Checksum exists in History Table
' TODO: WHAT THE FUCK IS THIS
Dim oCheckCommand = $"SELECT * FROM TBEMLP_HISTORY WHERE GUID = (SELECT MAX(GUID) FROM TBEMLP_HISTORY WHERE UPPER(MD5HASH) = UPPER('{oMD5CheckSum}'))"
Dim oTable As DataTable = Database.GetDatatable(oCheckCommand, MSSQLServer.TransactionMode.NoTransaction)

View File

@ -11,6 +11,7 @@ Imports DigitalData.Modules.Interfaces.Exceptions
Imports DigitalData.Modules.Interfaces.PropertyValues
Imports DigitalData.Modules.Jobs.Exceptions
Imports DigitalData.Modules.Logging
Imports GdPicture14
Public Class ImportZUGFeRDFiles
Implements IJob
@ -31,6 +32,11 @@ Public Class ImportZUGFeRDFiles
' This list should not contain xml so the zugferd xml file will be filtered out
Private ReadOnly AllowedExtensions As New List(Of String) From {"docx", "doc", "pdf", "xls", "xlsx", "ppt", "pptx", "txt"}
' List of the Columns we need to store embedded files on disk and database
Private ReadOnly EmbeddedFilesColumnNames As List(Of String) = New List(Of String) From {
"ATTACHMENT_FILE_FILENAME", "ATTACHMENT_FILE_VALUE", "ATTACHMENT_FILE_MIMECODE"
}
Private ReadOnly _logger As Logger
Private ReadOnly _logConfig As LogConfig
Private ReadOnly _filesystem As FilesystemEx
@ -563,7 +569,7 @@ Public Class ImportZUGFeRDFiles
Try
oDocument.ReceiptFileType = ZUGFeRDInterface.RECEIPT_TYPE_XML
Dim sqlResult As Boolean = StoreXMLItemsInDatabase(pMessageId, oDocument, pFile, pConnections, pArgs)
Dim sqlResult As Boolean = StoreXMLItemsInDatabase(pMessageId, oDocument, pFile, pConnections, pArgs, oResult)
Catch ex As Exception
Throw ex
End Try
@ -653,7 +659,7 @@ Public Class ImportZUGFeRDFiles
Try
oDocument.ReceiptFileType = ZUGFeRDInterface.RECEIPT_TYPE_PDF
Dim sqlResult As Boolean = StoreXMLItemsInDatabase(pMessageId, oDocument, pFile, pConnections, pArgs)
Dim sqlResult As Boolean = StoreXMLItemsInDatabase(pMessageId, oDocument, pFile, pConnections, pArgs, oResult)
Catch ex As Exception
Throw ex
End Try
@ -670,7 +676,7 @@ Public Class ImportZUGFeRDFiles
End Function
Private Function StoreXMLItemsInDatabase(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pFile As FileInfo, pConnections As DatabaseConnections, pArgs As WorkerArgs) As Boolean
Private Function StoreXMLItemsInDatabase(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pFile As FileInfo, pConnections As DatabaseConnections, pArgs As WorkerArgs, pProcessFileResult As ProcessFileResult) As Boolean
' Check the document against the configured property map and return:
' - a List of valid properties
' - a List of missing properties
@ -693,41 +699,286 @@ Public Class ImportZUGFeRDFiles
Throw New Exception("Bulk Insert failed! Exiting.")
End If
' TODO hier BAUSTELLE
' Eingebettete Dateien speichern
'If CreateEmbeddedFilesOnDisk(pMessageId, pDocument, pConnections, oCheckResult) = False Then
' _logger.Debug("Files saving for MessageId [{0}] failed!", pMessageId)
'End If
If HandleEmbeddedAttachments(pMessageId, pDocument, pConnections, oCheckResult, pArgs, pProcessFileResult) = False Then
_logger.Debug("Files saving for MessageId [{0}] failed!", pMessageId)
End If
Return True
End Function
Private Function CreateEmbeddedFilesOnDisk(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pConnections As DatabaseConnections, pCheckResult As CheckPropertyValuesResult) As Boolean
''' <summary>
''' Hier werden die Dateianhänge behandelt, die im XML als base64 gespeichert wurden
''' Die Knotendefinition muss ITEM_TYPE = 3 für den Dateiinhalt haben!
''' Die zusammengehörigen Knoten müssen über "FILES" gruppiert werden!
''' </summary>
Private Function HandleEmbeddedAttachments(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pConnections As DatabaseConnections, pCheckResult As CheckPropertyValuesResult, pArgs As WorkerArgs, pProcessFileResult As ProcessFileResult) As Boolean
' Finde alle Eintraege in pCheckResult mit Item_Type=3
' Finde Dateinamen (Index nach ~attm) und Dateityp. Wir speichern nur PDF.
' TODO Funktion aufrufen
'SaveBase64ToDisk("", "") = False Then
' TODO: klären!!! - Fehlerhandling? Ab wann liegt ein Fehler und damit eine Ablehnung vor?
If (pCheckResult Is Nothing) Then
_logger.Debug("pCheckResult is empty!")
Return True
End If
If (CheckEmbeddedAttachmentEntries(pCheckResult) = False) Then
_logger.Debug("No embedded Files in XML found!")
Return True
End If
Dim embAttachmentList As List(Of ValidProperty) = pCheckResult.ValidProperties.Where(
Function(z)
Return EmbeddedFilesColumnNames.Contains(z.TableColumn)
End Function
).ToList()
If embAttachmentList Is Nothing OrElse embAttachmentList.Count <= 0 Then
_logger.Debug("No Fields for Embedded Files configured!")
Return True
End If
Dim oIndexList As HashSet(Of Integer) = New HashSet(Of Integer)
For Each resultItem In embAttachmentList
oIndexList.Add(resultItem.GroupCounter)
Next
Dim oOutputPath As String = GetOutputPathForEmbeddedAttachments(pArgs)
Dim nextAttachmentIndex As Integer = 0
nextAttachmentIndex = GetNextAttachmentIndex(pMessageId)
If nextAttachmentIndex <= 0 Then
nextAttachmentIndex = 1
End If
For Each groupIndex In oIndexList
Dim oMimeCodeString As String = String.Empty
Dim oOrgFilename As String = String.Empty
Dim oBase64String As String = String.Empty
Dim oMimeTypeProperty As ValidProperty = GetIndexProperty(embAttachmentList, groupIndex, "ATTACHMENT_FILE_MIMECODE")
If oMimeTypeProperty IsNot Nothing AndAlso oMimeTypeProperty.Value IsNot Nothing Then
oMimeCodeString = oMimeTypeProperty.Value
Else
_logger.Info("Empty MIME-Code! File can not be stored!")
Continue For
End If
If Not oMimeCodeString.Equals("application/pdf", StringComparison.InvariantCultureIgnoreCase) = True Then
_logger.Info("Not allowed MIME-Code! File will not be stored!")
Continue For
End If
Dim oFilenameProperty As ValidProperty = GetIndexProperty(embAttachmentList, groupIndex, "ATTACHMENT_FILE_FILENAME")
If oFilenameProperty IsNot Nothing Then
oOrgFilename = oFilenameProperty.Value
End If
Dim oBase64ValueProperty As ValidProperty = GetIndexProperty(embAttachmentList, groupIndex, "ATTACHMENT_FILE_VALUE")
If oBase64ValueProperty IsNot Nothing Then
oBase64String = oBase64ValueProperty.Value
Else
_logger.Warn("Empty base64 String")
Continue For
End If
Dim newAttachmentFilename = pMessageId + "~attm" + nextAttachmentIndex.ToString + ".pdf"
Dim embeddedFilePath = Path.Combine(oOutputPath, newAttachmentFilename)
If SaveBase64ToDisk(embeddedFilePath, oBase64String) = True Then
_logger.Debug("Saved file [{0}] to disk", embeddedFilePath)
pProcessFileResult.EmailAttachmentFiles.Add(New FileInfo(embeddedFilePath))
Else
_logger.Error("Could not save File to Disk!")
Return False
End If
If InsertAttachmentHistoryEntry(pMessageId, oOrgFilename, embeddedFilePath) = False Then
_logger.Error("Could not save attachment Data to DB!")
Return False
End If
If InsertEmbeddedFileData(pMessageId, oBase64String, oOrgFilename, oMimeCodeString, groupIndex) = False Then
_logger.Error("Could not save attachment Data to DB!")
Return False
End If
nextAttachmentIndex += 1
Next
Return True
End Function
Private Shared Function GetIndexProperty(pListResult As List(Of ValidProperty), pGroupIndex As Integer, pTableColumn As String) As ValidProperty
Return pListResult.Where(
Function(z)
Return z.GroupCounter = pGroupIndex AndAlso z.TableColumn = pTableColumn
End Function
).FirstOrDefault
End Function
''' <summary>
''' Speichert die Daten inkl. base64-String in die Datenbank
''' </summary>
Private Function InsertEmbeddedFileData(pMessageId As String, pItemValue As String, pOrgFilename As String, pMimeType As String, pGroupIndex As Integer) As Boolean
Try
Dim oCommand = New SqlCommand(
"INSERT INTO TBEDMI_ITEM_FILES (
REFERENCE_GUID,
ITEM_VALUE,
ORG_FILENAME,
MIME_TYPE,
GROUP_INDEX,
CREATED_WHO
) VALUES (
@MESSAGE_ID,
@ITEM_VALUE,
@ORG_FILENAME,
@MIME_TYPE,
@GROUP_INDEX,
@CREATED_WHO
)")
oCommand.Parameters.Add("MESSAGE_ID", SqlDbType.VarChar, 250).Value = pMessageId
oCommand.Parameters.Add("ITEM_VALUE", SqlDbType.VarChar).Value = pItemValue
oCommand.Parameters.Add("ORG_FILENAME", SqlDbType.VarChar, 256).Value = pOrgFilename
oCommand.Parameters.Add("MIME_TYPE", SqlDbType.VarChar, 256).Value = pMimeType
oCommand.Parameters.Add("GROUP_INDEX", SqlDbType.Int).Value = pGroupIndex
oCommand.Parameters.Add("CREATED_WHO", SqlDbType.VarChar, 100).Value = "eInvoice Parser"
_mssql.ExecuteNonQuery(oCommand)
Return True
Catch ex As Exception
_logger.Error(ex)
Return False
End Try
End Function
''' <summary>
''' Ermittelt den Ausgabepfad für die eingebetteten Anhänge
''' </summary>
Private Function GetOutputPathForEmbeddedAttachments(pArgs As WorkerArgs) As String
Return pArgs.WatchDirectory
End Function
''' <summary>
''' Prüft, ob Embedded Attachments in den XML-Ergebnissen enthalten sind
''' </summary>
''' <param name="pCheckResult"></param>
''' <returns></returns>
Private Function CheckEmbeddedAttachmentEntries(pCheckResult As CheckPropertyValuesResult) As Boolean
Try
Dim resultList = pCheckResult.ValidProperties.Where(
Function(z)
Return z.TableColumn = "ATTACHMENT_FILE_VALUE"
End Function
).ToList()
If resultList.Count > 0 Then
_logger.Info("Found [{0}] embedded XML-Attachments.", resultList.Count)
Return True
Else
_logger.Info("No embedded XML-Attachments found.")
Return False
End If
Catch ex As Exception
_logger.Error("Error searching pCheckResult! {0}", ex.Message)
Return False
End Try
End Function
''' <summary>
''' Speichere base64 als Datei auf der Platte ab
''' </summary>
Private Function SaveBase64ToDisk(pExportFilePath As String, pBase64String As String) As Boolean
Try
Dim base64BinaryDataString As String = pBase64String ' Hier Base64-String einfügen
Dim binaryDataString As Byte() = System.Convert.FromBase64String(base64BinaryDataString)
Dim binaryDataString As Byte() = Convert.FromBase64String(base64BinaryDataString)
Dim oFilename As String = pExportFilePath
Dim Stream As System.IO.FileStream = New System.IO.FileStream(oFilename, System.IO.FileMode.Create)
' Using verwenden, um blockieren des PDF zu verhindern
Using Stream As FileStream = New FileStream(oFilename, FileMode.Create)
Stream.Write(binaryDataString, 0, binaryDataString.Length)
Stream.Close()
Catch ex As Exception
_logger.Error("Could NOT save File to Disk for MessageId [{0}] !", pExportFilePath)
End Using
Dim oGdPicturePDF As New GdPicturePDF
Dim oStatus As GdPictureStatus = oGdPicturePDF.LoadFromFile(pExportFilePath, True)
If oStatus <> GdPictureStatus.OK Then
_logger.Error("File [{0}] has no proper state!", pExportFilePath)
Return False
End If
Catch ex As Exception
_logger.Error("Could NOT save File [{0}] to Disk! Exception: [{1}]", pExportFilePath, ex.Message)
Return False
End Try
Return True
End Function
''' <summary>
''' Die Methode lädt die bisherigen Dateinamen zu einer MessageID
''' Die Datei mit dem höchsten Index gibt den folgenden Index vor.
''' </summary>
''' <returns>Nächster Attachment Index</returns>
Private Function GetNextAttachmentIndex(pMessageId As String) As Integer
Try
Dim oSQL = $"SELECT count(*) FROM TBEMLP_HISTORY_ATTACHMENT WHERE EMAIL_MSGID = '{pMessageId}'"
Dim sqlResult = _mssql.GetScalarValue(oSQL)
If sqlResult = 0 Then
sqlResult = 1 ' Kleinster Index = 1
End If
Return sqlResult
Catch ex As Exception
_logger.Error(ex)
Throw ex
End Try
End Function
''' <summary>
''' Speichert die Infos zu einem embedded Dateianhang in die DB.
''' Mangels EMail-Daten werden die EMail-Felder (FROM, BODY, usw.) nicht gefüllt
''' </summary>
''' <returns>true, wenn erfolgreich</returns>
Private Function InsertAttachmentHistoryEntry(pMessageId As String, pFileName As String, pNewFileName As String) As Boolean
Try
Dim oCommand = New SqlCommand(
"INSERT INTO TBEMLP_HISTORY_ATTACHMENT (
WORK_PROCESS,
EMAIL_MSGID,
EMAIL_ATTMT,
EMAIL_ATTMT_INDEX,
EMAIL_FROM,
EMAIL_BODY
) VALUES (
@WORK_PROCESS,
@MESSAGE_ID,
@ATTACHMENT,
@ATTACHMENT_INDEX,
'-',
'-'
)")
oCommand.Parameters.Add("WORK_PROCESS", SqlDbType.VarChar, 100).Value = "Attachment Sniffer (Embedded Files)"
oCommand.Parameters.Add("MESSAGE_ID", SqlDbType.VarChar, 500).Value = pMessageId
oCommand.Parameters.Add("ATTACHMENT", SqlDbType.VarChar, 500).Value = pFileName
oCommand.Parameters.Add("ATTACHMENT_INDEX", SqlDbType.VarChar, 500).Value = pNewFileName
_mssql.ExecuteNonQuery(oCommand)
Return True
Catch ex As Exception
_logger.Error(ex)
Return False
End Try
End Function
Private Function BulkInsertDataToDatabase(pMessageId As String, pDocument As ZUGFeRDInterface.ZugferdResult, pConnections As DatabaseConnections, pCheckResults As CheckPropertyValuesResult) As Boolean
If DeleteExistingPropertyValues(pMessageId, pConnections) = False Then
Throw New Exception("Could not cleanup data. Exiting.")