WIP: PDF Attachment Extractor

This commit is contained in:
Jonathan Jenne
2020-03-04 16:42:06 +01:00
parent b995304ee3
commit c3c6a50992
6 changed files with 126 additions and 12 deletions

View File

@@ -42,6 +42,9 @@ Public Class ImportZUGFeRDFiles
</ul></p>
"""
' List of allowed extensions for PDF/A Attachments (extensions are given without a leading dot).
' This list is handed to the attachment extractor's Extract call for every processed file.
' The type is declared explicitly: a class-level field without an As clause is not inferred
' from its initializer and would otherwise be typed as Object (late-bound at every use).
Private AllowedExtensions As List(Of String) = New List(Of String) From {"docx", "doc", "pdf", "xls", "xlsx", "ppt", "pptx", "txt"}
Private _logger As Logger
Private _logConfig As LogConfig
Private _zugferd As ZUGFeRDInterface
@@ -318,6 +321,7 @@ Public Class ImportZUGFeRDFiles
Public Sub Start(Arguments As Object) Implements IJob.Start
Dim oArgs As WorkerArgs = Arguments
Dim oPropertyExtractor = New PropertyValues(_logConfig)
Dim oAttachmentExtractor = New PDFAttachments(_logConfig, oArgs.GDPictureKey)
_logger.Debug("Starting Job {0}", [GetType].Name)
@@ -404,10 +408,16 @@ Public Class ImportZUGFeRDFiles
_logger.Warn("Unexpected Error occurred while extracting ZUGFeRD Information from file {0}", oFile.FullName)
Throw ex
End Select
End Try
Dim oAttachments = oAttachmentExtractor.Extract(oFile.FullName, AllowedExtensions)
If oAttachments Is Nothing Then
_logger.Warn("Attachments for file [{0}] could not be extracted", oFile.FullName)
Else
oFileAttachmentFiles.AddRange(oFileGroupFiles)
oFileAttachmentFiles.AddRange(oAttachments)
End If
oMD5CheckSum = CreateMD5(oFile.FullName)
If oMD5CheckSum <> String.Empty Then
Dim oCheckCommand = $"SELECT * FROM TBEDM_ZUGFERD_HISTORY_IN WHERE GUID = (SELECT MAX(GUID) FROM TBEDM_ZUGFERD_HISTORY_IN WHERE UPPER(MD5HASH) = UPPER('{oMD5CheckSum}'))"