Imports System.Collections.Generic
Imports System.IO
Imports System.Linq
Imports System.Reflection
Imports System.Runtime.Remoting.Messaging
Imports DigitalData.Modules.Logging
Imports GdPicture14

''' <summary>
''' Extracts embedded files (attachments) from PDF documents and removes them, using GdPicture.
''' </summary>
Public Class PDFEmbeds
    Private ReadOnly Logger As Logger

    Public Const ZUGFERD_XML_FILENAME = "ZUGFeRD-invoice.xml"
    Public Const FACTUR_X_XML_FILENAME_FR = "factur-x.xml"
    Public Const FACTUR_X_XML_FILENAME_DE = "xrechnung.xml"

    ''' <summary>
    ''' Name and raw content of one file that was embedded in a PDF.
    ''' </summary>
    Public Class EmbeddedFile
        Public FileName As String
        Public FileContents As Byte()
    End Class

    ''' <summary>
    ''' Creates a new instance which logs through a logger obtained from the given configuration.
    ''' </summary>
    ''' <param name="LogConfig">Logging configuration used to create the logger</param>
    Public Sub New(LogConfig As LogConfig)
        Logger = LogConfig.GetLogger
    End Sub

    ''' <summary>
    ''' Extracts all embedded files from a PDF file.
    ''' Note: This does NOT filter out `ZUGFeRD-invoice.xml` anymore to allow for a more generic use.
    ''' </summary>
    ''' <param name="FilePath">Filepath of the pdf</param>
    ''' <param name="AllowedExtensions">List of allowed extensions to be extracted (case-insensitive, with or without leading dot)</param>
    ''' <returns>The extracted files, or Nothing if the PDF could not be loaded or the extraction failed.</returns>
    Public Function Extract(FilePath As String, AllowedExtensions As List(Of String)) As List(Of EmbeddedFile)
        Dim oExtensions = NormalizeExtensions(AllowedExtensions)

        Logger.Debug("Extracting embedded files from [{0}]", FilePath)
        LogFileInfo(FilePath)

        Try
            Using oGDPicturePDF As New GdPicturePDF()
                If oGDPicturePDF.LoadFromFile(FilePath, False) <> GdPictureStatus.OK Then
                    Dim oMessage = String.Format("The file [{0}] can't be loaded. Status: [{1}]", FilePath, oGDPicturePDF.GetStat().ToString())
                    Throw New ApplicationException(oMessage)
                End If

                Return DoExtract(oGDPicturePDF, oExtensions)
            End Using

        Catch ex As Exception
            Logger.Warn("Unexpected Error while Extracting attachments from File [{0}]", FilePath)
            Logger.Error(ex)
            ' Callers receive Nothing (not an empty list) to signal a failure.
            Return Nothing
        End Try
    End Function

    ''' <summary>
    ''' Extracts all embedded files from a PDF file.
    ''' Note: This does NOT filter out `ZUGFeRD-invoice.xml` anymore to allow for a more generic use.
    ''' </summary>
    ''' <param name="Stream">Filestream of the pdf</param>
    ''' <param name="AllowedExtensions">List of allowed extensions to be extracted (case-insensitive, with or without leading dot)</param>
    ''' <returns>The extracted files, or Nothing if the PDF could not be loaded or the extraction failed.</returns>
    Public Function Extract(Stream As Stream, AllowedExtensions As List(Of String)) As List(Of EmbeddedFile)
        Dim oExtensions = NormalizeExtensions(AllowedExtensions)

        Logger.Debug("Extracting embedded files from stream")

        Try
            Using oGDPicturePDF As New GdPicturePDF()
                If oGDPicturePDF.LoadFromStream(Stream, False) <> GdPictureStatus.OK Then
                    Dim oMessage = String.Format("The filestream can't be loaded. Status: [{0}]", oGDPicturePDF.GetStat().ToString())
                    Throw New ApplicationException(oMessage)
                End If

                Return DoExtract(oGDPicturePDF, oExtensions)
            End Using

        Catch ex As Exception
            Logger.Warn("Unexpected Error while Extracting attachments from Filestream")
            Logger.Error(ex)
            ' Callers receive Nothing (not an empty list) to signal a failure.
            Return Nothing
        End Try
    End Function

    ''' <summary>
    ''' Loads the given PDF file and deletes all embedded files from the loaded document.
    ''' </summary>
    ''' <param name="pFilePath">Filepath of the pdf</param>
    ''' <returns>True if the document was loaded and all embedded files were deleted, otherwise False.</returns>
    Public Function RemoveEmbeddedFiles(pFilePath As String) As Boolean
        Logger.Debug("Removing embedded files from [{0}]", pFilePath)
        LogFileInfo(pFilePath)

        Try
            Using oGDPicturePDF As New GdPicturePDF()
                If oGDPicturePDF.LoadFromFile(pFilePath, False) <> GdPictureStatus.OK Then
                    Dim oMessage = String.Format("The file [{0}] can't be loaded. Status: [{1}]", pFilePath, oGDPicturePDF.GetStat().ToString())
                    Throw New ApplicationException(oMessage)
                End If

                If DoRemove(oGDPicturePDF) = False Then
                    Dim oMessage = String.Format("Attachments for file [{0}] can't be removed. Status: [{1}]", pFilePath, oGDPicturePDF.GetStat().ToString())
                    Throw New ApplicationException(oMessage)
                End If

                ' NOTE(review): the document is never saved after the attachments were deleted,
                ' so the file on disk is presumably left unchanged. Persisting the change would
                ' probably require a save call (and possibly loading the file into memory) -
                ' TODO confirm against the GdPicture documentation and the callers' expectations.
            End Using

            Return True

        Catch ex As Exception
            Logger.Warn("Unexpected Error while Removing attachments from File [{0}]", pFilePath)
            Logger.Error(ex)
            Return False
        End Try
    End Function

    ''' <summary>
    ''' Deletes every embedded file from an already loaded document.
    ''' </summary>
    ''' <param name="GDPicturePDF">The loaded pdf document</param>
    ''' <returns>True if no embedded files exist or all were deleted, False if a GdPicture call reported an error.</returns>
    Private Function DoRemove(GDPicturePDF As GdPicturePDF) As Boolean
        Dim oEmbeddedFileCount As Integer = GDPicturePDF.GetEmbeddedFileCount()
        Dim oStatus As GdPictureStatus = GDPicturePDF.GetStat()

        If oStatus <> GdPictureStatus.OK Then
            Logger.Warn("Embedded files could not be removed. Status: [{0}]", oStatus.ToString)
            Return False
        End If

        ' Always delete the first remaining entry. The loop is bounded by the initial
        ' count so that a failing delete can never result in an endless loop.
        For oRun = 1 To oEmbeddedFileCount
            GDPicturePDF.DeleteEmbeddedFile(0)
            oStatus = GDPicturePDF.GetStat()

            If oStatus <> GdPictureStatus.OK Then
                Logger.Warn("Embedded files could not be removed. Status: [{0}]", oStatus.ToString)
                Return False
            End If
        Next

        Return True
    End Function

    ''' <summary>
    ''' Extracts all embedded files with an allowed extension from an already loaded document.
    ''' Files which fail to extract are logged and skipped.
    ''' </summary>
    ''' <param name="GDPicturePDF">The loaded pdf document</param>
    ''' <param name="pExtensions">Allowed extensions, upper-case and without leading dot</param>
    ''' <returns>List of the extracted files, empty if nothing matched.</returns>
    ''' <exception cref="ApplicationException">The number of embedded files could not be determined.</exception>
    Private Function DoExtract(GDPicturePDF As GdPicturePDF, pExtensions As List(Of String)) As List(Of EmbeddedFile)
        Dim oResults As New List(Of EmbeddedFile)
        Dim oEmbeddedFileCount As Integer = GDPicturePDF.GetEmbeddedFileCount()

        If GDPicturePDF.GetStat() <> GdPictureStatus.OK Then
            Dim oMessage = String.Format("An error occurred getting the number of embedded files. Status: {0}", GDPicturePDF.GetStat().ToString())
            Throw New ApplicationException(oMessage)
        End If

        Logger.Debug("Embedded file count is: [{0}]", oEmbeddedFileCount)

        For oIndex = 0 To oEmbeddedFileCount - 1
            Dim oFileName As String = GDPicturePDF.GetEmbeddedFileName(oIndex)

            If GDPicturePDF.GetStat() <> GdPictureStatus.OK Then
                Logger.Error("An error occurred getting the file name for [{0}]. Status: {1}", oFileName, GDPicturePDF.GetStat().ToString())
                Continue For
            End If

            Logger.Debug("Extracting embedded file [{0}]", oFileName)

            ' Files without an extension yield an empty string here instead of throwing,
            ' so one oddly named attachment does not abort the whole extraction.
            Dim oExtension = GetNormalizedExtension(oFileName)

            If Not pExtensions.Contains(oExtension) Then
                Logger.Debug("File [{0}] was skipped because its extension [{1}] is not allowed.", oFileName, oExtension)
                Continue For
            End If

            Dim oFileSize As Integer = GDPicturePDF.GetEmbeddedFileSize(oIndex)

            If GDPicturePDF.GetStat() <> GdPictureStatus.OK Then
                Logger.Error("An error occurred getting the file size for [{0}]. Status: {1}", oFileName, GDPicturePDF.GetStat().ToString())
                Continue For
            End If

            Logger.Debug("Filesize of embedded file is [{0}]", oFileSize)

            ' NOTE(review): in VB the number in parentheses is the upper bound, so this buffer
            ' has oFileSize + 1 elements. Unless ExtractEmbeddedFile replaces the buffer, the
            ' content ends with one extra zero byte - TODO confirm against the GdPicture docs.
            Dim oFileData As Byte() = New Byte(oFileSize) {}
            Dim oStatus As GdPictureStatus = GDPicturePDF.ExtractEmbeddedFile(oIndex, oFileData)

            If oStatus <> GdPictureStatus.OK Then
                Logger.Error("The embedded file [{0}] has failed to extract. Status: {1}", oFileName, GDPicturePDF.GetStat().ToString())
                Continue For
            End If

            Logger.Debug("Embedded file [{0}] extracted sucessfully!", oFileName)

            oResults.Add(New EmbeddedFile() With {
                .FileContents = oFileData,
                .FileName = oFileName
            })
        Next

        Return oResults
    End Function

    ''' <summary>
    ''' Writes name, size and existence of a file to the debug log.
    ''' Errors are logged only, they never prevent the actual operation.
    ''' </summary>
    ''' <param name="pFilePath">Filepath to inspect</param>
    Private Sub LogFileInfo(pFilePath As String)
        Try
            Dim oFileInfo As New FileInfo(pFilePath)
            Logger.Debug("Filename: {0}", oFileInfo.Name)
            Logger.Debug("Filesize: {0} bytes", oFileInfo.Length)
            Logger.Debug("Exists: {0}", oFileInfo.Exists)
        Catch ex As Exception
            Logger.Warn("File information for [{0}] could not be read!", pFilePath)
            Logger.Error(ex)
        End Try
    End Sub

    ''' <summary>
    ''' Converts the allowed extensions to the form used for comparison:
    ''' upper-case and without leading dot. Entries which are Nothing are ignored.
    ''' </summary>
    ''' <param name="pExtensions">Extensions as passed by the caller</param>
    Private Shared Function NormalizeExtensions(pExtensions As List(Of String)) As List(Of String)
        Return pExtensions.
            Where(Function(ext) ext IsNot Nothing).
            Select(Function(ext) ext.TrimStart("."c).ToUpperInvariant()).
            ToList()
    End Function

    ''' <summary>
    ''' Returns the extension of a file name, upper-case and without leading dot.
    ''' Returns an empty string if the name is empty, has no extension or is not a legal path.
    ''' </summary>
    ''' <param name="pFileName">Name of the embedded file</param>
    Private Shared Function GetNormalizedExtension(pFileName As String) As String
        If String.IsNullOrEmpty(pFileName) Then
            Return String.Empty
        End If

        Try
            Return Path.GetExtension(pFileName).TrimStart("."c).ToUpperInvariant()
        Catch ex As ArgumentException
            ' Path.GetExtension can reject names containing characters that are invalid in paths.
            Return String.Empty
        End Try
    End Function
End Class