151 lines
6.7 KiB
VB.net

Imports System.Collections.Generic
Imports System.IO
Imports DigitalData.Modules.Logging
Imports GdPicture14
Public Class PDFEmbeds
Private ReadOnly Logger As Logger
Public Const ZUGFERD_XML_FILENAME = "ZUGFeRD-invoice.xml"
Public Const FACTUR_X_XML_FILENAME_FR = "factur-x.xml"
Public Const FACTUR_X_XML_FILENAME_DE = "xrechnung.xml"
Public Class EmbeddedFile
Public FileName As String
Public FileContents As Byte()
End Class
Public Sub New(LogConfig As LogConfig)
Logger = LogConfig.GetLogger
End Sub
''' <summary>
''' Extracts all embedded files from a PDF file.
''' Note: This does NOT filter out `ZUGFeRD-invoice.xml` anymore to allow for a more generic use.
''' </summary>
''' <param name="FilePath">Filepath of the pdf</param>
''' <param name="AllowedExtensions">List of allowed extensions to be extracted</param>
Public Function Extract(FilePath As String, AllowedExtensions As List(Of String)) As List(Of EmbeddedFile)
Dim oFile As New List(Of EmbeddedFile)
Dim oFileInfo As FileInfo
Dim oExtensions = AllowedExtensions.Select(Function(ext) ext.ToUpper).ToList()
Logger.Debug("Extracting embedded files from [{0}]", FilePath)
Try
oFileInfo = New FileInfo(FilePath)
Logger.Debug("Filename: {0}", oFileInfo.Name)
Logger.Debug("Filesize: {0} bytes", oFileInfo.Length)
Logger.Debug("Exists: {0}", oFileInfo.Exists)
Catch ex As Exception
Logger.Warn("File information for [{0}] could not be read!", FilePath)
Logger.Error(ex)
End Try
Try
Using oGDPicturePDF As New GdPicturePDF()
If oGDPicturePDF.LoadFromFile(FilePath, False) = GdPictureStatus.OK Then
oFile = DoExtract(oGDPicturePDF, oExtensions)
Else
Dim oMessage = String.Format("The file [{0}] can't be loaded. Status: [{1}]", FilePath, oGDPicturePDF.GetStat().ToString())
Throw New ApplicationException(oMessage)
End If
End Using
Return oFile
Catch ex As Exception
Logger.Warn("Unexpected Error while Extracting attachments from File [{0}]", FilePath)
Logger.Error(ex)
Return Nothing
End Try
End Function
''' <summary>
''' Extracts all embedded files from a PDF file.
''' Note: This does NOT filter out `ZUGFeRD-invoice.xml` anymore to allow for a more generic use.
''' </summary>
''' <param name="Stream">Filestream of the pdf</param>
''' <param name="AllowedExtensions">List of allowed extensions to be extracted</param>
Public Function Extract(Stream As Stream, AllowedExtensions As List(Of String)) As List(Of EmbeddedFile)
Dim oResults As New List(Of EmbeddedFile)
Dim oExtensions = AllowedExtensions.Select(Function(ext) ext.ToUpper).ToList()
Logger.Debug("Extracting embedded files from stream")
Try
Using oGDPicturePDF As New GdPicturePDF()
If oGDPicturePDF.LoadFromStream(Stream, False) = GdPictureStatus.OK Then
oResults = DoExtract(oGDPicturePDF, oExtensions)
Else
Dim oMessage = String.Format("The filestream can't be loaded. Status: [{0}]", oGDPicturePDF.GetStat().ToString())
Throw New ApplicationException(oMessage)
End If
End Using
Return oResults
Catch ex As Exception
Logger.Warn("Unexpected Error while Extracting attachments from Filestream")
Logger.Error(ex)
Return Nothing
End Try
End Function
Private Function DoExtract(GDPicturePDF As GdPicturePDF, pExtensions As List(Of String)) As List(Of EmbeddedFile)
Dim oResults As New List(Of EmbeddedFile)
Dim oEmbeddedFileCount As Integer = GDPicturePDF.GetEmbeddedFileCount()
If GDPicturePDF.GetStat() = GdPictureStatus.OK Then
Logger.Debug("Embedded file count is: [{0}]", oEmbeddedFileCount)
If oEmbeddedFileCount > 0 Then
For oIndex = 0 To oEmbeddedFileCount - 1
Dim oFileName As String = GDPicturePDF.GetEmbeddedFileName(oIndex)
If GDPicturePDF.GetStat() = GdPictureStatus.OK Then
Logger.Debug("Extracting embedded file [{0}]", oFileName)
Dim oExtension = New FileInfo(oFileName).Extension.ToUpper.Substring(1)
If pExtensions.Contains(oExtension) Then
Dim oFileSize As Integer = GDPicturePDF.GetEmbeddedFileSize(oIndex)
If GDPicturePDF.GetStat() = GdPictureStatus.OK Then
Logger.Debug("Filesize of embedded file is [{0}]", oFileSize)
Dim oFileData As Byte() = New Byte(oFileSize) {}
Dim oStatus As GdPictureStatus = GDPicturePDF.ExtractEmbeddedFile(oIndex, oFileData)
If oStatus = GdPictureStatus.OK Then
Logger.Debug("Embedded file [{0}] extracted sucessfully!", oFileName)
oResults.Add(New EmbeddedFile() With {
.FileContents = oFileData,
.FileName = oFileName
})
Else
Logger.Error("The embedded file [{0}] has failed to extract. Status: {1}", oFileName, GDPicturePDF.GetStat().ToString())
Continue For
End If
Else
Logger.Error("An error occurred getting the file size for [{0}]. Status: {1}", oFileName, GDPicturePDF.GetStat().ToString())
Continue For
End If
Else
Logger.Debug("File [{0}] was skipped because its extension [{1}] is not allowed.", oFileName, oExtension)
Continue For
End If
Else
Logger.Error("An error occurred getting the file name for [{0}]. Status: {1}", oFileName, GDPicturePDF.GetStat().ToString())
Continue For
End If
Next
End If
Return oResults
Else
Dim oMessage = String.Format("An error occurred getting the number of embedded files. Status: {0}", GDPicturePDF.GetStat().ToString())
Throw New ApplicationException(oMessage)
End If
End Function
End Class