WIP: PDF Attachment Extractor

This commit is contained in:
Jonathan Jenne
2020-03-04 16:42:06 +01:00
parent b995304ee3
commit c3c6a50992
6 changed files with 126 additions and 12 deletions

View File

@@ -1,9 +1,84 @@
Public Class PDFAttachments
Public Sub New(GdPictureKey As String)
Imports System.Collections.Generic
Imports System.IO
Imports DigitalData.Modules.Logging
Imports GdPicture14
Public Class PDFAttachments
Private Logger As Logger
Private Const ZUGFERD_XML_FILENAME = "ZUGFeRD-invoice.xml"
Public Sub New(LogConfig As LogConfig, GdPictureKey As String)
Logger = LogConfig.GetLogger
End Sub
Public Shared Function Extract(FileName As String)
Using oGDPicturePDF As New GDPicturePDF
Public Function Extract(FileName As String, AllowedExtensions As List(Of String)) As List(Of FileInfo)
Dim oResults As New List(Of FileInfo)
Dim oExtensions = AllowedExtensions.ConvertAll(Of String)(New Converter(Of String, String)(Function(ext) ext.ToUpper))
Try
Using oGDPicturePDF As New GdPicturePDF()
If oGDPicturePDF.LoadFromFile(FileName, False) = GdPictureStatus.OK Then
Dim oEmbeddedFileCount As Integer = oGDPicturePDF.GetEmbeddedFileCount()
If oGDPicturePDF.GetStat() = GdPictureStatus.OK Then
If oEmbeddedFileCount > 1 Then
For index = 0 To oEmbeddedFileCount - 1
Dim oFileName As String = oGDPicturePDF.GetEmbeddedFileName(index)
If oGDPicturePDF.GetStat() = GdPictureStatus.OK Then
Dim oExtension = New FileInfo(oFileName).Extension.ToUpper.Substring(1)
If oFileName.ToUpper <> ZUGFERD_XML_FILENAME.ToUpper Then
If oExtensions.Contains(oExtension) Then
Dim FileSize As Integer = oGDPicturePDF.GetEmbeddedFileSize(index)
If oGDPicturePDF.GetStat() = GdPictureStatus.OK Then
Dim FileData As Byte() = New Byte(FileSize) {}
Dim status As GdPictureStatus = oGDPicturePDF.ExtractEmbeddedFile(index, FileData)
If status = GdPictureStatus.OK Then
Dim oTempName As String = Path.Combine(Path.GetTempPath(), oFileName)
Using oFileStream As New FileStream(oTempName, FileMode.OpenOrCreate)
oFileStream.Write(FileData, 0, FileData.Length)
End Using
oResults.Add(New FileInfo(oTempName))
Else
Logger.Error("The embedded file [{0}] has failed to extract. Status: {1}", oFileName, oGDPicturePDF.GetStat().ToString())
Continue For
End If
Else
Logger.Error("An error occurred getting the file size for [{0}]. Status: {1}", oFileName, oGDPicturePDF.GetStat().ToString())
Continue For
End If
Else
Logger.Warn("File [{0}] was skipped because its extension [{1}] is not allowed.", oFileName, oExtension)
Continue For
End If
Else
Logger.Debug("File [{0}] was skipped because its name indicates the invoice data file.", oFileName)
Continue For
End If
Else
Logger.Error("An error occurred getting the file name for [{0}]. Status: {1}", oFileName, oGDPicturePDF.GetStat().ToString())
Continue For
End If
Next
End If
Else
Logger.Error("An error occurred getting the number of embedded files. Status: {0}", oGDPicturePDF.GetStat().ToString())
Return Nothing
End If
Else
Logger.Error("The file [{0}] can't be loaded.", FileName)
Return Nothing
End If
End Using
Return oResults
Catch ex As Exception
Logger.Warn("Unexpected Error while Extracting attachments from File [{0}]", FileName)
Logger.Error(ex)
Return Nothing
End Try
End Function
End Class