Files
DXApp/DXApp.TemplateKitProject/Services/PdfAttachmentExtractorService.cs
2026-05-21 14:35:02 +02:00

142 lines
4.7 KiB
C#

using DevExpress.Pdf;
using DXApp.TemplateKitProject.Models;
namespace DXApp.TemplateKitProject.Services;
/// <summary>
/// Extracts the file attachments embedded in a PDF document, writes them into a
/// per-document output directory and flags attachments whose file name matches one
/// of the known ZUGFeRD / Factur-X XML names.
/// </summary>
/// <param name="configuration">
/// Configuration source. The optional key <c>PdfExtraction:OutputDirectory</c> sets the
/// base output directory; when it is missing or blank, a <c>PdfExtractions</c> folder
/// below the system temp path is used.
/// </param>
/// <param name="logger">Logger used for progress, warning and error messages.</param>
public class PdfAttachmentExtractorService(
    IConfiguration configuration,
    ILogger<PdfAttachmentExtractorService> logger)
{
    /// <summary>File name used when an attachment carries no usable name of its own.</summary>
    private const string FallbackFileName = "attachment";

    /// <summary>
    /// Known ZUGFeRD / Factur-X XML file names. An attachment is flagged when its name
    /// ends with one of these entries (case-insensitive).
    /// </summary>
    private static readonly string[] ZugferdFileNames =
    [
        "zugferd-invoice.xml",
        "factur-x.xml",
        "xrechnung.xml",
        "zugferd_2p0_en16931_muster.xml",
        "cii-data.xml"
    ];

    /// <summary>
    /// Loads the PDF from <paramref name="pdfStream"/> and saves every embedded file
    /// attachment into a freshly resolved output directory.
    /// </summary>
    /// <param name="pdfStream">Stream containing the PDF document.</param>
    /// <param name="sourceFileName">
    /// Name of the source PDF; used for log messages and as the prefix of the output folder name.
    /// </param>
    /// <returns>
    /// The extraction result. Its attachment list is empty when the PDF has no attachments;
    /// attachments that could not be saved are logged as warnings and omitted.
    /// </returns>
    /// <remarks>
    /// Any exception raised while loading the document or creating the output directory is
    /// logged as an error and rethrown unchanged.
    /// </remarks>
    public PdfExtractionResult ExtractAttachments(Stream pdfStream, string sourceFileName)
    {
        var result = new PdfExtractionResult();

        try
        {
            using var processor = new PdfDocumentProcessor();
            processor.LoadDocument(pdfStream);

            // Materialize the attachments into a List<T> once so that the Count property
            // is available and the collection is enumerated a single time.
            var attachments = processor.Document.FileAttachments.ToList();
            if (attachments.Count == 0)
            {
                logger.LogInformation("PDF '{FileName}': Keine Anhänge gefunden.", sourceFileName);
                return result;
            }

            logger.LogInformation(
                "PDF '{FileName}': {Count} Anhang/Anhänge gefunden.",
                sourceFileName, attachments.Count);

            // The output directory is only resolved and created when there is something to write.
            var outputDir = ResolveOutputDirectory(sourceFileName);
            Directory.CreateDirectory(outputDir);

            foreach (var attachment in attachments)
            {
                var extracted = SaveAttachment(attachment, outputDir);
                if (extracted is not null)
                {
                    result.Attachments.Add(extracted);
                }
            }
        }
        catch (Exception ex)
        {
            logger.LogError(ex,
                "Fehler beim Extrahieren der Anhänge aus '{FileName}'.", sourceFileName);
            throw;
        }

        LogExtractionSummary(sourceFileName, result);
        return result;
    }

    /// <summary>
    /// Writes a single attachment to <paramref name="outputDir"/> under a sanitized,
    /// non-colliding file name.
    /// </summary>
    /// <param name="attachment">The PDF attachment to persist.</param>
    /// <param name="outputDir">Existing directory that receives the file.</param>
    /// <returns>
    /// Metadata describing the saved file, or <c>null</c> when saving failed
    /// (the failure is logged as a warning and deliberately not propagated).
    /// </returns>
    private ExtractedAttachment? SaveAttachment(PdfFileAttachment attachment, string outputDir)
    {
        try
        {
            var safeFileName = SanitizeFileName(attachment.FileName);
            var targetPath = EnsureUniqueFilePath(Path.Combine(outputDir, safeFileName));
            var data = attachment.Data;
            File.WriteAllBytes(targetPath, data);

            var isZugferd = IsZugferdXml(attachment.FileName);

            // Log the name that was actually written; it differs from the sanitized
            // name when a numeric suffix had to be appended to avoid a collision.
            logger.LogInformation(
                " → Gespeichert: '{FileName}' ({Bytes} Bytes){Zugferd}",
                Path.GetFileName(targetPath), data.Length,
                isZugferd ? " [ZUGFeRD/Factur-X XML]" : string.Empty);

            return new ExtractedAttachment
            {
                OriginalFileName = attachment.FileName,
                SavedFilePath = targetPath,
                FileSizeBytes = data.Length,
                IsZugferdXml = isZugferd
            };
        }
        catch (Exception ex)
        {
            // Best effort: one broken attachment must not abort the remaining ones.
            logger.LogWarning(ex,
                " → Anhang '{Name}' konnte nicht gespeichert werden.", attachment.FileName);
            return null;
        }
    }

    /// <summary>
    /// Builds the output directory path for one extraction run:
    /// <c>&lt;base&gt;/&lt;source name without extension&gt;_&lt;UTC timestamp&gt;</c>.
    /// </summary>
    /// <param name="sourceFileName">Name of the source PDF.</param>
    /// <returns>The directory path; the directory itself is not created here.</returns>
    private string ResolveOutputDirectory(string sourceFileName)
    {
        // A blank configuration value would otherwise yield a path relative to the
        // current working directory, so it is treated like a missing value.
        var configured = configuration["PdfExtraction:OutputDirectory"];
        var baseDir = string.IsNullOrWhiteSpace(configured)
            ? Path.Combine(Path.GetTempPath(), "PdfExtractions")
            : configured;

        // The source name becomes part of a directory name, so characters that are
        // invalid in file names are replaced the same way as for attachment names.
        var baseName = ReplaceInvalidFileNameChars(
            Path.GetFileNameWithoutExtension(sourceFileName) ?? string.Empty);

        // Invariant culture keeps the timestamp stable regardless of the thread's calendar.
        var timestamp = DateTime.UtcNow.ToString(
            "yyyyMMdd_HHmmss", System.Globalization.CultureInfo.InvariantCulture);

        return Path.Combine(baseDir, $"{baseName}_{timestamp}");
    }

    /// <summary>
    /// Determines whether <paramref name="fileName"/> ends with one of the known
    /// ZUGFeRD / Factur-X XML file names, ignoring case.
    /// </summary>
    /// <param name="fileName">Attachment file name; <c>null</c> yields <c>false</c>.</param>
    private static bool IsZugferdXml(string? fileName) =>
        fileName is not null
        && ZugferdFileNames.Any(known => fileName.EndsWith(known, StringComparison.OrdinalIgnoreCase));

    /// <summary>
    /// Turns an attachment name into a name that is safe to use as a single file name.
    /// </summary>
    /// <param name="fileName">Attachment name as stored in the PDF; may be <c>null</c>.</param>
    /// <returns>
    /// The name with invalid file-name characters replaced by <c>_</c>, or
    /// <c>"attachment"</c> when the name is missing or consists only of dots and whitespace.
    /// </returns>
    private static string SanitizeFileName(string? fileName)
    {
        if (fileName is null)
        {
            return FallbackFileName;
        }

        var safe = ReplaceInvalidFileNameChars(fileName);

        // Empty, blank, "." and ".." cannot be used as a file name inside the output
        // directory ("." and ".." would resolve to directories), so fall back.
        return safe.All(c => c == '.' || char.IsWhiteSpace(c)) ? FallbackFileName : safe;
    }

    /// <summary>
    /// Replaces every character reported by <see cref="Path.GetInvalidFileNameChars"/>
    /// with an underscore.
    /// </summary>
    private static string ReplaceInvalidFileNameChars(string value)
    {
        var invalid = Path.GetInvalidFileNameChars();
        return string.Concat(value.Select(c => invalid.Contains(c) ? '_' : c));
    }

    /// <summary>
    /// Returns <paramref name="filePath"/> if no file exists there; otherwise appends
    /// <c>_1</c>, <c>_2</c>, … before the extension until an unused path is found.
    /// </summary>
    private static string EnsureUniqueFilePath(string filePath)
    {
        if (!File.Exists(filePath))
        {
            return filePath;
        }

        var dir = Path.GetDirectoryName(filePath)!;
        var name = Path.GetFileNameWithoutExtension(filePath);
        var ext = Path.GetExtension(filePath);

        for (var i = 1; ; i++)
        {
            var candidate = Path.Combine(dir, $"{name}_{i}{ext}");
            if (!File.Exists(candidate))
            {
                return candidate;
            }
        }
    }

    /// <summary>
    /// Logs how many attachments were extracted and whether a ZUGFeRD XML attachment
    /// was among them.
    /// </summary>
    private void LogExtractionSummary(string sourceFileName, PdfExtractionResult result)
    {
        logger.LogInformation(
            "PDF '{FileName}': {Total} Anhang/Anhänge extrahiert. ZUGFeRD-XML: {HasXml}",
            sourceFileName,
            result.Attachments.Count,
            result.HasZugferdXml
                ? $"Ja → {result.ZugferdXmlAttachment!.OriginalFileName}"
                : "Nein");
    }
}