using System.Globalization;
using DevExpress.Pdf;
using DXApp.TemplateKitProject.Models;

namespace DXApp.TemplateKitProject.Services;

/// <summary>
/// Extracts the file attachments embedded in a PDF document, writes them to a
/// per-document output directory and records PDF/A conformance and
/// ZUGFeRD/Factur-X guideline information in the returned result.
/// </summary>
public class PdfAttachmentExtractorService(
    IConfiguration configuration,
    ILogger logger)
{
    /// <summary>Name used when an attachment has no usable file name of its own.</summary>
    private const string FallbackAttachmentName = "attachment";

    /// <summary>
    /// File-name endings that mark an attachment as a ZUGFeRD/Factur-X invoice XML.
    /// Matching is case-insensitive and suffix-based (see <see cref="IsZugferdXml"/>).
    /// </summary>
    private static readonly string[] ZugferdFileNames =
    [
        "zugferd-invoice.xml",
        "factur-x.xml",
        "xrechnung.xml",
        "zugferd_2p0_en16931_muster.xml",
        "cii-data.xml"
    ];

    /// <summary>
    /// URN prefixes searched for in the XMP packet, in priority order: the first
    /// prefix in this list that occurs anywhere in the metadata wins.
    /// </summary>
    private static readonly string[] KnownGuidelinePrefixes =
    [
        "urn:ferd:",
        "urn:cen.eu:",
        "urn:factur-x.",
        "urn:zugferd:",
        "urn:xoev-de:"
    ];

    /// <summary>
    /// Loads the PDF from <paramref name="pdfStream"/>, records its PDF/A level and
    /// guideline ID, and saves every embedded attachment to disk.
    /// </summary>
    /// <param name="pdfStream">Stream containing the PDF document.</param>
    /// <param name="sourceFileName">
    /// Name of the source PDF; used for log messages and to derive the output folder name.
    /// </param>
    /// <returns>
    /// The extraction result. When the document has no attachments the result is returned
    /// without creating an output directory.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="pdfStream"/> is null.</exception>
    /// <remarks>
    /// Any exception raised while loading or processing the document is logged and rethrown.
    /// Failures to save an individual attachment are logged and that attachment is skipped.
    /// </remarks>
    public PdfExtractionResult ExtractAttachments(Stream pdfStream, string sourceFileName)
    {
        ArgumentNullException.ThrowIfNull(pdfStream);

        var result = new PdfExtractionResult();

        try
        {
            using var processor = new PdfDocumentProcessor();
            processor.LoadDocument(pdfStream);

            // Check PDF/A conformance.
            var compatibility = processor.Document.PdfACompatibility;
            result.PdfALevel = DescribePdfALevel(compatibility);
            result.PdfAWarning = compatibility == PdfACompatibility.None;

            logger.LogDebug(
                "PDF '{FileName}': Konformität = {Level}",
                sourceFileName, result.PdfALevel);

            ReadGuidelineId(processor, sourceFileName, result);

            // Materialize once so the count is available without re-enumerating.
            var attachments = processor.Document.FileAttachments.ToList();

            if (attachments.Count == 0)
            {
                logger.LogInformation("PDF '{FileName}': Keine Anhänge gefunden.", sourceFileName);
                return result;
            }

            logger.LogInformation(
                "PDF '{FileName}': {Count} Anhang/Anhänge gefunden.",
                sourceFileName, attachments.Count);

            // Resolve the directory only now so nothing is computed or created
            // for documents without attachments.
            var outputDir = ResolveOutputDirectory(sourceFileName);
            Directory.CreateDirectory(outputDir);

            foreach (var attachment in attachments)
            {
                var extracted = SaveAttachment(attachment, outputDir);
                if (extracted is not null)
                {
                    result.Attachments.Add(extracted);
                }
            }
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "Fehler beim Extrahieren der Anhänge aus '{FileName}'.", sourceFileName);
            throw;
        }

        LogExtractionSummary(sourceFileName, result);
        return result;
    }

    /// <summary>Maps the PDF/A compatibility value to the label stored in the result.</summary>
    private static string DescribePdfALevel(PdfACompatibility compatibility) => compatibility switch
    {
        PdfACompatibility.None => "Kein PDF/A",
        PdfACompatibility.PdfA1b => "PDF/A-1b",
        PdfACompatibility.PdfA2b => "PDF/A-2b",
        PdfACompatibility.PdfA3b => "PDF/A-3b",
        _ => compatibility.ToString()
    };

    /// <summary>
    /// Reads the guideline ID from the document's XMP metadata into
    /// <paramref name="result"/>. Best effort: any failure is logged as a warning
    /// and does not abort the extraction.
    /// </summary>
    private void ReadGuidelineId(
        PdfDocumentProcessor processor,
        string sourceFileName,
        PdfExtractionResult result)
    {
        try
        {
            var xmpData = processor.Document.Metadata?.Data;
            if (string.IsNullOrWhiteSpace(xmpData))
            {
                return;
            }

            result.ZugferdGuidelineId = ExtractGuidelineId(xmpData);

            if (!string.IsNullOrEmpty(result.ZugferdGuidelineId))
            {
                logger.LogDebug(
                    "PDF '{FileName}': Guideline-ID = {GuidelineId}",
                    sourceFileName, result.ZugferdGuidelineId);
            }
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex, "XMP-Metadaten konnten nicht gelesen werden.");
        }
    }

    /// <summary>
    /// Writes a single attachment into <paramref name="outputDir"/> under a sanitized,
    /// non-colliding file name.
    /// </summary>
    /// <returns>
    /// A description of the saved attachment, or <c>null</c> when saving failed
    /// (the failure is logged as a warning).
    /// </returns>
    private ExtractedAttachment? SaveAttachment(PdfFileAttachment attachment, string outputDir)
    {
        try
        {
            var safeFileName = SanitizeFileName(attachment.FileName);
            var targetPath = EnsureUniqueFilePath(Path.Combine(outputDir, safeFileName));

            var data = attachment.Data;
            File.WriteAllBytes(targetPath, data);

            var isZugferd = IsZugferdXml(attachment.FileName);

            logger.LogDebug(
                " → Gespeichert: '{FileName}' ({Bytes} Bytes){Zugferd}",
                safeFileName,
                data.Length,
                isZugferd ? " [ZUGFeRD/Factur-X XML]" : string.Empty);

            return new ExtractedAttachment
            {
                // An attachment without a name is still saved (under the fallback name);
                // its original name is reported as empty rather than null.
                OriginalFileName = attachment.FileName ?? string.Empty,
                SavedFilePath = targetPath,
                FileSizeBytes = data.Length,
                IsZugferdXml = isZugferd
            };
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex, " → Anhang '{Name}' konnte nicht gespeichert werden.", attachment.FileName);
            return null;
        }
    }

    /// <summary>
    /// Builds the output directory path for one extraction:
    /// <c>{base}/{source name without extension}_{UTC timestamp}</c>.
    /// The base comes from the <c>PdfExtraction:OutputDirectory</c> setting; when that is
    /// missing or blank, a <c>PdfExtractions</c> folder under the temp path is used.
    /// </summary>
    private string ResolveOutputDirectory(string sourceFileName)
    {
        var configured = configuration["PdfExtraction:OutputDirectory"];
        var baseDir = string.IsNullOrWhiteSpace(configured)
            ? Path.Combine(Path.GetTempPath(), "PdfExtractions")
            : configured;

        // The source name is caller-supplied; replace characters the host file system
        // rejects so directory creation does not fail because of the name alone.
        var baseName = ReplaceInvalidFileNameChars(
            Path.GetFileNameWithoutExtension(sourceFileName) ?? string.Empty);

        // Invariant culture keeps the timestamp independent of the thread's calendar.
        var timestamp = DateTime.UtcNow.ToString("yyyyMMdd_HHmmss", CultureInfo.InvariantCulture);

        return Path.Combine(baseDir, $"{baseName}_{timestamp}");
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="fileName"/> ends (case-insensitively)
    /// with one of the <see cref="ZugferdFileNames"/>; <c>false</c> for null or empty names.
    /// </summary>
    private static bool IsZugferdXml(string? fileName)
    {
        if (string.IsNullOrEmpty(fileName))
        {
            return false;
        }

        foreach (var known in ZugferdFileNames)
        {
            if (fileName.EndsWith(known, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Turns an attachment name into a file name that is safe to combine with the
    /// output directory. Invalid characters become <c>_</c>; names that are null, empty,
    /// or consist only of dots and whitespace are replaced by a fallback name.
    /// </summary>
    private static string SanitizeFileName(string? fileName)
    {
        if (string.IsNullOrEmpty(fileName))
        {
            return FallbackAttachmentName;
        }

        var safe = ReplaceInvalidFileNameChars(fileName);

        // "." and ".." (and blank names) would resolve to a directory, not a file.
        return safe.All(c => c == '.' || char.IsWhiteSpace(c))
            ? FallbackAttachmentName
            : safe;
    }

    /// <summary>Replaces every character from <see cref="Path.GetInvalidFileNameChars"/> with <c>_</c>.</summary>
    private static string ReplaceInvalidFileNameChars(string name)
    {
        var invalid = Path.GetInvalidFileNameChars();
        return string.Concat(name.Select(c => invalid.Contains(c) ? '_' : c));
    }

    /// <summary>
    /// Returns <paramref name="filePath"/> if no file exists there; otherwise appends
    /// <c>_1</c>, <c>_2</c>, … before the extension until an unused path is found.
    /// </summary>
    private static string EnsureUniqueFilePath(string filePath)
    {
        if (!File.Exists(filePath))
        {
            return filePath;
        }

        var dir = Path.GetDirectoryName(filePath)!;
        var name = Path.GetFileNameWithoutExtension(filePath);
        var ext = Path.GetExtension(filePath);
        var i = 1;
        string candidate;

        do
        {
            candidate = Path.Combine(dir, $"{name}_{i++}{ext}");
        }
        while (File.Exists(candidate));

        return candidate;
    }

    /// <summary>Logs how many attachments were extracted and whether a ZUGFeRD XML was among them.</summary>
    private void LogExtractionSummary(string sourceFileName, PdfExtractionResult result)
    {
        logger.LogInformation(
            "PDF '{FileName}': {Total} Anhang/Anhänge extrahiert. ZUGFeRD-XML: {HasXml}",
            sourceFileName,
            result.Attachments.Count,
            result.HasZugferdXml
                ? $"Ja → {result.ZugferdXmlAttachment!.OriginalFileName}"
                : "Nein");
    }

    /// <summary>
    /// Finds the first known ZUGFeRD/Factur-X URN in the XMP packet and returns it.
    /// </summary>
    /// <param name="xmpData">Raw XMP metadata (XML text).</param>
    /// <returns>
    /// The URN starting at the first occurrence of the highest-priority prefix from
    /// <see cref="KnownGuidelinePrefixes"/>, or an empty string when none is present.
    /// </returns>
    /// <remarks>
    /// This is a plain text scan, not an XML parse. The URN may appear as element text
    /// (terminated by <c>&lt;</c>) or inside an attribute value such as a namespace
    /// declaration (terminated by a quote), so the value ends at the first
    /// <c>&lt;</c>, quote, or whitespace character.
    /// </remarks>
    private static string ExtractGuidelineId(string xmpData)
    {
        foreach (var prefix in KnownGuidelinePrefixes)
        {
            var start = xmpData.IndexOf(prefix, StringComparison.OrdinalIgnoreCase);
            if (start < 0)
            {
                continue;
            }

            var end = start;
            while (end < xmpData.Length && !IsGuidelineTerminator(xmpData[end]))
            {
                end++;
            }

            return xmpData[start..end];
        }

        return string.Empty;
    }

    /// <summary>Characters that cannot be part of a URN embedded in XML text or attributes.</summary>
    private static bool IsGuidelineTerminator(char c) =>
        c is '<' or '"' or '\'' || char.IsWhiteSpace(c);
}