Files
DXApp/DXApp.TemplateKitProject/Services/PdfAttachmentExtractorService.cs
OlgunR 98226f239b Adjust logging levels to reduce verbosity
Changed logging levels from LogInformation to LogDebug in
PdfAttachmentExtractorService and PdfResultPackageService.
This includes logs for PDF conformity levels, ZUGFeRD
Guideline-IDs, saved attachment details, and result report
discovery. These changes aim to reduce log verbosity in
production environments by moving less critical information
to the debug level.
2026-05-29 09:52:05 +02:00

206 lines
7.0 KiB
C#

using DevExpress.Pdf;
using DXApp.TemplateKitProject.Models;
namespace DXApp.TemplateKitProject.Services;
/// <summary>
/// Loads a PDF, records its PDF/A conformance level and a ZUGFeRD/Factur-X
/// guideline id found in the XMP metadata, and writes every embedded file
/// attachment into a per-document output directory.
/// </summary>
public class PdfAttachmentExtractorService(
    IConfiguration configuration,
    ILogger<PdfAttachmentExtractorService> logger)
{
    /// <summary>
    /// File-name endings (compared case-insensitively) that mark an attachment
    /// as ZUGFeRD/Factur-X invoice XML.
    /// </summary>
    private static readonly string[] ZugferdFileNames =
    [
        "zugferd-invoice.xml",
        "factur-x.xml",
        "xrechnung.xml",
        "zugferd_2p0_en16931_muster.xml",
        "cii-data.xml"
    ];

    /// <summary>
    /// Known ZUGFeRD/Factur-X guideline prefixes, searched in this order.
    /// </summary>
    private static readonly string[] GuidelinePrefixes =
    [
        "urn:ferd:",
        "urn:cen.eu:",
        "urn:factur-x.",
        "urn:zugferd:",
        "urn:xoev-de:"
    ];

    /// <summary>
    /// Characters that end a guideline URN inside XMP: markup delimiters,
    /// attribute quotes and whitespace.
    /// </summary>
    private static readonly char[] GuidelineTerminators =
        ['<', '>', '"', '\'', ' ', '\t', '\r', '\n'];

    /// <summary>
    /// Extracts all file attachments of the given PDF and stores them on disk.
    /// </summary>
    /// <param name="pdfStream">Stream containing the PDF document.</param>
    /// <param name="sourceFileName">
    /// Name of the source PDF; used for log messages and to derive the output folder name.
    /// </param>
    /// <returns>
    /// The extraction result with PDF/A level, guideline id (if any) and one entry
    /// per attachment that could be saved.
    /// </returns>
    /// <remarks>
    /// Any exception raised while loading or processing the document is logged
    /// and rethrown unchanged. Failures of individual attachments are logged as
    /// warnings and do not abort the extraction.
    /// </remarks>
    public PdfExtractionResult ExtractAttachments(Stream pdfStream, string sourceFileName)
    {
        var result = new PdfExtractionResult();
        var outputDir = ResolveOutputDirectory(sourceFileName);

        try
        {
            using var processor = new PdfDocumentProcessor();
            processor.LoadDocument(pdfStream);

            // Check PDF/A conformance.
            var compatibility = processor.Document.PdfACompatibility;
            result.PdfALevel = compatibility switch
            {
                PdfACompatibility.None => "Kein PDF/A",
                PdfACompatibility.PdfA1b => "PDF/A-1b",
                PdfACompatibility.PdfA2b => "PDF/A-2b",
                PdfACompatibility.PdfA3b => "PDF/A-3b",
                _ => compatibility.ToString()
            };
            result.PdfAWarning = compatibility == PdfACompatibility.None;

            logger.LogDebug(
                "PDF '{FileName}': Konformität = {Level}",
                sourceFileName, result.PdfALevel);

            // Read the ZUGFeRD guideline id from the XMP metadata. This is
            // best-effort: a metadata failure must not prevent extraction.
            try
            {
                var xmpData = processor.Document.Metadata?.Data;
                if (!string.IsNullOrWhiteSpace(xmpData))
                {
                    result.ZugferdGuidelineId = ExtractGuidelineId(xmpData);
                    if (!string.IsNullOrEmpty(result.ZugferdGuidelineId))
                    {
                        logger.LogDebug(
                            "PDF '{FileName}': Guideline-ID = {GuidelineId}",
                            sourceFileName, result.ZugferdGuidelineId);
                    }
                }
            }
            catch (Exception ex)
            {
                logger.LogWarning(ex, "XMP-Metadaten konnten nicht gelesen werden.");
            }

            // Materialize once so the count and the loop see the same snapshot.
            var attachments = processor.Document.FileAttachments.ToList();
            if (attachments.Count == 0)
            {
                logger.LogInformation("PDF '{FileName}': Keine Anhänge gefunden.", sourceFileName);
                return result;
            }

            logger.LogInformation(
                "PDF '{FileName}': {Count} Anhang/Anhänge gefunden.",
                sourceFileName, attachments.Count);

            // The output directory is only created when there is something to write.
            Directory.CreateDirectory(outputDir);

            foreach (var attachment in attachments)
            {
                var extracted = SaveAttachment(attachment, outputDir);
                if (extracted is not null)
                {
                    result.Attachments.Add(extracted);
                }
            }
        }
        catch (Exception ex)
        {
            logger.LogError(ex,
                "Fehler beim Extrahieren der Anhänge aus '{FileName}'.", sourceFileName);
            throw;
        }

        LogExtractionSummary(sourceFileName, result);
        return result;
    }

    /// <summary>
    /// Writes one attachment into <paramref name="outputDir"/> under a sanitized,
    /// non-colliding file name.
    /// </summary>
    /// <returns>
    /// A description of the saved file, or <c>null</c> when saving failed
    /// (the failure is logged as a warning).
    /// </returns>
    private ExtractedAttachment? SaveAttachment(PdfFileAttachment attachment, string outputDir)
    {
        try
        {
            // An attachment without a name is still saved, under the fallback name.
            var originalName = attachment.FileName ?? string.Empty;
            var safeFileName = SanitizeFileName(originalName);
            var targetPath = EnsureUniqueFilePath(Path.Combine(outputDir, safeFileName));

            var data = attachment.Data;
            File.WriteAllBytes(targetPath, data);

            var isZugferd = IsZugferdXml(originalName);
            logger.LogDebug(
                " → Gespeichert: '{FileName}' ({Bytes} Bytes){Zugferd}",
                safeFileName, data.Length,
                isZugferd ? " [ZUGFeRD/Factur-X XML]" : string.Empty);

            return new ExtractedAttachment
            {
                OriginalFileName = originalName,
                SavedFilePath = targetPath,
                FileSizeBytes = data.Length,
                IsZugferdXml = isZugferd
            };
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex,
                " → Anhang '{Name}' konnte nicht gespeichert werden.", attachment.FileName);
            return null;
        }
    }

    /// <summary>
    /// Builds the output directory path for one source document:
    /// <c>{base}/{sourceNameWithoutExtension}_{UTC timestamp}</c>.
    /// The base comes from the <c>PdfExtraction:OutputDirectory</c> setting and
    /// falls back to a <c>PdfExtractions</c> folder below the temp path.
    /// </summary>
    private string ResolveOutputDirectory(string sourceFileName)
    {
        var configured = configuration["PdfExtraction:OutputDirectory"];

        // A blank setting is treated like a missing one; otherwise the output
        // would silently land relative to the current working directory.
        var baseDir = string.IsNullOrWhiteSpace(configured)
            ? Path.Combine(Path.GetTempPath(), "PdfExtractions")
            : configured;

        // The source name may contain characters that are not valid in a
        // directory name on this platform, so they are replaced before use.
        var stem = ReplaceInvalidFileNameChars(Path.GetFileNameWithoutExtension(sourceFileName));

        // Invariant culture keeps the timestamp independent of the thread's calendar.
        var timestamp = DateTime.UtcNow.ToString(
            "yyyyMMdd_HHmmss", System.Globalization.CultureInfo.InvariantCulture);

        return Path.Combine(baseDir, $"{stem}_{timestamp}");
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="fileName"/> ends with one of the
    /// known ZUGFeRD/Factur-X XML names (case-insensitive).
    /// </summary>
    private static bool IsZugferdXml(string? fileName)
    {
        if (string.IsNullOrEmpty(fileName))
        {
            return false;
        }

        return ZugferdFileNames.Any(
            known => fileName.EndsWith(known, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Turns an attachment name into a name that is safe to use as a file name
    /// on this platform. Falls back to <c>attachment</c> when nothing usable remains.
    /// </summary>
    private static string SanitizeFileName(string? fileName)
    {
        var safe = ReplaceInvalidFileNameChars(fileName);

        // "." and ".." survive the character filter but name directories, not files.
        var unusable = string.IsNullOrWhiteSpace(safe) || safe is "." or "..";
        return unusable ? "attachment" : safe;
    }

    /// <summary>
    /// Replaces every character reported by <see cref="Path.GetInvalidFileNameChars"/>
    /// with an underscore. A null or empty input yields an empty string.
    /// </summary>
    private static string ReplaceInvalidFileNameChars(string? value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        var invalid = Path.GetInvalidFileNameChars();
        return string.Concat(value.Select(c => Array.IndexOf(invalid, c) >= 0 ? '_' : c));
    }

    /// <summary>
    /// Returns <paramref name="filePath"/> if no file exists there; otherwise
    /// appends <c>_1</c>, <c>_2</c>, … before the extension until a free path is found.
    /// </summary>
    private static string EnsureUniqueFilePath(string filePath)
    {
        if (!File.Exists(filePath))
        {
            return filePath;
        }

        var dir = Path.GetDirectoryName(filePath)!;
        var name = Path.GetFileNameWithoutExtension(filePath);
        var ext = Path.GetExtension(filePath);

        var i = 1;
        string candidate;
        do
        {
            candidate = Path.Combine(dir, $"{name}_{i++}{ext}");
        }
        while (File.Exists(candidate));

        return candidate;
    }

    /// <summary>
    /// Logs how many attachments were extracted and whether a ZUGFeRD XML was among them.
    /// </summary>
    private void LogExtractionSummary(string sourceFileName, PdfExtractionResult result)
    {
        logger.LogInformation(
            "PDF '{FileName}': {Total} Anhang/Anhänge extrahiert. ZUGFeRD-XML: {HasXml}",
            sourceFileName,
            result.Attachments.Count,
            result.HasZugferdXml
                ? $"Ja → {result.ZugferdXmlAttachment!.OriginalFileName}"
                : "Nein");
    }

    /// <summary>
    /// Searches the XMP packet for the first occurrence of a known guideline
    /// prefix and returns the URN starting there.
    /// </summary>
    /// <param name="xmpData">Raw XMP metadata (XML text).</param>
    /// <returns>The URN found, or an empty string when no known prefix occurs.</returns>
    private static string ExtractGuidelineId(string xmpData)
    {
        // XMP is XML, so the value is located by plain text search.
        // Typical: <ram:ID>urn:ferd:invoice:rc:comfort</ram:ID>
        // or:      <fx:ConformanceLevel>EN 16931</fx:ConformanceLevel>
        foreach (var prefix in GuidelinePrefixes)
        {
            var start = xmpData.IndexOf(prefix, StringComparison.OrdinalIgnoreCase);
            if (start < 0)
            {
                continue;
            }

            // Stop at the first markup delimiter, quote or whitespace. Ending
            // only at '<' would pull the closing quote and the following
            // attributes into the value when the URN sits in an attribute
            // (for example a namespace declaration).
            var end = xmpData.IndexOfAny(GuidelineTerminators, start);
            if (end < 0)
            {
                end = xmpData.Length;
            }

            return xmpData[start..end].Trim();
        }

        return string.Empty;
    }
}