Enhanced `PdfExtractionResult` with a new `ZugferdGuidelineId` property to store the ZUGFeRD Guideline-ID extracted from XMP metadata. Updated `Upload.cshtml` to display this information in the UI if available. Implemented ZUGFeRD Guideline-ID extraction in `PdfAttachmentExtractorService` using a new helper method `ExtractGuidelineId`, which parses XMP metadata for known prefixes. Added logging for extracted Guideline-IDs and improved error handling with a `try-catch` block for metadata processing. Fixed `FileAttachments` handling in `PdfAttachmentExtractorService` by converting to `List<T>` for proper `Count` property usage.
206 lines
7.0 KiB
C#
206 lines
7.0 KiB
C#
using DevExpress.Pdf;
|
|
using DXApp.TemplateKitProject.Models;
|
|
|
|
namespace DXApp.TemplateKitProject.Services;
|
|
|
|
/// <summary>
/// Loads a PDF document, reports its PDF/A conformance level, looks for a ZUGFeRD/Factur-X
/// guideline identifier in the XMP metadata and writes all embedded file attachments to a
/// per-document output directory.
/// </summary>
/// <param name="configuration">
/// Configuration source; the key <c>PdfExtraction:OutputDirectory</c> selects the base output directory.
/// </param>
/// <param name="logger">Logger used for progress, warning and error messages.</param>
public class PdfAttachmentExtractorService(
    IConfiguration configuration,
    ILogger<PdfAttachmentExtractorService> logger)
{
    /// <summary>
    /// File names (matched as case-insensitive suffixes) that mark an attachment as ZUGFeRD/Factur-X XML.
    /// </summary>
    private static readonly string[] ZugferdFileNames =
    [
        "zugferd-invoice.xml",
        "factur-x.xml",
        "xrechnung.xml",
        "zugferd_2p0_en16931_muster.xml",
        "cii-data.xml"
    ];

    /// <summary>
    /// Known ZUGFeRD/Factur-X guideline prefixes, probed in this order by <see cref="ExtractGuidelineId"/>.
    /// </summary>
    private static readonly string[] GuidelineIdPrefixes =
    [
        "urn:ferd:",
        "urn:cen.eu:",
        "urn:factur-x.",
        "urn:zugferd:",
        "urn:xoev-de:"
    ];

    /// <summary>
    /// Characters that end a URN value inside XML: the start of the next tag, the end of the
    /// current tag, or the closing quote of an attribute value.
    /// </summary>
    private static readonly char[] GuidelineIdTerminators = ['<', '>', '"', '\''];

    /// <summary>
    /// Extracts all file attachments from the given PDF and stores them on disk.
    /// </summary>
    /// <param name="pdfStream">Stream containing the PDF document.</param>
    /// <param name="sourceFileName">
    /// Name of the uploaded file; used for log messages and to derive the output directory name.
    /// </param>
    /// <returns>
    /// The extraction result with the PDF/A level, the guideline identifier (if one was found)
    /// and one entry per attachment that could be saved.
    /// </returns>
    /// <exception cref="Exception">
    /// Any exception raised while loading the document or creating the output directory is
    /// logged and rethrown unchanged.
    /// </exception>
    public PdfExtractionResult ExtractAttachments(Stream pdfStream, string sourceFileName)
    {
        var result = new PdfExtractionResult();
        var outputDir = ResolveOutputDirectory(sourceFileName);

        try
        {
            using var processor = new PdfDocumentProcessor();
            processor.LoadDocument(pdfStream);

            // Check PDF/A conformance.
            var compatibility = processor.Document.PdfACompatibility;
            result.PdfALevel = compatibility switch
            {
                PdfACompatibility.None => "Kein PDF/A",
                PdfACompatibility.PdfA1b => "PDF/A-1b",
                PdfACompatibility.PdfA2b => "PDF/A-2b",
                PdfACompatibility.PdfA3b => "PDF/A-3b",
                _ => compatibility.ToString()
            };
            result.PdfAWarning = compatibility == PdfACompatibility.None;

            logger.LogInformation(
                "PDF '{FileName}': Konformität = {Level}",
                sourceFileName, result.PdfALevel);

            // Read the ZUGFeRD guideline ID from the XMP metadata (best effort, never fatal).
            ReadGuidelineId(processor, sourceFileName, result);

            // Materialize the attachments into a List<T> so that the Count property is available.
            var attachments = processor.Document.FileAttachments.ToList();

            if (attachments.Count == 0)
            {
                logger.LogInformation("PDF '{FileName}': Keine Anhänge gefunden.", sourceFileName);
                return result;
            }

            logger.LogInformation(
                "PDF '{FileName}': {Count} Anhang/Anhänge gefunden.",
                sourceFileName, attachments.Count);

            Directory.CreateDirectory(outputDir);

            foreach (var attachment in attachments)
            {
                var extracted = SaveAttachment(attachment, outputDir);
                if (extracted is not null)
                {
                    result.Attachments.Add(extracted);
                }
            }
        }
        catch (Exception ex)
        {
            logger.LogError(ex,
                "Fehler beim Extrahieren der Anhänge aus '{FileName}'.", sourceFileName);
            throw;
        }

        LogExtractionSummary(sourceFileName, result);
        return result;
    }

    /// <summary>
    /// Reads the XMP metadata of the loaded document and stores a guideline identifier in
    /// <paramref name="result"/> when one is found. Failures are logged as warnings and swallowed
    /// so that a metadata problem never aborts the attachment extraction.
    /// </summary>
    private void ReadGuidelineId(
        PdfDocumentProcessor processor,
        string sourceFileName,
        PdfExtractionResult result)
    {
        try
        {
            var xmpData = processor.Document.Metadata?.Data;
            if (string.IsNullOrWhiteSpace(xmpData))
            {
                return;
            }

            result.ZugferdGuidelineId = ExtractGuidelineId(xmpData);
            if (!string.IsNullOrEmpty(result.ZugferdGuidelineId))
            {
                logger.LogInformation(
                    "PDF '{FileName}': Guideline-ID = {GuidelineId}",
                    sourceFileName, result.ZugferdGuidelineId);
            }
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex, "XMP-Metadaten konnten nicht gelesen werden.");
        }
    }

    /// <summary>
    /// Writes a single attachment into <paramref name="outputDir"/> under a sanitized, unique file name.
    /// </summary>
    /// <returns>
    /// A description of the saved file, or <c>null</c> when saving failed (the failure is logged as a warning).
    /// </returns>
    private ExtractedAttachment? SaveAttachment(PdfFileAttachment attachment, string outputDir)
    {
        try
        {
            var safeFileName = SanitizeFileName(attachment.FileName);
            var targetPath = EnsureUniqueFilePath(Path.Combine(outputDir, safeFileName));
            var data = attachment.Data;

            File.WriteAllBytes(targetPath, data);

            var isZugferd = IsZugferdXml(attachment.FileName);

            logger.LogInformation(
                " → Gespeichert: '{FileName}' ({Bytes} Bytes){Zugferd}",
                safeFileName, data.Length,
                isZugferd ? " [ZUGFeRD/Factur-X XML]" : string.Empty);

            return new ExtractedAttachment
            {
                OriginalFileName = attachment.FileName,
                SavedFilePath = targetPath,
                FileSizeBytes = data.Length,
                IsZugferdXml = isZugferd
            };
        }
        catch (Exception ex)
        {
            // One broken attachment must not prevent the remaining ones from being extracted.
            logger.LogWarning(ex,
                " → Anhang '{Name}' konnte nicht gespeichert werden.", attachment.FileName);
            return null;
        }
    }

    /// <summary>
    /// Builds the output directory path: the configured base directory (or a "PdfExtractions"
    /// folder below the temp path when the setting is missing) plus a folder named after the
    /// source file and the current UTC timestamp.
    /// </summary>
    private string ResolveOutputDirectory(string sourceFileName)
    {
        var baseDir = configuration["PdfExtraction:OutputDirectory"]
            ?? Path.Combine(Path.GetTempPath(), "PdfExtractions");

        // The source name comes from the caller (typically an upload); replace characters that
        // are not valid in a file name so that creating the directory cannot fail because of them.
        var stem = ReplaceInvalidFileNameChars(
            Path.GetFileNameWithoutExtension(sourceFileName) ?? string.Empty);

        var folderName = $"{stem}_{DateTime.UtcNow:yyyyMMdd_HHmmss}";
        return Path.Combine(baseDir, folderName);
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="fileName"/> ends (case-insensitively) with one of
    /// the known ZUGFeRD/Factur-X XML file names.
    /// </summary>
    private static bool IsZugferdXml(string fileName) =>
        !string.IsNullOrEmpty(fileName)
        && ZugferdFileNames.Any(known => fileName.EndsWith(known, StringComparison.OrdinalIgnoreCase));

    /// <summary>
    /// Turns an attachment name into a name that is safe to combine with the output directory.
    /// Invalid file name characters become '_'; a missing or blank name, as well as the
    /// directory references "." and "..", are replaced by "attachment".
    /// </summary>
    private static string SanitizeFileName(string fileName)
    {
        if (string.IsNullOrEmpty(fileName))
        {
            return "attachment";
        }

        var safe = ReplaceInvalidFileNameChars(fileName);

        // "." and ".." would resolve to a directory instead of a file inside the output folder.
        var isUnusable = string.IsNullOrWhiteSpace(safe) || safe is "." or "..";
        return isUnusable ? "attachment" : safe;
    }

    /// <summary>
    /// Replaces every character reported by <see cref="Path.GetInvalidFileNameChars"/> with '_'.
    /// </summary>
    private static string ReplaceInvalidFileNameChars(string value)
    {
        var invalid = Path.GetInvalidFileNameChars();
        return string.Concat(value.Select(c => invalid.Contains(c) ? '_' : c));
    }

    /// <summary>
    /// Returns <paramref name="filePath"/> if no such file exists yet; otherwise appends
    /// "_1", "_2", … to the file name (before the extension) until an unused path is found.
    /// </summary>
    private static string EnsureUniqueFilePath(string filePath)
    {
        if (!File.Exists(filePath))
        {
            return filePath;
        }

        var dir = Path.GetDirectoryName(filePath)!;
        var name = Path.GetFileNameWithoutExtension(filePath);
        var ext = Path.GetExtension(filePath);

        for (var i = 1; ; i++)
        {
            var candidate = Path.Combine(dir, $"{name}_{i}{ext}");
            if (!File.Exists(candidate))
            {
                return candidate;
            }
        }
    }

    /// <summary>
    /// Logs how many attachments were extracted and whether a ZUGFeRD XML attachment is among them.
    /// </summary>
    private void LogExtractionSummary(string sourceFileName, PdfExtractionResult result)
    {
        logger.LogInformation(
            "PDF '{FileName}': {Total} Anhang/Anhänge extrahiert. ZUGFeRD-XML: {HasXml}",
            sourceFileName,
            result.Attachments.Count,
            result.HasZugferdXml
                ? $"Ja → {result.ZugferdXmlAttachment!.OriginalFileName}"
                : "Nein");
    }

    /// <summary>
    /// Searches the XMP packet for the first occurrence of a known guideline prefix and returns
    /// the URN that starts there.
    /// </summary>
    /// <param name="xmpData">Raw XMP metadata (XML text).</param>
    /// <returns>
    /// The trimmed URN, or <see cref="string.Empty"/> when none of the known prefixes occurs.
    /// Prefixes are tried in the order of <see cref="GuidelineIdPrefixes"/>; the first prefix
    /// that occurs anywhere in the text wins.
    /// </returns>
    private static string ExtractGuidelineId(string xmpData)
    {
        // XMP is XML, so the value is either element text such as
        //   <ram:ID>urn:ferd:invoice:rc:comfort</ram:ID>
        // or an attribute value such as
        //   xmlns:zf="urn:ferd:pdfa:CrossIndustryDocument:invoice:1p0#"
        // NOTE(review): namespace declarations also match these prefixes, so the returned value
        // may be a namespace URI rather than a guideline ID — confirm against real documents.
        foreach (var prefix in GuidelineIdPrefixes)
        {
            var start = xmpData.IndexOf(prefix, StringComparison.OrdinalIgnoreCase);
            if (start < 0)
            {
                continue;
            }

            // Stop at the next tag start, tag end or closing quote so that an attribute value
            // does not drag its trailing quote and '>' into the result.
            var end = xmpData.IndexOfAny(GuidelineIdTerminators, start);
            if (end < 0)
            {
                end = xmpData.Length;
            }

            return xmpData[start..end].Trim();
        }

        return string.Empty;
    }
}