Add ZUGFeRD Guideline-ID extraction and display

Enhanced `PdfExtractionResult` with a new `ZugferdGuidelineId` property to store the ZUGFeRD Guideline-ID extracted from XMP metadata. Updated `Upload.cshtml` to display this information in the UI if available.

Implemented ZUGFeRD Guideline-ID extraction in `PdfAttachmentExtractorService` using a new helper method `ExtractGuidelineId`, which parses XMP metadata for known prefixes. Added logging for extracted Guideline-IDs and improved error handling with a `try-catch` block for metadata processing.

Fixed `FileAttachments` handling in `PdfAttachmentExtractorService` by materializing the sequence with `.ToList()`, so that the `Count` property of `List<T>` is available.
This commit is contained in:
OlgunR
2026-05-26 17:05:52 +02:00
parent 42d4222fb3
commit 6a46bf4f4b
3 changed files with 60 additions and 0 deletions

View File

@@ -42,6 +42,24 @@ public class PdfAttachmentExtractorService(
"PDF '{FileName}': Konformität = {Level}",
sourceFileName, result.PdfALevel);
// ZUGFeRD Guideline-ID aus XMP-Metadaten lesen
try
{
var xmpData = processor.Document.Metadata?.Data;
if (!string.IsNullOrWhiteSpace(xmpData))
{
result.ZugferdGuidelineId = ExtractGuidelineId(xmpData);
if (!string.IsNullOrEmpty(result.ZugferdGuidelineId))
logger.LogInformation(
"PDF '{FileName}': Guideline-ID = {GuidelineId}",
sourceFileName, result.ZugferdGuidelineId);
}
}
catch (Exception ex)
{
logger.LogWarning(ex, "XMP-Metadaten konnten nicht gelesen werden.");
}
// Fix: .ToList() → IEnumerable → List<T> mit Count-Property
var attachments = processor.Document.FileAttachments.ToList();
@@ -155,4 +173,34 @@ public class PdfAttachmentExtractorService(
? $"Ja → {result.ZugferdXmlAttachment!.OriginalFileName}"
: "Nein");
}
/// <summary>
/// Characters that end a Guideline-ID value inside raw XMP text: XML tag
/// delimiters, attribute quotes, and whitespace. None of them can be part of a URN.
/// </summary>
private static readonly char[] GuidelineIdTerminators =
    ['<', '>', '"', '\'', ' ', '\t', '\r', '\n'];

/// <summary>
/// Scans raw XMP text for the first value that starts with one of the known
/// ZUGFeRD / Factur-X URN prefixes and returns that value.
/// </summary>
/// <param name="xmpData">The XMP metadata packet as text.</param>
/// <returns>
/// The matched value (from the prefix up to the next terminator character),
/// or <see cref="string.Empty"/> when no known prefix occurs.
/// </returns>
/// <remarks>
/// The search is purely textual and case-insensitive; the XML is not parsed.
/// Prefixes are tried in the order listed below, so an earlier prefix wins even
/// if a later one appears first in the text. Any occurrence of a prefix is
/// accepted, regardless of which element or attribute contains it.
/// </remarks>
private static string ExtractGuidelineId(string xmpData)
{
    if (string.IsNullOrEmpty(xmpData))
    {
        return string.Empty;
    }

    // Known ZUGFeRD / Factur-X guideline prefixes.
    string[] knownPrefixes =
    [
        "urn:ferd:",
        "urn:cen.eu:",
        "urn:factur-x.",
        "urn:zugferd:",
        "urn:xoev-de:"
    ];

    // XMP is XML, so the value may be element content
    //   <ram:ID>urn:ferd:invoice:rc:comfort</ram:ID>
    // or an attribute value
    //   fx:ID="urn:ferd:invoice:rc:comfort"
    // Both forms must end the value at the right place.
    foreach (var prefix in knownPrefixes)
    {
        var start = xmpData.IndexOf(prefix, StringComparison.OrdinalIgnoreCase);
        if (start < 0)
        {
            continue;
        }

        // Stop at the first character that cannot belong to the URN. Looking
        // only for '<' would swallow the closing quote and any following
        // attributes when the value is stored in attribute form.
        var end = xmpData.IndexOfAny(GuidelineIdTerminators, start);
        if (end < 0)
        {
            end = xmpData.Length;
        }

        return xmpData[start..end].Trim();
    }

    return string.Empty;
}
}