Compare commits

...

3 Commits

Author SHA1 Message Date
OlgunR
6a46bf4f4b Add ZUGFeRD Guideline-ID extraction and display
Enhanced `PdfExtractionResult` with a new `ZugferdGuidelineId` property to store the ZUGFeRD Guideline-ID extracted from XMP metadata. Updated `Upload.cshtml` to display this information in the UI if available.

Implemented ZUGFeRD Guideline-ID extraction in `PdfAttachmentExtractorService` using a new helper method `ExtractGuidelineId`, which parses XMP metadata for known prefixes. Added logging for extracted Guideline-IDs and improved error handling with a `try-catch` block for metadata processing.

Fixed `FileAttachments` handling in `PdfAttachmentExtractorService` by converting to `List<T>` for proper `Count` property usage.
2026-05-26 17:05:52 +02:00
OlgunR
42d4222fb3 Handle UTF-8 BOM in string conversion
Previously, the method directly converted a byte array to a string
without accounting for a potential UTF-8 Byte Order Mark (BOM).
This commit introduces logic to remove the BOM (if present)
using `TrimStart('\uFEFF')` after converting the byte array
to a string.

Additionally, a comment was added to clarify the purpose of
this change.
2026-05-26 11:24:54 +02:00
OlgunR
4c90d2e5f1 Add PDF/A compliance checks and warnings
Added `PdfALevel` and `PdfAWarning` properties to the `PdfExtractionResult` class to store the PDF/A compliance level and indicate if a warning should be displayed for non-compliance.

Updated `Upload.cshtml` to display the PDF/A compliance level and conditionally show a warning message if the document is not PDF/A compliant.

Enhanced `PdfAttachmentExtractorService` to determine the PDF/A compliance level using `PdfDocumentProcessor`, map it to a string representation, and log the compliance level. Added logic to set `PdfAWarning` for non-compliant documents.
2026-05-26 11:16:28 +02:00
4 changed files with 96 additions and 1 deletions

View File

@@ -9,6 +9,19 @@ public class PdfExtractionResult
Attachments.FirstOrDefault(a => a.IsZugferdXml); Attachments.FirstOrDefault(a => a.IsZugferdXml);
public bool HasZugferdXml => ZugferdXmlAttachment is not null; public bool HasZugferdXml => ZugferdXmlAttachment is not null;
/// <summary>
/// PDF/A conformance level of the document as display text.
/// Examples: "PDF/A-3b", "PDF/A-2b", or "Kein PDF/A" (German for "not PDF/A").
/// </summary>
public string PdfALevel { get; set; } = string.Empty;
/// <summary>
/// Whether a warning should be shown.
/// True when the document is not PDF/A; ZUGFeRD invoices must be PDF/A-3b.
/// </summary>
public bool PdfAWarning { get; set; }
/// <summary>
/// ZUGFeRD guideline ID taken from the XMP metadata.
/// Example: "urn:ferd:invoice:rc:comfort".
/// Empty when no ZUGFeRD metadata was found.
/// </summary>
public string ZugferdGuidelineId { get; set; } = string.Empty;
} }
public class ExtractedAttachment public class ExtractedAttachment

View File

@@ -42,6 +42,22 @@
} }
</div> </div>
@* PDF/A-Konformitätsstufe anzeigen *@
<div class="alert @(Model.Result.PdfAWarning ? "alert-warning" : "alert-info") mt-2">
<strong>PDF/A-Konformität:</strong> @Model.Result.PdfALevel
@if (!string.IsNullOrEmpty(Model.Result.ZugferdGuidelineId))
{
<br />
<strong>Guideline-ID:</strong>
<code>@Model.Result.ZugferdGuidelineId</code>
}
@if (Model.Result.PdfAWarning)
{
<span> ⚠️ ZUGFeRD-Rechnungen müssen PDF/A-3b sein.</span>
}
</div>
<table class="table table-sm table-bordered"> <table class="table table-sm table-bordered">
<thead class="table-light"> <thead class="table-light">
<tr> <tr>

View File

@@ -26,6 +26,40 @@ public class PdfAttachmentExtractorService(
using var processor = new PdfDocumentProcessor(); using var processor = new PdfDocumentProcessor();
processor.LoadDocument(pdfStream); processor.LoadDocument(pdfStream);
// PDF/A-Konformität prüfen
var compatibility = processor.Document.PdfACompatibility;
result.PdfALevel = compatibility switch
{
PdfACompatibility.None => "Kein PDF/A",
PdfACompatibility.PdfA1b => "PDF/A-1b",
PdfACompatibility.PdfA2b => "PDF/A-2b",
PdfACompatibility.PdfA3b => "PDF/A-3b",
_ => compatibility.ToString()
};
result.PdfAWarning = compatibility == PdfACompatibility.None;
logger.LogInformation(
"PDF '{FileName}': Konformität = {Level}",
sourceFileName, result.PdfALevel);
// ZUGFeRD Guideline-ID aus XMP-Metadaten lesen
try
{
var xmpData = processor.Document.Metadata?.Data;
if (!string.IsNullOrWhiteSpace(xmpData))
{
result.ZugferdGuidelineId = ExtractGuidelineId(xmpData);
if (!string.IsNullOrEmpty(result.ZugferdGuidelineId))
logger.LogInformation(
"PDF '{FileName}': Guideline-ID = {GuidelineId}",
sourceFileName, result.ZugferdGuidelineId);
}
}
catch (Exception ex)
{
logger.LogWarning(ex, "XMP-Metadaten konnten nicht gelesen werden.");
}
// Fix: .ToList() → IEnumerable → List<T> mit Count-Property // Fix: .ToList() → IEnumerable → List<T> mit Count-Property
var attachments = processor.Document.FileAttachments.ToList(); var attachments = processor.Document.FileAttachments.ToList();
@@ -139,4 +173,34 @@ public class PdfAttachmentExtractorService(
? $"Ja → {result.ZugferdXmlAttachment!.OriginalFileName}" ? $"Ja → {result.ZugferdXmlAttachment!.OriginalFileName}"
: "Nein"); : "Nein");
} }
/// <summary>
/// Scans raw XMP metadata text for the first value that starts with one of the
/// known ZUGFeRD/Factur-X URN prefixes and returns that value.
/// </summary>
/// <param name="xmpData">XMP packet as text; null or empty yields an empty result.</param>
/// <returns>
/// The matched URN without surrounding markup, or <see cref="string.Empty"/>
/// when no known prefix occurs in <paramref name="xmpData"/>.
/// </returns>
private static string ExtractGuidelineId(string xmpData)
{
    if (string.IsNullOrEmpty(xmpData))
    {
        return string.Empty;
    }

    // Known ZUGFeRD/Factur-X guideline prefixes, tried in this priority order.
    string[] knownPrefixes =
    [
        "urn:ferd:",
        "urn:cen.eu:",
        "urn:factur-x.",
        "urn:zugferd:",
        "urn:xoev-de:"
    ];

    // XMP is XML, so the URN can appear as element text
    //   <ram:ID>urn:ferd:invoice:rc:comfort</ram:ID>
    // or inside an attribute value
    //   xmlns:fx="urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#"
    // Searching only for the next '<' would return the attribute's closing
    // quote, the '>' and any following text as part of the value, so the value
    // ends at the first markup delimiter or whitespace character instead.
    foreach (var prefix in knownPrefixes)
    {
        var start = xmpData.IndexOf(prefix, StringComparison.OrdinalIgnoreCase);
        if (start < 0)
        {
            continue;
        }

        var end = start;
        while (end < xmpData.Length && !IsValueTerminator(xmpData[end]))
        {
            end++;
        }

        return xmpData[start..end];
    }

    return string.Empty;

    // A URN contains neither whitespace nor XML markup characters.
    static bool IsValueTerminator(char c) =>
        c is '<' or '>' or '"' or '\'' || char.IsWhiteSpace(c);
}
} }

View File

@@ -25,7 +25,9 @@
if (isZugferd || attachment.MimeType == "text/xml") if (isZugferd || attachment.MimeType == "text/xml")
{ {
byte[] data = attachment.Data; byte[] data = attachment.Data;
return Encoding.UTF8.GetString(data); // BOM entfernen falls vorhanden (EF BB BF am Anfang)
var text = Encoding.UTF8.GetString(data);
return text.TrimStart('\uFEFF');
} }
} }