Added `PdfALevel` and `PdfAWarning` properties to the `PdfExtractionResult` class to store the PDF/A compliance level and indicate if a warning should be displayed for non-compliance. Updated `Upload.cshtml` to display the PDF/A compliance level and conditionally show a warning message if the document is not PDF/A compliant. Enhanced `PdfAttachmentExtractorService` to determine the PDF/A compliance level using `PdfDocumentProcessor`, map it to a string representation, and log the compliance level. Added logic to set `PdfAWarning` for non-compliant documents.
158 lines
5.4 KiB
C#
158 lines
5.4 KiB
C#
using DevExpress.Pdf;
|
|
using DXApp.TemplateKitProject.Models;
|
|
|
|
namespace DXApp.TemplateKitProject.Services;
|
|
|
|
/// <summary>
/// Extracts the file attachments embedded in a PDF document, writes them to a
/// per-upload output directory and records the PDF/A compatibility reported for
/// the document.
/// </summary>
public class PdfAttachmentExtractorService(
    IConfiguration configuration,
    ILogger<PdfAttachmentExtractorService> logger)
{
    /// <summary>File name used when an attachment's own name is unusable.</summary>
    private const string FallbackFileName = "attachment";

    /// <summary>
    /// File-name suffixes (compared case-insensitively) that mark an attachment
    /// as a ZUGFeRD / Factur-X XML payload.
    /// </summary>
    private static readonly string[] ZugferdFileNames =
    [
        "zugferd-invoice.xml",
        "factur-x.xml",
        "xrechnung.xml",
        "zugferd_2p0_en16931_muster.xml",
        "cii-data.xml"
    ];

    /// <summary>
    /// Loads the PDF from <paramref name="pdfStream"/>, stores its PDF/A level on
    /// the result and saves every embedded attachment to disk.
    /// </summary>
    /// <param name="pdfStream">Stream containing the PDF document.</param>
    /// <param name="sourceFileName">
    /// Name of the uploaded file; used for log messages and to derive the name
    /// of the output directory.
    /// </param>
    /// <returns>
    /// The extraction result. Its attachment list contains only the attachments
    /// that could be saved; attachments that fail to save are logged as warnings
    /// and skipped.
    /// </returns>
    /// <remarks>
    /// Exceptions raised while loading the document or iterating its attachments
    /// are logged as errors and rethrown unchanged. The output directory is only
    /// created when the document has at least one attachment.
    /// </remarks>
    public PdfExtractionResult ExtractAttachments(Stream pdfStream, string sourceFileName)
    {
        var result = new PdfExtractionResult();
        var outputDir = ResolveOutputDirectory(sourceFileName);

        try
        {
            using var processor = new PdfDocumentProcessor();
            processor.LoadDocument(pdfStream);

            // Check PDF/A conformance and map it to a display string.
            var compatibility = processor.Document.PdfACompatibility;
            result.PdfALevel = compatibility switch
            {
                PdfACompatibility.None => "Kein PDF/A",
                PdfACompatibility.PdfA1b => "PDF/A-1b",
                PdfACompatibility.PdfA2b => "PDF/A-2b",
                PdfACompatibility.PdfA3b => "PDF/A-3b",
                _ => compatibility.ToString()
            };

            // Only a document without any PDF/A compatibility triggers the warning.
            result.PdfAWarning = compatibility == PdfACompatibility.None;

            logger.LogInformation(
                "PDF '{FileName}': Konformität = {Level}",
                sourceFileName, result.PdfALevel);

            // Materialize once so the Count property is available and the
            // attachment sequence is enumerated a single time.
            var attachments = processor.Document.FileAttachments.ToList();

            if (attachments.Count == 0)
            {
                logger.LogInformation("PDF '{FileName}': Keine Anhänge gefunden.", sourceFileName);
                return result;
            }

            logger.LogInformation(
                "PDF '{FileName}': {Count} Anhang/Anhänge gefunden.",
                sourceFileName, attachments.Count);

            Directory.CreateDirectory(outputDir);

            foreach (var attachment in attachments)
            {
                var extracted = SaveAttachment(attachment, outputDir);
                if (extracted is not null)
                {
                    result.Attachments.Add(extracted);
                }
            }
        }
        catch (Exception ex)
        {
            logger.LogError(ex,
                "Fehler beim Extrahieren der Anhänge aus '{FileName}'.", sourceFileName);
            throw;
        }

        LogExtractionSummary(sourceFileName, result);
        return result;
    }

    /// <summary>
    /// Writes a single attachment into <paramref name="outputDir"/> under a
    /// sanitized, collision-free file name.
    /// </summary>
    /// <param name="attachment">The attachment to save.</param>
    /// <param name="outputDir">Existing directory that receives the file.</param>
    /// <returns>
    /// A description of the saved file, or <c>null</c> when saving failed. The
    /// failure is logged as a warning so one bad attachment does not abort the
    /// remaining ones.
    /// </returns>
    private ExtractedAttachment? SaveAttachment(PdfFileAttachment attachment, string outputDir)
    {
        try
        {
            var safeFileName = SanitizeFileName(attachment.FileName);
            var targetPath = EnsureUniqueFilePath(Path.Combine(outputDir, safeFileName));
            var data = attachment.Data;

            File.WriteAllBytes(targetPath, data);

            // Detection uses the original name, not the sanitized one.
            var isZugferd = IsZugferdXml(attachment.FileName);

            logger.LogInformation(
                " → Gespeichert: '{FileName}' ({Bytes} Bytes){Zugferd}",
                safeFileName, data.Length,
                isZugferd ? " [ZUGFeRD/Factur-X XML]" : string.Empty);

            return new ExtractedAttachment
            {
                OriginalFileName = attachment.FileName,
                SavedFilePath = targetPath,
                FileSizeBytes = data.Length,
                IsZugferdXml = isZugferd
            };
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex,
                " → Anhang '{Name}' konnte nicht gespeichert werden.", attachment.FileName);
            return null;
        }
    }

    /// <summary>
    /// Builds the output directory path for one upload:
    /// <c>&lt;base&gt;/&lt;source name without extension&gt;_&lt;UTC timestamp&gt;</c>.
    /// The base directory comes from the <c>PdfExtraction:OutputDirectory</c>
    /// configuration key and falls back to a <c>PdfExtractions</c> folder below
    /// the system temp path.
    /// </summary>
    /// <param name="sourceFileName">Name of the uploaded file.</param>
    /// <returns>The directory path; the directory itself is not created here.</returns>
    private string ResolveOutputDirectory(string sourceFileName)
    {
        var baseDir = configuration["PdfExtraction:OutputDirectory"]
            ?? Path.Combine(Path.GetTempPath(), "PdfExtractions");

        // The upload name is caller-supplied: replace characters that are not
        // valid in a file name so that creating the directory cannot fail on them.
        var baseName = ReplaceInvalidFileNameChars(Path.GetFileNameWithoutExtension(sourceFileName));

        // Invariant culture keeps the timestamp independent of the thread's culture.
        var timestamp = DateTime.UtcNow.ToString(
            "yyyyMMdd_HHmmss", System.Globalization.CultureInfo.InvariantCulture);

        return Path.Combine(baseDir, $"{baseName}_{timestamp}");
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="fileName"/> ends with one of the
    /// known ZUGFeRD / Factur-X XML file names, ignoring case.
    /// </summary>
    /// <param name="fileName">Attachment file name; <c>null</c> yields <c>false</c>.</param>
    private static bool IsZugferdXml(string? fileName) =>
        fileName is not null
        && ZugferdFileNames.Any(known => fileName.EndsWith(known, StringComparison.OrdinalIgnoreCase));

    /// <summary>
    /// Turns an attachment name into a name that is safe to combine with the
    /// output directory.
    /// </summary>
    /// <param name="fileName">Name as stored in the PDF; may be <c>null</c>.</param>
    /// <returns>
    /// The name with invalid file-name characters replaced by <c>_</c>, or
    /// <c>"attachment"</c> when nothing usable remains.
    /// </returns>
    private static string SanitizeFileName(string? fileName)
    {
        var safe = ReplaceInvalidFileNameChars(fileName);

        // '.' is never part of Path.GetInvalidFileNameChars, so "." and ".."
        // pass through unchanged; combined with the output directory they would
        // address a directory instead of a file.
        var isDirectoryReference = safe.Trim() is "." or "..";

        return string.IsNullOrWhiteSpace(safe) || isDirectoryReference
            ? FallbackFileName
            : safe;
    }

    /// <summary>
    /// Replaces every character listed by <see cref="Path.GetInvalidFileNameChars"/>
    /// with <c>_</c>.
    /// </summary>
    /// <param name="name">Text to clean; <c>null</c> or empty yields an empty string.</param>
    private static string ReplaceInvalidFileNameChars(string? name)
    {
        if (string.IsNullOrEmpty(name))
        {
            return string.Empty;
        }

        var invalid = Path.GetInvalidFileNameChars();
        return string.Concat(name.Select(c => invalid.Contains(c) ? '_' : c));
    }

    /// <summary>
    /// Returns <paramref name="filePath"/> if no file exists there; otherwise
    /// appends <c>_1</c>, <c>_2</c>, … before the extension until an unused path
    /// is found.
    /// </summary>
    /// <param name="filePath">Desired target path.</param>
    /// <returns>A path at which no file existed when it was checked.</returns>
    private static string EnsureUniqueFilePath(string filePath)
    {
        if (!File.Exists(filePath))
        {
            return filePath;
        }

        var dir = Path.GetDirectoryName(filePath)!;
        var name = Path.GetFileNameWithoutExtension(filePath);
        var ext = Path.GetExtension(filePath);

        var suffix = 1;
        string candidate;
        do
        {
            candidate = Path.Combine(dir, $"{name}_{suffix++}{ext}");
        }
        while (File.Exists(candidate));

        return candidate;
    }

    /// <summary>
    /// Logs how many attachments were extracted and, if present, the original
    /// name of the ZUGFeRD XML attachment.
    /// </summary>
    /// <param name="sourceFileName">Name of the uploaded file.</param>
    /// <param name="result">The completed extraction result.</param>
    private void LogExtractionSummary(string sourceFileName, PdfExtractionResult result)
    {
        logger.LogInformation(
            "PDF '{FileName}': {Total} Anhang/Anhänge extrahiert. ZUGFeRD-XML: {HasXml}",
            sourceFileName,
            result.Attachments.Count,
            result.HasZugferdXml
                ? $"Ja → {result.ZugferdXmlAttachment!.OriginalFileName}"
                : "Nein");
    }
}