Imports System.IO
Imports System.Text.RegularExpressions
Imports DigitalData.Modules.Logging

''' <summary>
''' Groups lists of files into buckets keyed by a message id.
''' </summary>
Public Class FileGroups
    ''' <summary>
    ''' Pattern used to pull the message id out of a filename. Capture group 1 is the
    ''' run of letters/digits directly in front of "~ATTM", which must be followed by
    ''' one or more digits and a dot. The pattern is not anchored, so any text before
    ''' that alphanumeric run is ignored.
    ''' See also: https://stackoverflow.com/questions/3968500/regex-to-validate-a-message-id-as-per-rfc2822
    ''' </summary>
    Private Shared ReadOnly MessageIdRegex As New Regex("([A-Z0-9]+)~ATTM\d+\..*", RegexOptions.IgnoreCase Or RegexOptions.Compiled)

    Private ReadOnly _logger As Logger

    ''' <summary>
    ''' Creates a new instance and obtains its logger from the given configuration.
    ''' </summary>
    ''' <param name="LogConfig">Logging configuration that supplies the logger.</param>
    Public Sub New(LogConfig As LogConfig)
        _logger = LogConfig.GetLogger()
    End Sub

    ''' <summary>
    ''' Group files by message id. The message id is extracted from each file's name,
    ''' which is expected to look like: MESSAGEID~ATTM1.ext
    ''' Files whose name does not match are logged with a warning and skipped.
    ''' </summary>
    ''' <remarks>
    ''' NOTE(review): earlier documentation described ids of the form
    ''' 1234@subdomain.company.com, but the pattern in use only captures letters and
    ''' digits directly before "~ATTM" - confirm which format is intended.
    ''' </remarks>
    ''' <param name="Files">The list of files to process</param>
    ''' <returns>
    ''' A dictionary mapping each message id to the files that carry it. Empty when
    ''' <paramref name="Files"/> is Nothing or contains no files.
    ''' </returns>
    Public Function GroupFiles(Files As List(Of FileInfo)) As Dictionary(Of String, List(Of FileInfo))
        Dim oGrouped As New Dictionary(Of String, List(Of FileInfo))

        ' Treat a missing list the same as an empty one instead of failing with a NullReferenceException.
        If Files Is Nothing OrElse Files.Count = 0 Then
            Return oGrouped
        End If

        For Each oFile In Files
            Dim oMessageId = GetMessageIdFromFileName(oFile.Name)

            If oMessageId Is Nothing Then
                _logger.Warn("File {0} did not have the required filename-format!", oFile.Name)
                Continue For
            End If

            AddToGroup(oGrouped, oMessageId, oFile)
        Next

        Return oGrouped
    End Function

    ''' <summary>
    ''' Group files by message id. The message id is created from
    ''' <paramref name="FakeMessageIdDomain"/> and a random string (a new GUID).
    ''' A fresh id is generated for every file, so in practice each file ends up in a group of its own.
    ''' </summary>
    ''' <param name="Files">The list of files to process</param>
    ''' <param name="FakeMessageIdDomain">
    ''' Arbitrary domain for message id generation.
    ''' Example: sub.company.com
    ''' </param>
    ''' <returns>
    ''' A dictionary mapping each generated message id to its file(s). Empty when
    ''' <paramref name="Files"/> is Nothing or contains no files.
    ''' </returns>
    Public Function GroupFiles(Files As List(Of FileInfo), FakeMessageIdDomain As String) As Dictionary(Of String, List(Of FileInfo))
        Dim oGrouped As New Dictionary(Of String, List(Of FileInfo))

        ' Treat a missing list the same as an empty one instead of failing with a NullReferenceException.
        If Files Is Nothing OrElse Files.Count = 0 Then
            Return oGrouped
        End If

        For Each oFile In Files
            Dim oIdentifier = Guid.NewGuid().ToString()
            Dim oMessageId = $"{oIdentifier}@{FakeMessageIdDomain}"

            AddToGroup(oGrouped, oMessageId, oFile)
        Next

        Return oGrouped
    End Function

    ''' <summary>
    ''' Appends a file to the group stored under the given message id, creating the group if it does not exist yet.
    ''' </summary>
    ''' <param name="Groups">The dictionary of groups to modify.</param>
    ''' <param name="MessageId">Key of the group the file belongs to.</param>
    ''' <param name="File">The file to add.</param>
    Private Shared Sub AddToGroup(Groups As Dictionary(Of String, List(Of FileInfo)), MessageId As String, File As FileInfo)
        Dim oExisting As List(Of FileInfo) = Nothing

        ' Single lookup instead of ContainsKey followed by the indexer.
        If Groups.TryGetValue(MessageId, oExisting) Then
            oExisting.Add(File)
        Else
            Groups.Add(MessageId, New List(Of FileInfo) From {File})
        End If
    End Sub

    ''' <summary>
    ''' Extracts the message id from a filename using <see cref="MessageIdRegex"/>.
    ''' </summary>
    ''' <param name="Filename">The filename (without path) to inspect.</param>
    ''' <returns>
    ''' The captured message id, or Nothing when the filename is Nothing/empty or does not match the pattern.
    ''' </returns>
    Private Function GetMessageIdFromFileName(Filename As String) As String
        ' Regex.Match throws on Nothing; an empty name can never match the pattern.
        If String.IsNullOrEmpty(Filename) Then
            Return Nothing
        End If

        Dim oMatch = MessageIdRegex.Match(Filename)

        If oMatch.Success Then
            Return oMatch.Groups(1).Value
        End If

        Return Nothing
    End Function
End Class