83 lines
3.2 KiB
VB.net

Imports System.IO
Imports System.Text.RegularExpressions
Imports DigitalData.Modules.Logging
Public Class FileGroups
Private _logger As Logger
Public Sub New(LogConfig As LogConfig)
_logger = LogConfig.GetLogger()
End Sub
''' <summary>
''' Group files by message id. Message id is extracted from filename.
''' Filename is expected to be in the form: 1234@subdomain.company.com
''' <param name="Files">The list of files to process</param>
''' </summary>
Public Function GroupFiles(Files As List(Of FileInfo)) As Dictionary(Of String, List(Of FileInfo))
Dim oGrouped As New Dictionary(Of String, List(Of FileInfo))
If Files.Count = 0 Then
Return oGrouped
End If
For Each oFile In Files
Dim oMessageId = GetMessageIdFromFileName(oFile.Name)
If oMessageId Is Nothing Then
_logger.Warn("File {0} did not have the required filename-format!", oMessageId)
Continue For
End If
If oGrouped.ContainsKey(oMessageId) Then
oGrouped.Item(oMessageId).Add(oFile)
Else
oGrouped.Add(oMessageId, New List(Of FileInfo) From {oFile})
End If
Next
Return oGrouped
End Function
''' <summary>
''' Group files by message id. Message id is created from `FakeMessageIdDomain` and a random string
''' </summary>
''' <param name="Files">The list of files to process</param>
''' <param name="FakeMessageIdDomain">Arbitrary domain for message id generation. Example: sub.company.com</param>
''' <returns></returns>
Public Function GroupFiles(Files As List(Of FileInfo), FakeMessageIdDomain As String) As Dictionary(Of String, List(Of FileInfo))
Dim oGrouped As New Dictionary(Of String, List(Of FileInfo))
If Files.Count = 0 Then
Return oGrouped
End If
For Each oFile In Files
Dim oIdentifier = Guid.NewGuid().ToString()
Dim oMessageId = $"{oIdentifier}@{FakeMessageIdDomain}"
If oGrouped.ContainsKey(oMessageId) Then
oGrouped.Item(oMessageId).Add(oFile)
Else
oGrouped.Add(oMessageId, New List(Of FileInfo) From {oFile})
End If
Next
Return oGrouped
End Function
Private Function GetMessageIdFromFileName(Filename As String) As String
' Regex to find MessageId
' See also: https://stackoverflow.com/questions/3968500/regex-to-validate-a-message-id-as-per-rfc2822
Dim oRegex = "(((([a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*)|(""(([\x01-\x08\x0B\x0C\x0E-\x1F\x7F]|[\x21\x23-\x5B\x5D-\x7E])|(\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*""))@(([a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*)|(\[(([\x01-\x08\x0B\x0C\x0E-\x1F\x7F]|[\x21-\x5A\x5E-\x7E])|(\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*\]))))~.+"
Dim oMatch = Regex.Match(Filename, oRegex, RegexOptions.IgnoreCase)
If oMatch.Success Then
Dim oMessageId = oMatch.Groups(1).Value
Return oMessageId
Else
Return Nothing
End If
End Function
End Class