Imports System
Imports System.Linq
Imports System.Collections.Generic
Imports System.Globalization
Imports System.Text
' ===== Model =====
''' <summary>
''' A named entry that the duplicate-detection routines in this file operate on.
''' </summary>
Public Class Record

    ''' <summary>Identifier of the record; this is the value returned by GetDuplicateIds.</summary>
    Public Property Id As Integer

    ''' <summary>Raw name as entered; it is normalized into a comparison key before grouping.</summary>
    Public Property Nom As String

    ''' <summary>Creation date; used as the secondary sort key by GetDuplicateRecords.</summary>
    Public Property DateCreation As DateTime

End Class
Module Module1
' --- 1) Robust normalization -> comparable key ---
''' <summary>
''' Builds a comparison key for a name so that spelling variants of the same name
''' (different accents, case, punctuation, spacing or word order) map to the same string.
''' </summary>
''' <remarks>
''' Steps:
'''  - canonical decomposition, then removal of non-spacing marks (accents);
'''  - removal of invisible format characters (soft hyphen, zero-width joiners, direction marks);
'''  - every separator, control character (tab, CR, LF), punctuation mark or symbol
'''    becomes a word boundary, and runs of them collapse into a single boundary;
'''  - recomposition (FormC) and invariant lower-casing;
'''  - words sorted with an ordinal comparer and joined with single spaces.
''' </remarks>
''' <param name="input">Raw name; may be Nothing, empty or whitespace only.</param>
''' <returns>
''' The normalized key, or an empty string when the input holds no word characters.
''' </returns>
Private Function NormalizeNameKey(input As String) As String
    If String.IsNullOrWhiteSpace(input) Then Return ""

    ' FormD splits an accented letter into base letter + combining mark,
    ' which lets the loop below drop the mark and keep the letter.
    Dim decomposed As String = input.Normalize(NormalizationForm.FormD)

    Dim sb As New StringBuilder(decomposed.Length)
    ' True when a word boundary was seen after the last emitted character.
    ' The space is written lazily, so the buffer never starts or ends with one.
    Dim boundaryPending As Boolean = False

    For Each ch As Char In decomposed
        Select Case CharUnicodeInfo.GetUnicodeCategory(ch)
            Case UnicodeCategory.NonSpacingMark,
                 UnicodeCategory.Format
                ' Accents and invisible formatting characters are dropped without
                ' creating a word boundary.

            Case UnicodeCategory.SpaceSeparator,
                 UnicodeCategory.LineSeparator,
                 UnicodeCategory.ParagraphSeparator,
                 UnicodeCategory.Control,
                 UnicodeCategory.ConnectorPunctuation,
                 UnicodeCategory.DashPunctuation,
                 UnicodeCategory.OpenPunctuation,
                 UnicodeCategory.ClosePunctuation,
                 UnicodeCategory.InitialQuotePunctuation,
                 UnicodeCategory.FinalQuotePunctuation,
                 UnicodeCategory.OtherPunctuation,
                 UnicodeCategory.MathSymbol,
                 UnicodeCategory.CurrencySymbol,
                 UnicodeCategory.ModifierSymbol,
                 UnicodeCategory.OtherSymbol
                ' Control covers tab / CR / LF, which are whitespace but not in any
                ' *Separator category. A boundary before the first word is ignored.
                boundaryPending = sb.Length > 0

            Case Else
                If boundaryPending Then
                    sb.Append(" "c)
                    boundaryPending = False
                End If
                sb.Append(ch)
        End Select
    Next

    If sb.Length = 0 Then Return ""

    ' Recompose whatever is left, then fold case in a culture-independent way.
    Dim cleaned As String = sb.ToString().Normalize(NormalizationForm.FormC).ToLowerInvariant()

    ' Sorting the words makes the key independent of word order ("Jean Dupont" = "Dupont Jean").
    Dim words As String() = cleaned.Split(New Char() {" "c}, StringSplitOptions.RemoveEmptyEntries)
    Array.Sort(words, StringComparer.Ordinal)
    Return String.Join(" ", words)
End Function
' --- 2) IDs involved in a duplicate on the normalized key ---
''' <summary>
''' Returns the Id of every record whose normalized name key is shared with at least
''' one other record. Records whose key is empty are never reported.
''' </summary>
''' <param name="data">Records to inspect.</param>
''' <returns>
''' Distinct ids, ordered by first appearance of their key group in <paramref name="data"/>
''' and, inside a group, by position in <paramref name="data"/>.
''' </returns>
Public Function GetDuplicateIds(data As IEnumerable(Of Record)) As List(Of Integer)
    Dim duplicateIds As New List(Of Integer)()
    Dim alreadyReported As New HashSet(Of Integer)()

    For Each bucket In data.GroupBy(Function(rec) NormalizeNameKey(rec.Nom))
        ' Skip names that normalize to nothing and keys held by a single record.
        If bucket.Key = "" OrElse bucket.Count() < 2 Then Continue For

        For Each rec In bucket
            ' HashSet.Add returns False for an id that was already emitted.
            If alreadyReported.Add(rec.Id) Then duplicateIds.Add(rec.Id)
        Next
    Next

    Return duplicateIds
End Function
' --- 3) Full records involved, sorted by (normalized key, DateCreation) ---
''' <summary>
''' Returns every record whose normalized name key is shared with at least one other
''' record, sorted by key (ordinal) and then by creation date. Records whose key is
''' empty are never reported.
''' </summary>
''' <remarks>
''' <paramref name="data"/> is enumerated exactly once and each name is normalized exactly
''' once, so the method is safe to call with a lazily evaluated sequence.
''' </remarks>
''' <param name="data">Records to inspect.</param>
''' <returns>
''' The duplicate records; records with the same key and the same date keep their
''' relative order from <paramref name="data"/>.
''' </returns>
Public Function GetDuplicateRecords(data As IEnumerable(Of Record)) As List(Of Record)
    ' Group once; keys are distinct, so ordering the groups orders the final output by key.
    Dim duplicateGroups = data _
        .GroupBy(Function(r) NormalizeNameKey(r.Nom), StringComparer.Ordinal) _
        .Where(Function(g) g.Key.Length > 0 AndAlso g.Count() >= 2) _
        .OrderBy(Function(g) g.Key, StringComparer.Ordinal)

    Dim result As New List(Of Record)()
    For Each grp In duplicateGroups
        ' Group members are in source order and OrderBy is stable, so ties on the date
        ' keep their source order.
        result.AddRange(grp.OrderBy(Function(r) r.DateCreation))
    Next
    Return result
End Function
' --- 4) Usage example ---
''' <summary>
''' Demo entry point: builds a small sample containing three pairs of equivalent names,
''' prints the ids detected as duplicates, then prints the duplicate records with their key.
''' </summary>
Sub Main()
    ' Spelled out with ChrW so the no-break space cannot be silently replaced by a
    ' regular space when the file is edited or re-encoded.
    Dim nbsp As Char = ChrW(&HA0)

    Dim data As New List(Of Record) From {
        New Record With {.Id = 1, .Nom = "Jean Dupont", .DateCreation = #8/1/2025#},
        New Record With {.Id = 2, .Nom = "dupont" & nbsp & "JEAN ", .DateCreation = #8/2/2025#}, ' NBSP + trailing space
        New Record With {.Id = 3, .Nom = "Alice–Martin", .DateCreation = #8/3/2025#}, ' Unicode dash
        New Record With {.Id = 4, .Nom = "Martin Alice", .DateCreation = #8/4/2025#},
        New Record With {.Id = 5, .Nom = "Élodie MARIE", .DateCreation = #8/5/2025#},
        New Record With {.Id = 6, .Nom = "Elodie-Marie", .DateCreation = #8/6/2025#}
    }

    Dim ids = GetDuplicateIds(data)
    Console.WriteLine("IDs doublons : " & String.Join(", ", ids))

    Dim recs = GetDuplicateRecords(data)
    For Each r In recs
        ' Invariant culture keeps the yyyy-MM-dd output on the Gregorian calendar
        ' whatever the current culture's default calendar is.
        Dim created As String = r.DateCreation.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)
        Console.WriteLine($"{NormalizeNameKey(r.Nom)} | {r.Id} | {r.Nom} | {created}")
    Next
End Sub
End Module