Let’s say we want to check for name existence in a database. However the user input might not be sanitized or in the same order as the one in the database. The name might have spelling mistakes. Also the female name is sometimes adjusted to match the male name after mariage. This might pose an issue in identifying the man or woman.
Now, the alternatives for a woman when she marries are as shown below (using the examples of Szendrey Júlia and Petőfi Sándor – Júlia and Sándor are their given names):
-
Júlia can keep her maiden name, as Szendrey Júlia (now very popular, especially among more-educated women).
-
Júlia gives up her name, adds the suffix -né to her husband’s full name, and will be called Petőfi Sándorné.
-
Júlia adds the suffix -né to her husband’s family name, adds her full name and will be called Petőfiné Szendrey Júlia.
-
Júlia adds the suffix -né to her husband’s full name, adds her full name and will be called Petőfi Sándorné Szendrey Júlia (less popular these days, because it is long to write).
-
Júlia takes her husband’s family name, keeps her given name “Júlia” and will be called Petőfi Júlia.
You can see thy this might be a problem.
This is my solution:
It uses code from here: Getting percentage of character match of 2 strings – MSSQL – SQL Server / Alternative to SOUNDEX
CREATE FUNCTION [dbo].[CompareTwoNames] ( @string1 NVARCHAR(2000), @string2 NVARCHAR(2000), @percentMatchThreshold INT = 80, @withWordPermutation BIT = 0 ) RETURNS BIT AS BEGIN SET @string1 = REPLACE(@string1, 'DR.','') SET @string2 = REPLACE(@string2, 'DR.','') SET @string1 = LTRIM(RTRIM(@string1)) SET @string2 = LTRIM(RTRIM(@string2)) DECLARE @AreStringsEqual BIT SET @AreStringsEqual = 0 SET @string1 = REPLACE(@string1,N'Ăś', 'Ö') SET @string1 = REPLACE(@string1,N'Ăź', 'Ü') SET @string1 = REPLACE(@string1,N'Ĺą', 'Ü') SET @string1 = REPLACE(@string1,N'ĂĄ', 'Á') SET @string1 = REPLACE(@string1,N'ĂŠ', 'É') SET @string1 = REPLACE(@string1,N'Ăł','Ó') SET @string1 = REPLACE(@string1,N'Ă','Á') SET @string1 = REPLACE(@string1,N'Ĺ','O') SET @string1 = REPLACE(@string1,N'é','É' ) SET @string1 = REPLACE(@string1, N'ó','Ó' ) SET @string1 = REPLACE(@string1,N'Ăł','Ó') SET @string1 = REPLACE(@string1,N'Ăś','O' ) SET @string1 = REPLACE(@string1, N'Ă','É' ) SET @string1 = REPLACE(@string1,N'ù','Ü' ) SET @string1 = REPLACE(@string1, N'è','É' ) SET @string2 = REPLACE(@string2,N'Ăś', 'Ö') SET @string2 = REPLACE(@string2,N'Ăź', 'Ü') SET @string2 = REPLACE(@string2,N'Ĺą', 'Ü') SET @string2 = REPLACE(@string2,N'ĂĄ', 'Á') SET @string2 = REPLACE(@string2,N'ĂŠ', 'É') SET @string2 = REPLACE(@string2,N'Ăł','Ó') SET @string2 = REPLACE(@string2,N'Ă','Á') SET @string2 = REPLACE(@string2,N'Ĺ','O') SET @string2 = REPLACE(@string2,N'é','É' ) SET @string2 = REPLACE(@string2, N'ó','Ó') SET @string2 = REPLACE(@string2,N'Ăł','Ó') SET @string2 = REPLACE(@string2,N'Ăś','O' ) SET @string2 = REPLACE(@string2, N'Ă','É') SET @string2 = REPLACE(@string2,N'ù','Ü' ) SET @string2 = REPLACE(@string2, N'è','É' ) IF @string1 IS NULL AND @string2 IS NULL BEGIN SET @AreStringsEqual = 1 RETURN @AreStringsEqual END IF LEN( Ltrim(Rtrim(@string1))) = 0 AND LEN(Ltrim(Rtrim(@string2))) = 0 BEGIN SET @AreStringsEqual = 1 RETURN @AreStringsEqual END IF @string1 IS NULL AND LEN(Ltrim(Rtrim(@string2))) = 0 BEGIN SET @AreStringsEqual = 1 RETURN @AreStringsEqual END IF LEN( Ltrim(Rtrim(@string1))) = 0 AND @string2 IS NULL BEGIN SET @AreStringsEqual = 1 RETURN @AreStringsEqual END --SET @string1= @string1 COLLATE SQL_Latin1_General_CP1_CI_AS -- SET @string2= @string2 COLLATE SQL_Latin1_General_CP1_CI_AS SET @string1 = dbo.RemoveAccents(@string1) SET @string2 = dbo.RemoveAccents(@string2) SET @string1 = UPPER(@string1) SET @string2 = UPPER(@string2) IF ( Ltrim(Rtrim(@string1 COLLATE Latin1_General_CI_AI)) = Ltrim(Rtrim(@string2 COLLATE Latin1_General_CI_AI)) ) BEGIN SET @AreStringsEqual = 1 GOTO JustBeforeReturn--RETURN @AreStringsEqual END IF SOUNDEX(@string1) = SOUNDEX(@string2) BEGIN SET @AreStringsEqual = 1 GOTO JustBeforeReturn--RETURN @AreStringsEqual END DECLARE @percentageMatch INT SELECT @percentageMatch = [AstraHu].[dbo].[GetPercentageOfTwoStringMatching] ( @string1, @string2) IF @percentageMatch >= @percentMatchThreshold BEGIN SET @AreStringsEqual = 1 GOTO JustBeforeReturn--RETURN @AreStringsEqual END IF(@string1 LIKE '%' + @string2 +'%') OR (@string2 LIKE '%' + @string1 +'%') BEGIN SET @AreStringsEqual = 1 GOTO JustBeforeReturn--RETURN @AreStringsEqual END IF @withWordPermutation = 1 BEGIN DECLARE @namesWithWordPermutationTable TABLE ( id INT NOT NULL IDENTITY(1,1), name NVARCHAR(500) ) INSERT INTO @namesWithWordPermutationTable SELECT name FROM [dbo].[GenerateNamePermutation](@string1) DECLARE @Index INT = 1, @MaxIndex INT SELECT @MaxIndex = COUNT(1) FROM @namesWithWordPermutationTable WHILE @Index <= @MaxIndex BEGIN SET @AreStringsEqual = [dbo].[CompareTwoNames]((SELECT TOP(1) name FROM @namesWithWordPermutationTable WHERE id = @Index) , @string2 , @percentMatchThreshold, 0) IF @AreStringsEqual = 1 BREAK SET @Index= @Index + 1 END END JustBeforeReturn: DECLARE @string1ContainsFemaleName BIT = 0, @string2ContainsFemaleName BIT = 0 IF (@string1 LIKE '%NÉ %') OR (@string1 LIKE '%NÉ') OR (@string1 LIKE '%NE %') OR (@string1 LIKE '%NE') SET @string1ContainsFemaleName = 1 IF (@string2 LIKE '%NÉ %') OR (@string2 LIKE '%NÉ') OR (@string2 LIKE '%NE %') OR (@string2 LIKE '%NE') SET @string2ContainsFemaleName = 1 IF(@string1ContainsFemaleName <> @string2ContainsFemaleName) SET @AreStringsEqual = 0 IF @string1ContainsFemaleName = @string2ContainsFemaleName AND @string2ContainsFemaleName = 1 BEGIN DECLARE @string1WithoutFemaleForm VARCHAR(2000), @string2WithoutFemaleForm VARCHAR(2000) SET @string1WithoutFemaleForm = REPLACE( REPLACE( REPLACE(@string1,'NÉ ',' ') ,'NE ' ,' ') ,'NE' ,''); SET @string2WithoutFemaleForm = REPLACE( REPLACE( REPLACE(@string2,'NÉ ',' ') ,'NE ' ,' ') ,'NE' ,''); SET @AreStringsEqual = [dbo].[CompareTwoNames](@string1WithoutFemaleForm, @string2WithoutFemaleForm , @percentMatchThreshold, @withWordPermutation) END RETURN @AreStringsEqual END
CREATE FUNCTION [dbo].[GenerateNamePermutation] ( @words NVARCHAR(500) ) RETURNS @returnTable TABLE ( name NVARCHAR(500) ) AS BEGIN -- Fill the table variable with the rows for your result set DECLARE @separator CHAR(1) DECLARE @NameParts TABLE ( PartId INT IDENTITY, part NVARCHAR(50)) -- DECLARE @words NVARCHAR(50) SET @separator = ' ' ;WITH Parts(pn, start, finish) AS (SELECT 1, 1, CHARINDEX(@separator, @words) UNION ALL SELECT pn + 1, finish + 1, CHARINDEX(@separator, @words, finish + 1) FROM Parts WHERE finish > 0) INSERT INTO @NameParts (part) SELECT TOP(5) SUBSTRING(@words, start, CASE WHEN finish > 0 THEN finish - start ELSE 50 END) FROM Parts DECLARE @tokencount INT SELECT @tokencount = COUNT(*) FROM @NameParts; WITH Subsets AS (SELECT CAST(' ' + part AS NVARCHAR(MAX)) Permutation, CAST(1 AS INT) AS Iteration FROM @NameParts UNION ALL SELECT Permutation + ' ' + part AS Permutation, Iteration + 1 AS Iteration FROM Subsets s JOIN @NameParts n ON s.Permutation NOT LIKE '%' + n.part + '%') INSERT INTO @returnTable SELECT STUFF(Permutation, 1, 1, '') AS Perm FROM SUBSETS WHERE Iteration = @tokencount RETURN END