Let’s say we want to check for name existence in a database. However the user input might not be sanitized or in the same order as the one in the database. The name might have spelling mistakes. Also the female name is sometimes adjusted to match the male name after mariage. This might pose an issue in identifying the man or woman.
You can see thy this might be a problem.
CREATE FUNCTION [dbo].[CompareTwoNames]
(
@string1 NVARCHAR(2000),
@string2 NVARCHAR(2000),
@percentMatchThreshold INT = 80,
@withWordPermutation BIT = 0
)
RETURNS BIT
AS
BEGIN
SET @string1 = REPLACE(@string1, 'DR.','')
SET @string2 = REPLACE(@string2, 'DR.','')
SET @string1 = LTRIM(RTRIM(@string1))
SET @string2 = LTRIM(RTRIM(@string2))
DECLARE @AreStringsEqual BIT
SET @AreStringsEqual = 0
SET @string1 = REPLACE(@string1,N'Ăś', 'Ö')
SET @string1 = REPLACE(@string1,N'Ăź', 'Ü')
SET @string1 = REPLACE(@string1,N'Ĺą', 'Ü')
SET @string1 = REPLACE(@string1,N'ĂĄ', 'Á')
SET @string1 = REPLACE(@string1,N'ĂŠ', 'É')
SET @string1 = REPLACE(@string1,N'Ăł','Ó')
SET @string1 = REPLACE(@string1,N'Ă','Á')
SET @string1 = REPLACE(@string1,N'Ĺ','O')
SET @string1 = REPLACE(@string1,N'é','É' )
SET @string1 = REPLACE(@string1, N'ó','Ó' )
SET @string1 = REPLACE(@string1,N'Ăł','Ó')
SET @string1 = REPLACE(@string1,N'Ăś','O' )
SET @string1 = REPLACE(@string1, N'Ă','É' )
SET @string1 = REPLACE(@string1,N'ù','Ü' )
SET @string1 = REPLACE(@string1, N'è','É' )
SET @string2 = REPLACE(@string2,N'Ăś', 'Ö')
SET @string2 = REPLACE(@string2,N'Ăź', 'Ü')
SET @string2 = REPLACE(@string2,N'Ĺą', 'Ü')
SET @string2 = REPLACE(@string2,N'ĂĄ', 'Á')
SET @string2 = REPLACE(@string2,N'ĂŠ', 'É')
SET @string2 = REPLACE(@string2,N'Ăł','Ó')
SET @string2 = REPLACE(@string2,N'Ă','Á')
SET @string2 = REPLACE(@string2,N'Ĺ','O')
SET @string2 = REPLACE(@string2,N'é','É' )
SET @string2 = REPLACE(@string2, N'ó','Ó')
SET @string2 = REPLACE(@string2,N'Ăł','Ó')
SET @string2 = REPLACE(@string2,N'Ăś','O' )
SET @string2 = REPLACE(@string2, N'Ă','É')
SET @string2 = REPLACE(@string2,N'ù','Ü' )
SET @string2 = REPLACE(@string2, N'è','É' )
IF @string1 IS NULL AND @string2 IS NULL
BEGIN
SET @AreStringsEqual = 1
RETURN @AreStringsEqual
END
IF LEN( Ltrim(Rtrim(@string1))) = 0 AND LEN(Ltrim(Rtrim(@string2))) = 0
BEGIN
SET @AreStringsEqual = 1
RETURN @AreStringsEqual
END
IF @string1 IS NULL AND LEN(Ltrim(Rtrim(@string2))) = 0
BEGIN
SET @AreStringsEqual = 1
RETURN @AreStringsEqual
END
IF LEN( Ltrim(Rtrim(@string1))) = 0 AND @string2 IS NULL
BEGIN
SET @AreStringsEqual = 1
RETURN @AreStringsEqual
END
--SET @string1= @string1 COLLATE SQL_Latin1_General_CP1_CI_AS
-- SET @string2= @string2 COLLATE SQL_Latin1_General_CP1_CI_AS
SET @string1 = dbo.RemoveAccents(@string1)
SET @string2 = dbo.RemoveAccents(@string2)
SET @string1 = UPPER(@string1)
SET @string2 = UPPER(@string2)
IF ( Ltrim(Rtrim(@string1 COLLATE Latin1_General_CI_AI)) = Ltrim(Rtrim(@string2 COLLATE Latin1_General_CI_AI)) )
BEGIN
SET @AreStringsEqual = 1
GOTO JustBeforeReturn--RETURN @AreStringsEqual
END
IF SOUNDEX(@string1) = SOUNDEX(@string2)
BEGIN
SET @AreStringsEqual = 1
GOTO JustBeforeReturn--RETURN @AreStringsEqual
END
DECLARE @percentageMatch INT
SELECT @percentageMatch = [AstraHu].[dbo].[GetPercentageOfTwoStringMatching] ( @string1, @string2)
IF @percentageMatch >= @percentMatchThreshold
BEGIN
SET @AreStringsEqual = 1
GOTO JustBeforeReturn--RETURN @AreStringsEqual
END
IF(@string1 LIKE '%' + @string2 +'%') OR (@string2 LIKE '%' + @string1 +'%')
BEGIN
SET @AreStringsEqual = 1
GOTO JustBeforeReturn--RETURN @AreStringsEqual
END
IF @withWordPermutation = 1
BEGIN
DECLARE @namesWithWordPermutationTable TABLE
(
id INT NOT NULL IDENTITY(1,1),
name NVARCHAR(500)
)
INSERT INTO @namesWithWordPermutationTable
SELECT name FROM [dbo].[GenerateNamePermutation](@string1)
DECLARE @Index INT = 1, @MaxIndex INT
SELECT @MaxIndex = COUNT(1) FROM @namesWithWordPermutationTable
WHILE @Index <= @MaxIndex
BEGIN
SET @AreStringsEqual = [dbo].[CompareTwoNames]((SELECT TOP(1) name FROM @namesWithWordPermutationTable WHERE id = @Index) , @string2 , @percentMatchThreshold, 0)
IF @AreStringsEqual = 1
BREAK
SET @Index= @Index + 1
END
END
JustBeforeReturn:
DECLARE @string1ContainsFemaleName BIT = 0, @string2ContainsFemaleName BIT = 0
IF (@string1 LIKE '%NÉ %') OR (@string1 LIKE '%NÉ') OR (@string1 LIKE '%NE %') OR (@string1 LIKE '%NE')
SET @string1ContainsFemaleName = 1
IF (@string2 LIKE '%NÉ %') OR (@string2 LIKE '%NÉ') OR (@string2 LIKE '%NE %') OR (@string2 LIKE '%NE')
SET @string2ContainsFemaleName = 1
IF(@string1ContainsFemaleName <> @string2ContainsFemaleName)
SET @AreStringsEqual = 0
IF @string1ContainsFemaleName = @string2ContainsFemaleName AND @string2ContainsFemaleName = 1
BEGIN
DECLARE @string1WithoutFemaleForm VARCHAR(2000), @string2WithoutFemaleForm VARCHAR(2000)
SET @string1WithoutFemaleForm = REPLACE(
REPLACE(
REPLACE(@string1,'NÉ ',' ')
,'NE '
,' ')
,'NE'
,'');
SET @string2WithoutFemaleForm = REPLACE(
REPLACE(
REPLACE(@string2,'NÉ ',' ')
,'NE '
,' ')
,'NE'
,'');
SET @AreStringsEqual = [dbo].[CompareTwoNames](@string1WithoutFemaleForm, @string2WithoutFemaleForm , @percentMatchThreshold, @withWordPermutation)
END
RETURN @AreStringsEqual
END
CREATE FUNCTION [dbo].[GenerateNamePermutation]
(
@words NVARCHAR(500)
)
RETURNS
@returnTable TABLE
(
name NVARCHAR(500)
)
AS
BEGIN
-- Fill the table variable with the rows for your result set
DECLARE @separator CHAR(1)
DECLARE @NameParts TABLE (
PartId INT IDENTITY,
part NVARCHAR(50))
-- DECLARE @words NVARCHAR(50)
SET @separator = ' '
;WITH Parts(pn, start, finish)
AS (SELECT 1,
1,
CHARINDEX(@separator, @words)
UNION ALL
SELECT pn + 1,
finish + 1,
CHARINDEX(@separator, @words, finish + 1)
FROM Parts
WHERE finish > 0)
INSERT INTO @NameParts
(part)
SELECT TOP(5) SUBSTRING(@words, start, CASE
WHEN finish > 0 THEN finish - start
ELSE 50
END)
FROM Parts
DECLARE @tokencount INT
SELECT @tokencount = COUNT(*)
FROM @NameParts;
WITH Subsets
AS (SELECT CAST(' ' + part AS NVARCHAR(MAX)) Permutation,
CAST(1 AS INT) AS Iteration
FROM @NameParts
UNION ALL
SELECT Permutation + ' ' + part AS Permutation,
Iteration + 1 AS Iteration
FROM Subsets s
JOIN @NameParts n
ON s.Permutation NOT LIKE '%' + n.part + '%')
INSERT INTO @returnTable
SELECT STUFF(Permutation, 1, 1, '') AS Perm
FROM SUBSETS
WHERE Iteration = @tokencount
RETURN
END