我一直想知道如何在Transact SQL中实现此算法,https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
怎么做到呢?
今天,我终于偶然发现了leebickmtu的这个Stack Overflow-answer,它显示了最初从Java移植的C#实现。我自由地将其移植到Transact SQL函数,请尽情享受!
IF OBJECT_ID (N'dbo.InlineMax', N'FN') IS NOT NULL DROP FUNCTION dbo.InlineMax; GO CREATE FUNCTION dbo.InlineMax(@valueOne int, @valueTwo int) RETURNS FLOAT AS BEGIN IF @valueOne > @valueTwo BEGIN RETURN @valueOne END RETURN ISNULL(@valueTwo, @valueOne) END; GO IF OBJECT_ID (N'dbo.InlineMin', N'FN') IS NOT NULL DROP FUNCTION dbo.InlineMin; GO CREATE FUNCTION dbo.InlineMin(@valueOne int, @valueTwo int) RETURNS FLOAT AS BEGIN IF @valueOne < @valueTwo RETURN @valueOne RETURN @valueTwo END; GO IF OBJECT_ID (N'dbo.JaroWinklerDistance', N'FN') IS NOT NULL DROP FUNCTION dbo.JaroWinklerDistance; GO CREATE FUNCTION dbo.JaroWinklerDistance(@stringOne varchar(MAX), @stringTwo varchar(MAX)) RETURNS FLOAT WITH EXECUTE AS CALLER AS BEGIN DECLARE @mWeightThreshold FLOAT; SET @mWeightThreshold = 0.7; DECLARE @mNuMChars INT; SET @mNumChars = 4; DECLARE @lLen1 int; SET @lLen1 = LEN(@stringOne) DECLARE @lLen2 int; SET @lLen2 = LEN(@stringTwo) IF @lLen1 = 0 RETURN CASE WHEN @lLen2 = 0 THEN 1 ELSE 0 END DECLARE @lSearchRange int; SET @lSearchRange = dbo.InlineMax(0,dbo.InlineMax(@lLen1, @lLen2)/2 - 1); DECLARE @lMatched1 TABLE (position int not null, [status] bit not null) DECLARE @lMatched2 TABLE (position int not null, [status] bit not null) DECLARE @lNumCommon int; SET @lNumCommon = 0 DECLARE @i int; SET @i = 1; WHILE(@i <= @lLen1) BEGIN DECLARE @lStart int; SET @lStart = dbo.InlineMax(1, @i - @lSearchRange) DECLARE @lEnd int; SET @lEnd = dbo.InlineMin(@i + @lSearchRange + 1, @lLen2) DECLARE @j int; SET @j = @lStart; WHILE(@j <= @lEnd) BEGIN IF((SELECT [status] FROM @lMatched2 WHERE position = @j) = 1) BEGIN SET @j = @j + 1 CONTINUE END IF (SELECT SUBSTRING(@stringOne, @i, 1)) <> (SELECT SUBSTRING(@stringTwo, @j, 1)) BEGIN SET @j = @j + 1 CONTINUE END INSERT INTO @lMatched1 (position, [status]) VALUES(@i, 1) INSERT INTO @lMatched2 (position, [status]) VALUES(@j, 1) SET @lNumCommon = @lNumCommon + 1 BREAK END SET @i = @i + 1 END IF @lNumCommon = 0 BEGIN RETURN 0.0; END DECLARE @lNumHalfTransposed int; SET @lNumHalfTransposed = 0 DECLARE @k INT; SET @k = 1; DECLARE @stopLoop bit; SET @stopLoop = 0; SET @i = 1; WHILE(@i <= @lLen1) BEGIN IF ((SELECT [status] FROM @lMatched1 WHERE position = @i) = 1) BEGIN SET @i = @i + 1 CONTINUE; END WHILE(@stopLoop = 0) BEGIN IF((SELECT [status] FROM @lMatched2 WHERE position = @k) = 0) SET @k = @k + 1 ELSE BREAK IF((SELECT SUBSTRING(@stringOne, @i, 1)) <> (SELECT SUBSTRING(@stringTwo, @k, 1))) SET @lNumHalfTransposed = @lNumHalfTransposed + 1 SET @k = @k + 1 END SET @i = @i + 1 END DECLARE @lNumTransposed INT; SET @lNumTransposed = @lNumHalfTransposed/2; DECLARE @lNumCommonD FLOAT; SET @lNumCommonD = @lNumCommon DECLARE @lWeight FLOAT; SET @lWeight = (@lNumCommonD / @lLen1 + @lNumCommonD / @lLen2 + (@lNumCommon - @lNumTransposed) / @lNumCommonD) / 3.0; IF(@lWeight <= @mWeightThreshold) RETURN @lWeight DECLARE @lMax INT; SET @lMax = dbo.InlineMin(@mNumChars, dbo.InlineMin(@lLen1, @lLen2)) DECLARE @lPos INT; SET @lPos = 0 WHILE(@lPos < @lMax AND (SELECT SUBSTRING(@stringOne, @lPos, 1)) = (SELECT SUBSTRING(@stringTwo, @lPos, 1))) BEGIN SET @lPos = @lPos + 1 END IF @lPos = 0 RETURN @lWeight RETURN @lWeight + 0.1 * @lPos * (1.0 - @lWeight) END; GO