我有两根弦
string a = "foo bar"; string b = "bar foo";
我想检测从a到的更改 b。 我必须更改哪些字符a才能使用b?
a
b
我认为必须对每个字符进行一次迭代,并检测它是否被添加,删除或保持相等。这是我意想不到的结果
'f' Remove 'o' Remove 'o' Remove ' ' Remove 'b' Equal 'a' Equal 'r' Equal ' ' Add 'f' Add 'o' Add 'o' Add
类和结果的枚举:
public enum Operation { Add,Equal,Remove }; public class Difference { public Operation op { get; set; } public char c { get; set; } }
这是我的解决方案,但是“删除”案例对我来说尚不清楚
public static List<Difference> CalculateDifferences(string left, string right) { int count = 0; List<Difference> result = new List<Difference>(); foreach (char ch in left) { int index = right.IndexOf(ch, count); if (index == count) { count++; result.Add(new Difference() { c = ch, op = Operation.Equal }); } else if (index > count) { string add = right.Substring(count, index - count); result.AddRange(add.Select(x => new Difference() { c = x, op = Operation.Add })); count += add.Length; } else { //Remove? } } return result; }
删除的字符的代码看起来如何?
更新-添加了更多示例
范例1:
string a = "foobar"; string b = "fooar";
预期结果:
'f' Equal 'o' Equal 'o' Equal 'b' Remove 'a' Equal 'r' Equal
范例2:
string a = "asdfghjk"; string b = "wsedrftr";
'a' Remove 'w' Add 's' Equal 'e' Add 'd' Equal 'r' Add 'f' Equal 'g' Remove 'h' Remove 'j' Remove 'k' Remove 't' Add 'r' Add
更新:
这是Dmitry和Ingen的答案之间的 比较 :https ://dotnetfiddle.net/MJQDAO
您正在寻找 (最小)编辑距离 / (最小)编辑顺序 。您可以在此处找到该过程的 理论 :
https://web.stanford.edu/class/cs124/lec/med.pdf
让我们实现(最简单的)Levenstein距离/序列算法(有关详细信息,请参见https://en.wikipedia.org/wiki/Levenshtein_distance)。让我们从 帮助器 类开始(我对您的实现做了一些更改):
public enum EditOperationKind : byte { None, // Nothing to do Add, // Add new character Edit, // Edit character into character (including char into itself) Remove, // Delete existing character }; public struct EditOperation { public EditOperation(char valueFrom, char valueTo, EditOperationKind operation) { ValueFrom = valueFrom; ValueTo = valueTo; Operation = valueFrom == valueTo ? EditOperationKind.None : operation; } public char ValueFrom { get; } public char ValueTo {get ;} public EditOperationKind Operation { get; } public override string ToString() { switch (Operation) { case EditOperationKind.None: return $"'{ValueTo}' Equal"; case EditOperationKind.Add: return $"'{ValueTo}' Add"; case EditOperationKind.Remove: return $"'{ValueFrom}' Remove"; case EditOperationKind.Edit: return $"'{ValueFrom}' to '{ValueTo}' Edit"; default: return "???"; } } }
从提供的示例中可以看到,我们没有任何 编辑 操作,但是 添加+ remove ;这就是为什么我已经把editCost = 2时insertCost = 1,int removeCost = 1(在的情况下, 领带 :insert + remove对edit我们提出insert + remove)。现在我们准备实现Levenstein算法:
editCost = 2
insertCost = 1
int removeCost = 1
insert + remove
edit
public static EditOperation[] EditSequence( string source, string target, int insertCost = 1, int removeCost = 1, int editCost = 2) { if (null == source) throw new ArgumentNullException("source"); else if (null == target) throw new ArgumentNullException("target"); // Forward: building score matrix // Best operation (among insert, update, delete) to perform EditOperationKind[][] M = Enumerable .Range(0, source.Length + 1) .Select(line => new EditOperationKind[target.Length + 1]) .ToArray(); // Minimum cost so far int[][] D = Enumerable .Range(0, source.Length + 1) .Select(line => new int[target.Length + 1]) .ToArray(); // Edge: all removes for (int i = 1; i <= source.Length; ++i) { M[i][0] = EditOperationKind.Remove; D[i][0] = removeCost * i; } // Edge: all inserts for (int i = 1; i <= target.Length; ++i) { M[0][i] = EditOperationKind.Add; D[0][i] = insertCost * i; } // Having fit N - 1, K - 1 characters let's fit N, K for (int i = 1; i <= source.Length; ++i) for (int j = 1; j <= target.Length; ++j) { // here we choose the operation with the least cost int insert = D[i][j - 1] + insertCost; int delete = D[i - 1][j] + removeCost; int edit = D[i - 1][j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost); int min = Math.Min(Math.Min(insert, delete), edit); if (min == insert) M[i][j] = EditOperationKind.Add; else if (min == delete) M[i][j] = EditOperationKind.Remove; else if (min == edit) M[i][j] = EditOperationKind.Edit; D[i][j] = min; } // Backward: knowing scores (D) and actions (M) let's building edit sequence List<EditOperation> result = new List<EditOperation>(source.Length + target.Length); for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);) { EditOperationKind op = M[y][x]; if (op == EditOperationKind.Add) { x -= 1; result.Add(new EditOperation('\0', target[x], op)); } else if (op == EditOperationKind.Remove) { y -= 1; result.Add(new EditOperation(source[y], '\0', op)); } else if (op == EditOperationKind.Edit) { x -= 1; y -= 1; result.Add(new EditOperation(source[y], target[x], op)); } else // Start of the matching (EditOperationKind.None) break; } result.Reverse(); return result.ToArray(); }
演示:
var sequence = EditSequence("asdfghjk", "wsedrftr"); Console.Write(string.Join(Environment.NewLine, sequence));
结果: