using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
namespace WordCompare
{
class Program
{
static void Main(string[] args)
{
var fromString = "English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.";
var toString = "English is West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.a";
Stopwatch watch = new Stopwatch();
watch.Start();
var result = CompareStrings(fromString, toString);
watch.Stop();
Console.WriteLine("The result is {0}, spent {1} milliseconds.", result, watch.ElapsedMilliseconds);
}
private static int CompareStrings(string fromString, string toString)
{
var fLength = fromString.Length;
var tLength = toString.Length;
// pre verify the simplest condition
if (fLength == 0)
{
return tLength;
}
if (tLength == 0)
{
return fLength;
}
// prepare the martix
var martix = new int[fLength + 1, tLength + 1];
for (int i = 0; i <= fLength; i++)
{
martix[i, 0] = i;
}
for (int j = 0; j <= tLength; j++)
{
martix[0, j] = j;
}
// compare the chars
for (int i = 1; i <= fLength; i++)
{
var tempF = fromString[i - 1];
var cost = 0;
for (int j = 1; j <= tLength; j++)
{
var tempT = toString[j - 1];
if (tempT == tempF)
{
cost = 0;
}
else
{
cost = 1;
}
var valueAbove = martix[i - 1, j] + 1;
var valueLeft = martix[i, j - 1] + 1;
// left corner
var valueDiag = martix[i - 1, j - 1] + cost;
// find the minimum from the three vars above
var cellValue = valueAbove < valueLeft ? (valueDiag < valueAbove ? valueDiag : valueAbove) : (valueDiag < valueLeft ? valueDiag : valueLeft);
martix[i, j] = cellValue;
}
}
var result = martix[fLength, tLength];
return result;
}
}
}
算法如下表:
注意,有下划线的就是每个循环得到的结果。
这个算法计算的是将s[1…i]转换为t[1…j](例如将kitten转换为sitting)所需最少的操作数(也就是所谓的编辑距离),这个操作数被保存在d[i,j](d代表的就是上图所示的二维数组)中。
这个证明过程只能证明我们可以得到结果,但并没有证明结果是最小的(即我们得到的是最少的转换步骤)。所以我们引进了另外一个算法,即d[i,j]保存的是上述三种操作中操作数最小的一种。这就保证了我们获得的结果是最小的操作数(可使用argument by contradiction进行证明,离题太远,忽略。。)