Question : How do you find the edit distance in strings ?
Answer : You calculate the Levenshtein distance between strings with memoization. This is how it works:
If one of the input strings is empty, return the length of the other string as the distance D.
D(i,0) = i
D(0,j) = j
In a zero based index, d(i,j):
1) D(i-1, j) + 1 ( i.e. consider with a new left string letter )
2) D(i, j-1) + 1 ( i.e. consider with a new right string letter )
3) D(i-1, j-1) + cost where cost = 1 if S[i] != S[j] else cost = 0;
Tail recursion could also be considered with the same three types of breakouts. Cost can be 1 if the elements don't match and 0 otherwise.
If we took a sample data like the following, we would have a distance matrix as follows:
2 T 2 1 0
1 A 1 0 1
0 C 1 1 2
B A T
0 1 2
memo = {}
#i is the start index of str1, j is the start index of str2
def LevenshteinDistance(str1, i, len1, str2, j, len2):
l = [i,len1,j,len2]
key = (',').join(str(l))
if(memo.has_key(key)):
return memo[key]
if(len1 == 0):
return len2
if(len2 == 0):
return len1
cost = 0
if(str1[i] != str2[j]):
cost = 1;
dele = LevenshteinDistance(str1, i+1,len1-1, str2,j,len2)+1
inse = LevenshteinDistance(str1,i,len1,str2,j+1,len2-1)+1
upda = LevenshteinDistance(str1,i+1,len1-1,str2,j+1,len2-1)+cost
dist = min(dele, inse, upda)
memo[key] = dist
return dist
print LevenshteinDistance("cat", 0, 3, "bat", 0 ,3)
def min(a, b, c):
d = a;
if (d > b):
d = b
if (d > c):
d = c
return d
prints 1
C++ version:
map<string, int> m;
int getLD(char* str1,int i,int len1, char* str2, int j, int len2)
{
stringstream sout;
sout << i << "," << len1<< "," << j << "," << len2;
key = sout.str();
if(m.find(key) != m.end())
return m[key];
if(len1 == 0)
return len2;
if(len2 == 0)
return len1;
int cost = 0;
if(str1[i] != str2[j])
cost = 1;
dele = GetLD(str1, i+1,len1-1, str2,j,len2)+1;
inse = GetLD(str1,i,len1,str2,j+1,len2-1)+1;
upda = GetLD(str1,i+1,len1-1,str2,j+1,len2-1)+cost;
int dist = min(dele, inse, upda);
m[key] = dist;
return dist;
}
int min(int a, int b, int c):
{
int d = a;
if (d > b)
d = b;
if (d > c)
d = c;
return d;
}
Answer : You calculate the Levenshtein distance between strings with memoization. This is how it works:
If one of the input strings is empty, return the length of the other string as the distance D.
D(i,0) = i
D(0,j) = j
In a zero based index, d(i,j):
1) D(i-1, j) + 1 ( i.e. consider with a new left string letter )
2) D(i, j-1) + 1 ( i.e. consider with a new right string letter )
3) D(i-1, j-1) + cost where cost = 1 if S[i] != S[j] else cost = 0;
Tail recursion could also be considered with the same three types of breakouts. Cost can be 1 if the elements don't match and 0 otherwise.
If we took a sample data like the following, we would have a distance matrix as follows:
2 T 2 1 0
1 A 1 0 1
0 C 1 1 2
B A T
0 1 2
memo = {}
#i is the start index of str1, j is the start index of str2
def LevenshteinDistance(str1, i, len1, str2, j, len2):
l = [i,len1,j,len2]
key = (',').join(str(l))
if(memo.has_key(key)):
return memo[key]
if(len1 == 0):
return len2
if(len2 == 0):
return len1
cost = 0
if(str1[i] != str2[j]):
cost = 1;
dele = LevenshteinDistance(str1, i+1,len1-1, str2,j,len2)+1
inse = LevenshteinDistance(str1,i,len1,str2,j+1,len2-1)+1
upda = LevenshteinDistance(str1,i+1,len1-1,str2,j+1,len2-1)+cost
dist = min(dele, inse, upda)
memo[key] = dist
return dist
print LevenshteinDistance("cat", 0, 3, "bat", 0 ,3)
def min(a, b, c):
d = a;
if (d > b):
d = b
if (d > c):
d = c
return d
prints 1
C++ version:
map<string, int> m;
int getLD(char* str1,int i,int len1, char* str2, int j, int len2)
{
stringstream sout;
sout << i << "," << len1<< "," << j << "," << len2;
key = sout.str();
if(m.find(key) != m.end())
return m[key];
if(len1 == 0)
return len2;
if(len2 == 0)
return len1;
int cost = 0;
if(str1[i] != str2[j])
cost = 1;
dele = GetLD(str1, i+1,len1-1, str2,j,len2)+1;
inse = GetLD(str1,i,len1,str2,j+1,len2-1)+1;
upda = GetLD(str1,i+1,len1-1,str2,j+1,len2-1)+cost;
int dist = min(dele, inse, upda);
m[key] = dist;
return dist;
}
int min(int a, int b, int c):
{
int d = a;
if (d > b)
d = b;
if (d > c)
d = c;
return d;
}
No comments:
Post a Comment