- http://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution
- http://www.ics.uci.edu/~eppstein/161/960229.html
- http://www-igm.univ-mlv.fr/~lecroq/seqcomp/node4.html
/**
* Longest Common Subsequence.<BR>
* <pre>
* | ls(i-1,j-1) + 1, if a[i]=b[j]
* ls(i,j) = |
* | max(ls(i-1,j), ls(i,j-1)), else
* Boundary conditions: let think that ls[i][-1] = 0, ls[-1][j] = 0
* </pre>
*/
public class LongestCommonSubsequence {
public LongestCommonSubsequence(String a, String b) {
this.a = a;
this.b = b;
initDp();
}
// strings a, b
private String a;
private String b;
// ls(i,j) - maximum length of common subsequence that end at a[i] and b[j].
private int ls[][];
// specifies which neighboring cell ls(i,j) it got its value
Direction direction[][];
private void initDp() {
// init with 0 by default
ls = new int[a.length() + 1][b.length() + 1];
// so this could be skipped
for (int i = 0; i <= a.length(); i++)
ls[i][0] = 0;
for (int j = 0; j <= b.length(); j++)
ls[0][j] = 0;
direction = new Direction[a.length() + 1][b.length() + 1];
for (int i = 1; i <= a.length(); i++) {
for (int j = 1; j <= b.length(); j++) {
if (a.charAt(i - 1) == b.charAt(j - 1)) {
ls[i][j] = ls[i - 1][j - 1] + 1;
direction[i][j] = Direction.DIAG;
} else if (ls[i - 1][j] >= ls[i][j - 1]) {
ls[i][j] = ls[i - 1][j];
direction[i][j] = Direction.UP;
} else {
ls[i][j] = ls[i][j - 1];
direction[i][j] = Direction.LEFT;
}
}
}
}
public void printLs() {
System.out.printf("%1$2c ", ' ');
for (int j = 0; j <= b.length(); j++)
System.out.printf("%1$2c ", (j == 0) ? ' ' : b.charAt(j - 1));
System.out.printf("\n");
for (int i = 0; i <= a.length(); i++) {
System.out.printf("%1$2c ", (i == 0) ? ' ' : a.charAt(i - 1));
for (int j = 0; j <= b.length(); j++)
System.out.printf("%1$2d ", ls[i][j]);
System.out.printf("\n");
}
}
public void printDirection() {
System.out.printf("%1$2c ", ' ');
for (int j = 0; j <= b.length(); j++)
System.out.printf("%1$2c ", (j == 0) ? ' ' : b.charAt(j - 1));
System.out.printf("\n");
for (int i = 0; i <= a.length(); i++) {
System.out.printf("%1$2c ", (i == 0) ? ' ' : a.charAt(i - 1));
for (int j = 0; j <= b.length(); j++)
System.out.printf("%1$2s ", (direction[i][j] == null)? '.' : direction[i][j]);
System.out.printf("\n");
}
}
public String getLcs() {
StringBuilder sb = new StringBuilder();
int i = a.length();
int j = b.length();
while (i > 0 && j > 0) {
if (direction[i][j] == Direction.DIAG) {
sb.append(a.charAt(i - 1));
i--;
j--;
} else if (direction[i][j] == Direction.UP)
i--;
else
j--;
}
return sb.reverse().toString();
}
private enum Direction {
LEFT, UP, DIAG;
@Override
public String toString() {
switch (this) {
case LEFT:
return "<";
case UP:
return "^";
case DIAG:
return "\\";
}
throw new IllegalStateException("wrong direction");
}
}
public static void main(String[] args) {
LongestCommonSubsequence lcs = new LongestCommonSubsequence("CAGATAGAG", "AGCGA");
lcs.printLs();
System.out.printf("\n");
lcs.printDirection();
System.out.printf("LCS=%1$s\n\n", lcs.getLcs());
lcs = new LongestCommonSubsequence("BDCABA", "ABCBDAB");
lcs.printLs();
System.out.printf("\n");
lcs.printDirection();
System.out.printf("LCS=%1$s\n\n", lcs.getLcs());
lcs = new LongestCommonSubsequence("humans", "chimpanzees");
lcs.printLs();
System.out.printf("\n");
lcs.printDirection();
System.out.printf("LCS=%1$s\n\n", lcs.getLcs());
}
}
Program output could be found below:
A G C G A
0 0 0 0 0 0
C 0 0 0 1 1 1
A 0 1 1 1 1 2
G 0 1 2 2 2 2
A 0 1 2 2 2 3
T 0 1 2 2 2 3
A 0 1 2 2 2 3
G 0 1 2 2 3 3
A 0 1 2 2 3 4
G 0 1 2 2 3 4
A G C G A
. . . . . .
C . ^ ^ \ < <
A . \ < ^ ^ \
G . ^ \ < \ ^
A . \ ^ ^ ^ \
T . ^ ^ ^ ^ ^
A . \ ^ ^ ^ \
G . ^ \ ^ \ ^
A . \ ^ ^ ^ \
G . ^ \ ^ \ ^
LCS=AGGA
A B C B D A B
0 0 0 0 0 0 0 0
B 0 0 1 1 1 1 1 1
D 0 0 1 1 1 2 2 2
C 0 0 1 2 2 2 2 2
A 0 1 1 2 2 2 3 3
B 0 1 2 2 3 3 3 4
A 0 1 2 2 3 3 4 4
A B C B D A B
. . . . . . . .
B . ^ \ < \ < < \
D . ^ ^ ^ ^ \ < <
C . ^ ^ \ < ^ ^ ^
A . \ ^ ^ ^ ^ \ <
B . ^ \ ^ \ < ^ \
A . \ ^ ^ ^ ^ \ ^
LCS=BDAB
c h i m p a n z e e s
0 0 0 0 0 0 0 0 0 0 0 0
h 0 0 1 1 1 1 1 1 1 1 1 1
u 0 0 1 1 1 1 1 1 1 1 1 1
m 0 0 1 1 2 2 2 2 2 2 2 2
a 0 0 1 1 2 2 3 3 3 3 3 3
n 0 0 1 1 2 2 3 4 4 4 4 4
s 0 0 1 1 2 2 3 4 4 4 4 5
c h i m p a n z e e s
. . . . . . . . . . . .
h . ^ \ < < < < < < < < <
u . ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
m . ^ ^ ^ \ < < < < < < <
a . ^ ^ ^ ^ ^ \ < < < < <
n . ^ ^ ^ ^ ^ ^ \ < < < <
s . ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ \
LCS=hmans
No comments:
Post a Comment