Saturday, May 1, 2010

Dynamic Programming: Longest Common Subsequence

The idea how to solve Longest Common Subsequence problem by Dynamic Programming could be found here:
My implementation is published below.
/**
* Longest Common Subsequence.<BR>
* <pre>
* | ls(i-1,j-1) + 1, if a[i]=b[j]
* ls(i,j) = |
* | max(ls(i-1,j), ls(i,j-1)), else
* Boundary conditions: let think that ls[i][-1] = 0, ls[-1][j] = 0
* </pre>
*/

public class LongestCommonSubsequence {
public LongestCommonSubsequence(String a, String b) {
this.a = a;
this.b = b;

initDp();
}

// strings a, b
private String a;
private String b;

// ls(i,j) - maximum length of common subsequence that end at a[i] and b[j].
private int ls[][];

// specifies which neighboring cell ls(i,j) it got its value
Direction direction[][];

private void initDp() {
// init with 0 by default
ls = new int[a.length() + 1][b.length() + 1];

// so this could be skipped
for (int i = 0; i <= a.length(); i++)
ls[i][0] = 0;
for (int j = 0; j <= b.length(); j++)
ls[0][j] = 0;

direction = new Direction[a.length() + 1][b.length() + 1];

for (int i = 1; i <= a.length(); i++) {
for (int j = 1; j <= b.length(); j++) {
if (a.charAt(i - 1) == b.charAt(j - 1)) {
ls[i][j] = ls[i - 1][j - 1] + 1;
direction[i][j] = Direction.DIAG;
} else if (ls[i - 1][j] >= ls[i][j - 1]) {
ls[i][j] = ls[i - 1][j];
direction[i][j] = Direction.UP;
} else {
ls[i][j] = ls[i][j - 1];
direction[i][j] = Direction.LEFT;
}
}
}
}

public void printLs() {
System.out.printf("%1$2c ", ' ');
for (int j = 0; j <= b.length(); j++)
System.out.printf("%1$2c ", (j == 0) ? ' ' : b.charAt(j - 1));
System.out.printf("\n");

for (int i = 0; i <= a.length(); i++) {
System.out.printf("%1$2c ", (i == 0) ? ' ' : a.charAt(i - 1));

for (int j = 0; j <= b.length(); j++)
System.out.printf("%1$2d ", ls[i][j]);
System.out.printf("\n");
}
}

public void printDirection() {
System.out.printf("%1$2c ", ' ');
for (int j = 0; j <= b.length(); j++)
System.out.printf("%1$2c ", (j == 0) ? ' ' : b.charAt(j - 1));
System.out.printf("\n");

for (int i = 0; i <= a.length(); i++) {
System.out.printf("%1$2c ", (i == 0) ? ' ' : a.charAt(i - 1));

for (int j = 0; j <= b.length(); j++)
System.out.printf("%1$2s ", (direction[i][j] == null)? '.' : direction[i][j]);
System.out.printf("\n");
}
}

public String getLcs() {
StringBuilder sb = new StringBuilder();

int i = a.length();
int j = b.length();
while (i > 0 && j > 0) {
if (direction[i][j] == Direction.DIAG) {
sb.append(a.charAt(i - 1));
i--;
j--;
} else if (direction[i][j] == Direction.UP)
i--;
else
j--;
}
return sb.reverse().toString();
}

private enum Direction {
LEFT, UP, DIAG;

@Override
public String toString() {
switch (this) {
case LEFT:
return "<";
case UP:
return "^";
case DIAG:
return "\\";
}
throw new IllegalStateException("wrong direction");
}
}

public static void main(String[] args) {
LongestCommonSubsequence lcs = new LongestCommonSubsequence("CAGATAGAG", "AGCGA");
lcs.printLs();
System.out.printf("\n");
lcs.printDirection();
System.out.printf("LCS=%1$s\n\n", lcs.getLcs());

lcs = new LongestCommonSubsequence("BDCABA", "ABCBDAB");
lcs.printLs();
System.out.printf("\n");
lcs.printDirection();
System.out.printf("LCS=%1$s\n\n", lcs.getLcs());

lcs = new LongestCommonSubsequence("humans", "chimpanzees");
lcs.printLs();
System.out.printf("\n");
lcs.printDirection();
System.out.printf("LCS=%1$s\n\n", lcs.getLcs());
}
}

Program output could be found below:
       A  G  C  G  A 
0 0 0 0 0 0
C 0 0 0 1 1 1
A 0 1 1 1 1 2
G 0 1 2 2 2 2
A 0 1 2 2 2 3
T 0 1 2 2 2 3
A 0 1 2 2 2 3
G 0 1 2 2 3 3
A 0 1 2 2 3 4
G 0 1 2 2 3 4

A G C G A
. . . . . .
C . ^ ^ \ < <
A . \ < ^ ^ \
G . ^ \ < \ ^
A . \ ^ ^ ^ \
T . ^ ^ ^ ^ ^
A . \ ^ ^ ^ \
G . ^ \ ^ \ ^
A . \ ^ ^ ^ \
G . ^ \ ^ \ ^
LCS=AGGA

A B C B D A B
0 0 0 0 0 0 0 0
B 0 0 1 1 1 1 1 1
D 0 0 1 1 1 2 2 2
C 0 0 1 2 2 2 2 2
A 0 1 1 2 2 2 3 3
B 0 1 2 2 3 3 3 4
A 0 1 2 2 3 3 4 4

A B C B D A B
. . . . . . . .
B . ^ \ < \ < < \
D . ^ ^ ^ ^ \ < <
C . ^ ^ \ < ^ ^ ^
A . \ ^ ^ ^ ^ \ <
B . ^ \ ^ \ < ^ \
A . \ ^ ^ ^ ^ \ ^
LCS=BDAB

c h i m p a n z e e s
0 0 0 0 0 0 0 0 0 0 0 0
h 0 0 1 1 1 1 1 1 1 1 1 1
u 0 0 1 1 1 1 1 1 1 1 1 1
m 0 0 1 1 2 2 2 2 2 2 2 2
a 0 0 1 1 2 2 3 3 3 3 3 3
n 0 0 1 1 2 2 3 4 4 4 4 4
s 0 0 1 1 2 2 3 4 4 4 4 5

c h i m p a n z e e s
. . . . . . . . . . . .
h . ^ \ < < < < < < < < <
u . ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
m . ^ ^ ^ \ < < < < < < <
a . ^ ^ ^ ^ ^ \ < < < < <
n . ^ ^ ^ ^ ^ ^ \ < < < <
s . ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ \
LCS=hmans

No comments: