- http://en.wikipedia.org/wiki/Longest_common_substring_problem#Dynamic_programming
- http://www.ics.uci.edu/~dan/class/161/notes/6/Dynamic.html
Next ideas additionally could be used to reduce the memory usage:
- Keep only the last and current row
- Store only non-zero values in the rows (hash tables)
/**
* Longest Common Substring.<BR>
* <pre>
* | ls(i-1,j-1) + 1, if a[i]=b[j]
* ls(i,j) = |
* | 0, else
* Boundary conditions: let think that ls[i][-1] = 0, ls[-1][j] = 0
* </pre>
*/
public class LongestCommonSubstring {
public LongestCommonSubstring(String a, String b) {
this.a = a;
this.b = b;
initDp();
}
// strings a, b
private String a;
private String b;
/**
* ls(i,j) - maximum length of common strings that end at a[i] and b[j].
*/
private int ls[][];
// max ls - stores max length
private int maxLs;
private int maxI;
private int maxJ;
private void updateMaxLs(int ls, int i, int j) {
if (maxLs < ls) {
maxLs = ls;
maxI = i;
maxJ = j;
}
}
private void initMaxLs() {
maxLs = 0;
maxI = -1;
maxJ = -1;
}
private void initDp() {
initMaxLs();
// init with 0 by default
ls = new int[a.length() + 1][b.length() + 1];
// so this could be skipped
for (int i = 0; i <= a.length(); i++)
ls[i][0] = 0;
for (int j = 0; j <= b.length(); j++)
ls[0][j] = 0;
for (int i = 1; i <= a.length(); i++) {
for (int j = 1; j <= b.length(); j++) {
ls[i][j] = (a.charAt(i - 1) != b.charAt(j - 1)) ? 0 : ls[i - 1][j - 1] + 1;
updateMaxLs(ls[i][j], i, j);
}
}
}
public void printLs() {
System.out.printf("%1$c ", ' ');
for (int j = 0; j <= b.length(); j++)
System.out.printf("%1$c ", (j == 0) ? ' ' : b.charAt(j - 1));
System.out.printf("\n");
for (int i = 0; i <= a.length(); i++) {
System.out.printf("%1$c ", (i == 0) ? ' ' : a.charAt(i - 1));
for (int j = 0; j <= b.length(); j++) {
System.out.printf("%1$d ", ls[i][j]);
}
System.out.printf("\n");
}
}
public String getLcs() {
int size = ls[maxI][maxJ];
char sb[] = new char[size];
for(int l=0; l<size; l++)
sb[l] = a.charAt(maxI-size+l);
return new String(sb);
}
public static void main(String[] args) {
LongestCommonSubstring lcs = new LongestCommonSubstring("hello", "aloha");
lcs.printLs();
System.out.printf("LCS=%1$s\n\n", lcs.getLcs());
lcs = new LongestCommonSubstring("baba", "abba");
lcs.printLs();
System.out.printf("LCS=%1$s\n\n", lcs.getLcs());
lcs = new LongestCommonSubstring("Quick Search algorithm", "Tuned Boyer-Moore algorithm");
lcs.printLs();
System.out.printf("LCS=%1$s\n\n", lcs.getLcs());
}
}
Program output could be found below:
a l o h a
0 0 0 0 0 0
h 0 0 0 0 1 0
e 0 0 0 0 0 0
l 0 0 1 0 0 0
l 0 0 1 0 0 0
o 0 0 0 2 0 0
LCS=lo
a b b a
0 0 0 0 0
b 0 0 1 1 0
a 0 1 0 0 2
b 0 0 2 1 0
a 0 1 0 0 2
LCS=ba
T u n e d B o y e r - M o o r e a l g o r i t h m
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Q 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
u 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
i 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
c 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
k 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
S 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
e 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
a 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
r 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0
c 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
h 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
a 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0
l 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0
g 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0
o 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 5 0 0 0 0 0
r 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 2 0 0 0 0 0 0 6 0 0 0 0
i 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 0 0 0
t 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0
h 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0
m 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10
LCS= algorithm