Tuesday, June 30, 2015

Longest Common Substring

First method returns length of longest common substring.
Second method is modified based on first method to return the longest common substring.

It's interesting to see the two solutions coming out one year apart.

Second version: done in 2016. I am not improved :)


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import java.util.Objects;

/**
 * converting it to 2d array, x coordination is from one string and y coordination is form another string.
 * the matching substrings will be adjacent diagonal sequences. 
 * this way is also easy to retrieve all the common substrings: stating from 1 and diagonally increasing
 * advanced and more efficient way: suffix tree
 * @author Andrew Ma
 *
 */
public class LongestCommonSubString {
 public static int longestCSLength(String s1, String s2){
  /**
   * s1 is y, vertical. s2 is x, horizontal
   */
  Objects.requireNonNull(s1);
  Objects.requireNonNull(s2);
  int maxLength = 0;
  if(s1.length()==0 ||s2.length()==0)return 0;
  char[] s1y = s1.toCharArray();
  char[] s2x = s2.toCharArray();
  int[][] matches = new int[s1.length()][s2.length()]; 
  for(int x=0;x<s2.length();x++){
   for(int y=0;y<s1.length();y++){
    //mark and count
    if(s2x[x]==s1y[y]){
     if(x==0 || y==0){//on top or left boarder, start as 1
      matches[y][x]=1;
     }else{
     matches[y][x] = matches[y-1][x-1]+1;
     }
     maxLength = Math.max(matches[y][x],maxLength);
    }
    else
     matches[y][x]=0;
   }
  }
  return maxLength;
 }
 public static String retrieveString(String s1,String s2, int s1P,int s2P){
  StringBuffer sb = new StringBuffer();
  while(s1P>=0 && s2P>=0&&s1.charAt(s1P)==s2.charAt(s2P)){
   sb.append(s1.charAt(s1P));
   s1P--;
   s2P--;
  }
  return sb.reverse().toString();
 }
 public static String longestCommonSubstring(String s1, String s2){
  /**
   * s1 is y, vertical. s2 is x, horizontal
   */
  Objects.requireNonNull(s1);
  Objects.requireNonNull(s2);
  String longestSubStr = "";
  int maxLength = 0;
  if(s1.length()==0 ||s2.length()==0)return "";
  char[] s1y = s1.toCharArray();
  char[] s2x = s2.toCharArray();
  int[][] matches = new int[s1.length()][s2.length()]; 
  for(int x=0;x<s2.length();x++){
   for(int y=0;y<s1.length();y++){
    //mark and count
    if(s2x[x]==s1y[y]){
     if(x==0 ||y==0){//on top or left boarder, start as 1
      matches[y][x]=1;
     }else{
     matches[y][x] = matches[y-1][x-1]+1;
     }
     if(maxLength <matches[y][x]){
      maxLength = matches[y][x];
      longestSubStr = retrieveString(s1,s2,y,x);
     }
    }
    else
     matches[y][x]=0;
   }
  }
  return longestSubStr;
 }
 public static void main(String[] args){
  int i = longestCSLength("abcdefg","uiubciabcdu");
  System.out.println(i);
  String s = longestCommonSubstring("abcdefg","uiubciabcdu");
  System.out.println(s);
 }
}
First version in 2015. 
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
public class longestCommonSubstr {

 public static void main(String[] args) {
  System.out.println(longestCommonStrLen("aaabbbccc",
    "hshshsabbbcklklkl"));

 }

 public static int longestCommonStrLen(String first, String second) {
  if (first == null || second == null || first.length() == 0
    || second.length() == 0) {
   return 0;
  }
  // a variable to hold the maximum length
  int maxLen = 0;
  // imaging first is listed vertically, each character marks a row
  /**
   * abc
   *a
   *b
   *c
   */
  int row = first.length();
  // imaging second is listed horizontally
  // bcd
  /**
   * together a matric is like this bcd a b 1 c 2 for a matrix like that,
   * if there is a common substring, you will find both dimensions will
   * grow together
   */
  int column = second.length();
  int[][] table = new int[row][column];
  // initialize first row values to be 0
  for (int s = 0; s < column; s++)
   table[0][s] = 0;
  // initialize first column values to be 0
  for (int f = 0; f < row; f++)
   table[f][0] = 0;

  for (int i = 0; i < row; i++) {
   for (int j = 0; j < column; j++) {

    // at current position, it checks previous position's value.
    // common substring means both dimensions grow at same time. the
    // calculation is from low to high, so that in order to
    // determine higher position's length,
    // it need to figure out lower position's length the first
    // for edge matches,it starts with 1, means one match
    if (first.charAt(i) == second.charAt(j)) {

     // starting from edges, initialize count to be 1
     if (i == 0 || j == 0) {
      table[i][j] = 1;
     } else {
      table[i][j] = table[i - 1][j - 1] + 1;
     }
     if (table[i][j] > maxLen) {
      maxLen = table[i][j];
     }
    }
   }
  }
  return maxLen;
 }

 public static String getLongestCommonSubstr(String first, String second) {
  if (first == null || second == null || first.length() == 0
    || second.length() == 0) {
   return "";
  }
  // a variable to hold the maximum length
  StringBuilder sb = new StringBuilder();
  int maxLen = 0;
  // one position in one string is enough
  int maxI = 0;
  // imaging first is listed vertically, each character marks a row
  /**
   * abc
   *a
   *b
   *c
   */
  int row = first.length();
  // imaging second is listed horizontally
  // bcd
  /**
   * together a matrix is like this bcd a b c for a matrix like that, if
   * there is a common substring, you will find both dimensions will grow
   * together
   */
  int column = second.length();
  // do not need to have an extra column and row, this matrix represents
  // all possible matching points, it's value is length of
  // substring as result of growing diagonally
  int[][] table = new int[row][column];
  // initialize first row values to be 0, set it to 1 when there is a
  // match
  for (int s = 0; s < column; s++)
   table[0][s] = 0;
  // initialize first column values to be 0, set it to 1 when there is a
  // match
  for (int f = 0; f < row; f++)
   table[f][0] = 0;
  // traversal the two strings. it check back both dimensions to determine
  // if substring grows
  for (int i = 0; i < row; i++) {
   for (int j = 0; j < column; j++) {

    // at current position, it checks previous position's value.
    // common substring means both dimensions grow at same time. the
    // calculation is from low to height, so that in order to
    // determine higher position's length,
    // it need to figure out lower position's length the first
    // for edge matches,it starts with 1, means one match
    if (first.charAt(i) == second.charAt(j)) {

     // starting from edges, initialize count to be 1
     if (i == 0 || j == 0) {
      table[i][j] = 1;
     }
     // other wise, substring is growing, mark the length and
     // build it out
     else {
      table[i][j] = table[i - 1][j - 1] + 1;

     }
     if (table[i][j] > maxLen) {
      maxLen = table[i][j];
      maxI = i;
     }
    }
   }
  }
  while (maxLen > 0) {
   sb.append(first.charAt(maxI - maxLen + 1));
   maxLen--;
  }
  return sb.toString();
 }
}

No comments: