Second method is modified based on first method to return the longest common substring.
It's interesting to see the two solutions coming out one year apart.
Second version: done in 2016. I am not improved :)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | import java.util.Objects; /** * converting it to 2d array, x coordination is from one string and y coordination is form another string. * the matching substrings will be adjacent diagonal sequences. * this way is also easy to retrieve all the common substrings: stating from 1 and diagonally increasing * advanced and more efficient way: suffix tree * @author Andrew Ma * */ public class LongestCommonSubString { public static int longestCSLength(String s1, String s2){ /** * s1 is y, vertical. s2 is x, horizontal */ Objects.requireNonNull(s1); Objects.requireNonNull(s2); int maxLength = 0; if(s1.length()==0 ||s2.length()==0)return 0; char[] s1y = s1.toCharArray(); char[] s2x = s2.toCharArray(); int[][] matches = new int[s1.length()][s2.length()]; for(int x=0;x<s2.length();x++){ for(int y=0;y<s1.length();y++){ //mark and count if(s2x[x]==s1y[y]){ if(x==0 || y==0){//on top or left boarder, start as 1 matches[y][x]=1; }else{ matches[y][x] = matches[y-1][x-1]+1; } maxLength = Math.max(matches[y][x],maxLength); } else matches[y][x]=0; } } return maxLength; } public static String retrieveString(String s1,String s2, int s1P,int s2P){ StringBuffer sb = new StringBuffer(); while(s1P>=0 && s2P>=0&&s1.charAt(s1P)==s2.charAt(s2P)){ sb.append(s1.charAt(s1P)); s1P--; s2P--; } return sb.reverse().toString(); } public static String longestCommonSubstring(String s1, String s2){ /** * s1 is y, vertical. s2 is x, horizontal */ Objects.requireNonNull(s1); Objects.requireNonNull(s2); String longestSubStr = ""; int maxLength = 0; if(s1.length()==0 ||s2.length()==0)return ""; char[] s1y = s1.toCharArray(); char[] s2x = s2.toCharArray(); int[][] matches = new int[s1.length()][s2.length()]; for(int x=0;x<s2.length();x++){ for(int y=0;y<s1.length();y++){ //mark and count if(s2x[x]==s1y[y]){ if(x==0 ||y==0){//on top or left boarder, start as 1 matches[y][x]=1; }else{ matches[y][x] = matches[y-1][x-1]+1; } if(maxLength <matches[y][x]){ maxLength = matches[y][x]; longestSubStr = retrieveString(s1,s2,y,x); } } else matches[y][x]=0; } } return longestSubStr; } public static void main(String[] args){ int i = longestCSLength("abcdefg","uiubciabcdu"); System.out.println(i); String s = longestCommonSubstring("abcdefg","uiubciabcdu"); System.out.println(s); } } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | public class longestCommonSubstr { public static void main(String[] args) { System.out.println(longestCommonStrLen("aaabbbccc", "hshshsabbbcklklkl")); } public static int longestCommonStrLen(String first, String second) { if (first == null || second == null || first.length() == 0 || second.length() == 0) { return 0; } // a variable to hold the maximum length int maxLen = 0; // imaging first is listed vertically, each character marks a row /** * abc *a *b *c */ int row = first.length(); // imaging second is listed horizontally // bcd /** * together a matric is like this bcd a b 1 c 2 for a matrix like that, * if there is a common substring, you will find both dimensions will * grow together */ int column = second.length(); int[][] table = new int[row][column]; // initialize first row values to be 0 for (int s = 0; s < column; s++) table[0][s] = 0; // initialize first column values to be 0 for (int f = 0; f < row; f++) table[f][0] = 0; for (int i = 0; i < row; i++) { for (int j = 0; j < column; j++) { // at current position, it checks previous position's value. // common substring means both dimensions grow at same time. the // calculation is from low to high, so that in order to // determine higher position's length, // it need to figure out lower position's length the first // for edge matches,it starts with 1, means one match if (first.charAt(i) == second.charAt(j)) { // starting from edges, initialize count to be 1 if (i == 0 || j == 0) { table[i][j] = 1; } else { table[i][j] = table[i - 1][j - 1] + 1; } if (table[i][j] > maxLen) { maxLen = table[i][j]; } } } } return maxLen; } public static String getLongestCommonSubstr(String first, String second) { if (first == null || second == null || first.length() == 0 || second.length() == 0) { return ""; } // a variable to hold the maximum length StringBuilder sb = new StringBuilder(); int maxLen = 0; // one position in one string is enough int maxI = 0; // imaging first is listed vertically, each character marks a row /** * abc *a *b *c */ int row = first.length(); // imaging second is listed horizontally // bcd /** * together a matrix is like this bcd a b c for a matrix like that, if * there is a common substring, you will find both dimensions will grow * together */ int column = second.length(); // do not need to have an extra column and row, this matrix represents // all possible matching points, it's value is length of // substring as result of growing diagonally int[][] table = new int[row][column]; // initialize first row values to be 0, set it to 1 when there is a // match for (int s = 0; s < column; s++) table[0][s] = 0; // initialize first column values to be 0, set it to 1 when there is a // match for (int f = 0; f < row; f++) table[f][0] = 0; // traversal the two strings. it check back both dimensions to determine // if substring grows for (int i = 0; i < row; i++) { for (int j = 0; j < column; j++) { // at current position, it checks previous position's value. // common substring means both dimensions grow at same time. the // calculation is from low to height, so that in order to // determine higher position's length, // it need to figure out lower position's length the first // for edge matches,it starts with 1, means one match if (first.charAt(i) == second.charAt(j)) { // starting from edges, initialize count to be 1 if (i == 0 || j == 0) { table[i][j] = 1; } // other wise, substring is growing, mark the length and // build it out else { table[i][j] = table[i - 1][j - 1] + 1; } if (table[i][j] > maxLen) { maxLen = table[i][j]; maxI = i; } } } } while (maxLen > 0) { sb.append(first.charAt(maxI - maxLen + 1)); maxLen--; } return sb.toString(); } } |
No comments:
Post a Comment