Monday, September 29, 2014

Longest Word in 2 D Array

Problem:
given a 2D character array, and a dictionary, please find the longest in dictionary word from the array.

Restrictions:
Do not access an element twice;
from current position, there are 8 possible next moves: horizontally, vertically and Diagonally.

I will give out two solutions here. One is working directly on the given array, another is to covert the array to a graph, and then do depth first traversal to access all possible path and build out word as it traverses.
The key in second solution is to build up neighbors. The second approach has more codes to work on, but it is easier to understand.

Time efficiency for the two solutions are about the same, but second one use extra data structure so it consumes more space.

Solution 1:
Starting from anywhere in a matrix and find all words starting with it. since it uses same strategy to follow all the possible paths to go further, it's naturally recursive solution.


package tree;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

//this program return first in dictionary word with maximum word length
public class TDArray {

 // test: remove T from shopbopt to return abebook
 public static void main(String[] args) {
  final char array[][] = { 
    { 'A', 'B', 'P', 'R' },
    { 'S', 'H', 'E', 'P' }, 
    { 'P', 'O', 'V', 'B' },
    { 'B', 'O', 'O', 'O' },
    { 'C', 'P', 'T', 'K' } 
    };
  final Set<String> dict = new HashSet<>(Arrays.asList("AMAZON",
    "ABEBOOKTO", "SHOPBOPT"));
  findLongestWorrdInDictionary(array, dict);

 }

 public static int longestLength(Set<String> dict) {
  if (dict == null)
   return -1;
  int length = 0;
  for (String word : dict) {
   if (word.length() > length)
    length = word.length();
  }
  return length;
 }

 public static void findLongestWorrdInDictionary(char[][] matrix,
   Set<String> dictionary) {
  if (matrix == null || dictionary == null)
   return;
  String longestWord = null;
  Set<String> visited = new HashSet<>();
  StringBuffer finalWord = new StringBuffer();
  // search from each possible start position
  for (int i = 0; i < matrix.length; i++) {
   for (int j = 0; j < matrix[i].length; j++) {
    visited.clear();
    longestWord = "";
    findFromNewStart(matrix, dictionary, i, j, longestWord,
      visited, longestLength(dictionary), finalWord);
   }
  }

  System.out.print("Longest in dictionary word is:" + finalWord);
 }

 // from each start position, it can go to 8 possible other positions
 public static String findFromNewStart(char[][] matrix, Set<String> dict,
   int x, int y, String longestWord, Set<String> visited,
   int maxLength, StringBuffer finalWord) {
  // over the matric boundary, return previous string/word being built
  if (x < 0 || y < 0 || x >= matrix.length || y >= matrix[0].length)
   return longestWord;
  // if the string is already longer than the max word length in
  // dictionary, stop building longer string
  if (longestWord.length() >= maxLength)
   return longestWord;
  // all the nodes have been visited, then this operation is done because
  // it's asked not to access same node twice
  if (!visited.add(String.valueOf(x) + String.valueOf(y))) {
   return longestWord;
  }
  // otherwise, we can continue to build longer string by adding the
  // current character
  longestWord = longestWord + matrix[x][y];
  // for each new string, check if it is in the dictionary
  if (dict.contains(longestWord)){
   System.out.println("Find a word:" + longestWord);
    
    if( longestWord.length() > finalWord.length()) {
   // if the current in dictionary word is longer than previous in
   // dictionary word, then remember it as new candidate word

     finalWord.delete(0, finalWord.length());
     finalWord.append(longestWord);
    }
  }
//for a given node (x,y), since all nodes can only be accessed once, the visited should keep tracking them
//but when going back to the start node, all nodes should be removed so that next invoking won't be impacted  
  // go next step 1-left,2-upper left, 3-up, 4-upper right,5-right,6-lower
  // right, 7-down, 8-lower left

  // 1 of 8 possible moving directions, each of new node has same 8
  // possible moving directions, the possible moving directions are
  // traversed according to following sequences

  findFromNewStart(matrix, dict, x - 1, y, longestWord, visited,
    maxLength, finalWord);

  findFromNewStart(matrix, dict, x - 1, y + 1, longestWord, visited,
    maxLength, finalWord);
  findFromNewStart(matrix, dict, x, y + 1, longestWord, visited,
    maxLength, finalWord);
  findFromNewStart(matrix, dict, x + 1, y + 1, longestWord, visited,
    maxLength, finalWord);
  findFromNewStart(matrix, dict, x + 1, y, longestWord, visited,
    maxLength, finalWord);
  findFromNewStart(matrix, dict, x + 1, y - 1, longestWord, visited,
    maxLength, finalWord);
  findFromNewStart(matrix, dict, x, y - 1, longestWord, visited,
    maxLength, finalWord);
  findFromNewStart(matrix, dict, x - 1, y - 1, longestWord, visited,
    maxLength, finalWord);
  /**
   * the above can be done this way:
   * for (int row=y-1; row<=y+1 && row<matrix.length; row++)
        for (int col=x-1; col<=x+1 && col<matrix[0].length; col++)
          if (row>=0 && col>=0 && !visited.contains(String.valueOf(col) + String.valueOf(row)))
     findFromNewStart(matrix, dict, col, row, longestWord, visited,
       maxLength, finalWord);
   */
  //at end of traversal, nodes should be removed from visited list so that next traversal from different node won't be impacted
  visited.remove(String.valueOf(x) + String.valueOf(y));
  return longestWord;
 }

}

Solution 2 

Converting the matrix to a graph, starting from any node/vertex and for every node/vertex, using depth first traversal to traverse the entire graph. a node's neighbors are the ones surrounding the corresponding element in the matrix.
package tree;

/**
 * Boggle (Find all possible words in a board of characters)
 */
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;

/**
 * The idea is to consider every character as a starting character and find all
 * words starting with it. All words starting from a character can be found
 * using Depth First Traversal. We do depth first traversal starting from every
 * cell. We keep track of visited cells to make sure that a cell is considered
 * only once in a word.
 * 
 * The matrix is converted to graphy in order to to graph traversal, a node's
 * neighbors are the ones surrounding the node in matrix
 * 
 * then do depth first traversal from every node, and find all possible letter
 * combinations during traversal check if the words are in dictionanry, and
 * remember the one with longest length
 * 
 * @author ama
 *
 */
public class TDArrayGraphySolution {

 public static void main(String[] args) {
  final char givenArray[][] = { { 'A', 'B', 'P', 'R' },
    { 'S', 'H', 'E', 'P' }, { 'P', 'O', 'V', 'B' },
    { 'B', 'O', 'O', 'O' }, { 'C', 'P', 'T', 'K' } };
  final Set<String> dict = new HashSet<>(Arrays.asList("AMAZON",
    "ABEBOOK", "SHOPBOPT"));
  int row = givenArray.length;
  int col = givenArray[0].length;

  Vertex[][] graph = new Vertex[row][col];
  // initialize vertexes, it is similiar to the given array, but with
  // elements as vertex
  for (int i = 0; i < row; i++) {
   for (int j = 0; j < col; j++) {
    graph[i][j] = new Vertex(String.valueOf(givenArray[i][j]));
   }
  }
  // make up graph by complete neighbor relationships
  for (int i = 0; i < row; i++) {
   for (int j = 0; j < col; j++) {
    // moving left
    if (j > 0)
     graph[i][j].addNeighbor(graph[i][j - 1]);
    // move upper left
    if (i > 0 && j > 0)
     graph[i][j].addNeighbor(graph[i - 1][j - 1]);
    // move up
    if (i > 0)
     graph[i][j].addNeighbor(graph[i - 1][j]);
    // move upper right
    if (i > 0 && j < col - 1)
     graph[i][j].addNeighbor(graph[i - 1][j + 1]);
    // move right
    if (j < col - 1)
     graph[i][j].addNeighbor(graph[i][j + 1]);
    // move lower right
    if (i < row - 1 && j < col - 1)
     graph[i][j].addNeighbor(graph[i + 1][j + 1]);
    // move down
    if (i < row - 1)
     graph[i][j].addNeighbor(graph[i + 1][j]);
    // move down left
    if (i < row - 1 && j > 0)
     graph[i][j].addNeighbor(graph[i + 1][j - 1]);

   }
  }
  // then traverse it with DFS
  String longestWord = "";
  StringBuffer sb = new StringBuffer();
  for (int i = 0; i < row; i++) {
   for (int j = 0; j < col; j++) {
    longestWord = getStringByDFT(graph[i][j], sb, longestWord,
      longestLength(dict), dict);

   }
  }
  System.out.println("Longest in dict word is: " + longestWord);
 }

 /**
  * return longest length of word in dictionary
  * 
  * @param dict
  * @return
  */
 public static int longestLength(Set<String> dict) {
  if (dict == null)
   return -1;
  int length = 0;
  for (String word : dict) {
   if (word.length() > length)
    length = word.length();
  }
  return length;
 }

 /**
  * depth first traversal to traverse all the nodes in graph:
  * 
  * access a vertext, then its neighbors from left to right
  * 
  * to avoid repeatedly accessing a vertex, it will be marked as visited. you
  * can also use another structure to record what is visited so far.
  * 
  * it builds string while traversing. it does not need to go through all the
  * nodes because longest words in dictionary is one of exit condition.
  * 
  * since from every node, this recursion will repeat, so that when going
  * back to body of recursion function, the current node is marked as not
  * visited so that recursion form same level of neighbors can still access
  * this node.
  * 
  * The string being built changes its content while going in and going out
  * the recursion function to reflect the current access path.
  * 
  * @param vertex
  * @param toBuild
  * @param longestWordinDict
  * @param maxLength
  * @param dict
  * @return
  */
 public static String getStringByDFT(Vertex vertex, StringBuffer toBuild,
   String longestWordinDict, int maxLength, Set<String> dict) {
  if (vertex == null || dict == null)
   return null;

  // access the vertex
  toBuild.append(vertex.getVal());
  // System.out.println(toBuild.toString());
  if (toBuild.length() > maxLength) {
   vertex.setVisited(false);
   toBuild.deleteCharAt(toBuild.length() - 1);
   return longestWordinDict;
  }
  if (dict.contains(toBuild.toString())) {
   System.out.println("Found a word in dictionary:"
     + toBuild.toString());
   if (longestWordinDict.length() < toBuild.length()) {

    longestWordinDict = toBuild.toString();
   }
  }
  // then vertex's neighbors
  for (Vertex neighbor : vertex.getNeighbors()) {
   if (!neighbor.isVisited())
    longestWordinDict = getStringByDFT(neighbor, toBuild,
      longestWordinDict, maxLength, dict);
  }
  // because it needs to traverse the graph from another vertex so that
  // when going back, cleaning up its access status
  // but going forward, it's marked as visited so that same vertex won't
  // be used twice
  vertex.setVisited(false);
  toBuild.deleteCharAt(toBuild.length() - 1);
  return longestWordinDict;
 }

}

import java.util.Collection;
import java.util.LinkedList;

public class Vertex {
 private final String val;
   private final Collection<Vertex> neighbors = new LinkedList<Vertex>();
   private boolean visited;
   
   public Iterable<Vertex> getNeighbors(){
     return neighbors;
   }
   
   public boolean isVisited(){
     return visited;
   }
   
   public String getVal(){
    visited = true;
    return val;
   }
   
   public Vertex(String item) {
     this.val = item;
   }
   
   public void addNeighbor(Vertex neighbor) {
     neighbors.add(neighbor);
   }
   public void setVisited(boolean visited){
    this.visited = visited;
   }
}

No comments: