-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathConcatenatedWordsII.java
248 lines (208 loc) · 9.64 KB
/
ConcatenatedWordsII.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
package interviewQuestions;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class ConcatenatedWordsII {
/**The 2nd version of solution has these optimizations:
1. It uses a Trie data structure which is a best fit for a problem domain like this, it saves a lot of memory space: instead of using a HashSet, Trie could save all words with the same prefix into the same place. This same location advantage also speeds up the loop up process when searching a particular word/substring.
2. I implemented a remove() and undoRemove() method to mark one word off and on instead of creating a new copy of a HashSet to check every word. This is also a huge performance boost.
This is why it turns a 12 minutes run to 800 ms.
* @throws Exception */
public static void main(String...args) throws Exception{
try {
String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/wordsforproblem.txt";
// String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/first30.txt";
// String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/first50.txt";
// String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/first100.txt";
// String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/test.txt";
readFromFileAndDecorateIntoJsonFormatForLeetcode(FILE_PATH);
String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output_wordsforproblem.txt";
// String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output_first30.txt";
// String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output_first50.txt";
// String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output_first1001.txt";
// String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output_testagain.txt";
ConcatenatedWordsII test = new ConcatenatedWordsII();
ResultType result = test.readFromFile(FILE_PATH);
Set<String> words = result.wordDict;
Integer maxWordLen = result.maxWordLen;
TrieNode root = test.buildTrie(words);
long startTime = System.currentTimeMillis();
List<String> validConcatenatedWords = new ArrayList();
for (String word : words) {
if (word == null || word.length() == 0) continue;
test.remove(word, root);/** every word is comprised of every word itself, thus this word itself needs to be removed first for checking it*/
int n = word.length();
boolean[] dp = new boolean[n + 1];
dp[0] = true;
for (int i = 1; i <= n; i++) {
for (int j = 1; j <= i && j <= maxWordLen; j++) {
if (!dp[i - j])
continue;
String subWord = word.substring(i - j, i);
if (test.contains(subWord, root)) {
dp[i] = true;
break;
}
}
}
if(dp[n]) validConcatenatedWords.add(word);
test.undoRemove(word, root);
}
long finishTime = System.currentTimeMillis();
long usedTime = finishTime - startTime;
//Note: we're only interested in the first two longest words, so we can iterate through validConcatenatedWords once and find those two
String firstLongest = "";
String secondLongest = "";
for(String word : validConcatenatedWords){
if(word.length() > secondLongest.length()){
if(word.length() > firstLongest.length()) {
String temp = secondLongest;
secondLongest = firstLongest;
firstLongest = word;
} else {
secondLongest = word;
}
}
}
//print out the result we're interested in: longest concatenated word, second concatenated word, and the total number of all concatenated words
System.out.println("The longest concatenated word is: " + firstLongest + ",\nthe 2nd longest concatenated word is: "+
secondLongest + "\nand there are a total of " + validConcatenatedWords.size() + " valid concatenated words in this given file.");
//For a more verbose solution, I'm printing all words in descending order based on their lengths.
sortAndWriteToFile(OUTPUT_PATH, validConcatenatedWords, usedTime);
} catch (Exception e) {
e.printStackTrace();
}
}
private static void sortAndWriteToFile(String OUTPUT_PATH, List<String> validConcatenatedWords,
long usedTime) throws IOException {
Collections.sort(validConcatenatedWords, new Comparator<String>() {
@Override
public int compare(String o1, String o2) {
if (o1.length() > o2.length()) {
return -1;
} else if (o1.length() < o2.length()) {
return 1;
} else {
return 0;
}
}
});
File file = new File(OUTPUT_PATH);
if (!file.exists())
file.createNewFile();
FileWriter fw = new FileWriter(file.getAbsoluteFile());
BufferedWriter bw = new BufferedWriter(fw);
bw.write("There's a total of " + validConcatenatedWords.size() + " valid concatenated words found in this file.\n");
bw.write("It took " + usedTime + " ms to finish.\n");
for (String word : validConcatenatedWords) {
bw.write(word + "\n");
}
bw.close();
}
public TrieNode buildTrie(Set<String> words) {
TrieNode root = new TrieNode();
for(String word : words){
char[] chars = word.toCharArray();
TrieNode node = root;
for(int i = 0; i < chars.length; i++){
char c = chars[i];
if(node.children[c - 'a'] == null){
node.children[c - 'a'] = new TrieNode();
}
node = node.children[c - 'a'];
}
node.isWord = true;
}
return root;
}
// Returns true if the word is in the trie.
public boolean contains(String word, TrieNode root) {
TrieNode node = root;
for(int i = 0; i < word.length(); i++){
if(node.children[word.charAt(i) - 'a'] == null) return false;
node = node.children[word.charAt(i) - 'a'];
}
return node.isWord;
}
// mark that word on
public void undoRemove(String word, TrieNode root) {
TrieNode node = root;
for(int i = 0; i < word.length(); i++){
node = node.children[word.charAt(i) - 'a'];
}
node.isWord = true;
}
// mark that word off, we are not really deleting that word
public void remove(String word, TrieNode root) {
TrieNode node = root;
for(int i = 0; i < word.length(); i++){
node = node.children[word.charAt(i) - 'a'];
}
node.isWord = false;
}
class TrieNode {
boolean isWord;
TrieNode[] children = new TrieNode[26];
// Initialize your data structure here.
public TrieNode() {}
}
public static ResultType readFromFile(String filePath) throws Exception {
Set<String> wordDict = new HashSet<>();
int maxWordLen = Integer.MIN_VALUE;
try {
BufferedReader br = new BufferedReader(new FileReader(filePath));
try {
StringBuilder sb = new StringBuilder();
String line = br.readLine();
while (line != null) {
String word = new String(line);
maxWordLen = (maxWordLen > word.length()) ? maxWordLen : word.length();
if (word.length() != 0)
wordDict.add(word.trim());
line = br.readLine();
}
} finally {
br.close();
}
} finally {
// print out or log error information reading input files
}
ResultType result = new ResultType(wordDict, maxWordLen);
return result;
}
public static void readFromFileAndDecorateIntoJsonFormatForLeetcode(String filePath) throws Exception {
String OUTPUT_PATH2 = "/Users/SteveSun/Google Drive/Developer/output_wordsforproblem_for_leetcode.txt";
ResultType result = readFromFile(filePath);
writeToFile_ForLeetcode(OUTPUT_PATH2, result.wordDict);
}
private static void writeToFile_ForLeetcode(String OUTPUT_PATH, Set<String> validConcatenatedWords) throws IOException {
File file = new File(OUTPUT_PATH);
if (!file.exists())
file.createNewFile();
FileWriter fw = new FileWriter(file.getAbsoluteFile());
BufferedWriter bw = new BufferedWriter(fw);
bw.write("[");
for (String word : validConcatenatedWords) {
bw.write("\"" + word + "\",");
}
bw.write("]");
bw.close();
}
}
class ResultType {
Set<String> wordDict;
Integer maxWordLen;
ResultType(Set<String> wordDict, Integer maxWordLen) {
this.wordDict = wordDict;
this.maxWordLen = maxWordLen;
}
}