-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathConcatenatedWords.java
189 lines (167 loc) · 6.86 KB
/
ConcatenatedWords.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
package interviewQuestions;
import java.util.*;
import java.io.*;
/**
* There's a file called “words-for-problem.txt” which contains a sorted list of
* approximately 173,000 words. The words are listed one word per line, do not contain spaces, and
* are all lowercase.
*
* Your task is to write a program that reads the file and provides the following:
* the longest concatenated word (that is,the longest word that is comprised entirely of shorter words in the
* file)
* the 2nd longest concatenated word
* the total count of all the concatenated words in the file
*
* For example, if the file contained:
*
* cat
*
* cats
*
* catsdogcats
*
* dog
*
* dogcatsdog
*
* hippopotamuses
*
* rat
*
* ratcatdogcat
*
* the longest concatenated word would be 'ratcatdogcat' with 12 characters. ‘hippopotamuses’ is a
* longer word, however it is not comprised entirely of shorter words in the list. The 2nd longest
* concatenated word is ‘catsdogcats’ with 11 characters. The total number of concatenated words is 3.
* Note that ‘cats’ is not a concatenated word because there is no word ‘s’ in the list.
*
* Your solution
*
* Please email your solution source code as attachments (zipped if more than 3 files). In addition,
* please include the following details in the body of the email:
*
* the longest and 2nd longest concatenated words
*
* the total count of concatenated words in the file
*
* the programming language(s) you used to complete the challenge
*/
public class ConcatenatedWords {
public static void main(String... args) {
// String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/wordsforproblem.txt";
// String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/first30.txt";
// String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/first50.txt";
String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/first100.txt";
// String FILE_PATH = "/Users/SteveSun/Google Drive/Developer/test.txt";
// String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output2.txt";
// String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output_first30.txt";
// String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output_first50.txt";
String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output_first100.txt";
// String OUTPUT_PATH = "/Users/SteveSun/Google Drive/Developer/output_test.txt";
ConcatenatedWords test = new ConcatenatedWords();
test.doJob(FILE_PATH, OUTPUT_PATH);
}
public void doJob(String FILE_PATH, String OUTPUT_PATH) {
try {
long startTime = System.currentTimeMillis();
ResultType result = readFromFile(FILE_PATH);
Set<String> wordDict = result.wordDict;
Integer maxWordLen = result.maxWordLen;
List<String> validConcatenatedWords = findAllValidConcatenatedWords(wordDict,
maxWordLen);
//Note: if we're only interested in the first two longest words, then we can iterate through validConcatenatedWords once
//and find those two
//For a more verbose solution, I'm printing all words in descending order based on their lengths.
Collections.sort(validConcatenatedWords, new Comparator<String>() {
@Override
public int compare(String o1, String o2) {
if (o1.length() > o2.length()) {
return -1;
} else if (o1.length() < o2.length()) {
return 1;
} else {
return 0;
}
}
});
long finishTime = System.currentTimeMillis();
long usedTime = finishTime - startTime;
File file = new File(OUTPUT_PATH);
if (!file.exists())
file.createNewFile();
FileWriter fw = new FileWriter(file.getAbsoluteFile());
BufferedWriter bw = new BufferedWriter(fw);
bw.write("There's a total of " + validConcatenatedWords.size() + " valid concatenated words found in this file.\n");
bw.write("It took " + usedTime + " ms to finish.\n");
for (String word : validConcatenatedWords) {
bw.write(word + "\n");
}
bw.close();
} catch (Exception e) {
e.printStackTrace();
}
}
private List<String> findAllValidConcatenatedWords(Set<String> wordDict, Integer maxWordLen) {
List<String> validConcatenatedWords = new ArrayList();
for (String word : wordDict) {
if (isValid(wordDict, word, maxWordLen)) {
validConcatenatedWords.add(word);
}
}
return validConcatenatedWords;
}
private boolean isValid(final Set<String> wordDict, String s, int maxWordLen) {
if (s == null || s.length() == 0)
return false;
Set<String> wordDictCopy = new HashSet(wordDict);
wordDictCopy.remove(s);// every word is comprised of every word itself, thus this word
// itself needs to be removed first for checking it*/
int n = s.length();
boolean[] dp = new boolean[n + 1];
dp[0] = true;
for (int i = 1; i <= n; i++) {
for (int j = 1; j <= i && j <= maxWordLen; j++) {
if (!dp[i - j])
continue;
String word = s.substring(i - j, i);
if (wordDictCopy.contains(word)) {
dp[i] = true;
break;
}
}
}
return dp[n];
}
class ResultType {
Set<String> wordDict;
Integer maxWordLen;
ResultType(Set<String> wordDict, Integer maxWordLen) {
this.wordDict = wordDict;
this.maxWordLen = maxWordLen;
}
}
public ResultType readFromFile(String filePath) throws Exception {
Set<String> wordDict = new HashSet<>();
int maxWordLen = Integer.MIN_VALUE;
try {
BufferedReader br = new BufferedReader(new FileReader(filePath));
try {
StringBuilder sb = new StringBuilder();
String line = br.readLine();
while (line != null) {
String word = new String(line);
maxWordLen = (maxWordLen > word.length()) ? maxWordLen : word.length();
if (word.length() != 0)
wordDict.add(word.trim());
line = br.readLine();
}
} finally {
br.close();
}
} finally {
// print out or log error information reading input files
}
ResultType result = new ResultType(wordDict, maxWordLen);
return result;
}
}