Skip to content

Commit d180be0

Browse files
authored
Merge pull request #31 from ontologyportal/Thompson_14_Feb_2025
Added GenCausesTestData.java to create sentences and logic about "causes".
2 parents a2521c1 + c019cbb commit d180be0

File tree

5 files changed

+284
-0
lines changed

5 files changed

+284
-0
lines changed

lib/jackson-core-2.18.2.jar

584 KB
Binary file not shown.

lib/jackson-databind-2.18.2.jar

1.58 MB
Binary file not shown.
130 KB
Binary file not shown.

lib/ollama4j-1.0.93.jar

134 KB
Binary file not shown.
Lines changed: 284 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,284 @@
1+
package com.articulate.nlp;
2+
3+
import io.github.ollama4j.OllamaAPI;
4+
import io.github.ollama4j.models.response.OllamaResult;
5+
import io.github.ollama4j.types.OllamaModelType;
6+
import io.github.ollama4j.utils.OptionsBuilder;
7+
import io.github.ollama4j.utils.Options;
8+
9+
import java.io.FileWriter;
10+
import java.util.Collection;
11+
import java.util.Set;
12+
import java.util.Arrays;
13+
import java.util.List;
14+
import java.util.stream.Collectors;
15+
import java.io.IOException;
16+
import java.nio.ByteBuffer;
17+
import java.nio.channels.FileChannel;
18+
import java.nio.channels.FileLock;
19+
import java.nio.file.Path;
20+
import java.nio.file.Paths;
21+
import java.nio.file.StandardOpenOption;
22+
import java.nio.file.Files;
23+
import java.util.Random;
24+
25+
import com.articulate.sigma.*;
26+
import java.util.*;
27+
import com.articulate.sigma.wordNet.WordNet;
28+
29+
public class GenCausesTestData {
30+
31+
public static boolean debug = false;
32+
public static KB kb;
33+
public static String outputFileEnglish = "causes-eng.txt";
34+
public static String outputFileLogic = "causes-log.txt";
35+
public static boolean EQUIVALENCE_MAPPINGS = false;
36+
37+
public static String[] phrasesCauses = {
38+
" causes ",
39+
" leads to ",
40+
" results in ",
41+
" brings about ",
42+
" triggers ",
43+
" provokes ",
44+
" induces ",
45+
" produces ",
46+
" prompts ",
47+
" gives rise to ",
48+
" is responsible for "
49+
};
50+
51+
public static String[] phrasesCausedBy = {
52+
" is caused by ",
53+
" is due to ",
54+
" is a result of ",
55+
" is because of ",
56+
" is brought about by ",
57+
" is triggered by ",
58+
" is provoked by ",
59+
" is induced by ",
60+
" is produced by ",
61+
" is prompted by ",
62+
" stems from ",
63+
" arises from ",
64+
" originates from ",
65+
" is driven by ",
66+
" is attributable to ",
67+
" can be traced back to "
68+
};
69+
70+
/** ***************************************************************
71+
* Creates a file if one doesn't exist already.
72+
*/
73+
public static void createFileIfDoesNotExists(String fileName) {
74+
Path filePath = Paths.get(fileName);
75+
if (Files.exists(filePath)) {
76+
return;
77+
} else {
78+
try {
79+
Files.createFile(filePath);
80+
} catch (IOException e) {
81+
e.printStackTrace();
82+
}
83+
}
84+
}
85+
86+
/** ***********************************************************************
87+
* Takes an enlish phrase and its logic equivalent.
88+
* Files are locked, so that if multiple processes are building
89+
* the dataset, the correspondence of english/logic pairs is preserved.
90+
*/
91+
public static void writeEnglishLogicPairToFile(String english, String logic) {
92+
FileChannel fileChannel1 = null;
93+
FileChannel fileChannel2 = null;
94+
FileLock lock1 = null;
95+
FileLock lock2 = null;
96+
97+
try {
98+
fileChannel1 = FileChannel.open(Paths.get(outputFileEnglish), StandardOpenOption.WRITE, StandardOpenOption.APPEND);
99+
fileChannel2 = FileChannel.open(Paths.get(outputFileLogic), StandardOpenOption.WRITE, StandardOpenOption.APPEND);
100+
101+
lock1 = fileChannel1.lock();
102+
lock2 = fileChannel2.lock();
103+
104+
ByteBuffer buffer1 = ByteBuffer.wrap(english.getBytes());
105+
ByteBuffer buffer2 = ByteBuffer.wrap(logic.getBytes());
106+
107+
fileChannel1.write(buffer1);
108+
fileChannel2.write(buffer2);
109+
110+
} catch (IOException e) {
111+
e.printStackTrace();
112+
} finally {
113+
try {
114+
if (lock1 != null) lock1.release();
115+
if (fileChannel1 != null) fileChannel1.close();
116+
if (lock2 != null) lock2.release();
117+
if (fileChannel2 != null) fileChannel2.close();
118+
} catch (IOException e) {
119+
e.printStackTrace();
120+
}
121+
}
122+
}
123+
124+
/** ***************************************************************
125+
* Takes a string which is a response to an Ollama query.
126+
* Removes all punctuation and splits camel case answers.
127+
* @return a clean ollama answer.
128+
*/
129+
private static String cleanOllamaResponse(String str) {
130+
StringBuilder result = new StringBuilder();
131+
for (char c : str.toCharArray()) {
132+
if (Character.isUpperCase(c) && result.length() > 0) {
133+
result.append(" ");
134+
}
135+
result.append(c);
136+
}
137+
String sentence = result.toString();
138+
sentence = Arrays.stream(sentence.split("\\s+"))
139+
.map(word -> word.substring(0, 1).toUpperCase() + word.substring(1).toLowerCase())
140+
.collect(Collectors.joining(" "));
141+
List<Character> punctuation = Arrays.asList('.', ',', '!', '?', ';', ':', '-', '(', ')', '[', ']', '{', '}', '"', '\'', ' ');
142+
sentence = sentence.chars()
143+
.mapToObj(c -> (char) c)
144+
.filter(c -> !punctuation.contains(c))
145+
.map(String::valueOf)
146+
.collect(Collectors.joining());
147+
return sentence;
148+
}
149+
150+
/** ***************************************************************
151+
* Takes a string, such as "ThisIsAStringWithCapitalLetters"
152+
* and breaks it up into "This Is A String With Capital Letters"
153+
* @return a string broken up into words with spaces.
154+
*/
155+
public static String addSpaceBeforeCapitals(String input) {
156+
if (input == null || input.isEmpty()) {
157+
return input;
158+
}
159+
StringBuilder result = new StringBuilder();
160+
result.append(input.charAt(0)); // Append the first character as is
161+
for (int i = 1; i < input.length(); i++) {
162+
char currentChar = input.charAt(i);
163+
if (Character.isUpperCase(currentChar)) {
164+
result.append(' ');
165+
}
166+
result.append(currentChar);
167+
}
168+
return result.toString();
169+
}
170+
171+
/** ***************************************************************
172+
* Given a term, looks up all the equivalent mappings of that therm
173+
* in WordNet, and returns a random mapping.
174+
* @return a random equivalent SUMO Mapping
175+
*/
176+
private static String getEquivalentSUMOMapping(String term) {
177+
Set<String> synsetOfTerm = WordNet.wn.getSynsetsFromWord(term.toLowerCase());
178+
ArrayList<String> equivalentTerms = new ArrayList();
179+
int counter = 0;
180+
for (String synset:synsetOfTerm) {
181+
if (debug) System.out.println("Synset of " + term + ": " + synset);
182+
String sumoMapping = WordNet.wn.getSUMOMapping(synset);
183+
if (sumoMapping != null) {
184+
sumoMapping = sumoMapping.substring(2);
185+
if (sumoMapping.charAt(sumoMapping.length() - 1) == '=' || EQUIVALENCE_MAPPINGS == false) {
186+
String sumoTerm = sumoMapping.substring(0, sumoMapping.length() - 1);
187+
if (debug) System.out.println("Equivalent mapping to: " + sumoTerm);
188+
if(kb.kbCache.subclassOf(sumoTerm, "Process")) {
189+
if (debug) System.out.println(sumoTerm + " is a process. Added.");
190+
equivalentTerms.add(sumoMapping.substring(0, sumoMapping.length() - 1));
191+
}
192+
}
193+
}
194+
}
195+
if (!equivalentTerms.isEmpty()) {
196+
Random rand = new Random();
197+
return equivalentTerms.get(rand.nextInt(equivalentTerms.size()));
198+
}
199+
return null;
200+
}
201+
202+
/** *********************************************************************
203+
* Main method. Builds a test set of the form "<term> causes <term>"
204+
* and its logical equivalent.
205+
* First, selects a random process from SUMO.
206+
* Then asks ollama what is caused by that process.
207+
*/
208+
public static void main(String[] args) throws Exception {
209+
if (args == null || args.length < 2 || args.length > 3 || args[0].equals("-h"))
210+
System.out.println("Usage: GenCausesTestData <file prefix> <num to generate> <optional: -e (for equivalence mappings only)");
211+
outputFileEnglish = args[0] + "-eng.txt";
212+
outputFileLogic = args[0] + "-log.txt";
213+
int numToGenerate = Integer.parseInt(args[1]);
214+
if (args.length == 3 && args[2].equals("-e")) {
215+
EQUIVALENCE_MAPPINGS = true;
216+
System.out.println("Using ONLY equivalence mappings");
217+
}
218+
else {
219+
System.out.println("Drawing from equivalence and subsuming mappings.");
220+
}
221+
222+
OllamaAPI ollamaAPI = new OllamaAPI();
223+
ollamaAPI.setVerbose(false);
224+
boolean RAW_PROMPT = false;
225+
Options options = new OptionsBuilder().setTemperature(1.0f).build();
226+
227+
KBmanager.getMgr().initializeOnce();
228+
kb = KBmanager.getMgr().getKB(KBmanager.getMgr().getPref("sumokbname"));
229+
System.out.println("Finished loading KBs");
230+
Set<String> allSUMOTermsSet = kb.kbCache.getChildClasses("Process");
231+
RandSet allSUMOTermsRandSet = RandSet.listToEqualPairs(allSUMOTermsSet);
232+
233+
createFileIfDoesNotExists(outputFileEnglish);
234+
createFileIfDoesNotExists(outputFileLogic);
235+
Random random = new Random();
236+
String englishSentence;
237+
238+
int sentenceGeneratedCounter = 0;
239+
while (sentenceGeneratedCounter < numToGenerate) {
240+
if (debug) System.out.println("\n");
241+
String randomSumoProcess = allSUMOTermsRandSet.getNext();
242+
String randomSumoProcessEnglish = kb.getTermFormat("EnglishLanguage", randomSumoProcess);
243+
if (randomSumoProcessEnglish == null) {
244+
randomSumoProcessEnglish = addSpaceBeforeCapitals((randomSumoProcess));
245+
}
246+
if (debug) System.out.println("Random SUMO Process: " + randomSumoProcess);
247+
String prompt = "Just the response. In a single word, what does '" + randomSumoProcessEnglish + "' cause?";
248+
249+
OllamaResult result =
250+
ollamaAPI.generate("llama3.2", prompt, RAW_PROMPT, options);
251+
252+
if (debug) System.out.println("Ollama returns: " + result.getResponse());
253+
String responseOllamaEnglish = cleanOllamaResponse(result.getResponse());
254+
String responseInSumo = getEquivalentSUMOMapping(responseOllamaEnglish);
255+
256+
if (responseInSumo != null) {
257+
if (random.nextBoolean()) {
258+
int randomIndex = random.nextInt(phrasesCauses.length);
259+
englishSentence = randomSumoProcessEnglish + phrasesCauses[randomIndex] + responseOllamaEnglish.toLowerCase() + ".\n";
260+
}
261+
else {
262+
int randomIndex = random.nextInt(phrasesCausedBy.length);
263+
englishSentence = responseOllamaEnglish.toLowerCase() + phrasesCausedBy[randomIndex] + randomSumoProcessEnglish + ".\n";
264+
}
265+
char firstChar = Character.toUpperCase(englishSentence.charAt(0));
266+
String remainingChars = englishSentence.substring(1).toLowerCase();
267+
englishSentence = firstChar + remainingChars;
268+
String logicPhrase = "( exists ( ?V1 ?V2) (and (instance ?V1 " + randomSumoProcess + " ) "
269+
+ " (instance ?V2 " + responseInSumo + " ) "
270+
+ " (causesSubclass ?V1 ?V2) ) )\n";
271+
if (debug) System.out.println("Resulting English sentence: '" + englishSentence + "'");
272+
if (debug) System.out.println("Resulting logic: '" + logicPhrase + "'");
273+
writeEnglishLogicPairToFile(englishSentence, logicPhrase);
274+
sentenceGeneratedCounter++;
275+
if (sentenceGeneratedCounter % 100 == 0) {
276+
System.out.print("...." + sentenceGeneratedCounter);
277+
}
278+
}
279+
else {
280+
if (debug) System.out.println("No related process for: " + result.getResponse());
281+
}
282+
}
283+
}
284+
}

0 commit comments

Comments
 (0)