Skip to content

Commit

Permalink
successfully differentiate between CV fragments that have the same
Browse files Browse the repository at this point in the history
SMILES but different R-Groups/attachment points
  • Loading branch information
ChemMitch committed Jan 31, 2025
1 parent 30524ed commit 0c0b7aa
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,22 @@ void testHaveDifferentHashes() {
Assertions.assertNotEquals(hash1, hash3);
}

/**
 * Checks that two fragment terms whose SMILES carry the same R-group labels
 * (_R90, _R91, _R92) assigned to different attachment points do not produce
 * the same hash.
 */
@Test
void testHaveDifferentHashes2() {
    // Each term uses one string for both its fragment structure and its simplified structure.
    String firstSmiles = "[*]OC[C@@](O[C@@H]([*])2)([C@H]([C@H]21)O[*])CO1 |$_R92;;;;;;_R90;;;;_R91;;;;$|";
    String secondSmiles = "[*]OC[C@@]12CO[C@@H]([C@H]([*])O1)[C@@H]2O[*] |$_R91;;;;;;;;_R90;;;;_R92$|";

    FragmentVocabularyTerm firstTerm = new FragmentVocabularyTerm();
    firstTerm.setFragmentStructure(firstSmiles);
    firstTerm.setSimplifiedStructure(firstSmiles);
    firstTerm.setValue("iR");

    FragmentVocabularyTerm secondTerm = new FragmentVocabularyTerm();
    secondTerm.setFragmentStructure(secondSmiles);
    secondTerm.setSimplifiedStructure(secondSmiles);
    secondTerm.setValue("LR");

    String firstHash = CVFragmentStructureValidator.getHash(firstTerm).get();
    String secondHash = CVFragmentStructureValidator.getHash(secondTerm).get();

    Assertions.assertNotEquals(firstHash, secondHash);
}

private String getInChiKey(String smiles) throws IOException {
Chemical chem = Chemical.parse(smiles);
String inChIKey = chem.toInchi().getKey();
Expand Down
13 changes: 5 additions & 8 deletions gsrs-module-substances-core/src/main/java/ix/core/chem/Chem.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
public class Chem {
private Chem () {}

public static final String WILDCARD_SUBSTITUTION_ATOM = "He";
public static final Integer WILDCARD_SUBSTITUTION_ATOM_NUMBER = 2;

public static void setFormula (Structure struc) {
try {
struc.formula = formula (struc.toChemical(false));
Expand All @@ -32,20 +35,14 @@ public static void setFormula (Structure struc) {

public static Chemical RemoveQueryFeaturesForPseudoInChI(Chemical c) {
Chemical chemicalToUse = c;
/*try {
log.trace("RemoveQueryFeaturesForPseudoInChI processing molfile c {}", c.toMol());
} catch (IOException e) {
log.error("Error generating mol from Chemical");
}*/
if(c.hasQueryAtoms() || c.atoms().filter(at->("A".equals(at.getSymbol()) || "*".equals(at.getSymbol()) || "R".equals(at.getSymbol()))).count()>0){
chemicalToUse = c.copy();
chemicalToUse.atoms()
.filter(at-> at.getSymbol() == null || "A".equals(at.getSymbol()) || "*".equals(at.getSymbol())
|| "R".equals(at.getSymbol()))//isQueryAtom returns true
.forEach(a->{
a.setAtomicNumber(2);
//verify that this is setting a symbol as well
a.setAlias("He");
a.setAtomicNumber(WILDCARD_SUBSTITUTION_ATOM_NUMBER);
a.setAlias(WILDCARD_SUBSTITUTION_ATOM);
a.setMassNumber(6);
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@

@Slf4j
public class CVFragmentStructureValidator extends AbstractValidatorPlugin<ControlledVocabulary> {


private final static int R_GROUP_ADJUSTMENT = 87;

private class FragmentChanges{

private List<FragmentVocabularyTerm> addedTerms = new ArrayList<FragmentVocabularyTerm>();
Expand Down Expand Up @@ -75,6 +77,10 @@ public void validate(ControlledVocabulary newCV, ControlledVocabulary oldCV, Val

public static Optional<String> getHash(FragmentVocabularyTerm term) {
try {
Optional<String> inchiKeyOne= getInchiKeyFromComplexSmiles(term.getFragmentStructure());
if(inchiKeyOne.isPresent()) {
return inchiKeyOne;
}
String inputStructure = term.getFragmentStructure().split(" ")[0];
Chemical chem = Chemical.parse(inputStructure);
//see if we get a good result without changing the structure
Expand All @@ -101,6 +107,32 @@ private static Optional<String> getInitialHash(Chemical chem) {
}
}

/**
 * Computes an InChIKey for a 'complex' SMILES input, i.e. one that contains both a
 * simple SMILES string and a set of R-Group designations, for example
 * {@code [*]OC[C@@]12CO[C@@H]([C@H]([*])O1)[C@@H]2O[*] |$_R91;;;;;;;;_R90;;;;_R92$|}.
 *
 * <p>Every atom with a positive R-group index is rewritten as the wildcard
 * substitution atom, with a mass number derived from its R-group index, so that
 * differently numbered attachment points end up as distinguishable atoms.
 *
 * @param complexInput SMILES text, optionally followed by R-group annotations
 * @return the InChIKey of the transformed structure, or an empty Optional when the
 *         input is null, cannot be parsed, or yields no key
 */
private static Optional<String> getInchiKeyFromComplexSmiles(String complexInput) {
    if (complexInput == null) {
        return Optional.empty();
    }
    try {
        Chemical initiallyParsedChemical = Chemical.parse(complexInput);
        initiallyParsedChemical.atoms()
                .filter(at -> at.getRGroupIndex().isPresent() && at.getRGroupIndex().getAsInt() > 0)
                .forEach(at -> {
                    // Read the index once, before the R-group is cleared below.
                    int rGroupIndex = at.getRGroupIndex().getAsInt();
                    log.trace("r group: {}", rGroupIndex);
                    // Subtracting this number from an RGroup index will give us a mass number that
                    // InChI can use to differentiate atoms. When the mass number is too high,
                    // InChI ignores it.
                    at.setMassNumber(Math.max(0, rGroupIndex - R_GROUP_ADJUSTMENT));
                    at.setAlias(Chem.WILDCARD_SUBSTITUTION_ATOM);
                    at.setAtomicNumber(Chem.WILDCARD_SUBSTITUTION_ATOM_NUMBER);
                    at.setRGroup(0);
                });

        // Round-trip through SMILES so the key is computed from the rewritten atoms.
        Chemical transformedChemical = Chemical.parse(initiallyParsedChemical.toSmiles());
        // ofNullable: a missing key becomes "no result" instead of a NullPointerException.
        return Optional.ofNullable(transformedChemical.toInchi().getKey());
    } catch (IOException e) {
        log.info("in getInchiKeyFromComplexSmiles, error parsing input {}: {}", complexInput, e.getMessage());
        return Optional.empty();
    }
}
private void chemicalValidation(FragmentVocabularyTerm term, Map<String,List<String>> lookup, ValidatorCallback callback) {

String fragmentStructure = term.getFragmentStructure().trim();
Expand Down

0 comments on commit 0c0b7aa

Please sign in to comment.