From 7e4ab0baf5e824044e714382cd96991ffcd4d52c Mon Sep 17 00:00:00 2001 From: qcampbel Date: Thu, 27 Feb 2025 22:14:23 -0500 Subject: [PATCH] better fix for rdkit error --- paper2_LIME/RF-lime.ipynb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/paper2_LIME/RF-lime.ipynb b/paper2_LIME/RF-lime.ipynb index f38f437..49f6db9 100644 --- a/paper2_LIME/RF-lime.ipynb +++ b/paper2_LIME/RF-lime.ipynb @@ -51,6 +51,9 @@ ")\n", "#drop smile with containing 'P'\n", "soldata = soldata[soldata[\"SMILES\"].str.contains(\"P\") == False]\n", + "#drop rows whose SMILES rdkit.Chem.MolFromSmiles cannot parse (returns None)\n", + "soldata = soldata[soldata[\"SMILES\"].apply(lambda smiles: rdkit.Chem.MolFromSmiles(smiles) is not None)]\n", + "\n", "\n", "features_start_at = list(soldata.columns).index(\"MolWt\")" ] @@ -84,7 +87,7 @@ ")\n", "\n", "# make subsample from pandas df\n", - "molecules = [mol for smi in soldata.SMILES if (mol := rdkit.Chem.MolFromSmiles(smi)) is not None]\n", + "molecules = [rdkit.Chem.MolFromSmiles(smi) for smi in soldata.SMILES]\n", "\n", "raw_features = []\n", "for e, c in zip(molecules, calc.map(molecules, quiet=True)):\n",