diff --git a/paper2_LIME/RF-lime.ipynb b/paper2_LIME/RF-lime.ipynb index f38f437..49f6db9 100644 --- a/paper2_LIME/RF-lime.ipynb +++ b/paper2_LIME/RF-lime.ipynb @@ -51,6 +51,9 @@ ")\n", "#drop smile with containing 'P'\n", "soldata = soldata[soldata[\"SMILES\"].str.contains(\"P\") == False]\n", + "#drop rows whose SMILES rdkit.Chem.MolFromSmiles fails to parse (returns None)\n", + "soldata = soldata[soldata[\"SMILES\"].apply(lambda smiles: rdkit.Chem.MolFromSmiles(smiles) is not None)]\n", + "\n", "\n", "features_start_at = list(soldata.columns).index(\"MolWt\")" ] @@ -84,7 +87,7 @@ ")\n", "\n", "# make subsample from pandas df\n", - "molecules = [mol for smi in soldata.SMILES if (mol := rdkit.Chem.MolFromSmiles(smi)) is not None]\n", + "molecules = [rdkit.Chem.MolFromSmiles(smi) for smi in soldata.SMILES]\n", "\n", "raw_features = []\n", "for e, c in zip(molecules, calc.map(molecules, quiet=True)):\n",