diff --git a/A_Primer_on_Deep_Learning_in_Genomics_Public.ipynb b/A_Primer_on_Deep_Learning_in_Genomics_Public.ipynb index 87c462a..d21fe55 100644 --- a/A_Primer_on_Deep_Learning_in_Genomics_Public.ipynb +++ b/A_Primer_on_Deep_Learning_in_Genomics_Public.ipynb @@ -290,10 +290,17 @@ "one_hot_encoder = OneHotEncoder(categories='auto') \n", "input_features = []\n", "\n", + "# Fit encoders first before applying transforms \n", + "alphabet = list(set(letter for seq in sequences for letter in seq)) \n", + "integer_encoder.fit(alphabet) \n", + "\n", + "alphabet_encoded = integer_encoder.transform(integer_encoder.classes_) \n", + "one_hot_encoder.fit(np.array(alphabet_encoded).reshape(-1,1)) \n", + "\n", "for sequence in sequences:\n", - " integer_encoded = integer_encoder.fit_transform(list(sequence))\n", + " integer_encoded = integer_encoder.transform(list(sequence))\n", " integer_encoded = np.array(integer_encoded).reshape(-1, 1)\n", - " one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded)\n", + " one_hot_encoded = one_hot_encoder.transform(integer_encoded)\n", " input_features.append(one_hot_encoded.toarray())\n", "\n", "np.set_printoptions(threshold=40)\n", @@ -849,4 +856,4 @@ ] } ] -} \ No newline at end of file +}