Skip to content

Commit

Permalink
Make clone labels consistent (#133)
Browse files Browse the repository at this point in the history
* recode names of some clones that were inconsistent

* update ks test between misclassified samples

* recreate figure with updated clone labels

other minor tweaks

* restrict figure to only those clones used in the paper
  • Loading branch information
gwaybio authored Jul 19, 2023
1 parent 11e711f commit e10a4ac
Show file tree
Hide file tree
Showing 10 changed files with 495 additions and 392 deletions.
106 changes: 61 additions & 45 deletions 5.signature-exploration/2.summarize_singscore_accuracy.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,22 @@
"full_singscore_df.incorrect = ~(full_singscore_df.incorrect > 0)\n",
"full_singscore_df.incorrect = full_singscore_df.incorrect.astype(int)\n",
"\n",
"# Recode many of the samples to the correct/consistent number\n",
"clone_recode_dict = {\n",
" \"WT clone 01\": \"WT001\",\n",
" \"WT clone 02\": \"WT002\",\n",
" \"WT clone 03\": \"WT003\",\n",
" \"WT clone 04\": \"WT004\",\n",
" \"WT clone 05\": \"WT005\",\n",
" \"WT clone 10\": \"WT010\",\n",
" \"WT clone 12\": \"WT012\",\n",
" \"WT clone 13\": \"WT013\",\n",
" \"WT clone 14\": \"WT014\",\n",
" \"WT clone 15\": \"WT015\",\n",
"}\n",
"\n",
"full_singscore_df.Metadata_clone_number = full_singscore_df.Metadata_clone_number.replace(clone_recode_dict)\n",
"\n",
"# Output to file\n",
"full_singscore_df.to_csv(output_singscore_file, index=False, sep=\"\\t\")\n",
"\n",
Expand Down Expand Up @@ -389,15 +405,15 @@
" <tr>\n",
" <th>1</th>\n",
" <td>otherclone</td>\n",
" <td>WT clone 15</td>\n",
" <td>WT015</td>\n",
" <td>16</td>\n",
" <td>15.0</td>\n",
" <td>0.937500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>otherclone</td>\n",
" <td>WT clone 10</td>\n",
" <td>WT010</td>\n",
" <td>16</td>\n",
" <td>12.0</td>\n",
" <td>0.750000</td>\n",
Expand All @@ -413,7 +429,7 @@
" <tr>\n",
" <th>4</th>\n",
" <td>holdout</td>\n",
" <td>WT clone 01</td>\n",
" <td>WT001</td>\n",
" <td>4</td>\n",
" <td>2.0</td>\n",
" <td>0.500000</td>\n",
Expand Down Expand Up @@ -465,10 +481,10 @@
"text/plain": [
" Metadata_model_split Metadata_clone_number total_samples \\\n",
"0 otherclone BZ006 8 \n",
"1 otherclone WT clone 15 16 \n",
"2 otherclone WT clone 10 16 \n",
"1 otherclone WT015 16 \n",
"2 otherclone WT010 16 \n",
"3 inference BZ007 3 \n",
"4 holdout WT clone 01 4 \n",
"4 holdout WT001 4 \n",
"5 inference BZ006 3 \n",
"6 test CloneA 24 \n",
"7 inference WT_parental 6 \n",
Expand Down Expand Up @@ -1058,19 +1074,6 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>WT clone 15</td>\n",
" <td>16</td>\n",
" <td>15.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0.937500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>BZ006</td>\n",
" <td>11</td>\n",
" <td>9.0</td>\n",
Expand All @@ -1083,30 +1086,43 @@
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>WT015</td>\n",
" <td>19</td>\n",
" <td>15.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>18</td>\n",
" <td>0.789474</td>\n",
" <td>0.000000</td>\n",
" <td>0.052632</td>\n",
" <td>0.947368</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>WT clone 10</td>\n",
" <td>16</td>\n",
" <td>WT010</td>\n",
" <td>19</td>\n",
" <td>12.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0.750000</td>\n",
" <td>0.000000</td>\n",
" <td>1</td>\n",
" <td>18</td>\n",
" <td>0.631579</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.052632</td>\n",
" <td>0.947368</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>WT clone 01</td>\n",
" <td>7</td>\n",
" <td>WT001</td>\n",
" <td>10</td>\n",
" <td>2.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>0.285714</td>\n",
" <td>0.285714</td>\n",
" <td>0.428571</td>\n",
" <td>0.571429</td>\n",
" <td>0.200000</td>\n",
" <td>0.300000</td>\n",
" <td>0.600000</td>\n",
" <td>0.400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
Expand All @@ -1127,24 +1143,24 @@
],
"text/plain": [
" Metadata_clone_number total_samples completely_incorrect high_confidence \\\n",
"0 WT clone 15 16 15.0 0 \n",
"1 BZ006 11 9.0 0 \n",
"2 WT clone 10 16 12.0 0 \n",
"3 WT clone 01 7 2.0 2 \n",
"0 BZ006 11 9.0 0 \n",
"1 WT015 19 15.0 0 \n",
"2 WT010 19 12.0 0 \n",
"3 WT001 10 2.0 3 \n",
"4 BZ007 11 2.0 8 \n",
"\n",
" accurate incorrect prop_completely_incorrect prop_high_confidence \\\n",
"0 0 16 0.937500 0.000000 \n",
"1 0 11 0.818182 0.000000 \n",
"2 0 16 0.750000 0.000000 \n",
"3 3 4 0.285714 0.285714 \n",
"0 0 11 0.818182 0.000000 \n",
"1 1 18 0.789474 0.000000 \n",
"2 1 18 0.631579 0.000000 \n",
"3 6 4 0.200000 0.300000 \n",
"4 8 3 0.181818 0.727273 \n",
"\n",
" prop_accurate prop_inaccurate \n",
"0 0.000000 1.000000 \n",
"1 0.000000 1.000000 \n",
"2 0.000000 1.000000 \n",
"3 0.428571 0.571429 \n",
"1 0.052632 0.947368 \n",
"2 0.052632 0.947368 \n",
"3 0.600000 0.400000 \n",
"4 0.727273 0.272727 "
]
},
Expand Down Expand Up @@ -1174,7 +1190,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:resistance-mechanisms-signature] *",
"display_name": "Python [conda env:resistance-mechanisms-signature]",
"language": "python",
"name": "conda-env-resistance-mechanisms-signature-py"
},
Expand Down
106 changes: 53 additions & 53 deletions 5.signature-exploration/3.evaluate_misclassified_featurespace.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"(46, 10)\n"
"(36, 10)\n"
]
},
{
Expand Down Expand Up @@ -444,19 +444,6 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>WT clone 15</td>\n",
" <td>16</td>\n",
" <td>15.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0.937500</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>BZ006</td>\n",
" <td>11</td>\n",
" <td>9.0</td>\n",
Expand All @@ -469,30 +456,43 @@
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>WT015</td>\n",
" <td>19</td>\n",
" <td>15.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>18</td>\n",
" <td>0.789474</td>\n",
" <td>0.000000</td>\n",
" <td>0.052632</td>\n",
" <td>0.947368</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>WT clone 10</td>\n",
" <td>16</td>\n",
" <td>WT010</td>\n",
" <td>19</td>\n",
" <td>12.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0.750000</td>\n",
" <td>1</td>\n",
" <td>18</td>\n",
" <td>0.631579</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.052632</td>\n",
" <td>0.947368</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>WT clone 01</td>\n",
" <td>7</td>\n",
" <td>WT001</td>\n",
" <td>10</td>\n",
" <td>2.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>0.285714</td>\n",
" <td>0.285714</td>\n",
" <td>0.428571</td>\n",
" <td>0.571429</td>\n",
" <td>0.200000</td>\n",
" <td>0.300000</td>\n",
" <td>0.600000</td>\n",
" <td>0.400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
Expand All @@ -513,24 +513,24 @@
],
"text/plain": [
" Metadata_clone_number total_samples completely_incorrect high_confidence \\\n",
"0 WT clone 15 16 15.0 0 \n",
"1 BZ006 11 9.0 0 \n",
"2 WT clone 10 16 12.0 0 \n",
"3 WT clone 01 7 2.0 2 \n",
"0 BZ006 11 9.0 0 \n",
"1 WT015 19 15.0 0 \n",
"2 WT010 19 12.0 0 \n",
"3 WT001 10 2.0 3 \n",
"4 BZ007 11 2.0 8 \n",
"\n",
" accurate incorrect prop_completely_incorrect prop_high_confidence \\\n",
"0 0 16 0.937500 0.000000 \n",
"1 0 11 0.818182 0.000000 \n",
"2 0 16 0.750000 0.000000 \n",
"3 3 4 0.285714 0.285714 \n",
"0 0 11 0.818182 0.000000 \n",
"1 1 18 0.789474 0.000000 \n",
"2 1 18 0.631579 0.000000 \n",
"3 6 4 0.200000 0.300000 \n",
"4 8 3 0.181818 0.727273 \n",
"\n",
" prop_accurate prop_inaccurate \n",
"0 0.000000 1.000000 \n",
"1 0.000000 1.000000 \n",
"2 0.000000 1.000000 \n",
"3 0.428571 0.571429 \n",
"1 0.052632 0.947368 \n",
"2 0.052632 0.947368 \n",
"3 0.600000 0.400000 \n",
"4 0.727273 0.272727 "
]
},
Expand All @@ -556,7 +556,7 @@
{
"data": {
"text/plain": [
"['WT clone 15', 'BZ006', 'WT clone 10']"
"['BZ006', 'WT015', 'WT010']"
]
},
"execution_count": 7,
Expand All @@ -579,7 +579,7 @@
{
"data": {
"text/plain": [
"['WT clone 02', 'WT clone 12', 'WT clone 13', 'WT clone 14', 'BZ003', 'BZ007']"
"['WT012', 'WT013', 'WT002', 'BZ007', 'BZ003', 'WT014']"
]
},
"execution_count": 8,
Expand Down Expand Up @@ -610,8 +610,8 @@
"# Manually define these samples in specific dictionaries\n",
"sample_comparison_dict = {\n",
" \"wildtype\": {\n",
" \"correct\": [\"WT clone 02\", \"WT clone 12\", \"WT clone 13\", \"WT clone 14\"],\n",
" \"incorrect\": [\"WT clone 15\", \"WT clone 10\"]\n",
" \"correct\": [\"WT002\", \"WT012\", \"WT013\", \"WT014\"],\n",
" \"incorrect\": [\"WT015\", \"WT010\"]\n",
" },\n",
" \"resistant\": {\n",
" \"correct\": [\"BZ003\", \"BZ007\"],\n",
Expand Down Expand Up @@ -665,8 +665,8 @@
" <th>0</th>\n",
" <td>Cells_AreaShape_Zernike_4_2</td>\n",
" <td>wildtype</td>\n",
" <td>0.179330</td>\n",
" <td>4.461730e-03</td>\n",
" <td>0.277778</td>\n",
" <td>7.342544e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
Expand All @@ -679,8 +679,8 @@
" <th>2</th>\n",
" <td>Cells_Correlation_K_DNA_AGP</td>\n",
" <td>wildtype</td>\n",
" <td>0.214869</td>\n",
" <td>3.162436e-04</td>\n",
" <td>0.407407</td>\n",
" <td>2.724839e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
Expand All @@ -693,20 +693,20 @@
" <th>4</th>\n",
" <td>Cells_Correlation_Manders_Mito_ER</td>\n",
" <td>wildtype</td>\n",
" <td>0.169118</td>\n",
" <td>8.729040e-03</td>\n",
" <td>0.333333</td>\n",
" <td>5.082990e-01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" feature clone_type ks_stat ks_pval\n",
"0 Cells_AreaShape_Zernike_4_2 wildtype 0.179330 4.461730e-03\n",
"0 Cells_AreaShape_Zernike_4_2 wildtype 0.277778 7.342544e-01\n",
"1 Cells_AreaShape_Zernike_4_2 resistant 0.705397 6.321256e-22\n",
"2 Cells_Correlation_K_DNA_AGP wildtype 0.214869 3.162436e-04\n",
"2 Cells_Correlation_K_DNA_AGP wildtype 0.407407 2.724839e-01\n",
"3 Cells_Correlation_K_DNA_AGP resistant 0.633651 2.272664e-17\n",
"4 Cells_Correlation_Manders_Mito_ER wildtype 0.169118 8.729040e-03"
"4 Cells_Correlation_Manders_Mito_ER wildtype 0.333333 5.082990e-01"
]
},
"execution_count": 10,
Expand Down Expand Up @@ -754,7 +754,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:resistance-mechanisms-signature] *",
"display_name": "Python [conda env:resistance-mechanisms-signature]",
"language": "python",
"name": "conda-env-resistance-mechanisms-signature-py"
},
Expand Down
Loading

0 comments on commit e10a4ac

Please sign in to comment.