Skip to content

Commit 2749f5d

Browse files
committed
output prediction working
1 parent eefd0d2 commit 2749f5d

File tree

5 files changed

+6738
-463
lines changed

5 files changed

+6738
-463
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
from pandas import DataFrame
2+
from typing import Optional, List
23

4+
from pysd2cat.analysis import correctness
35

4-
def predict_signal(df: DataFrame, experiment_identifier: str) -> DataFrame:
5-
pass
6+
def predict_signal(df: DataFrame,
                   experiment_identifier: str,
                   low_control: str,
                   high_control: str,
                   id_col: str,
                   channels: List[str],
                   strain_col: Optional[str] = 'strain_name') -> DataFrame:
    """Run pysd2cat's output prediction on ``df`` and return its result.

    This is a thin wrapper around
    ``correctness.compute_predicted_output``: the caller-supplied arguments
    are forwarded as keywords and the remaining options are pinned to the
    fixed values listed below.

    Args:
        df: Data passed through unchanged as the first positional argument.
        experiment_identifier: Accepted for interface compatibility; it is
            not used by this function.
        low_control: Forwarded as ``low_control``.
        high_control: Forwarded as ``high_control``.
        id_col: Forwarded as ``id_col``.
        channels: Forwarded as ``data_columns``.
        strain_col: Forwarded as ``strain_col``.

    Returns:
        Whatever ``correctness.compute_predicted_output`` returns (annotated
        here as a DataFrame).
    """
    # Options this wrapper always pins rather than exposing to callers.
    fixed_options = dict(
        training_df=None,
        out_dir='.',
        use_harness=False,
        description=None,
    )
    return correctness.compute_predicted_output(
        df,
        data_columns=channels,
        strain_col=strain_col,
        high_control=high_control,
        low_control=low_control,
        id_col=id_col,
        **fixed_options,
    )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import json
2+
import os
3+
import pandas as pd
4+
5+
## Data Helper functions
6+
7+
def get_record(experiment):
    """Load and return the parsed ``record.json`` found in ``experiment``.

    Args:
        experiment: Path of the directory that contains ``record.json``.

    Returns:
        The object produced by ``json.load`` for that file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file is not valid JSON.
    """
    record_path = os.path.join(experiment, "record.json")
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(record_path) as handle:
        return json.load(handle)
10+
11+
def json_to_pd(json_data, channels=["FSC-A"]):
    """Flatten a list of per-sample dicts into one DataFrame.

    Each element of ``json_data`` must have a ``'sample_id'`` key. Every
    other key/value pair is handed to ``pd.DataFrame`` as column data
    (presumably equal-length lists of per-event values; confirm against
    the producer of the file), and a ``sample_id`` column holding the
    sample's id is added to those rows.

    Args:
        json_data: Iterable of dicts as described above.
        channels: Currently unused; kept so existing callers keep working.
            The default list is never mutated.

    Returns:
        A DataFrame with the rows of all samples stacked in input order and
        a fresh 0..n-1 index. An empty DataFrame if ``json_data`` is empty.
    """
    frames = []
    for sample in json_data:
        sample_id = sample['sample_id']
        sample_df = pd.DataFrame(
            data={k: v for k, v in sample.items() if k != "sample_id"}
        )
        sample_df['sample_id'] = sample_id
        frames.append(sample_df)

    if not frames:
        # pd.concat raises on an empty list; keep the original empty result.
        return pd.DataFrame()

    # Concatenate once: DataFrame.append copied the whole frame on every
    # iteration and no longer exists in pandas >= 2.0.
    return pd.concat(frames, ignore_index=True)
19+
20+
def get_record_file(record, file_type="fc_meta"):
    """Return the first file entry in ``record`` whose name contains ``file_type``.

    Args:
        record: Mapping with a ``'files'`` key holding a sequence of entries,
            each of which has a ``'name'`` key.
        file_type: Substring to look for in each entry's ``'name'``.

    Returns:
        The first matching entry (in ``record['files']`` order), or ``None``
        when no entry matches.
    """
    # Substring match, so e.g. "fc_meta" also matches "x_fc_meta.csv".
    # next() stops at the first hit instead of building the full match list.
    return next(
        (entry for entry in record['files'] if file_type in entry['name']),
        None,
    )
27+
28+
def get_meta(experiment, record):
    """Read the experiment's ``fc_meta.csv`` file into a DataFrame.

    The file is located by asking ``get_record_file`` for the first entry of
    ``record`` whose name contains ``"fc_meta.csv"``; that name is then
    resolved relative to ``experiment``.

    Args:
        experiment: Directory path the file name is joined onto.
        record: Record mapping passed to ``get_record_file``.

    Returns:
        The DataFrame produced by ``pd.read_csv``, or ``None`` when the
        record has no matching (truthy) entry.
    """
    meta_entry = get_record_file(record, file_type="fc_meta.csv")
    if not meta_entry:
        return None
    csv_path = os.path.join(experiment, meta_entry['name'])
    return pd.read_csv(csv_path)
36+
37+
38+
def get_data(experiment, record):
    """Load the experiment's raw-events JSON file as a DataFrame.

    The file is the first entry of ``record`` whose name contains
    ``"fc_raw_events"`` (looked up with ``get_record_file``); its parsed
    content is converted with ``json_to_pd``.

    Args:
        experiment: Directory path the file name is joined onto.
        record: Record mapping passed to ``get_record_file``.

    Returns:
        The DataFrame returned by ``json_to_pd``, or ``None`` when the
        record has no matching (truthy) entry.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    fc_raw_file = get_record_file(record, file_type="fc_raw_events")
    if not fc_raw_file:
        return None

    raw_path = os.path.join(experiment, fc_raw_file['name'])
    # Close the handle as soon as parsing finishes rather than leaking it.
    with open(raw_path) as handle:
        fc_raw_data = json.load(handle)
    return json_to_pd(fc_raw_data)
45+
46+
def get_data_and_metadata(experiment):
    """Return the experiment's metadata inner-joined with its event data.

    Loads the record with ``get_record``, then the data (``get_data``) and
    the metadata (``get_meta``), and merges them on ``"sample_id"``.

    Args:
        experiment: Path of the experiment directory.

    Returns:
        ``meta.merge(data, on="sample_id", how="inner")``, or ``None`` if
        either the data or the metadata could not be loaded.
    """
    record = get_record(experiment)
    events = get_data(experiment, record)
    metadata = get_meta(experiment, record)

    # Both pieces are required; bail out if either lookup came back empty.
    if events is None or metadata is None:
        return None
    return metadata.merge(events, on="sample_id", how="inner")
55+
56+
def get_data_converge_id(path):
    """Derive an id from ``path``: its last ``/`` segment up to the first ``.``.

    Args:
        path: String using ``/`` as the separator.

    Returns:
        The text after the final ``/`` (the whole string if there is none),
        truncated at the first ``.`` if one is present.
    """
    last_segment = path.rsplit("/", 1)[-1]
    stem, _, _ = last_segment.partition(".")
    return stem
59+
60+

Diff for: notebooks/correctness_analysis.ipynb

+10-52
Original file line numberDiff line numberDiff line change
@@ -15,45 +15,15 @@
1515
]
1616
},
1717
{
18-
"data": {
19-
"text/plain": [
20-
"['/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Short-Duration-Time-Series-35C_20200414152048',\n",
21-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Growth-Curves-35C_20200414152809',\n",
22-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Beta-Estradiol-OR-Gate-Plant-TF-Dose-Response_20200423200308',\n",
23-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-OR-Gate-CRISPR-Dose-Response_20200427004659',\n",
24-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-OR-Gate-CRISPR-Growth-Curves_20200417214333',\n",
25-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Dose-Response_20200414151033',\n",
26-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Long-Duration-Time-Series-20191208_20200414085017',\n",
27-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Doxycycline-OR-Gate-Plant-TF-Growth-Curves_20200414144659',\n",
28-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Growth-Curves-35C_20200423200123',\n",
29-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-OR-Gate-CRISPR-Dose-Response_20200423202039',\n",
30-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Growth-Curves-with-Plate-Reader-Optimization_20200423194854',\n",
31-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Beta-Estradiol-OR-Gate-Plant-TF-Growth-Curves_20200327174254',\n",
32-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-OR-Gate-CRISPR-Growth-Curves_20200414145352',\n",
33-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Beta-Estradiol-OR-Gate-Plant-TF-Dose-Response_20200414091814',\n",
34-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Long-Duration-Time-Series-20191208_20200328071458',\n",
35-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Growth-Curves-with-Plate-Reader-Optimization_20200414151551',\n",
36-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Short-Duration-Time-Series-35C_20200423195648',\n",
37-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Short-Duration-Time-Series-20191208_20200328030858',\n",
38-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Short-Duration-Time-Series-20191208_20200414095504',\n",
39-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Doxycycline-OR-Gate-Plant-TF-Dose-Response_20200423201302',\n",
40-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Dose-Response_20200327165322',\n",
41-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Long-Duration-Time-Series-20191208_20200423193808',\n",
42-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Beta-Estradiol-OR-Gate-Plant-TF-Growth-Curves_20200423201016',\n",
43-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-OR-Gate-CRISPR-Growth-Curves_20200423202428',\n",
44-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Doxycycline-OR-Gate-Plant-TF-Growth-Curves_20200327174015',\n",
45-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Beta-Estradiol-OR-Gate-Plant-TF-Growth-Curves_20200414150010',\n",
46-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Dose-Response_20200423194728',\n",
47-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-OR-Gate-CRISPR-Growth-Curves_20200327171330',\n",
48-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-CRISPR-Short-Duration-Time-Series-20191208_20200423194115',\n",
49-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_CEN-PK-Inducible-CRISPR-Characterization_20200426200027',\n",
50-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Doxycycline-OR-Gate-Plant-TF-Dose-Response_20200426192144',\n",
51-
" '/work/projects/SD2E-Community/prod/projects/sd2e-project-43/test/dc_YeastSTATES-Doxycycline-OR-Gate-Plant-TF-Growth-Curves_20200423201744']"
52-
]
53-
},
54-
"execution_count": 1,
55-
"metadata": {},
56-
"output_type": "execute_result"
18+
"ename": "ModuleNotFoundError",
19+
"evalue": "No module named 'data_converge'",
20+
"output_type": "error",
21+
"traceback": [
22+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
23+
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
24+
"\u001b[0;32m<ipython-input-1-1f0e54a506c8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpysd2cat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0manalysis\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcorrectness\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mdata_converge\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutil\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdata_utils\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mdu\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0magavepy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magave\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mAgave\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mAgaveError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
25+
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'data_converge'"
26+
]
5727
}
5828
],
5929
"source": [
@@ -1737,7 +1707,7 @@
17371707
},
17381708
{
17391709
"cell_type": "code",
1740-
"execution_count": 20,
1710+
"execution_count": 2,
17411711
"metadata": {},
17421712
"outputs": [],
17431713
"source": [
@@ -3631,18 +3601,6 @@
36313601
"display_name": "Python 3",
36323602
"language": "python",
36333603
"name": "python3"
3634-
},
3635-
"language_info": {
3636-
"codemirror_mode": {
3637-
"name": "ipython",
3638-
"version": 3
3639-
},
3640-
"file_extension": ".py",
3641-
"mimetype": "text/x-python",
3642-
"name": "python",
3643-
"nbconvert_exporter": "python",
3644-
"pygments_lexer": "ipython3",
3645-
"version": "3.6.8"
36463604
}
36473605
},
36483606
"nbformat": 4,

Diff for: notebooks/demo_fcs_signal_prediction.ipynb

+162
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import os\n",
10+
"import numpy as np\n",
11+
"from os.path import expanduser\n",
12+
"from fcs_signal_prediction.predict import predict_signal\n",
13+
"from fcs_signal_prediction.utils import data_utils as du\n",
14+
"\n",
15+
"\n",
16+
"%load_ext autoreload\n",
17+
"%reload_ext autoreload\n",
18+
"%autoreload 2\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {},
25+
"outputs": [],
26+
"source": [
27+
"DATA_CONVERGE_PROJECT=\"sd2e-project-43\"\n",
28+
"\n",
29+
"data_converge_base = os.path.join(expanduser(\"~\"), 'sd2e-projects', DATA_CONVERGE_PROJECT)\n",
30+
"experiment_dir = os.path.realpath(os.path.join(data_converge_base, 'reactor_outputs', 'complete'))\n",
31+
"experiment_dir_contents = [os.path.realpath(os.path.join(experiment_dir, x)) for x in os.listdir(experiment_dir)]\n",
32+
"\n",
33+
"experiment_dir_contents"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": null,
39+
"metadata": {},
40+
"outputs": [],
41+
"source": [
42+
"#experiment_id = \"YeastSTATES-CRISPR-Short-Duration-Time-Series-20191208\"\n",
43+
"experiment_id=\"YeastSTATES-OR-Gate-CRISPR-Dose-Response\"\n",
44+
"process_dir = os.path.join(experiment_dir, experiment_id)\n",
45+
"experiment_id_dir_contents = [os.path.realpath(os.path.join(process_dir, x)) for x in os.listdir(process_dir)]\n",
46+
"experiment_id_dir_contents.sort()\n",
47+
"last_process = experiment_id_dir_contents[-1]"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": null,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": [
56+
"id_col = 'sample_id'\n",
57+
"df = du.get_data(last_process, du.get_record(last_process))\n",
58+
"meta = du.get_meta(last_process, du.get_record(last_process))\n",
59+
"#df = du.get_meta(last_process, du.get_record(last_process))\n",
60+
"df = df.merge(meta[[id_col, 'strain_name']])"
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": null,
66+
"metadata": {},
67+
"outputs": [],
68+
"source": [
69+
"channels = list(df.columns)\n",
70+
"channels.remove(id_col)\n",
71+
"channels.remove(\"strain_name\")\n",
72+
"\n",
73+
"high_control = 'CRISPR_CEN.PK2_positive_control_NOR_00_24864'\n",
74+
"low_control = 'CRISPR_CEN.PK2_negative_control_23970'\n",
75+
"strain_col = \"strain_name\""
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": null,
81+
"metadata": {},
82+
"outputs": [],
83+
"source": [
84+
"predictions = predict_signal(df, experiment_id, low_control, high_control, id_col, channels, strain_col)\n",
85+
"mean_prediction = predictions.groupby([id_col]).agg({\"predicted_output\" : [np.mean, np.std]}).reset_index()\n",
86+
"mean_prediction.columns = mean_prediction.columns.map('_'.join)\n",
87+
"mean_prediction = mean_prediction.rename(columns={id_col+\"_\": id_col})\n",
88+
"result = meta.merge(mean_prediction, on=id_col)\n",
89+
"result"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": null,
95+
"metadata": {},
96+
"outputs": [],
97+
"source": [
98+
"def get_log_gfp(df):\n",
99+
" df['log BL1-A'] = df['BL1-A'].apply(lambda x: x+1.0).apply(np.log10)\n",
100+
" return df.replace([np.inf, -np.inf], np.nan).dropna()\n",
101+
"\n",
102+
"\n",
103+
"log_df = get_log_gfp(df).sample(n=int(10e5))"
104+
]
105+
},
106+
{
107+
"cell_type": "code",
108+
"execution_count": null,
109+
"metadata": {},
110+
"outputs": [],
111+
"source": [
112+
"import matplotlib.pyplot as plt\n",
113+
"\n",
114+
"samples = log_df.sample_id.unique()\n",
115+
"num_samples = len(samples)\n",
116+
"\n",
117+
"fig, ax = plt.subplots(num_samples, 1, figsize=(3, 3*num_samples))\n",
118+
"\n",
119+
"num=100000\n",
120+
"\n",
121+
"high_df = log_df.loc[log_df.strain_name == high_control]#.sample(n=num)\n",
122+
"low_df = log_df.loc[log_df.strain_name == low_control]#.sample(n=num)\n",
123+
"\n",
124+
"for i, sample in enumerate(samples):\n",
125+
" #print(i)\n",
126+
" sample_df = log_df.loc[log_df.sample_id == sample]\n",
127+
" \n",
128+
" #sample_df = sample_df.sample(n=min(num, len(sample_df)))\n",
129+
"\n",
130+
" ax[i].hist(high_df['log BL1-A'], label=\"high\")\n",
131+
" ax[i].hist(low_df['log BL1-A'], label=\"low\")\n",
132+
" ax[i].hist(sample_df['log BL1-A'], label=\"sample\")\n",
133+
" ax[i].set_xlim(0, 5)\n",
134+
" ax[i].text(6, 0, \"\\n\".join(result[result.sample_id==sample][[\"predicted_output_mean\", \"predicted_output_std\"]].transpose()[0:].to_string().split(\"\\n\")[1:]))\n",
135+
" ax[i].set_title(sample)\n",
136+
"\n",
137+
"# if i >= 9: \n",
138+
"# break\n",
139+
"#fig.legend()\n",
140+
"\n",
141+
"#result[result.sample_id==sample][[\"predicted_output_mean\", \"predicted_output_std\"]]\n",
142+
"\n"
143+
]
144+
},
145+
{
146+
"cell_type": "code",
147+
"execution_count": null,
148+
"metadata": {},
149+
"outputs": [],
150+
"source": []
151+
}
152+
],
153+
"metadata": {
154+
"kernelspec": {
155+
"display_name": "Python 3",
156+
"language": "python",
157+
"name": "python3"
158+
}
159+
},
160+
"nbformat": 4,
161+
"nbformat_minor": 2
162+
}

0 commit comments

Comments
 (0)