Skip to content

Commit c9db405

Browse files
authored
start analysis of other enrichment methods (#49)
1 parent 9aae50b commit c9db405

File tree

2 files changed

+246
-0
lines changed

2 files changed

+246
-0
lines changed
+154
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Name\n",
8+
"\n",
9+
"This notebook plugs in other gene set enrichment methods to demonstrate that our method, SOPHIE, can be inserted into different pipelines and work with other methods."
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 1,
15+
"metadata": {},
16+
"outputs": [
17+
{
18+
"name": "stderr",
19+
"output_type": "stream",
20+
"text": [
21+
"Using TensorFlow backend.\n",
22+
"/home/alexandra/anaconda3/envs/generic_expression/lib/python3.7/site-packages/matplotlib/__init__.py:886: MatplotlibDeprecationWarning: \n",
23+
"examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.\n",
24+
" \"found relative to the 'datapath' directory.\".format(key))\n"
25+
]
26+
}
27+
],
28+
"source": [
29+
"%load_ext autoreload\n",
30+
"%load_ext rpy2.ipython\n",
31+
"%autoreload 2\n",
32+
"\n",
33+
"import os\n",
34+
"import sys\n",
35+
"import pandas as pd\n",
36+
"import numpy as np\n",
37+
"import pickle\n",
38+
"\n",
39+
"from rpy2.robjects import pandas2ri\n",
40+
"pandas2ri.activate()\n",
41+
"\n",
42+
"from ponyo import utils\n",
43+
"from generic_expression_patterns_modules import calc, process\n",
44+
"\n",
45+
"np.random.seed(123)"
46+
]
47+
},
48+
{
49+
"cell_type": "code",
50+
"execution_count": 2,
51+
"metadata": {},
52+
"outputs": [],
53+
"source": [
54+
"# Read in config variables\n",
55+
"base_dir = os.path.abspath(os.path.join(os.getcwd(), \"../\"))\n",
56+
"\n",
57+
"config_filename = os.path.abspath(\n",
58+
" os.path.join(base_dir, \"configs\", \"config_human_general.tsv\")\n",
59+
")\n",
60+
"\n",
61+
"params = utils.read_config(config_filename)"
62+
]
63+
},
64+
{
65+
"cell_type": "code",
66+
"execution_count": 3,
67+
"metadata": {},
68+
"outputs": [],
69+
"source": [
70+
"# Load params\n",
71+
"local_dir = params[\"local_dir\"]\n",
72+
"project_id = params['project_id']\n",
73+
"hallmark_DB_filename = params[\"pathway_DB_filename\"]"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": 4,
79+
"metadata": {},
80+
"outputs": [],
81+
"source": [
82+
"# Load DE stats directory\n",
83+
"DE_stats_dir = os.path.join(local_dir, \"DE_stats\")\n",
84+
"\n",
85+
"# Template experiment DE stats\n",
86+
"template_DE_stats_filename = os.path.join(\n",
87+
" DE_stats_dir,\n",
88+
" f\"DE_stats_template_data_{project_id}_real.txt\"\n",
89+
")"
90+
]
91+
},
92+
{
93+
"cell_type": "markdown",
94+
"metadata": {},
95+
"source": [
96+
"## Enrichment methods\n",
97+
"* [ROAST](https://pubmed.ncbi.nlm.nih.gov/20610611/) is available in limma\n",
98+
"* [CAMERA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3458527/) is available in limma\n",
99+
"* [GSVA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3618321/) is available as its own Bioconductor package\n",
100+
"* ORA (over-representation analysis) is available in Pathway Studio or DAVID\n",
101+
"\n",
102+
"TO DO: Write about each method"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": 5,
108+
"metadata": {},
109+
"outputs": [],
110+
"source": [
111+
"# Define function\n",
112+
"# ORA works on list of DE\n",
113+
"# Apply voom on gene expression >> ROAST, CAMERA, GSVA\n",
114+
"\n",
115+
"# Process data using voom\n",
116+
"\n",
117+
"\n",
118+
"# Run method on template experiments\n",
119+
"# Run method on simulated experiments\n",
120+
"# Output table sort by ranking"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": 6,
126+
"metadata": {},
127+
"outputs": [],
128+
"source": [
129+
"# Get summary rank of pathways"
130+
]
131+
}
132+
],
133+
"metadata": {
134+
"kernelspec": {
135+
"display_name": "Python [conda env:generic_expression] *",
136+
"language": "python",
137+
"name": "conda-env-generic_expression-py"
138+
},
139+
"language_info": {
140+
"codemirror_mode": {
141+
"name": "ipython",
142+
"version": 3
143+
},
144+
"file_extension": ".py",
145+
"mimetype": "text/x-python",
146+
"name": "python",
147+
"nbconvert_exporter": "python",
148+
"pygments_lexer": "ipython3",
149+
"version": "3.7.8"
150+
}
151+
},
152+
"nbformat": 4,
153+
"nbformat_minor": 4
154+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# ## Name
5+
#
6+
# This notebook plugs in other gene set enrichment methods to demonstrate that our method, SOPHIE, can be inserted into different pipelines and work with other methods.
7+
8+
# In[1]:
9+
10+
11+
get_ipython().run_line_magic('load_ext', 'autoreload')
12+
get_ipython().run_line_magic('load_ext', 'rpy2.ipython')
13+
get_ipython().run_line_magic('autoreload', '2')
14+
15+
import os
16+
import sys
17+
import pandas as pd
18+
import numpy as np
19+
import pickle
20+
21+
from rpy2.robjects import pandas2ri
22+
pandas2ri.activate()
23+
24+
from ponyo import utils
25+
from generic_expression_patterns_modules import calc, process
26+
27+
np.random.seed(123)
28+
29+
30+
# In[2]:
31+
32+
33+
# Read in config variables
34+
base_dir = os.path.abspath(os.path.join(os.getcwd(), "../"))
35+
36+
config_filename = os.path.abspath(
37+
os.path.join(base_dir, "configs", "config_human_general.tsv")
38+
)
39+
40+
params = utils.read_config(config_filename)
41+
42+
43+
# In[3]:
44+
45+
46+
# Load params
47+
local_dir = params["local_dir"]
48+
project_id = params['project_id']
49+
hallmark_DB_filename = params["pathway_DB_filename"]
50+
51+
52+
# In[4]:
53+
54+
55+
# Load DE stats directory
56+
DE_stats_dir = os.path.join(local_dir, "DE_stats")
57+
58+
# Template experiment DE stats
59+
template_DE_stats_filename = os.path.join(
60+
DE_stats_dir,
61+
f"DE_stats_template_data_{project_id}_real.txt"
62+
)
63+
64+
65+
# ## Enrichment methods
66+
# * [ROAST](https://pubmed.ncbi.nlm.nih.gov/20610611/) is available in limma
67+
# * [CAMERA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3458527/) is available in limma
68+
# * [GSVA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3618321/) is available as its own Bioconductor package
69+
# * ORA (over-representation analysis) is available in Pathway Studio or DAVID
70+
#
71+
# TO DO: Write about each method
72+
73+
# In[5]:
74+
75+
76+
# Define function
77+
# ORA works on list of DE
78+
# Apply voom on gene expression >> ROAST, CAMERA, GSVA
79+
80+
# Process data using voom
81+
82+
83+
# Run method on template experiments
84+
# Run method on simulated experiments
85+
# Output table sort by ranking
86+
87+
88+
# In[6]:
89+
90+
91+
# Get summary rank of pathways
92+

0 commit comments

Comments
 (0)