start analysis of other enrichment methods (#49)

ajlee21 · web-flow · commit c9db405d3ffb · 2020-12-18T15:44:03.000-05:00
diff --git a/human_general_analysis/Try_other_enrichment_methods.ipynb b/human_general_analysis/Try_other_enrichment_methods.ipynb
@@ -0,0 +1,154 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Name\n",
+    "\n",
+    "This notebook plugs in other gene set enrichment methods to demonstrate that our method, SOPHIE, can be inserted into different pipelines and work with other methods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n",
+      "/home/alexandra/anaconda3/envs/generic_expression/lib/python3.7/site-packages/matplotlib/__init__.py:886: MatplotlibDeprecationWarning: \n",
+      "examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.\n",
+      "  \"found relative to the 'datapath' directory.\".format(key))\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext autoreload\n",
+    "%load_ext rpy2.ipython\n",
+    "%autoreload 2\n",
+    "\n",
+    "import os\n",
+    "import sys\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import pickle\n",
+    "\n",
+    "from rpy2.robjects import pandas2ri\n",
+    "pandas2ri.activate()\n",
+    "\n",
+    "from ponyo import utils\n",
+    "from generic_expression_patterns_modules import calc, process\n",
+    "\n",
+    "np.random.seed(123)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read in config variables\n",
+    "base_dir = os.path.abspath(os.path.join(os.getcwd(), \"../\"))\n",
+    "\n",
+    "config_filename = os.path.abspath(\n",
+    "    os.path.join(base_dir, \"configs\", \"config_human_general.tsv\")\n",
+    ")\n",
+    "\n",
+    "params = utils.read_config(config_filename)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load params\n",
+    "local_dir = params[\"local_dir\"]\n",
+    "project_id = params['project_id']\n",
+    "hallmark_DB_filename = params[\"pathway_DB_filename\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load DE stats directory\n",
+    "DE_stats_dir = os.path.join(local_dir, \"DE_stats\")\n",
+    "\n",
+    "# Template experiment DE stats\n",
+    "template_DE_stats_filename = os.path.join(\n",
+    "    DE_stats_dir,\n",
+    "    f\"DE_stats_template_data_{project_id}_real.txt\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Enrichment methods\n",
+    "* [ROAST](https://pubmed.ncbi.nlm.nih.gov/20610611/) is available in limma\n",
+    "* [CAMERA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3458527/) is available in limma\n",
+    "* [GSVA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3618321/) is available as its own Bioconductor package\n",
+    "* ORA (over-representation analysis) is available in Pathway Studio or DAVID\n",
+    "\n",
+    "TO DO: Write about each method"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define function\n",
+    "# ORA works on list of DE\n",
+    "# Apply voom on gene expression >> ROAST, CAMERA, GSVA\n",
+    "\n",
+    "# Process data using voom\n",
+    "\n",
+    "\n",
+    "# Run method on template experiments\n",
+    "# Run method on simulated experiments\n",
+    "# Output table sort by ranking"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get summary rank of pathways"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:generic_expression] *",
+   "language": "python",
+   "name": "conda-env-generic_expression-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/human_general_analysis/nbconverted/Try_other_enrichment_methods.py b/human_general_analysis/nbconverted/Try_other_enrichment_methods.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# ## Name
+# 
+# This notebook plugs in other gene set enrichment methods to demonstrate that our method, SOPHIE, can be inserted into different pipelines and work with other methods
+
+# In[1]:
+
+
+get_ipython().run_line_magic('load_ext', 'autoreload')
+get_ipython().run_line_magic('load_ext', 'rpy2.ipython')
+get_ipython().run_line_magic('autoreload', '2')
+
+import os
+import sys
+import pandas as pd
+import numpy as np
+import pickle
+
+from rpy2.robjects import pandas2ri
+pandas2ri.activate()
+
+from ponyo import utils
+from generic_expression_patterns_modules import calc, process
+
+np.random.seed(123)
+
+
+# In[2]:
+
+
+# Read in config variables
+base_dir = os.path.abspath(os.path.join(os.getcwd(), "../"))
+
+config_filename = os.path.abspath(
+    os.path.join(base_dir, "configs", "config_human_general.tsv")
+)
+
+params = utils.read_config(config_filename)
+
+
+# In[3]:
+
+
+# Load params
+local_dir = params["local_dir"]
+project_id = params['project_id']
+hallmark_DB_filename = params["pathway_DB_filename"]
+
+
+# In[4]:
+
+
+# Load DE stats directory
+DE_stats_dir = os.path.join(local_dir, "DE_stats")
+
+# Template experiment DE stats
+template_DE_stats_filename = os.path.join(
+    DE_stats_dir,
+    f"DE_stats_template_data_{project_id}_real.txt"
+)
+
+
+# ## Enrichment methods
+# * [ROAST](https://pubmed.ncbi.nlm.nih.gov/20610611/) is available in limma
+# * [CAMERA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3458527/) is available in limma
+# * [GSVA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3618321/) is available as its own Bioconductor package
+# * ORA (over-representation analysis) is available in Pathway Studio or DAVID
+# 
+# TO DO: Write about each method
+
+# In[5]:
+
+
+# Define function
+# ORA works on list of DE
+# Apply voom on gene expression >> ROAST, CAMERA, GSVA
+
+# Process data using voom
+
+
+# Run method on template experiments
+# Run method on simulated experiments
+# Output table sort by ranking
+
+
+# In[6]:
+
+
+# Get summary rank of pathways
+