|
| 1 | +#!/usr/bin/env python |
| 2 | +# coding: utf-8 |
| 3 | + |
| 4 | +# In[1]: |
| 5 | + |
| 6 | + |
| 7 | +# Draw a 2D log-log scatterplot with points coloured by threshold class (density contours are not implemented yet). |
| 8 | +import sys |
| 9 | +import seaborn as sb |
| 10 | +import numpy as np |
| 11 | +import pandas as pd |
| 12 | +import matplotlib |
| 13 | +import matplotlib.colors as mcolors |
| 14 | +import matplotlib.pyplot as plt |
| 15 | +import matplotlib.font_manager as font_manager |
| 16 | +pd.options.mode.chained_assignment = None # stop annoying warning when clipping columns. default='warn' |
| 17 | + |
| 18 | + |
| 19 | +# In[2]: |
| 20 | + |
| 21 | + |
# Path of the tab-separated input table. Previously used inputs are kept
# commented out so they can be switched back in by hand.
#userInputFile = '/Users/jra/Desktop/CTCF_tmp/loops/CTCF_D4_pad1000_FDR0.05_quickAssoc_no11.csv'
#userInputFile = '/Users/jra/Desktop/CTCF_tmp/loops/H3K27ac/ESC_vs_EpiLC_FDR0.05_quickAssoc_no11.csv'
userInputFile = '/Users/jra/Library/CloudStorage/Dropbox/3dname/28_feb_2023/CnR_D0-4_WT-TKO_peaks_RPKM_DNAme_filtered_sort_limma_thresholded.txt'

# read_table parses tab-delimited text by default.
inputFile = pd.read_table(userInputFile)

# Echo the header so the column names configured further down can be checked.
print(inputFile.columns.to_list())
| 27 | + |
| 28 | + |
| 29 | +# In[3]: |
| 30 | + |
| 31 | + |
# Replicate-level column names for the HiChIP layout. They are only read by
# the commented-out HiChIP selection at the bottom of this cell; alternative
# sets are kept commented out so they can be swapped in.
#x1 = 'CTCF_D4_DnmtWT_rep1.filt.intra'
#x2 = 'CTCF_D4_DnmtWT_rep2.filt.intra'
#y1 = 'CTCF_D4_DnmtTKO_rep1.filt.intra'
#y2 = 'CTCF_D4_DnmtTKO_rep2.filt.intra'
x1 = 'D0_DnmtWT_rep1.filt.intra'
x2 = 'D0_DnmtWT_rep2.filt.intra'
y1 = 'D4_DnmtWT_rep1.filt.intra'
y2 = 'D4_DnmtWT_rep2.filt.intra'
#x1 = 'CTCF_D0_DnmtWT_rep1.filt.intra'
#x2 = 'CTCF_D0_DnmtWT_rep2.filt.intra'
#y1 = 'CTCF_D4_DnmtWT_rep1.filt.intra'
#y2 = 'CTCF_D4_DnmtWT_rep2.filt.intra'

# Columns plotted on the x and y axes for the CnR layout.
x = 'E14_D4_WT_CTCF_CUTnRUN_AMS042021_rep1-2.bam_RPKM'
y = 'E14_D4_TKO_CTCF_CUTnRUN_AMS042021_rep1-2.bam_RPKM'
#x = 'E14_D0_WT_CTCF_CUTnRUN_AMS042021_rep1-2.bam_RPKM'
#y = 'E14_D0_TKO_CTCF_CUTnRUN_AMS042021_rep1-2.bam_RPKM'

# Columns holding the fold change and the significance value.
logFC = 'logFC'
#FDR = 'FDR' # HiChIP
FDR = 'adj.P.Val' # CnR
# region = 'region'

#df = inputFile[[x1,x2,y1,y2,logFC,FDR,region]] # HiChIP
# Take an explicit copy of the selected columns: `df` gets new columns
# assigned to it later in the script, and writing into a bare selection of
# `inputFile` is exactly what pandas' chained-assignment warning is about.
df = inputFile[[x, y, logFC, FDR]].copy() # CnR

# Bare expression: shows the frame when run as a notebook cell and has no
# effect when the file is run as a plain script.
df
| 58 | + |
| 59 | + |
| 60 | +# In[4]: |
| 61 | + |
| 62 | + |
# sum the replicates for HiChIP data
# df['x'] = df[x1] + df[x2]
# df['y'] = df[y1] + df[y2]

# Set to True to reverse the sign of the fold-change column before
# thresholding (original note: "diffloops flipped on me"). The default leaves
# the column untouched, replacing the earlier no-op self-assignment.
flip_logFC = False
if flip_logFC:
    df[logFC] = -df[logFC]

# set the filtering thresholds
FDR_cutoff = 0.05
logFC_cutoff = 1

# A row is classified only if it passes the FDR cutoff; the sign of the fold
# change then decides between the two classes. The mask is computed once and
# shared by both conditions.
significant = df[FDR] < FDR_cutoff
conditions = [
    significant & (df[logFC] > logFC_cutoff),
    significant & (df[logFC] < -logFC_cutoff),
]

# Labels paired positionally with `conditions`; rows matching neither
# condition fall through to the default label 'NN'.
names = ['UP', 'DN']
#colours = {'NN':'0.333, 0.333, 0.333, 0.1', 'UP':'0.333, 0.333, 0.333, 1', 'DN':'0.878, 0.267, 0.278, 1'}
df['thresh'] = np.select(conditions, names, default='NN')

# RGBA colour per class. With the active palette 'DN' and 'NN' share the same
# nearly transparent grey, so only 'UP' points are drawn opaque.
colors_dict = {
    'UP': (0.878, 0.267, 0.278, 1.000),
    'DN': (0.333, 0.333, 0.333, 0.050),
    'NN': (0.333, 0.333, 0.333, 0.050),
}
#colors_dict = {'UP':(0.004, 0.627, 0.451, 1.000),
#               'DN':(0.62, 0.208, 0.62, 1.000),
#               'NN':(0.333, 0.333, 0.333, 0.050),}

# Normalise each class colour once, then look it up per row, instead of
# converting the same three tuples again for every data point.
rgba_by_class = {label: mcolors.to_rgba(rgba) for label, rgba in colors_dict.items()}
colors = [rgba_by_class[label] for label in df['thresh']]

# count the number of instances each condition is met
counts = df['thresh'].value_counts()
print(counts)
| 101 | + |
| 102 | + |
| 103 | +# In[7]: |
| 104 | + |
| 105 | + |
# matplotlib.rcParams['svg.fonttype'] = 'none' # saves fonts as a text object instead of a vector path

# Global figure settings: base font size and a square 6x6 inch canvas.
small_size = 12
medium_size = 16
plt.rcParams['font.size'] = medium_size
plt.rcParams["figure.figsize"] = (6,6)

# Font used for the axis labels and title, loaded straight from a font file.
font_path = '/opt/X11/share/system_fonts/HelveticaNeue.ttc'
font_name = 'Helvetica Neue'
prop = font_manager.FontProperties(fname=font_path)

# One marker per row, coloured by its threshold class.
# NOTE: DataFrame.plot returns a matplotlib Axes, not a Figure. The name
# `fig` is kept because the save step reaches the figure via `fig.figure`.
# fig = df.plot(kind="scatter", x = 'x', y = 'y', c = colors, s=12) # HiChIP
fig = df.plot(kind="scatter", x=x, y=y, c=colors, s=12, marker='o', edgecolors='none')

# NOTE(review): both axes are logarithmic, so rows whose x or y value is not
# strictly positive presumably cannot be shown - confirm this is acceptable.
fig.set_xscale('log')
fig.set_yscale('log')

# Write one "<class>: <count>" line per class to the right of the axes,
# stacking downwards in steps of 0.1 (axes-fraction coordinates).
for row, (label, count) in enumerate(counts.items()):
    fig.text(1.1, 0.95 - row*0.1, f"{label}: {count}", ha='left', va='top', transform=fig.transAxes)

# NOTE(review): these labels and the title look left over from the HiChIP
# configuration; the columns plotted are named by `x` and `y` - confirm the
# wording before publishing the figure.
fig.set_xlabel("ESC contacts", fontname=font_name, fontproperties=prop)
fig.set_ylabel("EpiLC contacts", fontname=font_name, fontproperties=prop)
fig.set_title("HiChIP loops", fontname=font_name, fontproperties=prop)

# Same font family and size for the tick labels of both axes.
for tick in [*fig.get_xticklabels(), *fig.get_yticklabels()]:
    tick.set_fontname(font_name)
    tick.set_fontsize(small_size)

# Thin bottom/left spines; top/right spines get zero width so they vanish.
for side, width in (('bottom', 0.5), ('left', 0.5), ('top', 0), ('right', 0)):
    fig.spines[side].set_linewidth(width)
fig.tick_params(width=0.5)
| 141 | + |
| 142 | + |
| 143 | +# In[8]: |
| 144 | + |
| 145 | + |
# Number of rows plotted; embedded in the output file name.
datapoint_count = df.shape[0]

# The output path is the input path with a descriptive suffix appended, so
# the image is written next to the input table.
#outFigure = "%s_2D_scatter_%s_datapoints.svg" % (userInputFile, datapoint_count)
#fig.figure.savefig(outFigure)
outFigure = f"{userInputFile}_2D_scatter_{datapoint_count}_datapoints.png"

# `fig` is an Axes, so the owning Figure is reached through `.figure`.
fig.figure.savefig(
    outFigure,
    dpi=1200,
    bbox_inches='tight',
    transparent=True,
)
| 151 | + |
| 152 | + |
| 153 | +# In[ ]: |
| 154 | + |
| 155 | + |
| 156 | + |
| 157 | + |
0 commit comments