Skip to content

Commit 140bf17

Browse files
committed
update to print Ne, rec
1 parent 382c0fb commit 140bf17

File tree

7 files changed

+81
-69
lines changed

7 files changed

+81
-69
lines changed

Diff for: project/sim_modules/readconfig.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,8 @@ def read_config_stats(configFile):
195195
assert max(size_list) <= n_haps
196196
sim_dt["spatial_sfs"] = config.getboolean(pop1, "compute_spatial_SFS")
197197
spatfold = config.getboolean(pop1, "fold_spatial_SFS")
198-
spatagg = config.getboolean(pop1, "aggregate_spatial_SFS")
199198
sim_dt["sfs"] = config.getboolean(pop1, "compute_SFS")
200199
sfsfold = config.getboolean(pop1, "fold_SFS")
201-
sfsagg = config.getboolean(pop1, "aggregate_SFS")
202200
sim_dt["ld"] = config.getboolean(pop1, "compute_LD")
203201
if sim_dt["ld"]:
204202
nb_times = config.getint(pop1, "pop_time_changes")
@@ -228,8 +226,8 @@ def read_config_stats(configFile):
228226
"win_size2": int(win_size_2),
229227
"calc_stats": stat_list,
230228
"afibs_fold": afibsfold,
231-
"spat_fold_agg": (spatfold, spatagg),
232-
"sfs_fold_agg": (sfsfold, sfsagg),
229+
"spat_fold_agg": (spatfold),
230+
"sfs_fold_agg": (sfsfold),
233231
"pw_quants": quant_list
234232
}
235233
if "ibs" in stat_list:

Diff for: project/sim_modules/sim_msprime.py

+16-6
Original file line numberDiff line numberDiff line change
@@ -264,9 +264,13 @@ def simulate_msprime(model_dict, demo_dataframe, param_df, sim_number: int,
264264
low, high = mut_rate
265265
mu = np.random.uniform(low, high, sim_number)
266266
else:
267-
mu = mut_rate
268-
if order:
267+
if len(mut_rate) < sim_number:
268+
mu = np.random.choice(mut_rate, sim_number)
269+
elif order:
269270
l_mu = len(mu)
271+
else:
272+
mu = mut_rate
273+
270274
else:
271275
mu = [mut_rate] * sim_number
272276

@@ -280,9 +284,12 @@ def simulate_msprime(model_dict, demo_dataframe, param_df, sim_number: int,
280284
rec = np.random.uniform(low, high, sim_number)
281285
# rec = np.random.exponential(rec_rate, sim_number)
282286
else:
283-
rec = rec_rate
284-
if order:
287+
if len(rec_rate) < sim_number:
288+
rec = np.random.choice(rec_rate, sim_number)
289+
elif order:
285290
l_rec = len(rec)
291+
else:
292+
rec = rec_rate
286293
else:
287294
rec = [rec_rate] * sim_number
288295

@@ -299,9 +306,12 @@ def simulate_msprime(model_dict, demo_dataframe, param_df, sim_number: int,
299306
low, high = effective_size
300307
scaled_Ne = np.random.randint(low, high, sim_number) * ploidy
301308
else:
302-
scaled_Ne = effective_size
303-
if order:
309+
if len(effective_size) < sim_number:
310+
scaled_Ne = np.random.choice(effective_size, sim_number)
311+
elif order:
304312
l_ne = len(scaled_Ne)
313+
else:
314+
scaled_Ne = effective_size
305315
else:
306316
scaled_Ne = [effective_size * ploidy] * sim_number
307317
# =========================================================================

Diff for: project/stat_modules/afs.py

+2-10
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ def spatial_histo_fast(pos, count, M, dmax=np.inf):
5959
return dist
6060

6161

62-
def asfs_stats(gt, pos, fold, agg):
63-
"""Calculate the aggregate allele frequence spectrum.
62+
def asfs_stats(gt, pos, fold):
63+
"""Calculate the allele frequence spectrum.
6464
6565
With many individuals the SFS becomes unwieldy, here I collapse the
6666
intermediate frequencies into 1 bin. This differs from the one above by
@@ -75,8 +75,6 @@ def asfs_stats(gt, pos, fold, agg):
7575
DESCRIPTION.
7676
pos : TYPE
7777
DESCRIPTION.
78-
agg : bool
79-
if True, return an aggregate SFS, 0,1,2, 3:-2, -2, -1
8078
fold : bool
8179
if True, return folded SFS
8280
@@ -94,12 +92,6 @@ def asfs_stats(gt, pos, fold, agg):
9492
sfsp = (allel.sfs(gtseg.count_alleles()[:, 1], gtseg.shape[1]))[1:-1]
9593
tots = np.sum(sfsp)
9694
sfs = sfsp / tots
97-
if agg:
98-
# TODO: test best binning.
99-
sfs[3] = sum(sfs[3:-2])
100-
sfs[4] = sfs[-2]
101-
sfs[5] = sfs[-1]
102-
sfs = sfs[:6]
10395

10496
return sfs
10597

Diff for: project/stat_modules/pwpopstats.py

+33-8
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from project.stat_modules.sequtils import get_seg, get_ac_seg, h2gt, pop2seg
1717

1818

19-
def d_tajD(p1, pos, gt, win_size, length_bp):
19+
def d_tajD(p1, pos, gt, win_size, length_bp, quants):
2020
"""Compute the difference in Tajima’s D between two populations in moving windows.
2121
2222
Parameters
@@ -49,6 +49,11 @@ def d_tajD(p1, pos, gt, win_size, length_bp):
4949
if size < 5:
5050
size = 5
5151
dtajd_ = allel.moving_delta_tajima_d(ac1_seg, ac2_seg, size=size)
52+
53+
if quants[0] < 0:
54+
dtajdq = [np.nanmean(dtajd_)]
55+
else:
56+
dtajdq = np.nanquantiles(dtajd_, quants)
5257
return dtajd_
5358

5459

@@ -86,7 +91,10 @@ def fst(p1, pos, gt, quants):
8691
ac2_seg = ac2.compress(loc_asc, axis=0)
8792
num, den = allel.hudson_fst(ac1_seg, ac2_seg)
8893
fst_snp = num / den
89-
fst_ = np.nanquantile(fst_snp, quants)
94+
if quants[0] < 0:
95+
fst_ = [np.nanmean(fst_snp)]
96+
else:
97+
fst_ = np.nanquantile(fst_snp, quants)
9098
return fst_
9199

92100

@@ -141,15 +149,21 @@ def dmin(p1, pos, gt, win_size, length_bp):
141149
return dmin_
142150

143151

144-
def gmin(p1, pos, gt, win_size, length_bp):
152+
def gmin(p1, pos, gt, win_size, length_bp, quants):
145153
"""Calculate minimum pairwise difference between two populations.
146154
147155
dmin / dxy
148156
"""
149157
dmin_ = dmin(p1, pos, gt, win_size, length_bp)
150158
dxy_ = dxy(p1, pos, gt, win_size, length_bp)
151159
gmin_win = dmin_/dxy_
152-
return gmin_win
160+
161+
if quants[0] < 0:
162+
gmin_ = [np.nanmean(gmin_win)]
163+
else:
164+
gmin_ = np.nanquantile(gmin_win, quants)
165+
166+
return gmin_
153167

154168

155169
def pi_fx(pos, gt, win_size, length_bp):
@@ -174,7 +188,10 @@ def dd1_2(p1, pos, gt, win_size, length_bp, quants):
174188
for p in [p1_, p2_]:
175189
gtpop = gt.take(p, axis=1)
176190
pi_ = pi_fx(pos, gtpop, win_size, length_bp)
177-
dd12_ls.extend(np.nanquantile((dmin_/pi_), quants))
191+
if quants[0] < 0:
192+
dd12_ls.append(np.nanmean(dmin_/pi_))
193+
else:
194+
dd12_ls.extend(np.nanquantile((dmin_/pi_), quants))
178195
return dd12_ls
179196

180197

@@ -214,7 +231,10 @@ def ddRank1_2(p1, pos, gt, win_size, length_bp, quants):
214231
gtpop = gt.take(p, axis=1)
215232
pw_win = pw_within(pos, gtpop, win_size, length_bp)
216233
ddR12 = [stats.percentileofscore(pw, dmin_[i]) for i, pw in enumerate(pw_win)]
217-
ddR12_ls.extend(np.nanquantile(ddR12, quants))
234+
if quants[0] < 0:
235+
ddR12_ls.append(np.nanmean(ddR12))
236+
else:
237+
ddR12_ls.extend(np.nanquantile(ddR12, quants))
218238
return ddR12_ls
219239

220240

@@ -233,7 +253,7 @@ def ld_window(pos, hap, win_size, length_bp):
233253
return ld_wins
234254

235255

236-
def zx(p1, pos, hap, win_size, length_bp, randn=500):
256+
def zx(p1, pos, hap, win_size, length_bp, quants, randn=500):
237257
"""FILET statistic.
238258
239259
Zx = (Zn_s1 + Zn_s2)/(2*Zn_sg)
@@ -259,7 +279,12 @@ def zx(p1, pos, hap, win_size, length_bp, randn=500):
259279
zn2 = np.array(ld_window(pos, h2, win_size, length_bp))
260280
zx_ = (zn1 + zn2) / (2 * zn_sg)
261281

262-
return zx_
282+
if quants[0] < 0:
283+
zxq = [np.nanmean(zx_)]
284+
else:
285+
zxq = np.nanquantile(zx_, quants)
286+
287+
return zxq
263288

264289

265290
def ibs_max(pos, hap, dmax):

Diff for: project/stat_modules/sumstats.py

+21-19
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def hap_het(self):
8686

8787
def spatial_sfs(self):
8888
stats_ls = []
89-
fold, agg = self.stats["spat_fold_agg"]
89+
fold = self.stats["spat_fold"]
9090
for p in self.stats["pop_config"]:
9191
hap_p, pos_p, counts_p = self.split_pop(p)
9292
n_haplo = len(p)
@@ -97,12 +97,6 @@ def spatial_sfs(self):
9797
haps = int(len(p)/2)
9898
spat_fold = spat_fold[0:haps]
9999
spat_ = spat_fold
100-
if agg:
101-
spat_ag = spat_[0:2]
102-
spat_ag.append(np.mean(spat_[3:-2]))
103-
spat_ag.append(spat_[-2])
104-
spat_ag.append(spat_[-1])
105-
spat_ = spat_ag
106100
stats_ls.extend(spat_)
107101

108102
return stats_ls
@@ -159,14 +153,14 @@ def ld(self):
159153

160154
return stats_ls
161155

162-
def sfs(self, agg=False, fold=False):
163-
fold, agg = self.stats["sfs_fold_agg"]
156+
def sfs(self, fold=False):
157+
fold = self.stats["sfs_fold"]
164158
gt = allel.HaplotypeArray(self.haparr.T)
165159
pos = allel.SortedIndex(self.pos)
166160
stats_ls = []
167161
for pop in self.stats["pop_config"]:
168162
gtpop = gt.take(pop, axis=1)
169-
sfs = afs.asfs_stats(gtpop, pos, fold, agg)
163+
sfs = afs.asfs_stats(gtpop, pos, fold)
170164
stats_ls.extend(sfs)
171165

172166
return stats_ls
@@ -191,8 +185,8 @@ def delta_tajD(self):
191185
stats_ls = []
192186
for p1, p2 in combinations(self.stats["pop_config"], 2):
193187
gtpops = gt.take(p1+p2, axis=1)
194-
flt = pwpopstats.d_tajD(len(p1), pos, gtpops, win_size, length_bp)
195-
stats_ls.extend(np.nanquantiles(flt, quants))
188+
flt = pwpopstats.d_tajD(len(p1), pos, gtpops, win_size, length_bp, quants)
189+
stats_ls.extend(flt)
196190
return stats_ls
197191

198192
def ld_2pop(self, D=0):
@@ -244,7 +238,11 @@ def dXY(self):
244238
for p1, p2 in combinations(self.stats["pop_config"], 2):
245239
gtpops = gt.take(p1+p2, axis=1)
246240
flt = pwpopstats.dxy(len(p1), pos, gtpops, win_size, length_bp)
247-
stats_ls.extend(np.nanquantile(flt, quants))
241+
if quants[0] < 0:
242+
dxy_ = [np.nanmean(flt)]
243+
else:
244+
dxy_ = np.nanquantile(flt, quants)
245+
stats_ls.extend(dxy_)
248246
return stats_ls
249247

250248
def dmin(self):
@@ -256,8 +254,12 @@ def dmin(self):
256254
stats_ls = []
257255
for p1, p2 in combinations(self.stats["pop_config"], 2):
258256
gtpops = gt.take(p1+p2, axis=1)
259-
flt = pwpopstats.dmin(len(p1), pos, gtpops, win_size, length_bp)
260-
stats_ls.extend(np.nanquantile(flt, quants))
257+
flt = pwpopstats.dmin(len(p1), pos, gtpops, win_size, length_bp, quants)
258+
if quants[0] < 0:
259+
dminq = [np.nanmean(flt)]
260+
else:
261+
dminq = np.nanquantile(flt, quants)
262+
stats_ls.extend(dminq)
261263
return stats_ls
262264

263265
def gmin(self):
@@ -269,8 +271,8 @@ def gmin(self):
269271
stats_ls = []
270272
for p1, p2 in combinations(self.stats["pop_config"], 2):
271273
gtpops = gt.take(p1+p2, axis=1)
272-
flt = pwpopstats.gmin(len(p1), pos, gtpops, win_size, length_bp)
273-
stats_ls.extend(np.nanquantile(flt, quants))
274+
flt = pwpopstats.gmin(len(p1), pos, gtpops, win_size, length_bp, quants)
275+
stats_ls.extend(flt)
274276
return stats_ls
275277

276278
def dd12(self):
@@ -307,8 +309,8 @@ def Zx(self):
307309
stats_ls = []
308310
for p1, p2 in combinations(self.stats["pop_config"], 2):
309311
hap_p, pos_p, counts_p = self.split_pop(p1+p2)
310-
flt = pwpopstats.zx(len(p1), pos_p, hap_p, win_size, length_bp)
311-
stats_ls.extend(np.nanquantile(flt, quants))
312+
flt = pwpopstats.zx(len(p1), pos_p, hap_p, win_size, length_bp, quants)
313+
stats_ls.extend(flt)
312314
return stats_ls
313315

314316
def IBS_maxXY(self):

Diff for: project/stat_modules/write_stats.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,15 @@ def headers(out_file, stats_dt, obs=False):
3737
sub_pops = [len(i) for i in stats_dt["pop_config"]]
3838
n_pops = len(sub_pops)
3939
quants = len(stats_dt["pw_quants"])
40-
sfsfold, sfsagg = stats_dt["sfs_fold_agg"]
41-
spatfold, spatagg = stats_dt["spat_fold_agg"]
40+
sfsfold = stats_dt["sfs_fold"]
41+
spatfold = stats_dt["spat_fold"]
4242
afibsfold = stats_dt["afibs_fold"]
4343
sfs_n = [sub-1 for sub in sub_pops]
4444
if sfsfold:
4545
sfs_n = [floor((sub)/2) for sub in sub_pops]
46-
if sfsagg:
47-
sfs_n = [6] * n_pops
4846
spat_n = [sub-1 for sub in sub_pops]
4947
if spatfold:
5048
spat_n = [floor((sub)/2) for sub in sub_pops]
51-
if spatagg:
52-
spat_n = [6] * n_pops
5349
afibs_n = [sub-2 for sub in sub_pops]
5450
if afibsfold:
5551
afibs_n = [floor((sub-1)/2) for sub in sub_pops]

Diff for: stat_test.py

+5-16
Original file line numberDiff line numberDiff line change
@@ -106,18 +106,15 @@ def test_afibs():
106106

107107
def test_sfs():
108108
# unfold
109-
sfs = afs.asfs_stats(gtpop, pos_s, False, False)
109+
sfs = afs.asfs_stats(gtpop, pos_s, False)
110110
ss = np.array([0.43859649, 0.16666667, 0.14912281, 0.01754386, 0.05263158,
111111
0.04385965, 0.0877193, 0.01754386, 0.02631579])
112112
assert(all(np.isclose(ss, sfs)) is True)
113113
# fold
114-
sfs = afs.asfs_stats(gtpop, pos_s, True, False)
114+
sfs = afs.asfs_stats(gtpop, pos_s, True)
115115
ss = np.array([0.46491228, 0.18421053, 0.23684211, 0.06140351, 0.05263158])
116116
assert(all(np.isclose(ss, sfs)) is True)
117-
# agg
118-
sfs = afs.asfs_stats(gtpop, pos_s, False, True)
119-
ss = np.array([0.43859649, 0.16666667, 0.14912281, 0.20175439, 0.01754386,
120-
0.02631579])
117+
121118
assert(all(np.isclose(ss, sfs)) is True)
122119

123120

@@ -170,14 +167,6 @@ def test_spatial_sfs():
170167
spat_f = spat_fold
171168
ss = np.array([13881., 7868., 15864., 32643., 24952.])
172169
assert(all(np.isclose(ss, spat_f)) is True)
173-
# agg
174-
spat_ag = list(spat_[0:3])
175-
spat_ag.append(np.mean(spat_[3:-2]))
176-
spat_ag.append(spat_[-2])
177-
spat_ag.append(spat_[-1])
178-
spat_a = np.array(spat_ag)
179-
ss = np.array([1419.0, 7868.0, 5648.0, 13833.75, 0.0, 12462.0])
180-
assert(all(np.isclose(ss, spat_a)) is True)
181170

182171

183172
def test_ld():
@@ -231,7 +220,7 @@ def test_dmin():
231220

232221

233222
def test_gmin():
234-
flt = pwpopstats.gmin(len(pop1), pos_s, gt, win_size, length_bp)
223+
flt = pwpopstats.gmin(len(pop1), pos_s, gt, win_size, length_bp, quants)
235224
stats_ls = np.quantile(flt, quants)
236225
ss = np.array([0.09416196, 0.35460993, 0.40257649, 0.48543689, 0.49382716])
237226
assert(all(np.isclose(ss, stats_ls)) is True)
@@ -251,7 +240,7 @@ def test_ddRank12():
251240

252241

253242
def test_Zx():
254-
flt = pwpopstats.zx(len(pop1), pos, haps, win_size, length_bp)
243+
flt = pwpopstats.zx(len(pop1), pos, haps, win_size, length_bp, quants)
255244
stats_ls = np.nanquantile(flt, quants)
256245
ss = np.array([0. , 0.21724138, 0.65702271, 1.21374723, 1.78181818])
257246
assert(all(np.isclose(ss, stats_ls)) is True)

0 commit comments

Comments
 (0)