Skip to content

Commit dd96592

Browse files
committed
take ordered file to mimic tbs in ms
1 parent 3b91fa0 commit dd96592

File tree

3 files changed

+51
-25
lines changed

3 files changed

+51
-25
lines changed

project/sim_modules/readconfig.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,19 @@ def read_config_sims(configFile, ms_path):
2525
contig_len = int(config.getfloat(sim, "contiglen"))
2626
num_loci = config.getint(sim, "loci")
2727

28-
effective_size = config.get(sim, "effective_population_size")
29-
if "," in effective_size:
30-
effective_size = list(map(float, effective_size.split(",")))
31-
effective_size = list(map(int, effective_size))
28+
ne_size = config.get(sim, "effective_population_size")
29+
if "," in ne_size:
30+
effective_size = list(map(float, ne_size.split(",")))
31+
effective_size = list(map(int, ne_size))
32+
elif ne_size[0].isalpha():
33+
if os.path.exists(ne_size):
34+
print(f"loading {ne_size} ...")
35+
effective_size = list(np.loadtxt(ne_size))
36+
else:
37+
print(f"loading {os.path.join(config_path, ne_size)} ...")
38+
effective_size = list(np.loadtxt(os.path.join(config_path, ne_size)))
3239
else:
33-
effective_size = int(float(effective_size))
40+
effective_size = int(float(ne_size))
3441

3542
recomb_rate = config.get(sim, "recombination_rate")
3643
if recomb_rate:

project/sim_modules/sim_msprime.py

+32-17
Original file line numberDiff line numberDiff line change
@@ -156,19 +156,15 @@ def run_simulation(param_df):
156156
# build demographic command line
157157
demo_events = demo_config(param_df)
158158

159-
ne0 = np.random.choice(scaled_Ne)
160-
mu_t = np.random.choice(mu)
161-
rec_t = np.random.choice(rec)
162-
pfileout.write(f"{ne0}\t{mu_t}\t{rec_t}\n")
163159
# check demo
164160
if dry_run:
165161
checkDemo(pops, demo_events)
166162
return None
167163
else:
168164
trees = msp.simulate(
169-
Ne=ne0,
170-
recombination_rate=rec_t,
171-
mutation_rate=mu_t,
165+
Ne=param_df["ne_t"],
166+
recombination_rate=param_df["rec_t"],
167+
mutation_rate=param_df["mu_t"],
172168
num_replicates=model_dt["loci"],
173169
length=model_dt["contig_length"],
174170
population_configurations=pops,
@@ -209,7 +205,8 @@ def checkDemo(pops, demo_events):
209205

210206

211207
def simulate_msprime(model_dict, demo_dataframe, param_df, sim_number: int,
212-
outfile: str, nprocs: int, stats_config: str, dryrun: bool):
208+
outfile: str, nprocs: int, stats_config: str, dryrun: bool,
209+
order: bool):
213210
"""Run code for simulating msprime.
214211
215212
Parameters
@@ -260,61 +257,80 @@ def simulate_msprime(model_dict, demo_dataframe, param_df, sim_number: int,
260257

261258
# set mutation rate
262259
global mu
260+
l_mu = np.nan
263261
mut_rate = model_dt["mutation_rate"]
264262
if type(mut_rate) is list:
265263
if len(mut_rate) == 2:
266264
low, high = mut_rate
267265
mu = np.random.uniform(low, high, sim_number)
268266
else:
269267
mu = mut_rate
268+
if order:
269+
l_mu = len(mu)
270270
else:
271-
mu = [mut_rate]
271+
mu = [mut_rate] * sim_number
272272

273273
# set recombination rate
274274
global rec
275+
l_rec = np.nan
275276
rec_rate = model_dt["recombination_rate"]
276277
if type(rec_rate) is list:
277278
if len(rec_rate) == 2:
278279
low, high = rec_rate
279280
rec = np.random.uniform(low, high, sim_number)
281+
# rec = np.random.exponential(rec_rate, sim_number)
280282
else:
281283
rec = rec_rate
284+
if order:
285+
l_rec = len(rec)
282286
else:
283-
# rec = np.random.exponential(rec_rate, sim_number)
284-
rec = [rec_rate]
287+
rec = [rec_rate] * sim_number
285288

286289
# set ploidy
287290
global ploidy
288291
ploidy = model_dt["ploidy"]
289292

290293
# set effective pop size
291294
global scaled_Ne
295+
l_ne = np.nan
292296
effective_size = model_dt["eff_size"]
293297
if type(effective_size) is list:
294298
if len(effective_size) == 2:
295299
low, high = effective_size
296300
scaled_Ne = np.random.randint(low, high, sim_number) * ploidy
297301
else:
298302
scaled_Ne = effective_size
303+
if order:
304+
l_ne = len(scaled_Ne)
299305
else:
300-
scaled_Ne = [effective_size * ploidy]
301-
global pfileout
302-
pfileout = open(f"{outfile}.ne_mu_rec.out", 'w')
306+
scaled_Ne = [effective_size * ploidy] * sim_number
303307
# =========================================================================
304308
# Main simulations
305309
# =========================================================================
306310
# set up generator fx for MP
311+
if order:
312+
l_min = np.nanmin([l_mu, l_rec, l_ne])
313+
sim_number = int(l_min)
314+
print(f"order requested, setting sim_number to shortest param file: {l_min}")
315+
316+
with open(f"{outfile}.ne_mu_rec.out", 'w') as pfile:
317+
pfile.write("Ne\tmu\trec\n")
318+
for i in range(sim_number):
319+
pfile.write(f"{int(scaled_Ne[i])}\t{mu[i]}\t{rec[i]}\n")
320+
307321
event = param_df["event"].values
308322
pops = param_df["pops"].values
309323
time_arr = list(zip(*param_df["time"].values))
310324
value_arr = list(zip(*param_df["value"].values))
311-
param_gen = ({"time": time_arr[i], "event": event, "pops": pops, "value": value_arr[i]} for i in range(sim_number))
325+
param_gen = ({"ne_t": scaled_Ne[i], "mu_t": mu[i], "rec_t": rec[i],
326+
"time": time_arr[i], "event": event, "pops": pops, "value": value_arr[i]} for i in range(sim_number))
327+
# param_gen = ({"time": time_arr[i], "event": event, "pops": pops, "value": value_arr[i]} for i in range(sim_number))
312328
param_gen = list(param_gen)
329+
313330
# check nprocs
314331
if nprocs > multiprocessing.cpu_count(): # check that there are not more requested than available
315332
print("not {nprocs} processors available, setting to {multiprocessing.cpu_count()}")
316333
nprocs = multiprocessing.cpu_count()
317-
318334
# perform sims
319335
global stats_dt
320336
global header_len
@@ -349,4 +365,3 @@ def simulate_msprime(model_dict, demo_dataframe, param_df, sim_number: int,
349365
pops_outfile.close()
350366
else:
351367
print("No stats file given with msprime")
352-
pfileout.close()

run_sims.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ def parse_args(args_in):
104104
parser.add_argument("--set_priors", type=str,
105105
help="provide a list of priors to be reused will overide"
106106
" all other parameters")
107+
parser.add_argument("--in_order", action="store_true",
108+
help="will draw from rec, mu, and Ne files in order")
107109
parser.add_argument("--nprocs", type=int, default=1,
108110
help="processors for MP")
109111
parser.add_argument("--dryrun", action="store_true",
@@ -126,6 +128,7 @@ def main():
126128
outfile = args.out
127129
ploidy = args.ploidy
128130
priors_df = args.set_priors
131+
order = args.in_order
129132
nprocs = args.nprocs
130133
dry_run = args.dryrun
131134
stats_config = args.stats_config
@@ -145,14 +148,15 @@ def main():
145148
# =========================================================================
146149
# parses the model file and draw all parameters
147150
demo_df, param_df = parse_model(model_file, sim_number)
151+
# load and/or write priors
148152
if priors_df:
149153
assert os.path.exists(priors_df), "no priors file found"
150154
param_df = pd.read_csv(
151155
priors_df, header=[0, 1], index_col=0, skipinitialspace=True)
152-
else:
153-
# write priors
156+
else: # save priors
154157
priors_outfile = f"{sim_path}"
155158
write_params(param_df, priors_outfile, sim_number, dry_run)
159+
# call sims
156160
if "discoal" in ms_path:
157161
simulate_discoal(ms_path, model_dt, demo_df, param_df, sim_number,
158162
sim_path, nprocs, stats_config, dry_run)
@@ -167,7 +171,7 @@ def main():
167171
sim_path, nprocs, stats_config, dry_run, args.approx)
168172
elif "msprime" in ms_path:
169173
simulate_msprime(model_dt, demo_df, param_df, sim_number,
170-
sim_path, nprocs, stats_config, dry_run)
174+
sim_path, nprocs, stats_config, dry_run, order)
171175

172176

173177
if __name__ == "__main__":

0 commit comments

Comments
 (0)