Skip to content
This repository has been archived by the owner on Feb 6, 2024. It is now read-only.

Commit

Permalink
New option --subsample and --subsample-seed to subsample a number of …
Browse files Browse the repository at this point in the history
…partitions (for Rob Lanfear)
  • Loading branch information
bqminh committed Oct 30, 2019
1 parent 72c2cf7 commit 3be3e2a
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 0 deletions.
32 changes: 32 additions & 0 deletions alignment/superalignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,38 @@ void SuperAlignment::readFromParams(Params &params) {
part_names.insert((*pit)->name);
}

if (params.subsampling > 0) {
// sumsample a number of partitions
int subsample = params.subsampling;
if (subsample >= partitions.size())
outError("--subsample must be smaller than #partitions");
cout << "Random subsampling " << subsample << " partitions (seed: " << params.subsampling_seed << ")..." << endl;
double prop = double(subsample) / partitions.size();
int *rstream;
init_random(params.subsampling_seed, false, &rstream);
// make sure to sub-sample exact number
vector<bool> sample;
int i;
sample.resize(partitions.size(), false);
for (int num = 0; num < subsample; ) {
for (i = 0; i < sample.size(); i++) if (!sample[i]) {
if (random_double(rstream) < prop) {
sample[i] = true;
num++;
if (num >= subsample)
break;
}
}
}
finish_random(rstream);
vector<Alignment*> keep_partitions;
for (i = 0; i < sample.size(); i++)
if (sample[i])
keep_partitions.push_back(partitions[i]);
// now replace partitions
partitions = keep_partitions;
}

// Initialize the counter for evaluated NNIs on subtrees
cout << "Subset\tType\tSeqs\tSites\tInfor\tInvar\tModel\tName" << endl;
int part = 0;
Expand Down
18 changes: 18 additions & 0 deletions utils/tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,8 @@ void parseArg(int argc, char *argv[], Params &params) {
gettimeofday(&tv, &tz);
//params.ran_seed = (unsigned) (tv.tv_sec+tv.tv_usec);
params.ran_seed = (tv.tv_usec);
params.subsampling_seed = params.ran_seed;
params.subsampling = 0;

for (cnt = 1; cnt < argc; cnt++) {
try {
Expand Down Expand Up @@ -2757,6 +2759,22 @@ void parseArg(int argc, char *argv[], Params &params) {
continue;
}

if (strcmp(argv[cnt], "--subsample") == 0) {
cnt++;
if (cnt >= argc)
throw "Use --subsample NUMBER";
params.subsampling = convert_int(argv[cnt]);
continue;
}

if (strcmp(argv[cnt], "--subsample-seed") == 0) {
cnt++;
if (cnt >= argc)
throw "Use --subsample-seed <random_seed>";
params.subsampling_seed = convert_int(argv[cnt]);
continue;
}

#ifdef USE_BOOSTER
if (strcmp(argv[cnt], "--tbe") == 0) {
params.transfer_bootstrap = 1;
Expand Down
6 changes: 6 additions & 0 deletions utils/tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -1610,6 +1610,12 @@ class Params {
/** 1 or 2 to perform transfer boostrap expectation (TBE) */
int transfer_bootstrap;

/** subsampling some number of partitions / sites for analysis */
int subsampling;

/** random seed number for subsampling */
int subsampling_seed;

/**
1 if output all intermediate trees (initial trees, NNI-optimal trees and trees after each NNI step)
2 if output all intermediate trees + 1-NNI-away trees
Expand Down

0 comments on commit 3be3e2a

Please sign in to comment.