Skip to content

Commit 2222fed

Browse files
Merge pull request #11 from phylo42/v0.2
V0.2
2 parents c8c7914 + a630778 commit 2222fed

File tree

4 files changed

+80
-31
lines changed

4 files changed

+80
-31
lines changed

CHANGELOG.txt

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
============================================================================================================
22
v0.2.0
3+
4+
The pre-publication release, intended to be the reference version for further improvements. Previous releases are kept mostly for historical reasons and are not supposed to be used.
5+
36
- Added support for partial loading of databases with phylo-k-mer filtering (--mu, --max-ram)
47
- Reintroduced parallelism (--threads)
58
- Reworked LWR formula so that it is normalized over all branches of the tree

epik.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
import subprocess
1313

1414

15+
__version__ = "0.2.0"
16+
17+
1518
@click.group()
19+
@click.version_option(__version__)
1620
def epik():
1721
"""
1822
EPIK: Evolutionary Placement with Informative K-mers
@@ -46,7 +50,7 @@ def epik():
4650
help="Output directory.")
4751
@click.option('--threads',
4852
type=int,
49-
default=4, show_default=True,
53+
default=1, show_default=True,
5054
help="Number of threads used.")
5155
@click.option('--max-ram',
5256
type=str,
@@ -57,7 +61,10 @@ def place(database, states, omega, mu, outputdir, threads, max_ram, input_file):
5761
"""
5862
Places .fasta files using the input IPK database.
5963
60-
\tpython epik.py place -s [nucl|amino] -i db.ipk -o output file.fasta [file2.fasta ...]
64+
epik.py place -s [nucl|amino] -i DB.ipk -o output file.fasta [file2.fasta ...]
65+
66+
Examples:
67+
\tepik.py place -i DB.ipk -o temp --max-ram 4G --threads 8 query.fasta
6168
6269
"""
6370
place_queries(database, states, omega, mu, outputdir, threads, max_ram, input_file)

epik/CMakeLists.txt

+15-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
11
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
22

3-
set(ENABLE_OMP OFF)
4-
set(ENABLE_SSE OFF)
5-
set(ENABLE_AVX2 OFF)
6-
set(ENABLE_AVX512 OFF)
3+
if (NOT DEFINED ENABLE_OMP)
4+
set(ENABLE_OMP ON)
5+
endif()
6+
7+
if (NOT DEFINED ENABLE_SSE)
8+
set(ENABLE_SSE OFF)
9+
endif()
10+
11+
if (NOT DEFINED ENABLE_AVX2)
12+
set(ENABLE_AVX2 OFF)
13+
endif()
14+
15+
if (NOT DEFINED ENABLE_AVX512)
16+
set(ENABLE_AVX512 OFF)
17+
endif()
718

819
find_package(RapidJSON REQUIRED)
920
find_package(Boost REQUIRED COMPONENTS filesystem)

epik/src/epik/main.cpp

+53-25
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <sstream>
66
#include <iomanip>
77
#include <cctype>
8+
#include <cmath>
89
#include <stdexcept>
910
#include <boost/filesystem.hpp>
1011
#include <cxxopts.hpp>
@@ -63,29 +64,51 @@ void print_intruction_set()
6364

6465
/// Formats a number as a short human-readable string.
///
/// Values below 1024 are printed without a suffix; larger values are divided
/// by 1024, 1024^2 or 1024^3 and get the suffix "K", "M" or "B" respectively.
/// A value with no fractional part is printed as an integer ("512", "1K");
/// any other value is printed with exactly one digit after the decimal point
/// ("12.3", "1.5K"). The same rule applies to every magnitude, so
/// floating-point inputs below 1024 are no longer printed with six decimals.
///
/// @tparam T an arithmetic type (the visible callers pass entry counts and
///           sequences-per-second speeds)
/// @param num the number to format
/// @return the formatted string
template<typename T>
std::string to_human_readable(T num)
{
    constexpr double kilo = 1024.0;
    constexpr double mega = kilo * 1024.0;
    constexpr double giga = mega * 1024.0;

    // Compare in double so that unsigned T is never mixed with signed int literals
    const double raw = static_cast<double>(num);

    double value = raw;
    const char* suffix = "";
    if (raw >= giga)
    {
        value = raw / giga;
        // "B" is kept as the largest suffix to preserve the existing output
        suffix = "B";
    }
    else if (raw >= mega)
    {
        value = raw / mega;
        suffix = "M";
    }
    else if (raw >= kilo)
    {
        value = raw / kilo;
        suffix = "K";
    }

    // Whole values are printed without a decimal point. Choosing the precision
    // instead of casting to an integer avoids overflow for very large inputs.
    double int_part;
    const bool is_whole = (std::modf(value, &int_part) == 0.0);

    std::ostringstream oss;
    oss.precision(is_whole ? 0 : 1);
    oss << std::fixed << value << suffix;
    return oss.str();
}
90113

91114
/// Size_t-to-string that translates milliseconds to humanized time
@@ -128,9 +151,9 @@ std::string humanize_time(size_t milliseconds)
128151
return oss.str();
129152
}
130153

131-
/// Parse the humanized RAM size to a number.
132-
/// I know that the name of this function is unfortunate.
133-
size_t dehumanize_ram(const std::string& max_ram)
154+
/// Parse a human-readable --max-ram value as the number of bytes
155+
/// e.g. 128K, 50M, 4.2Gb etc.
156+
size_t parse_human_readable(const std::string& max_ram)
134157
{
135158
double value;
136159
char unit = 0;
@@ -140,7 +163,7 @@ size_t dehumanize_ram(const std::string& max_ram)
140163
ss >> value;
141164
if (ss.fail())
142165
{
143-
throw std::runtime_error("Can't parse max_ram parameter: wrong numerical part");
166+
throw std::runtime_error("Could not parse --max-ram parameter: wrong numerical part");
144167
}
145168

146169
// Check if there is a memory unit
@@ -149,7 +172,7 @@ size_t dehumanize_ram(const std::string& max_ram)
149172
ss >> unit;
150173
if (ss.fail())
151174
{
152-
throw std::runtime_error("Can't parse max_ram parameter: wrong unit");
175+
throw std::runtime_error("Could not parse --max-ram parameter: wrong unit");
153176
}
154177
}
155178

@@ -230,10 +253,15 @@ int main(int argc, char** argv)
230253
if (parsed_options.count("max-ram"))
231254
{
232255
const auto max_ram_string = parsed_options["max-ram"].as<std::string>();
233-
const auto max_ram = dehumanize_ram(max_ram_string);
234-
max_entries = max_ram / sizeof(i2l::pkdb_value);
256+
const auto max_ram = parse_human_readable(max_ram_string);
257+
max_entries = static_cast<size_t>(max_ram / sizeof(i2l::pkdb_value));
258+
259+
if (max_entries == 0)
260+
{
261+
throw std::runtime_error("Memory limit is too low");
262+
}
235263
std::cout << "Max-RAM provided: will be loaded not more than "
236-
<< humanize(max_entries) << " phylo-k-mers." << std::endl;
264+
<< to_human_readable(max_entries) << " phylo-k-mers." << std::endl;
237265
}
238266

239267
#ifndef EPIK_OMP
@@ -259,8 +287,8 @@ int main(int argc, char** argv)
259287
<< "\tk: " << db.kmer_size() << std::endl
260288
<< "\tomega: " << db.omega() << std::endl
261289
<< "\tPositions loaded: " << (db.positions_loaded() ? "true" : "false") << std::endl << std::endl;
262-
std::cout << "Loaded " << humanize(db.get_num_entries_loaded())
263-
<< " of " << humanize(db.get_num_entries_total())
290+
std::cout << "Loaded " << to_human_readable(db.get_num_entries_loaded())
291+
<< " of " << to_human_readable(db.get_num_entries_total())
264292
<< " phylo-k-mers. " << std::endl << std::endl;
265293

266294
const auto tree = i2l::io::parse_newick(db.tree());
@@ -325,7 +353,7 @@ int main(int argc, char** argv)
325353
average_speed += seq_per_second;
326354

327355
// Update progress bar
328-
bar.set_option(option::PrefixText{humanize(seq_per_second) + " seq/s "});
356+
bar.set_option(option::PrefixText{to_human_readable(seq_per_second) + " seq/s "});
329357
bar.set_option(option::PostfixText{std::to_string(num_seq_placed) + " / ?"});
330358
bar.set_progress(reader.bytes_read());
331359

@@ -339,12 +367,12 @@ int main(int argc, char** argv)
339367

340368
average_speed /= (double)num_iterations;
341369
bar.set_option(option::PrefixText{"Done. "});
342-
bar.set_option(option::PostfixText{std::to_string(num_seq_placed)});
370+
bar.set_option(option::PostfixText{to_human_readable(num_seq_placed)});
343371
bar.set_progress(reader.bytes_read());
344372

345373
std::cout << std::endl << termcolor::bold << termcolor::white
346374
<< "Placed " << num_seq_placed << " sequences.\nAverage speed: "
347-
<< humanize(average_speed) << " seq/s.\n";
375+
<< to_human_readable(average_speed) << " seq/s.\n";
348376
std::cout << "Output: " << jplace_filename << std::endl;
349377

350378
const auto placement_time = std::chrono::duration_cast<std::chrono::milliseconds>(

0 commit comments

Comments
 (0)