Skip to content

Commit afa738b

Browse files
committed
Merge commit 'a3686deab294cb1baa8c544024a5d65dcad90846'
2 parents 528cddc + a3686de commit afa738b

File tree

79 files changed

+829
-334
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+829
-334
lines changed

lib/mmseqs/.cirrus.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ env:
44
task:
55
name: FreeBSD-13
66
freebsd_instance:
7-
image_family: freebsd-13-0
7+
image_family: freebsd-13-2-snap
88
install_script: pkg install -y cmake git samtools
99
compile_script: |
1010
mkdir build && cd build

lib/mmseqs/cmake/MMseqsSetupDerivedTarget.cmake

+9-5
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
include(AppendTargetProperty)
22

33
function (mmseqs_setup_derived_target TARGET)
4-
get_target_property(COMPILE_TMP mmseqs-framework COMPILE_FLAGS)
5-
get_target_property(LINK_TMP mmseqs-framework LINK_FLAGS)
6-
get_target_property(DEF_TMP mmseqs-framework COMPILE_DEFINITIONS)
7-
get_target_property(INCL_TMP mmseqs-framework INCLUDE_DIRECTORIES)
4+
set(SOURCE "${ARGN}")
5+
if(NOT SOURCE)
6+
set(SOURCE "mmseqs-framework")
7+
endif()
8+
get_target_property(COMPILE_TMP ${SOURCE} COMPILE_FLAGS)
9+
get_target_property(LINK_TMP ${SOURCE} LINK_FLAGS)
10+
get_target_property(DEF_TMP ${SOURCE} COMPILE_DEFINITIONS)
11+
get_target_property(INCL_TMP ${SOURCE} INCLUDE_DIRECTORIES)
812

9-
target_link_libraries(${TARGET} mmseqs-framework)
13+
target_link_libraries(${TARGET} ${SOURCE})
1014
append_target_property(${TARGET} COMPILE_FLAGS ${COMPILE_TMP})
1115
append_target_property(${TARGET} LINK_FLAGS ${LINK_TMP})
1216
set_property(TARGET ${TARGET} APPEND PROPERTY COMPILE_DEFINITIONS ${DEF_TMP})

lib/mmseqs/data/workflow/blastp.sh

+41-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,33 @@ fail() {
55
exit 1
66
}
77

8+
abspath() {
9+
if [ -d "$1" ]; then
10+
(cd "$1"; pwd)
11+
elif [ -f "$1" ]; then
12+
if [ -z "${1##*/*}" ]; then
13+
echo "$(cd "${1%/*}"; pwd)/${1##*/}"
14+
else
15+
echo "$(pwd)/$1"
16+
fi
17+
elif [ -d "$(dirname "$1")" ]; then
18+
echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
19+
fi
20+
}
21+
22+
fake_pref() {
23+
QDB="$1"
24+
TDB="$2"
25+
RES="$3"
26+
# create link to data file which contains a list of all targets that should be aligned
27+
ln -s "$(abspath "${TDB}.index")" "${RES}"
28+
# create new index repeatedly pointing to same entry
29+
INDEX_SIZE="$(wc -c < "${TDB}.index")"
30+
awk -v size="$INDEX_SIZE" '{ print $1"\t0\t"size; }' "${QDB}.index" > "${RES}.index"
31+
# create dbtype (7)
32+
awk 'BEGIN { printf("%c%c%c%c",7,0,0,0); exit; }' > "${RES}.dbtype"
33+
}
34+
835
notExists() {
936
[ ! -f "$1" ]
1037
}
@@ -27,14 +54,23 @@ ALN_RES_MERGE="$TMP_PATH/aln_0"
2754
while [ "$STEP" -lt "$STEPS" ]; do
2855
SENS_PARAM=SENSE_${STEP}
2956
eval SENS="\$$SENS_PARAM"
30-
# call prefilter module
57+
58+
# 1. Prefilter hits
3159
if notExists "$TMP_PATH/pref_$STEP.dbtype"; then
32-
# shellcheck disable=SC2086
33-
$RUNNER "$MMSEQS" prefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $PREFILTER_PAR -s "$SENS" \
34-
|| fail "Prefilter died"
60+
if [ "$PREFMODE" = "EXHAUSTIVE" ]; then
61+
fake_pref "${INPUT}" "${TARGET}" "$TMP_PATH/pref_$STEP"
62+
elif [ "$PREFMODE" = "UNGAPPED" ]; then
63+
# shellcheck disable=SC2086
64+
$RUNNER "$MMSEQS" ungappedprefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $UNGAPPEDPREFILTER_PAR \
65+
|| fail "Ungapped prefilter died"
66+
else
67+
# shellcheck disable=SC2086
68+
$RUNNER "$MMSEQS" prefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $PREFILTER_PAR -s "$SENS" \
69+
|| fail "Prefilter died"
70+
fi
3571
fi
3672

37-
# call alignment module
73+
# 2. alignment module
3874
if [ "$STEPS" -eq 1 ]; then
3975
if notExists "$3.dbtype"; then
4076
# shellcheck disable=SC2086

lib/mmseqs/data/workflow/blastpgp.sh

+42-4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,33 @@ fail() {
55
exit 1
66
}
77

8+
abspath() {
9+
if [ -d "$1" ]; then
10+
(cd "$1"; pwd)
11+
elif [ -f "$1" ]; then
12+
if [ -z "${1##*/*}" ]; then
13+
echo "$(cd "${1%/*}"; pwd)/${1##*/}"
14+
else
15+
echo "$(pwd)/$1"
16+
fi
17+
elif [ -d "$(dirname "$1")" ]; then
18+
echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
19+
fi
20+
}
21+
22+
fake_pref() {
23+
QDB="$1"
24+
TDB="$2"
25+
RES="$3"
26+
# create link to data file which contains a list of all targets that should be aligned
27+
ln -s "$(abspath "${TDB}.index")" "${RES}"
28+
# create new index repeatedly pointing to same entry
29+
INDEX_SIZE="$(wc -c < "${TDB}.index")"
30+
awk -v size="$INDEX_SIZE" '{ print $1"\t0\t"size; }' "${QDB}.index" > "${RES}.index"
31+
# create dbtype (7)
32+
awk 'BEGIN { printf("%c%c%c%c",7,0,0,0); exit; }' > "${RES}.dbtype"
33+
}
34+
835
notExists() {
936
[ ! -f "$1" ]
1037
}
@@ -28,15 +55,26 @@ STEP=0
2855
while [ "$STEP" -lt "$NUM_IT" ]; do
2956
# call prefilter module
3057
if notExists "$TMP_PATH/pref_tmp_${STEP}.done"; then
31-
PARAM="PREFILTER_PAR_$STEP"
32-
eval TMP="\$$PARAM"
58+
if [ "$PREFMODE" = "EXHAUSTIVE" ]; then
59+
TMP=""
60+
PREF="fake_pref"
61+
elif [ "$PREFMODE" = "UNGAPPED" ]; then
62+
PARAM="UNGAPPEDPREFILTER_PAR_$STEP"
63+
eval TMP="\$$PARAM"
64+
PREF="${MMSEQS} ungappedprefilter"
65+
else
66+
PARAM="PREFILTER_PAR_$STEP"
67+
eval TMP="\$$PARAM"
68+
PREF="${MMSEQS} prefilter"
69+
fi
70+
3371
if [ "$STEP" -eq 0 ]; then
3472
# shellcheck disable=SC2086
35-
$RUNNER "$MMSEQS" prefilter "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" ${TMP} \
73+
$RUNNER $PREF "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" ${TMP} \
3674
|| fail "Prefilter died"
3775
else
3876
# shellcheck disable=SC2086
39-
$RUNNER "$MMSEQS" prefilter "$QUERYDB" "$2" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
77+
$RUNNER $PREF "$QUERYDB" "$2" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
4078
|| fail "Prefilter died"
4179
fi
4280
touch "$TMP_PATH/pref_tmp_${STEP}.done"

lib/mmseqs/data/workflow/createtaxdb.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ if { [ "${DBMODE}" = "1" ] && notExists "${TAXDBNAME}_taxonomy"; } || { [ "${DBM
5959
# Download NCBI taxon information
6060
if notExists "${TMP_PATH}/ncbi_download.complete"; then
6161
echo "Download taxdump.tar.gz"
62-
downloadFile "https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz" "${TMP_PATH}/taxdump.tar.gz"
62+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz" "${TMP_PATH}/taxdump.tar.gz"
6363
tar -C "${TMP_PATH}" -xzf "${TMP_PATH}/taxdump.tar.gz" names.dmp nodes.dmp merged.dmp delnodes.dmp
6464
touch "${TMP_PATH}/ncbi_download.complete"
6565
rm -f "${TMP_PATH}/taxdump.tar.gz"

lib/mmseqs/data/workflow/databases.sh

+5-5
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,9 @@ case "${SELECTION}" in
118118
if notExists "${TMP_PATH}/nr.gz"; then
119119
date "+%s" > "${TMP_PATH}/version"
120120
downloadFile "https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz" "${TMP_PATH}/nr.gz"
121-
downloadFile "https://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz" "${TMP_PATH}/prot.accession2taxid.gz"
121+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz" "${TMP_PATH}/prot.accession2taxid.gz"
122122
gunzip "${TMP_PATH}/prot.accession2taxid.gz"
123-
downloadFile "https://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/pdb.accession2taxid.gz" "${TMP_PATH}/pdb.accession2taxid.gz"
123+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/pdb.accession2taxid.gz" "${TMP_PATH}/pdb.accession2taxid.gz"
124124
gunzip "${TMP_PATH}/pdb.accession2taxid.gz"
125125
fi
126126
push_back "${TMP_PATH}/nr.gz"
@@ -147,7 +147,7 @@ case "${SELECTION}" in
147147
"PDB")
148148
if notExists "${TMP_PATH}/pdb_seqres.txt.gz"; then
149149
date "+%s" > "${TMP_PATH}/version"
150-
downloadFile "https://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt.gz" "${TMP_PATH}/pdb_seqres.txt.gz"
150+
downloadFile "https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt.gz" "${TMP_PATH}/pdb_seqres.txt.gz"
151151
fi
152152
push_back "${TMP_PATH}/pdb_seqres.txt.gz"
153153
INPUT_TYPE="FASTA_LIST"
@@ -212,8 +212,8 @@ case "${SELECTION}" in
212212
;;
213213
"CDD")
214214
if notExists "${TMP_PATH}/msa.msa.gz"; then
215-
downloadFile "https://ftp.ncbi.nih.gov/pub/mmdb/cdd/cdd.info" "${TMP_PATH}/version"
216-
downloadFile "https://ftp.ncbi.nih.gov/pub/mmdb/cdd/fasta.tar.gz" "${TMP_PATH}/msa.tar.gz"
215+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/mmdb/cdd/cdd.info" "${TMP_PATH}/version"
216+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/mmdb/cdd/fasta.tar.gz" "${TMP_PATH}/msa.tar.gz"
217217
fi
218218
INPUT_TYPE="FASTA_MSA"
219219
SED_FIX_LOOKUP='s|\.FASTA||g'

lib/mmseqs/data/workflow/taxpercontig.sh

+5-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,11 @@ if [ -n "${ORF_FILTER}" ]; then
4545
fi
4646

4747
if notExists "${TMP_PATH}/orfs_aln.list"; then
48-
awk '$3 > 1 { print $1 }' "${TMP_PATH}/orfs_aln.index" > "${TMP_PATH}/orfs_aln.list"
48+
# shellcheck disable=SC2086
49+
"$MMSEQS" recoverlongestorf "${ORFS_DB}" "${TMP_PATH}/orfs_aln" "${TMP_PATH}/orfs_aln_recovered.list" ${THREADS_PAR} \
50+
|| fail "recoverlongestorf died"
51+
awk '$3 > 1 { print $1 }' "${TMP_PATH}/orfs_aln.index" > "${TMP_PATH}/orfs_aln_remain.list"
52+
cat "${TMP_PATH}/orfs_aln_recovered.list" "${TMP_PATH}/orfs_aln_remain.list" > "${TMP_PATH}/orfs_aln.list"
4953
fi
5054

5155
if notExists "${TMP_PATH}/orfs_filter.dbtype"; then

lib/mmseqs/data/workflow/tsv2exprofiledb.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,19 @@ fi
2222

2323
if notExists "${OUT}.dbtype"; then
2424
"$MMSEQS" tsv2db "${IN}.tsv" "${OUT}_tmp" --output-dbtype 0 ${VERBOSITY}
25-
MMSEQS_FOCE_MERGE=1 "$MMSEQS" compress "${OUT}_tmp" "${OUT}" ${VERBOSITY}
25+
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_tmp" "${OUT}" ${VERBOSITY}
2626
"$MMSEQS" rmdb "${OUT}_tmp" ${VERBOSITY}
2727
fi
2828

2929
if notExists "${OUT}_seq.dbtype"; then
3030
"$MMSEQS" tsv2db "${IN}_seq.tsv" "${OUT}_seq_tmp" --output-dbtype 0 ${VERBOSITY}
31-
MMSEQS_FOCE_MERGE=1 "$MMSEQS" compress "${OUT}_seq_tmp" "${OUT}_seq" ${VERBOSITY}
31+
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_seq_tmp" "${OUT}_seq" ${VERBOSITY}
3232
"$MMSEQS" rmdb "${OUT}_seq_tmp" ${VERBOSITY}
3333
fi
3434

3535
if notExists "${OUT}_aln.dbtype"; then
3636
"$MMSEQS" tsv2db "${IN}_aln.tsv" "${OUT}_aln_tmp" --output-dbtype 5 ${VERBOSITY}
37-
MMSEQS_FOCE_MERGE=1 "$MMSEQS" compress "${OUT}_aln_tmp" "${OUT}_aln" ${VERBOSITY}
37+
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_aln_tmp" "${OUT}_aln" ${VERBOSITY}
3838
"$MMSEQS" rmdb "${OUT}_aln_tmp" ${VERBOSITY}
3939
fi
4040

lib/mmseqs/lib/alp/sls_pvalues.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ namespace Sls {
258258
return rand_C;
259259
};
260260

261-
static inline double standard_normal()//generates standard normal random value using the BoxMuller transform
261+
static inline double standard_normal()//generates standard normal random value using the Box-Muller transform
262262
{
263263
double r1=0;
264264
while(r1==0)

lib/mmseqs/lib/ksw2/kseq.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,10 @@ typedef struct __kstring_t {
113113
if (ks->end == -1) { ks->is_eof = 1; return -3; } \
114114
} else break; \
115115
} \
116-
if (delimiter == KS_SEP_LINE) { \
117-
for (i = ks->begin; i < ks->end; ++i) \
118-
if (ks->buf[i] == '\n') { ks->newline+=(append == 1); break; } \
116+
if (delimiter == KS_SEP_LINE) { \
117+
unsigned char *sep = (unsigned char*)memchr(ks->buf + ks->begin, '\n', ks->end - ks->begin); \
118+
i = sep != NULL ? sep - (unsigned char*)ks->buf : ks->end; \
119+
ks->newline += (sep != NULL && append == 1); \
119120
} else if (delimiter > KS_SEP_MAX) { \
120121
for (i = ks->begin; i < ks->end; ++i) \
121122
if (ks->buf[i] == delimiter) break; \

lib/mmseqs/src/CMakeLists.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ if (NOT EMSCRIPTEN)
7676
endif()
7777

7878
if (ENABLE_WERROR)
79-
append_target_property(mmseqs-framework COMPILE_FLAGS -Werror)
80-
append_target_property(mmseqs-framework LINK_FLAGS -Werror)
79+
append_target_property(mmseqs-framework COMPILE_FLAGS -Werror -Wno-unused-command-line-argument)
80+
append_target_property(mmseqs-framework LINK_FLAGS -Werror -Wno-unused-command-line-argument)
8181
endif()
8282

8383
# needed for concat.h
@@ -222,6 +222,7 @@ if (OPENMP_FOUND)
222222
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
223223
target_link_libraries(mmseqs-framework ${OpenMP_CXX_LIBRARIES})
224224
endif()
225+
target_include_directories(mmseqs-framework PUBLIC ${OpenMP_CXX_INCLUDE_DIRS})
225226
append_target_property(mmseqs-framework COMPILE_FLAGS ${OpenMP_CXX_FLAGS})
226227
append_target_property(mmseqs-framework LINK_FLAGS ${OpenMP_CXX_FLAGS})
227228
elseif (REQUIRE_OPENMP)

lib/mmseqs/src/CommandDeclarations.h

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ extern int convertkb(int argc, const char **argv, const Command& command);
2323
extern int convertmsa(int argc, const char **argv, const Command& command);
2424
extern int convertprofiledb(int argc, const char **argv, const Command& command);
2525
extern int createdb(int argc, const char **argv, const Command& command);
26+
extern int makepaddedseqdb(int argc, const char **argv, const Command& command);
2627
extern int createindex(int argc, const char **argv, const Command& command);
2728
extern int createlinindex(int argc, const char **argv, const Command& command);
2829
extern int createseqfiledb(int argc, const char **argv, const Command& command);
@@ -97,6 +98,7 @@ extern int ungappedprefilter(int argc, const char **argv, const Command& command
9798
extern int gappedprefilter(int argc, const char **argv, const Command& command);
9899
extern int unpackdb(int argc, const char **argv, const Command& command);
99100
extern int rbh(int argc, const char **argv, const Command& command);
101+
extern int recoverlongestorf(int argc, const char **argv, const Command& command);
100102
extern int result2flat(int argc, const char **argv, const Command& command);
101103
extern int result2msa(int argc, const char **argv, const Command& command);
102104
extern int result2dnamsa(int argc, const char **argv, const Command& command);

lib/mmseqs/src/MMseqsBase.cpp

+23-8
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ std::vector<Command> baseCommands = {
3939
"Slower, sensitive clustering",
4040
"mmseqs easy-cluster examples/DB.fasta result tmp\n"
4141
"# Cluster output\n"
42-
"# - result_rep_seq.fasta: Representatives\n"
43-
"# - result_all_seq.fasta: FASTA-like per cluster\n"
44-
"# - result_cluster.tsv: Adjacency list\n\n"
42+
"# - result_rep_seq.fasta: Representatives\n"
43+
"# - result_all_seqs.fasta: FASTA-like per cluster\n"
44+
"# - result_cluster.tsv: Adjacency list\n\n"
4545
"# Important parameter: --min-seq-id, --cov-mode and -c \n"
4646
"# --cov-mode \n"
4747
"# 0 1 2\n"
@@ -62,9 +62,9 @@ std::vector<Command> baseCommands = {
6262
"Fast linear time cluster, less sensitive clustering",
6363
"mmseqs easy-linclust examples/DB.fasta result tmp\n\n"
6464
"# Linclust output\n"
65-
"# - result_rep_seq.fasta: Representatives\n"
66-
"# - result_all_seq.fasta: FASTA-like per cluster\n"
67-
"# - result_cluster.tsv: Adjecency list\n\n"
65+
"# - result_rep_seq.fasta: Representatives\n"
66+
"# - result_all_seqs.fasta: FASTA-like per cluster\n"
67+
"# - result_cluster.tsv: Adjecency list\n\n"
6868
"# Important parameter: --min-seq-id, --cov-mode and -c \n"
6969
"# --cov-mode \n"
7070
"# 0 1 2\n"
@@ -130,14 +130,21 @@ std::vector<Command> baseCommands = {
130130
"<i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB>",
131131
CITATION_MMSEQS2, {{"fast[a|q]File[.gz|bz2]|stdin", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::VARIADIC, &DbValidator::flatfileStdinAndGeneric },
132132
{"sequenceDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::flatfile }}},
133+
{"makepaddedseqdb", makepaddedseqdb, &par.onlyverbosity, COMMAND_HIDDEN,
134+
"Generate a padded sequence DB",
135+
"Generate a padded sequence DB",
136+
"Martin Steinegger <[email protected]>",
137+
"<i:sequenceDB> <o:sequenceDB>",
138+
CITATION_MMSEQS2, {{"sequenceDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA|DbType::NEED_HEADER, &DbValidator::sequenceDb },
139+
{"sequenceIndexDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::sequenceDb }}},
133140
{"appenddbtoindex", appenddbtoindex, &par.appenddbtoindex, COMMAND_HIDDEN,
134141
NULL,
135142
NULL,
136143
"Milot Mirdita <[email protected]>",
137144
"<i:DB1> ... <i:DBN> <o:DB>",
138145
CITATION_MMSEQS2, {{"DB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::VARIADIC, &DbValidator::allDb },
139146
{"DB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::allDb }}},
140-
{"indexdb", indexdb, &par.indexdb, COMMAND_HIDDEN,
147+
{"indexdb", indexdb, &par.indexdb, COMMAND_HIDDEN,
141148
NULL,
142149
NULL,
143150
"Martin Steinegger <[email protected]>",
@@ -555,7 +562,7 @@ std::vector<Command> baseCommands = {
555562
{"resultDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::resultDb },
556563
{"pvalDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::resultDb },
557564
{"tmpDir", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::directory }}},
558-
{"mergeresultsbyset", mergeresultsbyset, &par.threadsandcompression,COMMAND_MULTIHIT,
565+
{"mergeresultsbyset", mergeresultsbyset, &par.mergeresultsbyset, COMMAND_MULTIHIT,
559566
"Merge results from multiple ORFs back to their respective contig",
560567
NULL,
561568
"Ruoshi Zhang, Clovis Norroy & Milot Mirdita <[email protected]>",
@@ -900,6 +907,14 @@ std::vector<Command> baseCommands = {
900907
"Eli Levy Karin <[email protected]> ",
901908
"<i:contigsSequenceDB> <i:extractedOrfsHeadersDB> <o:orfsAlignedToContigDB>",
902909
CITATION_MMSEQS2, {{"",DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, NULL}}},
910+
{"recoverlongestorf", recoverlongestorf, &par.onlythreads, COMMAND_EXPERT,
911+
"Recover longest ORF for taxonomy annotation after elimination",
912+
NULL,
913+
"Sung-eun Jang",
914+
"<i:orfDB> <i:resultDB> <o:tsvFile>",
915+
CITATION_MMSEQS2, {{"orfDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb},
916+
{"resultDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::resultDb},
917+
{"tsvFile", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::flatfile}}},
903918
{"reverseseq", reverseseq, &par.reverseseq, COMMAND_SEQUENCE,
904919
"Reverse (without complement) sequences",
905920
NULL,

lib/mmseqs/src/alignment/Matcher.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ size_t Matcher::resultToBuffer(char * buff1, const result_t &result, bool addBac
282282
*(tmpBuff-1) = '\t';
283283
tmpBuff = Util::fastSeqIdToBuffer(result.seqId, tmpBuff);
284284
*(tmpBuff-1) = '\t';
285-
tmpBuff += sprintf(tmpBuff,"%.3E",result.eval);
285+
tmpBuff += snprintf(tmpBuff, 32, "%.3E", result.eval);
286286
tmpBuff++;
287287
*(tmpBuff-1) = '\t';
288288
tmpBuff = Itoa::i32toa_sse2(result.qStartPos, tmpBuff);

0 commit comments

Comments
 (0)