Skip to content

Commit c2c3ad9

Browse files
result2profile can print frequencies in tsv format (#907)
1 parent 747c64c commit c2c3ad9

File tree

7 files changed

+68
-13
lines changed

7 files changed

+68
-13
lines changed

Diff for: src/alignment/PSSMCalculator.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,23 @@ void PSSMCalculator::printPSSM(size_t queryLength){
239239
}
240240
}
241241

242+
void PSSMCalculator::profileToString(std::string& result, size_t queryLength){
243+
result.append(5, ' ');
244+
for (size_t aa = 0; aa < Sequence::PROFILE_AA_SIZE; aa++) {
245+
result.append(SSTR(subMat->num2aa[aa]));
246+
result.append(6, ' ');
247+
}
248+
result.append(1, '\n');
249+
for (size_t i = 0; i < queryLength; i++) {
250+
for (size_t aa = 0; aa < Sequence::PROFILE_AA_SIZE; aa++) {
251+
result.append(SSTR(profile[i * Sequence::PROFILE_AA_SIZE + aa], 4));
252+
result.append(1, ' ');
253+
}
254+
result.append(1, '\n');
255+
}
256+
result.append(1, '\n');
257+
}
258+
242259
void PSSMCalculator::computeLogPSSM(BaseMatrix *subMat, char *pssm, const float *profile, float bitFactor, size_t queryLength, float scoreBias) {
243260
for(size_t pos = 0; pos < queryLength; pos++) {
244261
for(size_t aa = 0; aa < Sequence::PROFILE_AA_SIZE; aa++) {

Diff for: src/alignment/PSSMCalculator.h

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class PSSMCalculator {
5454

5555
void printProfile(size_t queryLength);
5656
void printPSSM(size_t queryLength);
57+
void profileToString(std::string& result, size_t queryLength);
5758

5859
// prepare pseudocounts
5960
static void preparePseudoCounts(float *frequency, float *frequency_with_pseudocounts, size_t entrySize, size_t queryLength, const float **R);

Diff for: src/commons/Parameters.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ Parameters::Parameters():
137137
PARAM_PC_MODE(PARAM_PC_MODE_ID, "--pseudo-cnt-mode", "Pseudo count mode", "use 0: substitution-matrix or 1: context-specific pseudocounts", typeid(int), (void *) &pcmode, "^[0-1]{1}$", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT),
138138
PARAM_PCA(PARAM_PCA_ID, "--pca", "Pseudo count a", "Pseudo count admixture strength", typeid(MultiParam<PseudoCounts>), (void *) &pca, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT),
139139
PARAM_PCB(PARAM_PCB_ID, "--pcb", "Pseudo count b", "Pseudo counts: Neff at half of maximum admixture (range 0.0-inf)", typeid(MultiParam<PseudoCounts>), (void *) &pcb, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT),
140+
PARAM_PROFILE_OUTPUT_MODE(PARAM_PROFILE_OUTPUT_MODE_ID, "--profile-output-mode", "Profile output mode", "Profile output mode: 0: binary log-odds 1: human-readable frequencies", typeid(int), (void *) &profileOutputMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT),
140141
// sequence2profile
141142
PARAM_NEFF(PARAM_NEFF_ID, "--neff", "Neff", "Neff included into context state profile (1.0,20.0)", typeid(float), (void *) &neff, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE),
142143
PARAM_TAU(PARAM_TAU_ID, "--tau", "Tau", "Tau: context state pseudo count mixture (0.0,1.0)", typeid(float), (void *) &tau, "[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE),
@@ -570,6 +571,7 @@ Parameters::Parameters():
570571
result2profile.push_back(&PARAM_THREADS);
571572
result2profile.push_back(&PARAM_COMPRESSED);
572573
result2profile.push_back(&PARAM_V);
574+
result2profile.push_back(&PARAM_PROFILE_OUTPUT_MODE);
573575
574576
// createtsv
575577
createtsv.push_back(&PARAM_FIRST_SEQ_REP_SEQ);
@@ -2437,6 +2439,7 @@ void Parameters::setDefaults() {
24372439
pcmode = PCMODE_SUBSTITUTION_SCORE;
24382440
pca = MultiParam<PseudoCounts>(PseudoCounts(1.1, 1.4));
24392441
pcb = MultiParam<PseudoCounts>(PseudoCounts(4.1, 5.8));
2442+
profileOutputMode = 0;
24402443

24412444
// sequence2profile
24422445
neff = 1.0;

Diff for: src/commons/Parameters.h

+2
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,7 @@ class Parameters {
536536
int pcmode;
537537
MultiParam<PseudoCounts> pca;
538538
MultiParam<PseudoCounts> pcb;
539+
int profileOutputMode;
539540

540541
// sequence2profile
541542
float neff;
@@ -855,6 +856,7 @@ class Parameters {
855856
PARAMETER(PARAM_PC_MODE)
856857
PARAMETER(PARAM_PCA)
857858
PARAMETER(PARAM_PCB)
859+
PARAMETER(PARAM_PROFILE_OUTPUT_MODE)
858860

859861
// sequence2profile
860862
PARAMETER(PARAM_NEFF)

Diff for: src/commons/Util.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,17 @@ std::string SSTR(double x) {
659659
return fmt::format("{:.3E}", x);
660660
}
661661

662+
template<>
663+
std::string SSTR(double x, int precision) {
664+
return fmt::format("{:.{}E}", x, precision);
665+
}
666+
662667
template<>
663668
std::string SSTR(float x) {
664669
return fmt::format("{:.3f}", x);
670+
}
671+
672+
template<>
673+
std::string SSTR(float x, int precision) {
674+
return fmt::format("{:.{}f}", x, precision);
665675
}

Diff for: src/commons/Util.h

+8
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ std::string SSTR(T) {
2828
return "";
2929
}
3030

31+
template<typename T>
32+
std::string SSTR(T, int) {
33+
static_assert(assert_false<T>::value , "Not implemented for requested type");
34+
return "";
35+
}
36+
3137
template<> std::string SSTR(const char*);
3238
template<> std::string SSTR(char*);
3339
template<> std::string SSTR(bool);
@@ -45,6 +51,8 @@ template<> std::string SSTR(long long);
4551
template<> std::string SSTR(unsigned long long);
4652
template<> std::string SSTR(double);
4753
template<> std::string SSTR(float);
54+
template<> std::string SSTR(double, int precision);
55+
template<> std::string SSTR(float, int precision);
4856

4957
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
5058

Diff for: src/util/result2profile.cpp

+27-13
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,11 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret
104104
}
105105

106106
int type = Parameters::DBTYPE_HMM_PROFILE;
107-
if (returnAlnRes) {
107+
const int writePlain = par.profileOutputMode == 1;
108+
if (par.profileOutputMode == 1) {
109+
type = Parameters::DBTYPE_OMIT_FILE;
110+
par.compressed = false;
111+
} else if (returnAlnRes) {
108112
type = Parameters::DBTYPE_ALIGNMENT_RES;
109113
if (needSrcIndex) {
110114
type = DBReader<unsigned int>::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC);
@@ -261,18 +265,25 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret
261265
alnResults,
262266
#endif
263267
par.wg, 0.0);
264-
if (par.compBiasCorrection == true){
265-
SubstitutionMatrix::calcGlobalAaBiasCorrection(&subMat, pssmRes.pssm, pNullBuffer,
266-
Sequence::PROFILE_AA_SIZE,
267-
res.centerLength);
268-
}
269-
270-
if (par.maskProfile == true) {
271-
masker.mask(centerSequence, par.maskProb, pssmRes);
272-
}
273-
pssmRes.toBuffer(centerSequence, subMat, result);
268+
if (writePlain) {
269+
result.clear();
270+
result.append("Query profile of sequence ");
271+
result.append(SSTR(queryKey));
272+
result.push_back('\n');
273+
calculator.profileToString(result, res.centerLength);
274+
} else {
275+
if (par.compBiasCorrection == true){
276+
SubstitutionMatrix::calcGlobalAaBiasCorrection(&subMat, pssmRes.pssm, pNullBuffer,
277+
Sequence::PROFILE_AA_SIZE,
278+
res.centerLength);
279+
}
280+
if (par.maskProfile == true) {
281+
masker.mask(centerSequence, par.maskProb, pssmRes);
282+
}
283+
pssmRes.toBuffer(centerSequence, subMat, result);
284+
}
274285
}
275-
resultWriter.writeData(result.c_str(), result.length(), queryKey, thread_idx);
286+
resultWriter.writeData(result.c_str(), result.length(), queryKey, thread_idx, writePlain == false);
276287
result.clear();
277288
alnResults.clear();
278289

@@ -281,7 +292,10 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret
281292
}
282293
delete[] pNullBuffer;
283294
}
284-
resultWriter.close(returnAlnRes == false);
295+
resultWriter.close(returnAlnRes == false || writePlain == true);
296+
if (writePlain) {
297+
FileUtil::remove(par.db4Index.c_str());
298+
}
285299
resultReader.close();
286300

287301
if (!sameDatabase) {

0 commit comments

Comments
 (0)