From 9d4204bf5ccb964f2e37e1a2c6b9f48ef9a70cd0 Mon Sep 17 00:00:00 2001 From: Cameron Gilchrist Date: Mon, 4 Mar 2024 17:05:44 +0900 Subject: [PATCH] Squashed 'lib/foldseek/' changes from e00a3dc1..15c0516f 15c0516f Remove --tar-include and --tar-exclude from createdb as they were never used e1394aac Rework createdb to correctly allow for only one directory or (new) tsv input, in additonal to loose files 0c3b7f23 scorecomplex minor update c4a4b6a4 scorecomplex redundant complex alignment re-solve e77b6431 scorecomplex complex alignment redundancy solved d725f0e3 scorecomplex rbh filtering with 0.7 as the margin d785457c change names for tm based rbh filtering 405e64c2 scorecomplex: rbh filtering with query tm score e5c52dda scorecomplex commit for benchmark test 537c6160 scorecomplex commit for benchmark test 4f3bc395 Merge branch 'master' of https://github.com/steineggerlab/foldseek 5b789cfd scorecomplex rollback to DBSCAN 493cefe7 Add mode to compute exact (slow) tmscore. 75a50f7c Fix https://github.com/steineggerlab/foldseek/issues/214 38e5e93f Merge pull request #244 from steineggerlab/test 6b1dd706 fix typo c388d483 confilcts solved ecf85daf final update scorecomplex with nearest neighbors 802235db backtrace related issue detour f629bbe1 Merge commit '8faebba3f96210242892943d37e4fe9e8a5eed8d' 8faebba3 Squashed 'lib/mmseqs/' changes from 22a77eeb..950342d9 dc272d56 complexsearch with DBSCAN commit for benchmark a7fefa22 bitscore margine change a6b1928e DBSCAN update retry 25031967 foldseek DBSCAN with RBH filter and NN rescue ac2b1dcf nearest neighbors update 41c7f9c9 nearest neighbors update e4079c49 new nearest neighbors 2742f469 new nearest neighbors 4d426356 nearest neighbors new 6cb3ea6c revoke nearest neighbors only 093af914 test nearest neighbors only2 1dbaac36 test nearest neighbors only 3bf3cdf5 Fix this in a different way f4a1a527 Fix compile on older compilers d3fca9e8 Update citations for databases 39ade546 Update README.md 87caae8e rbh filter 
margine improvement 6e4184a1 implement getting neareast neighbors in sorecomplex 6e632c30 eps related update revoke 65550247 scorecomplex learningRate=0 issue solved 095102ff infinite loop bug fix scorecomplex 9ca20244 Merge branch 'master' of https://github.com/steineggerlab/foldseek 096613dc commit for pull 0eff0231 Update scorecomplex.cpp: eps related update rollback f690b9d5 Merge branch 'master' of https://github.com/steineggerlab/foldseek da825d55 rbh filtering with bitscore & clustering eps update 852434a4 Add --input-format to createdb to force an input structure format 00ab450f scorecomplex rbh filtering implement 6893dcc5 Add CATH50 https://github.com/steineggerlab/foldseek/issues/232 bb090174 [expandcomplex] eased e-value for the 2nd alignment e9f76df6 update scorecomplex dbscan error fixed 1cb3a80d scorecomplex alignment clustering algorithm update 5433d6db Update README.md 1bc8d2e5 update to latest MMseqs2-App master c6f4f2a6 fix regression fail 10289c64 scorecomplex DBSCAN impropvement 6816a641 skip single chained complex for scorecomplex 2a187342 complexsearch initial search parameter adjustment 49dabe0d scorecomplex many against many bug fixed real final 1c4fdfa6 scorecomplex many against many bug fix final 9f8a2ef9 scorecomplex many against many bug fixed 3fe1f9e4 Merge branch 'master' of https://github.com/steineggerlab/foldseek c28e7938 fix wrong parameters in easy-complexsearch 035edc18 expandcomplex should now work correctly with both cluster and non-cluster dbs e396ca4d Carry extended dbtype for complexsearch to work with clustered dbs f05703dd Remove std::cout in structurerescorediagonal 7b68363d Merge branch 'master' of https://github.com/steineggerlab/foldseek 886021d2 Fix issue https://github.com/steineggerlab/foldseek/issues/205 592ffa80 fix explanation of complex related tools a6a712c1 fix easycomplexsearch.sh wrong param 799d42ca update EasyComplexSearch; improve expandcomplex stability 258be0fc Update README.md c382b8fa Update 
README.md d3f4980d Update README.md a417633d Update README.md b156e065 implement complexsearch and sanitizing expandcomplex b220b5a9 Update README.md ec32bee1 update expandcomplex 76ffa031 update expandcomplex 10ba8f53 Fix easy-complexsearch workflow shell scripting issues 75cc763a fix regression failed 206f600a fix regression test failed 9cc02eb7 expandcomplex a695211e expandcomplex dffdf788 Increase buffer size 79f865d6 Add --complex-report-mode to allow disabling report in easy-scorecomplex 38290cf7 Cleanup easy-complexsearch 08f1db5e Expose --db-output for createcomplexreport bdeb0024 Update README.md 629de617 Merge branch 'master' of https://github.com/steineggerlab/foldseek f7857793 Fix sameDB (clusterDB) issue in structurealign 7180ed43 Merge branch 'master' of https://github.com/steineggerlab/foldseek 28a4a7f5 update createcomplexreport with multithreading issue fixed git-subtree-dir: lib/foldseek git-subtree-split: 15c0516fbae0d7e0903ee80f14cb927782b394d0 --- README.md | 87 ++- data/CMakeLists.txt | 1 + data/complexsearch.sh | 46 ++ data/easycomplexsearch.sh | 44 +- data/main.js | 6 +- data/structdatabases.sh | 10 +- data/vendor.js.zst | Bin 896221 -> 896254 bytes lib/mmseqs/src/CommandDeclarations.h | 1 + lib/mmseqs/src/MMseqsBase.cpp | 21 +- lib/mmseqs/src/commons/Parameters.cpp | 3 + .../prefiltering/CacheFriendlyOperations.cpp | 55 +- .../prefiltering/CacheFriendlyOperations.h | 4 +- lib/mmseqs/src/prefiltering/QueryMatcher.cpp | 44 +- lib/mmseqs/src/prefiltering/QueryMatcher.h | 3 +- .../src/prefiltering/UngappedAlignment.cpp | 22 +- .../src/prefiltering/UngappedAlignment.h | 8 +- lib/mmseqs/src/util/CMakeLists.txt | 1 + lib/mmseqs/src/util/makepaddedseqdb.cpp | 54 ++ lib/mmseqs/src/util/result2msa.cpp | 41 +- src/FoldseekBase.cpp | 69 ++- src/LocalCommandDeclarations.h | 2 + src/commons/LocalParameters.cpp | 41 +- src/commons/LocalParameters.h | 12 +- src/commons/TMaligner.cpp | 121 +++- src/commons/TMaligner.h | 13 +- 
src/strucclustutils/CMakeLists.txt | 1 + src/strucclustutils/GemmiWrapper.cpp | 82 ++- src/strucclustutils/GemmiWrapper.h | 14 +- src/strucclustutils/aln2tmscore.cpp | 4 +- src/strucclustutils/createcomplexreport.cpp | 48 +- src/strucclustutils/createcomplexreport.h | 37 +- src/strucclustutils/expandcomplex.cpp | 147 +++++ src/strucclustutils/scorecomplex.cpp | 557 +++++++++++------- src/strucclustutils/structcreatedb.cpp | 66 ++- src/strucclustutils/structurealign.cpp | 71 +-- src/strucclustutils/structureconvertalis.cpp | 2 +- .../structurerescorediagonal.cpp | 44 +- src/strucclustutils/tmalign.cpp | 2 +- src/workflow/CMakeLists.txt | 1 + src/workflow/ComplexSearch.cpp | 124 ++++ src/workflow/EasyComplexSearch.cpp | 53 +- 41 files changed, 1439 insertions(+), 523 deletions(-) create mode 100644 data/complexsearch.sh create mode 100644 lib/mmseqs/src/util/makepaddedseqdb.cpp create mode 100644 src/strucclustutils/expandcomplex.cpp create mode 100644 src/workflow/ComplexSearch.cpp diff --git a/README.md b/README.md index 05da2da..94bc6aa 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Foldseek -Foldseek enables fast and sensitive comparisons of large structure sets. +Foldseek enables fast and sensitive comparisons of large protein structure sets.

@@ -32,7 +32,7 @@ Foldseek enables fast and sensitive comparisons of large structure sets. - [Examples](#examples) ## Webserver -Search your protein structures against the [AlphaFoldDB](https://alphafold.ebi.ac.uk/) and [PDB](https://www.rcsb.org/) in seconds using our Foldseek webserver: [search.foldseek.com](https://search.foldseek.com) 🚀 +Search your protein structures against the [AlphaFoldDB](https://alphafold.ebi.ac.uk/) and [PDB](https://www.rcsb.org/) in seconds using the Foldseek webserver ([code](https://github.com/soedinglab/mmseqs2-app)): [search.foldseek.com](https://search.foldseek.com) 🚀 ## Installation ``` @@ -66,7 +66,7 @@ For optimal software performance, consider three options based on your RAM and s Use the `--prefilter-mode 1`, which isn't memory-limited and computes all ungapped alignments. This option optimally utilizes foldseek's multithreading capabilities for single queries. ## Tutorial Video -We presented a Foldseek tutorial at the SBGrid where we demonstrate the webserver and command line interface of foldseek. +We presented a Foldseek tutorial at the SBGrid where we demonstrated Foldseek's webserver and command line interface. Check it out [here](https://www.youtube.com/watch?v=k5Rbi22TtOA). . @@ -77,14 +77,14 @@ Many of Foldseek's modules (subprograms) rely on MMseqs2. For more information a ## Quick start ### Search -The `easy-search` module allows to search single or multiple query structures, formatted in PDB/mmCIF format (flat or gzipped), against a target database, folder or single protein structures. In default it outputs the alignment information as a [tab-separated file](#tab-separated) but we support also [Superposed Cα PDBs](#superpositioned-cα-only-pdb-files) or a [HTML](#interactive-html) output. 
+The `easy-search` module allows querying one or more single-chain protein structures, formatted in PDB/mmCIF format (flat or gzipped), against a target database, folder or individual single-chain protein structures (for multi-chain proteins see [complexsearch](#complexsearch)). The default alignment information output is a [tab-separated file](#tab-separated) but Foldseek also supports [Superposed Cα PDBs](#superpositioned-cα-only-pdb-files) and [HTML](#interactive-html). foldseek easy-search example/d1asha_ example/ aln tmpFolder #### Output Search ##### Tab-separated -The default fields are containing the following fields: `query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits` but they can be customized with the `--format-output` option e.g. `--format-output "query,target,qaln,taln"` returns the query and target accession and the pairwise alignments in tab separated format. You can choose many different output columns. +The default output fields are: `query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits`, but they can be customized with the `--format-output` option; e.g., `--format-output "query,target,qaln,taln"` returns the query and target accessions and the pairwise alignments in tab-separated format. You can choose many different output columns. | Code | Description | | --- | --- | @@ -101,14 +101,14 @@ The default fields are containing the following fields: `query,target,fident,aln |lddtfull | LDDT per aligned position | |prob | Estimated probability for query and target to be homologous (e.g. being within the same SCOPe superfamily) | -Check out the [MMseqs2 documentation for more format output codes](https://github.com/soedinglab/MMseqs2/wiki#custom-alignment-format-with-convertalis). +Check out the [MMseqs2 documentation for additional output format codes](https://github.com/soedinglab/MMseqs2/wiki#custom-alignment-format-with-convertalis). 
##### Superpositioned Cα only PDB files -Foldseek's `--format-mode 5` generates PDB files with all Cα atoms superimposed based on the aligned coordinates on to the query structure. -For each pairwise alignment it will write a single PDB files, so be carefull when using this options for large searches. +Foldseek's `--format-mode 5` generates PDB files with all target Cα atoms superimposed onto the query structure based on the aligned coordinates. +For each pairwise alignment it will write its own PDB file, so be careful when using this option for large searches. ##### Interactive HTML -Foldseek can locally generate a search result HTML similiar to the [webserver](https://search.foldseek.com) by specifying the format mode `--format-mode 3` +A locally run Foldseek can generate an HTML search result, similar to the one produced by the [webserver](https://search.foldseek.com), by specifying `--format-mode 3` ``` foldseek easy-search example/d1asha_ example/ result.html tmp --format-mode 3 @@ -129,17 +129,17 @@ foldseek easy-search example/d1asha_ example/ result.html tmp --format-mode 3 | --cov-mode | Alignment | 0: coverage of query and target, 1: coverage of target, 2: coverage of query | #### Alignment Mode -In default Foldseek uses its local 3Di+AA strutural alignment but it also supports to realign hits using the global TMalign as well as rescoring alignments using TMscore. +By default, Foldseek uses its local 3Di+AA structural alignment but it also supports realigning hits using the global TMalign as well as rescoring alignments using TMscore. foldseek easy-search example/d1asha_ example/ aln tmp --alignment-type 1 -In case of the alignment type (`--alignment-type 1`) tmalign, we sort the results by the TMscore normalized by query length. We write the TMscore into the e-value=(qTMscore+tTMscore)/2 as well as into the score(=qTMscore*100) field. All output fields (like pident, fident, and alnlen) are calculated from the TMalign alignment. 
+If alignment type is set to tmalign (`--alignment-type 1`), the results will be sorted by the TMscore normalized by query length. The TMscore is used for reporting two fields: the e-value=(qTMscore+tTMscore)/2 and the score=(qTMscore*100). All output fields (e.g., pident, fident, and alnlen) are calculated based on the TMalign alignment. ### Databases The `databases` command downloads pre-generated databases like PDB or AlphaFoldDB. # pdb - foldseek databases PDB100 pdb tmp + foldseek databases PDB pdb tmp # alphafold db foldseek databases Alphafold/Proteome afdb tmp @@ -155,14 +155,14 @@ We currently support the following databases: ``` #### Create custom databases and indexes -The target database can be pre-processed by `createdb`. This make sense if searched multiple times. +The target database can be pre-processed by `createdb`. This is useful when searching multiple times against the same set of target structures. foldseek createdb example/ targetDB foldseek createindex targetDB tmp #OPTIONAL generates and stores the index on disk foldseek easy-search example/d1asha_ targetDB aln.m8 tmpFolder ### Cluster -The `easy-cluster` algorithm is designed for structural clustering by assigning structures to a representative protein using structural alignment. It accepts input in either PDB or mmCIF format, with support for both flat and gzipped files. By default, easy-cluster generates three output files with the following prefixes: (1) `_clu.tsv`, (2) `_repseq.fasta`, and (3) `_allseq.fasta`. The first file (1) is a [tab-separated](#tab-separated-cluster) file describing the mapping from representative to member, while the second file (2) contains only [representative sequences](#representative-fasta), and the third file (3) includes all [cluster member sequences](#all-member-fasta). +The `easy-cluster` algorithm is designed for structural clustering by assigning structures to a representative protein structure using structural alignment. 
It accepts input in either PDB or mmCIF format, with support for both flat and gzipped files. By default, easy-cluster generates three output files with the following prefixes: (1) `_clu.tsv`, (2) `_repseq.fasta`, and (3) `_allseq.fasta`. The first file (1) is a [tab-separated](#tab-separated-cluster) file describing the mapping from representative to member, while the second file (2) contains only [representative sequences](#representative-fasta), and the third file (3) includes all [cluster member sequences](#all-member-fasta). foldseek easy-cluster example/ res tmp -c 0.9 @@ -187,7 +187,7 @@ MCAT...Q ``` ##### All member fasta -In `_allseq.fasta` file all sequences of the cluster are present. A new cluster is marked by two identical name lines of the representative sequence, where the first line stands for the cluster and the second is the name line of the first cluster sequence. It is followed by the fasta formatted sequences of all its members. +In the `_allseq.fasta` file, all sequences of the cluster are present. A new cluster is marked by two identical name lines of the representative sequence, where the first line stands for the cluster and the second is the name line of the first cluster sequence. It is followed by the fasta formatted sequences of all its members. ``` >Q0KJ32 @@ -220,23 +220,26 @@ MCAR...Q ### Complexsearch -The `easy-complexsearch` module is a tool for searching single or multiple query protein complexes (PDB/mmCIF, flat or gzipped) against a target database of protein complexes. It reports the similarity metrices of the complexes like TMscore. +The `easy-complexsearch` module is designed for querying one or more protein complex (multi-chain) structures (supported input formats: PDB/mmCIF, flat or gzipped) against a target database of protein complex structures. It reports the similarity metrics between the complexes (e.g., the TMscore). 
#### Using Complexsearch -To pairwise compare complexes use `easy-complexsearch`, run the following command: +The examples below use files that can be found in the `example` directory, which is part of the Foldseek repo, if you clone it. +If you use the precompiled version of the software, you can download the files directly: [1tim.pdb.gz](https://github.com/steineggerlab/foldseek/raw/master/example/1tim.pdb.gz) and [8tim.pdb.gz](https://github.com/steineggerlab/foldseek/raw/master/example/8tim.pdb.gz). + +For a pairwise alignment of complexes using `easy-complexsearch`, run the following command: ``` foldseek easy-complexsearch example/1tim.pdb.gz example/8tim.pdb.gz result tmpFolder ``` -This command searches the specified protein complexe `1tim.pdb.gz` against 8tim.pdb.gz, producing alignment information. -Foldseek `easy-complexsearch` can also be used to search full databases: +Foldseek `easy-complexsearch` can also be used for searching one or more query complexes against a target database: ``` -foldseek databases PDB100 pdb tmp +foldseek databases PDB pdb tmp foldseek easy-complexsearch example/1tim.pdb.gz pdb result tmpFolder ``` #### Complex Search Output ##### Tab-separated-complex -By default, `easy-complexsearch` outputs the alignment as a tab-separated file. The standard fields include `query, target, fident, alnlen, mismatch, gapopen, qstart, qend, tstart, tend, evalue, bits, complexassignid`. Customize output with the `--format-output` option. For example, `--format-output "query,target,complexqtmscore,complexttmscore,complexassignid"` alters the output to show specific scores and identifiers. +By default, `easy-complexsearch` reports the output alignment in a tab-separated file. 
+The default output fields are: `query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,complexassignid` but they can be customized with the `--format-output` option e.g., `--format-output "query,target,complexqtmscore,complexttmscore,complexassignid"` alters the output to show specific scores and identifiers. | Code | Description | | --- | --- | @@ -257,34 +260,24 @@ By default, `easy-complexsearch` outputs the alignment as a tab-separated file. ``` ##### Complex Report -`easy-complexsearch` also generates a report format (prefixed `_report`), which provides a summary ot the inter complex chain matching, including identifiers, chains, TM scores, rotation matrices, translation vectors, and assignment IDs. Reports are containing the following fields: +`easy-complexsearch` also generates a report (prefixed `_report`), which provides a summary of the inter-complex chain matching, including identifiers, chains, TMscores, rotation matrices, translation vectors, and assignment IDs. 
The report includes the following fields: | Column | Description | | --- | --- | -| 1 | Identifiers for query complex | -| 2 | Identifiers for query complex | -| 3 | Matched chains of query complex | -| 4 | Matched chains of target complex | -| 5 | TM scores normalized by query length | -| 6 | TM scores normalized by target length | -| (8,9) | Rotation matrix (u) and Translation vector(t) | -| 9 | Complex Assignment Id | +| 1 | Identifier of the query complex | +| 2 | Identifier of the target complex | +| 3 | Comma separated matched chains in the query complex | +| 4 | Comma separated matched chains in the target complex | +| 5 | TM score normalized by query length [0-1] | +| 6 | TM score normalized by target length [0-1] | +| 7 | Comma separated nine rotation matrix (U) values | +| 8 | Comma separated three translation vector (T) values | +| 9 | Complex alignment ID | **Example Output:** ``` 1tim.pdb.gz 8tim.pdb.gz A,B A,B 0.98941 0.98941 0.999983,0.000332,0.005813,-0.000373,0.999976,0.006884,-0.005811,-0.006886,0.999959 0.298992,0.060047,0.565875 0 ``` - - ## Main Modules - `easy-search` fast protein structure search - `easy-cluster` fast protein structure clustering @@ -293,12 +286,12 @@ foldseek easy-search example/d1asha_ example/ result.html tmp --format-mode 3 ## Examples ### Rescore aligments using TMscore -Easiest way to get the alignment TMscore normalized by min(alnLen,qLen,targetLen) as well as a rotation matrix is through the following command: +The easiest way to get the alignment TMscore normalized by min(alnLen,qLen,targetLen) as well as a rotation matrix is through the following command: ``` foldseek easy-search example/ example/ aln tmp --format-output query,target,alntmscore,u,t ``` -Alternative, it is possible to compute TMscores for the kind of alignment output (e.g. 
3Di/AA) using the following commands: +Alternatively, it is possible to compute TMscores for the kind of alignment output (e.g., 3Di+AA) using the following commands: ``` foldseek createdb example/ targetDB foldseek createdb example/ queryDB @@ -307,10 +300,10 @@ foldseek aln2tmscore queryDB targetDB aln aln_tmscore foldseek createtsv queryDB targetDB aln_tmscore aln_tmscore.tsv ``` -Output format `aln_tmscore.tsv`: query and target identifier, TMscore, translation(3) and rotation vector=(3x3) +Output format `aln_tmscore.tsv`: query and target identifiers, TMscore, translation vector (3) and rotation matrix (3x3) ### Cluster search results -The following command aligns the input structures all-against-all and keeps only alignments with 80% of the sequence covered by the alignment (-c 0.8) (read more about alignment coverage [here](https://github.com/soedinglab/MMseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster)). It then clusters the results using greedy set cover algorithm. The clustering mode can be adjusted using --cluster-mode, read more [here](https://github.com/soedinglab/MMseqs2/wiki#clustering-modes). The clustering output format is described [here](https://github.com/soedinglab/MMseqs2/wiki#cluster-tsv-format). +The following command performs an all-against-all alignment of the input structures and retains only the alignments that cover 80% of the sequence (-c 0.8) (read more about alignment coverage options [here](https://github.com/soedinglab/MMseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster)). It then clusters the results using a greedy set cover algorithm. The clustering mode can be adjusted using --cluster-mode, read more [here](https://github.com/soedinglab/MMseqs2/wiki#clustering-modes). The clustering output format is described [here](https://github.com/soedinglab/MMseqs2/wiki#cluster-tsv-format). 
``` foldseek createdb example/ db @@ -320,8 +313,8 @@ foldseek createtsv db db clu clu.tsv ``` ### Query centered multiple sequence alignment -Foldseek can generate a3m based multiple sequence alignments using the following commands. -a3m can be converted to fasta format using [reformat.pl](https://raw.githubusercontent.com/soedinglab/hh-suite/master/scripts/reformat.pl) (`reformat.pl in.a3m out.fas`). +Foldseek can output multiple sequence alignments in a3m format using the following commands. +To convert a3m to FASTA format, the following script can be used [reformat.pl](https://raw.githubusercontent.com/soedinglab/hh-suite/master/scripts/reformat.pl) (`reformat.pl in.a3m out.fas`). ``` foldseek createdb example/ targetDB diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt index 000a2d3..ad4256a 100644 --- a/data/CMakeLists.txt +++ b/data/CMakeLists.txt @@ -13,6 +13,7 @@ set(COMPILED_RESOURCES evalue_nn.kerasify main.js vendor.js.zst + complexsearch.sh easycomplexsearch.sh ) diff --git a/data/complexsearch.sh b/data/complexsearch.sh new file mode 100644 index 0000000..0ce67fe --- /dev/null +++ b/data/complexsearch.sh @@ -0,0 +1,46 @@ +#!/bin/sh -e +fail() { + echo "Error: $1" + exit 1 +} + +notExists() { + [ ! 
-f "$1" ] +} + +if notExists "${TMP_PATH}/result.dbtype"; then + # shellcheck disable=SC2086 + "$MMSEQS" search "${QUERYDB}" "${TARGETDB}" "${TMP_PATH}/result" "${TMP_PATH}/search_tmp" ${SEARCH_PAR} \ + || fail "Search died" +fi + +RESULT="${TMP_PATH}/result" +if [ "$PREFMODE" != "EXHAUSTIVE" ]; then + if notExists "${TMP_PATH}/result_expand_pref.dbtype"; then + # shellcheck disable=SC2086 + "$MMSEQS" expandcomplex "${QUERYDB}" "${TARGETDB}" "${RESULT}" "${TMP_PATH}/result_expand_pref" ${THREADS_PAR} \ + || fail "Expandcomplex died" + fi + if notExists "${TMP_PATH}/result_expand_aligned.dbtype"; then + # shellcheck disable=SC2086 + "$MMSEQS" $COMPLEX_ALIGNMENT_ALGO "${QUERYDB}" "${TARGETDB}" "${TMP_PATH}/result_expand_pref" "${TMP_PATH}/result_expand_aligned" ${COMPLEX_ALIGN_PAR} \ + || fail $COMPLEX_ALIGNMENT_ALGO "died" + fi + RESULT="${TMP_PATH}/result_expand_aligned" +fi +if notExists "${TMP_PATH}/complex_result.dbtype"; then + # shellcheck disable=SC2086 + $MMSEQS scorecomplex "${QUERYDB}" "${TARGETDB}" "${RESULT}" "${OUTPUT}" ${SCORECOMPLEX_PAR} \ + || fail "ScoreComplex died" +fi + +if [ -n "${REMOVE_TMP}" ]; then + # shellcheck disable=SC2086 + "$MMSEQS" rmdb "${TMP_PATH}/result" ${VERBOSITY} + if [ "$PREFMODE" != "EXHAUSTIVE" ]; then + # shellcheck disable=SC2086 + "$MMSEQS" rmdb "${TMP_PATH}/result_expand_aligned" ${VERBOSITY} + fi + rm -rf "${TMP_PATH}/search_tmp" + rm -f "${TMP_PATH}/complexsearch.sh" +fi diff --git a/data/easycomplexsearch.sh b/data/easycomplexsearch.sh index a197805..b914133 100644 --- a/data/easycomplexsearch.sh +++ b/data/easycomplexsearch.sh @@ -26,39 +26,31 @@ if notExists "${TARGET}.dbtype"; then TARGET="${TMP_PATH}/target" fi - -SEARCH_RESULT="${TMP_PATH}/result" -if notExists "${SEARCH_RESULT}.dbtype"; then +if notExists "${TMP_PATH}/complex_result.dbtype"; then # shellcheck disable=SC2086 - - "$MMSEQS" search "${QUERY}" "${TARGET}" "${SEARCH_RESULT}" "${TMP_PATH}/search_tmp" ${SEARCH_PAR} \ - || fail "Search died" + "$MMSEQS" 
complexsearch "${QUERY}" "${TARGET}" "${TMP_PATH}/complex_result" "${TMP_PATH}/complexsearch_tmp" ${COMPLEXSEARCH_PAR} \ + || fail "ComplexSearch died" fi -SCORECOMPLEX_RESULT="${TMP_PATH}/result2" -if notExists "${SCORECOMPLEX_RESULT}/.dbtype"; then - # shellcheck disable=SC2086 - $MMSEQS scorecomplex "${QUERY}" "${TARGET}" "${SEARCH_RESULT}" ${SCORECOMPLEX_RESULT} ${SCORECOMPLEX_PAR} \ - || fail "ScoreComplex died" -fi +# shellcheck disable=SC2086 +"$MMSEQS" convertalis "${QUERY}" "${TARGET}" "${TMP_PATH}/complex_result" "${OUTPUT}" ${CONVERT_PAR} \ + || fail "Convert Alignments died" -if notExists "${TMP_PATH}/alis.dbtype"; then +if [ -z "${NO_REPORT}" ]; then # shellcheck disable=SC2086 - "$MMSEQS" convertalis "${QUERY}" "${TARGET}" "${SCORECOMPLEX_RESULT}" "${OUTPUT}" ${CONVERT_PAR} \ - || fail "Convert Alignments died" + "$MMSEQS" createcomplexreport "${QUERY}" "${TARGET}" "${TMP_PATH}/complex_result" "${OUTPUT}_report" ${REPORT_PAR} \ + || fail "createcomplexreport died" fi -# shellcheck disable=SC2086 -"$MMSEQS" createcomplexreport "${QUERY}" "${TARGET}" "${SCORECOMPLEX_RESULT}" "${REPORT}" ${REPORT_PAR}\ - || fail "Createcomplexreport dies" - - - - - if [ -n "${REMOVE_TMP}" ]; then # shellcheck disable=SC2086 "$MMSEQS" rmdb "${TMP_PATH}/result" ${VERBOSITY} + if [ "$PREFMODE" != "EXHAUSTIVE" ]; then + # shellcheck disable=SC2086 + "$MMSEQS" rmdb "${TMP_PATH}/result_expand_aligned" ${VERBOSITY} + fi + # shellcheck disable=SC2086 + "$MMSEQS" rmdb "${TMP_PATH}/complex_result" ${VERBOSITY} if [ -z "${LEAVE_INPUT}" ]; then if [ -f "${TMP_PATH}/target" ]; then # shellcheck disable=SC2086 @@ -79,6 +71,6 @@ if [ -n "${REMOVE_TMP}" ]; then # shellcheck disable=SC2086 "$MMSEQS" rmdb "${TMP_PATH}/query_ss" ${VERBOSITY} fi - rm -rf "${TMP_PATH}/search_tmp" - rm -f "${TMP_PATH}/easyscorecomplex.sh" -fi + rm -rf "${TMP_PATH}/complexsearch_tmp" + rm -f "${TMP_PATH}/easycomplexsearch.sh" +fi \ No newline at end of file diff --git a/data/main.js b/data/main.js index 
bc9a3ea..d82201e 100644 --- a/data/main.js +++ b/data/main.js @@ -2089,8 +2089,8 @@ version: 3, sources: [ "webpack://./frontend/StructureViewer.vue" ], names: [], - mappings: ";AA0mBA;IACA,YAAA;IACA,aAAA;IACA,cAAA;AACA;AAEA;IACA,iCAAA;AACA;AACA;;;;GAIA;AACA;IACA,WAAA;IACA,YAAA;AACA;AACA;IACA,kBAAA;AACA;AACA;IACA,kBAAA;AACA;AACA;IACA,oBAAA;IACA,mBAAA;IACA,kBAAA;IACA,uBAAA;IACA,WAAA;IACA,SAAA;IACA,UAAA;IACA,OAAA;AACA;AACA;IACA,kBAAA;IACA,WAAA;IACA,MAAA;IACA,OAAA;IACA,UAAA;IACA,sBAAA;IACA,wBAAA;AACA;AACA;IACA,iBAAA;IACA,UAAA;AACA;AACA;IACA,gBAAA;IACA,UAAA;IACA,mBAAA;AACA", - sourcesContent: [ '\n\n