Skip to content

Commit

Permalink
VSEARCH 1.0.11: SAM output file support
Browse files Browse the repository at this point in the history
  • Loading branch information
torognes committed Feb 5, 2015
1 parent 937290e commit 96f83bd
Show file tree
Hide file tree
Showing 13 changed files with 422 additions and 21 deletions.
27 changes: 16 additions & 11 deletions doc/vsearch.1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.\" ============================================================================
.TH vsearch 1 "January 23, 2015" "version 1.0.10" "USER COMMANDS"
.TH vsearch 1 "February 5, 2015" "version 1.0.11" "USER COMMANDS"
.\" ============================================================================
.SH NAME
vsearch \(em chimera detection, clustering, dereplication, masking, pairwise alignment, searching, shuffling and sorting of amplicons from metagenomic projects.
Expand All @@ -22,7 +22,7 @@ Clustering:
.RS
\fBvsearch\fR (--cluster_fast | --cluster_size | --cluster_smallmem)
\fIfastafile\fR (--alnout | --blast6out | --centroids | --clusters |
--msaout | --uc | --userout) \fIoutputfile\fR --id \fIreal\fR
--msaout | --samout | --uc | --userout) \fIoutputfile\fR --id \fIreal\fR
[\fIoptions\fR]
.PP
.RE
Expand All @@ -41,14 +41,14 @@ Masking:
Pairwise alignment:
.RS
\fBvsearch\fR --allpairs_global \fIfastafile\fR (--alnout |
--blast6out | --matched | --notmatched | --uc | --userout)
--blast6out | --matched | --notmatched | --samout | --uc | --userout)
\fIoutputfile\fR (--acceptall | --id \fIreal\fR) [\fIoptions\fR]
.PP
.RE
Searching:
.RS
\fBvsearch\fR --usearch_global \fIfastafile\fR --db \fIfastafile\fR
(--alnout | --blast6out | --uc | --userout) \fIoutputfile\fR --id
(--alnout | --blast6out | --samout | --uc | --userout) \fIoutputfile\fR --id
\fIreal\fR [\fIoptions\fR]
.PP
.RE
Expand Down Expand Up @@ -434,8 +434,9 @@ just a decreasing length ordering.
.TP
Most searching options also apply to clustering:
.br
--alnout, --blast6out, --userout, --userfields, --fastapairs, --matched,
--notmatched, --maxaccept, --maxreject, score filtering, gap penalties, masking. (see the Searching section).
--alnout, --samout, --blast6out, --userout, --userfields, --fastapairs,
--matched, --notmatched, --maxaccept, --maxreject,
score filtering, gap penalties, masking. (see the Searching section).
.RE
.PP
.\" ----------------------------------------------------------------------------
Expand Down Expand Up @@ -549,7 +550,7 @@ Pairwise alignment options:
Perform optimal global pairwise alignments of all vs. all fasta
sequences contained in \fIfilename\fR. The results of the n * (n-1) /
2 alignments are written to the result files specified with --alnout,
--blast6out, --fastapairs --matched, --notmatched, --uc or --userout
--blast6out, --fastapairs, --matched, --notmatched, --samout, --uc or --userout
(see Searching section below). Specify either the --acceptall option
to output all pairwise alignments, or specify an identity level with
--id to discard weak alignments. Most other accept/reject options (see
Expand Down Expand Up @@ -790,7 +791,7 @@ Reject the target sequence if the alignment contains at least
.BI --maxhits\~ "positive integer"
Maximum number of hits to show once the search is terminated (hits are
sorted by decreasing identity). Unlimited by default. \fBIt
applies to alnout, blast6out, uc, userout, fastapairs\fR.
applies to alnout, blast6out, samout, uc, userout, fastapairs\fR.
.TP
.BI --maxid \0real
Reject the target sequence if its percentage of identity with the
Expand Down Expand Up @@ -859,9 +860,9 @@ Write query sequences not matching database target sequences to
.TP
.B --output_no_hits
Write both matching and non-matching queries to --alnout, --blast6out,
and --userout output files (--uc and --uc_allhits output files always
feature non-matching queries). Non-matching queries are labelled "No
hits" in --alnout files.
--samout and --userout output files (--uc and --uc_allhits output files
always feature non-matching queries). Non-matching queries are labelled
"No hits" in --alnout files.
.TP
.BI --qmask\~ "none|dust|soft"
Mask simple repeats and low-complexity regions in query sequences
Expand All @@ -882,6 +883,10 @@ Reject the target sequence if the alignment ends with gaps.
Width of alignment lines in --alnout output. The default value is
64. Set to 0 to eliminate wrapping.
.TP
.BI --samout \0filename
Write alignment results to \fIfilename\fR in the SAM format.
Output order may vary when using multiple threads.
.TP
.B --self
Reject the alignment if the query and target labels are identical.
.TP
Expand Down
Binary file modified doc/vsearch_manual.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions eval/eval.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ THREADS=0
DUPLICATES=100
DIR=.
DB=../data/Rfam_11_0.fasta

ID=0.5

if [ $(uname -s) == "Linux" ]; then
VSEARCH=$(ls -v ../bin/vsearch*linux* | tail -1)
Expand Down Expand Up @@ -45,7 +45,7 @@ echo Running search
/usr/bin/time $PROG \
--usearch_global $DIR/qq.fsa \
--db $DIR/db.fsa \
--id 0.5 \
--id $ID \
--maxaccepts 1 \
--maxrejects 32 \
--strand plus \
Expand Down
19 changes: 19 additions & 0 deletions src/allpairs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ static int qmatches;
static int queries;
static long progress = 0;
static FILE * fp_alnout = 0;
static FILE * fp_samout = 0;
static FILE * fp_userout = 0;
static FILE * fp_blast6out = 0;
static FILE * fp_uc = 0;
Expand Down Expand Up @@ -85,6 +86,15 @@ void allpairs_output_results(int hit_count,
qseqlen,
qsequence_rc);

if (fp_samout)
results_show_samout(fp_samout,
hits,
toreport,
query_head,
qsequence,
qseqlen,
qsequence_rc);

if (toreport)
{
double top_hit_id = hits[0].id;
Expand Down Expand Up @@ -493,6 +503,13 @@ void allpairs_global(char * cmdline, char * progheader)
fprintf(fp_alnout, "%s\n", progheader);
}

if (opt_samout)
{
fp_samout = fopen(opt_samout, "w");
if (! fp_samout)
fatal("Unable to open SAM output file for writing");
}

if (opt_userout)
{
fp_userout = fopen(opt_userout, "w");
Expand Down Expand Up @@ -585,5 +602,7 @@ void allpairs_global(char * cmdline, char * progheader)
fclose(fp_userout);
if (fp_alnout)
fclose(fp_alnout);
if (fp_samout)
fclose(fp_samout);
show_rusage();
}
15 changes: 15 additions & 0 deletions src/cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ static int clusters = 0;
static FILE * fp_centroids = 0;
static FILE * fp_uc = 0;
static FILE * fp_alnout = 0;
static FILE * fp_samout = 0;
static FILE * fp_userout = 0;
static FILE * fp_blast6out = 0;
static FILE * fp_fastapairs = 0;
Expand Down Expand Up @@ -285,6 +286,11 @@ void cluster_core_results_hit(struct hit * best,
best, 1, query_head,
qsequence, qseqlen, qsequence_rc);

if (fp_samout)
results_show_samout(fp_samout,
best, 1, query_head,
qsequence, qseqlen, qsequence_rc);

if (fp_fastapairs)
results_show_fastapairs_one(fp_fastapairs,
best,
Expand Down Expand Up @@ -884,6 +890,13 @@ void cluster(char * dbname,
fprintf(fp_alnout, "%s\n", progheader);
}

if (opt_samout)
{
fp_samout = fopen(opt_samout, "w");
if (! fp_samout)
fatal("Unable to open SAM output file for writing");
}

if (opt_userout)
{
fp_userout = fopen(opt_userout, "w");
Expand Down Expand Up @@ -1170,6 +1183,8 @@ void cluster(char * dbname,
fclose(fp_userout);
if (fp_alnout)
fclose(fp_alnout);
if (fp_samout)
fclose(fp_samout);
if (fp_uc)
fclose(fp_uc);
if (fp_centroids)
Expand Down
2 changes: 1 addition & 1 deletion src/derep.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright (C) 2014 Torbjorn Rognes
Copyright (C) 2014-2015 Torbjorn Rognes
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
Expand Down
Loading

0 comments on commit 96f83bd

Please sign in to comment.