|
f(tIdx,1:end)=reshape(fIn,[],inputParam.numClones);

% Open the two outputs: the VCF itself and a plain-text dump of the run
% parameters (written to a separate file rather than into the VCF header).
fout=fopen([inputParam.outName '.lumosVarSNV.vcf'],'w');
fparam=fopen([inputParam.outName '.lumosVarParam.txt'],'w');

%%%print VCF header
fprintf(fout,'##fileformat=VCFv4.2\n');
% "fileDate" is the meta-information key used by the VCF specification.
fprintf(fout,'##fileDate=%s\n',datestr(clock));
% One ##contig line per row of the chromosome table. The name is passed as
% data (not as part of the format string) so that any '%' or '\' in it is
% written literally.
for i=1:height(inputParam.chrTable)
    fprintf(fout,'%s\n',['##contig=<ID=' inputParam.chrTable.chrName{i} '>']);
end

%%%write input parameters as "name: value" lines
% Numeric fields are serialised with mat2str (after transposing); table
% fields are skipped; every other field is concatenated as-is, so it is
% expected to be a char row vector.
% The line is assembled first and then printed through '%s\n' so that values
% such as file paths are never interpreted as fprintf format text.
inputFields=fieldnames(inputParam);
for i=1:length(inputFields)
    fieldValue=inputParam.(inputFields{i});
    if(isnumeric(fieldValue))
        fprintf(fparam,'%s\n',[inputFields{i} ': ' mat2str(fieldValue')]);
    elseif ~(istable(fieldValue))
        fprintf(fparam,'%s\n',[inputFields{i} ': ' fieldValue]);
    end
end
fclose(fparam);
60 | 65 | for i=1:size(f,2)
|
61 | 66 | outString=['##CloneID=' num2str(i)];
|
62 | 67 | for j=1:size(f,1)
|
|
% Declare the joint-posterior INFO fields in the VCF header. The literals
% contain no conversion specifiers, so a single fprintf over their
% concatenation writes exactly the same bytes as one call per line.
fprintf(fout,[ ...
    '##INFO=<ID=JPT,Number=1,Type=Float,Description="Phred Scaled Joint Posterior Probability the Call can be Trusted">\n' ...
    '##INFO=<ID=JPA,Number=1,Type=Float,Description="Phred Scaled Joint Posterior Probability the Position is an Artifact">\n' ...
    '##INFO=<ID=JPS,Number=1,Type=Float,Description="Joint Posterior Probability of Somatic Mutation">\n' ...
    '##INFO=<ID=JPND,Number=1,Type=Float,Description="Joint Posterior Probability of non-diploid germline variant">\n' ...
    '##INFO=<ID=JPGAB,Number=1,Type=Float,Description="Joint Posterior Probability of No Somatic Mutation and Position is Germline AB">\n' ...
    '##INFO=<ID=JPGAA,Number=1,Type=Float,Description="Joint Posterior Probability of No Somatic Mutation and Position is Germline AA">\n' ...
    '##INFO=<ID=JPGND,Number=1,Type=Float,Description="Joint Posterior Probability of Variant Present in Germline Not Following Diploid Model">\n']);
|
|
% Declare the clone / copy-number / structural INFO fields in the VCF header.
% The literals hold no conversion specifiers, so concatenating them into one
% fprintf call emits the same bytes as separate calls.
fprintf(fout,[ ...
    '##INFO=<ID=CloneId,Number=1,Type=Integer,Description="CloneId">\n' ...
    '##INFO=<ID=CN,Number=1,Type=Integer,Description="Copy Number">\n' ...
    '##INFO=<ID=MACN,Number=1,Type=Integer,Description="Min Allele Copy Number">\n' ...
    '##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">\n' ...
    '##INFO=<ID=END,Number=.,Type=Integer,Description="end position of the variant described in this record">\n']);

% Declare the somatic FILTER values.
fprintf(fout,[ ...
    '##FILTER=<ID=SomaticPASS,Description="JPS>pSomaticThresh and pass filters">\n' ...
    '##FILTER=<ID=SomaticDBsnp,Description="JPS>pSomaticThresh and pass filters and population AF>maxSomPopFreq">\n' ...
    '##FILTER=<ID=SomaticLowQC,Description="JPS>0.5 and artifact filters">\n' ...
    '##FILTER=<ID=SomaticPairPASS,Description="PPS>pSomaticThresh and JPS<0.5 pass filters">\n' ...
    '##FILTER=<ID=SomaticPairLowQC,Description="PPS>0.5 and JPS<0.5 and artifact filters">\n']);
|
|
%%%tumor genotype strings
% Rows whose Filter starts with 'Somatic' and whose T.cnaF differs from
% f(tIdx(1),cloneId(:,1))' get the sorted characters of their gt row.
currSomIdx=strncmp(Filter,'Somatic',7) & T.cnaF~=f(tIdx(1),cloneId(:,1))';
tumorGT(currSomIdx)=cellstr(sort(gt(currSomIdx,:),2));
% Entries still empty become '.'
tumorGT(cellfun('isempty',tumorGT))={'.'};
% Inner call appends '/' after every digit; outer call strips the '/' left
% at the end of each string.
tumorGT=regexprep(regexprep(tumorGT,'([0-9])','$1/'),'\/$','');

%%%germline genotype strings
germGT=cell(1,size(T,1));
% P.Hom or P.Somatic above 0.5 in column 1: first gt character, repeated twice.
germHomMask=P.Hom(:,1)>0.5 | P.Somatic(:,1)>0.5;
germGT(germHomMask)=cellstr(repmat(gt(germHomMask,1),1,2));
% P.Het above 0.5 in column 1: sorted characters of the gt row.
germHetMask=P.Het(:,1)>0.5;
germGT(germHetMask)=cellstr(sort(gt(germHetMask,:),2));
% Entries still empty become '.'
germGT(cellfun('isempty',germGT))={'.'};
% Same '/' insertion and trailing-'/' removal as for tumorGT.
germGT=regexprep(regexprep(germGT,'([0-9])','$1/'),'\/$','');
198 | 207 |
|
199 | 208 | %%%calculate sample fractions
|
200 | 209 | if inputParam.NormalSample>0
|
|
237 | 246 | formatStr([aIdx; true],n)=strcat(formatStr([aIdx; true],n),':',strsplit(sprintf('%-.0f\n',T.AcountsComb(aIdx)))',',',strsplit(sprintf('%-.0f\n',T.BcountsComb(aIdx)))');
|
238 | 247 | bIdx=T.RefComb==T.Bcomb;
|
239 | 248 | formatStr([bIdx; true],n)=strcat(formatStr([bIdx; true],n),':',strsplit(sprintf('%-.0f\n',T.BcountsComb(bIdx)))',',',strsplit(sprintf('%-.0f\n',T.AcountsComb(bIdx)))');
|
240 |
| - formatStr([~aIdx & ~bIdx; true],n)=strcat(formatStr([~aIdx & ~bIdx; true],n),':NA,',strsplit(sprintf('%-.0f\n',T.AcountsComb(~aIdx & ~bIdx)))',',',strsplit(sprintf('%-.0f\n',T.BcountsComb(~aIdx & ~bIdx)))'); |
| 249 | + formatStr([~aIdx & ~bIdx; true],n)=strcat(formatStr([~aIdx & ~bIdx; true],n),':.,',strsplit(sprintf('%-.0f\n',T.AcountsComb(~aIdx & ~bIdx)))',',',strsplit(sprintf('%-.0f\n',T.BcountsComb(~aIdx & ~bIdx)))'); |
241 | 250 | filtStr=repmat({'REJECT'},height(T),1);
|
242 | 251 | filtStr(P.trust(:,i)>=inputParam.pGoodThresh)={'PASS'};
|
243 | 252 | filtStr(P.trust(:,i)<inputParam.pGoodThresh & P.artifact(:,i)<inputParam.pGoodThresh)={'LowQC'};
|
244 | 253 | if inputParam.NormalSample<1
|
245 | 254 | filtStr(somaticDetected(:,i)==1)=strcat(filtStr(somaticDetected(:,i)==1),';SomaticDetected');
|
246 | 255 | filtStr(P.Somatic(:,i)>0.5 & ~somaticDetected(:,i))=strcat(filtStr(P.Somatic(:,i)>0.5 & ~somaticDetected(:,i)),';SomaticNotDetected');
|
247 |
| - formatStr(:,n)=strcat(formatStr(:,n),':',[filtStr; {''}],':NA'); |
| 256 | + formatStr(:,n)=strcat(formatStr(:,n),':',[filtStr; {''}],':.'); |
248 | 257 | else
|
249 | 258 | filtStr(somaticDetected(:,i)==1)=strcat(filtStr(somaticDetected(:,i)==1),';SomaticDetected');
|
250 | 259 | filtStr(P.Somatic(:,i)>0.5 & ~somaticDetected(:,i))=strcat(filtStr(P.Somatic(:,i)>0.5 & ~somaticDetected(:,i)),';SomaticNotDetected');
|
251 | 260 | formatStr(:,n)=strcat(formatStr(:,n),':',[filtStr; {''}],':',strsplit(sprintf('%-.3f\n',P.SomaticPair(:,i)))');
|
252 | 261 | end
|
253 | 262 | formatStr(:,n)=strcat(formatStr(:,n),':',strsplit(sprintf('%-.0f\n',-10*log10(1-P.trust(:,i))))',':',strsplit(sprintf('%-.0f\n',-10*log10(1-P.artifact(:,i))))',':',strsplit(sprintf('%-.0f\n',P.DataSomatic(:,i)))');
|
254 |
| - formatStr([aIdx; true],n)=strcat(formatStr([aIdx; true],n),':',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHom(aIdx,i))))',',',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHet(aIdx,i))))',',NA'); |
255 |
| - formatStr([bIdx; true],n)=strcat(formatStr([bIdx; true],n),':NA,',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHet(bIdx,i))))',',',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHom(bIdx,i))))'); |
| 263 | + formatStr([aIdx; true],n)=strcat(formatStr([aIdx; true],n),':',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHom(aIdx,i))))',',',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHet(aIdx,i))))',',.'); |
| 264 | + formatStr([bIdx; true],n)=strcat(formatStr([bIdx; true],n),':.,',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHet(bIdx,i))))',',',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHom(bIdx,i))))'); |
256 | 265 | formatStr(~aIdx & ~bIdx,n)=strcat(formatStr(~aIdx & ~bIdx,n),':.');
|
257 | 266 | if inputParam.NormalSample>0
|
258 | 267 | formatStr(:,n)=strcat(formatStr(:,n),':',strsplit(sprintf('%-.0f\n',-10*log10(P.DataNonDip(:,i))))');
|
259 | 268 | else
|
260 |
| - formatStr(:,n)=strcat(formatStr(:,n),':NA'); |
| 269 | + formatStr(:,n)=strcat(formatStr(:,n),':.'); |
261 | 270 | end
|
262 | 271 | if(sum(strncmp(Filter,'Somatic',7))>0)
|
263 | 272 | formatStr([strncmp(Filter,'Somatic',7); true],n)=strcat(formatStr([strncmp(Filter,'Somatic',7); true],n),':',strsplit(sprintf('%-.3f\n',sampleFrac(strncmp(Filter,'Somatic',7),i)))');
|
264 |
| - formatStr(~strncmp(Filter,'Somatic',7),n)=strcat(formatStr(~strncmp(Filter,'Somatic',7),n),':NA'); |
| 273 | + formatStr(~strncmp(Filter,'Somatic',7),n)=strcat(formatStr(~strncmp(Filter,'Somatic',7),n),':.'); |
265 | 274 | end
|
266 |
| - formatStr(T.NumCopies==2 & T.MinAlCopies==1,n)=strcat(formatStr(T.NumCopies==2 & T.MinAlCopies==1,n),':NA'); |
| 275 | + formatStr(T.NumCopies==2 & T.MinAlCopies==1,n)=strcat(formatStr(T.NumCopies==2 & T.MinAlCopies==1,n),':.'); |
267 | 276 | if sum(T.NumCopies~=2 | T.MinAlCopies~=1)>0
|
268 | 277 | formatStr([T.NumCopies~=2 | T.MinAlCopies~=1; true],n)=strcat(formatStr([T.NumCopies~=2 | T.MinAlCopies~=1; true],n),':',strsplit(sprintf('%-.3f\n',T.cnaF(T.NumCopies~=2 | T.MinAlCopies~=1)))');
|
269 | 278 | end
|
|
284 | 293 | else
|
285 | 294 | headers=[headers regexp(inputParam.sampleNames,',','split')];
|
286 | 295 | end
|
287 |
% Write the column header row: entries separated by tabs with no trailing
% tab and no newline (each data row written afterwards starts with '\n').
% strjoin interprets the '\t' escape in the delimiter only; the header text
% itself is passed through '%s' and written literally. Joining the whole cell
% array also avoids depending on the loop variable after the loop has ended,
% which dropped the header when the array held a single element.
fprintf(fout,'%s',strjoin(headers,'\t'));
290 | 300 |
|
291 | 301 | for i=1:size(outData,1)
|
292 | 302 | fprintf(fout,strcat('\n%s\t%d\t%s\t%s\t%s\t%f\t%s\t%s\t%s',repmat('\t%s',1,n)),outData{i,:});
|
|
0 commit comments