Skip to content

Commit da9a595

Browse files
author
Rebecca Halperin
committed
formatting changes for compatibility with bcftools
1 parent 8d52f21 commit da9a595

File tree

1 file changed

+24
-14
lines changed

1 file changed

+24
-14
lines changed

src/writeJointVCF.m

+24-14
Original file line numberDiff line numberDiff line change
@@ -45,18 +45,23 @@
4545
f(tIdx,1:end)=reshape(fIn,[],inputParam.numClones);
4646

4747
fout=fopen([inputParam.outName '.lumosVarSNV.vcf'],'w');
48+
fparam=fopen([inputParam.outName '.lumosVarParam.txt'],'w');
4849

4950
%%%print VCF header
5051
fprintf(fout,'##fileformat=VCFv4.2\n');
5152
fprintf(fout,['##fileData=' datestr(clock) '\n']);
53+
for i=1:height(inputParam.chrTable)
54+
fprintf(fout,['##contig=<ID=' inputParam.chrTable.chrName{i} '>\n'])
55+
end
5256
inputFields=fieldnames(inputParam);
5357
for i=1:length(inputFields)
5458
if(isnumeric(inputParam.(inputFields{i})))
55-
fprintf(fout,['##INPUT=<' inputFields{i} '=' mat2str(inputParam.(inputFields{i})') '>\n']);
59+
fprintf(fparam,[inputFields{i} ': ' mat2str(inputParam.(inputFields{i})') '>\n']);
5660
elseif ~(istable(inputParam.(inputFields{i})))
57-
fprintf(fout,['##INPUT=<' inputFields{i} '=' inputParam.(inputFields{i}) '>\n']);
61+
fprintf(fparam,[inputFields{i} ': ' inputParam.(inputFields{i}) '>\n']);
5862
end
5963
end
64+
fclose(fparam);
6065
for i=1:size(f,2)
6166
outString=['##CloneID=' num2str(i)];
6267
for j=1:size(f,1)
@@ -70,6 +75,7 @@
7075
fprintf(fout,['##INFO=<ID=JPT,Number=1,Type=Float,Description="Phred Scaled Joint Posterior Probability the Call can be Trusted">\n']);
7176
fprintf(fout,['##INFO=<ID=JPA,Number=1,Type=Float,Description="Phred Scaled Joint Posterior Probability the Position is an Artifact">\n']);
7277
fprintf(fout,['##INFO=<ID=JPS,Number=1,Type=Float,Description="Joint Posterior Probability of Somatic Mutation">\n']);
78+
fprintf(fout,['##INFO=<ID=JPND,Number=1,Type=Float,Description="Joint Posterior Probability of non-diploid germline variant">\n']);
7379
fprintf(fout,['##INFO=<ID=JPGAB,Number=1,Type=Float,Description="Joint Posterior Probability of No Somatic Mutation and Position is Germline AB">\n']);
7480
fprintf(fout,['##INFO=<ID=JPGAA,Number=1,Type=Float,Description="Joint Posterior Probability of No Somatic Mutation and Position is Germline AA">\n']);
7581
fprintf(fout,['##INFO=<ID=JPGND,Number=1,Type=Float,Description="Joint Posterior Probability of Variant Present in Germline Not Following Diploid Model">\n']);
@@ -79,7 +85,10 @@
7985
fprintf(fout,['##INFO=<ID=CloneId,Number=1,Type=Integer,Description="CloneId">\n']);
8086
fprintf(fout,['##INFO=<ID=CN,Number=1,Type=Integer,Description="Copy Number">\n']);
8187
fprintf(fout,['##INFO=<ID=MACN,Number=1,Type=Integer,Description="Min Allele Copy Number">\n']);
88+
fprintf(fout,['##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">\n']);
89+
fprintf(fout,['##INFO=<ID=END,Number=.,Type=Integer,Description="end position of the variant described in this record">\n']);
8290
fprintf(fout,['##FILTER=<ID=SomaticPASS,Description="JPS>pSomaticThresh and pass filters">\n']);
91+
fprintf(fout,['##FILTER=<ID=SomaticDBsnp,Description="JPS>pSomaticThresh and pass filters and population AF>maxSomPopFreq">\n']);
8392
fprintf(fout,['##FILTER=<ID=SomaticLowQC,Description="JPS>0.5 and artifact filters">\n']);
8493
fprintf(fout,['##FILTER=<ID=SomaticPairPASS,Description="PPS>pSomaticThresh and JPS<0.5 pass filters">\n']);
8594
fprintf(fout,['##FILTER=<ID=SomaticPairLowQC,Description="PPS>0.5 and JPS<0.5 and artifact filters">\n']);
@@ -187,14 +196,14 @@
187196
currSomIdx=strncmp(Filter,'Somatic',7) & T.cnaF~=f(tIdx(1),cloneId(:,1))';
188197
tumorGT(currSomIdx)=cellstr(sort(gt(currSomIdx,:),2));
189198
tumorGT(cellfun('isempty',tumorGT))={'.'};
190-
tumorGT=regexprep(tumorGT,'([0-9])','$1\');
191-
tumorGT=regexprep(tumorGT,'\\$','');
199+
tumorGT=regexprep(tumorGT,'([0-9])','$1/');
200+
tumorGT=regexprep(tumorGT,'\/$','');
192201
germGT=cell(1,size(T,1));
193202
germGT(P.Hom(:,1)>0.5 | P.Somatic(:,1)>0.5)=cellstr(repmat(gt(P.Hom(:,1)>0.5 | P.Somatic(:,1)>0.5,1),1,2));
194203
germGT(P.Het(:,1)>0.5)=cellstr(sort(gt(P.Het(:,1)>0.5,:),2));
195204
germGT(cellfun('isempty',germGT))={'.'};
196-
germGT=regexprep(germGT,'([0-9])','$1\');
197-
germGT=regexprep(germGT,'\\$','');
205+
germGT=regexprep(germGT,'([0-9])','$1/');
206+
germGT=regexprep(germGT,'\/$','');
198207

199208
%%%calculate sample fractions
200209
if inputParam.NormalSample>0
@@ -237,33 +246,33 @@
237246
formatStr([aIdx; true],n)=strcat(formatStr([aIdx; true],n),':',strsplit(sprintf('%-.0f\n',T.AcountsComb(aIdx)))',',',strsplit(sprintf('%-.0f\n',T.BcountsComb(aIdx)))');
238247
bIdx=T.RefComb==T.Bcomb;
239248
formatStr([bIdx; true],n)=strcat(formatStr([bIdx; true],n),':',strsplit(sprintf('%-.0f\n',T.BcountsComb(bIdx)))',',',strsplit(sprintf('%-.0f\n',T.AcountsComb(bIdx)))');
240-
formatStr([~aIdx & ~bIdx; true],n)=strcat(formatStr([~aIdx & ~bIdx; true],n),':NA,',strsplit(sprintf('%-.0f\n',T.AcountsComb(~aIdx & ~bIdx)))',',',strsplit(sprintf('%-.0f\n',T.BcountsComb(~aIdx & ~bIdx)))');
249+
formatStr([~aIdx & ~bIdx; true],n)=strcat(formatStr([~aIdx & ~bIdx; true],n),':.,',strsplit(sprintf('%-.0f\n',T.AcountsComb(~aIdx & ~bIdx)))',',',strsplit(sprintf('%-.0f\n',T.BcountsComb(~aIdx & ~bIdx)))');
241250
filtStr=repmat({'REJECT'},height(T),1);
242251
filtStr(P.trust(:,i)>=inputParam.pGoodThresh)={'PASS'};
243252
filtStr(P.trust(:,i)<inputParam.pGoodThresh & P.artifact(:,i)<inputParam.pGoodThresh)={'LowQC'};
244253
if inputParam.NormalSample<1
245254
filtStr(somaticDetected(:,i)==1)=strcat(filtStr(somaticDetected(:,i)==1),';SomaticDetected');
246255
filtStr(P.Somatic(:,i)>0.5 & ~somaticDetected(:,i))=strcat(filtStr(P.Somatic(:,i)>0.5 & ~somaticDetected(:,i)),';SomaticNotDetected');
247-
formatStr(:,n)=strcat(formatStr(:,n),':',[filtStr; {''}],':NA');
256+
formatStr(:,n)=strcat(formatStr(:,n),':',[filtStr; {''}],':.');
248257
else
249258
filtStr(somaticDetected(:,i)==1)=strcat(filtStr(somaticDetected(:,i)==1),';SomaticDetected');
250259
filtStr(P.Somatic(:,i)>0.5 & ~somaticDetected(:,i))=strcat(filtStr(P.Somatic(:,i)>0.5 & ~somaticDetected(:,i)),';SomaticNotDetected');
251260
formatStr(:,n)=strcat(formatStr(:,n),':',[filtStr; {''}],':',strsplit(sprintf('%-.3f\n',P.SomaticPair(:,i)))');
252261
end
253262
formatStr(:,n)=strcat(formatStr(:,n),':',strsplit(sprintf('%-.0f\n',-10*log10(1-P.trust(:,i))))',':',strsplit(sprintf('%-.0f\n',-10*log10(1-P.artifact(:,i))))',':',strsplit(sprintf('%-.0f\n',P.DataSomatic(:,i)))');
254-
formatStr([aIdx; true],n)=strcat(formatStr([aIdx; true],n),':',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHom(aIdx,i))))',',',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHet(aIdx,i))))',',NA');
255-
formatStr([bIdx; true],n)=strcat(formatStr([bIdx; true],n),':NA,',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHet(bIdx,i))))',',',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHom(bIdx,i))))');
263+
formatStr([aIdx; true],n)=strcat(formatStr([aIdx; true],n),':',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHom(aIdx,i))))',',',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHet(aIdx,i))))',',.');
264+
formatStr([bIdx; true],n)=strcat(formatStr([bIdx; true],n),':.,',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHet(bIdx,i))))',',',strsplit(sprintf('%-.0f\n',-10*log10(P.DataHom(bIdx,i))))');
256265
formatStr(~aIdx & ~bIdx,n)=strcat(formatStr(~aIdx & ~bIdx,n),':.');
257266
if inputParam.NormalSample>0
258267
formatStr(:,n)=strcat(formatStr(:,n),':',strsplit(sprintf('%-.0f\n',-10*log10(P.DataNonDip(:,i))))');
259268
else
260-
formatStr(:,n)=strcat(formatStr(:,n),':NA');
269+
formatStr(:,n)=strcat(formatStr(:,n),':.');
261270
end
262271
if(sum(strncmp(Filter,'Somatic',7))>0)
263272
formatStr([strncmp(Filter,'Somatic',7); true],n)=strcat(formatStr([strncmp(Filter,'Somatic',7); true],n),':',strsplit(sprintf('%-.3f\n',sampleFrac(strncmp(Filter,'Somatic',7),i)))');
264-
formatStr(~strncmp(Filter,'Somatic',7),n)=strcat(formatStr(~strncmp(Filter,'Somatic',7),n),':NA');
273+
formatStr(~strncmp(Filter,'Somatic',7),n)=strcat(formatStr(~strncmp(Filter,'Somatic',7),n),':.');
265274
end
266-
formatStr(T.NumCopies==2 & T.MinAlCopies==1,n)=strcat(formatStr(T.NumCopies==2 & T.MinAlCopies==1,n),':NA');
275+
formatStr(T.NumCopies==2 & T.MinAlCopies==1,n)=strcat(formatStr(T.NumCopies==2 & T.MinAlCopies==1,n),':.');
267276
if sum(T.NumCopies~=2 | T.MinAlCopies~=1)>0
268277
formatStr([T.NumCopies~=2 | T.MinAlCopies~=1; true],n)=strcat(formatStr([T.NumCopies~=2 | T.MinAlCopies~=1; true],n),':',strsplit(sprintf('%-.3f\n',T.cnaF(T.NumCopies~=2 | T.MinAlCopies~=1)))');
269278
end
@@ -284,9 +293,10 @@
284293
else
285294
headers=[headers regexp(inputParam.sampleNames,',','split')];
286295
end
287-
for i=1:length(headers)
296+
for i=1:length(headers)-1
288297
fprintf(fout,'%s\t',headers{i});
289298
end
299+
fprintf(fout,'%s',headers{i+1});
290300

291301
for i=1:size(outData,1)
292302
fprintf(fout,strcat('\n%s\t%d\t%s\t%s\t%s\t%f\t%s\t%s\t%s',repmat('\t%s',1,n)),outData{i,:});

0 commit comments

Comments
 (0)