Skip to content

Commit e2a4f73

Browse files
committed
do not require GT for copy_ds!()
1 parent 79f4a6b commit e2a4f73

2 files changed

Lines changed: 48 additions & 10 deletions

File tree

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name = "VCFTools"
22
uuid = "a620830f-fdd7-5ebc-8d26-3621ab35fbfe"
33
keywords = ["Variant call format", "VCF"]
44
authors = ["Hua Zhou <hwachou@gmail.com>", "OpenMendel Team"]
5-
version = "0.2.6"
5+
version = "0.2.7"
66

77
[deps]
88
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"

src/convert.jl

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -693,22 +693,41 @@ function copy_ds!(
693693
record_alt === nothing || (record_alt[j] = VCF.alt(record))
694694

695695
# loop over every marker in record
696-
_, _, _, _, _, alt_freq, _, _, _, _, _ = gtstats(record, nothing)
697-
ct = 2alt_freq
698-
wt = alt_freq == 0 ? 1.0 : 1.0 / (2alt_freq * (1 - alt_freq))
696+
# _, _, _, _, _, alt_freq, _, _, _, _, _ = gtstats(record, nothing)
697+
# ct = 2alt_freq
698+
# wt = alt_freq == 0 ? 1.0 : 1.0 / √(2alt_freq * (1 - alt_freq))
699699
for i in 1:size(A, 1)
700700
geno = record.genotype[i]
701701
# Missing genotype: dropped field or "." => 0x2e
702702
if dskey > lastindex(geno) || record.data[geno[dskey]] == [0x2e]
703-
A[i, j] = (impute ? ct : missing)
703+
A[i, j] = missing # (impute ? ct : missing)
704704
else # not missing
705705
A[i, j] = parse(T, String(record.data[geno[dskey]]))
706706
end
707+
708+
end
709+
if center || scale || impute
710+
total_ds = zero(T)
711+
cnt = 0
712+
for i in 1:size(A, 1)
713+
if A[i, j] !== missing
714+
total_ds += A[i, j]
715+
cnt += 1
716+
end
717+
end
718+
ct = total_ds / cnt
719+
wt = ct == 0 ? 1.0 : 1.0 / (ct * (1 - ct/2))
707720
# center and scale if asked
708721
center && !ismissing(A[i, j]) && (A[i, j] -= ct)
709722
scale && !ismissing(A[i, j]) && (A[i, j] *= wt)
723+
if impute
724+
for i in 1:size(A, 1)
725+
if A[i, j] === missing
726+
A[i, j] = ct
727+
end
728+
end
729+
end
710730
end
711-
712731
# update progress
713732
msg != "" && next!(pmeter)
714733
end
@@ -786,20 +805,39 @@ function copy_ds_trans!(
786805
record_alt === nothing || (record_alt[j] = VCF.alt(record))
787806

788807
# loop over every marker in record
789-
_, _, _, _, _, alt_freq, _, _, _, _, _ = gtstats(record, nothing)
790-
ct = 2alt_freq
791-
wt = alt_freq == 0 ? 1.0 : 1.0 / (2alt_freq * (1 - alt_freq))
808+
# _, _, _, _, _, alt_freq, _, _, _, _, _ = gtstats(record, nothing)
809+
# ct = 2alt_freq
810+
# wt = alt_freq == 0 ? 1.0 : 1.0 / √(2alt_freq * (1 - alt_freq))
792811
for i in 1:size(A, 2)
793812
geno = record.genotype[i]
794813
# Missing genotype: dropped field or "." => 0x2e
795814
if dskey > lastindex(geno) || record.data[geno[dskey]] == [0x2e]
796-
A[j, i] = (impute ? ct : missing)
815+
A[j, i] = missing #(impute ? ct : missing)
797816
else # not missing
798817
A[j, i] = parse(T, String(record.data[geno[dskey]]))
799818
end
819+
end
820+
if center || scale || impute
821+
total_ds = zero(T)
822+
cnt = 0
823+
for i in 1:size(A, 2)
824+
if A[j, i] !== missing
825+
total_ds += A[j, i]
826+
cnt += 1
827+
end
828+
end
829+
ct = total_ds / cnt
830+
wt = ct == 0 ? 1.0 : 1.0 / (ct * (1 - ct/2))
800831
# center and scale if asked
801832
center && !ismissing(A[j, i]) && (A[j, i] -= ct)
802833
scale && !ismissing(A[j, i]) && (A[j, i] *= wt)
834+
if impute
835+
for i in 1:size(A, 2)
836+
if A[j, i] === missing
837+
A[j, i] = ct
838+
end
839+
end
840+
end
803841
end
804842

805843
#update progress

0 commit comments

Comments
 (0)