You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
In slouch version 2.1.4 there is a bug in calculating the weight matrix for a model with discrete predictors mapped as regimes on the tree. The reason we discovered this, is that we were working on an alternative (faster) implementation of the likelihood. I'm copy-pasting a minimum-working example below
## minimum working example
library(ape)
library(phytools)
library(slouch)
############################################################# ## State-dependent pruning ## !! σ2, α are scalars, θ is a vector of length # states # # #############################################################sd_postorder<-function(node_index, edge, tree, continuousChar,
μ, V, log_norm_factor, subedges_lengths, σ2, α, θ){
ntip= length(tree$tip.label)
# if is internal nodeif (node_index>ntip){
left_edge= which(edge[,1] ==node_index)[1] # index of left child edgeright_edge= which(edge[,1] ==node_index)[2] # index of right child edgeleft=edge[left_edge,2] # index of left child noderight=edge[right_edge,2] # index of right child nodeoutput_left<- sd_postorder(left, edge, tree, continuousChar,
μ, V, log_norm_factor, subedges_lengths, σ2, α, θ)
μ <-output_left[[1]]
V<-output_left[[2]]
log_norm_factor<-output_left[[3]]
output_right<- sd_postorder(right, edge, tree, continuousChar,
μ, V, log_norm_factor, subedges_lengths, σ2, α, θ)
μ <-output_right[[1]]
V<-output_right[[2]]
log_norm_factor<-output_right[[3]]
sub_bl_left=subedges_lengths[left_edge][[1]] # all subedges of left child edgesub_bl_right=subedges_lengths[right_edge][[1]] # all subedges of right child edge# for the sake of readability, computation of variance, mean, and log_nf are done in separate loops# 1) variance of the normal variable: this branch (v_left) and the subtree (V[left])## Is 'delta_left* exp(2.0 * α * bl_left)' added in each sub-edge?delta_left=V[left]
v_left=0# initialise v_leftfor (iin rev(1:length(sub_bl_left))){
state<- names(sub_bl_left)[i]
delta_t<-sub_bl_left[[i]]
v_left= σ2/(2*α) * expm1(2.0*α *delta_t)
delta_left=v_left+delta_left* exp(2.0* α *delta_t)
}
delta_right=V[right]
v_right=0# initialise v_rightfor (iin rev(1:length(sub_bl_right))){
state<- names(sub_bl_right)[i]
v_right= σ2/(2*α) *expm1(2.0*α*sub_bl_right[[i]])
delta_right=v_right+delta_right* exp(2.0* α *sub_bl_right[[i]])
}
var_left=delta_leftvar_right=delta_right# 2) mean of the normal variablemean_left= μ[left]
for (iin rev(1:length(sub_bl_left))){
state<- names(sub_bl_left)[i]
mean_left= exp(α*sub_bl_left[[i]])*(mean_left- θ[[state]]) + θ[[state]]
}
mean_right= μ[right]
for (iin rev(1:length(sub_bl_right))){
state<- names(sub_bl_right)[i]
mean_right= exp(α*sub_bl_right[[i]])*(mean_right- θ[[state]]) + θ[[state]]
}
## compute the mean and variance of the nodemean_ancestor= (mean_left*var_right+mean_right*var_left) / (var_left+var_right)
μ[node_index] =mean_ancestorvar_node= (var_left*var_right) / (var_left+var_right)
V[node_index] =var_node## compute the normalizing factor, the left-hand side of the pdf of the normal variablelog_nf_left=0for (iin rev(1:length(sub_bl_left))){
state<- names(sub_bl_left)[i]
delta_t<-sub_bl_left[[i]]
log_nf_left=log_nf_left+delta_t* α
}
log_nf_right=0for (iin rev(1:length(sub_bl_right))){
state<- names(sub_bl_right)[i]
log_nf_right=log_nf_right+sub_bl_right[[i]] * α
}
contrast=mean_left-mean_righta=-(contrast*contrast/ (2*(var_left+var_right)))
b= log(2*pi*(var_left+var_right))/2.0log_nf=log_nf_left+log_nf_right+a-blog_norm_factor[node_index] =log_nfreturn(list(μ, V, log_norm_factor))
}
# if is tipelse{
species=tree$tip.label[node_index]
μ[node_index] =continuousChar[[which(names(continuousChar) ==species)]]
V[node_index] =0.0## if there is no observation errorreturn(list(μ, V, log_norm_factor))
}
}
sd_logL_pruning<-function(tree, continuousChar, σ2, α, θ){
ntip= length(tree$tip.label) # number of tipsedge=tree$edge# equals tree[:edge] in Julian_edges= length(edge[,1]) # number of edgesmax_node_index= max(tree$edge) # total number of nodesV=numeric(max_node_index)
μ =numeric(max_node_index)
log_norm_factor=numeric(max_node_index)
subedges_lengths=tree$mapsroot_index=ntip+1output<- sd_postorder(root_index, edge, tree, continuousChar,
μ, V, log_norm_factor, subedges_lengths, σ2, α, θ)
μ <-output[[1]]
V<-output[[2]]
log_norm_factor<-output[[3]]
## assume root value equal to theta
μ_root= μ[root_index]
v_root=V[root_index]
left_edge_from_root<- which(edge[,1] ==ntip+1)[1] # obtain left child edge index of root nodeleft_subedges_from_root<-subedges_lengths[[left_edge_from_root]] # obtain sub-edge lengthsroot_state= names(tail(left_subedges_from_root))[[1]] # obtain root state, assuming it equals last state at left child edgelnl= dnorm(θ[[root_state]], mean= μ_root, sd= sqrt(v_root), log=TRUE)
## add norm factorfor (log_nfinlog_norm_factor){
lnl=lnl+log_nf
}
return(lnl)
}
And some code to test the two versions of computing the likelihood
#################################################### ## Testing... # # ##################################################### test with slouch data set
data("artiodactyla")
data("neocortex")
# convert continuous data to read.nexus.data() formatbrain<-list()
for (iin1:length(neocortex$brain_mass_g_log_mean)){
sp<-neocortex$species[i]
brain[sp] <-list(neocortex$brain_mass_g_log_mean[i])
}
neocortex<-neocortex[match(artiodactyla$tip.label, neocortex$species), ]
diet<- as.character(neocortex$diet)
names(diet) <-neocortex$species
set.seed(123)
smaptree<-phytools::make.simmap(artiodactyla, diet)
This is what the SIMMAP tree looks like (it's just one random SIMMAP, but it does not matter for illustrating that the likelihoods are equivalent)
As of version 2.1.4, these two methods gave different results (i.e. different probability densities). There is a fix in 88c6181. When printing the likelihoods after the bug fix, in version 2.1.5 (I will upload a new version to CRAN), the likelihoods are identical up to floating point errors
In slouch version
2.1.4
there is a bug in calculating the weight matrix for a model with discrete predictors mapped as regimes on the tree. The reason we discovered this, is that we were working on an alternative (faster) implementation of the likelihood. I'm copy-pasting a minimum-working example belowAnd some code to test the two versions of computing the likelihood
This is what the SIMMAP tree looks like (it's just one random SIMMAP, but it does not matter for illustrating that the likelihoods are equivalent)
As of version
2.1.4
, these two methods gave different results (i.e. different probability densities). There is a fix in 88c6181. When printing the likelihoods after the bug fix, in version2.1.5
(I will upload a new version to CRAN), the likelihoods are identical up to floating point errorsThe text was updated successfully, but these errors were encountered: