From b08ebad80c38cdaec833e06cbcd9af7c6146ef02 Mon Sep 17 00:00:00 2001
From: dwjak123lkdmaKOP
Date: Thu, 14 Nov 2024 13:49:47 +0100
Subject: [PATCH] ANOVA support for family = gaussian() #152

---
 sjSDM/R/anova.R                           | 24 ++++++++++++--------
 sjSDM/R/sjSDM.R                           |  2 +-
 sjSDM/inst/python/sjSDM_py/dist_mvp.py    |  4 ++++
 sjSDM/inst/python/sjSDM_py/model_sjSDM.py | 27 +++++++++++++++++++++--
 4 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/sjSDM/R/anova.R b/sjSDM/R/anova.R
index 1a70dfe..38bb74d 100644
--- a/sjSDM/R/anova.R
+++ b/sjSDM/R/anova.R
@@ -42,8 +42,7 @@ anova.sjSDM = function(object, samples = 5000L, verbose = TRUE, ...) {
 
   object = checkModel(object)
   pkg.env$fa$set_seed(object$seed)
-
-  if(object$family$family$family == "gaussian") stop("gaussian not yet supported")
+  # if(object$family$family$family == "gaussian") stop("gaussian not yet supported")
 
   object$settings$se = FALSE
@@ -291,9 +290,9 @@ get_shared_anova = function(R2objt, spatial = TRUE) {
   F_AB <- R2objt$Full - R2objt$F_S - (F_A + F_B)
   F_AS <- R2objt$Full - R2objt$F_B - (F_A + F_S)
   F_ABS <- R2objt$Full - (F_A + F_B + F_S + F_BS + F_AB + F_AS)
-  A = F_A + F_AB*abs(F_A)/(abs(F_A)+abs(F_B)) + F_AS*abs(F_A)/(abs(F_S)+abs(F_A))+ F_ABS*abs(F_A)/(abs(F_A)+abs(F_B)+abs(F_S))
-  B = F_B + F_AB*abs(F_B)/(abs(F_A)+abs(F_B)) + F_BS*abs(F_B)/(abs(F_S)+abs(F_B))+ F_ABS*abs(F_B)/(abs(F_A)+abs(F_B)+abs(F_S))
-  S = F_S + F_AS*abs(F_S)/(abs(F_S)+abs(F_A)) + F_BS*abs(F_S)/(abs(F_S)+abs(F_B))+ F_ABS*abs(F_S)/(abs(F_A)+abs(F_B)+abs(F_S))
+  A = F_A + F_AB*abs(F_A)/(abs(F_A)+abs(F_B)) + F_AS*abs(F_A)/(abs(F_S)+abs(F_A)) + F_ABS*abs(F_A)/(abs(F_A)+abs(F_B)+abs(F_S))
+  B = F_B + F_AB*abs(F_B)/(abs(F_A)+abs(F_B)) + F_BS*abs(F_B)/(abs(F_S)+abs(F_B)) + F_ABS*abs(F_B)/(abs(F_A)+abs(F_B)+abs(F_S))
+  S = F_S + F_AS*abs(F_S)/(abs(F_S)+abs(F_A)) + F_BS*abs(F_S)/(abs(F_S)+abs(F_B)) + F_ABS*abs(F_S)/(abs(F_A)+abs(F_B)+abs(F_S))
   # R2 = correct_R2(R2objt$Full) TODO Check that this can be gone
   proportional = list(F_A = A, F_B = B, F_S = S, R2 = R2objt$Full)
   A = F_A + F_AB*0.3333333 + F_AS*0.3333333+ F_ABS*0.3333333
@@ -321,8 +320,15 @@ get_null_ll = function(object, verbose = TRUE, ...) {
 
   object_tmp = object
   object_tmp$settings$se = FALSE
-  if(inherits(object, "spatial ")) null_pred = predict(update(object_tmp, env_formula = ~1, spatial_formula = ~0, biotic = bioticStruct(diag = TRUE), verbose = verbose))
-  else null_pred = predict(update(object_tmp, env_formula = ~1, biotic = bioticStruct(diag = TRUE), verbose = verbose))
+
+
+  if(inherits(object, "spatial ")) {
+    null_model = update(object_tmp, env_formula = ~1, spatial_formula = ~0, biotic = bioticStruct(diag = TRUE), verbose = verbose)
+    null_pred = predict(null_model)
+  } else {
+    null_model = update(object_tmp, env_formula = ~1, biotic = bioticStruct(diag = TRUE), verbose = verbose)
+    null_pred = predict(null_model)
+  }
 
   if(object$family$family$family == "binomial") {
     null_m = stats::dbinom( object$data$Y, 1, null_pred, log = TRUE)
@@ -342,8 +348,8 @@ get_null_ll = function(object, verbose = TRUE, ...) {
     YT = torch$tensor(object$data$Y, dtype = torch$float32)
     null_m = force_r(torch$distributions$NegativeBinomial(total_count=theta, probs=probs)$log_prob(YT)$cpu()$data$numpy())
   } else if(object$family$family$family == "gaussian") {
-    warning("family = gaussian() is not fully supported yet.")
-    null_m = stats::dnorm(object$data$Y, mean = null_pred, log = TRUE)
+    #warning("family = gaussian() is not fully supported yet.")
+    null_m = sapply(1:ncol(object$data$Y), function(i) stats::dnorm(object$data$Y[,i], null_pred[,i], sd = exp(null_model$theta)[i], log = TRUE))
   }
 
   return(null_m)
diff --git a/sjSDM/R/sjSDM.R b/sjSDM/R/sjSDM.R
index 3261373..bb71a5d 100644
--- a/sjSDM/R/sjSDM.R
+++ b/sjSDM/R/sjSDM.R
@@ -317,7 +317,7 @@ sjSDM = function(Y = NULL,
   out$sessionInfo = utils::sessionInfo()
   out$weights = force_r(model$env_weights)
   out$sigma = force_r(model$get_sigma)
-  if(out$family$family$family== "nbinom") out$theta = force_r(model$get_theta)
+  if(out$family$family$family == "nbinom" || out$family$family$family == "gaussian") out$theta = force_r(model$get_theta)
   out$history = force_r(model$history)
   out$spatial_weights = force_r(model$spatial_weights)
   out$spatial = spatial
diff --git a/sjSDM/inst/python/sjSDM_py/dist_mvp.py b/sjSDM/inst/python/sjSDM_py/dist_mvp.py
index 2f6325c..672a2b0 100644
--- a/sjSDM/inst/python/sjSDM_py/dist_mvp.py
+++ b/sjSDM/inst/python/sjSDM_py/dist_mvp.py
@@ -140,6 +140,8 @@ def MVP_logLik(Y, X, sigma, device, dtype, batch_size=25, alpha = 1.0, sampling=
         link_func = lambda value: torch.exp(value)
     elif link=="nbinom":
         link_func = lambda value: torch.exp(value)
+    elif link=="normal":
+        link_func = lambda value: value
 
     if theta is not None:
         theta = torch.tensor(theta, dtype=dtype, device=torch.device(device))
@@ -158,6 +160,8 @@ def MVP_logLik(Y, X, sigma, device, dtype, batch_size=25, alpha = 1.0, sampling=
             logprob = E.log().mul(y).add((1.0 - E).log().mul(1.0 - y)).neg().sum(dim = 2).neg()
         elif link == "count":
             logprob = torch.distributions.Poisson(rate=E).log_prob(y).sum(2)
+        elif link == "normal":
+            logprob = torch.distributions.Normal(E, theta.exp()).log_prob(y).sum(2)
         elif link == "nbinom":
             eps = 0.0001
             theta_tmp = 1.0/(torch.nn.functional.softplus(theta)+eps)
diff --git a/sjSDM/inst/python/sjSDM_py/model_sjSDM.py b/sjSDM/inst/python/sjSDM_py/model_sjSDM.py
index 31dff05..c9e8dbc 100644
--- a/sjSDM/inst/python/sjSDM_py/model_sjSDM.py
+++ b/sjSDM/inst/python/sjSDM_py/model_sjSDM.py
@@ -360,6 +360,10 @@ def build(self,
             self.theta = torch.ones([r_dim], requires_grad = True, dtype = self.dtype, device = self.device).to(self.device)
             self.params.append([self.theta])
 
+        if link == "normal":
+            self.theta = torch.zeros([r_dim], requires_grad = True, dtype = self.dtype, device = self.device).to(self.device)
+            self.params.append([self.theta])
+
         if not diag:
             low = -np.sqrt(6.0/(r_dim+df))  # type: ignore
             high = np.sqrt(6.0/(r_dim+df))  # type: ignore
@@ -893,8 +897,19 @@ def tmp(mu: torch.Tensor, Ys: torch.Tensor, sigma: torch.Tensor, batch_size: int
                 return Eprob.log().neg().sub(maxlogprob)
 
         elif self.link == "normal":
+            #def tmp(mu: torch.Tensor, Ys: torch.Tensor, sigma: torch.Tensor, batch_size: int, sampling: int, df: int, alpha: float, device: str, dtype: torch.dtype):
+            #    return torch.distributions.MultivariateNormal(loc=mu, covariance_matrix=sigma.matmul(sigma.t()).add(torch.eye(sigma.shape[0], device=torch.device(device), dtype=dtype))).log_prob(Ys).neg()
            def tmp(mu: torch.Tensor, Ys: torch.Tensor, sigma: torch.Tensor, batch_size: int, sampling: int, df: int, alpha: float, device: str, dtype: torch.dtype):
-                return torch.distributions.MultivariateNormal(loc=mu, covariance_matrix=sigma.matmul(sigma.t()).add(torch.eye(sigma.shape[0], device=torch.device(device), dtype=dtype))).log_prob(Ys).neg()
+                # return torch.distributions.MultivariateNormal(loc=mu, covariance_matrix=sigma.matmul(sigma.t()).add(torch.eye(sigma.shape[0], device=device, dtype=dtype ))).log_prob(Ys).neg()
+                noise = torch.randn(size = [sampling, batch_size, df], device=torch.device(device),dtype=dtype)
+                #noise = torch.distributions.Normal(loc = torch.zeros_like(self.theta), scale = self.theta.exp()).sample([sampling, batch_size]).to(device=torch.device(device),dtype=dtype)
+                E = torch.einsum("ijk, lk -> ijl", [noise, sigma]).add(mu)#.exp()
+
+                logprob = torch.distributions.Normal(E, self.theta.exp()).log_prob(Ys)#.sum(2)
+                logprob = logprob.sum(dim = 2)# .neg()
+                maxlogprob = logprob.max(dim = 0).values
+                Eprob = logprob.sub(maxlogprob).exp().mean(dim = 0)
+                return Eprob.log().neg().sub(maxlogprob)
         else:
             if self.link == "probit":
                 link_func = lambda value: torch.distributions.Normal(0.0, 1.0).cdf(value)
@@ -965,7 +980,15 @@ def tmp(mu: torch.Tensor, Ys: torch.Tensor, sigma: torch.Tensor, batch_size: int
 
         elif self.link == "normal":
             def tmp(mu: torch.Tensor, Ys: torch.Tensor, sigma: torch.Tensor, batch_size: int, sampling: int, df: int, alpha: float, device: str, dtype: torch.dtype):
-                return torch.distributions.MultivariateNormal(loc=mu, covariance_matrix=sigma.matmul(sigma.t()).add(torch.eye(sigma.shape[0]), device=device, dtype=dtype )).log_prob(Ys).neg()
+                # return torch.distributions.MultivariateNormal(loc=mu, covariance_matrix=sigma.matmul(sigma.t()).add(torch.eye(sigma.shape[0], device=device, dtype=dtype ))).log_prob(Ys).neg()
+                noise = torch.randn(size = [sampling, batch_size, df], device=torch.device(device),dtype=dtype)
+                #noise = torch.distributions.Normal(loc = torch.zeros_like(self.theta), scale = self.theta.exp()).sample([sampling, batch_size]).to(device=torch.device(device),dtype=dtype)
+                E = torch.einsum("ijk, lk -> ijl", [noise, sigma]).add(mu)#.exp()
+                logprob = torch.distributions.Normal(E, self.theta.exp()).log_prob(Ys)#.sum(2)
+                logprob = logprob.sum(dim = 2)# .neg()
+                maxlogprob = logprob.max(dim = 0).values
+                Eprob = logprob.sub(maxlogprob).exp().mean(dim = 0)
+                return Eprob.log().neg().sub(maxlogprob).reshape([batch_size, 1])
 
         return tmp
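
Note on the estimator behind the new "normal" branches in model_sjSDM.py (the notation below is introduced here for explanation and does not appear in the patch): for a site with mean prediction mu in R^J, factor loadings Sigma in R^{J x d} (`sigma`), per-species scales sigma_j = exp(theta_j) (`self.theta.exp()`), and M = `sampling` draws z_m ~ N(0, I_d), the added code approximates the marginal log-likelihood by Monte Carlo over the latent factors, stabilized with the usual max-shift (log-sum-exp) trick:

$$
\ell_m = \sum_{j=1}^{J} \log \mathcal{N}\!\left(y_j \,\middle|\, \mu_j + (\Sigma z_m)_j,\; \sigma_j\right), \qquad
c = \max_m \ell_m, \qquad
\log p(y) \approx c + \log \frac{1}{M} \sum_{m=1}^{M} \exp(\ell_m - c),
$$

and the `tmp` functions return the negative of this quantity. This replaces the earlier closed-form MultivariateNormal log-density, which is kept as a commented-out alternative in both hunks.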
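
A minimal R sketch of the code path this patch unlocks; `Y`, `X`, and the argument values are placeholders and not part of the patch:

  library(sjSDM)

  # Y: numeric sites x species response matrix, X: data.frame of covariates (both hypothetical)
  model = sjSDM(Y = Y,
                env = linear(data = X, formula = ~ .),
                family = gaussian(),
                sampling = 100L)

  # with this patch, theta (per-species log standard deviations) is stored on the fitted object
  exp(model$theta)

  # anova() no longer stops with "gaussian not yet supported"
  an = anova(model)
  print(an)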