From e24d3cf84686cbe7b952ed97e382d9800e1da041 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Wed, 1 Feb 2023 14:10:17 -0300 Subject: [PATCH 01/13] Auto Enable Disable KVM hosts --- agent/conf/agent.properties | 4 + .../agent/properties/AgentProperties.java | 3 + .../com/cloud/resource/ResourceService.java | 4 +- .../cloud/agent/api/PingRoutingCommand.java | 9 ++ .../agent/api/StartupRoutingCommand.java | 9 ++ .../java/com/cloud/agent/AgentManager.java | 7 ++ .../cloud/agent/manager/AgentManagerImpl.java | 54 +++++++- .../resource/LibvirtComputingResource.java | 45 ++++++- .../cloud/resource/ResourceManagerImpl.java | 118 +++++++++++------- .../resource/MockResourceManagerImpl.java | 5 + 10 files changed, 204 insertions(+), 54 deletions(-) diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties index 27c0e387fa3c..eceb594a8829 100644 --- a/agent/conf/agent.properties +++ b/agent/conf/agent.properties @@ -398,3 +398,7 @@ iscsi.session.cleanup.enabled=false # The number of iothreads. There should be only 1 or 2 IOThreads per VM CPU (default is 1). The recommended number of iothreads is 1 # iothreads=1 + +# The path of an executable file/script for host health check for CloudStack to Auto Disable/Enable the host +# depending on the return value of the file/script +# agent.health.check.script.path= \ No newline at end of file diff --git a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java index 9a031e001fa1..4af4ebed5e6d 100644 --- a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java +++ b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java @@ -312,6 +312,9 @@ public class AgentProperties{ */ public static final Property OPENVSWITCH_DPDK_OVS_PATH = new Property<>("openvswitch.dpdk.ovs.path", null, String.class); + public static final Property HEALTH_CHECK_SCRIPT_PATH = + new Property<>("agent.health.check.script.path", null, String.class); + /** * Sets the hypervisor type.
* Possible values: kvm | lxc
diff --git a/api/src/main/java/com/cloud/resource/ResourceService.java b/api/src/main/java/com/cloud/resource/ResourceService.java index e2b84ba87203..c1ca3722bea1 100644 --- a/api/src/main/java/com/cloud/resource/ResourceService.java +++ b/api/src/main/java/com/cloud/resource/ResourceService.java @@ -44,11 +44,11 @@ public interface ResourceService { /** * Updates a host - * - * @param cmd - the command specifying hostId */ Host updateHost(UpdateHostCmd cmd) throws NoTransitionException; + Host updateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException; + Host cancelMaintenance(CancelMaintenanceCmd cmd); Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException; diff --git a/core/src/main/java/com/cloud/agent/api/PingRoutingCommand.java b/core/src/main/java/com/cloud/agent/api/PingRoutingCommand.java index d7733ee91976..ce529ad4bcb1 100644 --- a/core/src/main/java/com/cloud/agent/api/PingRoutingCommand.java +++ b/core/src/main/java/com/cloud/agent/api/PingRoutingCommand.java @@ -29,6 +29,7 @@ public class PingRoutingCommand extends PingCommand { boolean _gatewayAccessible = true; boolean _vnetAccessible = true; + private Boolean hostHealthCheckResult; protected PingRoutingCommand() { } @@ -57,4 +58,12 @@ public boolean isVnetAccessible() { public void setVnetAccessible(boolean vnetAccessible) { _vnetAccessible = vnetAccessible; } + + public Boolean getHostHealthCheckResult() { + return hostHealthCheckResult; + } + + public void setHostHealthCheckResult(Boolean hostHealthCheckResult) { + this.hostHealthCheckResult = hostHealthCheckResult; + } } diff --git a/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java b/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java index b459f8849690..b4f9d20df5ed 100644 --- a/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java +++ b/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java @@ -44,6 +44,7 @@ public class StartupRoutingCommand extends StartupCommand { List hostTags = new ArrayList(); String hypervisorVersion; HashMap> groupDetails = new HashMap>(); + private Boolean hostHealthCheckResult; public StartupRoutingCommand() { super(Host.Type.Routing); @@ -188,4 +189,12 @@ public boolean getSupportsClonedVolumes() { public void setSupportsClonedVolumes(boolean supportsClonedVolumes) { this.supportsClonedVolumes = supportsClonedVolumes; } + + public Boolean getHostHealthCheckResult() { + return hostHealthCheckResult; + } + + public void setHostHealthCheckResult(Boolean hostHealthCheckResult) { + this.hostHealthCheckResult = hostHealthCheckResult; + } } diff --git a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java index 818e0a75e64a..6ba0c3b4fa0d 100644 --- a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java +++ b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java @@ -39,6 +39,13 @@ public interface AgentManager { static final ConfigKey Wait = new ConfigKey("Advanced", Integer.class, "wait", "1800", "Time in seconds to wait for control commands to return", true); + ConfigKey EnableKVMAutoEnableDisable = new ConfigKey<>(Boolean.class, + "enable.kvm.host.auto.enable.disable", + "Advanced", + "false", + "(KVM only) Enable Auto Disable/Enable KVM hosts in the cluster " + + "according to the hosts health check results", + true, ConfigKey.Scope.Cluster, null); public enum TapAgentsAction { Add, Del, Contains, diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 8e910c55cabe..3fb8225ad74d 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -1290,6 +1290,7 @@ protected void processRequest(final Link link, final Request request) { try { if (cmd instanceof StartupRoutingCommand) { final StartupRoutingCommand startup = (StartupRoutingCommand) cmd; + processStartupRoutingCommand(startup, hostId); answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval()); } else if (cmd instanceof StartupProxyCommand) { final StartupProxyCommand startup = (StartupProxyCommand) cmd; @@ -1321,6 +1322,9 @@ protected void processRequest(final Link link, final Request request) { // if the router is sending a ping, verify the // gateway was pingable if (cmd instanceof PingRoutingCommand) { + + processPingRoutingCommand((PingRoutingCommand) cmd, hostId); + final boolean gatewayAccessible = ((PingRoutingCommand)cmd).isGatewayAccessible(); final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId)); @@ -1425,6 +1429,52 @@ protected void doTask(final Task task) throws TaskExecutionException { } } + private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long hostId) { + if (hostHealthCheckResult == null) { + return; + } + HostVO host = _hostDao.findById(hostId); + if (host == null) { + s_logger.error(String.format("Unable to find host with ID: %s", hostId)); + return; + } + if (!EnableKVMAutoEnableDisable.valueIn(host.getClusterId())) { + s_logger.debug(String.format("%s is disabled for the cluster %s, cannot process the health check result " + + "received for the host %s", EnableKVMAutoEnableDisable.key(), host.getClusterId(), host.getName())); + return; + } + + String allocationState = hostHealthCheckResult ? "Enable" : "Disable"; + + try { + s_logger.info(String.format("Host health check %s, auto %s KVM host: %s", + hostHealthCheckResult ? "succeeds" : "fails", + hostHealthCheckResult ? "enabling" : "disabling", + host.getName())); + _resourceMgr.updateHostAllocationState(hostId, allocationState); + } catch (NoTransitionException e) { + s_logger.error(String.format("Cannot Auto %s host: %s", allocationState, host.getName()), e); + } + } + + private void processStartupRoutingCommand(StartupRoutingCommand startup, long hostId) { + if (startup == null) { + s_logger.error("Empty StartupRoutingCommand received"); + return; + } + Boolean hostHealthCheckResult = startup.getHostHealthCheckResult(); + processHostHealthCheckResult(hostHealthCheckResult, hostId); + } + + private void processPingRoutingCommand(PingRoutingCommand pingRoutingCommand, long hostId) { + if (pingRoutingCommand == null) { + s_logger.error("Empty PingRoutingCommand received"); + return; + } + Boolean hostHealthCheckResult = pingRoutingCommand.getHostHealthCheckResult(); + processHostHealthCheckResult(hostHealthCheckResult, hostId); + } + protected AgentManagerImpl() { } @@ -1747,8 +1797,8 @@ public String getConfigComponentName() { @Override public ConfigKey[] getConfigKeys() { - return new ConfigKey[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize, DirectAgentPoolSize, - DirectAgentThreadCap }; + return new ConfigKey[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize, + DirectAgentPoolSize, DirectAgentThreadCap, EnableKVMAutoEnableDisable }; } protected class SetHostParamsListener implements Listener { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index a24a0464b0ee..2b480b906607 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -323,6 +323,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv private String _dcId; private String _clusterId; private final Properties _uefiProperties = new Properties(); + private String hostHealthCheckScriptPath; private long _hvVersion; private Duration _timeout; @@ -942,6 +943,16 @@ public boolean configure(final String name, final Map params) th throw new ConfigurationException("Unable to find the ovs-pvlan-kvm-vm.sh"); } + String healthCheckScriptPath = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEALTH_CHECK_SCRIPT_PATH); + if (org.apache.commons.lang3.StringUtils.isNotBlank(healthCheckScriptPath)) { + if (new File(healthCheckScriptPath).exists()) { + hostHealthCheckScriptPath = healthCheckScriptPath; + } else { + s_logger.info(String.format("Unable to find the host health check script at: %s, " + + "discarding it", healthCheckScriptPath)); + } + } + setupTungstenVrouterPath = Script.findScript(tungstenScriptsDir, "setup_tungsten_vrouter.sh"); if (setupTungstenVrouterPath == null) { throw new ConfigurationException("Unable to find the setup_tungsten_vrouter.sh"); @@ -3431,13 +3442,37 @@ protected synchronized String attachOrDetachDevice(final Connect conn, final boo @Override public PingCommand getCurrentStatus(final long id) { - + PingRoutingCommand pingRoutingCommand; if (!_canBridgeFirewall) { - return new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport()); + pingRoutingCommand = new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport()); } else { final HashMap> nwGrpStates = syncNetworkGroups(id); - return new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates); + pingRoutingCommand = new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates); + } + Boolean healthCheckResult = getHostHealthCheckResult(hostHealthCheckScriptPath); + if (healthCheckResult != null) { + pingRoutingCommand.setHostHealthCheckResult(healthCheckResult); + } + return pingRoutingCommand; + } + + private Boolean getHostHealthCheckResult(String hostHealthCheckScriptPath) { + if (org.apache.commons.lang3.StringUtils.isBlank(hostHealthCheckScriptPath)) { + s_logger.debug("Host health check script path is not specified"); + return null; } + File script = new File(hostHealthCheckScriptPath); + if (!script.exists() || !script.isFile() || !script.canExecute()) { + s_logger.warn(String.format("The host health check script file set at: %s cannot be executed, " + + "reason: %s", hostHealthCheckScriptPath, + !script.exists() ? "file does not exist" : "please check file permissions to execute this file")); + return null; + } + int exitCode = executeBashScriptAndRetrieveExitValue(hostHealthCheckScriptPath); + if (s_logger.isDebugEnabled()) { + s_logger.debug(String.format("Host health check script exit code: %s", exitCode)); + } + return exitCode == 0; } @Override @@ -3479,6 +3514,10 @@ public StartupCommand[] initialize() { cmd.setGatewayIpAddress(_localGateway); cmd.setIqn(getIqn()); cmd.getHostDetails().put(HOST_VOLUME_ENCRYPTION, String.valueOf(hostSupportsVolumeEncryption())); + Boolean healthCheckResult = getHostHealthCheckResult(hostHealthCheckScriptPath); + if (healthCheckResult != null) { + cmd.setHostHealthCheckResult(healthCheckResult); + } if (cmd.getHostDetails().containsKey("Host.OS")) { _hostDistro = cmd.getHostDetails().get("Host.OS"); diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 346290827486..0616530f9292 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -1774,73 +1774,92 @@ public boolean checkAndMaintain(final long hostId) { return hostInMaintenance; } + private void updateHostAllocationState(HostVO host, String allocationState) throws NoTransitionException { + final ResourceState.Event resourceEvent = ResourceState.Event.toEvent(allocationState); + if (resourceEvent != ResourceState.Event.Enable && resourceEvent != ResourceState.Event.Disable) { + throw new CloudRuntimeException(String.format("Invalid allocation state: %s, " + + "only Enable/Disable are allowed", allocationState)); + } + + resourceStateTransitTo(host, resourceEvent, _nodeId); + } + + private void updateHostName(HostVO host, String name) { + s_logger.debug("Updating Host name to: " + name); + host.setName(name); + _hostDao.update(host.getId(), host); + } + + private void updateHostGuestOSCategory(Long hostId, Long guestOSCategoryId) { + // Verify that the guest OS Category exists + if (!(guestOSCategoryId > 0) || _guestOSCategoryDao.findById(guestOSCategoryId) == null) { + throw new InvalidParameterValueException("Please specify a valid guest OS category."); + } + + final GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao.findById(guestOSCategoryId); + final DetailVO guestOSDetail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id"); + + if (guestOSCategory != null && !GuestOSCategoryVO.CATEGORY_NONE.equalsIgnoreCase(guestOSCategory.getName())) { + // Create/Update an entry for guest.os.category.id + if (guestOSDetail != null) { + guestOSDetail.setValue(String.valueOf(guestOSCategory.getId())); + _hostDetailsDao.update(guestOSDetail.getId(), guestOSDetail); + } else { + final Map detail = new HashMap(); + detail.put("guest.os.category.id", String.valueOf(guestOSCategory.getId())); + _hostDetailsDao.persist(hostId, detail); + } + } else { + // Delete any existing entry for guest.os.category.id + if (guestOSDetail != null) { + _hostDetailsDao.remove(guestOSDetail.getId()); + } + } + } + + private void updateHostTags(HostVO host, Long hostId, List hostTags) { + List activeVMs = _vmDao.listByHostId(hostId); + s_logger.warn(String.format("The following active VMs [%s] are using the host [%s]. " + + "Updating the host tags will not affect them.", activeVMs, host)); + + if (s_logger.isDebugEnabled()) { + s_logger.debug("Updating Host Tags to :" + hostTags); + } + _hostTagsDao.persist(hostId, new ArrayList<>(new HashSet<>(hostTags))); + } + @Override public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { - Long hostId = cmd.getId(); - String name = cmd.getName(); - Long guestOSCategoryId = cmd.getOsCategoryId(); + return updateHost(cmd.getId(), cmd.getName(), cmd.getOsCategoryId(), + cmd.getAllocationState(), cmd.getUrl(), cmd.getHostTags()); + } + private Host updateHost(Long hostId, String name, Long guestOSCategoryId, + String allocationState, String url, List hostTags) throws NoTransitionException { // Verify that the host exists final HostVO host = _hostDao.findById(hostId); if (host == null) { throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist"); } - if (cmd.getAllocationState() != null) { - final ResourceState.Event resourceEvent = ResourceState.Event.toEvent(cmd.getAllocationState()); - if (resourceEvent != ResourceState.Event.Enable && resourceEvent != ResourceState.Event.Disable) { - throw new CloudRuntimeException("Invalid allocation state:" + cmd.getAllocationState() + ", only Enable/Disable are allowed"); - } - - resourceStateTransitTo(host, resourceEvent, _nodeId); + if (StringUtils.isNotBlank(allocationState)) { + updateHostAllocationState(host, allocationState); } if (StringUtils.isNotBlank(name)) { - s_logger.debug("Updating Host name to: " + name); - host.setName(name); - _hostDao.update(host.getId(), host); + updateHostName(host, name); } if (guestOSCategoryId != null) { - // Verify that the guest OS Category exists - if (!(guestOSCategoryId > 0) || _guestOSCategoryDao.findById(guestOSCategoryId) == null) { - throw new InvalidParameterValueException("Please specify a valid guest OS category."); - } - - final GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao.findById(guestOSCategoryId); - final DetailVO guestOSDetail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id"); - - if (guestOSCategory != null && !GuestOSCategoryVO.CATEGORY_NONE.equalsIgnoreCase(guestOSCategory.getName())) { - // Create/Update an entry for guest.os.category.id - if (guestOSDetail != null) { - guestOSDetail.setValue(String.valueOf(guestOSCategory.getId())); - _hostDetailsDao.update(guestOSDetail.getId(), guestOSDetail); - } else { - final Map detail = new HashMap(); - detail.put("guest.os.category.id", String.valueOf(guestOSCategory.getId())); - _hostDetailsDao.persist(hostId, detail); - } - } else { - // Delete any existing entry for guest.os.category.id - if (guestOSDetail != null) { - _hostDetailsDao.remove(guestOSDetail.getId()); - } - } + updateHostGuestOSCategory(hostId, guestOSCategoryId); } - final List hostTags = cmd.getHostTags(); - if (hostTags != null) { - List activeVMs = _vmDao.listByHostId(hostId); - s_logger.warn(String.format("The following active VMs [%s] are using the host [%s]. Updating the host tags will not affect them.", activeVMs, host)); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Updating Host Tags to :" + hostTags); - } - _hostTagsDao.persist(hostId, new ArrayList(new HashSet(hostTags))); + if (hostTags != null) { + updateHostTags(host, hostId, hostTags); } - final String url = cmd.getUrl(); if (url != null) { - _storageMgr.updateSecondaryStorage(cmd.getId(), cmd.getUrl()); + _storageMgr.updateSecondaryStorage(hostId, url); } try { _storageMgr.enableHost(hostId); @@ -1852,6 +1871,11 @@ public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { return updatedHost; } + @Override + public Host updateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException { + return updateHost(hostId, null, null, allocationState, null, null); + } + @Override public Cluster getCluster(final Long clusterId) { return _clusterDao.findById(clusterId); diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index 4d5b5ba584bf..8f90bd0bcf16 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -73,6 +73,11 @@ public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { return null; } + @Override + public Host updateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException { + return null; + } + /* (non-Javadoc) * @see com.cloud.resource.ResourceService#cancelMaintenance(com.cloud.api.commands.CancelMaintenanceCmd) */ From 123e5c9fa930cb8220cb2bf2dc767938b19ad42d Mon Sep 17 00:00:00 2001 From: nvazquez Date: Wed, 1 Feb 2023 16:09:54 -0300 Subject: [PATCH 02/13] Improve health check result --- .../java/com/cloud/resource/ResourceService.java | 2 ++ .../api/command/admin/host/UpdateHostCmd.java | 2 +- .../com/cloud/agent/manager/AgentManagerImpl.java | 2 -- .../kvm/resource/LibvirtComputingResource.java | 12 +++++++++++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/api/src/main/java/com/cloud/resource/ResourceService.java b/api/src/main/java/com/cloud/resource/ResourceService.java index c1ca3722bea1..684808bfd923 100644 --- a/api/src/main/java/com/cloud/resource/ResourceService.java +++ b/api/src/main/java/com/cloud/resource/ResourceService.java @@ -44,6 +44,8 @@ public interface ResourceService { /** * Updates a host + * + * @param cmd - the command specifying hostId */ Host updateHost(UpdateHostCmd cmd) throws NoTransitionException; diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java index 5ca53c077407..24ad20c62abe 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java @@ -117,7 +117,7 @@ public void execute() { Host result; try { result = _resourceService.updateHost(this); - if(getAnnotation() != null) { + if (getAnnotation() != null) { annotationService.addAnnotation(getAnnotation(), AnnotationService.EntityType.HOST, result.getUuid(), true); } HostResponse hostResponse = _responseGenerator.createHostResponse(result); diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 3fb8225ad74d..9e1701620fb0 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -1322,9 +1322,7 @@ protected void processRequest(final Link link, final Request request) { // if the router is sending a ping, verify the // gateway was pingable if (cmd instanceof PingRoutingCommand) { - processPingRoutingCommand((PingRoutingCommand) cmd, hostId); - final boolean gatewayAccessible = ((PingRoutingCommand)cmd).isGatewayAccessible(); final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId)); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 2b480b906607..882aca707c64 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -3456,6 +3456,16 @@ public PingCommand getCurrentStatus(final long id) { return pingRoutingCommand; } + /** + * The health check result is true, if the script is executed successfully and the exit code is 0 + * The health check result is false, if the script is executed successfully and the exit code is 1 + * The health check result is null, if + * - Script file is not specified, or + * - Script file does not exist, or + * - Script file is not accessible by the user of the cloudstack-agent process, or + * - Script file is not executable + * - There are errors when the script is executed (exit codes other than 0 or 1) + */ private Boolean getHostHealthCheckResult(String hostHealthCheckScriptPath) { if (org.apache.commons.lang3.StringUtils.isBlank(hostHealthCheckScriptPath)) { s_logger.debug("Host health check script path is not specified"); @@ -3472,7 +3482,7 @@ private Boolean getHostHealthCheckResult(String hostHealthCheckScriptPath) { if (s_logger.isDebugEnabled()) { s_logger.debug(String.format("Host health check script exit code: %s", exitCode)); } - return exitCode == 0; + return exitCode != 0 && exitCode != 1 ? null : exitCode == 0; } @Override From b584e5bf757093a4d20775de83014aab2a14f410 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Fri, 3 Feb 2023 13:00:07 -0300 Subject: [PATCH 03/13] Fix corner cases --- .../com/cloud/resource/ResourceService.java | 2 +- .../apache/cloudstack/api/ApiConstants.java | 1 + .../cloud/agent/manager/AgentManagerImpl.java | 2 +- .../cloud/resource/ResourceManagerImpl.java | 64 ++++++++++++++++--- .../resource/MockResourceManagerImpl.java | 2 +- 5 files changed, 60 insertions(+), 11 deletions(-) diff --git a/api/src/main/java/com/cloud/resource/ResourceService.java b/api/src/main/java/com/cloud/resource/ResourceService.java index 684808bfd923..4e25318e0cb4 100644 --- a/api/src/main/java/com/cloud/resource/ResourceService.java +++ b/api/src/main/java/com/cloud/resource/ResourceService.java @@ -49,7 +49,7 @@ public interface ResourceService { */ Host updateHost(UpdateHostCmd cmd) throws NoTransitionException; - Host updateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException; + Host autoUpdateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException; Host cancelMaintenance(CancelMaintenanceCmd cmd); diff --git a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java index ec5799ceb912..0c15dd6bffbb 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java @@ -1010,6 +1010,7 @@ public class ApiConstants { public static final String PUBLIC_MTU = "publicmtu"; public static final String PRIVATE_MTU = "privatemtu"; public static final String MTU = "mtu"; + public static final String AUTO_ENABLE_KVM_HOST = "autoenablekvmhost"; /** * This enum specifies IO Drivers, each option controls specific policies on I/O. diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 9e1701620fb0..bc1d55bdff82 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -1449,7 +1449,7 @@ private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long ho hostHealthCheckResult ? "succeeds" : "fails", hostHealthCheckResult ? "enabling" : "disabling", host.getName())); - _resourceMgr.updateHostAllocationState(hostId, allocationState); + _resourceMgr.autoUpdateHostAllocationState(hostId, allocationState); } catch (NoTransitionException e) { s_logger.error(String.format("Cannot Auto %s host: %s", allocationState, host.getName()), e); } diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 0616530f9292..730096e40ec9 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -1774,12 +1774,60 @@ public boolean checkAndMaintain(final long hostId) { return hostInMaintenance; } - private void updateHostAllocationState(HostVO host, String allocationState) throws NoTransitionException { + private ResourceState.Event getResourceEventFromAllocationStateString(String allocationState) { final ResourceState.Event resourceEvent = ResourceState.Event.toEvent(allocationState); if (resourceEvent != ResourceState.Event.Enable && resourceEvent != ResourceState.Event.Disable) { - throw new CloudRuntimeException(String.format("Invalid allocation state: %s, " + + throw new InvalidParameterValueException(String.format("Invalid allocation state: %s, " + "only Enable/Disable are allowed", allocationState)); } + return resourceEvent; + } + + private void handleAutoEnableDisableKVMHost(boolean autoEnableDisableKVMSetting, + boolean isUpdateFromHostHealthCheck, + HostVO host, DetailVO hostDetail, + ResourceState.Event resourceEvent) { + if (autoEnableDisableKVMSetting) { + if (!isUpdateFromHostHealthCheck && hostDetail != null && + !Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Enable) { + hostDetail.setValue(Boolean.TRUE.toString()); + _hostDetailsDao.update(hostDetail.getId(), hostDetail); + } else if (!isUpdateFromHostHealthCheck && Boolean.parseBoolean(hostDetail.getValue()) + && resourceEvent == ResourceState.Event.Disable) { + s_logger.info(String.format("The setting %s is enabled but the host %s is manually set into %s state," + + "ignoring future auto enabling of the host based on health check results", + AgentManager.EnableKVMAutoEnableDisable.key(), host.getName(), resourceEvent)); + hostDetail.setValue(Boolean.FALSE.toString()); + _hostDetailsDao.update(hostDetail.getId(), hostDetail); + } else if (isUpdateFromHostHealthCheck && hostDetail == null) { + hostDetail = new DetailVO(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST, Boolean.TRUE.toString()); + _hostDetailsDao.persist(hostDetail); + } + } + } + private void updateHostAllocationState(HostVO host, String allocationState, + boolean isUpdateFromHostHealthCheck) throws NoTransitionException { + boolean autoEnableDisableKVMSetting = AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()) && + host.getHypervisorType() == HypervisorType.KVM; + ResourceState.Event resourceEvent = getResourceEventFromAllocationStateString(allocationState); + DetailVO hostDetail = _hostDetailsDao.findDetail(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST); + + if ((host.getResourceState() == ResourceState.Enabled && resourceEvent == ResourceState.Event.Enable) || + (host.getResourceState() == ResourceState.Disabled && resourceEvent == ResourceState.Event.Disable)) { + s_logger.info(String.format("The host %s is already on the allocated state", host.getName())); + return; + } + + if (autoEnableDisableKVMSetting && isUpdateFromHostHealthCheck && hostDetail != null && + !Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Enable) { + s_logger.debug(String.format("The setting %s is enabled and the health check succeeds on the host, " + + "but the host has been manually disabled previously, ignoring auto enabling", + AgentManager.EnableKVMAutoEnableDisable.key())); + return; + } + + handleAutoEnableDisableKVMHost(autoEnableDisableKVMSetting, isUpdateFromHostHealthCheck, host, + hostDetail, resourceEvent); resourceStateTransitTo(host, resourceEvent, _nodeId); } @@ -1831,11 +1879,11 @@ private void updateHostTags(HostVO host, Long hostId, List hostTags) { @Override public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { return updateHost(cmd.getId(), cmd.getName(), cmd.getOsCategoryId(), - cmd.getAllocationState(), cmd.getUrl(), cmd.getHostTags()); + cmd.getAllocationState(), cmd.getUrl(), cmd.getHostTags(), false); } - private Host updateHost(Long hostId, String name, Long guestOSCategoryId, - String allocationState, String url, List hostTags) throws NoTransitionException { + private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String allocationState, + String url, List hostTags, boolean isUpdateFromHostHealthCheck) throws NoTransitionException { // Verify that the host exists final HostVO host = _hostDao.findById(hostId); if (host == null) { @@ -1843,7 +1891,7 @@ private Host updateHost(Long hostId, String name, Long guestOSCategoryId, } if (StringUtils.isNotBlank(allocationState)) { - updateHostAllocationState(host, allocationState); + updateHostAllocationState(host, allocationState, isUpdateFromHostHealthCheck); } if (StringUtils.isNotBlank(name)) { @@ -1872,8 +1920,8 @@ private Host updateHost(Long hostId, String name, Long guestOSCategoryId, } @Override - public Host updateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException { - return updateHost(hostId, null, null, allocationState, null, null); + public Host autoUpdateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException { + return updateHost(hostId, null, null, allocationState, null, null, true); } @Override diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index 8f90bd0bcf16..02f4d5b5513f 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -74,7 +74,7 @@ public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { } @Override - public Host updateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException { + public Host autoUpdateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException { return null; } From 18f4c88c8ba1d0945550c0612885ea1b2ca803ba Mon Sep 17 00:00:00 2001 From: nvazquez Date: Fri, 3 Feb 2023 23:19:39 -0300 Subject: [PATCH 04/13] Script path refactor --- .../resource/LibvirtComputingResource.java | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 882aca707c64..ffac026457d2 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -943,14 +943,10 @@ public boolean configure(final String name, final Map params) th throw new ConfigurationException("Unable to find the ovs-pvlan-kvm-vm.sh"); } - String healthCheckScriptPath = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEALTH_CHECK_SCRIPT_PATH); - if (org.apache.commons.lang3.StringUtils.isNotBlank(healthCheckScriptPath)) { - if (new File(healthCheckScriptPath).exists()) { - hostHealthCheckScriptPath = healthCheckScriptPath; - } else { - s_logger.info(String.format("Unable to find the host health check script at: %s, " + - "discarding it", healthCheckScriptPath)); - } + hostHealthCheckScriptPath = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEALTH_CHECK_SCRIPT_PATH); + if (StringUtils.isNotBlank(hostHealthCheckScriptPath) && !new File(hostHealthCheckScriptPath).exists()) { + s_logger.info(String.format("Unable to find the host health check script at: %s, " + + "discarding it", hostHealthCheckScriptPath)); } setupTungstenVrouterPath = Script.findScript(tungstenScriptsDir, "setup_tungsten_vrouter.sh"); @@ -3449,7 +3445,7 @@ public PingCommand getCurrentStatus(final long id) { final HashMap> nwGrpStates = syncNetworkGroups(id); pingRoutingCommand = new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates); } - Boolean healthCheckResult = getHostHealthCheckResult(hostHealthCheckScriptPath); + Boolean healthCheckResult = getHostHealthCheckResult(); if (healthCheckResult != null) { pingRoutingCommand.setHostHealthCheckResult(healthCheckResult); } @@ -3466,8 +3462,8 @@ public PingCommand getCurrentStatus(final long id) { * - Script file is not executable * - There are errors when the script is executed (exit codes other than 0 or 1) */ - private Boolean getHostHealthCheckResult(String hostHealthCheckScriptPath) { - if (org.apache.commons.lang3.StringUtils.isBlank(hostHealthCheckScriptPath)) { + private Boolean getHostHealthCheckResult() { + if (StringUtils.isBlank(hostHealthCheckScriptPath)) { s_logger.debug("Host health check script path is not specified"); return null; } @@ -3524,7 +3520,7 @@ public StartupCommand[] initialize() { cmd.setGatewayIpAddress(_localGateway); cmd.setIqn(getIqn()); cmd.getHostDetails().put(HOST_VOLUME_ENCRYPTION, String.valueOf(hostSupportsVolumeEncryption())); - Boolean healthCheckResult = getHostHealthCheckResult(hostHealthCheckScriptPath); + Boolean healthCheckResult = getHostHealthCheckResult(); if (healthCheckResult != null) { cmd.setHostHealthCheckResult(healthCheckResult); } From 71e9683865fd27719ad863356d8dca177acc00a3 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Sat, 4 Feb 2023 08:10:37 -0300 Subject: [PATCH 05/13] Fix sonar cloud reports --- .../cloud/agent/manager/AgentManagerImpl.java | 39 ++++++++++--------- .../resource/LibvirtComputingResource.java | 26 ++++++++----- .../cloud/resource/ResourceManagerImpl.java | 4 +- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index bc1d55bdff82..f178a496a5b9 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -50,6 +50,7 @@ import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.commons.lang3.BooleanUtils; import org.apache.log4j.Logger; import org.apache.log4j.MDC; @@ -1249,6 +1250,24 @@ public AgentHandler(final Task.Type type, final Link link, final byte[] data) { super(type, link, data); } + private void processStartupRoutingCommand(StartupRoutingCommand startup, long hostId) { + if (startup == null) { + s_logger.error("Empty StartupRoutingCommand received"); + return; + } + Boolean hostHealthCheckResult = startup.getHostHealthCheckResult(); + processHostHealthCheckResult(hostHealthCheckResult, hostId); + } + + private void processPingRoutingCommand(PingRoutingCommand pingRoutingCommand, long hostId) { + if (pingRoutingCommand == null) { + s_logger.error("Empty PingRoutingCommand received"); + return; + } + Boolean hostHealthCheckResult = pingRoutingCommand.getHostHealthCheckResult(); + processHostHealthCheckResult(hostHealthCheckResult, hostId); + } + protected void processRequest(final Link link, final Request request) { final AgentAttache attache = (AgentAttache)link.attachment(); final Command[] cmds = request.getCommands(); @@ -1436,7 +1455,7 @@ private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long ho s_logger.error(String.format("Unable to find host with ID: %s", hostId)); return; } - if (!EnableKVMAutoEnableDisable.valueIn(host.getClusterId())) { + if (!BooleanUtils.toBoolean(EnableKVMAutoEnableDisable.valueIn(host.getClusterId()))) { s_logger.debug(String.format("%s is disabled for the cluster %s, cannot process the health check result " + "received for the host %s", EnableKVMAutoEnableDisable.key(), host.getClusterId(), host.getName())); return; @@ -1455,24 +1474,6 @@ private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long ho } } - private void processStartupRoutingCommand(StartupRoutingCommand startup, long hostId) { - if (startup == null) { - s_logger.error("Empty StartupRoutingCommand received"); - return; - } - Boolean hostHealthCheckResult = startup.getHostHealthCheckResult(); - processHostHealthCheckResult(hostHealthCheckResult, hostId); - } - - private void processPingRoutingCommand(PingRoutingCommand pingRoutingCommand, long hostId) { - if (pingRoutingCommand == null) { - s_logger.error("Empty PingRoutingCommand received"); - return; - } - Boolean hostHealthCheckResult = pingRoutingCommand.getHostHealthCheckResult(); - processHostHealthCheckResult(hostHealthCheckResult, hostId); - } - protected AgentManagerImpl() { } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index ffac026457d2..274b820603a2 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -717,6 +717,10 @@ protected enum BridgeType { NATIVE, OPENVSWITCH, TUNGSTEN } + protected enum HealthCheckResult { + SUCCESS, FAILURE, IGNORE + } + protected BridgeType _bridgeType; protected StorageSubsystemCommandHandler storageHandler; @@ -3445,9 +3449,9 @@ public PingCommand getCurrentStatus(final long id) { final HashMap> nwGrpStates = syncNetworkGroups(id); pingRoutingCommand = new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates); } - Boolean healthCheckResult = getHostHealthCheckResult(); - if (healthCheckResult != null) { - pingRoutingCommand.setHostHealthCheckResult(healthCheckResult); + HealthCheckResult healthCheckResult = getHostHealthCheckResult(); + if (healthCheckResult != HealthCheckResult.IGNORE) { + pingRoutingCommand.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS); } return pingRoutingCommand; } @@ -3462,23 +3466,25 @@ public PingCommand getCurrentStatus(final long id) { * - Script file is not executable * - There are errors when the script is executed (exit codes other than 0 or 1) */ - private Boolean getHostHealthCheckResult() { + private HealthCheckResult getHostHealthCheckResult() { if (StringUtils.isBlank(hostHealthCheckScriptPath)) { s_logger.debug("Host health check script path is not specified"); - return null; + return HealthCheckResult.IGNORE; } File script = new File(hostHealthCheckScriptPath); if (!script.exists() || !script.isFile() || !script.canExecute()) { s_logger.warn(String.format("The host health check script file set at: %s cannot be executed, " + "reason: %s", hostHealthCheckScriptPath, !script.exists() ? "file does not exist" : "please check file permissions to execute this file")); - return null; + return HealthCheckResult.IGNORE; } int exitCode = executeBashScriptAndRetrieveExitValue(hostHealthCheckScriptPath); if (s_logger.isDebugEnabled()) { s_logger.debug(String.format("Host health check script exit code: %s", exitCode)); } - return exitCode != 0 && exitCode != 1 ? null : exitCode == 0; + return exitCode != 0 && exitCode != 1 ? + HealthCheckResult.IGNORE : + (exitCode == 0 ? HealthCheckResult.SUCCESS : HealthCheckResult.FAILURE); } @Override @@ -3520,9 +3526,9 @@ public StartupCommand[] initialize() { cmd.setGatewayIpAddress(_localGateway); cmd.setIqn(getIqn()); cmd.getHostDetails().put(HOST_VOLUME_ENCRYPTION, String.valueOf(hostSupportsVolumeEncryption())); - Boolean healthCheckResult = getHostHealthCheckResult(); - if (healthCheckResult != null) { - cmd.setHostHealthCheckResult(healthCheckResult); + HealthCheckResult healthCheckResult = getHostHealthCheckResult(); + if (healthCheckResult != HealthCheckResult.IGNORE) { + cmd.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS); } if (cmd.getHostDetails().containsKey("Host.OS")) { diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 730096e40ec9..764cc3d21ebb 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -1792,8 +1792,8 @@ private void handleAutoEnableDisableKVMHost(boolean autoEnableDisableKVMSetting, !Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Enable) { hostDetail.setValue(Boolean.TRUE.toString()); _hostDetailsDao.update(hostDetail.getId(), hostDetail); - } else if (!isUpdateFromHostHealthCheck && Boolean.parseBoolean(hostDetail.getValue()) - && resourceEvent == ResourceState.Event.Disable) { + } else if (!isUpdateFromHostHealthCheck && hostDetail != null && + Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Disable) { s_logger.info(String.format("The setting %s is enabled but the host %s is manually set into %s state," + "ignoring future auto enabling of the host based on health check results", AgentManager.EnableKVMAutoEnableDisable.key(), host.getName(), resourceEvent)); From 50b79f5c0c18fc5b60ee09cb60addd14389504b9 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Sat, 4 Feb 2023 10:37:06 -0300 Subject: [PATCH 06/13] Fix last code smells --- .../cloud/agent/manager/AgentManagerImpl.java | 56 +++++++++---------- .../resource/LibvirtComputingResource.java | 11 +++- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index f178a496a5b9..96ecd66cd1bb 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -1250,6 +1250,34 @@ public AgentHandler(final Task.Type type, final Link link, final byte[] data) { super(type, link, data); } + private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long hostId) { + if (hostHealthCheckResult == null) { + return; + } + HostVO host = _hostDao.findById(hostId); + if (host == null) { + s_logger.error(String.format("Unable to find host with ID: %s", hostId)); + return; + } + if (!BooleanUtils.toBoolean(EnableKVMAutoEnableDisable.valueIn(host.getClusterId()))) { + s_logger.debug(String.format("%s is disabled for the cluster %s, cannot process the health check result " + + "received for the host %s", EnableKVMAutoEnableDisable.key(), host.getClusterId(), host.getName())); + return; + } + + String allocationState = hostHealthCheckResult ? "Enable" : "Disable"; + + try { + s_logger.info(String.format("Host health check %s, auto %s KVM host: %s", + hostHealthCheckResult ? "succeeds" : "fails", + hostHealthCheckResult ? "enabling" : "disabling", + host.getName())); + _resourceMgr.autoUpdateHostAllocationState(hostId, allocationState); + } catch (NoTransitionException e) { + s_logger.error(String.format("Cannot Auto %s host: %s", allocationState, host.getName()), e); + } + } + private void processStartupRoutingCommand(StartupRoutingCommand startup, long hostId) { if (startup == null) { s_logger.error("Empty StartupRoutingCommand received"); @@ -1446,34 +1474,6 @@ protected void doTask(final Task task) throws TaskExecutionException { } } - private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long hostId) { - if (hostHealthCheckResult == null) { - return; - } - HostVO host = _hostDao.findById(hostId); - if (host == null) { - s_logger.error(String.format("Unable to find host with ID: %s", hostId)); - return; - } - if (!BooleanUtils.toBoolean(EnableKVMAutoEnableDisable.valueIn(host.getClusterId()))) { - s_logger.debug(String.format("%s is disabled for the cluster %s, cannot process the health check result " + - "received for the host %s", EnableKVMAutoEnableDisable.key(), host.getClusterId(), host.getName())); - return; - } - - String allocationState = hostHealthCheckResult ? "Enable" : "Disable"; - - try { - s_logger.info(String.format("Host health check %s, auto %s KVM host: %s", - hostHealthCheckResult ? "succeeds" : "fails", - hostHealthCheckResult ? "enabling" : "disabling", - host.getName())); - _resourceMgr.autoUpdateHostAllocationState(hostId, allocationState); - } catch (NoTransitionException e) { - s_logger.error(String.format("Cannot Auto %s host: %s", allocationState, host.getName()), e); - } - } - protected AgentManagerImpl() { } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 274b820603a2..a66c7712d5a6 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -3482,9 +3482,14 @@ private HealthCheckResult getHostHealthCheckResult() { if (s_logger.isDebugEnabled()) { s_logger.debug(String.format("Host health check script exit code: %s", exitCode)); } - return exitCode != 0 && exitCode != 1 ? - HealthCheckResult.IGNORE : - (exitCode == 0 ? HealthCheckResult.SUCCESS : HealthCheckResult.FAILURE); + return retrieveHealthCheckResultFromExitCode(exitCode); + } + + private HealthCheckResult retrieveHealthCheckResultFromExitCode(int exitCode) { + if (exitCode != 0 && exitCode != 1) { + return HealthCheckResult.IGNORE; + } + return exitCode == 0 ? HealthCheckResult.SUCCESS : HealthCheckResult.FAILURE; } @Override From 101cfdda1a38fc3e00953fff75363efb1cfd8512 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Mon, 6 Feb 2023 14:22:39 -0300 Subject: [PATCH 07/13] Add marvin tests --- .../smoke/test_host_control_state.py | 240 +++++++++++++++++- 1 file changed, 238 insertions(+), 2 deletions(-) diff --git a/test/integration/smoke/test_host_control_state.py b/test/integration/smoke/test_host_control_state.py index 809af7d2a0e5..4eb51c349967 100644 --- a/test/integration/smoke/test_host_control_state.py +++ b/test/integration/smoke/test_host_control_state.py @@ -20,7 +20,7 @@ Tests for host control state """ -from marvin.cloudstackAPI import updateHost +from marvin.cloudstackAPI import (updateHost, updateConfiguration) from nose.plugins.attrib import attr from marvin.cloudstackTestCase import cloudstackTestCase from marvin.lib.common import (get_domain, @@ -28,13 +28,18 @@ get_template, list_hosts, list_routers, - list_ssvms) + list_ssvms, + list_clusters, + list_hosts) from marvin.lib.base import (Account, Domain, Host, ServiceOffering, VirtualMachine) from marvin.sshClient import SshClient +from marvin.lib.decoratorGenerators import skipTestIf +from marvin.lib.utils import wait_until +import logging import time @@ -250,3 +255,234 @@ def test_router_host_control_state(self): self.enable_host(host_id) self.verify_router_host_control_state(router.id, "Enabled") + + +class TestAutoEnableDisableHost(cloudstackTestCase): + + @classmethod + def setUpClass(cls): + cls.testClient = super(TestAutoEnableDisableHost, cls).getClsTestClient() + cls.apiclient = cls.testClient.getApiClient() + cls.services = cls.testClient.getParsedTestDataConfig() + # Get Zone, Domain and templates + cls.zone = get_zone(cls.apiclient, cls.testClient.getZoneForTests()) + cls.hypervisor = cls.testClient.getHypervisorInfo() + cls.hostConfig = cls.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__ + if cls.hypervisor.lower() not in ['kvm']: + cls.hypervisorNotSupported = True + return + + cls.template = get_template( + cls.apiclient, + cls.zone.id, + cls.hypervisor + ) + + cls.logger = logging.getLogger('TestAutoEnableDisableHost') + + cls._cleanup = [ + ] + return + + @classmethod + def tearDownClass(cls): + super(TestAutoEnableDisableHost, cls).tearDownClass() + + def setUp(self): + self.apiclient = self.testClient.getApiClient() + self.cleanup = [] + return + + def tearDown(self): + super(TestAutoEnableDisableHost, self).tearDown() + + def get_ssh_client(self, ip, username, password, retries=10): + """ Setup ssh client connection and return connection """ + try: + ssh_client = SshClient(ip, 22, username, password, retries) + except Exception as e: + raise unittest.SkipTest("Unable to create ssh connection: " % e) + + self.assertIsNotNone( + ssh_client, "Failed to setup ssh connection to ip=%s" % ip) + + return ssh_client + + def wait_until_host_is_in_state(self, hostid, resourcestate, interval=3, retries=20): + def check_resource_state(): + response = Host.list( + self.apiclient, + id=hostid + ) + if isinstance(response, list): + if response[0].resourcestate == resourcestate: + self.logger.debug('Host with id %s is in resource state = %s' % (hostid, resourcestate)) + return True, None + else: + self.logger.debug("Waiting for host " + hostid + + " to reach state " + resourcestate + + ", with current state " + response[0].resourcestate) + return False, None + + done, _ = wait_until(interval, retries, check_resource_state) + if not done: + raise Exception("Failed to wait for host %s to be on resource state %s" % (hostid, resourcestate)) + return True + + def update_config(self, enable_feature): + cmd = updateConfiguration.updateConfigurationCmd() + cmd.name = "enable.kvm.host.auto.enable.disable" + cmd.value = enable_feature + + response = self.apiclient.updateConfiguration(cmd) + self.debug("updated the parameter %s with value %s" % (response.name, response.value)) + + def update_health_check_script(self, ip_address, username, password, exit_code): + health_check_script_path = "/etc/cloudstack/agent/healthcheck.sh" + health_check_agent_property = "agent.health.check.script.path" + agent_properties_file_path = "/etc/cloudstack/agent/agent.properties" + + ssh_client = self.get_ssh_client(ip_address, username, password) + ssh_client.execute("echo 'exit %s' > %s" % (exit_code, health_check_script_path)) + ssh_client.execute("chmod +x %s" % health_check_script_path) + ssh_client.execute("echo '%s=%s' >> %s" % (health_check_agent_property, health_check_script_path, + agent_properties_file_path)) + ssh_client.execute("service cloudstack-agent restart") + + def remove_host_health_check(self, ip_address, username, password): + health_check_script_path = "/etc/cloudstack/agent/healthcheck.sh" + ssh_client = self.get_ssh_client(ip_address, username, password) + ssh_client.execute("rm -f %s" % health_check_script_path) + + def select_host_for_health_checks(self): + clusters = list_clusters( + self.apiclient, + zoneid=self.zone.id + ) + if not clusters: + return None + + for cluster in clusters: + list_hosts_response = list_hosts( + self.apiclient, + clusterid=cluster.id, + type="Routing", + resourcestate="Enabled" + ) + assert isinstance(list_hosts_response, list) + if not list_hosts_response or len(list_hosts_response) < 1: + continue + return list_hosts_response[0] + return None + + def update_host_allocation_state(self, id, enable): + cmd = updateHost.updateHostCmd() + cmd.id = id + cmd.allocationstate = "Enable" if enable else "Disable" + response = self.apiclient.updateHost(cmd) + self.assertEqual(response.resourcestate, "Enabled" if enable else "Disabled") + + @attr(tags=["basic", "advanced"], required_hardware="false") + @skipTestIf("hypervisorNotSupported") + def test_01_auto_enable_disable_kvm_host(self): + """Test to auto-enable and auto-disable a KVM host based on health check results + + # Validate the following: + # 1. Enable the KVM Auto Enable/Disable Feature + # 2. Set a health check script that fails and observe the host is Disabled + # 3. Make the health check script succeed and observe the host is Enabled + """ + + selected_host = self.select_host_for_health_checks() + if not selected_host: + self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature") + + username = self.hostConfig["username"] + password = self.hostConfig["password"] + + # Enable the Auto Enable/Disable Configuration + self.update_config("true") + + # Set health check script for failure + self.update_health_check_script(selected_host.ipaddress, username, password, 1) + self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200) + + # Set health check script for success + self.update_health_check_script(selected_host.ipaddress, username, password, 0) + + self.wait_until_host_is_in_state(selected_host.id, "Enabled", 5, 200) + + @attr(tags=["basic", "advanced"], required_hardware="false") + @skipTestIf("hypervisorNotSupported") + def test_02_disable_host_overrides_auto_enable_kvm_host(self): + """Test to override the auto-enabling of a KVM host by an administrator + + # Validate the following: + # 1. Enable the KVM Auto Enable/Disable Feature + # 2. Set a health check script that succeeds and observe the host is Enabled + # 3. Make the host Disabled + # 4. Verify the host does not get auto-enabled after the previous step + """ + + selected_host = self.select_host_for_health_checks() + if not selected_host: + self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature") + + username = self.hostConfig["username"] + password = self.hostConfig["password"] + + # Enable the Auto Enable/Disable Configuration + self.update_config("true") + + # Set health check script for failure + self.update_health_check_script(selected_host.ipaddress, username, password, 0) + self.wait_until_host_is_in_state(selected_host.id, "Enabled", 5, 200) + + # Manually disable the host + self.update_host_allocation_state(selected_host.id, False) + + # Wait for more than the ping interval + time.sleep(70) + + # Verify the host continues on Disabled state + self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200) + + # Restore the host to Enabled state + self.remove_host_health_check(selected_host.ipaddress, username, password) + self.update_host_allocation_state(selected_host.id, True) + + @attr(tags=["basic", "advanced"], required_hardware="false") + @skipTestIf("hypervisorNotSupported") + def test_03_enable_host_does_not_override_auto_disable_kvm_host(self): + """Test to override the auto-disabling of a KVM host by an administrator + + # Validate the following: + # 1. Enable the KVM Auto Enable/Disable Feature + # 2. Set a health check script that fails and observe the host is Disabled + # 3. Make the host Enabled + # 4. Verify the host does not get auto-disabled after the previous step + """ + + selected_host = self.select_host_for_health_checks() + if not selected_host: + self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature") + + username = self.hostConfig["username"] + password = self.hostConfig["password"] + + # Enable the Auto Enable/Disable Configuration + self.update_config("true") + + # Set health check script for failure + self.update_health_check_script(selected_host.ipaddress, username, password, 1) + self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200) + + # Manually enable the host + self.update_host_allocation_state(selected_host.id, True) + + # Verify the host goes back to Disabled state + self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200) + + # Restore the host to Enabled state + self.remove_host_health_check(selected_host.ipaddress, username, password) + self.update_host_allocation_state(selected_host.id, True) From 09b8149283c658c4f856e54843647435ce98eeef Mon Sep 17 00:00:00 2001 From: nvazquez Date: Tue, 7 Feb 2023 08:30:18 -0300 Subject: [PATCH 08/13] Fix new line on agent.properties to prevent host add failures --- agent/conf/agent.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties index eceb594a8829..9174da7fd7bc 100644 --- a/agent/conf/agent.properties +++ b/agent/conf/agent.properties @@ -401,4 +401,4 @@ iscsi.session.cleanup.enabled=false # The path of an executable file/script for host health check for CloudStack to Auto Disable/Enable the host # depending on the return value of the file/script -# agent.health.check.script.path= \ No newline at end of file +# agent.health.check.script.path= From 5bbda8fb599fd607a7dc2c2fab9022ccb326aae1 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Tue, 7 Feb 2023 22:53:28 -0300 Subject: [PATCH 09/13] Send alert on auto-enable-disable and add annotations when the setting is enabled --- .../cloud/resource/ResourceManagerImpl.java | 46 +++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 764cc3d21ebb..263c560b0946 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -36,8 +36,10 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; +import com.cloud.alert.AlertManager; import com.cloud.exception.StorageConflictException; import com.cloud.exception.StorageUnavailableException; +import org.apache.cloudstack.alert.AlertService; import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.annotation.dao.AnnotationDao; import org.apache.cloudstack.api.ApiConstants; @@ -294,6 +296,10 @@ public void setDiscoverers(final List discoverers) { private UserVmDetailsDao userVmDetailsDao; @Inject private AnnotationDao annotationDao; + @Inject + private AlertManager alertManager; + @Inject + private AnnotationService annotationService; private final long _nodeId = ManagementServerNode.getManagementServerId(); @@ -1805,7 +1811,7 @@ private void handleAutoEnableDisableKVMHost(boolean autoEnableDisableKVMSetting, } } } - private void updateHostAllocationState(HostVO host, String allocationState, + private boolean updateHostAllocationState(HostVO host, String allocationState, boolean isUpdateFromHostHealthCheck) throws NoTransitionException { boolean autoEnableDisableKVMSetting = AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()) && host.getHypervisorType() == HypervisorType.KVM; @@ -1815,7 +1821,7 @@ private void updateHostAllocationState(HostVO host, String allocationState, if ((host.getResourceState() == ResourceState.Enabled && resourceEvent == ResourceState.Event.Enable) || (host.getResourceState() == ResourceState.Disabled && resourceEvent == ResourceState.Event.Disable)) { s_logger.info(String.format("The host %s is already on the allocated state", host.getName())); - return; + return false; } if (autoEnableDisableKVMSetting && isUpdateFromHostHealthCheck && hostDetail != null && @@ -1823,13 +1829,14 @@ private void updateHostAllocationState(HostVO host, String allocationState, s_logger.debug(String.format("The setting %s is enabled and the health check succeeds on the host, " + "but the host has been manually disabled previously, ignoring auto enabling", AgentManager.EnableKVMAutoEnableDisable.key())); - return; + return false; } handleAutoEnableDisableKVMHost(autoEnableDisableKVMSetting, isUpdateFromHostHealthCheck, host, hostDetail, resourceEvent); resourceStateTransitTo(host, resourceEvent, _nodeId); + return true; } private void updateHostName(HostVO host, String name) { @@ -1890,8 +1897,9 @@ private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist"); } + boolean isUpdateHostAllocation = false; if (StringUtils.isNotBlank(allocationState)) { - updateHostAllocationState(host, allocationState, isUpdateFromHostHealthCheck); + isUpdateHostAllocation = updateHostAllocationState(host, allocationState, isUpdateFromHostHealthCheck); } if (StringUtils.isNotBlank(name)) { @@ -1916,9 +1924,39 @@ private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String } final HostVO updatedHost = _hostDao.findById(hostId); + + if (isUpdateHostAllocation) { + sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(host, allocationState, isUpdateFromHostHealthCheck); + } + return updatedHost; } + private void sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(HostVO host, String allocationState, + boolean isUpdateFromHostHealthCheck) { + if (!AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()) || + host.getHypervisorType() != HypervisorType.KVM) { + return; + } + + String msg = String.format("The host %s (%s) ", host.getName(), host.getUuid()); + ResourceState.Event resourceEvent = getResourceEventFromAllocationStateString(allocationState); + boolean isEventEnable = resourceEvent == ResourceState.Event.Enable; + + if (isUpdateFromHostHealthCheck) { + msg += String.format("is auto-%s after %s health check results", + isEventEnable ? "enabled" : "disabled", + isEventEnable ? "successful" : "failed"); + alertManager.sendAlert(AlertService.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), + host.getPodId(), msg, msg); + } else { + msg += String.format("is manually %s despite the setting '%s' is enabled for the cluster %s", + isEventEnable ? "enabled" : "disabled", AgentManager.EnableKVMAutoEnableDisable.key(), + host.getClusterId()); + } + annotationService.addAnnotation(msg, AnnotationService.EntityType.HOST, host.getUuid(), true); + } + @Override public Host autoUpdateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException { return updateHost(hostId, null, null, allocationState, null, null, true); From ca07dfc1c4a1f272419ac55405140dd94b358c59 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Wed, 8 Feb 2023 10:07:50 -0300 Subject: [PATCH 10/13] Address reviews --- .../com/cloud/resource/ResourceService.java | 2 +- .../cloud/agent/manager/AgentManagerImpl.java | 6 +++--- .../cloud/resource/ResourceManagerImpl.java | 21 ++++++++++++------- .../resource/MockResourceManagerImpl.java | 2 +- .../smoke/test_host_control_state.py | 14 ------------- 5 files changed, 19 insertions(+), 26 deletions(-) diff --git a/api/src/main/java/com/cloud/resource/ResourceService.java b/api/src/main/java/com/cloud/resource/ResourceService.java index 4e25318e0cb4..2757c918ed65 100644 --- a/api/src/main/java/com/cloud/resource/ResourceService.java +++ b/api/src/main/java/com/cloud/resource/ResourceService.java @@ -49,7 +49,7 @@ public interface ResourceService { */ Host updateHost(UpdateHostCmd cmd) throws NoTransitionException; - Host autoUpdateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException; + Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException; Host cancelMaintenance(CancelMaintenanceCmd cmd); diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 96ecd66cd1bb..d69b2c1ae229 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -1265,16 +1265,16 @@ private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long ho return; } - String allocationState = hostHealthCheckResult ? "Enable" : "Disable"; + ResourceState.Event resourceEvent = hostHealthCheckResult ? ResourceState.Event.Enable : ResourceState.Event.Disable; try { s_logger.info(String.format("Host health check %s, auto %s KVM host: %s", hostHealthCheckResult ? "succeeds" : "fails", hostHealthCheckResult ? "enabling" : "disabling", host.getName())); - _resourceMgr.autoUpdateHostAllocationState(hostId, allocationState); + _resourceMgr.autoUpdateHostAllocationState(hostId, resourceEvent); } catch (NoTransitionException e) { - s_logger.error(String.format("Cannot Auto %s host: %s", allocationState, host.getName()), e); + s_logger.error(String.format("Cannot Auto %s host: %s", resourceEvent, host.getName()), e); } } diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 263c560b0946..1ff64de4d882 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -1824,9 +1824,8 @@ private boolean updateHostAllocationState(HostVO host, String allocationState, return false; } - if (autoEnableDisableKVMSetting && isUpdateFromHostHealthCheck && hostDetail != null && - !Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Enable) { - s_logger.debug(String.format("The setting %s is enabled and the health check succeeds on the host, " + + if (isAutoEnableAttemptForADisabledHost(autoEnableDisableKVMSetting, isUpdateFromHostHealthCheck, hostDetail, resourceEvent)) { + s_logger.debug(String.format("The setting '%s' is enabled and the health check succeeds on the host, " + "but the host has been manually disabled previously, ignoring auto enabling", AgentManager.EnableKVMAutoEnableDisable.key())); return false; @@ -1839,6 +1838,13 @@ private boolean updateHostAllocationState(HostVO host, String allocationState, return true; } + private boolean isAutoEnableAttemptForADisabledHost(boolean autoEnableDisableKVMSetting, + boolean isUpdateFromHostHealthCheck, + DetailVO hostDetail, ResourceState.Event resourceEvent) { + return autoEnableDisableKVMSetting && isUpdateFromHostHealthCheck && hostDetail != null && + !Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Enable; + } + private void updateHostName(HostVO host, String name) { s_logger.debug("Updating Host name to: " + name); host.setName(name); @@ -1934,8 +1940,9 @@ private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String private void sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(HostVO host, String allocationState, boolean isUpdateFromHostHealthCheck) { - if (!AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()) || - host.getHypervisorType() != HypervisorType.KVM) { + boolean isAutoEnableDisableKVMSettingEnabled = host.getHypervisorType() == HypervisorType.KVM && + AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()); + if (!isAutoEnableDisableKVMSettingEnabled) { return; } @@ -1958,8 +1965,8 @@ private void sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(HostVO hos } @Override - public Host autoUpdateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException { - return updateHost(hostId, null, null, allocationState, null, null, true); + public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException { + return updateHost(hostId, null, null, resourceEvent.toString(), null, null, true); } @Override diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index 02f4d5b5513f..73d4adf050b0 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -74,7 +74,7 @@ public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { } @Override - public Host autoUpdateHostAllocationState(Long hostId, String allocationState) throws NoTransitionException { + public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException { return null; } diff --git a/test/integration/smoke/test_host_control_state.py b/test/integration/smoke/test_host_control_state.py index 4eb51c349967..f9d92384a78a 100644 --- a/test/integration/smoke/test_host_control_state.py +++ b/test/integration/smoke/test_host_control_state.py @@ -272,27 +272,13 @@ def setUpClass(cls): cls.hypervisorNotSupported = True return - cls.template = get_template( - cls.apiclient, - cls.zone.id, - cls.hypervisor - ) - cls.logger = logging.getLogger('TestAutoEnableDisableHost') - - cls._cleanup = [ - ] return @classmethod def tearDownClass(cls): super(TestAutoEnableDisableHost, cls).tearDownClass() - def setUp(self): - self.apiclient = self.testClient.getApiClient() - self.cleanup = [] - return - def tearDown(self): super(TestAutoEnableDisableHost, self).tearDown() From de268b03b619837b754548826503395fe89e048b Mon Sep 17 00:00:00 2001 From: nvazquez Date: Fri, 10 Feb 2023 08:23:48 -0300 Subject: [PATCH 11/13] Add a reason for enabling or disabling a host when the automatic feature is enabled --- .../api/command/admin/host/UpdateHostCmd.java | 4 - .../cloud/resource/ResourceManagerImpl.java | 26 ++-- ui/src/config/section/infra/hosts.js | 10 +- ui/src/views/infra/HostEnableDisable.vue | 133 ++++++++++++++++++ 4 files changed, 157 insertions(+), 16 deletions(-) create mode 100644 ui/src/views/infra/HostEnableDisable.vue diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java index 24ad20c62abe..e3ff130e2d48 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java @@ -19,7 +19,6 @@ import com.cloud.host.Host; import com.cloud.user.Account; import org.apache.cloudstack.acl.RoleType; -import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.ApiErrorCode; @@ -117,9 +116,6 @@ public void execute() { Host result; try { result = _resourceService.updateHost(this); - if (getAnnotation() != null) { - annotationService.addAnnotation(getAnnotation(), AnnotationService.EntityType.HOST, result.getUuid(), true); - } HostResponse hostResponse = _responseGenerator.createHostResponse(result); hostResponse.setResponseName(getCommandName()); this.setResponseObject(hostResponse); diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 1ff64de4d882..e219da677cf7 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -1892,11 +1892,11 @@ private void updateHostTags(HostVO host, Long hostId, List hostTags) { @Override public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { return updateHost(cmd.getId(), cmd.getName(), cmd.getOsCategoryId(), - cmd.getAllocationState(), cmd.getUrl(), cmd.getHostTags(), false); + cmd.getAllocationState(), cmd.getUrl(), cmd.getHostTags(), cmd.getAnnotation(), false); } private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String allocationState, - String url, List hostTags, boolean isUpdateFromHostHealthCheck) throws NoTransitionException { + String url, List hostTags, String annotation, boolean isUpdateFromHostHealthCheck) throws NoTransitionException { // Verify that the host exists final HostVO host = _hostDao.findById(hostId); if (host == null) { @@ -1931,18 +1931,25 @@ private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String final HostVO updatedHost = _hostDao.findById(hostId); - if (isUpdateHostAllocation) { - sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(host, allocationState, isUpdateFromHostHealthCheck); - } + sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(host, allocationState, + isUpdateFromHostHealthCheck, isUpdateHostAllocation, annotation); return updatedHost; } private void sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(HostVO host, String allocationState, - boolean isUpdateFromHostHealthCheck) { + boolean isUpdateFromHostHealthCheck, + boolean isUpdateHostAllocation, String annotation) { boolean isAutoEnableDisableKVMSettingEnabled = host.getHypervisorType() == HypervisorType.KVM && AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()); if (!isAutoEnableDisableKVMSettingEnabled) { + if (StringUtils.isNotBlank(annotation)) { + annotationService.addAnnotation(annotation, AnnotationService.EntityType.HOST, host.getUuid(), true); + } + return; + } + + if (!isUpdateHostAllocation) { return; } @@ -1957,16 +1964,19 @@ private void sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(HostVO hos alertManager.sendAlert(AlertService.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), msg, msg); } else { - msg += String.format("is manually %s despite the setting '%s' is enabled for the cluster %s", + msg += String.format("is %s despite the setting '%s' is enabled for the cluster %s", isEventEnable ? "enabled" : "disabled", AgentManager.EnableKVMAutoEnableDisable.key(), host.getClusterId()); + if (StringUtils.isNotBlank(annotation)) { + msg += String.format(", reason: %s", annotation); + } } annotationService.addAnnotation(msg, AnnotationService.EntityType.HOST, host.getUuid(), true); } @Override public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException { - return updateHost(hostId, null, null, resourceEvent.toString(), null, null, true); + return updateHost(hostId, null, null, resourceEvent.toString(), null, null, null, true); } @Override diff --git a/ui/src/config/section/infra/hosts.js b/ui/src/config/section/infra/hosts.js index 1ca0a99e6173..b6530239df9e 100644 --- a/ui/src/config/section/infra/hosts.js +++ b/ui/src/config/section/infra/hosts.js @@ -98,8 +98,9 @@ export default { label: 'label.disable.host', message: 'message.confirm.disable.host', dataView: true, - defaultArgs: { allocationstate: 'Disable' }, - show: (record) => { return record.resourcestate === 'Enabled' } + show: (record) => { return record.resourcestate === 'Enabled' }, + popup: true, + component: shallowRef(defineAsyncComponent(() => import('@/views/infra/HostEnableDisable'))) }, { api: 'updateHost', @@ -107,8 +108,9 @@ export default { label: 'label.enable.host', message: 'message.confirm.enable.host', dataView: true, - defaultArgs: { allocationstate: 'Enable' }, - show: (record) => { return record.resourcestate === 'Disabled' } + show: (record) => { return record.resourcestate === 'Disabled' }, + popup: true, + component: shallowRef(defineAsyncComponent(() => import('@/views/infra/HostEnableDisable'))) }, { api: 'prepareHostForMaintenance', diff --git a/ui/src/views/infra/HostEnableDisable.vue b/ui/src/views/infra/HostEnableDisable.vue new file mode 100644 index 000000000000..bc71aa270809 --- /dev/null +++ b/ui/src/views/infra/HostEnableDisable.vue @@ -0,0 +1,133 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + + + + + From bdcc1878acb46590de3b09c01e8b06b064490e56 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Wed, 15 Mar 2023 19:56:18 -0300 Subject: [PATCH 12/13] Fix comment on the marvin test description --- test/integration/smoke/test_host_control_state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/smoke/test_host_control_state.py b/test/integration/smoke/test_host_control_state.py index f9d92384a78a..4b8409ecc276 100644 --- a/test/integration/smoke/test_host_control_state.py +++ b/test/integration/smoke/test_host_control_state.py @@ -446,7 +446,7 @@ def test_03_enable_host_does_not_override_auto_disable_kvm_host(self): # 1. Enable the KVM Auto Enable/Disable Feature # 2. Set a health check script that fails and observe the host is Disabled # 3. Make the host Enabled - # 4. Verify the host does not get auto-disabled after the previous step + # 4. Verify the host does get auto-disabled after the previous step """ selected_host = self.select_host_for_health_checks() From 82af5edce6f43994de9fdb7d5679acae9b257f95 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Wed, 15 Mar 2023 20:21:50 -0300 Subject: [PATCH 13/13] Fix for disabling the feature if the admin has manually updated the host resource state before any health check result --- .../main/java/com/cloud/resource/ResourceManagerImpl.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index e219da677cf7..295136b35fc0 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -1805,8 +1805,9 @@ private void handleAutoEnableDisableKVMHost(boolean autoEnableDisableKVMSetting, AgentManager.EnableKVMAutoEnableDisable.key(), host.getName(), resourceEvent)); hostDetail.setValue(Boolean.FALSE.toString()); _hostDetailsDao.update(hostDetail.getId(), hostDetail); - } else if (isUpdateFromHostHealthCheck && hostDetail == null) { - hostDetail = new DetailVO(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST, Boolean.TRUE.toString()); + } else if (hostDetail == null) { + String autoEnableValue = !isUpdateFromHostHealthCheck ? Boolean.FALSE.toString() : Boolean.TRUE.toString(); + hostDetail = new DetailVO(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST, autoEnableValue); _hostDetailsDao.persist(hostDetail); } }