diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties index 27c0e387fa3c..9174da7fd7bc 100644 --- a/agent/conf/agent.properties +++ b/agent/conf/agent.properties @@ -398,3 +398,7 @@ iscsi.session.cleanup.enabled=false # The number of iothreads. There should be only 1 or 2 IOThreads per VM CPU (default is 1). The recommended number of iothreads is 1 # iothreads=1 + +# The path of an executable file/script for host health check for CloudStack to Auto Disable/Enable the host +# depending on the return value of the file/script +# agent.health.check.script.path= diff --git a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java index 9a031e001fa1..4af4ebed5e6d 100644 --- a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java +++ b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java @@ -312,6 +312,9 @@ public class AgentProperties{ */ public static final Property OPENVSWITCH_DPDK_OVS_PATH = new Property<>("openvswitch.dpdk.ovs.path", null, String.class); + public static final Property HEALTH_CHECK_SCRIPT_PATH = + new Property<>("agent.health.check.script.path", null, String.class); + /** * Sets the hypervisor type.
* Possible values: kvm | lxc
diff --git a/api/src/main/java/com/cloud/resource/ResourceService.java b/api/src/main/java/com/cloud/resource/ResourceService.java index e2b84ba87203..2757c918ed65 100644 --- a/api/src/main/java/com/cloud/resource/ResourceService.java +++ b/api/src/main/java/com/cloud/resource/ResourceService.java @@ -49,6 +49,8 @@ public interface ResourceService { */ Host updateHost(UpdateHostCmd cmd) throws NoTransitionException; + Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException; + Host cancelMaintenance(CancelMaintenanceCmd cmd); Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException; diff --git a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java index 9f0418a8d9ca..5b7b4f674ca1 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java @@ -1020,6 +1020,7 @@ public class ApiConstants { public static final String PUBLIC_MTU = "publicmtu"; public static final String PRIVATE_MTU = "privatemtu"; public static final String MTU = "mtu"; + public static final String AUTO_ENABLE_KVM_HOST = "autoenablekvmhost"; public static final String LIST_APIS = "listApis"; /** diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java index 5ca53c077407..e3ff130e2d48 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java @@ -19,7 +19,6 @@ import com.cloud.host.Host; import com.cloud.user.Account; import org.apache.cloudstack.acl.RoleType; -import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.ApiConstants; import 
org.apache.cloudstack.api.ApiErrorCode; @@ -117,9 +116,6 @@ public void execute() { Host result; try { result = _resourceService.updateHost(this); - if(getAnnotation() != null) { - annotationService.addAnnotation(getAnnotation(), AnnotationService.EntityType.HOST, result.getUuid(), true); - } HostResponse hostResponse = _responseGenerator.createHostResponse(result); hostResponse.setResponseName(getCommandName()); this.setResponseObject(hostResponse); diff --git a/core/src/main/java/com/cloud/agent/api/PingRoutingCommand.java b/core/src/main/java/com/cloud/agent/api/PingRoutingCommand.java index d7733ee91976..ce529ad4bcb1 100644 --- a/core/src/main/java/com/cloud/agent/api/PingRoutingCommand.java +++ b/core/src/main/java/com/cloud/agent/api/PingRoutingCommand.java @@ -29,6 +29,7 @@ public class PingRoutingCommand extends PingCommand { boolean _gatewayAccessible = true; boolean _vnetAccessible = true; + private Boolean hostHealthCheckResult; protected PingRoutingCommand() { } @@ -57,4 +58,12 @@ public boolean isVnetAccessible() { public void setVnetAccessible(boolean vnetAccessible) { _vnetAccessible = vnetAccessible; } + + public Boolean getHostHealthCheckResult() { + return hostHealthCheckResult; + } + + public void setHostHealthCheckResult(Boolean hostHealthCheckResult) { + this.hostHealthCheckResult = hostHealthCheckResult; + } } diff --git a/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java b/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java index b459f8849690..b4f9d20df5ed 100644 --- a/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java +++ b/core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java @@ -44,6 +44,7 @@ public class StartupRoutingCommand extends StartupCommand { List hostTags = new ArrayList(); String hypervisorVersion; HashMap> groupDetails = new HashMap>(); + private Boolean hostHealthCheckResult; public StartupRoutingCommand() { super(Host.Type.Routing); @@ -188,4 +189,12 @@ public boolean 
getSupportsClonedVolumes() { public void setSupportsClonedVolumes(boolean supportsClonedVolumes) { this.supportsClonedVolumes = supportsClonedVolumes; } + + public Boolean getHostHealthCheckResult() { + return hostHealthCheckResult; + } + + public void setHostHealthCheckResult(Boolean hostHealthCheckResult) { + this.hostHealthCheckResult = hostHealthCheckResult; + } } diff --git a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java index 818e0a75e64a..6ba0c3b4fa0d 100644 --- a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java +++ b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java @@ -39,6 +39,13 @@ public interface AgentManager { static final ConfigKey Wait = new ConfigKey("Advanced", Integer.class, "wait", "1800", "Time in seconds to wait for control commands to return", true); + ConfigKey EnableKVMAutoEnableDisable = new ConfigKey<>(Boolean.class, + "enable.kvm.host.auto.enable.disable", + "Advanced", + "false", + "(KVM only) Enable Auto Disable/Enable KVM hosts in the cluster " + + "according to the hosts health check results", + true, ConfigKey.Scope.Cluster, null); public enum TapAgentsAction { Add, Del, Contains, diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index b74c11cf1384..abdee769c1a4 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -51,6 +51,7 @@ import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.commons.lang3.BooleanUtils; import org.apache.log4j.Logger; import org.apache.log4j.MDC; @@ 
-1250,6 +1251,52 @@ public AgentHandler(final Task.Type type, final Link link, final byte[] data) { super(type, link, data); } + private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long hostId) { + if (hostHealthCheckResult == null) { + return; + } + HostVO host = _hostDao.findById(hostId); + if (host == null) { + s_logger.error(String.format("Unable to find host with ID: %s", hostId)); + return; + } + if (!BooleanUtils.toBoolean(EnableKVMAutoEnableDisable.valueIn(host.getClusterId()))) { + s_logger.debug(String.format("%s is disabled for the cluster %s, cannot process the health check result " + + "received for the host %s", EnableKVMAutoEnableDisable.key(), host.getClusterId(), host.getName())); + return; + } + + ResourceState.Event resourceEvent = hostHealthCheckResult ? ResourceState.Event.Enable : ResourceState.Event.Disable; + + try { + s_logger.info(String.format("Host health check %s, auto %s KVM host: %s", + hostHealthCheckResult ? "succeeds" : "fails", + hostHealthCheckResult ? 
"enabling" : "disabling", + host.getName())); + _resourceMgr.autoUpdateHostAllocationState(hostId, resourceEvent); + } catch (NoTransitionException e) { + s_logger.error(String.format("Cannot Auto %s host: %s", resourceEvent, host.getName()), e); + } + } + + private void processStartupRoutingCommand(StartupRoutingCommand startup, long hostId) { + if (startup == null) { + s_logger.error("Empty StartupRoutingCommand received"); + return; + } + Boolean hostHealthCheckResult = startup.getHostHealthCheckResult(); + processHostHealthCheckResult(hostHealthCheckResult, hostId); + } + + private void processPingRoutingCommand(PingRoutingCommand pingRoutingCommand, long hostId) { + if (pingRoutingCommand == null) { + s_logger.error("Empty PingRoutingCommand received"); + return; + } + Boolean hostHealthCheckResult = pingRoutingCommand.getHostHealthCheckResult(); + processHostHealthCheckResult(hostHealthCheckResult, hostId); + } + protected void processRequest(final Link link, final Request request) { final AgentAttache attache = (AgentAttache)link.attachment(); final Command[] cmds = request.getCommands(); @@ -1291,6 +1338,7 @@ protected void processRequest(final Link link, final Request request) { try { if (cmd instanceof StartupRoutingCommand) { final StartupRoutingCommand startup = (StartupRoutingCommand) cmd; + processStartupRoutingCommand(startup, hostId); answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval()); } else if (cmd instanceof StartupProxyCommand) { final StartupProxyCommand startup = (StartupProxyCommand) cmd; @@ -1322,6 +1370,7 @@ protected void processRequest(final Link link, final Request request) { // if the router is sending a ping, verify the // gateway was pingable if (cmd instanceof PingRoutingCommand) { + processPingRoutingCommand((PingRoutingCommand) cmd, hostId); final boolean gatewayAccessible = ((PingRoutingCommand)cmd).isGatewayAccessible(); final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId)); @@ 
-1748,8 +1797,8 @@ public String getConfigComponentName() { @Override public ConfigKey[] getConfigKeys() { - return new ConfigKey[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize, DirectAgentPoolSize, - DirectAgentThreadCap }; + return new ConfigKey[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize, + DirectAgentPoolSize, DirectAgentThreadCap, EnableKVMAutoEnableDisable }; } protected class SetHostParamsListener implements Listener { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 3f711fa9b809..2ac6da86dc65 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -322,6 +322,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv private String _dcId; private String _clusterId; private final Properties _uefiProperties = new Properties(); + private String hostHealthCheckScriptPath; private long _hvVersion; private Duration _timeout; @@ -717,6 +718,10 @@ protected enum BridgeType { NATIVE, OPENVSWITCH, TUNGSTEN } + protected enum HealthCheckResult { + SUCCESS, FAILURE, IGNORE + } + protected BridgeType _bridgeType; protected StorageSubsystemCommandHandler storageHandler; @@ -943,6 +948,12 @@ public boolean configure(final String name, final Map params) th throw new ConfigurationException("Unable to find the ovs-pvlan-kvm-vm.sh"); } + hostHealthCheckScriptPath = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEALTH_CHECK_SCRIPT_PATH); + if (StringUtils.isNotBlank(hostHealthCheckScriptPath) && !new File(hostHealthCheckScriptPath).exists()) { + s_logger.info(String.format("Unable to find the host health check script at: %s, " + + 
"discarding it", hostHealthCheckScriptPath)); + } + setupTungstenVrouterPath = Script.findScript(tungstenScriptsDir, "setup_tungsten_vrouter.sh"); if (setupTungstenVrouterPath == null) { throw new ConfigurationException("Unable to find the setup_tungsten_vrouter.sh"); @@ -3436,13 +3447,54 @@ protected synchronized String attachOrDetachDevice(final Connect conn, final boo @Override public PingCommand getCurrentStatus(final long id) { - + PingRoutingCommand pingRoutingCommand; if (!_canBridgeFirewall) { - return new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport()); + pingRoutingCommand = new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport()); } else { final HashMap> nwGrpStates = syncNetworkGroups(id); - return new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates); + pingRoutingCommand = new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates); } + HealthCheckResult healthCheckResult = getHostHealthCheckResult(); + if (healthCheckResult != HealthCheckResult.IGNORE) { + pingRoutingCommand.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS); + } + return pingRoutingCommand; + } + + /** + * The health check result is SUCCESS, if the script is executed successfully and the exit code is 0 + * The health check result is FAILURE, if the script is executed successfully and the exit code is 1 + * The health check result is IGNORE, if + * - Script file is not specified, or + * - Script file does not exist or is not a regular file, or + * - Script file is not accessible by the user of the cloudstack-agent process, or + * - Script file is not executable, or + * - There are errors when the script is executed (exit codes other than 0 or 1) + */ + private HealthCheckResult getHostHealthCheckResult() { + if (StringUtils.isBlank(hostHealthCheckScriptPath)) { + s_logger.debug("Host health check script path is not specified"); + return 
HealthCheckResult.IGNORE; + } + File script = new File(hostHealthCheckScriptPath); + if (!script.exists() || !script.isFile() || !script.canExecute()) { + s_logger.warn(String.format("The host health check script file set at: %s cannot be executed, " + + "reason: %s", hostHealthCheckScriptPath, + !script.exists() ? "file does not exist" : "please check file permissions to execute this file")); + return HealthCheckResult.IGNORE; + } + int exitCode = executeBashScriptAndRetrieveExitValue(hostHealthCheckScriptPath); + if (s_logger.isDebugEnabled()) { + s_logger.debug(String.format("Host health check script exit code: %s", exitCode)); + } + return retrieveHealthCheckResultFromExitCode(exitCode); + } + + private HealthCheckResult retrieveHealthCheckResultFromExitCode(int exitCode) { + if (exitCode != 0 && exitCode != 1) { + return HealthCheckResult.IGNORE; + } + return exitCode == 0 ? HealthCheckResult.SUCCESS : HealthCheckResult.FAILURE; } @Override @@ -3484,6 +3536,10 @@ public StartupCommand[] initialize() { cmd.setGatewayIpAddress(_localGateway); cmd.setIqn(getIqn()); cmd.getHostDetails().put(HOST_VOLUME_ENCRYPTION, String.valueOf(hostSupportsVolumeEncryption())); + HealthCheckResult healthCheckResult = getHostHealthCheckResult(); + if (healthCheckResult != HealthCheckResult.IGNORE) { + cmd.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS); + } if (cmd.getHostDetails().containsKey("Host.OS")) { _hostDistro = cmd.getHostDetails().get("Host.OS"); diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 346290827486..295136b35fc0 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -36,8 +36,10 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; +import com.cloud.alert.AlertManager; import com.cloud.exception.StorageConflictException; 
import com.cloud.exception.StorageUnavailableException; +import org.apache.cloudstack.alert.AlertService; import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.annotation.dao.AnnotationDao; import org.apache.cloudstack.api.ApiConstants; @@ -294,6 +296,10 @@ public void setDiscoverers(final List discoverers) { private UserVmDetailsDao userVmDetailsDao; @Inject private AnnotationDao annotationDao; + @Inject + private AlertManager alertManager; + @Inject + private AnnotationService annotationService; private final long _nodeId = ManagementServerNode.getManagementServerId(); @@ -1774,73 +1780,149 @@ public boolean checkAndMaintain(final long hostId) { return hostInMaintenance; } + private ResourceState.Event getResourceEventFromAllocationStateString(String allocationState) { + final ResourceState.Event resourceEvent = ResourceState.Event.toEvent(allocationState); + if (resourceEvent != ResourceState.Event.Enable && resourceEvent != ResourceState.Event.Disable) { + throw new InvalidParameterValueException(String.format("Invalid allocation state: %s, " + + "only Enable/Disable are allowed", allocationState)); + } + return resourceEvent; + } + + private void handleAutoEnableDisableKVMHost(boolean autoEnableDisableKVMSetting, + boolean isUpdateFromHostHealthCheck, + HostVO host, DetailVO hostDetail, + ResourceState.Event resourceEvent) { + if (autoEnableDisableKVMSetting) { + if (!isUpdateFromHostHealthCheck && hostDetail != null && + !Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Enable) { + hostDetail.setValue(Boolean.TRUE.toString()); + _hostDetailsDao.update(hostDetail.getId(), hostDetail); + } else if (!isUpdateFromHostHealthCheck && hostDetail != null && + Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Disable) { + s_logger.info(String.format("The setting %s is enabled but the host %s is manually set into %s state," + + "ignoring future auto enabling of the 
host based on health check results", + AgentManager.EnableKVMAutoEnableDisable.key(), host.getName(), resourceEvent)); + hostDetail.setValue(Boolean.FALSE.toString()); + _hostDetailsDao.update(hostDetail.getId(), hostDetail); + } else if (hostDetail == null) { + String autoEnableValue = !isUpdateFromHostHealthCheck ? Boolean.FALSE.toString() : Boolean.TRUE.toString(); + hostDetail = new DetailVO(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST, autoEnableValue); + _hostDetailsDao.persist(hostDetail); + } + } + } + private boolean updateHostAllocationState(HostVO host, String allocationState, + boolean isUpdateFromHostHealthCheck) throws NoTransitionException { + boolean autoEnableDisableKVMSetting = AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()) && + host.getHypervisorType() == HypervisorType.KVM; + ResourceState.Event resourceEvent = getResourceEventFromAllocationStateString(allocationState); + DetailVO hostDetail = _hostDetailsDao.findDetail(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST); + + if ((host.getResourceState() == ResourceState.Enabled && resourceEvent == ResourceState.Event.Enable) || + (host.getResourceState() == ResourceState.Disabled && resourceEvent == ResourceState.Event.Disable)) { + s_logger.info(String.format("The host %s is already on the allocated state", host.getName())); + return false; + } + + if (isAutoEnableAttemptForADisabledHost(autoEnableDisableKVMSetting, isUpdateFromHostHealthCheck, hostDetail, resourceEvent)) { + s_logger.debug(String.format("The setting '%s' is enabled and the health check succeeds on the host, " + + "but the host has been manually disabled previously, ignoring auto enabling", + AgentManager.EnableKVMAutoEnableDisable.key())); + return false; + } + + handleAutoEnableDisableKVMHost(autoEnableDisableKVMSetting, isUpdateFromHostHealthCheck, host, + hostDetail, resourceEvent); + + resourceStateTransitTo(host, resourceEvent, _nodeId); + return true; + } + + private boolean 
isAutoEnableAttemptForADisabledHost(boolean autoEnableDisableKVMSetting, + boolean isUpdateFromHostHealthCheck, + DetailVO hostDetail, ResourceState.Event resourceEvent) { + return autoEnableDisableKVMSetting && isUpdateFromHostHealthCheck && hostDetail != null && + !Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Enable; + } + + private void updateHostName(HostVO host, String name) { + s_logger.debug("Updating Host name to: " + name); + host.setName(name); + _hostDao.update(host.getId(), host); + } + + private void updateHostGuestOSCategory(Long hostId, Long guestOSCategoryId) { + // Verify that the guest OS Category exists + if (!(guestOSCategoryId > 0) || _guestOSCategoryDao.findById(guestOSCategoryId) == null) { + throw new InvalidParameterValueException("Please specify a valid guest OS category."); + } + + final GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao.findById(guestOSCategoryId); + final DetailVO guestOSDetail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id"); + + if (guestOSCategory != null && !GuestOSCategoryVO.CATEGORY_NONE.equalsIgnoreCase(guestOSCategory.getName())) { + // Create/Update an entry for guest.os.category.id + if (guestOSDetail != null) { + guestOSDetail.setValue(String.valueOf(guestOSCategory.getId())); + _hostDetailsDao.update(guestOSDetail.getId(), guestOSDetail); + } else { + final Map detail = new HashMap(); + detail.put("guest.os.category.id", String.valueOf(guestOSCategory.getId())); + _hostDetailsDao.persist(hostId, detail); + } + } else { + // Delete any existing entry for guest.os.category.id + if (guestOSDetail != null) { + _hostDetailsDao.remove(guestOSDetail.getId()); + } + } + } + + private void updateHostTags(HostVO host, Long hostId, List hostTags) { + List activeVMs = _vmDao.listByHostId(hostId); + s_logger.warn(String.format("The following active VMs [%s] are using the host [%s]. 
" + + "Updating the host tags will not affect them.", activeVMs, host)); + + if (s_logger.isDebugEnabled()) { + s_logger.debug("Updating Host Tags to :" + hostTags); + } + _hostTagsDao.persist(hostId, new ArrayList<>(new HashSet<>(hostTags))); + } + @Override public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { - Long hostId = cmd.getId(); - String name = cmd.getName(); - Long guestOSCategoryId = cmd.getOsCategoryId(); + return updateHost(cmd.getId(), cmd.getName(), cmd.getOsCategoryId(), + cmd.getAllocationState(), cmd.getUrl(), cmd.getHostTags(), cmd.getAnnotation(), false); + } + private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String allocationState, + String url, List hostTags, String annotation, boolean isUpdateFromHostHealthCheck) throws NoTransitionException { // Verify that the host exists final HostVO host = _hostDao.findById(hostId); if (host == null) { throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist"); } - if (cmd.getAllocationState() != null) { - final ResourceState.Event resourceEvent = ResourceState.Event.toEvent(cmd.getAllocationState()); - if (resourceEvent != ResourceState.Event.Enable && resourceEvent != ResourceState.Event.Disable) { - throw new CloudRuntimeException("Invalid allocation state:" + cmd.getAllocationState() + ", only Enable/Disable are allowed"); - } - - resourceStateTransitTo(host, resourceEvent, _nodeId); + boolean isUpdateHostAllocation = false; + if (StringUtils.isNotBlank(allocationState)) { + isUpdateHostAllocation = updateHostAllocationState(host, allocationState, isUpdateFromHostHealthCheck); } if (StringUtils.isNotBlank(name)) { - s_logger.debug("Updating Host name to: " + name); - host.setName(name); - _hostDao.update(host.getId(), host); + updateHostName(host, name); } if (guestOSCategoryId != null) { - // Verify that the guest OS Category exists - if (!(guestOSCategoryId > 0) || _guestOSCategoryDao.findById(guestOSCategoryId) == null) 
{ - throw new InvalidParameterValueException("Please specify a valid guest OS category."); - } - - final GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao.findById(guestOSCategoryId); - final DetailVO guestOSDetail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id"); - - if (guestOSCategory != null && !GuestOSCategoryVO.CATEGORY_NONE.equalsIgnoreCase(guestOSCategory.getName())) { - // Create/Update an entry for guest.os.category.id - if (guestOSDetail != null) { - guestOSDetail.setValue(String.valueOf(guestOSCategory.getId())); - _hostDetailsDao.update(guestOSDetail.getId(), guestOSDetail); - } else { - final Map detail = new HashMap(); - detail.put("guest.os.category.id", String.valueOf(guestOSCategory.getId())); - _hostDetailsDao.persist(hostId, detail); - } - } else { - // Delete any existing entry for guest.os.category.id - if (guestOSDetail != null) { - _hostDetailsDao.remove(guestOSDetail.getId()); - } - } + updateHostGuestOSCategory(hostId, guestOSCategoryId); } - final List hostTags = cmd.getHostTags(); - if (hostTags != null) { - List activeVMs = _vmDao.listByHostId(hostId); - s_logger.warn(String.format("The following active VMs [%s] are using the host [%s]. 
Updating the host tags will not affect them.", activeVMs, host)); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Updating Host Tags to :" + hostTags); - } - _hostTagsDao.persist(hostId, new ArrayList(new HashSet(hostTags))); + if (hostTags != null) { + updateHostTags(host, hostId, hostTags); } - final String url = cmd.getUrl(); if (url != null) { - _storageMgr.updateSecondaryStorage(cmd.getId(), cmd.getUrl()); + _storageMgr.updateSecondaryStorage(hostId, url); } try { _storageMgr.enableHost(hostId); @@ -1849,9 +1931,55 @@ public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { } final HostVO updatedHost = _hostDao.findById(hostId); + + sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(host, allocationState, + isUpdateFromHostHealthCheck, isUpdateHostAllocation, annotation); + return updatedHost; } + /** + * Stores the caller's annotation as-is when the KVM auto enable/disable feature does not apply or the + * allocation state did not change; otherwise annotates the host with a message describing the change. + */ + private void sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(HostVO host, String allocationState, + boolean isUpdateFromHostHealthCheck, + boolean isUpdateHostAllocation, String annotation) { + boolean isAutoEnableDisableKVMSettingEnabled = host.getHypervisorType() == HypervisorType.KVM && + AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()); + if (!isAutoEnableDisableKVMSettingEnabled || !isUpdateHostAllocation) { + if (StringUtils.isNotBlank(annotation)) { + annotationService.addAnnotation(annotation, AnnotationService.EntityType.HOST, host.getUuid(), true); + } + return; + } + + String msg = String.format("The host %s (%s) ", host.getName(), host.getUuid()); + ResourceState.Event resourceEvent = getResourceEventFromAllocationStateString(allocationState); + boolean isEventEnable = resourceEvent == ResourceState.Event.Enable; + + if (isUpdateFromHostHealthCheck) { + msg += String.format("is auto-%s after %s health check results", + isEventEnable ? "enabled" : "disabled", + isEventEnable ? 
"successful" : "failed"); + alertManager.sendAlert(AlertService.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), + host.getPodId(), msg, msg); + } else { + msg += String.format("is %s despite the setting '%s' is enabled for the cluster %s", + isEventEnable ? "enabled" : "disabled", AgentManager.EnableKVMAutoEnableDisable.key(), + host.getClusterId()); + if (StringUtils.isNotBlank(annotation)) { + msg += String.format(", reason: %s", annotation); + } + } + annotationService.addAnnotation(msg, AnnotationService.EntityType.HOST, host.getUuid(), true); + } + + @Override + public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException { + return updateHost(hostId, null, null, resourceEvent.toString(), null, null, null, true); + } + @Override public Cluster getCluster(final Long clusterId) { return _clusterDao.findById(clusterId); diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index 4d5b5ba584bf..73d4adf050b0 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -73,6 +73,11 @@ public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { return null; } + @Override + public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException { + return null; + } + /* (non-Javadoc) * @see com.cloud.resource.ResourceService#cancelMaintenance(com.cloud.api.commands.CancelMaintenanceCmd) */ diff --git a/test/integration/smoke/test_host_control_state.py b/test/integration/smoke/test_host_control_state.py index 809af7d2a0e5..4b8409ecc276 100644 --- a/test/integration/smoke/test_host_control_state.py +++ b/test/integration/smoke/test_host_control_state.py @@ -20,7 +20,7 @@ Tests for host control state """ -from marvin.cloudstackAPI import updateHost 
+from marvin.cloudstackAPI import (updateHost, updateConfiguration) from nose.plugins.attrib import attr from marvin.cloudstackTestCase import cloudstackTestCase from marvin.lib.common import (get_domain, @@ -28,13 +28,18 @@ get_template, list_hosts, list_routers, - list_ssvms) + list_ssvms, + list_clusters, + list_hosts) from marvin.lib.base import (Account, Domain, Host, ServiceOffering, VirtualMachine) from marvin.sshClient import SshClient +from marvin.lib.decoratorGenerators import skipTestIf +from marvin.lib.utils import wait_until +import logging import time @@ -250,3 +255,220 @@ def test_router_host_control_state(self): self.enable_host(host_id) self.verify_router_host_control_state(router.id, "Enabled") + + +class TestAutoEnableDisableHost(cloudstackTestCase): + + @classmethod + def setUpClass(cls): + cls.testClient = super(TestAutoEnableDisableHost, cls).getClsTestClient() + cls.apiclient = cls.testClient.getApiClient() + cls.services = cls.testClient.getParsedTestDataConfig() + # Get Zone, Domain and templates + cls.zone = get_zone(cls.apiclient, cls.testClient.getZoneForTests()) + cls.hypervisor = cls.testClient.getHypervisorInfo() + cls.hostConfig = cls.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__ + cls.hypervisorNotSupported = cls.hypervisor.lower() not in ['kvm'] + if cls.hypervisorNotSupported: + return + + cls.logger = logging.getLogger('TestAutoEnableDisableHost') + return + + @classmethod + def tearDownClass(cls): + super(TestAutoEnableDisableHost, cls).tearDownClass() + + def tearDown(self): + super(TestAutoEnableDisableHost, self).tearDown() + + def get_ssh_client(self, ip, username, password, retries=10): + """ Setup ssh client connection and return connection """ + try: + ssh_client = SshClient(ip, 22, username, password, retries) + except Exception as e: + self.skipTest("Unable to create ssh connection: %s" % e) + + self.assertIsNotNone( + ssh_client, "Failed to setup ssh connection to ip=%s" % ip) 
+ + return ssh_client + + def wait_until_host_is_in_state(self, hostid, resourcestate, interval=3, retries=20): + def check_resource_state(): + response = Host.list( + self.apiclient, + id=hostid + ) + if isinstance(response, list): + if response[0].resourcestate == resourcestate: + self.logger.debug('Host with id %s is in resource state = %s' % (hostid, resourcestate)) + return True, None + else: + self.logger.debug("Waiting for host " + hostid + + " to reach state " + resourcestate + + ", with current state " + response[0].resourcestate) + return False, None + + done, _ = wait_until(interval, retries, check_resource_state) + if not done: + raise Exception("Failed to wait for host %s to be on resource state %s" % (hostid, resourcestate)) + return True + + def update_config(self, enable_feature): + cmd = updateConfiguration.updateConfigurationCmd() + cmd.name = "enable.kvm.host.auto.enable.disable" + cmd.value = enable_feature + + response = self.apiclient.updateConfiguration(cmd) + self.debug("updated the parameter %s with value %s" % (response.name, response.value)) + + def update_health_check_script(self, ip_address, username, password, exit_code): + health_check_script_path = "/etc/cloudstack/agent/healthcheck.sh" + health_check_agent_property = "agent.health.check.script.path" + agent_properties_file_path = "/etc/cloudstack/agent/agent.properties" + + ssh_client = self.get_ssh_client(ip_address, username, password) + ssh_client.execute("echo 'exit %s' > %s" % (exit_code, health_check_script_path)) + ssh_client.execute("chmod +x %s" % health_check_script_path) + ssh_client.execute("echo '%s=%s' >> %s" % (health_check_agent_property, health_check_script_path, + agent_properties_file_path)) + ssh_client.execute("service cloudstack-agent restart") + + def remove_host_health_check(self, ip_address, username, password): + health_check_script_path = "/etc/cloudstack/agent/healthcheck.sh" + ssh_client = self.get_ssh_client(ip_address, username, password) + 
ssh_client.execute("rm -f %s" % health_check_script_path) + + def select_host_for_health_checks(self): + clusters = list_clusters( + self.apiclient, + zoneid=self.zone.id + ) + if not clusters: + return None + + for cluster in clusters: + list_hosts_response = list_hosts( + self.apiclient, + clusterid=cluster.id, + type="Routing", + resourcestate="Enabled" + ) + assert isinstance(list_hosts_response, list) + if not list_hosts_response or len(list_hosts_response) < 1: + continue + return list_hosts_response[0] + return None + + def update_host_allocation_state(self, id, enable): + cmd = updateHost.updateHostCmd() + cmd.id = id + cmd.allocationstate = "Enable" if enable else "Disable" + response = self.apiclient.updateHost(cmd) + self.assertEqual(response.resourcestate, "Enabled" if enable else "Disabled") + + @attr(tags=["basic", "advanced"], required_hardware="false") + @skipTestIf("hypervisorNotSupported") + def test_01_auto_enable_disable_kvm_host(self): + """Test to auto-enable and auto-disable a KVM host based on health check results + + # Validate the following: + # 1. Enable the KVM Auto Enable/Disable Feature + # 2. Set a health check script that fails and observe the host is Disabled + # 3. 
Make the health check script succeed and observe the host is Enabled + """ + + selected_host = self.select_host_for_health_checks() + if not selected_host: + self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature") + + username = self.hostConfig["username"] + password = self.hostConfig["password"] + + # Enable the Auto Enable/Disable Configuration + self.update_config("true") + + # Set health check script for failure + self.update_health_check_script(selected_host.ipaddress, username, password, 1) + self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200) + + # Set health check script for success + self.update_health_check_script(selected_host.ipaddress, username, password, 0) + + self.wait_until_host_is_in_state(selected_host.id, "Enabled", 5, 200) + + @attr(tags=["basic", "advanced"], required_hardware="false") + @skipTestIf("hypervisorNotSupported") + def test_02_disable_host_overrides_auto_enable_kvm_host(self): + """Test to override the auto-enabling of a KVM host by an administrator + + # Validate the following: + # 1. Enable the KVM Auto Enable/Disable Feature + # 2. Set a health check script that succeeds and observe the host is Enabled + # 3. Make the host Disabled + # 4. 
Verify the host does not get auto-enabled after the previous step + """ + + selected_host = self.select_host_for_health_checks() + if not selected_host: + self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature") + + username = self.hostConfig["username"] + password = self.hostConfig["password"] + + # Enable the Auto Enable/Disable Configuration + self.update_config("true") + + # Set health check script for failure + self.update_health_check_script(selected_host.ipaddress, username, password, 0) + self.wait_until_host_is_in_state(selected_host.id, "Enabled", 5, 200) + + # Manually disable the host + self.update_host_allocation_state(selected_host.id, False) + + # Wait for more than the ping interval + time.sleep(70) + + # Verify the host continues on Disabled state + self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200) + + # Restore the host to Enabled state + self.remove_host_health_check(selected_host.ipaddress, username, password) + self.update_host_allocation_state(selected_host.id, True) + + @attr(tags=["basic", "advanced"], required_hardware="false") + @skipTestIf("hypervisorNotSupported") + def test_03_enable_host_does_not_override_auto_disable_kvm_host(self): + """Test to override the auto-disabling of a KVM host by an administrator + + # Validate the following: + # 1. Enable the KVM Auto Enable/Disable Feature + # 2. Set a health check script that fails and observe the host is Disabled + # 3. Make the host Enabled + # 4. 
Verify the host does get auto-disabled after the previous step + """ + + selected_host = self.select_host_for_health_checks() + if not selected_host: + self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature") + + username = self.hostConfig["username"] + password = self.hostConfig["password"] + + # Enable the Auto Enable/Disable Configuration + self.update_config("true") + + # Set health check script for failure + self.update_health_check_script(selected_host.ipaddress, username, password, 1) + self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200) + + # Manually enable the host + self.update_host_allocation_state(selected_host.id, True) + + # Verify the host goes back to Disabled state + self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200) + + # Restore the host to Enabled state + self.remove_host_health_check(selected_host.ipaddress, username, password) + self.update_host_allocation_state(selected_host.id, True) diff --git a/ui/src/config/section/infra/hosts.js b/ui/src/config/section/infra/hosts.js index 1ca0a99e6173..b6530239df9e 100644 --- a/ui/src/config/section/infra/hosts.js +++ b/ui/src/config/section/infra/hosts.js @@ -98,8 +98,9 @@ export default { label: 'label.disable.host', message: 'message.confirm.disable.host', dataView: true, - defaultArgs: { allocationstate: 'Disable' }, - show: (record) => { return record.resourcestate === 'Enabled' } + show: (record) => { return record.resourcestate === 'Enabled' }, + popup: true, + component: shallowRef(defineAsyncComponent(() => import('@/views/infra/HostEnableDisable'))) }, { api: 'updateHost', @@ -107,8 +108,9 @@ export default { label: 'label.enable.host', message: 'message.confirm.enable.host', dataView: true, - defaultArgs: { allocationstate: 'Enable' }, - show: (record) => { return record.resourcestate === 'Disabled' } + show: (record) => { return record.resourcestate === 'Disabled' }, + popup: true, + component: shallowRef(defineAsyncComponent(() 
=> import('@/views/infra/HostEnableDisable'))) }, { api: 'prepareHostForMaintenance', diff --git a/ui/src/views/infra/HostEnableDisable.vue b/ui/src/views/infra/HostEnableDisable.vue new file mode 100644 index 000000000000..bc71aa270809 --- /dev/null +++ b/ui/src/views/infra/HostEnableDisable.vue @@ -0,0 +1,133 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + + + + +