Skip to content

[Internal - for testing purpose]- FR67Auto enable disable kvm hosts #99

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions agent/conf/agent.properties
Original file line number Diff line number Diff line change
Expand Up @@ -398,3 +398,7 @@ iscsi.session.cleanup.enabled=false

# The number of iothreads. There should be only 1 or 2 IOThreads per VM CPU (default is 1). The recommended number of iothreads is 1
# iothreads=1

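# The path of an executable file/script used as the host health check. CloudStack automatically enables the host
# when the script exits with code 0 and disables it when the script exits with code 1; any other exit code is ignored.
# This only takes effect for clusters where the setting enable.kvm.host.auto.enable.disable is true.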
# agent.health.check.script.path=
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,9 @@ public class AgentProperties{
*/
public static final Property<String> OPENVSWITCH_DPDK_OVS_PATH = new Property<>("openvswitch.dpdk.ovs.path", null, String.class);

/**
 * Path of the executable file/script that is run as the host health check.<br>
 * Data type: String.<br>
 * Default value: <code>null</code>
 */
public static final Property<String> HEALTH_CHECK_SCRIPT_PATH =
new Property<>("agent.health.check.script.path", null, String.class);

/**
* Sets the hypervisor type.<br>
* Possible values: kvm | lxc <br>
Expand Down
2 changes: 2 additions & 0 deletions api/src/main/java/com/cloud/resource/ResourceService.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ public interface ResourceService {
*/
Host updateHost(UpdateHostCmd cmd) throws NoTransitionException;

Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException;

Host cancelMaintenance(CancelMaintenanceCmd cmd);

Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,7 @@ public class ApiConstants {
public static final String PUBLIC_MTU = "publicmtu";
public static final String PRIVATE_MTU = "privatemtu";
public static final String MTU = "mtu";
public static final String AUTO_ENABLE_KVM_HOST = "autoenablekvmhost";
public static final String LIST_APIS = "listApis";

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import com.cloud.host.Host;
import com.cloud.user.Account;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.annotation.AnnotationService;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
Expand Down Expand Up @@ -117,9 +116,6 @@ public void execute() {
Host result;
try {
result = _resourceService.updateHost(this);
if(getAnnotation() != null) {
annotationService.addAnnotation(getAnnotation(), AnnotationService.EntityType.HOST, result.getUuid(), true);
}
HostResponse hostResponse = _responseGenerator.createHostResponse(result);
hostResponse.setResponseName(getCommandName());
this.setResponseObject(hostResponse);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public class PingRoutingCommand extends PingCommand {

boolean _gatewayAccessible = true;
boolean _vnetAccessible = true;
private Boolean hostHealthCheckResult;

protected PingRoutingCommand() {
}
Expand Down Expand Up @@ -57,4 +58,12 @@ public boolean isVnetAccessible() {
public void setVnetAccessible(boolean vnetAccessible) {
_vnetAccessible = vnetAccessible;
}

/**
 * Returns the host health check result carried by this ping.
 *
 * @return the reported result, or {@code null} when no result was set on this command
 */
public Boolean getHostHealthCheckResult() {
return hostHealthCheckResult;
}

/**
 * Stores the host health check result to be carried by this ping.
 *
 * @param hostHealthCheckResult the result to report; the field stays {@code null} if this is never called
 */
public void setHostHealthCheckResult(Boolean hostHealthCheckResult) {
this.hostHealthCheckResult = hostHealthCheckResult;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public class StartupRoutingCommand extends StartupCommand {
List<String> hostTags = new ArrayList<String>();
String hypervisorVersion;
HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = new HashMap<String, HashMap<String, VgpuTypesInfo>>();
private Boolean hostHealthCheckResult;

public StartupRoutingCommand() {
super(Host.Type.Routing);
Expand Down Expand Up @@ -188,4 +189,12 @@ public boolean getSupportsClonedVolumes() {
public void setSupportsClonedVolumes(boolean supportsClonedVolumes) {
this.supportsClonedVolumes = supportsClonedVolumes;
}

/**
 * Returns the host health check result carried by this startup command.
 *
 * @return the reported result, or {@code null} when no result was set on this command
 */
public Boolean getHostHealthCheckResult() {
return hostHealthCheckResult;
}

/**
 * Stores the host health check result to be carried by this startup command.
 *
 * @param hostHealthCheckResult the result to report; the field stays {@code null} if this is never called
 */
public void setHostHealthCheckResult(Boolean hostHealthCheckResult) {
this.hostHealthCheckResult = hostHealthCheckResult;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@
public interface AgentManager {
static final ConfigKey<Integer> Wait = new ConfigKey<Integer>("Advanced", Integer.class, "wait", "1800", "Time in seconds to wait for control commands to return",
true);
/**
 * Cluster-scoped setting (default "false") that gates automatic enabling/disabling of KVM hosts
 * according to the health check results reported for them.
 * NOTE(review): the boolean argument presumably marks the key as dynamic - confirm against ConfigKey.
 */
ConfigKey<Boolean> EnableKVMAutoEnableDisable = new ConfigKey<>(Boolean.class,
"enable.kvm.host.auto.enable.disable",
"Advanced",
"false",
"(KVM only) Enable Auto Disable/Enable KVM hosts in the cluster " +
"according to the hosts health check results",
true, ConfigKey.Scope.Cluster, null);

public enum TapAgentsAction {
Add, Del, Contains,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.MDC;

Expand Down Expand Up @@ -1250,6 +1251,52 @@ public AgentHandler(final Task.Type type, final Link link, final byte[] data) {
super(type, link, data);
}

/**
 * Translates a health check result reported for a host into an automatic Enable/Disable
 * resource event.
 * <p>
 * Nothing happens when no result was reported, when the host cannot be found, or when
 * {@code EnableKVMAutoEnableDisable} is off for the host's cluster.
 *
 * @param hostHealthCheckResult {@code true} to request Enable, {@code false} to request Disable,
 *                              {@code null} when there is nothing to process
 * @param hostId                ID of the host the result belongs to
 */
private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long hostId) {
    // No result reported: nothing to act on.
    if (hostHealthCheckResult == null) {
        return;
    }

    final HostVO hostVO = _hostDao.findById(hostId);
    if (hostVO == null) {
        s_logger.error(String.format("Unable to find host with ID: %s", hostId));
        return;
    }

    final boolean featureEnabled = BooleanUtils.toBoolean(EnableKVMAutoEnableDisable.valueIn(hostVO.getClusterId()));
    if (!featureEnabled) {
        s_logger.debug(String.format("%s is disabled for the cluster %s, cannot process the health check result " +
                "received for the host %s", EnableKVMAutoEnableDisable.key(), hostVO.getClusterId(), hostVO.getName()));
        return;
    }

    // Pick the event and the wording of the log line from the (non-null) result.
    final ResourceState.Event event;
    final String outcome;
    final String action;
    if (hostHealthCheckResult) {
        event = ResourceState.Event.Enable;
        outcome = "succeeds";
        action = "enabling";
    } else {
        event = ResourceState.Event.Disable;
        outcome = "fails";
        action = "disabling";
    }

    try {
        s_logger.info(String.format("Host health check %s, auto %s KVM host: %s", outcome, action, hostVO.getName()));
        _resourceMgr.autoUpdateHostAllocationState(hostId, event);
    } catch (NoTransitionException e) {
        s_logger.error(String.format("Cannot Auto %s host: %s", event, hostVO.getName()), e);
    }
}

/**
 * Forwards the health check result carried by a startup command to
 * {@code processHostHealthCheckResult}; a {@code null} command is only logged.
 *
 * @param startup the startup command received for the host, may be {@code null}
 * @param hostId  ID of the host the command belongs to
 */
private void processStartupRoutingCommand(StartupRoutingCommand startup, long hostId) {
    if (startup != null) {
        processHostHealthCheckResult(startup.getHostHealthCheckResult(), hostId);
    } else {
        s_logger.error("Empty StartupRoutingCommand received");
    }
}

/**
 * Forwards the health check result carried by a ping to
 * {@code processHostHealthCheckResult}; a {@code null} command is only logged.
 *
 * @param pingRoutingCommand the ping received for the host, may be {@code null}
 * @param hostId             ID of the host the ping belongs to
 */
private void processPingRoutingCommand(PingRoutingCommand pingRoutingCommand, long hostId) {
    if (pingRoutingCommand != null) {
        processHostHealthCheckResult(pingRoutingCommand.getHostHealthCheckResult(), hostId);
    } else {
        s_logger.error("Empty PingRoutingCommand received");
    }
}

protected void processRequest(final Link link, final Request request) {
final AgentAttache attache = (AgentAttache)link.attachment();
final Command[] cmds = request.getCommands();
Expand Down Expand Up @@ -1291,6 +1338,7 @@ protected void processRequest(final Link link, final Request request) {
try {
if (cmd instanceof StartupRoutingCommand) {
final StartupRoutingCommand startup = (StartupRoutingCommand) cmd;
processStartupRoutingCommand(startup, hostId);
answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval());
} else if (cmd instanceof StartupProxyCommand) {
final StartupProxyCommand startup = (StartupProxyCommand) cmd;
Expand Down Expand Up @@ -1322,6 +1370,7 @@ protected void processRequest(final Link link, final Request request) {
// if the router is sending a ping, verify the
// gateway was pingable
if (cmd instanceof PingRoutingCommand) {
processPingRoutingCommand((PingRoutingCommand) cmd, hostId);
final boolean gatewayAccessible = ((PingRoutingCommand)cmd).isGatewayAccessible();
final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId));

Expand Down Expand Up @@ -1748,8 +1797,8 @@ public String getConfigComponentName() {

/**
 * Lists the configuration keys exposed by this component, including
 * {@code EnableKVMAutoEnableDisable}.
 *
 * @return the configuration keys exposed by this component
 */
@Override
public ConfigKey<?>[] getConfigKeys() {
    // Single return: the pre-change array (without EnableKVMAutoEnableDisable) was left in front of
    // this one, which made the new key unreachable.
    return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize,
            DirectAgentPoolSize, DirectAgentThreadCap, EnableKVMAutoEnableDisable };
}

protected class SetHostParamsListener implements Listener {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
private String _dcId;
private String _clusterId;
private final Properties _uefiProperties = new Properties();
private String hostHealthCheckScriptPath;

private long _hvVersion;
private Duration _timeout;
Expand Down Expand Up @@ -717,6 +718,10 @@ protected enum BridgeType {
NATIVE, OPENVSWITCH, TUNGSTEN
}

/**
 * Outcome of the host health check script as interpreted by this resource:
 * SUCCESS for exit code 0, FAILURE for exit code 1, and IGNORE when the script is not
 * configured, cannot be executed, or exits with any other code (no result is reported then).
 */
protected enum HealthCheckResult {
SUCCESS, FAILURE, IGNORE
}

protected BridgeType _bridgeType;

protected StorageSubsystemCommandHandler storageHandler;
Expand Down Expand Up @@ -943,6 +948,12 @@ public boolean configure(final String name, final Map<String, Object> params) th
throw new ConfigurationException("Unable to find the ovs-pvlan-kvm-vm.sh");
}

hostHealthCheckScriptPath = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEALTH_CHECK_SCRIPT_PATH);
if (StringUtils.isNotBlank(hostHealthCheckScriptPath) && !new File(hostHealthCheckScriptPath).exists()) {
s_logger.info(String.format("Unable to find the host health check script at: %s, " +
"discarding it", hostHealthCheckScriptPath));
}

setupTungstenVrouterPath = Script.findScript(tungstenScriptsDir, "setup_tungsten_vrouter.sh");
if (setupTungstenVrouterPath == null) {
throw new ConfigurationException("Unable to find the setup_tungsten_vrouter.sh");
Expand Down Expand Up @@ -3436,13 +3447,54 @@ protected synchronized String attachOrDetachDevice(final Connect conn, final boo

/**
 * Builds the periodic ping for this host and attaches the host health check result to it.
 * <p>
 * A {@code PingRoutingWithNwGroupsCommand} (with the synced network group states) is used when
 * {@code _canBridgeFirewall} is set, a plain {@code PingRoutingCommand} otherwise. The health check
 * result is only set on the ping when it is not {@code IGNORE}, so the field stays {@code null} otherwise.
 *
 * @param id the host ID placed in the ping
 * @return the ping command describing the current host status
 */
@Override
public PingCommand getCurrentStatus(final long id) {
    // Build the ping first and return it only at the end: the former early returns in both branches
    // made the health check code below unreachable.
    final PingRoutingCommand pingRoutingCommand;
    if (!_canBridgeFirewall) {
        pingRoutingCommand = new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport());
    } else {
        final HashMap<String, Pair<Long, Long>> nwGrpStates = syncNetworkGroups(id);
        pingRoutingCommand = new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates);
    }
    HealthCheckResult healthCheckResult = getHostHealthCheckResult();
    if (healthCheckResult != HealthCheckResult.IGNORE) {
        pingRoutingCommand.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS);
    }
    return pingRoutingCommand;
}

/**
 * Runs the configured host health check script and maps its outcome to a {@link HealthCheckResult}:
 * <ul>
 * <li>{@code SUCCESS}, if the script is executed and its exit code is 0</li>
 * <li>{@code FAILURE}, if the script is executed and its exit code is 1</li>
 * <li>{@code IGNORE}, if
 *   <ul>
 *   <li>the script path is not specified, or</li>
 *   <li>the script file does not exist, or is not a regular file, or</li>
 *   <li>the script file is not executable by this process, or</li>
 *   <li>the script exits with a code other than 0 or 1</li>
 *   </ul>
 * </li>
 * </ul>
 *
 * @return the interpreted health check result, never {@code null}
 */
private HealthCheckResult getHostHealthCheckResult() {
    if (StringUtils.isBlank(hostHealthCheckScriptPath)) {
        s_logger.debug("Host health check script path is not specified");
        return HealthCheckResult.IGNORE;
    }
    File script = new File(hostHealthCheckScriptPath);
    if (!script.exists() || !script.isFile() || !script.canExecute()) {
        // Name the actual cause: a path that exists but is not a regular file (e.g. a directory)
        // is not a permissions problem.
        final String reason;
        if (!script.exists()) {
            reason = "file does not exist";
        } else if (!script.isFile()) {
            reason = "path is not a regular file";
        } else {
            reason = "please check file permissions to execute this file";
        }
        s_logger.warn(String.format("The host health check script file set at: %s cannot be executed, " +
                "reason: %s", hostHealthCheckScriptPath, reason));
        return HealthCheckResult.IGNORE;
    }
    int exitCode = executeBashScriptAndRetrieveExitValue(hostHealthCheckScriptPath);
    if (s_logger.isDebugEnabled()) {
        s_logger.debug(String.format("Host health check script exit code: %s", exitCode));
    }
    return retrieveHealthCheckResultFromExitCode(exitCode);
}

/**
 * Maps the health check script's exit code to a {@link HealthCheckResult}.
 *
 * @param exitCode exit code returned by the health check script
 * @return {@code SUCCESS} for 0, {@code FAILURE} for 1, {@code IGNORE} for any other value
 */
private HealthCheckResult retrieveHealthCheckResultFromExitCode(int exitCode) {
    switch (exitCode) {
        case 0:
            return HealthCheckResult.SUCCESS;
        case 1:
            return HealthCheckResult.FAILURE;
        default:
            return HealthCheckResult.IGNORE;
    }
}

@Override
Expand Down Expand Up @@ -3484,6 +3536,10 @@ public StartupCommand[] initialize() {
cmd.setGatewayIpAddress(_localGateway);
cmd.setIqn(getIqn());
cmd.getHostDetails().put(HOST_VOLUME_ENCRYPTION, String.valueOf(hostSupportsVolumeEncryption()));
HealthCheckResult healthCheckResult = getHostHealthCheckResult();
if (healthCheckResult != HealthCheckResult.IGNORE) {
cmd.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS);
}

if (cmd.getHostDetails().containsKey("Host.OS")) {
_hostDistro = cmd.getHostDetails().get("Host.OS");
Expand Down
Loading