Skip to content
This repository was archived by the owner on Sep 12, 2024. It is now read-only.

Commit 91c0d6b

Browse files
committed
Merge pull request #173 from spotify/rculbertson/clean-temp-job-logs
Improve log messages from helios testing framework
2 parents 47e15ec + 6df7fff commit 91c0d6b

File tree

4 files changed

+51
-36
lines changed

4 files changed

+51
-36
lines changed

helios-testing/src/main/java/com/spotify/helios/testing/Jobs.java

+20-6
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,14 @@
2424
import com.google.common.util.concurrent.ListenableFuture;
2525

2626
import com.spotify.helios.client.HeliosClient;
27+
import com.spotify.helios.common.descriptors.Job;
2728
import com.spotify.helios.common.descriptors.JobId;
2829
import com.spotify.helios.common.protocol.JobDeleteResponse;
2930
import com.spotify.helios.common.protocol.JobUndeployResponse;
3031

32+
import org.slf4j.Logger;
33+
import org.slf4j.LoggerFactory;
34+
3135
import java.util.List;
3236
import java.util.concurrent.ExecutionException;
3337
import java.util.concurrent.TimeUnit;
@@ -38,6 +42,8 @@
3842

3943
class Jobs {
4044

45+
private static final Logger log = LoggerFactory.getLogger(Jobs.class);
46+
4147
static final long TIMEOUT_MILLIS = TimeUnit.MINUTES.toMillis(5);
4248

4349
static <T> T get(final ListenableFuture<T> future)
@@ -50,39 +56,47 @@ static <T> T get(final ListenableFuture<T> future, final long timeout)
5056
return future.get(timeout, MILLISECONDS);
5157
}
5258

59+
static String getJobDescription(final Job job) {
60+
final String shortHash = job.getId().getHash().substring(0, 7);
61+
return String.format("%s (Job %s)", job.getImage(), shortHash);
62+
}
63+
5364
/**
5465
* Undeploy the job from all specified hosts, and delete the job. Any failures will be ignored,
5566
* and we will keep trying each host. A list of errors encountered along the way will be returned
5667
* to the caller.
5768
* @param client the HeliosClient to use
58-
* @param jobId the JobId to undeploy and delete
69+
* @param job the job to undeploy and delete
5970
* @param hosts the hosts to undeploy from
6071
* @param errors errors encountered during the undeploy will be added to this list
6172
* @return the list of errors
6273
*/
63-
static List<AssertionError> undeploy(final HeliosClient client, final JobId jobId,
74+
static List<AssertionError> undeploy(final HeliosClient client, final Job job,
6475
final List<String> hosts,
6576
final List<AssertionError> errors) {
77+
final JobId id = job.getId();
6678
for (String host : hosts) {
79+
log.info("Undeploying {} from {}", getJobDescription(job), host);
6780
final JobUndeployResponse response;
6881
try {
69-
response = get(client.undeploy(jobId, host));
82+
response = get(client.undeploy(id, host));
7083
if (response.getStatus() != JobUndeployResponse.Status.OK &&
7184
response.getStatus() != JobUndeployResponse.Status.JOB_NOT_FOUND) {
7285
errors.add(new AssertionError(format("Failed to undeploy job %s - %s",
73-
jobId, response)));
86+
id, response)));
7487
}
7588
} catch (InterruptedException | ExecutionException | TimeoutException e) {
7689
errors.add(new AssertionError(e));
7790
}
7891
}
7992

8093
try {
81-
final JobDeleteResponse response = get(client.deleteJob(jobId));
94+
log.debug("Deleting job {}", id);
95+
final JobDeleteResponse response = get(client.deleteJob(id));
8296
if (response.getStatus() != JobDeleteResponse.Status.OK &&
8397
response.getStatus() != JobDeleteResponse.Status.JOB_NOT_FOUND) {
8498
errors.add(new AssertionError(format("Failed to delete job %s - %s",
85-
jobId.toString(), response.toString())));
99+
id.toString(), response.toString())));
86100
}
87101
} catch (InterruptedException | ExecutionException | TimeoutException e) {
88102
errors.add(new AssertionError(e));

helios-testing/src/main/java/com/spotify/helios/testing/TemporaryJob.java

+9-15
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import static com.google.common.collect.Maps.newHashMap;
5555
import static com.spotify.helios.testing.Jobs.TIMEOUT_MILLIS;
5656
import static com.spotify.helios.testing.Jobs.get;
57+
import static com.spotify.helios.testing.Jobs.getJobDescription;
5758
import static java.lang.String.format;
5859
import static java.util.concurrent.TimeUnit.MILLISECONDS;
5960
import static org.junit.Assert.fail;
@@ -148,9 +149,8 @@ public List<HostAndPort> addresses(final String port) {
148149

149150
void deploy() {
150151
try {
151-
log.info("Deploying job {} with image {}", job.getId(), job.getImage());
152-
153152
// Create job
153+
log.info("Creating job {}", job.getId().toShortString());
154154
final CreateJobResponse createResponse = get(client.createJob(job));
155155
if (createResponse.getStatus() != CreateJobResponse.Status.OK) {
156156
fail(format("Failed to create job %s - %s", job.getId(),
@@ -170,7 +170,7 @@ void deploy() {
170170
hostToIp.put(host, hostAddress);
171171
}
172172

173-
log.info("Waiting for Helios job deploy of {} on host {}", deployment.getJobId(), host);
173+
log.info("Deploying {} to {}", getJobDescription(job), host);
174174
final JobDeployResponse deployResponse = get(client.deploy(deployment, host));
175175
if (deployResponse.getStatus() != JobDeployResponse.Status.OK) {
176176
fail(format("Failed to deploy job %s %s - %s",
@@ -189,32 +189,30 @@ void deploy() {
189189
}
190190

191191
void undeploy(final List<AssertionError> errors) {
192-
log.info("Undeploying job {}", job.getId());
193-
194-
Jobs.undeploy(client, job.getId(), hosts, errors);
192+
Jobs.undeploy(client, job, hosts, errors);
195193
}
196194

197195
private void awaitUp(final String host) throws TimeoutException {
198-
log.info("Waiting for job {} to be up on host {}", job.getId(), host);
199196

200197
final TaskStatus status = Polling.awaitUnchecked(
201198
TIMEOUT_MILLIS, MILLISECONDS, new Callable<TaskStatus>() {
202199
@Override
203200
public TaskStatus call() throws Exception {
204-
log.debug("Getting job status for job {}", job.getId());
205-
206201
final JobStatus status = Futures.getUnchecked(client.jobStatus(job.getId()));
207202
if (status == null) {
203+
log.debug("Job status not available");
208204
return null;
209205
}
210206
final TaskStatus taskStatus = status.getTaskStatuses().get(host);
211207
if (taskStatus == null) {
208+
log.debug("Task status not available on {}", host);
212209
return null;
213210
}
214211

215212
verifyHealthy(host, taskStatus);
216213

217214
final TaskStatus.State state = taskStatus.getState();
215+
log.info("Job state: {}", state);
218216

219217
if (state == TaskStatus.State.RUNNING) {
220218
return taskStatus;
@@ -233,7 +231,7 @@ public TaskStatus call() throws Exception {
233231
}
234232

235233
void verifyHealthy() throws AssertionError {
236-
log.debug("Checking health of {}", job.getId());
234+
log.debug("Checking health of {}", job.getImage());
237235
final JobStatus status = Futures.getUnchecked(client.jobStatus(job.getId()));
238236
if (status == null) {
239237
return;
@@ -244,7 +242,7 @@ void verifyHealthy() throws AssertionError {
244242
}
245243

246244
private void verifyHealthy(final String host, final TaskStatus status) {
247-
log.debug("Checking health of {} on {}", job.getId(), host);
245+
log.debug("Checking health of {} on {}", job.getImage(), host);
248246
final TaskStatus.State state = status.getState();
249247
if (state == TaskStatus.State.FAILED ||
250248
state == TaskStatus.State.EXITED ||
@@ -256,15 +254,11 @@ private void verifyHealthy(final String host, final TaskStatus status) {
256254
}
257255
throw new AssertionError(format(
258256
"Unexpected job state %s. Check helios agent logs for details.", stateString));
259-
} else if (state == TaskStatus.State.PULLING_IMAGE) {
260-
log.info("Pulling image for {} on {}", job.getId(), host);
261257
}
262258
}
263259

264260
private void awaitPort(final String port, final String host) throws TimeoutException {
265261
final String endpoint = endpointFromHost(host);
266-
log.info("Awaiting port availability on {}:{}", endpoint, port);
267-
268262
final TaskStatus taskStatus = statuses.get(host);
269263
assert taskStatus != null;
270264
final Integer externalPort = taskStatus.getPorts().get(port).getExternalPort();

helios-testing/src/main/java/com/spotify/helios/testing/TemporaryJobs.java

+4-15
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
package com.spotify.helios.testing;
2323

24-
import com.google.common.base.Joiner;
2524
import com.google.common.base.Optional;
2625
import com.google.common.base.Splitter;
2726
import com.google.common.base.Throwables;
@@ -90,7 +89,7 @@ public class TemporaryJobs implements TestRule {
9089
private final ExecutorService executor = MoreExecutors.getExitingExecutorService(
9190
(ThreadPoolExecutor) Executors.newFixedThreadPool(
9291
1, new ThreadFactoryBuilder()
93-
.setNameFormat("helios-temporary-jobs-test-runner-%d")
92+
.setNameFormat("helios-test-runner-%d")
9493
.setDaemon(true)
9594
.build()),
9695
0, SECONDS);
@@ -106,8 +105,7 @@ public TemporaryJob deploy(final Job job, final String hostFilter,
106105

107106
final List<String> hosts;
108107
try {
109-
log.info("Getting list of hosts");
110-
108+
log.debug("Getting list of hosts");
111109
hosts = client.listHosts().get();
112110
} catch (InterruptedException | ExecutionException e) {
113111
throw new AssertionError("Failed to get list of Helios hosts", e);
@@ -138,7 +136,6 @@ public TemporaryJob deploy(final Job job, final List<String> hosts,
138136
"no arguments to automatically select a host");
139137
}
140138

141-
log.info("Deploying {} to {}", job.getImage(), Joiner.on(", ").skipNulls().join(hosts));
142139
final TemporaryJob temporaryJob = new TemporaryJob(client, prober, job, hosts, waitPorts);
143140
jobs.add(temporaryJob);
144141
temporaryJob.deploy();
@@ -190,8 +187,6 @@ public void after() {
190187

191188
final List<AssertionError> errors = newArrayList();
192189

193-
log.info("Undeploying temporary jobs");
194-
195190
for (TemporaryJob job : jobs) {
196191
job.undeploy(errors);
197192
}
@@ -318,7 +313,7 @@ private void verifyJobsHealthy() throws AssertionError {
318313
private void removeOldJobs(final Path prefixDirectory)
319314
throws ExecutionException, InterruptedException, IOException {
320315
final File[] files = prefixDirectory.toFile().listFiles();
321-
if (files == null) {
316+
if (files == null || files.length == 0) {
322317
return;
323318
}
324319

@@ -352,16 +347,10 @@ private void removeOldJobs(final Path prefixDirectory)
352347
continue;
353348
}
354349
// Get list of all hosts where this job is deployed, and undeploy
355-
log.info("Getting status for job {}", jobId);
356350
final JobStatus status = client.jobStatus(entry.getKey()).get();
357351
final List<String> hosts = ImmutableList.copyOf(status.getDeployments().keySet());
358-
359-
log.info("Undeploying job {} from hosts {}",
360-
jobId,
361-
Joiner.on(", ").skipNulls().join(hosts));
362-
363352
final List<AssertionError> errors =
364-
undeploy(client, jobId, hosts, new ArrayList<AssertionError>());
353+
undeploy(client, entry.getValue(), hosts, new ArrayList<AssertionError>());
365354

366355
// Set flag indicating if any errors occur
367356
if (!errors.isEmpty()) {

helios-testing/src/test/java/com/spotify/helios/testing/TemporaryJobsTest.java

+18
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,18 @@
4242
import static com.google.common.base.Charsets.UTF_8;
4343
import static com.google.common.collect.Iterables.getOnlyElement;
4444
import static com.spotify.helios.common.descriptors.HostStatus.Status.UP;
45+
import static com.spotify.helios.testing.Jobs.getJobDescription;
4546
import static java.util.concurrent.TimeUnit.MINUTES;
4647
import static java.util.concurrent.TimeUnit.SECONDS;
48+
import static org.hamcrest.Matchers.both;
4749
import static org.hamcrest.Matchers.contains;
50+
import static org.hamcrest.Matchers.containsString;
4851
import static org.hamcrest.Matchers.equalTo;
4952
import static org.hamcrest.Matchers.hasKey;
5053
import static org.hamcrest.Matchers.is;
5154
import static org.hamcrest.Matchers.not;
5255
import static org.hamcrest.Matchers.notNullValue;
56+
import static org.hamcrest.Matchers.startsWith;
5357
import static org.junit.Assert.assertArrayEquals;
5458
import static org.junit.Assert.assertEquals;
5559
import static org.junit.Assert.assertFalse;
@@ -398,6 +402,20 @@ public void testDeploymentFailure() throws Exception {
398402
assertTrue("Test should not time out", (end-start) < Jobs.TIMEOUT_MILLIS);
399403
}
400404

405+
@Test
406+
public void testGetJobDescription() {
407+
final Job job = Job.newBuilder()
408+
.setImage(BUSYBOX)
409+
.setName("testGetJobDescription")
410+
.setVersion("1")
411+
.build();
412+
final String shortHash = job.getId().getHash().substring(0, 7);
413+
414+
// Simple test to verify the job description contains the image name and a shortened job hash.
415+
assertThat(getJobDescription(job),
416+
both(startsWith(BUSYBOX)).and(containsString(shortHash)));
417+
}
418+
401419
private static boolean fileExists(final Path path, final String prefix) {
402420
return new File(path.toFile(), prefix).exists();
403421
}

0 commit comments

Comments
 (0)