Commit f78f1f0

Working Distributed-HelloWorld
1 parent 80d7edc commit f78f1f0

File tree

6 files changed (+42 -26 lines)

config.py (+8 -7)

@@ -1,9 +1,9 @@
 # Constants (User configurable)

-APP_NAME = 'DistributedSomething' # Used to generate derivative names unique to the application.
+APP_NAME = 'DistributedHelloWorld' # Used to generate derivative names unique to the application.

 # DOCKER REGISTRY INFORMATION:
-DOCKERHUB_TAG = 'user/distributed-something:sometag'
+DOCKERHUB_TAG = 'distributedscience/distributed-helloworld:latest'

 # AWS GENERAL SETTINGS:
 AWS_REGION = 'us-east-1'
@@ -13,16 +13,16 @@

 # EC2 AND ECS INFORMATION:
 ECS_CLUSTER = 'default'
-CLUSTER_MACHINES = 3
+CLUSTER_MACHINES = 2
 TASKS_PER_MACHINE = 1
-MACHINE_TYPE = ['m4.xlarge']
+MACHINE_TYPE = ['t2.micro']
 MACHINE_PRICE = 0.10
-EBS_VOL_SIZE = 30 # In GB. Minimum allowed is 22.
+EBS_VOL_SIZE = 22 # In GB. Minimum allowed is 22.

 # DOCKER INSTANCE RUNNING ENVIRONMENT:
-DOCKER_CORES = 4 # Number of software processes to run inside a docker container
+DOCKER_CORES = 1 # Number of software processes to run inside a docker container
 CPU_SHARES = DOCKER_CORES * 1024 # ECS computing units assigned to each docker container (1024 units = 1 core)
-MEMORY = 15000 # Memory assigned to the docker container in MB
+MEMORY = 800 # Memory assigned to the docker container in MB
 SECONDS_TO_START = 3*60 # Wait before the next process is initiated to avoid memory collisions

 # SQS QUEUE INFORMATION:
@@ -40,3 +40,4 @@
 NECESSARY_STRING = '' # Is there any string that should be in the file name to "count"?

 # PUT ANYTHING SPECIFIC TO YOUR PROGRAM DOWN HERE
+MY_NAME = 'FirstName LastName'
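The downsized values are deliberate: a t2.micro offers 1 vCPU and 1 GiB of RAM, so DOCKER_CORES = 1 and MEMORY = 800 leave headroom for the OS and the ECS agent. As a minimal sketch (not part of the repo), one could pre-flight the config like this, with INSTANCE_SPECS as a hypothetical lookup table:

import config  # constants-only module, so importing it is side-effect free

INSTANCE_SPECS = {'t2.micro': {'vcpus': 1, 'memory_mb': 1024}}  # assumed helper, not in the repo

spec = INSTANCE_SPECS[config.MACHINE_TYPE[0]]
cores_needed = config.DOCKER_CORES * config.TASKS_PER_MACHINE
memory_needed = config.MEMORY * config.TASKS_PER_MACHINE
assert cores_needed <= spec['vcpus'], 'container requests more cores than the instance has'
assert memory_needed < spec['memory_mb'], 'leave some memory free for the OS and ECS agent'
print(f"OK: {cores_needed} core(s), {memory_needed} MB fits on {config.MACHINE_TYPE[0]}")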

files/exampleJob.json (+3 -6)

@@ -2,14 +2,11 @@
   "_comment0": "All keys that contain '_comment' will be omitted from the job specification dictionary sent to SQS",
   "_comment1": "The top part should be variables common to all jobs",
   "_comment2": "These could be things like a master input location, run parameters, etc",
-  "input_location": "Somewhere/on/your/bucket",
-  "job_flag_1": "True",
+  "favorite_color": "Blue",
   "_comment3": "The following groups are individual tasks, and each will be run in parallel",
   "groups": [
-    {"job_flag_2": "A", "job_flag_3": "foo"},
-    {"job_flag_2": "A", "job_flag_3": "bar"},
-    {"job_flag_2": "B", "job_flag_3": "foo"},
-    {"job_flag_2": "B", "job_flag_3": "bar"}
+    {"ice_cream": "chocolate", "pizza": "pepperoni"},
+    {"ice_cream": "cookie dough", "pizza": "mushroom"}
   ]
 }
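Each entry in "groups" becomes one task: the submit step merges it with the non-comment top-level keys and queues one SQS message per group, which the worker later reads as message["group"]. A rough sketch of that expansion, assuming run.py's submit logic works along these lines:

import json

with open('files/exampleJob.json') as f:
    job = json.load(f)

# Keys common to every task: everything except comments and the group list
common = {k: v for k, v in job.items() if '_comment' not in k and k != 'groups'}
for group in job['groups']:
    message = dict(common)
    message['group'] = group  # matches the worker's message["group"] access
    print(json.dumps(message))  # run.py would send this to SQS instead of printing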

run.py (+6 -1)

@@ -1,6 +1,7 @@
 from __future__ import print_function
 import os, sys
 import boto3
+import configparser
 import datetime
 import json
 import time
@@ -145,7 +146,11 @@ def generate_task_definition(AWS_PROFILE):
         {
             "name": "NECESSARY_STRING",
             "value": NECESSARY_STRING
-        }
+        },
+        {
+            "name": "MY_NAME",
+            "value": MY_NAME
+        },
     ]
     return task_definition, taskRoleArn
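Every entry in that environment list becomes an environment variable inside the container once the task definition is registered with ECS, which is how MY_NAME travels from config.py to the worker. A sketch of the hand-off, assuming the returned task_definition already matches the boto3 API shape:

import boto3

ecs = boto3.client('ecs', region_name=AWS_REGION)  # AWS_REGION from config.py
task_definition, taskRoleArn = generate_task_definition(AWS_PROFILE)
ecs.register_task_definition(**task_definition)  # MY_NAME now rides along as an env var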

worker/Dockerfile (+1 -1)

@@ -6,7 +6,7 @@
 #


-FROM someuser/somedocker:sometag
+FROM ubuntu:18.04

 # Install S3FS
 USER root
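Swapping the placeholder base image for stock ubuntu:18.04 is enough here because the hello-world command only needs printf, plus the S3FS and worker tooling that the rest of the Dockerfile already installs on top of the base.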

worker/Makefile (+3 -3)

@@ -1,6 +1,6 @@
-user = user
-project = distributed-something
-tag = sometag
+user = distributedscience
+project = distributed-helloworld
+tag = latest

 .DEFAULT_GOAL: build
 build:
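These three variables combine into distributedscience/distributed-helloworld:latest, matching DOCKERHUB_TAG in config.py; the build target presumably tags the image as $(user)/$(project):$(tag), so the two files must stay in sync.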

worker/generic-worker.py (+21 -8)

@@ -44,6 +44,7 @@
     DOWNLOAD_FILES = False
 else:
     DOWNLOAD_FILES = os.environ['DOWNLOAD_FILES']
+MY_NAME = os.environ['MY_NAME']

 localIn = '/home/ubuntu/local_input'

@@ -110,18 +111,19 @@ def runSomething(message):

     # Parse your message somehow to pull out a name variable that's going to make sense to you when you want to look at the logs later
     # What's commented out below will work, otherwise, create your own
-    #group_to_run = message["group"]
-    #groupkeys = list(group_to_run.keys())
-    #groupkeys.sort()
-    #metadataID = '-'.join(groupkeys)
+    group_to_run = message["group"]
+    groupkeys = list(group_to_run.keys())
+    groupkeys.sort()
+    metadataID = '-'.join(groupkeys)

     # Add a handler with
-    # watchtowerlogger=watchtower.CloudWatchLogHandler(log_group=LOG_GROUP_NAME, stream_name=str(metadataID),create_log_group=False)
-    # logger.addHandler(watchtowerlogger)
+    watchtowerlogger=watchtower.CloudWatchLogHandler(log_group=LOG_GROUP_NAME, stream_name=str(metadataID),create_log_group=False)
+    logger.addHandler(watchtowerlogger)

     # See if this is a message you've already handled, if you've so chosen
     # First, build a variable called remoteOut that equals your unique prefix of where your output should be
     # Then check if there are too many files
+    remoteOut = metadataID

     if CHECK_IF_DONE_BOOL.upper() == 'TRUE':
         try:
@@ -143,14 +145,25 @@ def runSomething(message):
     # ie cmd = my-program --my-flag-1 True --my-flag-2 VARIABLE
     # you should assign the variable "localOut" to the output location where you expect your program to put files

+    localOut = metadataID
+    local_file_name = os.path.join(localOut,'HelloWorld.txt')
+    if not os.path.exists(localOut):
+        os.makedirs(localOut,exist_ok=True)
+
+    cmd = f'printf "Hi, my name is {MY_NAME}, and my favorite {groupkeys[0]} is {group_to_run[groupkeys[0]]}, and my favorite {groupkeys[1]} is {group_to_run[groupkeys[1]]}" > {local_file_name}'
+
     print('Running', cmd)
     logger.info(cmd)
-    subp = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    # typically, changes to the subprocess command aren't needed at all
+    subp = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     monitorAndLog(subp,logger)

     # Figure out a done condition - a number of files being created, a particular file being created, an exit code, etc.
+    done = True
+
     # If done, get the outputs and move them to S3
-    if [ENTER DONE CONDITION HERE]:
+    if done:
         time.sleep(30)
         mvtries=0
         while mvtries <3:
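Tracing the change end to end: for the first group in files/exampleJob.json, groupkeys sorts to ['ice_cream', 'pizza'], metadataID (and with it localOut and remoteOut) becomes 'ice_cream-pizza', and the worker writes a HelloWorld.txt containing roughly:

Hi, my name is FirstName LastName, and my favorite ice_cream is chocolate, and my favorite pizza is pepperoni

Note that because metadataID joins the group keys rather than their values, both example groups map to the same 'ice_cream-pizza' prefix; that is harmless for a hello world, but a real job would want the values in the ID so outputs and log streams stay distinct.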
