Skip to content

Commit e80a3e3

Browse files
authored
Merge pull request #52 from OpenKBC/engineering_dev
Connected pipelines to s3 bucket
2 parents 8f53fbd + 0fd8dae commit e80a3e3

File tree

8 files changed

+68
-14
lines changed

8 files changed

+68
-14
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
* Current members: Kicheol Kim, Junhee Yoon
44
* Please leave a message in the **Discussions** tab if you have any questions or requests
5-
* Please use docker image to analyze the data. AWS module is ready and Please ask to members for getting auth f AWS is needed to analze data.
5+
* Please use the docker image to analyze the data. The AWS module is ready; please ask members for authorization if AWS access is needed
66
* Our data is located in S3 bucket
77

88
### Goal

aws_module/ec2_deployment/aws_module.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,14 @@ echo "Cooling down starts. It takes more than 8 minutes.."
6969
## 7m, cooling down while AWS is loading and preparing resources
7070
sleep 500
7171

72+
## copy aws credential to ec2
73+
scp -i MSplatform-key.pem -o StrictHostKeyChecking=no $HOME/.aws/credentials ubuntu@$ip_addr:/home/ubuntu/.aws/
74+
7275
## Running installer
7376
ssh -i MSplatform-key.pem -o StrictHostKeyChecking=no ubuntu@$ip_addr 'bash -s' < utils/installer.sh
7477

7578
## Moving credentials to ec2 for s3 connection
76-
scp -i MSplatform-key.pem -o StrictHostKeyChecking=no credentials ubuntu@$ip_addr:/home/ubuntu/.aws
79+
#scp -i MSplatform-key.pem -o StrictHostKeyChecking=no credentials ubuntu@$ip_addr:/home/ubuntu/.aws
7780

7881
## S3 sync from S3 project bucket
7982
ssh -i MSplatform-key.pem -o StrictHostKeyChecking=no ubuntu@$ip_addr 'bash -s' < utils/s3Sync.sh

docker-compose.AWS.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ services:
2424
- /home/ubuntu/MSProject/multiple_sclerosis_proj/pipelines:/pipelines
2525
- /home/ubuntu/MSProject/multiple_sclerosis_proj/data:/MainData
2626
- /home/ubuntu/MSProject/multiple_sclerosis_proj/notebook/resultFiles:/Output
27+
- $HOME/.aws/credentials:/root/.aws/credentials:ro
2728
ports:
2829
- 80:5000
2930
depends_on:
@@ -46,6 +47,7 @@ services:
4647
- /home/ubuntu/MSProject/multiple_sclerosis_proj/pipelines:/pipelines
4748
- /home/ubuntu/MSProject/multiple_sclerosis_proj/data:/MainData
4849
- /home/ubuntu/MSProject/multiple_sclerosis_proj/resultFiles:/Output
50+
- $HOME/.aws/credentials:/root/.aws/credentials:ro
4951
working_dir: /pipelines/pipeline_controller/
5052
command: conda run -n pipeline_controller_base celery -A app.celery worker --loglevel=info
5153
depends_on:

docker-compose.yaml

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ services:
33
notebook: # Notebook
44
build:
55
context: ./notebook
6-
#image: swiri021/openkbc_msproject:notebookcontainer1
6+
image: swiri021/openkbc_msproject:notebookcontainer1
77
volumes:
88
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/notebook/notebook_lib:/home/jovyan/work/notebook_lib
99
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/notebook/notebook_utils:/home/jovyan/work/notebook_utils
@@ -17,15 +17,12 @@ services:
1717
pipelines: # Pipelines
1818
build:
1919
context: ./pipelines
20-
#image: swiri021/openkbc_msproject:pipelinecontainer1
21-
deploy:
22-
resources:
23-
limits:
24-
memory: 4000m
20+
image: swiri021/openkbc_msproject:pipelinecontainer1
2521
volumes:
2622
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/pipelines:/pipelines
2723
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/data:/MainData
2824
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/notebook/resultFiles:/Output
25+
- $HOME/.aws/credentials:/root/.aws/credentials:ro
2926
ports:
3027
- 80:5000
3128
depends_on:
@@ -44,11 +41,16 @@ services:
4441
celery: # celery
4542
build:
4643
context: ./pipelines
47-
#image: swiri021/openkbc_msproject:celerycontainer1
44+
image: swiri021/openkbc_msproject:celerycontainer1
45+
deploy:
46+
resources:
47+
limits:
48+
memory: 8000m
4849
volumes:
4950
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/pipelines:/pipelines
5051
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/data:/MainData
5152
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/notebook/resultFiles:/Output
53+
- $HOME/.aws/credentials:/root/.aws/credentials:ro
5254
working_dir: /pipelines/pipeline_controller/
5355
command: conda run -n pipeline_controller_base celery -A app.celery worker --loglevel=info
5456
depends_on:
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
InputFolder: /MainData/rsem_counts
2+
MetaFile: /MainData/annotation_metadata/EPIC_HCvB_metadata_baseline_updated-share.csv
3+
SampleColumnName: HCVB_ID
4+
CondColumnName: DiseaseCourse
5+
Condition1: RR
6+
Condition2: CIS
7+
OutputFolder: ./OutputTest/
8+
logID: user1

pipelines/pipeline_controller/app.py

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
import uuid
1919
import os
2020
import subprocess
21+
import boto3
22+
import glob
2123

2224
# Custom form making
2325
from wtforms.validators import Required
@@ -123,22 +125,39 @@ def workflow_running(pipeline_path, yaml_file):
123125
line = proc.stdout.readline()
124126
if not line:
125127
break
126-
print(str(line))
127128
current_task.update_state(state='PROGRESS', meta={'msg': str(line)})
128129
return 999
129130

130131
@app.route("/workflow_progress")
131132
def workflow_progress():
132-
print("WORKFLOW RETURN")
133133
jobid = request.values.get('jobid')
134134
print(jobid)
135135
if jobid:
136136
job = AsyncResult(jobid, app=celery)
137137
print(job.state)
138138
if job.state == 'PROGRESS':
139139
return json.dumps(dict( state=job.state, msg=job.result['msg'],))
140+
140141
elif job.state == 'SUCCESS':
142+
## S3 Upload process START
143+
output_counter = int(session.get('output_count', None))
144+
output_folder_list = [ session.get('output'+str(i), None) for i in range(output_counter)]
145+
logID = session.get('logID', None)
146+
bucket_name = 'openkbc-ms-result-bucket' # fixed bucket
147+
#bucket_dest = 's3://'+bucket_name+"/"+logID+"/"
148+
149+
s3 = boto3.client('s3') # Client set, S3
150+
for path in output_folder_list:
151+
filelist = glob.glob(path+"/*") # search all files
152+
for fname in filelist: # get name
153+
with open(fname, "rb") as f:
154+
s3.upload_fileobj(f, bucket_name, logID+"/"+os.path.basename(fname)) # upload to s3
155+
## S3 Upload process END
156+
141157
return json.dumps(dict( state=job.state, msg="done",))
158+
159+
elif job.state == 'FAILURE':
160+
return json.dumps(dict( state=job.state, msg="failture",)) ## return somewhere to exit
142161
return '{}'
143162

144163
@app.route("/status")
@@ -190,13 +209,26 @@ def _reform_yamlFile(selected_pipeline, data_dict):
190209
f = open(yamlFileName, "w") # write file with unique name
191210

192211
nested_items = [] # List for handing nested items
212+
output_count=0 # Output key count(Tracking purpose)
193213
for key, value in data_dict.items():
194214
if key.find('--')>-1: # Nested key has '--'
195215
subkeys = key.split('--')# 2 layers keys
196216
nested_items.append([subkeys[0],subkeys[1],value]) #make list
197217
else:
198-
f.write(key+": "+value+"\n")
218+
## Tracking output path and user ID
219+
if key.find("Output") > -1 or key.find("output") > -1: ## key has 'output' string
220+
output_count+=1
221+
session['output'+str(output_count)]=value # set session for output folder (Tracking purpose)
222+
session['output_count'] = output_count # set session for output counter (Tracking purpose)
223+
224+
if key.find('logID') > -1: # Find log ID
225+
session['logID'] = value # set session for ID
226+
## Tracking output path and user ID
199227

228+
f.write(key+": "+value+"\n") ## Write new form of yaml
229+
230+
### Add error handling here
231+
### Add error handling here
200232
key1_unique=list(set([x[0] for x in nested_items])) # make a list of root key
201233
for x in key1_unique:
202234
f.write(x+":"+"\n") # first line of nested key (root key)
@@ -206,6 +238,10 @@ def _reform_yamlFile(selected_pipeline, data_dict):
206238

207239
f.close()
208240
return yamlFileName
209-
241+
242+
def get_filenames(path):
243+
filelist = glob.glob(path+"/*")
244+
return filelist
245+
210246
if __name__ == '__main__':
211247
app.run(host='0.0.0.0')

pipelines/pipeline_controller/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ Flask-Bootstrap==3.3.7.1
88
flask-nav==0.6
99
celery==5.1.2
1010
redis==3.5.3
11+
boto3==1.18.54
12+
awscli==1.20.54
1113
##deg requirements
1214
pip==21.2.2
1315
pandas==1.3.2

pipelines/pipeline_controller/templates/progress.html

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ <h3>Workflow controller</h3>
2222
$("#pct").html("<b>Workflow has been completed</b>");
2323
if(resp.msg == 'done') {
2424
return;
25-
} else{
25+
}
26+
else{
2627
$("#pct").html("<img src='/static/spinning-loading.gif'>");
2728
setTimeout(poll, 1000.0);
2829
}

0 commit comments

Comments
 (0)