Merge pull request #52 from OpenKBC/engineering_dev
Connected pipelines to s3 bucket
swiri021 authored Oct 5, 2021
2 parents 8f53fbd + 0fd8dae commit e80a3e3
Showing 8 changed files with 68 additions and 14 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@

* Current members: Kicheol Kim, Junhee Yoon
* Please, leave a message in **Discussions** tab if you have any question and requests
* Please use docker image to analyze the data. AWS module is ready and Please ask to members for getting auth f AWS is needed to analze data.
* Please use docker image to analyze the data. AWS module is ready and Please ask to members for getting auth if AWS is needed
* Our data is located in S3 bucket

### Goal
5 changes: 4 additions & 1 deletion aws_module/ec2_deployment/aws_module.sh
@@ -69,11 +69,14 @@ echo "Cooling down starts. It takes more than 8 minutes.."
## 7m, cooling down while AWS is loading and preparing resources
sleep 500

## copy aws credential to ec2
scp -i MSplatform-key.pem -o StrictHostKeyChecking=no $HOME/.aws/credentials ubuntu@$ip_addr:/home/ubuntu/.aws/

## Running installer
ssh -i MSplatform-key.pem -o StrictHostKeyChecking=no ubuntu@$ip_addr 'bash -s' < utils/installer.sh

## Moving credentials to ec2 for s3 connection
scp -i MSplatform-key.pem -o StrictHostKeyChecking=no credentials ubuntu@$ip_addr:/home/ubuntu/.aws
#scp -i MSplatform-key.pem -o StrictHostKeyChecking=no credentials ubuntu@$ip_addr:/home/ubuntu/.aws

## S3 sync from S3 project bucket
ssh -i MSplatform-key.pem -o StrictHostKeyChecking=no ubuntu@$ip_addr 'bash -s' < utils/s3Sync.sh
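Note: the `utils/s3Sync.sh` script invoked above is not part of this diff. As a rough boto3 sketch of the kind of pull it performs from the project bucket (the bucket name and local target path here are assumptions, not values taken from this commit):

```python
import os
import boto3

def sync_bucket_to_local(bucket_name, local_dir):
    """Pull every object from the bucket into local_dir (hypothetical helper)."""
    s3 = boto3.client("s3")  # reads the credentials copied to ~/.aws on the EC2 host
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket_name):
        for obj in page.get("Contents", []):
            key = obj["Key"]
            if key.endswith("/"):  # skip folder placeholder objects
                continue
            dest = os.path.join(local_dir, key)
            os.makedirs(os.path.dirname(dest), exist_ok=True)
            s3.download_file(bucket_name, key, dest)

# Bucket name and target directory are illustrative only
sync_bucket_to_local("openkbc-ms-bucket", "/home/ubuntu/MSProject/multiple_sclerosis_proj/data")
```

In practice the shell script may simply call the AWS CLI's `aws s3 sync`, which does the same recursive copy in one command.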
2 changes: 2 additions & 0 deletions docker-compose.AWS.yaml
@@ -24,6 +24,7 @@ services:
- /home/ubuntu/MSProject/multiple_sclerosis_proj/pipelines:/pipelines
- /home/ubuntu/MSProject/multiple_sclerosis_proj/data:/MainData
- /home/ubuntu/MSProject/multiple_sclerosis_proj/notebook/resultFiles:/Output
- $HOME/.aws/credentials:/root/.aws/credentials:ro
ports:
- 80:5000
depends_on:
@@ -46,6 +47,7 @@ services:
- /home/ubuntu/MSProject/multiple_sclerosis_proj/pipelines:/pipelines
- /home/ubuntu/MSProject/multiple_sclerosis_proj/data:/MainData
- /home/ubuntu/MSProject/multiple_sclerosis_proj/resultFiles:/Output
- $HOME/.aws/credentials:/root/.aws/credentials:ro
working_dir: /pipelines/pipeline_controller/
command: conda run -n pipeline_controller_base celery -A app.celery worker --loglevel=info
depends_on:
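The read-only mount of `$HOME/.aws/credentials` into `/root/.aws/credentials` is what lets boto3 inside the containers find credentials through its default provider chain. A quick way to confirm the mount works from inside a container (the STS call is purely illustrative, not part of the pipeline code):

```python
import boto3

# With no env vars or instance role set, boto3's default chain falls back to
# /root/.aws/credentials, i.e. the file mounted read-only above.
sts = boto3.client("sts")
print(sts.get_caller_identity()["Arn"])  # prints the identity the mounted credentials resolve to
```

Mounting the file read-only keeps host credentials out of the image layers while still letting the containers reach S3.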
16 changes: 9 additions & 7 deletions docker-compose.yaml
@@ -3,7 +3,7 @@ services:
notebook: # Notebook
build:
context: ./notebook
#image: swiri021/openkbc_msproject:notebookcontainer1
image: swiri021/openkbc_msproject:notebookcontainer1
volumes:
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/notebook/notebook_lib:/home/jovyan/work/notebook_lib
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/notebook/notebook_utils:/home/jovyan/work/notebook_utils
@@ -17,15 +17,12 @@
pipelines: # Pipelines
build:
context: ./pipelines
#image: swiri021/openkbc_msproject:pipelinecontainer1
deploy:
resources:
limits:
memory: 4000m
image: swiri021/openkbc_msproject:pipelinecontainer1
volumes:
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/pipelines:/pipelines
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/data:/MainData
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/notebook/resultFiles:/Output
- $HOME/.aws/credentials:/root/.aws/credentials:ro
ports:
- 80:5000
depends_on:
@@ -44,11 +41,16 @@
celery: # celery
build:
context: ./pipelines
#image: swiri021/openkbc_msproject:celerycontainer1
image: swiri021/openkbc_msproject:celerycontainer1
deploy:
resources:
limits:
memory: 8000m
volumes:
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/pipelines:/pipelines
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/data:/MainData
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/notebook/resultFiles:/Output
- $HOME/.aws/credentials:/root/.aws/credentials:ro
working_dir: /pipelines/pipeline_controller/
command: conda run -n pipeline_controller_base celery -A app.celery worker --loglevel=info
depends_on:
@@ -0,0 +1,8 @@
InputFolder: /MainData/rsem_counts
MetaFile: /MainData/annotation_metadata/EPIC_HCvB_metadata_baseline_updated-share.csv
SampleColumnName: HCVB_ID
CondColumnName: DiseaseCourse
Condition1: RR
Condition2: CIS
OutputFolder: ./OutputTest/
logID: user1
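This flat key/value layout matches what `_reform_yamlFile` in `app.py` writes for non-nested form fields. A minimal check that the file parses as expected (assuming PyYAML is available in the pipeline environment; the filename below is hypothetical, since the diff capture does not show it):

```python
import yaml  # PyYAML, assumed to be available in the pipeline environment

with open("deg_config.yaml") as f:  # hypothetical filename; the diff does not show it
    cfg = yaml.safe_load(f)

print(cfg["Condition1"], "vs", cfg["Condition2"])     # RR vs CIS
print(cfg["InputFolder"], "->", cfg["OutputFolder"])  # /MainData/rsem_counts -> ./OutputTest/
```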
44 changes: 40 additions & 4 deletions pipelines/pipeline_controller/app.py
@@ -18,6 +18,8 @@
import uuid
import os
import subprocess
import boto3
import glob

# Custom form making
from wtforms.validators import Required
@@ -123,22 +125,39 @@ def workflow_running(pipeline_path, yaml_file):
line = proc.stdout.readline()
if not line:
break
print(str(line))
current_task.update_state(state='PROGRESS', meta={'msg': str(line)})
return 999

@app.route("/workflow_progress")
def workflow_progress():
print("WORKFLOW RETURN")
jobid = request.values.get('jobid')
print(jobid)
if jobid:
job = AsyncResult(jobid, app=celery)
print(job.state)
if job.state == 'PROGRESS':
return json.dumps(dict( state=job.state, msg=job.result['msg'],))

elif job.state == 'SUCCESS':
## S3 Upload process START
output_counter = int(session.get('output_count', None))
output_folder_list = [ session.get('output'+str(i), None) for i in range(output_counter)]
logID = session.get('logID', None)
bucket_name = 'openkbc-ms-result-bucket' # fixed bucket
#bucket_dest = 's3://'+bucket_name+"/"+logID+"/"

s3 = boto3.client('s3') # Client set, S3
for path in output_folder_list:
filelist = glob.glob(path+"/*") # search all files
for fname in filelist: # get name
with open(fname, "rb") as f:
s3.upload_fileobj(f, bucket_name, logID+"/"+os.path.basename(fname)) # upload to s3
## S3 Upload process END

return json.dumps(dict( state=job.state, msg="done",))

elif job.state == 'FAILURE':
return json.dumps(dict( state=job.state, msg="failture",)) ## return somewhere to exit
return '{}'

@app.route("/status")
@@ -190,13 +209,26 @@ def _reform_yamlFile(selected_pipeline, data_dict):
f = open(yamlFileName, "w") # write file with unique name

nested_items = [] # List for handing nested items
output_count=0 # Output key count(Tracking purpose)
for key, value in data_dict.items():
if key.find('--')>-1: # Nested key has '--'
subkeys = key.split('--')# 2 layers keys
nested_items.append([subkeys[0],subkeys[1],value]) #make list
else:
f.write(key+": "+value+"\n")
## Tracking output path and user ID
if key.find("Output") > -1 or key.find("output") > -1: ## key has 'output' string
output_count+=1
session['output'+str(output_count)]=value # set session for output folder (Tracking purpose)
session['output_count'] = output_count # set session for output counter (Tracking purpose)

if key.find('logID') > -1: # Find log ID
session['logID'] = value # set session for ID
## Tracking output path and user ID

f.write(key+": "+value+"\n") ## Write new form of yaml

### Add error handling here
### Add error handling here
key1_unique=list(set([x[0] for x in nested_items])) # make a list of root key
for x in key1_unique:
f.write(x+":"+"\n") # first line of nested key (root key)
@@ -206,6 +238,10 @@

f.close()
return yamlFileName


def get_filenames(path):
filelist = glob.glob(path+"/*")
return filelist

if __name__ == '__main__':
app.run(host='0.0.0.0')
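Once a job succeeds, `workflow_progress` uploads each output file to `openkbc-ms-result-bucket` under the user's `logID` prefix. A hedged sketch of how those results could be listed and fetched afterwards (this retrieval helper is not part of the commit):

```python
import os
import boto3

def fetch_results(log_id, dest_dir="results"):
    """Download everything uploaded under <logID>/ in the result bucket (illustrative only)."""
    s3 = boto3.client("s3")
    bucket = "openkbc-ms-result-bucket"  # fixed bucket name used in app.py
    resp = s3.list_objects_v2(Bucket=bucket, Prefix=log_id + "/")
    os.makedirs(dest_dir, exist_ok=True)
    keys = [obj["Key"] for obj in resp.get("Contents", [])]
    for key in keys:
        s3.download_file(bucket, key, os.path.join(dest_dir, os.path.basename(key)))
    return keys

fetch_results("user1")  # logID value from the example config above
```

Objects are keyed as `<logID>/<basename>`, matching the `upload_fileobj` call in `workflow_progress`.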
2 changes: 2 additions & 0 deletions pipelines/pipeline_controller/requirements.txt
@@ -8,6 +8,8 @@ Flask-Bootstrap==3.3.7.1
flask-nav==0.6
celery==5.1.2
redis==3.5.3
boto3==1.18.54
awscli==1.20.54
##deg requirements
pip==21.2.2
pandas==1.3.2
3 changes: 2 additions & 1 deletion pipelines/pipeline_controller/templates/progress.html
@@ -22,7 +22,8 @@ <h3>Workflow controller</h3>
$("#pct").html("<b>Workflow has been completed</b>");
if(resp.msg == 'done') {
return;
} else{
}
else{
$("#pct").html("<img src='/static/spinning-loading.gif'>");
setTimeout(poll, 1000.0);
}
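The page polls `/workflow_progress?jobid=<id>` about once per second until the endpoint reports SUCCESS with msg "done". The same contract can be exercised outside the browser; a minimal sketch, assuming the `requests` package (not among the pinned requirements above) and the default port mapping from docker-compose:

```python
import time
import requests  # assumed available; not part of the pinned requirements above

def poll(jobid, base="http://localhost"):
    """Poll /workflow_progress once per second until the job finishes."""
    while True:
        resp = requests.get(f"{base}/workflow_progress", params={"jobid": jobid}).json()
        if resp.get("state") == "SUCCESS" and resp.get("msg") == "done":
            return resp
        if resp.get("state") == "FAILURE":
            raise RuntimeError("workflow reported failure")
        time.sleep(1.0)
```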
