Skip to content

Commit 0cd7229

Browse files
Saved grouped-by output as an artifact!
1 parent 68b0637 commit 0cd7229

File tree

2 files changed

+20
-10
lines changed

2 files changed

+20
-10
lines changed

k8/Makefile

+1 -1
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ $(info )
7272
# Pass in the WORKFLOW_NAME as a variable
7373
download-workflow:
7474
argo get ${WORKFLOW_NAME} -o json \
75-
| jq -r '.status.outputs.artifacts[0].s3 | "artifact/" + .bucket + "/" + .key' \
75+
| jq -r '.status.outputs.artifacts[] | select(.name == "api") | .s3 | "artifact/" + .bucket + "/" + .key' \
7676
| xargs mc cat \
7777
| tar -xvzf - -C ../data/api/
7878
mv -f ../data/api/api.json ../data/api/${WORKFLOW_LABEL}.json

k8/argo/workflow.yml

+19 -9
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,10 @@ spec:
3535
template: groupby-lines
3636
- - name: infer-api
3737
template: infer-api
38+
arguments:
39+
artifacts:
40+
- name: grouped
41+
from: "{{workflow.outputs.artifacts.grouped}}"
3842
- - name: trigger-github-action
3943
template: trigger-github-action
4044
- name: record-api
@@ -58,14 +62,19 @@ spec:
5862
- name: data
5963
mountPath: /tmp/vol
6064
- name: groupby-lines
65+
outputs:
66+
artifacts:
67+
- name: grouped
68+
path: /tmp/grouped.jsonl
69+
globalName: "grouped"
6170
container:
6271
image: "{{workflow.parameters.base-image}}"
6372
command: [python, -m, record_api.line_counts]
6473
env:
6574
- name: PYTHON_RECORD_API_INPUT
6675
value: /tmp/vol/raw.jsonl
6776
- name: PYTHON_RECORD_API_OUTPUT
68-
value: /tmp/vol/grouped.jsonl
77+
value: /tmp/grouped.jsonl
6978
resources:
7079
requests:
7180
memory: 6Gi
@@ -77,11 +86,15 @@ spec:
7786
- name: data
7887
mountPath: /tmp/vol
7988
- name: infer-api
89+
inputs:
90+
artifacts:
91+
- name: grouped
92+
path: /tmp/grouped.jsonl
8093
outputs:
8194
artifacts:
8295
- name: api
8396
path: /tmp/api.json
84-
globalName: 'api'
97+
globalName: "api"
8598
container:
8699
image: "{{workflow.parameters.base-image}}"
87100
command: [python, -m, record_api.infer_apis]
@@ -91,19 +104,16 @@ spec:
91104
- name: PYTHON_RECORD_API_MODULES
92105
value: pandas,numpy
93106
- name: PYTHON_RECORD_API_INPUT
94-
value: /tmp/vol/grouped.jsonl
107+
value: /tmp/grouped.jsonl
95108
- name: PYTHON_RECORD_API_OUTPUT
96109
value: /tmp/api.json
97110
resources:
98111
requests:
99-
memory: 1Gi
100-
cpu: 500m
112+
memory: 6Gi
113+
cpu: 100m
101114
limits:
102-
memory: 8Gi
115+
memory: 16Gi
103116
cpu: 4000m
104-
volumeMounts:
105-
- name: data
106-
mountPath: /tmp/vol
107117
- name: trigger-github-action
108118
container:
109119
image: "curlimages/curl"

0 commit comments

Comments
 (0)