Skip to content

Commit fe411b6

Browse files
committed
RHOAIENG-10449 - Add PR check for additional-demos notebooks
1 parent 9e2adef commit fe411b6

File tree

1 file changed

+363
-0
lines changed

1 file changed

+363
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,363 @@
1+
# Runs the additional demo notebooks against a KinD cluster.
# The pull_request (label) trigger is currently commented out, so the
# workflow can only be started manually through workflow_dispatch.
name: Additional demo notebooks tests

on:
  # pull_request:
  #   types: [ labeled ]
  workflow_dispatch:

concurrency:
  # github.head_ref is only populated for pull_request events. Fall back to
  # github.ref so manually dispatched runs are grouped per branch instead of
  # all sharing (and cancelling each other in) a single group.
  group: ${{ github.head_ref || github.ref }}-${{ github.workflow }}
  cancel-in-progress: true

env:
  # Operator image handed to `make deploy` in each job's deploy step.
  CODEFLARE_OPERATOR_IMG: "quay.io/project-codeflare/codeflare-operator:dev"
14+
15+
# Each job follows the same recipe: check out this repo plus the common and
# operator repos, start KinD, deploy the CodeFlare stack, patch one demo
# notebook so it can run on KinD, execute it with papermill, then collect logs.
#
# NOTE(review): TEMP_DIR is not defined in this workflow; presumably it is
# exported by the ./common/github-actions/kind action — confirm.
jobs:
  verify-hf_interactive:
    # if: ${{ github.event.label.name == 'test-additional-notebooks' }}
    runs-on: ubuntu-20.04-4core

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Checkout common repo code
        uses: actions/checkout@v4
        with:
          repository: 'project-codeflare/codeflare-common'
          ref: 'main'
          path: 'common'

      - name: Checkout CodeFlare operator repository
        uses: actions/checkout@v4
        with:
          repository: project-codeflare/codeflare-operator
          path: codeflare-operator

      - name: Set Go
        uses: actions/setup-go@v5
        with:
          go-version-file: './codeflare-operator/go.mod'
          cache-dependency-path: "./codeflare-operator/go.sum"

      - name: Set up gotestfmt
        uses: gotesttools/gotestfmt-action@v2
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up specific Python version
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
          cache: 'pip' # caching pip dependencies

      - name: Setup and start KinD cluster
        uses: ./common/github-actions/kind

      # The log-collection steps below only run when this step succeeded.
      - name: Deploy CodeFlare stack
        id: deploy
        run: |
          cd codeflare-operator
          echo Setting up CodeFlare stack
          make setup-e2e
          echo Deploying CodeFlare operator
          make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
          kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
          cd ..

      - name: Setup Additional demo notebooks execution
        run: |
          echo "Installing papermill and dependencies..."
          pip install poetry papermill ipython ipykernel
          # Disable virtualenv due to problems using packaged in virtualenv in papermill
          poetry config virtualenvs.create false

          echo "Installing SDK..."
          poetry install --with test,docs

      - name: Run hf_interactive.ipynb
        run: |
          set -euo pipefail
          set -x

          # Remove login/logout cells, as KinD doesn't support authentication using token
          jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
          jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
          # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
          JOB_WAIT=$(jq -r '.' "${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json")
          jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
          # Set explicit namespace as SDK need it (currently) to resolve local queues
          sed -i "s/worker_cpu_requests=8,/worker_cpu_requests=1, namespace='default',/" hf_interactive.ipynb
          # Change cluster parameters (need to decrease)
          # NOTE(review): the memory patterns match requests=16 but limits=8;
          # confirm both still match the values used in the notebook.
          sed -i "s/{'nvidia.com\/gpu':1}/{'nvidia.com\/gpu':0}/g" hf_interactive.ipynb
          sed -i "s/worker_cpu_limits=8,/worker_cpu_limits=1,/" hf_interactive.ipynb
          sed -i "s/worker_memory_requests=16,/worker_memory_requests=4,/" hf_interactive.ipynb
          sed -i "s/worker_memory_limits=8,/worker_memory_limits=4,/" hf_interactive.ipynb
          # Run notebook
          poetry run papermill hf_interactive.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
        working-directory: demo-notebooks/additional-demos

      - name: Print CodeFlare operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing CodeFlare operator logs"
          kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee "${TEMP_DIR}/codeflare-operator.log"

      - name: Print Kueue operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing Kueue operator logs"
          KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
          kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee "${TEMP_DIR}/kueue.log"

      - name: Print KubeRay operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing KubeRay operator logs"
          kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee "${TEMP_DIR}/kuberay.log"

      - name: Export all KinD pod logs
        uses: ./common/github-actions/kind-export-logs
        if: always() && steps.deploy.outcome == 'success'
        with:
          output-directory: ${TEMP_DIR}

      - name: Upload logs
        uses: actions/upload-artifact@v4
        if: always() && steps.deploy.outcome == 'success'
        with:
          name: logs-verify-hf_interactive
          retention-days: 10
          path: |
            ${{ env.TEMP_DIR }}/**/*.log

  verify-local_interactive:
    # if: ${{ github.event.label.name == 'test-additional-notebooks' }}
    runs-on: ubuntu-20.04-4core

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Checkout common repo code
        uses: actions/checkout@v4
        with:
          repository: 'project-codeflare/codeflare-common'
          ref: 'main'
          path: 'common'

      - name: Checkout CodeFlare operator repository
        uses: actions/checkout@v4
        with:
          repository: project-codeflare/codeflare-operator
          path: codeflare-operator

      - name: Set Go
        uses: actions/setup-go@v5
        with:
          go-version-file: './codeflare-operator/go.mod'
          cache-dependency-path: "./codeflare-operator/go.sum"

      - name: Set up gotestfmt
        uses: gotesttools/gotestfmt-action@v2
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up specific Python version
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
          cache: 'pip' # caching pip dependencies

      - name: Setup and start KinD cluster
        uses: ./common/github-actions/kind

      # The log-collection steps below only run when this step succeeded.
      - name: Deploy CodeFlare stack
        id: deploy
        run: |
          cd codeflare-operator
          echo Setting up CodeFlare stack
          make setup-e2e
          echo Deploying CodeFlare operator
          make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
          kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
          cd ..

      - name: Setup Additional demo notebooks execution
        run: |
          echo "Installing papermill and dependencies..."
          pip install poetry papermill ipython ipykernel
          # Disable virtualenv due to problems using packaged in virtualenv in papermill
          poetry config virtualenvs.create false

          echo "Installing SDK..."
          poetry install --with test,docs

      - name: Run local_interactive.ipynb
        run: |
          set -euo pipefail

          # Remove login/logout cells, as KinD doesn't support authentication using token
          jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object and log in to desired user account")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
          jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
          # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
          JOB_WAIT=$(jq -r '.' "${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json")
          jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
          # Set explicit namespace as SDK need it (currently) to resolve local queues
          sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1, namespace='default',/" local_interactive.ipynb
          # Run notebook (output file is named after this notebook)
          poetry run papermill local_interactive.ipynb local_interactive_out.ipynb --log-output --execution-timeout 1200
        working-directory: demo-notebooks/additional-demos

      - name: Print CodeFlare operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing CodeFlare operator logs"
          kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee "${TEMP_DIR}/codeflare-operator.log"

      - name: Print Kueue operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing Kueue operator logs"
          KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
          kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee "${TEMP_DIR}/kueue.log"

      - name: Print KubeRay operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing KubeRay operator logs"
          kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee "${TEMP_DIR}/kuberay.log"

      - name: Export all KinD pod logs
        uses: ./common/github-actions/kind-export-logs
        if: always() && steps.deploy.outcome == 'success'
        with:
          output-directory: ${TEMP_DIR}

      - name: Upload logs
        uses: actions/upload-artifact@v4
        if: always() && steps.deploy.outcome == 'success'
        with:
          name: logs-local_interactive
          retention-days: 10
          path: |
            ${{ env.TEMP_DIR }}/**/*.log

  verify-ray_job_client:
    # if: ${{ github.event.label.name == 'test-additional-notebooks' }}
    runs-on: ubuntu-20.04-4core

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Checkout common repo code
        uses: actions/checkout@v4
        with:
          repository: 'project-codeflare/codeflare-common'
          ref: 'main'
          path: 'common'

      - name: Checkout CodeFlare operator repository
        uses: actions/checkout@v4
        with:
          repository: project-codeflare/codeflare-operator
          path: codeflare-operator

      - name: Set Go
        uses: actions/setup-go@v5
        with:
          go-version-file: './codeflare-operator/go.mod'
          cache-dependency-path: "./codeflare-operator/go.sum"

      - name: Set up gotestfmt
        uses: gotesttools/gotestfmt-action@v2
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up specific Python version
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
          cache: 'pip' # caching pip dependencies

      - name: Setup and start KinD cluster
        uses: ./common/github-actions/kind

      # The log-collection steps below only run when this step succeeded.
      - name: Deploy CodeFlare stack
        id: deploy
        run: |
          cd codeflare-operator
          echo Setting up CodeFlare stack
          make setup-e2e
          echo Deploying CodeFlare operator
          make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
          kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
          cd ..

      - name: Setup Additional demo notebooks execution
        run: |
          echo "Installing papermill and dependencies..."
          pip install poetry papermill ipython ipykernel
          # Disable virtualenv due to problems using packaged in virtualenv in papermill
          poetry config virtualenvs.create false

          echo "Installing SDK..."
          poetry install --with test,docs

      - name: Run ray_job_client.ipynb
        run: |
          set -euo pipefail

          # Remove login/logout cells, as KinD doesn't support authentication using token
          jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
          jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
          # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
          JOB_WAIT=$(jq -r '.' "${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json")
          jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
          # Set explicit namespace as SDK need it (currently) to resolve local queues
          sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1, namespace='default',/" ray_job_client.ipynb
          # Run notebook (output file is named after this notebook)
          poetry run papermill ray_job_client.ipynb ray_job_client_out.ipynb --log-output --execution-timeout 1200
        working-directory: demo-notebooks/additional-demos

      - name: Print CodeFlare operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing CodeFlare operator logs"
          kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee "${TEMP_DIR}/codeflare-operator.log"

      - name: Print Kueue operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing Kueue operator logs"
          KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
          kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee "${TEMP_DIR}/kueue.log"

      - name: Print KubeRay operator logs
        if: always() && steps.deploy.outcome == 'success'
        run: |
          echo "Printing KubeRay operator logs"
          kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee "${TEMP_DIR}/kuberay.log"

      - name: Export all KinD pod logs
        uses: ./common/github-actions/kind-export-logs
        if: always() && steps.deploy.outcome == 'success'
        with:
          output-directory: ${TEMP_DIR}

      - name: Upload logs
        uses: actions/upload-artifact@v4
        if: always() && steps.deploy.outcome == 'success'
        with:
          name: logs-ray_job_client
          retention-days: 10
          path: |
            ${{ env.TEMP_DIR }}/**/*.log

0 commit comments

Comments
 (0)