ML experiment maintenance #1708

Workflow file for this run

.github/workflows/benchmark.yml at 9f45508

	# This workflow runs the benchmark.
	# The work is split into separate jobs so that the downloaded data stays
	# cached even if the benchmark job fails.

	name: Benchmark

	# Run for pushes to main and for pull requests that target main.
	on:
	  push:
	    branches: [ main ]
	  pull_request:
	    branches: [ main ]

	jobs:

	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

	download_data:

	runs-on: ubuntu-latest

	steps:

	- name: Checkout CredData
	uses: actions/checkout@v3
	with:
	repository: Samsung/CredData

	- name: Cache data
	id: cache-data
	uses: actions/cache@v3
	with:
	path: data
	key: cred-data-${{ hashFiles('snapshot.yaml') }}

	- name: Set up Python 3.8
	if: steps.cache-data.outputs.cache-hit != 'true'
	uses: actions/setup-python@v3
	with:
	python-version: "3.8"

	- name: Update PIP
	run: python -m pip install --upgrade pip

	- name: Install requirements of CredData
	if: steps.cache-data.outputs.cache-hit != 'true'
	run: python -m pip install --requirement requirements.txt

	- name: Generate Data Asset
	if: steps.cache-data.outputs.cache-hit != 'true'
	run: python download_data.py --data_dir data


	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

	run_benchmark:

	needs: [download_data]

	runs-on: ubuntu-latest

	steps:

	- name: Checkout CredData
	uses: actions/checkout@v3
	with:
	repository: Samsung/CredData

	- name: Cache data
	id: cache-data
	uses: actions/cache@v3
	with:
	path: data
	key: cred-data-${{ hashFiles('snapshot.yaml') }}

	- name: Failure in case when cache missed
	if: steps.cache-data.outputs.cache-hit != 'true'
	run: exit 1

	- name: Check Data Asset - DEBUG
	if: steps.cache-data.outputs.cache-hit == 'true'
	run: ls -al . && ls -al data

	- name: Set up Python 3.8
	uses: actions/setup-python@v3
	with:
	python-version: "3.8"

	- name: Update PIP
	run: python -m pip install --upgrade pip

	- name: Install requirements of CredData
	run: python -m pip install --requirement requirements.txt

	- name: Fix onnxruntime lib for released version 1.5.5 - todo remove it after new release
	run: python -m pip install onnxruntime==1.15.1

	- name: Checkout CredSweeper
	if: ${{ 'pull_request' == github.event_name }}
	uses: actions/checkout@v3
	with:
	ref: ${{ github.event.pull_request.head.sha }}
	path: temp/CredSweeper

	- name: Patch benchmark for PR work
	if: ${{ 'pull_request' == github.event_name }}
	run: \|
	sed -i 's\|CREDSWEEPER = "https://github.com/Samsung/CredSweeper.git"\|CREDSWEEPER = "dummy://github.com/Samsung/CredSweeper.git"\|' benchmark/common/constants.py
	grep --with-filename --line-number 'dummy://github.com/Samsung/CredSweeper.git' benchmark/common/constants.py

	- name: Run Benchmark
	run: \|
	python -m benchmark --scanner credsweeper \| tee credsweeper.log

	- name: Get only results
	run: \|
	head -n 12 credsweeper.log \| tee benchmark.txt
	tail -n 14 credsweeper.log \| grep -v 'Time Elapsed:' \| tee -a benchmark.txt
	cp -vf ./temp/CredSweeper/output.json report.json

	- name: Upload artifact
	if: always()
	uses: actions/upload-artifact@v3
	with:
	name: report
	path: report.json

	- name: Upload artifact
	if: always()
	uses: actions/upload-artifact@v3
	with:
	name: benchmark
	path: benchmark.txt

	- name: Verify benchmark scores of the PR
	if: ${{ 'pull_request' == github.event_name }}
	# update cicd/benchmark.txt with uploaded artifact if a difference is found
	run: \|
	diff temp/CredSweeper/cicd/benchmark.txt benchmark.txt

	- name: Checkout CredSweeper on push event
	if: ${{ 'pull_request' != github.event_name }}
	uses: actions/checkout@v3
	with:
	ref: ${{ github.event.pull_request.head.sha }}
	path: CredSweeper

	- name: Verify benchmark scores on push event
	if: ${{ 'pull_request' != github.event_name }}
	# update cicd/benchmark.txt with uploaded artifact if a difference is found
	run: \|
	diff CredSweeper/cicd/benchmark.txt benchmark.txt

	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

	experiment:
	# the ml train test is placed here to use cached data set
	needs: [ download_data ]

	runs-on: ubuntu-latest

	steps:

	- name: Checkout CredData
	uses: actions/checkout@v3
	with:
	repository: Samsung/CredData

	- name: Cache data
	id: cache-data
	uses: actions/cache@v3
	with:
	path: data
	key: cred-data-${{ hashFiles('snapshot.yaml') }}

	- name: Failure in case when cache missed
	if: steps.cache-data.outputs.cache-hit != 'true'
	run: exit 1

	- name: Exclude some sets and place to CredData dir
	# keep b* & c* only to easy correct experiment/src/split.json
	if: steps.cache-data.outputs.cache-hit == 'true'
	run: \|
	rm -rf data/0* data/1* data/2* data/3* data/4* data/5* data/6* data/7* data/8* data/9* data/a* data/d* data/e* data/f*
	rm -rf meta/0* meta/1* meta/2* meta/3* meta/4* meta/5* meta/6* meta/7* meta/8* meta/9* meta/a* meta/d* meta/e* meta/f*
	mkdir -vp ${{ github.workspace }}/CredData
	mv data ${{ github.workspace }}/CredData/
	mv meta ${{ github.workspace }}/CredData/

	- name: Set up Python 3.8
	if: steps.cache-data.outputs.cache-hit != 'true'
	uses: actions/setup-python@v3
	with:
	python-version: "3.8"

	- name: Update PIP
	run: python -m pip install --upgrade pip

	- name: Checkout current CredSweeper
	uses: actions/checkout@v3
	with:
	ref: ${{ github.event.pull_request.head.sha }}
	path: CredSweeper.head

	- name: Install development packages
	run: python -m pip install --requirement CredSweeper.head/requirements.txt

	- name: Install experimental packages
	# some versions will be changed for compatibility
	run: python -m pip install --requirement CredSweeper.head/experiment/requirements.txt

	- name: dbg
	run: echo ${{ github.workspace }} && ls -al ${{ github.workspace }} && tree ${{ github.workspace }}

	- name: Lighten spit.json
	run: \|
	mv -vf ${{ github.workspace }}/CredSweeper.head/experiment/src/split.json ${{ github.workspace }}/CredSweeper.head/experiment/src/split.json.bak
	cat ${{ github.workspace }}/CredSweeper.head/experiment/src/split.json.bak
	grep -v '"[0-9ad-f][0-9a-f]\+' ${{ github.workspace }}/CredSweeper.head/experiment/src/split.json.bak >${{ github.workspace }}/CredSweeper.head/experiment/src/split.json
	cat ${{ github.workspace }}/CredSweeper.head/experiment/src/split.json

	- name: Do the experiment
	run: \|
	cd CredSweeper.head
	ls -al #dbg
	pwd #dbg
	export PYTHONPATH=$(pwd):${PYTHONPATH}
	cd experiment
	python -m credsweeper --banner #dbg
	python main.py --data ${{ github.workspace }}/CredData -j $(( 2 * $(nproc) ))
	ls -al results


	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ML experiment maintenance #1708

Workflow file

ML experiment maintenance #1708

Jobs

Run details

Workflow file for this run