From 9ac1ed967cd0042f0761f6da85bfa480542d797a Mon Sep 17 00:00:00 2001 From: Kara Moraw Date: Fri, 5 Jul 2024 16:33:23 +0100 Subject: [PATCH] update GitHub actions --- .github/workflows/test.yml | 36 ++++++++++++++++++------------------ .gitignore | 1 + data/debug/test_set.csv | 11 +++++++++++ 3 files changed, 30 insertions(+), 18 deletions(-) create mode 100644 data/debug/test_set.csv diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 80b28c1..0216475 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,8 +21,8 @@ jobs: cp config_example.cfg config.cfg sed -i "s//${{ secrets.TOKEN_TEST }}/" config.cfg - name: Test crawling contents script - working-directory: ./github - run: python3 crawl_contents.py -f ../data/test_set.csv -n github_id -v + working-directory: ./src/github + run: python3 crawl_contents.py -f ../../data/debug/test_set.csv -n github_id -v - run: ls -l data/ github-crawl-contributions: runs-on: ubuntu-latest @@ -43,9 +43,9 @@ jobs: cp config_example.cfg config.cfg sed -i "s//${{ secrets.TOKEN_TEST }}/" config.cfg - name: Test crawling contributions script - working-directory: ./github - run: python3 crawl_contributions.py -f ../data/test_set.csv -n github_id -v - - run: ls -l data/ + working-directory: ./src/github + run: python3 crawl_contributions.py -f ../../data/debug/test_set.csv -n github_id -v + - run: ls -l data/raw/github github-crawl-engagement: runs-on: ubuntu-latest defaults: @@ -65,9 +65,9 @@ jobs: cp config_example.cfg config.cfg sed -i "s//${{ secrets.TOKEN_TEST }}/" config.cfg - name: Test crawling engagement script - working-directory: ./github - run: python3 crawl_engagement.py -f ../data/test_set.csv -n github_id -v - - run: ls -l data/ + working-directory: ./src/github + run: python3 crawl_engagement.py -f ../../data/debug/test_set.csv -n github_id -v + - run: ls -l data/raw/github github-crawl-issues: runs-on: ubuntu-latest defaults: @@ -87,9 +87,9 @@ jobs: cp config_example.cfg config.cfg sed -i "s//${{ secrets.TOKEN_TEST }}/" config.cfg - name: Test crawling issues script - working-directory: ./github - run: python3 crawl_issues.py -f ../data/test_set.csv -n github_id -v - - run: ls -l data/ + working-directory: ./src/github + run: python3 crawl_issues.py -f ../../data/debug/test_set.csv -n github_id -v + - run: ls -l data/raw/github github-crawl-metadata: runs-on: ubuntu-latest defaults: @@ -109,9 +109,9 @@ jobs: cp config_example.cfg config.cfg sed -i "s//${{ secrets.TOKEN_TEST }}/" config.cfg - name: Test crawling issues script - working-directory: ./github - run: python3 crawl_metadata.py -f ../data/test_set.csv -n github_id -v - - run: ls -l data/ + working-directory: ./src/github + run: python3 crawl_metadata.py -f ../../data/debug/test_set.csv -n github_id -v + - run: ls -l data/raw/github eprints-parse: runs-on: ubuntu-latest defaults: @@ -131,12 +131,12 @@ jobs: cp config_example.cfg config.cfg sed -i "s//${{ secrets.TOKEN_TEST }}/" config.cfg - name: Test parsing remote eprints - working-directory: ./eprints + working-directory: ./src/eprints run: python3 parse_eprints.py --repo eprints.soton.ac.uk --date 2022-2023 -v - name: Test parsing PDFs - working-directory: ./eprints + working-directory: ./src/eprints run: python3 parse_pdfs.py --repo eprints.soton.ac.uk --date 2022-2023 --domain github.com -v - name: Test cleaning links - working-directory: ./eprints + working-directory: ./src/eprints run: python3 clean_eprints_links.py --repo eprints.soton.ac.uk --date 2022-2023 --domain github.com -v - - run: ls -l data/ \ No newline at end of file + - run: ls -l data/raw/eprints \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9d90c08..1ebc5ba 100644 --- a/.gitignore +++ b/.gitignore @@ -141,6 +141,7 @@ dmypy.json data/debug/* !data/debug/representative_set.csv +!data/debug/test_set.csv # tex *.aux diff --git a/data/debug/test_set.csv b/data/debug/test_set.csv new file mode 100644 index 0000000..2e00bf2 --- /dev/null +++ b/data/debug/test_set.csv @@ -0,0 +1,11 @@ +user_name,repo_name,stars,watchers,forks,commits_no,contributors_no,size_kb,github_id +ObaidaNa,albaath-marks,0,1,0,5,1,630,ObaidaNa/albaath-marks +ja153903,jabbariao-dotcom-api,0,1,0,16,1,36,ja153903/jabbariao-dotcom-api +drone,awesome-drone,100,10,17,22,7,17,drone/awesome-drone +syun0216,goforeat,100,6,31,1011,2,240094,syun0216/goforeat +ng-matero,ng-matero,1000,36,293,1391,10,88339,ng-matero/ng-matero +cov-lineages,pango-designation,1000,91,85,2683,15,1681848,cov-lineages/pango-designation +ellie,atuin,9985,44,268,527,109,42795,ellie/atuin +tiangolo,sqlmodel,9978,132,419,236,53,4835,tiangolo/sqlmodel +996icu,996.ICU,40000,4259,21210,3189,405,188163,996icu/996.ICU +codecrafters-io,build-your-own-x,40000,4510,19327,523,104,971,codecrafters-io/build-your-own-x