# azure-pipelines.yml

#########################################################################################
# DataSHIELD Azure test suite - dsBase.
# Starts with a vanilla Ubuntu VM, installs R then does devtools::checks()
# Inside the root directory $(Pipeline.Workspace) will be a file tree like:
# /dsBase               <- Checked out version of datashield/dsBase
# /testStatus           <- Checked out version of datashield/testStatus
# /logs                 <- Where results of tests and logs are collated
#
# As of May 2020 this takes ~ 15 mins to run.
#       Jul 2020 this takes ~ 15 mins to run.
#
# Nothing should ever need changing in this as the repo will always check itself out.
#
#########################################################################################


#####################################################################################
# These should all be constant, except test_filter. This can be used to test subsets
# of test files in the testthat directory. Options are like:
# '*'               <- Run all tests
# 'asNumericDS*'    <- Run all asNumericDS tests, i.e. all the arg, smk etc tests.
# '*_smk_*'         <- Run all the smoke tests for all functions.
variables:
  # Pipeline start time as yyyyMMddHHmmss; used to name the per-run result files.
  datetime: $[format('{0:yyyyMMddHHmmss}', pipeline.startTime)]
  # Repository being built and the short name of the branch being built.
  repoName: $(Build.Repository.Name)
  branchName: $(Build.SourceBranchName)
  # Name of the package under test.
  projectName: dsBase
  # Filter selecting which testthat files are run ('*' runs everything).
  test_filter: '*'
  # Presumably switches off R CMD check's system clock check - TODO confirm.
  _r_check_system_clock_: 0


#########################################################################################
# Need to define all the GH repos and their access tokens, see:
# https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml
resources:
  repositories:
  # GitHub repository the test results are published to; the steps refer to it
  # by the alias testStatusRepo.
  - repository: testStatusRepo
    name: datashield/testStatus
    ref: master
    type: github
    endpoint: datashield-testing


#########################################################################################
# When and under what condition to run the pipeline.
schedules:
  # 00:00 every Sunday, for master and 6.3.0.
  - displayName: Weekly build - master
    cron: '0 0 * * 0'
    always: true
    branches:
      include:
      - master
      - '6.3.0'
  # 01:00 every day, for v6.3.1-dev.
  - displayName: Nightly build - v6.3.1-dev
    cron: '0 1 * * *'
    always: true
    branches:
      include:
      - v6.3.1-dev

jobs:
# Single job: installs R on an Ubuntu agent, then checks and tests dsBase.
- job: run_devtools_check
  # Upper bound, in minutes, on how long the job may run.
  timeoutInMinutes: 120
  pool:
    vmImage: ubuntu-24.04


  # Each step below states in its own condition whether it still runs after
  # earlier failures.
  steps:
    #####################################################################################
    # Check out both repositories into named subfolders.
    # The logs may show a warning like:
    # [warning]Unable move and reuse existing repository to required location
    # This is an Azure bug - https://github.com/microsoft/azure-pipelines-yaml/issues/403
  - checkout: self
    path: dsBase

    # testStatus is only checked out for non-PR builds of datashield/dsBase, which is
    # the same condition the publishing step uses. Credentials are persisted so that
    # step can push.
  - checkout: testStatusRepo
    condition: and(eq(variables['Build.Repository.Name'], 'datashield/dsBase'), ne(variables['Build.Reason'], 'PullRequest'))
    persistCredentials: true
    path: testStatus


    #####################################################################################
    # Set the Ncpus option in the user's .Rprofile so R uses several CPUs when it
    # compiles/installs packages. The Azure VMs have 2 CPUs, but 4 is being tried.
    # If previous steps have failed then don't run.
  - bash: |
      printf '%s\n' 'options(Ncpus=4)' >> "$HOME/.Rprofile"
    displayName: 'Tweak local R env using .Rprofile'
    condition: succeeded()


    #####################################################################################
    # Install R and all the dependencies dsBase requires.
    # If previous steps have failed then don't run.
  - bash: |
      # Register the CRAN apt repository for this Ubuntu release.
      # -y keeps apt non-interactive: without it apt-get aborts at the confirmation
      # prompt whenever additional packages have to be installed.
      sudo apt-get install -y --no-install-recommends software-properties-common dirmngr
      wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sudo tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
      sudo add-apt-repository -y "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
      sudo apt-get update -qq
      sudo apt-get upgrade -qq

      # Build tools and development headers, installed ahead of the R packages.
      sudo apt-get install -qq pkg-config -y
      sudo apt-get install -qq libxml2-dev libcurl4-openssl-dev libssl-dev libgit2-dev libharfbuzz-dev libfribidi-dev libfontconfig1-dev -y
      sudo apt-get install -qq libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev -y
      sudo apt-get install -qq r-base -y

      # R packages, one R session per package so each install is easy to find in the log.
      # 'splines' is deliberately not listed: it ships with base R and cannot be
      # installed from CRAN, so asking for it only produced a warning.
      for pkg in devtools RANN stringr lme4 dplyr reshape2 polycor gamlss gamlss.dist mice data.table childsds; do
        sudo R -e "install.packages('$pkg', dependencies=TRUE)"
      done

    displayName: 'Install all dependencies for dsBase'
    condition: succeeded()


    #####################################################################################
    # Check that the man files in the repo match what is in the function headers, i.e. has
    # devtools::document() been run before committing?
    # The man directory is checksummed, the documentation is regenerated, and the
    # directory is checksummed again; any difference fails the step.
    # If previous steps have failed then don't run.
    # If this step fails still mark as failed, but don't stop the rest of the steps running.
  - bash: |
      # md5sum of every file under man/, concatenated in sorted order.
      man_checksum() {
        find man -type f | sort -u | xargs cat | md5sum
      }

      committed_sum=$(man_checksum)

      # Regenerate the documentation from the R sources.
      R -e "devtools::document()"

      regenerated_sum=$(man_checksum)

      if [ "$committed_sum" = "$regenerated_sum" ]; then
        echo "Documentation up to date."
        exit 0
      fi

      echo "Your committed manual files (man/*.Rd) are out of sync with the documentation in the R files."
      echo "Run devtools::document() locally then commit again."
      exit 1
    continueOnError: true
    condition: succeeded()
    displayName: 'Check manual updated before being committed'
    workingDirectory: $(Pipeline.Workspace)/dsBase

    #####################################################################################
    # Run devtools::check (tests and examples skipped) on the checked out source code
    # and keep a copy of the output in azure-pipelines_check.Rout.
    # The step passes only if that output reports 0 errors, 0 warnings and 0 notes.
    # If previous steps have failed then don't run.
    # If this step fails still mark as failed, but don't stop the rest of the steps running.
  - bash: |
      check_log=azure-pipelines_check.Rout

      R -q -e "library('devtools'); devtools::check(args = c('--no-tests', '--no-examples'))" | tee "$check_log"

      # Every one of these patterns has to be present in the check output.
      for wanted in "^0 errors" " 0 warnings" " 0 notes"; do
        grep --quiet "$wanted" "$check_log" || exit 1
      done
    continueOnError: true
    condition: succeeded()
    displayName: 'Devtools checks'
    workingDirectory: $(Pipeline.Workspace)/dsBase


    #####################################################################################
    # Run the testthat suite for the checked out code through covr, so that coverage is
    # collected at the same time. The command is very convoluted as it has to do some
    # things which are not default behaviour: output the results to a JUnit xml file,
    # keep going when tests fail (max_failures = 999999, stop_on_failure = FALSE), and
    # run everything through the code coverage tool.
    # coveragelist.csv, test_results.xml and test_console_output.txt are moved to
    # $(Pipeline.Workspace)/logs for the later steps.
    # The step fails unless the captured console output contains " FAIL 0 ".
    # TODO: Tidy up variable names - use timestamps here.
  - bash: |

      # Any of the below makes the tests work.
      sudo R --verbose -e 'devtools::reload()'

      pwd
      mkdir $(Pipeline.Workspace)/logs

      # run the coverage tool and output to coveragelist.csv
      # testthat::testpackage uses a MultiReporter, comprised of a ProgressReporter and JunitReporter
      # R output and messages are redirected by sink() to test_console_output.txt
      # junit reporter output is to test_results.xml
      sudo R -q -e '
        library(covr);
        write.csv(
            coverage_to_list(
                covr::package_coverage(
                    type = c("none"),
                    code = c(
                        '"'"'library(testthat);
                        output_file <- file("test_console_output.txt");
                        sink(output_file);
                        sink(output_file, type = "message");
                        library(testthat);
                        junit_rep <- JunitReporter$new(file = "test_results.xml");
                        progress_rep <- ProgressReporter$new(max_failures = 999999);
                        multi_rep <- MultiReporter$new(reporters = list(progress_rep, junit_rep));
                        testthat::test_package("$(projectName)", filter = "$(test_filter)",reporter = multi_rep, stop_on_failure = FALSE)'"'"'
                    )
                )
            ),
            "coveragelist.csv"
        )'

      # display the test console output
      cat test_console_output.txt
      mv coveragelist.csv $(Pipeline.Workspace)/logs
      mv test_results.xml $(Pipeline.Workspace)/logs
      mv test_console_output.txt $(Pipeline.Workspace)/logs

      grep --quiet " FAIL 0 " $(Pipeline.Workspace)/logs/test_console_output.txt

    workingDirectory: $(Pipeline.Workspace)/dsBase
    displayName: 'Code coverage and JUnit report output'
    condition: succeeded()


    #####################################################################################
    # Parse the JUnit file to see if there are any errors/failures. If there are then
    # echo the affected testsuites so finding bugs should be easier.
    # The step fails when at least one testsuite has an issue, or when the JUnit file
    # is missing.
    # This should run even if previous steps have failed.
  - bash: |

      results_file=test_results.xml

      # With no results file there is nothing to inspect; report that as a failure
      # instead of as "0 issues".
      if [ ! -f "$results_file" ]; then
        echo "JUnit results file not found: $results_file"
        exit 1
      fi

      # Strip out when error and failure = 0 and count the number of times it does not.
      issue_count=$(sed 's/failures="0" errors="0"//' "$results_file" | grep --count errors=)
      echo "Number of testsuites with issues: "$issue_count
      echo "Testsuites with issues:"
      sed 's/failures="0" errors="0"//' "$results_file" | grep errors= > issues.log
      cat issues.log

      # Exit statuses wrap modulo 256, so exiting with the raw count would report
      # success for exactly 256 (512, ...) failing testsuites. Use a fixed status.
      if [ "$issue_count" -gt 0 ]; then
        exit 1
      fi
      exit 0

    workingDirectory: $(Pipeline.Workspace)/logs
    displayName: 'Check for errors & Failures in JUnit file'
    condition: succeededOrFailed()


    #####################################################################################
    # Record the branch, OS, R and opal versions in <datetime>.txt in the logs folder.
    # The file is added to the testStatus commit so it can be parsed and used on the
    # status table.
  - bash: |
      versions_file=$(datetime).txt

      # One "key:value" line per item, all appended to the same file.
      {
        echo 'branch:'$(branchName)
        echo 'os:'$(lsb_release -ds)
        echo 'R:'$(R --version | head -n 1)
        echo 'opal:'$(opal system --opal localhost:8080 --user administrator --password "datashield_test&" --version)
      } >> "$versions_file"
    condition: succeededOrFailed()
    displayName: 'Write versions to file'
    workingDirectory: $(Pipeline.Workspace)/logs


    #####################################################################################
    # Add the results from this run to the checked out testStatus repo, build the
    # status page from them and push back to GH.
    # Only runs for non-PR builds of datashield/dsBase.
    # TODO: Automatically pull in better email/name info from somewhere.
    # TODO: More debug info in commit message
  - bash: |

      # Git needs some config set to be able to push to a repo.
      git config --global user.email "you@example.com"
      git config --global user.name "Azure pipeline"

      # This repo is checked out in detached head state, so reconnect it here.
      git checkout master

      # It is possible that other commits have been made to the testStatus repo since it
      # was checked out. i.e. other pipeline runs might have finished.
      git pull

      # Make the directories if they dont already exist
      mkdir --parents logs/$(projectName)/$(branchName)
      mkdir --parents docs/$(projectName)/$(branchName)/latest

      # Each result is stored twice: under its fixed name (latest run) and under a
      # timestamped name (history).
      cp $(Pipeline.Workspace)/logs/coveragelist.csv logs/$(projectName)/$(branchName)/
      cp $(Pipeline.Workspace)/logs/coveragelist.csv logs/$(projectName)/$(branchName)/$(datetime).csv

      cp $(Pipeline.Workspace)/logs/test_results.xml logs/$(projectName)/$(branchName)/
      cp $(Pipeline.Workspace)/logs/test_results.xml logs/$(projectName)/$(branchName)/$(datetime).xml

      cp $(Pipeline.Workspace)/logs/$(datetime).txt logs/$(projectName)/$(branchName)/

      # Run the script to parse the results and build the html pages. Arguments as passed here:
      # status.py JUnit_file.xml coverage_file.csv versions_file.txt output_file.html local_repo_path remote_repo_name branch
      source/status.py logs/$(projectName)/$(branchName)/$(datetime).xml logs/$(projectName)/$(branchName)/$(datetime).csv logs/$(projectName)/$(branchName)/$(datetime).txt status.html $(Pipeline.Workspace)/$(projectName) $(projectName) $(branchName)

      cp status.html docs/$(projectName)/$(branchName)/latest/index.html
      git add logs/$(projectName)/$(branchName)/coveragelist.csv
      git add logs/$(projectName)/$(branchName)/test_results.xml
      git add logs/$(projectName)/$(branchName)/$(datetime).xml
      git add logs/$(projectName)/$(branchName)/$(datetime).csv
      git add logs/$(projectName)/$(branchName)/$(datetime).txt
      git add docs/$(projectName)/$(branchName)/latest/index.html

      # The debug paragraph uses bash $'...' quoting so that \n becomes a real newline.
      # Inside ordinary double quotes the backslash-n pairs were committed verbatim.
      git commit -m "Azure auto test for $(projectName)/$(branchName) @ $(datetime)" -m $'Debug info:\nProjectName:$(projectName)\nBranchName:$(branchName)\nDataTime:$(datetime)'
      git push

      # The script always ends with status 0, so a failed commit or push does not
      # fail the step.
      exit 0

    workingDirectory: $(Pipeline.Workspace)/testStatus
    displayName: 'Parse test results'
    condition: and(eq(variables['Build.Repository.Name'], 'datashield/dsBase'), ne(variables['Build.Reason'], 'PullRequest'))


    #####################################################################################
    # Output the environment information to the console. This is useful for debugging.
    # Always do this, even if some of the above has failed or the job has been cancelled.
  - bash: |

      echo -e "\n#############################"
      echo -e "ls /: ######################"
      ls $(Pipeline.Workspace)

      echo -e "\n#############################"
      echo -e "lscpu: ######################"
      lscpu

      echo -e "\n#############################"
      echo -e "memory: #####################"
      free -m

      echo -e "\n#############################"
      echo -e "env: ########################"
      env

      echo -e "\n#############################"
      echo -e "Rprofile: ###################"
      # Show the profile in the checked out repo (if it has one) and the per-user
      # profile, which is the file the 'Tweak local R env' step appends to.
      for profile in $(Pipeline.Workspace)/dsBase/.Rprofile ~/.Rprofile; do
        echo "$profile:"
        if [ -f "$profile" ]; then
          cat "$profile"
        else
          echo "(not present)"
        fi
      done

      echo -e "\n#############################"
      echo -e "R installed.packages(): #####"
      R -e 'installed.packages()'

      echo -e "\n#############################"
      echo -e "R sessionInfo(): ############"
      R -e 'sessionInfo()'

      # apt-get rather than apt: apt warns that its command line interface is not
      # stable enough for use in scripts.
      sudo apt-get install -y tree
      pwd
      echo -e "\n#############################"
      echo -e "File tree: ##################"
      tree $(Pipeline.Workspace)

    displayName: 'Environment info'
    condition: always()