diff --git a/.github/workflows/link_check.yml b/.github/workflows/link_check.yml index c5b1b846..bab440ed 100644 --- a/.github/workflows/link_check.yml +++ b/.github/workflows/link_check.yml @@ -1,20 +1,33 @@ name: Links (Fail Fast) on: - pull_request: {branches: ['main']} + pull_request: + branches: + - main + jobs: linkChecker: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: switch .qmd to .md - run: 'source("R/switch.R")' - shell: Rscript -e + - name: Set up R # Install R from CRAN + uses: r-lib/actions/setup-r@v2 + with: + r-version: '4.3.3' # You can specify a different R version if needed + + - name: Install R packages + run: | + Rscript -e 'install.packages("fs")' + shell: bash + + - name: Switch .qmd to .md + run: Rscript R/switch.R + shell: bash - name: Link Checker uses: lycheeverse/lychee-action@v1.8.0 with: fail: true env: - GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8d7014a4..05a1ffe3 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -2,39 +2,11 @@ name: Quarto Publish on: workflow_dispatch: - push: - branches: [main] + repository_dispatch: + types: [quarto-publish] jobs: - Update-post-dates: - runs-on: ubuntu-latest - container: - image: "rocker/tidyverse:4.2.1" - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - ref: main - token: ${{ secrets.PHARMAVERSE_BOT }} - - - name: Run update_post_dates - run: Rscript R/update_post_dates.R # running the R script with Rscript - - - name: Configure Git safe directory - run: git config --global --add safe.directory /__w/blog/blog - - - name: Commit and push changes - uses: stefanzweifel/git-auto-commit-action@v5 - with: - commit_message: "[skip actions] Auto-update blog post date" - file_pattern: "." 
- commit_user_name: github-actions - commit_user_email: >- - 41898282+github-actions[bot]@users.noreply.github.com - continue-on-error: true - - build-deploy: - needs: Update-post-dates + build_deploy: runs-on: ubuntu-latest permissions: contents: write diff --git a/.github/workflows/update_post_dates.yml b/.github/workflows/update_post_dates.yml new file mode 100644 index 00000000..5cc0437e --- /dev/null +++ b/.github/workflows/update_post_dates.yml @@ -0,0 +1,40 @@ +name: Update Post Dates + +on: + workflow_dispatch: + push: + branches: [main] + +jobs: + update_post_dates: + runs-on: ubuntu-latest + container: + image: "rocker/tidyverse:4.2.1" + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + ref: main + token: ${{ secrets.PHARMAVERSE_BOT }} + + - name: Run update_post_dates + run: Rscript R/update_post_dates.R # running the R script with Rscript + + - name: Configure Git safe directory + run: git config --global --add safe.directory /__w/blog/blog + + - name: Commit and push changes + uses: stefanzweifel/git-auto-commit-action@v5 + with: + commit_message: "[skip actions] Auto-update blog post date" + file_pattern: "." 
+ commit_user_name: github-actions + commit_user_email: >- + 41898282+github-actions[bot]@users.noreply.github.com + continue-on-error: true + + - name: Trigger Quarto Publish + uses: peter-evans/repository-dispatch@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + event-type: quarto-publish diff --git a/README.md b/README.md index e7739111..f85f0645 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,9 @@ install.packages(c("jsonlite", "rtables", "teal", "riskmetric", - "tidyCDISC")) + "tidyCDISC", + "mirai", + "admiralmetabolic")) ``` ## How to Use the `blog` Docker Image for Local Development diff --git a/inst/WORDLIST.txt b/inst/WORDLIST.txt index 04fd82e1..9f6a52d8 100644 --- a/inst/WORDLIST.txt +++ b/inst/WORDLIST.txt @@ -102,11 +102,11 @@ AMBUL amd amongst Amor -Anders analysing analytics Analytics aNCA +Anders anderson andre André @@ -170,6 +170,7 @@ BILIBL bindCache bindEvent biogen +Biologics biomarker Biomarker biometrics @@ -851,6 +852,8 @@ s sa sadchla Sadchla +safetyData +SafetyGraphics Salzburg Sanofi Sanofi's @@ -940,6 +943,7 @@ Syon tagList tamor targetdatatype +TAs Taşlıçukur Tatiana TatianaPXL @@ -1016,8 +1020,8 @@ ubuntu ucla ug ui -uk UI +uk Ul un Unardi @@ -1073,8 +1077,8 @@ WAISTHGT waisthip WAISTHIP Walkowiak -Walkthrough walkthrough +Walkthrough wasm WAWA wayback diff --git a/media/data.jpg b/media/data.jpg new file mode 100644 index 00000000..caa4612c Binary files /dev/null and b/media/data.jpg differ diff --git a/posts/zzz_DO_NOT_EDIT_data__packages/appendix.R b/posts/zzz_DO_NOT_EDIT_data__packages/appendix.R new file mode 100644 index 00000000..c69926b0 --- /dev/null +++ b/posts/zzz_DO_NOT_EDIT_data__packages/appendix.R @@ -0,0 +1,73 @@ +suppressMessages(library(dplyr)) +# markdown helpers -------------------------------------------------------- + +markdown_appendix <- function(name, content) { + paste(paste("##", name, "{.appendix}"), " ", content, sep = "\n") +} +markdown_link <- function(text, path) { + paste0("[", text, "](", path, 
")") +} + + + +# worker functions -------------------------------------------------------- + +insert_source <- function(repo_spec, name, + collection = "posts", + branch = "main", + host = "https://github.com", + text = "Source", + file_name) { + path <- paste( + host, + repo_spec, + "tree", + branch, + collection, + name, + file_name, + sep = "/" + ) + return(markdown_link(text, path)) +} + +insert_timestamp <- function(tzone = Sys.timezone()) { + time <- lubridate::now(tzone = tzone) + stamp <- format(time, tz = tzone, usetz = TRUE) # format() honours tz/usetz; as.character() on POSIXt stopped doing so in R 4.3 + return(stamp) +} + +insert_lockfile <- function(repo_spec, name, + collection = "posts", + branch = "main", + host = "https://github.com", + text = "Session info") { + path <- "https://pharmaverse.github.io/blog/session_info.html" # fixed URL; the other arguments are accepted but not used here + + return(markdown_link(text, path)) +} + + + +# top level function ------------------------------------------------------ + +insert_appendix <- function(repo_spec, name, collection = "posts", file_name) { + appendices <- paste( + markdown_appendix( + name = "Last updated", + content = insert_timestamp() + ), + " ", + markdown_appendix( + name = "Details", + content = paste( + insert_source(repo_spec, name, collection, file_name = file_name), + # get renv information, + insert_lockfile(repo_spec, name, collection), + sep = ", " + ) + ), + sep = "\n" + ) + knitr::asis_output(appendices) +} diff --git a/posts/zzz_DO_NOT_EDIT_data__packages/data.jpg b/posts/zzz_DO_NOT_EDIT_data__packages/data.jpg new file mode 100644 index 00000000..caa4612c Binary files /dev/null and b/posts/zzz_DO_NOT_EDIT_data__packages/data.jpg differ diff --git a/posts/zzz_DO_NOT_EDIT_data__packages/data__packages.qmd b/posts/zzz_DO_NOT_EDIT_data__packages/data__packages.qmd new file mode 100644 index 00000000..c8e7ec7d --- /dev/null +++ b/posts/zzz_DO_NOT_EDIT_data__packages/data__packages.qmd @@ -0,0 +1,96 @@ +--- +title: "Collecting all the data!" +author: + - name: Ben Straub +description: "Where is all the data? 
An intermittent attempt to continuously compile, collate, consolidate, and curate publicly available CDISC data useful for Clinical Reporting in R" +# Note that the date below will be auto-updated when the post is merged. +date: "2025-02-14" +# Please do not use any non-default categories. +# You can find the default categories in the repository README.md +categories: [SDTM, ADaM, Community, Technical] +# Feel free to change the image +image: "data.jpg" + +--- + + + +```{r setup, include=FALSE} +long_slug <- "zzz_DO_NOT_EDIT_data__packages" +library(link) +link::auto(keep_pkg_prefix = FALSE) +``` + + + +The purpose of this blog is to maintain an ongoing list of publicly available data packages, data in packages or data sources that align to CDISC standards. My hope is that this could be a resource for: + +* those intrepid individuals looking to showcase new documentation, functions, packages and other tools +* those enterprising individuals wanting to learn more about CDISC standards and exploring open-source tools. + +The data presented below is just a start and is shown in order of how I found them. Feel free to get in touch with me for additions or clarifications. You can find me on pharmaverse slack by joining [here](https://pharmaverse.slack.com/). In fact, I encourage, nay implore you, to get in touch as this can't be all the data that we have available to us! + +## pharmaversesdtm: SDTM Test Data for the Pharmaverse Family of Packages + +A set of Study Data Tabulation Model (SDTM) datasets from the Clinical Data Interchange Standards Consortium (CDISC) pilot project used for testing and developing Analysis Data Model (ADaM) datasets inside the pharmaverse family of packages. A CDISC Pilot was conducted somewhere between 2008 and 2010. This is that Pilot data but slowly brought up to current CDISC standards. There are also new datasets in the same style (same `STUDYID`, `USUBJID`s, etc.) 
 added by the {admiral} and the {admiral} extension package teams that provide test data for new domains or specific TAs (ophthalmology, vaccines, etc.). + +Most common SDTM datasets can be found as well as some specific disease area SDTMs that are not available in the CDISC pilot datasets. + +Available on [CRAN](https://cloud.r-project.org/web/packages/pharmaversesdtm/index.html). This package is actively maintained on [GitHub](https://github.com/pharmaverse/pharmaversesdtm). + +## pharmaverseadam: ADaM Test Data for the Pharmaverse Family of Packages + +A set of Analysis Data Model (ADaM) datasets constructed using the Study Data Tabulation Model (SDTM) datasets contained in the {pharmaversesdtm} package and the template scripts from the {admiral} family of packages. + +Available on [CRAN](https://cloud.r-project.org/web/packages/pharmaverseadam/index.html). This package is actively maintained on [GitHub](https://github.com/pharmaverse/pharmaverseadam). + +## admiral: ADaM in R Asset Library + +A toolbox for programming Clinical Data Interchange Standards Consortium (CDISC) compliant Analysis Data Model (ADaM) datasets in R. ADaM datasets are a mandatory part of any New Drug or Biologics License Application submitted to the United States Food and Drug Administration (FDA). Analysis derivations are implemented in accordance with the "Analysis Data Model Implementation Guide". + +Limited datasets like `ADSL`, `ADLB` are provided in {admiral}, because the template scripts available in this package are used to create the ADaMs in {pharmaverseadam}. + +Available on [CRAN](https://cran.r-project.org/web/packages/admiral/index.html). This package is actively maintained on [GitHub](https://github.com/pharmaverse/admiral). + +## random.cdisc.data: Create Random ADaM Datasets + +A set of functions to create *random* Analysis Data Model (ADaM) datasets and cached datasets. 
 You can find a list of the possible random CDISC datasets generated [here](https://insightsengineering.github.io/random.cdisc.data/main/index.html). ADaM dataset specifications are described by the Clinical Data Interchange Standards Consortium (CDISC) Analysis Data Model Team. These datasets are used to power the [TLG Catalog](https://insightsengineering.github.io/tlg-catalog/stable/), though the NEST team is actively replacing them with {pharmaverseadam} datasets - see [a recent blog post](https://pharmaverse.github.io/blog/posts/2025-01-15_nest_and_pharmaverseadam/nest_and_pharmaverseadam.html) about this very effort! + + + +Available on [CRAN](https://cran.r-project.org/web/packages/random.cdisc.data/index.html). The package is actively maintained on [GitHub](https://github.com/insightsengineering/random.cdisc.data) by the NEST team. + +## safetyData: Clinical Trial Data + +The package re-formats PHUSE's sample ADaM and SDTM datasets as an R package following R data best practices. + +PHUSE released the data under the permissive MIT license, so reuse with attribution is encouraged. The data are especially useful for prototyping new tables, listings and figures and for writing automated tests. + +Basic documentation for each data file is provided in help files (e.g. ?adam_adae). Full data specifications in the form of define.xml files can also be found at the links above (pdf for ADaM and pdf for SDTM). + +Available on [CRAN](https://cran.r-project.org/web/packages/safetyData/index.html). The package is available on [GitHub](https://github.com/SafetyGraphics/safetyData). + + +## NEST: Accelerating Clinical Reporting + +[NEST](https://insightsengineering.github.io/nest/) is a collection of open-sourced R packages, which enables faster and more efficient insights generation under clinical research settings, for both exploratory and regulatory purposes. + +They have a wealth of data generated for documentation, demonstrations and testing. 
You can find all the datasets and what packages they live in [here](https://insightsengineering.r-universe.dev/datasets). + +## Collect all the data! + +As you can see the list is short! Let me know if you have sources (big and small) and we can add to this list. + +![](data.jpg){fig-align="center" width="220"} + + + +```{r, echo=FALSE} +source("appendix.R") +insert_appendix( + repo_spec = "pharmaverse/blog", + name = long_slug, + # file_name should be the name of your file + file_name = list.files() %>% stringr::str_subset(".qmd") %>% first() +) +```