From 99e6b65d64ba20b8bf312f643739d10c572a3ce2 Mon Sep 17 00:00:00 2001
From: Anh Nguyet Vu <anngvu@gmail.com>
Date: Wed, 12 Feb 2025 15:32:42 -0800
Subject: [PATCH] Update vignette

---
 ...ging-portal-data-to-other-platforms-cbioportal.Rmd | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/vignettes/bringing-portal-data-to-other-platforms-cbioportal.Rmd b/vignettes/bringing-portal-data-to-other-platforms-cbioportal.Rmd
index f360b759..9e65f185 100644
--- a/vignettes/bringing-portal-data-to-other-platforms-cbioportal.Rmd
+++ b/vignettes/bringing-portal-data-to-other-platforms-cbioportal.Rmd
@@ -53,6 +53,7 @@ Each study dataset combines multiple data types -- clinical, gene expression, ge
 
 cbp_new_study(cancer_study_identifier = "npst_nfosi_ntap_2022",
               name = "Plexiform Neurofibroma and Neurofibroma (Pratilas 2022)",
+              type_of_cancer = "nfib", # required -- see https://oncotree.mskcc.org/
               citation = "TBD")
 ```
 
@@ -77,7 +78,7 @@ Note that:
 
 maf_data <- "syn36553188"
 
-add_cbp_maf(maf_data)
+cbp_add_maf(maf_data)
 ```
 
 ### Add copy number alterations (CNA) data
@@ -108,14 +109,14 @@ cbp_add_expression(mrna_data,
 
 ### Add clinical data
 
-- `clinical_data` is a prepared clinical data table already subsetted to those released in this study, or pass in a query that can be used for subsetting if using a full clinical database table. For example, the full clinical cohort comprises patients 1-50, but this study dataset consists of available and releasable data only for patients 1-20 for expression data and data patients 15-20 for cna data. Here, `clinical_data` can be a smaller table of just those 1-30, or it can be the original table but pass in a suitable additional filter, e.g. `where release = 'batch1'`.
+- Clinical data **should be added last**, after all other data has been added, for sample checks to work properly. 
+- `clinical_data` is prepared from an existing Synapse table. The table can already be subsetted to the patients released in the study dataset, or you can pass in a query that selects the subset. For example, the full clinical cohort comprises patients 1-50, but the dataset can only release expression data for patients 1-20 and CNA data for patients 15-20. Here, `clinical_data` can be a smaller table of just patients 1-20, or it can be the original table with a suitable additional filter, e.g. `where release = 'batch1'`.
 - Clinical data requires mapping to be as consistent with other public datasets as possible. `ref_map` defines the mapping of clinical variables from the NF-OSI data dictionary to cBioPortal's. Only variables in the mapping are exported to cBioPortal. Follow link below to inspect the default file and format used.
-- Clinical data should be added last for overall sample checks to work. For example, if there is expression data for patients 1-20 and cna data patients 15-20,
-it can more informatively warn about any missing/mismatches. 
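+- Adding clinical data last lets the sample checks compare it against everything already added. For example, if there is expression data for patients 1-20 and CNA data for patients 15-20, the checks can more informatively warn about any missing or mismatched samples.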
 
 ```{r add_clinical, eval=FALSE}
 
-clinical_data <- "select * from syn43278088"
+clinical_data <- "select * from syn43278088" # query when the table already contains just the releasable patients
 ref_map <- "https://raw.githubusercontent.com/nf-osi/nf-metadata-dictionary/main/mappings/cBioPortal.yaml"
 
 cbp_add_clinical(clinical_data, ref_map)