Skip to content

Commit ce1767a

Browse files
committed
downloader: add cancer hotspot
1 parent 89831a6 commit ce1767a

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java

+10
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ public class DownloadProperties {
2626
private EnsemblProperties ensembl;
2727
private EnsemblProperties ensemblGenomes;
2828
private URLProperties hgnc;
29+
private URLProperties cancerHotspot;
2930
private URLProperties refSeq;
3031
private URLProperties refSeqFasta;
3132
private URLProperties refSeqProteinFasta;
@@ -517,6 +518,15 @@ public DownloadProperties setHgnc(URLProperties hgnc) {
517518
return this;
518519
}
519520

521+
public URLProperties getCancerHotspot() {
522+
return cancerHotspot;
523+
}
524+
525+
public DownloadProperties setCancerHotspot(URLProperties cancerHotspot) {
526+
this.cancerHotspot = cancerHotspot;
527+
return this;
528+
}
529+
520530
public static class EnsemblProperties {
521531

522532
private DatabaseCredentials database;

cellbase-core/src/main/resources/configuration.yml

+4
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ download:
6464
hgnc:
6565
host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt
6666
version: 2023-11-01
67+
cancerHotspot:
68+
host: https://www.cancerhotspots.org/files/hotspots_v2.xls
69+
version: "v2"
6770
refSeq:
6871
host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz
6972
refSeqFasta:
@@ -161,6 +164,7 @@ download:
161164
host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv
162165
version: "2022-02-01"
163166
cadd:
167+
## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP!
164168
host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz
165169
reactome:
166170
host: http://www.reactome.org/download/current/biopax.zip

cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/GeneDownloadManager.java

+14-1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ public List<DownloadFile> download() throws IOException, InterruptedException {
8383
downloadFiles.add(downloadMane(geneFolder));
8484
downloadFiles.add(downloadLrg(geneFolder));
8585
downloadFiles.add(downloadHgnc(geneFolder));
86+
downloadFiles.add(downloadCancerHotspot(geneFolder));
8687
downloadFiles.add(downloadDrugData(geneFolder));
8788
downloadFiles.addAll(downloadGeneUniprotXref(geneFolder));
8889
downloadFiles.add(downloadGeneExpressionAtlas(geneFolder));
@@ -211,7 +212,7 @@ private DownloadFile downloadLrg(Path geneFolder) throws IOException, Interrupte
211212

212213
private DownloadFile downloadHgnc(Path geneFolder) throws IOException, InterruptedException {
213214
if (speciesConfiguration.getScientificName().equals("Homo sapiens")) {
214-
logger.info("Downloading LRG ...");
215+
logger.info("Downloading HGNC ...");
215216
String url = configuration.getDownload().getHgnc().getHost();
216217
saveVersionData(EtlCommons.GENE_DATA, "HGNC_GENE", configuration.getDownload().getHgnc().getVersion(),
217218
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("hgncVersion.json"));
@@ -221,6 +222,18 @@ private DownloadFile downloadHgnc(Path geneFolder) throws IOException, Interrupt
221222
return null;
222223
}
223224

225+
private DownloadFile downloadCancerHotspot(Path geneFolder) throws IOException, InterruptedException {
226+
if (speciesConfiguration.getScientificName().equals("Homo sapiens")) {
227+
logger.info("Downloading Cancer Hotspot ...");
228+
String url = configuration.getDownload().getCancerHotspot().getHost();
229+
saveVersionData(EtlCommons.GENE_DATA, "CANCER_HOTSPOT", configuration.getDownload().getHgnc().getVersion(),
230+
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("cancerHotspotVersion.json"));
231+
String[] array = url.split("/");
232+
return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString());
233+
}
234+
return null;
235+
}
236+
224237
private DownloadFile downloadGO(Path geneFolder) throws IOException, InterruptedException {
225238
if (speciesConfiguration.getScientificName().equals("Homo sapiens")) {
226239
logger.info("Downloading go annotation...");

0 commit comments

Comments
 (0)