Skip to content

Commit bbabc8d

Browse files
committed
app: update exporter for protein substitution predictions (SIFT, PolyPhen, REVEL and AlphaMissense), #TASK-5464, #TASK-5388
1 parent 4167282 commit bbabc8d

File tree

2 files changed

+7
-38
lines changed

2 files changed

+7
-38
lines changed

cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import com.beust.jcommander.*;
2020
import org.opencb.cellbase.app.cli.CliOptionsParser;
2121
import org.opencb.cellbase.core.api.key.ApiKeyQuota;
22+
import org.opencb.cellbase.lib.EtlCommons;
2223

2324
import java.util.HashMap;
2425
import java.util.List;
@@ -238,8 +239,8 @@ public class ExportCommandOptions {
238239
public CommonCommandOptions commonOptions = commonCommandOptions;
239240

240241
@Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation, "
241-
+ "conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed. 'all' "
242-
+ " loads everything", required = true, arity = 1)
242+
+ EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA + ", conservation, regulation, protein, clinical_variants, repeats,"
243+
+ " regulatory_pfm, splice_score, pubmed. 'all' export everything", required = true, arity = 1)
243244
public String data;
244245

245246
@Parameter(names = {"--db", "--database"}, description = "Database name, e.g., cellbase_hsapiens_grch38_v5", required = true,

cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java

+4-36
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,9 @@ public ExportCommandExecutor(AdminCliOptionsParser.ExportCommandOptions exportCo
8484
if (exportCommandOptions.data.equals("all")) {
8585
this.dataToExport = new String[]{EtlCommons.GENOME_DATA, EtlCommons.GENE_DATA, EtlCommons.REFSEQ_DATA,
8686
EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA,
87-
EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA,
87+
PROTEIN_SUBSTITUTION_PREDICTION_DATA, EtlCommons.VARIATION_DATA,
8888
EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.REPEATS_DATA,
89-
OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PHARMACOGENOMICS_DATA};
89+
OBO_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PHARMACOGENOMICS_DATA};
9090
} else {
9191
this.dataToExport = exportCommandOptions.data.split(",");
9292
}
@@ -200,38 +200,6 @@ public void execute() throws CellBaseException {
200200
counterMsg = counter + " CADD items";
201201
break;
202202
}
203-
case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: {
204-
CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output);
205-
ProteinManager proteinManager = managerFactory.getProteinManager(species, assembly);
206-
Map<String, List<Integer>> positionMap = new HashMap<>();
207-
for (Variant variant : variants) {
208-
if (!positionMap.containsKey(variant.getChromosome())) {
209-
positionMap.put(variant.getChromosome(), new ArrayList<>());
210-
}
211-
positionMap.get(variant.getChromosome()).add(variant.getStart());
212-
if (positionMap.get(variant.getChromosome()).size() >= 200) {
213-
CellBaseDataResult<MissenseVariantFunctionalScore> results = proteinManager
214-
.getMissenseVariantFunctionalScores(variant.getChromosome(),
215-
positionMap.get(variant.getChromosome()), null, dataRelease);
216-
counter += writeExportedData(results.getResults(), "missense_variation_functional_score", serializer);
217-
positionMap.put(variant.getChromosome(), new ArrayList<>());
218-
}
219-
}
220-
221-
// Process map
222-
for (Map.Entry<String, List<Integer>> entry : positionMap.entrySet()) {
223-
if (CollectionUtils.isEmpty(entry.getValue())) {
224-
continue;
225-
}
226-
CellBaseDataResult<MissenseVariantFunctionalScore> results = proteinManager
227-
.getMissenseVariantFunctionalScores(entry.getKey(), entry.getValue(), null, dataRelease);
228-
counter += writeExportedData(results.getResults(), "missense_variation_functional_score", serializer);
229-
}
230-
serializer.close();
231-
232-
counterMsg = counter + " missense variation functional scores";
233-
break;
234-
}
235203
case EtlCommons.CONSERVATION_DATA: {
236204
// Export data
237205
CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output);
@@ -271,7 +239,7 @@ public void execute() throws CellBaseException {
271239
counterMsg = counter + " proteins";
272240
break;
273241
}
274-
case EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA: {
242+
case EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA: {
275243
ProteinManager proteinManager = managerFactory.getProteinManager(species, assembly);
276244
Map<String, List<String>> transcriptsMap = new HashMap<>();
277245
for (Gene gene : genes) {
@@ -290,7 +258,7 @@ public void execute() throws CellBaseException {
290258
}
291259
serializer.close();
292260

293-
counterMsg = counter + " protein functional predictions";
261+
counterMsg = counter + " protein substitution predictions";
294262
break;
295263
}
296264
case EtlCommons.CLINICAL_VARIANTS_DATA: {

0 commit comments

Comments
 (0)