Skip to content

Commit ea4e5e5

Browse files
Merge pull request #134 from qbicsoftware/release/1.18.0
Release 1.18.0
2 parents 24d24fd + 82bfc45 commit ea4e5e5

File tree

9 files changed

+96
-51
lines changed

9 files changed

+96
-51
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@
128128
<dependency>
129129
<artifactId>data-model-lib</artifactId>
130130
<groupId>life.qbic</groupId>
131-
<version>2.27.0</version>
131+
<version>2.28.0</version>
132132
</dependency>
133133
<dependency>
134134
<groupId>org.mockito</groupId>

src/main/groovy/life/qbic/utils/BioinformaticAnalysisParser.groovy

Lines changed: 58 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import java.nio.file.NotDirectoryException
1717
import java.nio.file.Path
1818
import java.text.ParseException
1919

20-
2120
/**
2221
* <h1>Parser storing the fileTree of an nf-core pipeline output directory into JSON format</h1>
2322
* <br>
@@ -27,7 +26,7 @@ import java.text.ParseException
2726
* @param directory path of nf-core directory whose fileTree should be converted into a JSON String
2827
*
2928
*/
30-
class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>{
29+
class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult> {
3130

3231
/**
3332
* Contains the associated keys of the required root directory subFolders
@@ -36,19 +35,28 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
3635
* @since 1.8.0
3736
*/
3837
enum RequiredRootFolderKeys {
39-
QUALITY_CONTROL("qualityControl"),
40-
PIPELINE_INFORMATION("pipelineInformation"),
41-
PROCESS_FOLDERS("processFolders")
38+
QUALITY_CONTROL("qualityControl", "multiqc"),
39+
PIPELINE_INFORMATION("pipelineInformation", "pipeline_info"),
40+
// Process folder names vary between pipelines, so no fixed directory name can be assumed for now
41+
PROCESS_FOLDERS("processFolders", null)
4242

4343
private String keyName
4444

45-
RequiredRootFolderKeys(String keyName) {
45+
private String folderName
46+
47+
RequiredRootFolderKeys(String keyName, String folderName) {
4648
this.keyName = keyName
49+
this.folderName = folderName
4750
}
4851

4952
String getKeyName() {
5053
return this.keyName
5154
}
55+
56+
String getFolderName() {
57+
return this.folderName
58+
}
59+
5260
}
5361

5462
/**
@@ -58,18 +66,24 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
5866
* @since 1.8.0
5967
*/
6068
enum RequiredRootFileKeys {
61-
RUN_ID("runId"),
62-
SAMPLE_ID("sampleIds"),
69+
RUN_ID("runId", "run_id.txt"),
70+
SAMPLE_ID("sampleIds", "sample_ids.txt")
6371

6472
private String keyName
73+
private String fileName
6574

66-
RequiredRootFileKeys(String keyName) {
75+
RequiredRootFileKeys(String keyName, String fileName) {
6776
this.keyName = keyName
77+
this.fileName = fileName
6878
}
6979

7080
String getKeyName() {
7181
return this.keyName
7282
}
83+
84+
String getFileName() {
85+
return this.fileName
86+
}
7387
}
7488

7589
/**
@@ -79,19 +93,25 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
7993
* @since 1.8.0
8094
*/
8195
enum RequiredPipelineFileKeys {
82-
SOFTWARE_VERSIONS("softwareVersions"),
83-
EXECUTION_REPORT("executionReport"),
96+
SOFTWARE_VERSIONS("softwareVersions", "software_versions.yml"),
97+
EXECUTION_REPORT("executionReport", "execution_report"),
8498

8599
private String keyName
86100

87-
RequiredPipelineFileKeys(String keyName) {
101+
private String fileName
102+
103+
RequiredPipelineFileKeys(String keyName, String fileName) {
88104
this.keyName = keyName
105+
this.fileName = fileName
89106
}
90107

91108
String getKeyName() {
92109
return this.keyName
93110
}
94111

112+
String getFileName() {
113+
return this.fileName
114+
}
95115
}
96116

97117
/** {@inheritDoc} */
@@ -139,32 +159,23 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
139159
List<Map> processFolders = []
140160
rootChildren.each { currentChild ->
141161
if (currentChild.containsKey("children")) {
142-
//folder
162+
//directory
143163
String folderName = currentChild.get("name")
144-
switch (folderName) {
145-
case "multiqc":
146-
insertAsProperty(map, currentChild, RequiredRootFolderKeys.QUALITY_CONTROL.getKeyName())
147-
break
148-
case "pipeline_info":
164+
RequiredRootFolderKeys requiredRootFolderKeys = RequiredRootFolderKeys.values().find { rootFolderKeys -> (rootFolderKeys.getFolderName() == folderName) }
165+
if (requiredRootFolderKeys) {
166+
if (requiredRootFolderKeys == RequiredRootFolderKeys.PIPELINE_INFORMATION) {
149167
parsePipelineInformation(currentChild)
150-
insertAsProperty(map, currentChild, RequiredRootFolderKeys.PIPELINE_INFORMATION.getKeyName())
151-
break
152-
default:
153-
processFolders.add(currentChild)
154-
break
168+
}
169+
insertAsProperty(map, currentChild, requiredRootFolderKeys.getKeyName())
170+
} else {
171+
processFolders.add(currentChild)
155172
}
156173
} else if (currentChild.containsKey("fileType")) {
157174
//file
158-
switch (currentChild.get("name")) {
159-
case "run_id.txt":
160-
insertAsProperty(map, currentChild, RequiredRootFileKeys.RUN_ID.getKeyName())
161-
break
162-
case "sample_ids.txt":
163-
insertAsProperty(map, currentChild, RequiredRootFileKeys.SAMPLE_ID.getKeyName())
164-
break
165-
default:
166-
//ignore other files
167-
break
175+
String fileName = currentChild.get("name")
176+
RequiredRootFileKeys requiredRootFileKeys = RequiredRootFileKeys.values().find { rootFileKeys -> (rootFileKeys.getFileName() == fileName) }
177+
if (requiredRootFileKeys) {
178+
insertAsProperty(map, currentChild, requiredRootFileKeys.getKeyName())
168179
}
169180
}
170181
}
@@ -200,12 +211,10 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
200211
private static void parsePipelineInformation(Map pipelineInformation) {
201212

202213
pipelineInformation.get("children").each { Map child ->
203-
String filename = child.get("name")
204-
if(filename.equals("software_versions.yml")){
205-
insertAsProperty(pipelineInformation, child, RequiredPipelineFileKeys.SOFTWARE_VERSIONS.getKeyName())
206-
}
207-
else if(filename.matches("^execution_report.*")) {
208-
insertAsProperty(pipelineInformation, child, RequiredPipelineFileKeys.EXECUTION_REPORT.getKeyName())
214+
String fileName = child.get("name")
215+
RequiredPipelineFileKeys requiredPipelineFileKeys = RequiredPipelineFileKeys.values().find { pipelineFileKeys -> (fileName.contains(pipelineFileKeys.fileName)) }
216+
if (requiredPipelineFileKeys) {
217+
insertAsProperty(pipelineInformation, child, requiredPipelineFileKeys.getKeyName())
209218
}
210219
}
211220
}
@@ -218,7 +227,7 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
218227
* @since 1.8.0
219228
*/
220229
private static void insertAsProperty(Map parent, Object content, String propertyName) {
221-
parent.put(propertyName,content)
230+
parent.put(propertyName, content)
222231
}
223232

224233
/**
@@ -236,7 +245,7 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
236245
/**
237246
* Method which checks if a given Json String matches a given Json schema
238247
* @param json Json String which will be compared to schema
239-
* @param path to Json schema for validation of Json String
248+
* @param path to Json schema for validation of Json String
240249
* @throws org.everit.json.schema.ValidationException
241250
*/
242251
private static void validateJson(String json) throws ValidationException {
@@ -258,6 +267,7 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
258267
/*
259268
* Converts a file tree into a json object.
260269
*/
270+
261271
private static class DirectoryConverter {
262272

263273
/**
@@ -321,11 +331,11 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
321331

322332
private static Map convertToRelativePaths(Map content, Path root) {
323333
//Since each value in the root map is a map we need to iterate over each key/value pair
324-
content["path"] = toRelativePath(content["path"] as String, root)
325-
if (content["children"]) {
326-
// Children always contains a map, so convert recursively
327-
content["children"] = (content["children"] as List).collect { convertToRelativePaths(it as Map, root) }
328-
}
334+
content["path"] = toRelativePath(content["path"] as String, root)
335+
if (content["children"]) {
336+
// Children always contains a map, so convert recursively
337+
content["children"] = (content["children"] as List).collect { convertToRelativePaths(it as Map, root) }
338+
}
329339
return content
330340

331341
}
@@ -351,8 +361,8 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
351361

352362

353363
def convertedFile = [
354-
"name" : name,
355-
"path" : path,
364+
"name" : name,
365+
"path" : path,
356366
"fileType": fileType
357367
]
358368
return convertedFile

src/main/groovy/life/qbic/utils/NanoporeParser.groovy

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,7 @@ class NanoporeParser {
4242
for (File hiddenFile : hiddenFiles) {
4343
deleteFile(hiddenFile)
4444
}
45-
4645
return convertedExperiment
47-
4846
}
4947

5048
private static void deleteFile(File file) {

src/test/groovy/life/qbic/utils/BioinformaticAnalysisSpec.groovy

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,45 @@ class BioinformaticAnalysisSpec extends Specification {
6262
ExecutionReport executionReport = pipelineInfo.getExecutionReport()
6363
assert executionReport.getRelativePath() == "./pipeline_info/execution_report_1234-56-78_90-12-34.html"
6464
assert executionReport.getName() == "execution_report_1234-56-78_90-12-34.html"
65+
}
6566

67+
def "parsing a valid file structure without a run_id also returns a NfCorePipelineResult object"() {
68+
given: "A valid nf-core pipeline output data structure"
69+
def pathToDirectory = Paths.get(exampleDirectoriesRoot, "validates-no-run-id")
70+
when: "we parse this valid structure"
71+
NfCorePipelineResult nfCorePipelineResult = bioinformaticAnalysisParser.parseFrom(pathToDirectory)
72+
then: "we expect no exception should be thrown"
73+
assert nfCorePipelineResult instanceof NfCorePipelineResult
74+
//Root files can be parsed
75+
assert !nfCorePipelineResult.runId
76+
assert !nfCorePipelineResult.runId
77+
assert nfCorePipelineResult.sampleIds.getRelativePath() == "./sample_ids.txt"
78+
assert nfCorePipelineResult.sampleIds.getName()== "sample_ids.txt"
79+
//Root Folder can be parsed
80+
QualityControlFolder multiQc = nfCorePipelineResult.getQualityControlFolder()
81+
assert multiQc.getRelativePath() == "./multiqc"
82+
assert multiQc.getName() == "multiqc"
83+
assert multiQc instanceof DataFolder
6684

85+
PipelineInformationFolder pipelineInfo = nfCorePipelineResult.getPipelineInformation()
86+
assert pipelineInfo.getRelativePath() == "./pipeline_info"
87+
assert pipelineInfo.getName() == "pipeline_info"
88+
assert pipelineInfo instanceof DataFolder
89+
90+
List<DataFolder> processFolders = nfCorePipelineResult.getProcessFolders()
91+
assert processFolders[0].getRelativePath()== "./salmon"
92+
assert processFolders[0].getName() == "salmon"
93+
assert processFolders[0] instanceof DataFolder
94+
95+
//Files in Root folders can be parsed
96+
97+
SoftwareVersions softwareVersions = pipelineInfo.getSoftwareVersions()
98+
assert softwareVersions.getRelativePath() == "./pipeline_info/software_versions.yml"
99+
assert softwareVersions.getName() == "software_versions.yml"
100+
101+
ExecutionReport executionReport = pipelineInfo.getExecutionReport()
102+
assert executionReport.getRelativePath() == "./pipeline_info/execution_report_1234-56-78_90-12-34.html"
103+
assert executionReport.getName() == "execution_report_1234-56-78_90-12-34.html"
67104
}
68105

69106
def "parsing an invalid file structure throws DatasetValidationException"() {

src/test/resources/dummyFileSystem/bioinformatic-analysis-output/validates-no-run-id/multiqc/star_salmon/multiqc_report.html

Whitespace-only changes.

src/test/resources/dummyFileSystem/bioinformatic-analysis-output/validates-no-run-id/pipeline_info/execution_report_1234-56-78_90-12-34.html

Whitespace-only changes.

src/test/resources/dummyFileSystem/bioinformatic-analysis-output/validates-no-run-id/pipeline_info/software_versions.yml

Whitespace-only changes.

src/test/resources/dummyFileSystem/bioinformatic-analysis-output/validates-no-run-id/salmon/salmon.merged.gene_tpm.tsv

Whitespace-only changes.

src/test/resources/dummyFileSystem/bioinformatic-analysis-output/validates-no-run-id/sample_ids.txt

Whitespace-only changes.

0 commit comments

Comments
 (0)