-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathmlcube.yaml
More file actions
51 lines (48 loc) · 2.14 KB
/
mlcube.yaml
File metadata and controls
51 lines (48 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Human-readable identity of this cube.
name: Hello World Medperf Data Preparator Cube
# Folded scalar: the two lines below join into a single-line description.
description: >-
  MLCommons demonstration MLCube for building data preparators
  for MedPerf
# One entry per author; each entry is a mapping with a `name` key.
authors:
  - name: "MLCommons Medical Working Group"
# Platform section. `accelerator_count` must be nested under `platform`;
# at column 0 it would parse as an unrelated top-level key and leave
# `platform` null.
platform:
  # Accelerator count for this cube is set to zero.
  accelerator_count: 0
# Docker image and build settings for this cube. All three keys belong
# inside the `docker` mapping.
docker:
  # Image name.
  image: mlcommons/medical-data-prep-hello-world
  # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
  build_context: "../project"
  # Docker file name within docker build context, default is `Dockerfile`.
  build_file: "Dockerfile"
# Tasks exposed by this cube. Each task has its own `parameters` mapping
# with `inputs` and (where applicable) `outputs`; the nesting below keeps
# those per-task mappings from colliding as duplicate keys.
tasks:
  prepare:
    # This task is in charge of transforming the input data into the format
    # expected by the model cubes.
    parameters:
      inputs:
        # Required. Value must point to a directory containing the raw data
        # inside workspace.
        data_path: names/
        # Required. Value must point to a directory containing labels for
        # the data.
        labels_path: labels/
        # Required. Value must be `parameters.yaml`.
        parameters_file: parameters.yaml
      outputs:
        # Required. Indicates where to store the transformed data. Must
        # contain transformed data and labels.
        output_path: data/
  sanity_check:
    # This task ensures that the previously transformed data was transformed
    # correctly. It runs a set of tests that check the quality of the data.
    # The rigor of those tests is determined by the cube author.
    parameters:
      inputs:
        # Required. Value should be the output of the prepare task.
        data_path: data/
        # Required. Value must be `parameters.yaml`.
        parameters_file: parameters.yaml
  statistics:
    # This task computes statistics on the prepared dataset. Its purpose is
    # to get a high-level idea of what is contained inside the data, without
    # providing any specifics of any single entry.
    parameters:
      inputs:
        # Required. Value should be the output of the prepare task.
        data_path: data/
        # Required. Value must be `parameters.yaml`.
        parameters_file: parameters.yaml
      outputs:
        # Required. Value must be `statistics.yaml`.
        output_path:
          type: file
          default: statistics.yaml