Skip to content
This repository was archived by the owner on Nov 3, 2023. It is now read-only.

Commit 58122dd

Browse files
committed
publish latest OpenLineage spec
Signed-off-by: Julien Le Dem <[email protected]>
1 parent de231e7 commit 58122dd

12 files changed

+681
-0
lines changed

spec/1-0-2/OpenLineage.json

+236
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"$id": "https://openlineage.io/spec/1-0-2/OpenLineage.json",
4+
"$defs": {
5+
"RunEvent": {
6+
"type": "object",
7+
"properties": {
8+
"eventType": {
9+
"description": "the current transition of the run state. It is required to issue 1 START event and 1 of [ COMPLETE, ABORT, FAIL ] event per run. Additional events with OTHER eventType can be added to the same run. For example to send additional metadata after the run is complete",
10+
"type": "string",
11+
"enum": [
12+
"START",
13+
"COMPLETE",
14+
"ABORT",
15+
"FAIL",
16+
"OTHER"
17+
],
18+
"example": "START|COMPLETE|ABORT|FAIL|OTHER"
19+
},
20+
"eventTime": {
21+
"description": "the time the event occurred at",
22+
"type": "string",
23+
"format": "date-time"
24+
},
25+
"run": {
26+
"$ref": "#/$defs/Run"
27+
},
28+
"job": {
29+
"$ref": "#/$defs/Job"
30+
},
31+
"inputs": {
32+
"description": "The set of **input** datasets.",
33+
"type": "array",
34+
"items": {
35+
"$ref": "#/$defs/InputDataset"
36+
}
37+
},
38+
"outputs": {
39+
"description": "The set of **output** datasets.",
40+
"type": "array",
41+
"items": {
42+
"$ref": "#/$defs/OutputDataset"
43+
}
44+
},
45+
"producer": {
46+
"description": "URI identifying the producer of this metadata. For example this could be a git url with a given tag or sha",
47+
"type": "string",
48+
"format": "uri",
49+
"example": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client"
50+
},
51+
"schemaURL": {
52+
"description": "The JSON Pointer (https://tools.ietf.org/html/rfc6901) URL to the corresponding version of the schema definition for this RunEvent",
53+
"type": "string",
54+
"format": "uri",
55+
"example": "https://openlineage.io/spec/0-0-1/OpenLineage.json"
56+
}
57+
},
58+
"required": [
59+
"run",
60+
"job",
61+
"eventTime",
62+
"producer",
63+
"schemaURL"
64+
]
65+
},
66+
"Run": {
67+
"type": "object",
68+
"properties": {
69+
"runId": {
70+
"description": "The globally unique ID of the run associated with the job.",
71+
"type": "string",
72+
"format": "uuid"
73+
},
74+
"facets": {
75+
"description": "The run facets.",
76+
"type": "object",
77+
"additionalProperties": {
78+
"$ref": "#/$defs/RunFacet"
79+
}
80+
}
81+
},
82+
"required": [
83+
"runId"
84+
]
85+
},
86+
"RunFacet": {
87+
"description": "A Run Facet",
88+
"type": "object",
89+
"allOf": [
90+
{ "$ref": "#/$defs/BaseFacet" }
91+
]
92+
},
93+
"Job": {
94+
"type": "object",
95+
"properties": {
96+
"namespace": {
97+
"description": "The namespace containing that job",
98+
"type": "string",
99+
"example": "my-scheduler-namespace"
100+
},
101+
"name": {
102+
"description": "The unique name for that job within that namespace",
103+
"type": "string",
104+
"example": "myjob.mytask"
105+
},
106+
"facets": {
107+
"description": "The job facets.",
108+
"type": "object",
109+
"additionalProperties": {
110+
"$ref": "#/$defs/JobFacet"
111+
}
112+
}
113+
},
114+
"required": [
115+
"namespace",
116+
"name"
117+
]
118+
},
119+
"JobFacet": {
120+
"description": "A Job Facet",
121+
"type": "object",
122+
"allOf": [
123+
{ "$ref": "#/$defs/BaseFacet" }
124+
]
125+
},
126+
"InputDataset": {
127+
"description": "An input dataset",
128+
"type": "object",
129+
"allOf": [
130+
{ "$ref": "#/$defs/Dataset" },
131+
{
132+
"type": "object",
133+
"properties": {
134+
"inputFacets": {
135+
"description": "The input facets for this dataset.",
136+
"type": "object",
137+
"additionalProperties": {
138+
"$ref": "#/$defs/InputDatasetFacet"
139+
}
140+
}
141+
}
142+
}
143+
]
144+
},
145+
"InputDatasetFacet": {
146+
"description": "An Input Dataset Facet",
147+
"type": "object",
148+
"allOf": [
149+
{ "$ref": "#/$defs/BaseFacet" }
150+
]
151+
},
152+
"OutputDataset": {
153+
"description": "An output dataset",
154+
"type": "object",
155+
"allOf": [
156+
{ "$ref": "#/$defs/Dataset" },
157+
{
158+
"type": "object",
159+
"properties": {
160+
"outputFacets": {
161+
"description": "The output facets for this dataset",
162+
"type": "object",
163+
"additionalProperties": {
164+
"$ref": "#/$defs/OutputDatasetFacet"
165+
}
166+
}
167+
}
168+
}
169+
]
170+
},
171+
"OutputDatasetFacet": {
172+
"description": "An Output Dataset Facet",
173+
"type": "object",
174+
"allOf": [
175+
{ "$ref": "#/$defs/BaseFacet" }
176+
]
177+
},
178+
"Dataset": {
179+
"type": "object",
180+
"properties": {
181+
"namespace": {
182+
"description": "The namespace containing that dataset",
183+
"type": "string",
184+
"example": "my-datasource-namespace"
185+
},
186+
"name": {
187+
"description": "The unique name for that dataset within that namespace",
188+
"type": "string",
189+
"example": "instance.schema.table"
190+
},
191+
"facets": {
192+
"description": "The facets for this dataset",
193+
"type": "object",
194+
"additionalProperties": {
195+
"$ref": "#/$defs/DatasetFacet"
196+
}
197+
}
198+
},
199+
"required": [
200+
"namespace",
201+
"name"
202+
]
203+
},
204+
"DatasetFacet": {
205+
"description": "A Dataset Facet",
206+
"type": "object",
207+
"allOf": [
208+
{ "$ref": "#/$defs/BaseFacet" }
209+
]
210+
},
211+
"BaseFacet": {
212+
"description": "all fields of the base facet are prefixed with _ to avoid name conflicts in facets",
213+
"type": "object",
214+
"properties": {
215+
"_producer": {
216+
"description": "URI identifying the producer of this metadata. For example this could be a git url with a given tag or sha",
217+
"type": "string",
218+
"format": "uri",
219+
"example": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client"
220+
},
221+
"_schemaURL": {
222+
"description": "The JSON Pointer (https://tools.ietf.org/html/rfc6901) URL to the corresponding version of the schema definition for this facet",
223+
"type": "string",
224+
"format": "uri",
225+
"example": "https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/BaseFacet"
226+
}
227+
},
228+
"additionalProperties": true,
229+
"required": [
230+
"_producer",
231+
"_schemaURL"
232+
]
233+
}
234+
},
235+
"$ref": "#/$defs/RunEvent"
236+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"$schema" : "https://json-schema.org/draft/2020-12/schema",
3+
"$id" : "https://openlineage.io/spec/facets/1-0-0/DataQualityAssertionsDatasetFacet.json",
4+
"$defs" : {
5+
"DataQualityAssertionsDatasetFacet" : {
6+
"description" : "list of tests performed on dataset or dataset columns, and their results",
7+
"allOf" : [ {
8+
"$ref" : "https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/InputDatasetFacet"
9+
}, {
10+
"type" : "object",
11+
"required" : [ "assertions" ],
12+
"properties" : {
13+
"assertions" : {
14+
"type" : "array",
15+
"items" : {
16+
"type" : "object",
17+
"properties" : {
18+
"assertion" : {
19+
"type" : "string",
20+
"description" : "Type of expectation test that dataset is subjected to",
21+
"example" : "not_null"
22+
},
23+
"success" : {
24+
"type" : "boolean"
25+
},
26+
"column" : {
27+
"type" : "string",
28+
"description" : "Column that expectation is testing. It should match the name provided in SchemaDatasetFacet. If column field is empty, then expectation refers to whole dataset.",
29+
"example" : "id"
30+
}
31+
},
32+
"required" : [ "assertion", "success" ]
33+
}
34+
}
35+
}
36+
} ],
37+
"type" : "object"
38+
}
39+
},
40+
"type" : "object",
41+
"properties" : {
42+
"dataQualityAssertions" : {
43+
"$ref" : "#/$defs/DataQualityAssertionsDatasetFacet"
44+
}
45+
}
46+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
{
2+
"$schema" : "https://json-schema.org/draft/2020-12/schema",
3+
"$id" : "https://openlineage.io/spec/facets/1-0-0/DataQualityMetricsInputDatasetFacet.json",
4+
"$defs" : {
5+
"DataQualityMetricsInputDatasetFacet" : {
6+
"allOf" : [ {
7+
"$ref" : "https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/InputDatasetFacet"
8+
}, {
9+
"type" : "object",
10+
"required" : [ "columnMetrics" ],
11+
"properties" : {
12+
"rowCount" : {
13+
"description" : "The number of rows evaluated",
14+
"type" : "integer"
15+
},
16+
"bytes" : {
17+
"description" : "The size in bytes",
18+
"type" : "integer"
19+
},
20+
"columnMetrics" : {
21+
"description" : "The property key is the column name",
22+
"type" : "object",
23+
"additionalProperties" : {
24+
"type" : "object",
25+
"properties" : {
26+
"nullCount" : {
27+
"description" : "The number of null values in this column for the rows evaluated",
28+
"type" : "integer"
29+
},
30+
"distinctCount" : {
31+
"description" : "The number of distinct values in this column for the rows evaluated",
32+
"type" : "integer"
33+
},
34+
"sum" : {
35+
"description" : "The total sum of values in this column for the rows evaluated",
36+
"type" : "number"
37+
},
38+
"count" : {
39+
"description" : "The number of values in this column",
40+
"type" : "number"
41+
},
42+
"min" : {
43+
"type" : "number"
44+
},
45+
"max" : {
46+
"type" : "number"
47+
},
48+
"quantiles" : {
49+
"description" : "The property key is the quantile. Examples: 0.1 0.25 0.5 0.75 1",
50+
"type" : "object",
51+
"additionalProperties" : {
52+
"type" : "number"
53+
}
54+
}
55+
}
56+
}
57+
}
58+
}
59+
} ],
60+
"type" : "object"
61+
}
62+
},
63+
"type" : "object",
64+
"properties" : {
65+
"dataQualityMetrics" : {
66+
"$ref" : "#/$defs/DataQualityMetricsInputDatasetFacet"
67+
}
68+
}
69+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
{
2+
"$schema" : "https://json-schema.org/draft/2020-12/schema",
3+
"$id" : "https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json",
4+
"$defs" : {
5+
"DatasourceDatasetFacet" : {
6+
"allOf" : [ {
7+
"$ref" : "https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/DatasetFacet"
8+
}, {
9+
"type" : "object",
10+
"properties" : {
11+
"name" : {
12+
"type" : "string"
13+
},
14+
"uri" : {
15+
"type" : "string",
16+
"format" : "uri"
17+
}
18+
}
19+
} ],
20+
"type" : "object"
21+
}
22+
},
23+
"type" : "object",
24+
"properties" : {
25+
"dataSource" : {
26+
"$ref" : "#/$defs/DatasourceDatasetFacet"
27+
}
28+
}
29+
}

0 commit comments

Comments
 (0)