Skip to content

Commit

Permalink
add schema tests for millis, micros and nanos
Browse files Browse the repository at this point in the history
  • Loading branch information
saraswatpuneet committed Sep 12, 2024
1 parent abf7f0b commit 0a5040f
Show file tree
Hide file tree
Showing 8 changed files with 422 additions and 5 deletions.
11 changes: 8 additions & 3 deletions lib/jsonSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ const fromJsonSchemaArray = (fieldValue: SupportedJSONSchema4, optionalFieldList
fieldValue.items.properties.unit &&
fieldValue.items.properties.isAdjustedToUTC
) {
const unit = fieldValue.items.properties.unit.default?.toString() || 'MILLIS';
if (!fieldValue.items.properties.unit.enum) {
throw new UnsupportedJsonSchemaError('Unit enum is not defined');
}
const unit = fieldValue.items.properties.unit.enum[0];
const isAdjustedToUTC = !!fieldValue.items.properties.isAdjustedToUTC.default;
let timeUnit: TimeUnit;

Expand Down Expand Up @@ -152,10 +155,12 @@ const fromJsonSchemaField =

case 'object':
if (fieldValue.properties && fieldValue.properties.unit && fieldValue.properties.isAdjustedToUTC) {
const unit = fieldValue.properties.unit.default?.toString() || 'MILLIS';
if (!fieldValue.properties.unit.enum) {
throw new UnsupportedJsonSchemaError('Unit enum is not defined');
}
const unit = fieldValue.properties.unit.enum[0];
const isAdjustedToUTC = !!fieldValue.properties.isAdjustedToUTC.default;
let timeUnit: TimeUnit;

switch (unit) {
case 'MICROS':
timeUnit = new TimeUnit({ MICROS: true });
Expand Down
25 changes: 23 additions & 2 deletions test/jsonSchema.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ import arraySchema from './test-files/array.schema.json';
import objectSchema from './test-files/object.schema.json';
import objectNestedSchema from './test-files/object-nested.schema.json';
import timeSchema from './test-files/time.schema.json';

import timeSchemaMillis from './test-files/time.schema_millis.json';
import timeSchemaMicros from './test-files/time.schema_micros.json';
import timeSchemaNanos from './test-files/time.schema_nanos.json';
import { ParquetSchema, ParquetWriter, ParquetReader } from '../parquet';

const update = false;
Expand Down Expand Up @@ -54,11 +56,29 @@ describe('Json Schema Conversion', function () {
checkSnapshot(ps, './test-files/object-nested.schema.result.json', update);
});

it('Time Schema', function () {
it('Time Schema Generic', function () {
const js = timeSchema as JSONSchema4;
const ps = ParquetSchema.fromJsonSchema(js);
checkSnapshot(ps, './test-files/time.schema.result.json', update);
});

it('Time Schema MILLIS', function () {
const js = timeSchemaMillis as JSONSchema4;
const ps = ParquetSchema.fromJsonSchema(js);
checkSnapshot(ps, './test-files/time.schema_millis.result.json', update);
});

it('Time Schema MICROS', function () {
const js = timeSchemaMicros as JSONSchema4;
const ps = ParquetSchema.fromJsonSchema(js);
checkSnapshot(ps, './test-files/time.schema_micros.result.json', update);
});

it('Time Schema NANOS', function () {
const js = timeSchemaNanos as JSONSchema4;
const ps = ParquetSchema.fromJsonSchema(js);
checkSnapshot(ps, './test-files/time.schema_nanos.result.json', update);
});
});

const parquetSchema = ParquetSchema.fromJsonSchema({
Expand Down Expand Up @@ -128,6 +148,7 @@ const parquetSchema = ParquetSchema.fromJsonSchema({
},
unit: {
type: 'string',
enum: ['MILLIS', 'MICROS', 'NANOS'], // Define enum for time units
},
isAdjustedToUTC: {
type: 'boolean',
Expand Down
29 changes: 29 additions & 0 deletions test/test-files/time.schema_micros.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"$id": "https://example.com/time-micros.schema.json",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"description": "A schema to test the TIME logical type in Parquet with MICROS unit",
"type": "object",
"properties": {
"time_field": {
"type": "object",
"properties": {
"value": {
"type": "number",
"description": "Time value in MICROS"
},
"unit": {
"type": "string",
"enum": ["MICROS"],
"description": "The unit is fixed to MICROS"
},
"isAdjustedToUTC": {
"type": "boolean",
"description": "Whether the time is adjusted to UTC"
}
},
"required": ["value", "isAdjustedToUTC"],
"additionalProperties": false
}
},
"required": ["time_field"]
}
102 changes: 102 additions & 0 deletions test/test-files/time.schema_micros.result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{
"schema": {
"time_field": {
"optional": false,
"type": "INT64",
"logicalType": {
"STRING": null,
"MAP": null,
"LIST": null,
"ENUM": null,
"DECIMAL": null,
"DATE": null,
"TIME": {
"isAdjustedToUTC": false,
"unit": {
"MILLIS": null,
"MICROS": {},
"NANOS": null
}
},
"TIMESTAMP": null,
"INTEGER": null,
"UNKNOWN": null,
"JSON": null,
"BSON": null,
"UUID": null
},
"encoding": "PLAIN",
"compression": "UNCOMPRESSED"
}
},
"fields": {
"time_field": {
"name": "time_field",
"primitiveType": "INT64",
"originalType": "TIME_MICROS",
"logicalType": {
"STRING": null,
"MAP": null,
"LIST": null,
"ENUM": null,
"DECIMAL": null,
"DATE": null,
"TIME": {
"isAdjustedToUTC": false,
"unit": {
"MILLIS": null,
"MICROS": {},
"NANOS": null
}
},
"TIMESTAMP": null,
"INTEGER": null,
"UNKNOWN": null,
"JSON": null,
"BSON": null,
"UUID": null
},
"path": ["time_field"],
"repetitionType": "REQUIRED",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED",
"rLevelMax": 0,
"dLevelMax": 0
}
},
"fieldList": [
{
"name": "time_field",
"primitiveType": "INT64",
"originalType": "TIME_MICROS",
"logicalType": {
"STRING": null,
"MAP": null,
"LIST": null,
"ENUM": null,
"DECIMAL": null,
"DATE": null,
"TIME": {
"isAdjustedToUTC": false,
"unit": {
"MILLIS": null,
"MICROS": {},
"NANOS": null
}
},
"TIMESTAMP": null,
"INTEGER": null,
"UNKNOWN": null,
"JSON": null,
"BSON": null,
"UUID": null
},
"path": ["time_field"],
"repetitionType": "REQUIRED",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED",
"rLevelMax": 0,
"dLevelMax": 0
}
]
}
29 changes: 29 additions & 0 deletions test/test-files/time.schema_millis.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"$id": "https://example.com/time-millis.schema.json",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"description": "A schema to test the TIME logical type in Parquet with MILLIS unit",
"type": "object",
"properties": {
"time_field": {
"type": "object",
"properties": {
"value": {
"type": "number",
"description": "Time value in MILLIS"
},
"unit": {
"type": "string",
"enum": ["MILLIS"],
"description": "The unit is fixed to MILLIS"
},
"isAdjustedToUTC": {
"type": "boolean",
"description": "Whether the time is adjusted to UTC"
}
},
"required": ["value", "isAdjustedToUTC"],
"additionalProperties": false
}
},
"required": ["time_field"]
}
102 changes: 102 additions & 0 deletions test/test-files/time.schema_millis.result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{
"schema": {
"time_field": {
"optional": false,
"type": "INT32",
"logicalType": {
"STRING": null,
"MAP": null,
"LIST": null,
"ENUM": null,
"DECIMAL": null,
"DATE": null,
"TIME": {
"isAdjustedToUTC": false,
"unit": {
"MILLIS": {},
"MICROS": null,
"NANOS": null
}
},
"TIMESTAMP": null,
"INTEGER": null,
"UNKNOWN": null,
"JSON": null,
"BSON": null,
"UUID": null
},
"encoding": "PLAIN",
"compression": "UNCOMPRESSED"
}
},
"fields": {
"time_field": {
"name": "time_field",
"primitiveType": "INT32",
"originalType": "TIME_MILLIS",
"logicalType": {
"STRING": null,
"MAP": null,
"LIST": null,
"ENUM": null,
"DECIMAL": null,
"DATE": null,
"TIME": {
"isAdjustedToUTC": false,
"unit": {
"MILLIS": {},
"MICROS": null,
"NANOS": null
}
},
"TIMESTAMP": null,
"INTEGER": null,
"UNKNOWN": null,
"JSON": null,
"BSON": null,
"UUID": null
},
"path": ["time_field"],
"repetitionType": "REQUIRED",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED",
"rLevelMax": 0,
"dLevelMax": 0
}
},
"fieldList": [
{
"name": "time_field",
"primitiveType": "INT32",
"originalType": "TIME_MILLIS",
"logicalType": {
"STRING": null,
"MAP": null,
"LIST": null,
"ENUM": null,
"DECIMAL": null,
"DATE": null,
"TIME": {
"isAdjustedToUTC": false,
"unit": {
"MILLIS": {},
"MICROS": null,
"NANOS": null
}
},
"TIMESTAMP": null,
"INTEGER": null,
"UNKNOWN": null,
"JSON": null,
"BSON": null,
"UUID": null
},
"path": ["time_field"],
"repetitionType": "REQUIRED",
"encoding": "PLAIN",
"compression": "UNCOMPRESSED",
"rLevelMax": 0,
"dLevelMax": 0
}
]
}
29 changes: 29 additions & 0 deletions test/test-files/time.schema_nanos.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"$id": "https://example.com/time-nanos.schema.json",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"description": "A schema to test the TIME logical type in Parquet with NANOS unit",
"type": "object",
"properties": {
"time_field": {
"type": "object",
"properties": {
"value": {
"type": "number",
"description": "Time value in NANOS"
},
"unit": {
"type": "string",
"enum": ["NANOS"],
"description": "The unit is fixed to NANOS"
},
"isAdjustedToUTC": {
"type": "boolean",
"description": "Whether the time is adjusted to UTC"
}
},
"required": ["value", "isAdjustedToUTC"],
"additionalProperties": false
}
},
"required": ["time_field"]
}
Loading

0 comments on commit 0a5040f

Please sign in to comment.