diff --git a/src/core/etl/src/Flow/ETL/DSL/functions.php b/src/core/etl/src/Flow/ETL/DSL/functions.php index 50362e040..705662f85 100644 --- a/src/core/etl/src/Flow/ETL/DSL/functions.php +++ b/src/core/etl/src/Flow/ETL/DSL/functions.php @@ -25,7 +25,7 @@ use Flow\ETL\PHP\Type\{Type, TypeDetector}; use Flow\ETL\Row\Factory\NativeEntryFactory; use Flow\ETL\Row\Schema\Formatter\ASCIISchemaFormatter; -use Flow\ETL\Row\Schema\{Definition, SchemaFormatter}; +use Flow\ETL\Row\Schema\{Definition, Matcher\EvolvingSchemaMatcher, Matcher\StrictSchemaMatcher, SchemaFormatter}; use Flow\ETL\Row\{EntryFactory, EntryReference, Reference, References, Schema}; use Flow\ETL\{Config, ConfigBuilder, DataFrame, Extractor, Flow, FlowContext, Formatter, Loader, Partition, Pipeline, Row, Rows, Transformer, Window}; @@ -974,6 +974,16 @@ function schema_from_json(string $schema) : Schema return Schema::fromArray(\json_decode($schema, true, 512, JSON_THROW_ON_ERROR)); } +function schema_strict_matcher() : StrictSchemaMatcher +{ + return new StrictSchemaMatcher(); +} + +function schema_evolving_matcher() : EvolvingSchemaMatcher +{ + return new EvolvingSchemaMatcher(); +} + function int_schema(string $name, bool $nullable = false, ?Schema\Metadata $metadata = null) : Definition { return Definition::integer($name, $nullable, $metadata); diff --git a/src/core/etl/src/Flow/ETL/Row/Schema.php b/src/core/etl/src/Flow/ETL/Row/Schema.php index 6b64fed02..6854a05b3 100644 --- a/src/core/etl/src/Flow/ETL/Row/Schema.php +++ b/src/core/etl/src/Flow/ETL/Row/Schema.php @@ -5,7 +5,7 @@ namespace Flow\ETL\Row; use Flow\ETL\Exception\{InvalidArgumentException, SchemaDefinitionNotFoundException, SchemaDefinitionNotUniqueException}; -use Flow\ETL\Row\Schema\Definition; +use Flow\ETL\Row\Schema\{Definition, Matcher\StrictSchemaMatcher, SchemaMatcher}; final class Schema implements \Countable { @@ -136,6 +136,11 @@ public function keep(string|Reference ...$entries) : self return $this; } + public function matches(self $schema, SchemaMatcher $matcher = new StrictSchemaMatcher()) : bool + { + return $matcher->match($this, $schema); + } + public function merge(self $schema) : self { $newDefinitions = $this->definitions; diff --git a/src/core/etl/src/Flow/ETL/Row/Schema/Matcher/EvolvingSchemaMatcher.php b/src/core/etl/src/Flow/ETL/Row/Schema/Matcher/EvolvingSchemaMatcher.php new file mode 100644 index 000000000..925a52726 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Row/Schema/Matcher/EvolvingSchemaMatcher.php @@ -0,0 +1,50 @@ +count() < $left->count()) { + return false; + } + + foreach ($right->definitions() as $rightDefinition) { + $leftDefinition = $left->findDefinition($rightDefinition->entry()); + + if ($leftDefinition === null) { + if ($right->count() === $left->count()) { + return false; + } + + continue; + } + + if (!$rightDefinition->isNullable() && $leftDefinition->isNullable()) { + return false; + } + + // making both sides nullable to compare just types of the fields + if (!$rightDefinition->type()->makeNullable(true)->isEqual($leftDefinition->type()->makeNullable(true))) { + return false; + } + } + + return true; + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Schema/Matcher/StrictSchemaMatcher.php b/src/core/etl/src/Flow/ETL/Row/Schema/Matcher/StrictSchemaMatcher.php new file mode 100644 index 000000000..9d06a6eb3 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Row/Schema/Matcher/StrictSchemaMatcher.php @@ -0,0 +1,32 @@ +definitions()) !== \count($right->definitions())) { + return false; + } + + foreach ($left->definitions() as $leftDefinition) { + $rightDefinition = $right->findDefinition($leftDefinition->entry()); + + if ($rightDefinition === null) { + return false; + } + + if (!$leftDefinition->isEqual($rightDefinition)) { + return false; + } + } + + return true; + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Schema/SchemaMatcher.php b/src/core/etl/src/Flow/ETL/Row/Schema/SchemaMatcher.php new file mode 100644 index 000000000..dd94c2d20 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Row/Schema/SchemaMatcher.php @@ -0,0 +1,12 @@ +isEqual( + Definition::integer('id', nullable: false) + ) + ); + self::assertTrue( + $def->isEqual( + Definition::integer('id', nullable: true) + ) + ); + } + public function test_equals_types() : void { $def = Definition::list('list', new ListType(ListElement::integer())); diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Matcher/EvolvingSchemaMatcherTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Matcher/EvolvingSchemaMatcherTest.php new file mode 100644 index 000000000..44d13e966 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Matcher/EvolvingSchemaMatcherTest.php @@ -0,0 +1,117 @@ +match($left, $right)); + } + + public function test_right_having_same_number_of_definitions_but_different_names() : void + { + $left = schema( + int_schema('id'), + str_schema('name'), + ); + + $right = schema( + int_schema('id'), + str_schema('surname'), + ); + + self::assertFalse((new EvolvingSchemaMatcher())->match($left, $right)); + } + + public function test_right_schema_adding_new_field() : void + { + $left = schema( + int_schema('id'), + str_schema('name'), + ); + + $right = schema( + int_schema('id'), + str_schema('name'), + bool_schema('active'), + ); + + self::assertTrue((new EvolvingSchemaMatcher())->match($left, $right)); + } + + public function test_right_schema_changing_nullable_field_to_non_nullable() : void + { + $left = schema( + int_schema('id'), + str_schema('name', nullable: true), + ); + + $right = schema( + int_schema('id'), + str_schema('name'), + ); + + self::assertFalse((new EvolvingSchemaMatcher())->match($left, $right)); + } + + public function test_right_schema_changing_type_of_field() : void + { + $left = schema( + int_schema('id'), + str_schema('name'), + ); + + $right = schema( + int_schema('id'), + bool_schema('name'), + ); + + self::assertFalse((new EvolvingSchemaMatcher())->match($left, $right)); + } + + public function test_right_schema_is_the_same_as_left_schema() : void + { + $left = schema( + int_schema('id'), + str_schema('name'), + ); + + $right = schema( + int_schema('id'), + str_schema('name'), + ); + + self::assertTrue((new EvolvingSchemaMatcher())->match($left, $right)); + } + + public function test_right_schema_making_non_nullable_field_into_nullable() : void + { + $left = schema( + int_schema('id'), + str_schema('name'), + ); + + $right = schema( + int_schema('id'), + str_schema('name', nullable: true), + ); + + self::assertTrue((new EvolvingSchemaMatcher())->match($left, $right)); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Matcher/StrictSchemaMatcherTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Matcher/StrictSchemaMatcherTest.php new file mode 100644 index 000000000..fcab728a9 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Matcher/StrictSchemaMatcherTest.php @@ -0,0 +1,73 @@ +match($left, $right)); + } + + public function test_matching_same_number_of_definitions_but_different_names() : void + { + $left = schema( + str_schema('id'), + str_schema('name'), + ); + + $right = schema( + str_schema('id'), + str_schema('surname'), + ); + + self::assertFalse((new StrictSchemaMatcher())->match($left, $right)); + } + + public function test_matching_schemas_with_different_nullable_fields() : void + { + $left = schema( + str_schema('id'), + str_schema('name', nullable: true), + ); + + $right = schema( + str_schema('id'), + str_schema('name'), + ); + + self::assertFalse((new StrictSchemaMatcher())->match($left, $right)); + } + + public function test_matching_the_same_schema() : void + { + $left = schema( + str_schema('id'), + str_schema('name'), + ); + + $right = schema( + str_schema('id'), + str_schema('name'), + ); + + self::assertTrue((new StrictSchemaMatcher())->match($left, $right)); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/SchemaTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/SchemaTest.php index 02b9cf3d9..4f684760b 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/SchemaTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/SchemaTest.php @@ -4,7 +4,25 @@ namespace Flow\ETL\Tests\Unit\Row; -use function Flow\ETL\DSL\{bool_schema, int_schema, json_schema, list_schema, map_schema, schema, schema_from_json, schema_to_json, str_schema, struct_element, structure_schema, type_int, type_list, type_map, type_string, type_structure, uuid_schema}; +use function Flow\ETL\DSL\{bool_schema, + int_schema, + json_schema, + list_schema, + map_schema, + schema, + schema_evolving_matcher, + schema_from_json, + schema_strict_matcher, + schema_to_json, + str_schema, + struct_element, + structure_schema, + type_int, + type_list, + type_map, + type_string, + type_structure, + uuid_schema}; use Flow\ETL\Exception\{InvalidArgumentException, SchemaDefinitionNotFoundException, SchemaDefinitionNotUniqueException}; use Flow\ETL\Row\{EntryReference, Schema}; use PHPUnit\Framework\TestCase; @@ -143,6 +161,38 @@ public function test_making_whole_schema_nullable() : void ); } + public function test_matching_schema_with_evolving_schema_matcher() : void + { + $left = schema( + int_schema('id'), + str_schema('name'), + ); + + $right = schema( + int_schema('id'), + str_schema('name'), + str_schema('surname'), + ); + + self::assertTrue($left->matches($right, schema_evolving_matcher())); + } + + public function test_matching_schema_with_strict_schema_matcher() : void + { + $left = schema( + int_schema('id'), + str_schema('name'), + ); + + $right = schema( + int_schema('id'), + str_schema('name'), + str_schema('surname'), + ); + + self::assertFalse($left->matches($right, schema_strict_matcher())); + } + public function test_normalizing_and_recreating_schema() : void { $schema = schema(