Skip to content

Commit fe1f0d1

Browse files
authored
Added option to optimize primitive schemas before exporting them to a file (#19)
* Added option to optimize primitive schemas before export * Revert adding option to optimizePrimitiveSchemas * Added PrimitiveSchemaOptimizer * Added PrimitiveSchemaOptimizer to SubSchemaMergeCommand * Allow string schemas * Updated README with new command * Fix CS * Enhance isPrimitive to support string schemas * Remove transformExportSchemaDefinition from interface * Remove space * Two spaces * Resolve discussions * Change primitive optimizer logic
1 parent 2d3d2fe commit fe1f0d1

9 files changed

+174
-20
lines changed

README.md

+6-3
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,14 @@ $merger->merge();
4444
### Merge optimizers
4545
There are optimizers that you can enable for merging schema:
4646
- FullNameOptimizer: removes unneeded namespaces
47-
- FieldOrderOptimizer: the first fields of a record schema will be: type, name, namespace (if present)
47+
- FieldOrderOptimizer: the first fields of a record schema will be: type, name, namespace (if present)
48+
- PrimitiveSchemaOptimizer: optimizes primitive schemas, e.g. `{"type": "string"}` becomes `"string"`
4849

4950
How to enable optimizer:
5051

5152
**Console example**
5253
```bash
53-
./vendor/bin/avro-cli --optimizeFullNames --optimizeFieldOrder avro:subschema:merge ./example/schemaTemplates ./example/schema
54+
./vendor/bin/avro-cli --optimizeFullNames --optimizeFieldOrder --optimizePrimitiveSchemas avro:subschema:merge ./example/schemaTemplates ./example/schema
5455
```
5556
**PHP Example**
5657
```php
@@ -60,6 +61,7 @@ use PhpKafka\PhpAvroSchemaGenerator\Registry\SchemaRegistry;
6061
use PhpKafka\PhpAvroSchemaGenerator\Merger\SchemaMerger;
6162
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\FieldOrderOptimizer;
6263
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\FullNameOptimizer;
64+
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;
6365

6466
$registry = (new SchemaRegistry())
6567
->addSchemaTemplateDirectory('./schemaTemplates')
@@ -68,6 +70,7 @@ $registry = (new SchemaRegistry())
6870
$merger = new SchemaMerger($registry, './schema');
6971
$merger->addOptimizer(new FieldOrderOptimizer());
7072
$merger->addOptimizer(new FullNameOptimizer());
73+
$merger->addOptimizer(new PrimitiveSchemaOptimizer());
7174

7275
$merger->merge();
7376

@@ -104,6 +107,6 @@ $generator->exportSchemas($schemas);
104107

105108
## Disclaimer
106109
In `v1.3.0` the option `--optimizeSubSchemaNamespaces` was added. It was not working fully
107-
in the `1.x` version and we had some discussions (#13) about it.
110+
in the `1.x` version and we had some discussions ([#13](https://github.com/php-kafka/php-avro-schema-generator/issues/13)) about it.
108111
Ultimately the decision was to adapt this behaviour fully in `v2.0.0` so you might want to
109112
upgrade if you rely on that behaviour.

src/Command/SubSchemaMergeCommand.php

+9-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
namespace PhpKafka\PhpAvroSchemaGenerator\Command;
66

7-
use http\Exception\RuntimeException;
87
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\FieldOrderOptimizer;
98
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\FullNameOptimizer;
9+
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;
1010
use PhpKafka\PhpAvroSchemaGenerator\Registry\SchemaRegistry;
1111
use PhpKafka\PhpAvroSchemaGenerator\Merger\SchemaMerger;
1212
use Symfony\Component\Console\Command\Command;
@@ -21,6 +21,7 @@ class SubSchemaMergeCommand extends Command
2121
protected $optimizerOptionMapping = [
2222
'optimizeFieldOrder' => FieldOrderOptimizer::class,
2323
'optimizeFullNames' => FullNameOptimizer::class,
24+
'optimizePrimitiveSchemas' => PrimitiveSchemaOptimizer::class,
2425
];
2526
protected function configure(): void
2627
{
@@ -48,6 +49,12 @@ protected function configure(): void
4849
null,
4950
InputOption::VALUE_NONE,
5051
'Remove namespaces if they are enclosed in the same namespace'
52+
)
53+
->addOption(
54+
'optimizePrimitiveSchemas',
55+
null,
56+
InputOption::VALUE_NONE,
57+
'Optimize primitive schemas with using just type as a schema'
5158
);
5259
}
5360

@@ -59,7 +66,6 @@ public function execute(InputInterface $input, OutputInterface $output): int
5966
$templateDirectoryArg = $input->getArgument('templateDirectory');
6067
/** @var string $outputDirectoryArg */
6168
$outputDirectoryArg = $input->getArgument('outputDirectory');
62-
$optimizeFullNames = (bool)$input->getOption('optimizeFullNames');
6369

6470
$templateDirectory = $this->getPath($templateDirectoryArg);
6571
$outputDirectory = $this->getPath($outputDirectoryArg);
@@ -71,7 +77,7 @@ public function execute(InputInterface $input, OutputInterface $output): int
7177
$merger = new SchemaMerger($registry, $outputDirectory);
7278

7379
foreach ($this->optimizerOptionMapping as $optionName => $optimizerClass) {
74-
if (true === (bool)$input->getOption($optionName)) {
80+
if (true === (bool) $input->getOption($optionName)) {
7581
$merger->addOptimizer(new $optimizerClass());
7682
}
7783
}

src/Merger/SchemaMerger.php

+13-5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use PhpKafka\PhpAvroSchemaGenerator\Avro\Avro;
99
use PhpKafka\PhpAvroSchemaGenerator\Exception\SchemaMergerException;
1010
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\OptimizerInterface;
11+
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;
1112
use PhpKafka\PhpAvroSchemaGenerator\Registry\SchemaRegistryInterface;
1213
use PhpKafka\PhpAvroSchemaGenerator\Schema\SchemaTemplateInterface;
1314

@@ -125,7 +126,12 @@ public function merge(
125126
$resolvedTemplate = $this->getResolvedSchemaTemplate($rootSchemaTemplate);
126127
foreach ($this->optimizers as $optimizer) {
127128
$resolvedTemplate = $resolvedTemplate->withSchemaDefinition(
128-
$optimizer->optimize($resolvedTemplate->getSchemaDefinition())
129+
$optimizer instanceof PrimitiveSchemaOptimizer ?
130+
$optimizer->optimize(
131+
$resolvedTemplate->getSchemaDefinition(),
132+
$resolvedTemplate->isPrimitive()
133+
) :
134+
$optimizer->optimize($resolvedTemplate->getSchemaDefinition())
129135
);
130136
}
131137
} catch (SchemaMergerException $e) {
@@ -177,12 +183,14 @@ public function exportSchema(
177183
}
178184

179185
/**
180-
* @param array<string,mixed> $schemaDefinition
181-
* @return array<string,mixed>
186+
* @param mixed $schemaDefinition
187+
* @return mixed
182188
*/
183-
public function transformExportSchemaDefinition(array $schemaDefinition): array
189+
private function transformExportSchemaDefinition($schemaDefinition)
184190
{
185-
unset($schemaDefinition['schema_level']);
191+
if (is_array($schemaDefinition)) {
192+
unset($schemaDefinition['schema_level']);
193+
}
186194

187195
return $schemaDefinition;
188196
}

src/Merger/SchemaMergerInterface.php

-6
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,6 @@ public function merge(): int;
3838
*/
3939
public function exportSchema(SchemaTemplateInterface $rootRootSchemaTemplate): void;
4040

41-
/**
42-
* @param array<string,mixed> $schemaDefinition
43-
* @return array<string,mixed>
44-
*/
45-
public function transformExportSchemaDefinition(array $schemaDefinition): array;
46-
4741
/**
4842
* @param OptimizerInterface $optimizer
4943
*/
+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpKafka\PhpAvroSchemaGenerator\Optimizer;
6+
7+
class PrimitiveSchemaOptimizer extends AbstractOptimizer implements OptimizerInterface
8+
{
9+
/**
10+
* @param string $definition
11+
* @return string
12+
* @throws \JsonException
13+
*/
14+
public function optimize(string $definition, bool $isPrimitive = false): string
15+
{
16+
if (false === $isPrimitive) {
17+
return $definition;
18+
}
19+
20+
$data = json_decode($definition, true, JSON_THROW_ON_ERROR);
21+
22+
$data = $this->processSchema($data);
23+
24+
return json_encode($data, JSON_THROW_ON_ERROR);
25+
}
26+
27+
/**
28+
* @param mixed $data
29+
* @return mixed
30+
*/
31+
private function processSchema($data)
32+
{
33+
if (true === isset($data['type'])) {
34+
$data = $data['type'];
35+
}
36+
37+
return $data;
38+
}
39+
}

src/Schema/SchemaTemplate.php

+4
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ public function isPrimitive(): bool
129129
{
130130
$fields = json_decode($this->getSchemaDefinition(), true, JSON_THROW_ON_ERROR);
131131

132+
if (is_string($fields) && true === isset(self::AVRO_PRIMITIVE_TYPES[$fields])) {
133+
return true;
134+
}
135+
132136
if (true === isset($fields['type'])) {
133137
return array_key_exists($fields['type'], self::AVRO_PRIMITIVE_TYPES);
134138
}

tests/Unit/Merger/SchemaMergerTest.php

+42
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use PhpKafka\PhpAvroSchemaGenerator\Exception\SchemaMergerException;
99
use PhpKafka\PhpAvroSchemaGenerator\Merger\SchemaMerger;
1010
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\OptimizerInterface;
11+
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;
1112
use PhpKafka\PhpAvroSchemaGenerator\Registry\SchemaRegistryInterface;
1213
use PhpKafka\PhpAvroSchemaGenerator\Schema\SchemaTemplateInterface;
1314
use PHPUnit\Framework\TestCase;
@@ -443,6 +444,47 @@ public function testMergePrimitive()
443444
rmdir('/tmp/foobar');
444445
}
445446

447+
/**
 * Merges a single primitive root schema with a mocked PrimitiveSchemaOptimizer
 * registered and checks that the optimizer receives the definition together with
 * the primitive flag, and that the schema file is written.
 */
public function testMergePrimitiveWithOptimizerEnabled()
{
    $outputDirectory = '/tmp/foobar';
    $exportedFile = '/tmp/foobar/primitive-type.avsc';
    $definition = '{
"type": "string"
}';

    // Root schema template: always primitive, always yields the same definition.
    $template = $this->getMockForAbstractClass(SchemaTemplateInterface::class);
    $template->expects(self::exactly(3))->method('isPrimitive')->willReturn(true);
    $template->expects(self::once())->method('getFilename')->willReturn('primitive-type.avsc');
    $template->expects(self::exactly(3))->method('getSchemaDefinition')->willReturn($definition);
    $template
        ->expects(self::exactly(2))
        ->method('withSchemaDefinition')
        ->with($definition)
        ->willReturn($template);

    // Registry exposing exactly that one root schema.
    $registry = $this->getMockForAbstractClass(SchemaRegistryInterface::class);
    $registry->expects(self::once())->method('getRootSchemas')->willReturn([$template]);

    // The optimizer must be called once, with the primitive flag set.
    $primitiveOptimizer = $this->getMockBuilder(PrimitiveSchemaOptimizer::class)->getMock();
    $primitiveOptimizer
        ->expects(self::once())
        ->method('optimize')
        ->with($definition, true)
        ->willReturn($definition);

    $merger = new SchemaMerger($registry, $outputDirectory);
    $merger->addOptimizer($primitiveOptimizer);
    $merger->merge(true);

    self::assertFileExists($exportedFile);

    // Remove what the merge wrote.
    unlink($exportedFile);
    rmdir($outputDirectory);
}
487+
446488
public function testMergeWithFilenameOption()
447489
{
448490
$definition = '{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpKafka\PhpAvroSchemaGenerator\Tests\Unit\Optimizer;
6+
7+
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;
8+
use PHPUnit\Framework\TestCase;
9+
10+
/**
 * Unit tests for PrimitiveSchemaOptimizer::optimize().
 */
class PrimitiveSchemaOptimizerTest extends TestCase
{
    /**
     * An object-form primitive schema is reduced to its short form.
     */
    public function testOptimize(): void
    {
        $optimized = (new PrimitiveSchemaOptimizer())->optimize('{"type": "string"}', true);

        self::assertEquals($this->roundTrip('"string"'), $optimized);
    }

    /**
     * A schema that is already in short form comes back unchanged.
     */
    public function testOptimizeForStringSchema(): void
    {
        $optimized = (new PrimitiveSchemaOptimizer())->optimize('"string"', true);

        self::assertEquals($this->roundTrip('"string"'), $optimized);
    }

    /**
     * A definition not flagged as primitive is returned as given.
     */
    public function testOptimizeForRecordSchema(): void
    {
        $recordSchema = '{"type":"record","namespace":"com.example","name":"Book","fields":[{"name":"isbn","type":"string"}]}';

        $optimized = (new PrimitiveSchemaOptimizer())->optimize($recordSchema, false);

        self::assertEquals($this->roundTrip($recordSchema), $optimized);
    }

    /**
     * Decodes and re-encodes a JSON document to build the expected value.
     */
    private function roundTrip(string $json): string
    {
        return json_encode(json_decode($json));
    }
}

tests/Unit/Schema/SchemaTemplateTest.php

+17-3
Original file line numberDiff line numberDiff line change
@@ -49,20 +49,34 @@ public function testIsPrimitiveTrue()
4949
{
5050
$template = (new SchemaTemplate())->withSchemaDefinition('{"type":"string"}');
5151

52-
self::assertTrue($template->isPrimitive($template));
52+
self::assertTrue($template->isPrimitive());
5353
}
5454

5555
public function testIsPrimitiveFalse()
5656
{
5757
$template = (new SchemaTemplate())->withSchemaDefinition('{"type":"record"}');
5858

59-
self::assertFalse($template->isPrimitive($template));
59+
self::assertFalse($template->isPrimitive());
60+
}
61+
62+
public function testIsPrimitiveTrueForOptimizedSchema()
63+
{
64+
$template = (new SchemaTemplate())->withSchemaDefinition('"string"');
65+
66+
self::assertTrue($template->isPrimitive());
67+
}
68+
69+
public function testIsPrimitiveFalseForOptimizedSchema()
70+
{
71+
$template = (new SchemaTemplate())->withSchemaDefinition('"foo"');
72+
73+
self::assertFalse($template->isPrimitive());
6074
}
6175

6276
public function testIsPrimitiveFalseOnMissingType()
6377
{
6478
$template = (new SchemaTemplate())->withSchemaDefinition('{"foo":"bar"}');
6579

66-
self::assertFalse($template->isPrimitive($template));
80+
self::assertFalse($template->isPrimitive());
6781
}
6882
}

0 commit comments

Comments
 (0)