diff --git a/CHANGELOG.md b/CHANGELOG.md index ca59bc063..03e8a7878 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## [Unreleased] - 2025-01-31 +## [Unreleased] - 2025-02-01 ### Added - [#1429](https://github.com/flow-php/flow/pull/1429) - **Flow to Doctrine Dbal schema converter** - [@norberttech](https://github.com/norberttech) @@ -70,6 +70,7 @@ - [#1244](https://github.com/flow-php/flow/pull/1244) - **Added CLI command to read schema from a file** - [@norberttech](https://github.com/norberttech) ### Changed +- [#1437](https://github.com/flow-php/flow/pull/1437) - **Allow to not pass primary key name to dbal metadata since it's not used in some cases** - [@norberttech](https://github.com/norberttech) - [#1435](https://github.com/flow-php/flow/pull/1435) - **Indexes and primary key are added to directly the table instead of being returned** - [@norberttech](https://github.com/norberttech) - [#1429](https://github.com/flow-php/flow/pull/1429) - **Moved UPGRADE.md to documentation folder to make it available as a static website** - [@norberttech](https://github.com/norberttech) - [#1427](https://github.com/flow-php/flow/pull/1427) - **Revert "Bump friendsofphp/php-cs-fixer from 3.68.0 to 3.68.2 in /tools/cs-fixer"** - [@norberttech](https://github.com/norberttech) @@ -135,6 +136,7 @@ - [#1240](https://github.com/flow-php/flow/pull/1240) - **Update Homebrew TAP formula: flow-php to version: 0.10.0** - [@norberttech](https://github.com/norberttech) ### Fixed +- [#1437](https://github.com/flow-php/flow/pull/1437) - **Setting up the offset for reading remote parquet files** - [@norberttech](https://github.com/norberttech) - [#1436](https://github.com/flow-php/flow/pull/1436) - **typo in documentation** - [@paul-court](https://github.com/paul-court) - [#1435](https://github.com/flow-php/flow/pull/1435) - **Read scale from float type in Dbal Schema Converter** - [@norberttech](https://github.com/norberttech) - [#1429](https://github.com/flow-php/flow/pull/1429) - **links in all repos readme** - [@norberttech](https://github.com/norberttech) diff --git a/examples/topics/data_reading/elasticsearch/output.raw.txt b/examples/topics/data_reading/elasticsearch/output.raw.txt index 2c7584814..b46f931f2 100644 --- a/examples/topics/data_reading/elasticsearch/output.raw.txt +++ b/examples/topics/data_reading/elasticsearch/output.raw.txt @@ -1,6 +1,6 @@ +------+-----------+---------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | took | timed_out | _shards | hits | +------+-----------+---------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| 0 | false | {"total":1,"successful":1,"skipped":0,"failed":0} | {"total":{"value":6,"relation":"eq"},"max_score":1,"hits":[{"_index":"test_index","_type":"_doc","_id":"1","_score":1,"_source":{"id":1,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"2","_score":1,"_source":{"id":2,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"3","_score":1,"_source":{"id":3,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"4","_score":1,"_source":{"id":4,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"5","_score":1,"_source":{"id":5,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"6","_score":1,"_source":{"id":6,"text":"lorem ipsum"}}]} | +| 1 | false | {"total":1,"successful":1,"skipped":0,"failed":0} | {"total":{"value":6,"relation":"eq"},"max_score":1,"hits":[{"_index":"test_index","_type":"_doc","_id":"1","_score":1,"_source":{"id":1,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"2","_score":1,"_source":{"id":2,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"3","_score":1,"_source":{"id":3,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"4","_score":1,"_source":{"id":4,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"5","_score":1,"_source":{"id":5,"text":"lorem ipsum"}},{"_index":"test_index","_type":"_doc","_id":"6","_score":1,"_source":{"id":6,"text":"lorem ipsum"}}]} | +------+-----------+---------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1 rows diff --git a/examples/topics/data_reading/http_dynamic/output.txt b/examples/topics/data_reading/http_dynamic/output.txt index 646f6f01e..7db4a80fa 100644 --- a/examples/topics/data_reading/http_dynamic/output.txt +++ b/examples/topics/data_reading/http_dynamic/output.txt @@ -1,6 +1,6 @@ +----------+-----------------------------+---------------------+----------+--------------+-----------+----------------------+ | name | html_url | blog | login | public_repos | followers | created_at | +----------+-----------------------------+---------------------+----------+--------------+-----------+----------------------+ -| Flow PHP | https://github.com/flow-php | http://flow-php.com | flow-php | 38 | 109 | 2020-10-26T18:40:27Z | +| Flow PHP | https://github.com/flow-php | http://flow-php.com | flow-php | 38 | 110 | 2020-10-26T18:40:27Z | +----------+-----------------------------+---------------------+----------+--------------+-----------+----------------------+ 1 rows diff --git a/examples/topics/filesystem/azure/code.php b/examples/topics/filesystem/azure/code.php index 7019b1cc5..c03012543 100644 --- a/examples/topics/filesystem/azure/code.php +++ b/examples/topics/filesystem/azure/code.php @@ -3,7 +3,7 @@ declare(strict_types=1); use function Flow\Azure\SDK\DSL\{azure_blob_service, azure_blob_service_config, azure_shared_key_authorization_factory}; -use function Flow\ETL\Adapter\CSV\{from_csv, to_csv}; +use function Flow\ETL\Adapter\Parquet\{from_parquet, to_parquet}; use function Flow\ETL\DSL\{config_builder, data_frame, from_array, overwrite, to_stream}; use function Flow\Filesystem\Bridge\Azure\DSL\azure_filesystem; use function Flow\Filesystem\DSL\path; @@ -44,10 +44,10 @@ ['id' => 4, 'name' => 'test'], ])) ->saveMode(overwrite()) - ->write(to_csv(path('azure-blob://test.csv'))) + ->write(to_parquet(path('azure-blob://test.parquet'))) ->run(); data_frame($config) - ->read(from_csv(path('azure-blob://test.csv'))) + ->read(from_parquet(path('azure-blob://test.parquet'))) ->write(to_stream(__DIR__ . '/output.txt', truncate: false)) ->run(); diff --git a/examples/topics/filesystem/s3/code.php b/examples/topics/filesystem/s3/code.php index ff5c027ba..9a8a37aae 100644 --- a/examples/topics/filesystem/s3/code.php +++ b/examples/topics/filesystem/s3/code.php @@ -2,7 +2,7 @@ declare(strict_types=1); -use function Flow\ETL\Adapter\CSV\{from_csv, to_csv}; +use function Flow\ETL\Adapter\Parquet\{from_parquet, to_parquet}; use function Flow\ETL\DSL\{config_builder, data_frame, from_array, overwrite, to_stream}; use function Flow\Filesystem\Bridge\AsyncAWS\DSL\{aws_s3_client, aws_s3_filesystem}; use function Flow\Filesystem\DSL\path; @@ -39,10 +39,10 @@ ['id' => 4, 'name' => 'test'], ])) ->saveMode(overwrite()) - ->write(to_csv(path('aws-s3://test.csv'))) + ->write(to_parquet(path('aws-s3://test.parquet'))) ->run(); data_frame($config) - ->read(from_csv(path('aws-s3://test.csv'))) + ->read(from_parquet(path('aws-s3://test.parquet'))) ->write(to_stream(__DIR__ . '/output.txt', truncate: false)) ->run(); diff --git a/src/adapter/etl-adapter-doctrine/src/Flow/ETL/Adapter/Doctrine/DbalMetadata.php b/src/adapter/etl-adapter-doctrine/src/Flow/ETL/Adapter/Doctrine/DbalMetadata.php index 861c7f624..4c8cee864 100644 --- a/src/adapter/etl-adapter-doctrine/src/Flow/ETL/Adapter/Doctrine/DbalMetadata.php +++ b/src/adapter/etl-adapter-doctrine/src/Flow/ETL/Adapter/Doctrine/DbalMetadata.php @@ -67,7 +67,7 @@ public static function precision(int $precision) : Metadata return Metadata::with(self::PRECISION->value, $precision); } - public static function primaryKey(string $name) : Metadata + public static function primaryKey(string $name = '') : Metadata { return Metadata::with(self::PRIMARY_KEY->value, $name); } diff --git a/src/adapter/etl-adapter-doctrine/tests/Flow/ETL/Adapter/Doctrine/Tests/Unit/SchemaConverterTest.php b/src/adapter/etl-adapter-doctrine/tests/Flow/ETL/Adapter/Doctrine/Tests/Unit/SchemaConverterTest.php index 5189b1d6d..4a7a53877 100644 --- a/src/adapter/etl-adapter-doctrine/tests/Flow/ETL/Adapter/Doctrine/Tests/Unit/SchemaConverterTest.php +++ b/src/adapter/etl-adapter-doctrine/tests/Flow/ETL/Adapter/Doctrine/Tests/Unit/SchemaConverterTest.php @@ -54,4 +54,26 @@ public function test_converting_flow_to_dbal_schema() : void to_dbal_schema_table($flowSchema, 'test') ); } + + public function test_converting_flow_to_dbal_schema_without_providing_pk_name() : void + { + $flowSchema = schema( + int_schema('int', nullable: false, metadata: DbalMetadata::primaryKey()), + str_schema('str', nullable: true, metadata: DbalMetadata::primaryKey()), + ); + + self::assertEquals( + new Table( + 'test', + [ + new Column('int', Type::getType('integer'), ['notnull' => true]), + new Column('str', Type::getType('string'), ['notnull' => true]), // pk changes nullable true into false + ], + [ + new Index('', ['int', 'str'], true, true), + ] + ), + to_dbal_schema_table($flowSchema, 'test') + ); + } } diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile.php index bfe63c2a9..a714ca9ac 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile.php @@ -49,16 +49,18 @@ public function metadata() : Metadata return $this->metadata; } - if ($this->stream->read(4, -4) !== self::PARQUET_MAGIC_NUMBER) { + $fileTotalSize = $this->stream->size(); + + if ($this->stream->read(4, $fileTotalSize - 4) !== self::PARQUET_MAGIC_NUMBER) { throw new InvalidArgumentException('Given file is not valid Parquet file'); } /** * @phpstan-ignore-next-line */ - $metadataLength = \unpack($this->byteOrder->value, $this->stream->read(4, -8))[1]; + $metadataLength = \unpack($this->byteOrder->value, $this->stream->read(4, $fileTotalSize - 8))[1]; - $metadata = $this->stream->read($metadataLength, -($metadataLength + 8)); + $metadata = $this->stream->read($metadataLength, $fileTotalSize - ($metadataLength + 8)); $thriftMetadata = new FileMetaData(); $thriftMetadata->read(