Skip to content

Commit c4005ed

Browse files
committed
Added DataStream builder to symfony http foundation bridge
1 parent 95607ef commit c4005ed

File tree

13 files changed

+370
-22
lines changed

13 files changed

+370
-22
lines changed

documentation/components/bridges/symfony-http-foundation-bridge.md

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,28 @@ declare(strict_types=1);
3232

3333
namespace Symfony\Application\Controller;
3434

35-
use Flow\Bridge\Symfony\HttpFoundation\FlowStreamedResponse;
35+
use Flow\Bridge\Symfony\HttpFoundation\DataStream;
3636
use Flow\Bridge\Symfony\HttpFoundation\Output\CSVOutput;
3737
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
3838
use Symfony\Component\HttpFoundation\Response;
3939
use Symfony\Component\Routing\Attribute\Route;
4040
use function Flow\ETL\Adapter\Parquet\from_parquet;
41+
use Flow\Bridge\Symfony\HttpFoundation\Transformation\MaskColumns;
42+
use Flow\Bridge\Symfony\HttpFoundation\Transformation\AddRowIndex;
4143

4244
final class ReportsController extends AbstractController
4345
{
4446
#[Route('/stream/report', name: 'stream-report')]
4547
public function streamReport() : Response
4648
{
47-
return new FlowStreamedResponse(
48-
new ParquetEtractor(__DIR__ . '/reports/orders.parquet'),
49-
new CSVOutput(withHeader: true)
50-
);
49+
return DataStream
50+
::open(from_parquet(__DIR__ . '/reports/orders.parquet'))
51+
->underFilename('orders.csv')
52+
->transform(
53+
new MaskColumns(['email', 'address']),
54+
new AddRowIndex()
55+
)
56+
->sendTo(new CSVOutput(withHeader: true));
5157
}
5258
}
5359
```
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\Bridge\Symfony\HttpFoundation;
6+
7+
use Flow\ETL\{Extractor, Transformation, Transformations};
8+
use Symfony\Component\HttpFoundation\{HeaderUtils, Response};
9+
10+
/**
 * Fluent builder for FlowStreamedResponse.
 *
 * Create an instance with DataStream::open(), optionally configure the filename, headers,
 * status code and transformations, then call sendTo() to obtain the response.
 */
final class DataStream
{
    /**
     * Headers handed to the response. Pre-populated with cache-disabling defaults
     * and the X-Accel-Buffering hint for Nginx.
     *
     * @var array<string, string>
     */
    private array $headers = [
        'Cache-Control' => 'no-store, no-cache, must-revalidate, private',
        'X-Accel-Buffering' => 'no', // provides support for Nginx
        'Pragma' => 'no-cache', // Backward compatibility for HTTP/1.0
    ];

    /**
     * Output selected through sendTo(); null until sendTo() is called.
     */
    private ?Output $output = null;

    private int $status = Response::HTTP_OK;

    /**
     * Transformations collected through transform(), in the order they were registered.
     *
     * @var list<Transformation>
     */
    private array $transformations = [];

    public function __construct(private readonly Extractor $extractor)
    {
    }

    /**
     * Named constructor, equivalent to `new DataStream($extractor)`.
     */
    public static function open(Extractor $extractor) : self
    {
        return new self($extractor);
    }

    /**
     * Send the data stream to the output.
     *
     * The Content-Type header is always taken from the output type, so a Content-Type
     * set earlier through withHeaders() is overwritten here.
     */
    public function sendTo(Output $output) : FlowStreamedResponse
    {
        $this->output = $output;
        $this->headers['Content-Type'] = $output->type()->toContentTypeHeader();

        return new FlowStreamedResponse(
            $this->extractor,
            $output,
            // Spreading an empty list yields an empty Transformations collection.
            new Transformations(...$this->transformations),
            $this->status,
            $this->headers
        );
    }

    /**
     * Apply transformations to the data stream.
     * Transformations are applied in the order they are passed; calling this method more than once
     * appends to the transformations registered by earlier calls instead of discarding them.
     * Transformations are applied on the fly, while streaming the data, this means
     * that any resource expensive transformations like for example aggregations or sorting
     * might significantly slow down the streaming process or even cause out of memory errors.
     */
    public function transform(Transformation ...$transformations) : self
    {
        // array_values() drops any string keys produced by named arguments so the list stays positional.
        $this->transformations = [...$this->transformations, ...\array_values($transformations)];

        return $this;
    }

    /**
     * Set the filename for the response.
     * If the attachment flag is set to true, the response will be treated as an attachment meaning that
     * the browser will prompt the user to download the file.
     * When the flag is false the inline disposition is used instead.
     */
    public function underFilename(string $name, bool $attachment = true) : self
    {
        $this->headers['Content-Disposition'] = HeaderUtils::makeDisposition(
            $attachment ? HeaderUtils::DISPOSITION_ATTACHMENT : HeaderUtils::DISPOSITION_INLINE,
            $name
        );

        return $this;
    }

    /**
     * Set additional headers.
     * Headers are merged with the headers already configured; for a name present in both,
     * the value passed here wins.
     *
     * @param array<string, string> $headers
     */
    public function withHeaders(array $headers) : self
    {
        $this->headers = \array_merge($this->headers, $headers);

        return $this;
    }

    /**
     * Remove a specific header if it exists.
     * If the header does not exist, nothing happens.
     * The name is matched against the stored key exactly (case-sensitive).
     */
    public function withoutHeader(string $name) : self
    {
        // unset() on a missing key is a no-op, so no existence check is needed.
        unset($this->headers[$name]);

        return $this;
    }

    /**
     * Set the HTTP status code. Default is 200.
     */
    public function withStatus(int $status) : self
    {
        $this->status = $status;

        return $this;
    }
}

src/bridge/symfony/http-foundation/src/Flow/Bridge/Symfony/HttpFoundation/FlowStreamedResponse.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
namespace Flow\Bridge\Symfony\HttpFoundation;
66

77
use function Flow\ETL\DSL\df;
8-
use Flow\Bridge\Symfony\HttpFoundation\Transformation\{Transformations};
98
use Flow\ETL\{Config, Extractor, Transformation};
9+
use Flow\ETL\{Transformations};
1010
use Symfony\Component\HttpFoundation\StreamedResponse;
1111

1212
class FlowStreamedResponse extends StreamedResponse
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\Bridge\Symfony\HttpFoundation\Transformation;
6+
7+
use function Flow\ETL\DSL\int_entry;
8+
use Flow\Bridge\Symfony\HttpFoundation\Transformation\AddRowIndex\StartFrom;
9+
use Flow\ETL\{DataFrame, Row, Transformation};
10+
11+
/**
 * Transformation that adds an integer counter entry to every row of a DataFrame.
 *
 * The entry is named after $indexColumn ("index" by default) and counting begins
 * at 0 for StartFrom::ZERO, otherwise at 1.
 */
final readonly class AddRowIndex implements Transformation
{
    public function __construct(
        private string $indexColumn = 'index',
        private StartFrom $startFrom = StartFrom::ZERO
    ) {
    }

    public function transform(DataFrame $dataFrame) : DataFrame
    {
        $next = match ($this->startFrom) {
            StartFrom::ZERO => 0,
            default => 1,
        };

        // The counter is captured by reference so it keeps advancing from row to row.
        return $dataFrame->map(function (Row $row) use (&$next) {
            return $row->add(int_entry($this->indexColumn, $next++));
        });
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\Bridge\Symfony\HttpFoundation\Transformation\AddRowIndex;
6+
7+
/**
 * Selects the first value used by AddRowIndex when numbering rows.
 */
enum StartFrom
{
    /** Numbering begins at 1. */
    case ONE;

    /** Numbering begins at 0. */
    case ZERO;
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\Bridge\Symfony\HttpFoundation\Transformation;
6+
7+
use Flow\ETL\{DataFrame, Transformation};
8+
use Flow\Filesystem\Exception\InvalidArgumentException;
9+
10+
/**
 * Sets the batch size of a DataFrame.
 * A small batch size can be useful when processing large data sets, since fewer rows are
 * processed at a time and memory usage is kept low.
 *
 * Flow normally allows a batch size of -1 (meaning no batches), but that would defeat the
 * purpose of this transformation on Data Streams, so only sizes of 1 or more are accepted.
 */
final readonly class BatchSize implements Transformation
{
    /**
     * @param int<1, max> $size
     *
     * @throws InvalidArgumentException when $size is lower than 1
     */
    public function __construct(private int $size)
    {
        if ($size <= 0) {
            throw new InvalidArgumentException('Batch size must be greater than 0');
        }
    }

    public function transform(DataFrame $dataFrame) : DataFrame
    {
        $batchSize = $this->size;

        return $dataFrame->batchSize($batchSize);
    }
}

src/bridge/symfony/http-foundation/src/Flow/Bridge/Symfony/HttpFoundation/Transformation/MaskColumns.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
use function Flow\ETL\DSL\lit;
88
use Flow\ETL\{DataFrame, Transformation};
99

10+
/**
11+
* Mask columns in DataFrame by replacing their values with a mask.
12+
* If column does not exist in DataFrame, it will be added with a mask value.
13+
*/
1014
final readonly class MaskColumns implements Transformation
1115
{
1216
public function __construct(private array $columns = [], private string $mask = '******')

src/bridge/symfony/http-foundation/tests/Flow/Bridge/Symfony/HttpFoundation/Tests/Integration/FlowStreamedResponseTest.php

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
use function Flow\ETL\Adapter\JSON\from_json;
88
use function Flow\ETL\DSL\from_array;
9-
use Flow\Bridge\Symfony\HttpFoundation\{FlowStreamedResponse,
9+
use Flow\Bridge\Symfony\HttpFoundation\{DataStream,
10+
FlowStreamedResponse,
1011
Output\CSVOutput,
1112
Output\JsonOutput,
1213
Output\XMLOutput};
@@ -37,14 +38,14 @@ public function test_streaming_array_response_to_csv() : void
3738

3839
public function test_streaming_array_response_to_json() : void
3940
{
40-
$response = new FlowStreamedResponse(
41-
from_array([
42-
['id' => 1, 'size' => 'XL', 'color' => 'red', 'ean' => '1234567890123'],
43-
['id' => 2, 'size' => 'M', 'color' => 'blue', 'ean' => '1234567890124'],
44-
['id' => 3, 'size' => 'S', 'color' => 'green', 'ean' => '1234567890125'],
45-
]),
46-
new JsonOutput()
47-
);
41+
$extractor = from_array([
42+
['id' => 1, 'size' => 'XL', 'color' => 'red', 'ean' => '1234567890123'],
43+
['id' => 2, 'size' => 'M', 'color' => 'blue', 'ean' => '1234567890124'],
44+
['id' => 3, 'size' => 'S', 'color' => 'green', 'ean' => '1234567890125'],
45+
]);
46+
47+
$response = DataStream::open($extractor)
48+
->sendTo(new JsonOutput());
4849

4950
self::assertEquals(<<<'JSON'
5051
[{"id":1,"size":"XL","color":"red","ean":"1234567890123"},{"id":2,"size":"M","color":"blue","ean":"1234567890124"},{"id":3,"size":"S","color":"green","ean":"1234567890125"}]
@@ -100,6 +101,21 @@ public function test_streaming_partitioned_dataset() : void
100101
, $this->sendResponse($response));
101102
}
102103

104+
/**
 * The filename set through underFilename() is exposed as an attachment Content-Disposition header.
 */
public function test_streaming_with_disposition() : void
{
    $products = from_array([
        ['id' => 1, 'size' => 'XL', 'color' => 'red', 'ean' => '1234567890123'],
        ['id' => 2, 'size' => 'M', 'color' => 'blue', 'ean' => '1234567890124'],
        ['id' => 3, 'size' => 'S', 'color' => 'green', 'ean' => '1234567890125'],
    ]);

    $stream = DataStream::open($products)->underFilename('products.csv');
    $response = $stream->sendTo(new CSVOutput());

    $disposition = $response->headers->get('Content-Disposition');

    self::assertEquals('attachment; filename=products.csv', $disposition);
}
118+
103119
private function sendResponse(FlowStreamedResponse $response) : string
104120
{
105121
ob_start();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\Bridge\Symfony\HttpFoundation\Tests\Unit\Transformation;
6+
7+
use function Flow\ETL\DSL\{df, from_array};
8+
use Flow\Bridge\Symfony\HttpFoundation\Transformation\AddRowIndex;
9+
use Flow\Bridge\Symfony\HttpFoundation\Transformation\AddRowIndex\StartFrom;
10+
use Flow\ETL\Tests\FlowTestCase;
11+
12+
/**
 * Unit tests for AddRowIndex: default zero-based numbering and numbering that starts from 1.
 */
final class AddRowIndexTest extends FlowTestCase
{
    public function test_adding_row_index_to_each_row() : void
    {
        $expected = [
            ['index' => 0, 'id' => 1, 'name' => 'John Doe', 'salary' => 7000, 'currency' => 'USD'],
            ['index' => 1, 'id' => 2, 'name' => 'Jane Doe', 'salary' => 8000, 'currency' => 'USD'],
            ['index' => 2, 'id' => 3, 'name' => 'John Smith', 'salary' => 9000, 'currency' => 'USD'],
            ['index' => 3, 'id' => 4, 'name' => 'Jane Smith', 'salary' => 10000, 'currency' => 'USD'],
        ];

        $actual = df()
            ->read(from_array($this->employees()))
            ->with(new AddRowIndex())
            ->fetch()
            ->toArray();

        self::assertEquals($expected, $actual);
    }

    public function test_adding_row_index_to_each_row_starting_from_1() : void
    {
        $expected = [
            ['index' => 1, 'id' => 1, 'name' => 'John Doe', 'salary' => 7000, 'currency' => 'USD'],
            ['index' => 2, 'id' => 2, 'name' => 'Jane Doe', 'salary' => 8000, 'currency' => 'USD'],
            ['index' => 3, 'id' => 3, 'name' => 'John Smith', 'salary' => 9000, 'currency' => 'USD'],
            ['index' => 4, 'id' => 4, 'name' => 'Jane Smith', 'salary' => 10000, 'currency' => 'USD'],
        ];

        $actual = df()
            ->read(from_array($this->employees()))
            ->with(new AddRowIndex(startFrom: StartFrom::ONE))
            ->fetch()
            ->toArray();

        self::assertEquals($expected, $actual);
    }

    /**
     * Input rows shared by both tests.
     *
     * @return list<array{id: int, name: string, salary: int, currency: string}>
     */
    private function employees() : array
    {
        return [
            ['id' => 1, 'name' => 'John Doe', 'salary' => 7000, 'currency' => 'USD'],
            ['id' => 2, 'name' => 'Jane Doe', 'salary' => 8000, 'currency' => 'USD'],
            ['id' => 3, 'name' => 'John Smith', 'salary' => 9000, 'currency' => 'USD'],
            ['id' => 4, 'name' => 'Jane Smith', 'salary' => 10000, 'currency' => 'USD'],
        ];
    }
}

0 commit comments

Comments
 (0)