Skip to content

Commit 75d183d

Browse files
authored
Added DataFrame::dropPartitions() transformation (#922)
1 parent 349f4d2 commit 75d183d

File tree

5 files changed

+115
-0
lines changed

5 files changed

+115
-0
lines changed

src/core/etl/src/Flow/ETL/DataFrame.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
use Flow\ETL\Transformer\CallbackRowTransformer;
3535
use Flow\ETL\Transformer\CrossJoinRowsTransformer;
3636
use Flow\ETL\Transformer\DropDuplicatesTransformer;
37+
use Flow\ETL\Transformer\DropPartitionsTransformer;
3738
use Flow\ETL\Transformer\EntryNameStyleConverterTransformer;
3839
use Flow\ETL\Transformer\JoinEachRowsTransformer;
3940
use Flow\ETL\Transformer\JoinRowsTransformer;
@@ -300,6 +301,13 @@ public function dropDuplicates(string|Reference ...$entries) : self
300301
return $this;
301302
}
302303

304+
/**
 * Adds a DropPartitionsTransformer to the pipeline, so partitions are removed
 * from partitioned Rows that pass through this step.
 *
 * @return self this DataFrame instance, which allows chaining further calls
 */
public function dropPartitions() : self
305+
{
306+
// Only registers the transformer on the pipeline; nothing is transformed in this method itself.
$this->pipeline->add(new DropPartitionsTransformer());
307+
308+
return $this;
309+
}
310+
303311
/**
304312
* Be aware that fetch is not memory safe and will load all rows into memory.
305313
* If you want to safely iterate over Rows use one of the following methods:.

src/core/etl/src/Flow/ETL/Rows.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ public function drop(int $size) : self
142142
return self::partitioned(\array_slice($this->rows, $size), $this->partitions);
143143
}
144144

145+
/**
 * Creates a new Rows instance from the rows held by this one.
 * The current partitions are not handed over to the new instance.
 *
 * @return self new instance built from the same rows
 */
public function dropPartitions() : self
146+
{
147+
// Unlike the self::partitioned(...) calls used elsewhere in this class, $this->partitions is not passed here.
return new self(...$this->rows);
148+
}
149+
145150
public function dropRight(int $size) : self
146151
{
147152
if ($size === 0) {
src/core/etl/src/Flow/ETL/Transformer/DropPartitionsTransformer.php

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Transformer;
6+
7+
use Flow\ETL\FlowContext;
8+
use Flow\ETL\Rows;
9+
use Flow\ETL\Transformer;
10+
11+
/**
 * Transformer that removes partitions from the Rows it receives.
 */
final class DropPartitionsTransformer implements Transformer
12+
{
13+
/**
 * Returns the result of Rows::dropPartitions() when the given rows are partitioned,
 * otherwise returns the given rows untouched (the very same instance).
 *
 * @param Rows $rows rows to transform
 * @param FlowContext $context not used by this transformer
 *
 * @return Rows
 */
public function transform(Rows $rows, FlowContext $context) : Rows
14+
{
15+
if ($rows->isPartitioned()) {
16+
return $rows->dropPartitions();
17+
}
18+
19+
// Nothing to drop, so the input instance is handed back as is.
return $rows;
20+
}
21+
}

src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/PartitioningTest.php

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,28 @@
2020

2121
final class PartitioningTest extends IntegrationTestCase
2222
{
23+
/**
 * Reads rows partitioned by country=PL through a DataFrame, calls dropPartitions()
 * and checks that the fetched rows are no longer partitioned.
 */
public function test_dropping_partitions() : void
24+
{
25+
$rows = df()
26+
->read(from_rows(
27+
rows_partitioned(
28+
[
29+
row(int_entry('id', 1), str_entry('country', 'PL'), int_entry('age', 20)),
30+
row(int_entry('id', 2), str_entry('country', 'PL'), int_entry('age', 20)),
31+
row(int_entry('id', 3), str_entry('country', 'PL'), int_entry('age', 25)),
32+
row(int_entry('id', 4), str_entry('country', 'PL'), int_entry('age', 30)),
33+
],
34+
[
35+
partition('country', 'PL'),
36+
]
37+
)
38+
))
39+
->dropPartitions()
40+
->fetch();
41+
42+
$this->assertFalse($rows->isPartitioned());
43+
}
44+
2345
public function test_partition_by() : void
2446
{
2547
$rows = df()
src/core/etl/tests/Flow/ETL/Tests/Unit/Transformer/DropPartitionsTransformerTest.php

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Tests\Unit\Transformer;
6+
7+
use function Flow\ETL\DSL\array_to_rows;
8+
use function Flow\ETL\DSL\flow_context;
9+
use function Flow\ETL\DSL\ref;
10+
use Flow\ETL\Transformer\DropPartitionsTransformer;
11+
use PHPUnit\Framework\TestCase;
12+
13+
/**
 * Unit tests for DropPartitionsTransformer covering partitioned and not partitioned input.
 */
final class DropPartitionsTransformerTest extends TestCase
14+
{
15+
/**
 * Partitions ten rows by the "category" entry and checks that each resulting
 * group of rows is partitioned before the transformation and not partitioned after it.
 */
public function test_dropping_partitions() : void
16+
{
17+
$partitioned = array_to_rows([
18+
['id' => 1, 'name' => 'one', 'category' => 'a'],
19+
['id' => 2, 'name' => 'two', 'category' => 'a'],
20+
['id' => 3, 'name' => 'three', 'category' => 'a'],
21+
['id' => 4, 'name' => 'four', 'category' => 'a'],
22+
['id' => 5, 'name' => 'five', 'category' => 'a'],
23+
['id' => 6, 'name' => 'six', 'category' => 'b'],
24+
['id' => 7, 'name' => 'seven', 'category' => 'b'],
25+
['id' => 8, 'name' => 'eight', 'category' => 'b'],
26+
['id' => 9, 'name' => 'nine', 'category' => 'b'],
27+
['id' => 10, 'name' => 'ten', 'category' => 'b'],
28+
])->partitionBy(ref('category'));
29+
30+
// Each iterated item is asserted to be partitioned first, so the second assertion proves the transformer changed it.
foreach ($partitioned as $rows) {
31+
$this->assertTrue($rows->isPartitioned());
32+
33+
$notPartitioned = (new DropPartitionsTransformer())->transform($rows, flow_context());
34+
35+
$this->assertFalse($notPartitioned->isPartitioned());
36+
}
37+
}
38+
39+
/**
 * Checks that rows which were never partitioned come back from the transformer
 * as the very same instance (assertSame compares object identity).
 */
public function test_transforming_not_partitioned_rows() : void
40+
{
41+
$rows = array_to_rows([
42+
['id' => 1, 'name' => 'one', 'category' => 'a'],
43+
['id' => 2, 'name' => 'two', 'category' => 'a'],
44+
['id' => 3, 'name' => 'three', 'category' => 'a'],
45+
['id' => 4, 'name' => 'four', 'category' => 'a'],
46+
['id' => 5, 'name' => 'five', 'category' => 'a'],
47+
['id' => 6, 'name' => 'six', 'category' => 'b'],
48+
['id' => 7, 'name' => 'seven', 'category' => 'b'],
49+
['id' => 8, 'name' => 'eight', 'category' => 'b'],
50+
['id' => 9, 'name' => 'nine', 'category' => 'b'],
51+
['id' => 10, 'name' => 'ten', 'category' => 'b'],
52+
]);
53+
54+
$this->assertSame(
55+
$rows,
56+
(new DropPartitionsTransformer())->transform($rows, flow_context())
57+
);
58+
}
59+
}

0 commit comments

Comments
 (0)