Skip to content

Commit 656bcc3

Browse files
committed
Removed GroupedDataFrame::toDF()
1 parent 3b0313d commit 656bcc3

File tree

7 files changed

+44
-50
lines changed

7 files changed

+44
-50
lines changed

UPGRADE.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ Above changes were introduced in all 3 types of joins:
2323
- `DataFrame::joinEach()`
2424
- `DataFrame::crossJoin()`
2525

26+
### 2) GroupBy
27+
28+
From now on, the `DataFrame::groupBy()` method returns a `GroupedDataFrame` object, which is nothing more than a GroupBy
29+
statement builder. To get the results, you first need to define the aggregation functions with `aggregate()`, optionally pivoting the data with `pivot()` beforehand.
30+
2631
## Upgrading from 0.6.x to 0.7.x
2732

2833
### 1) DataFrame::appendSafe() method was removed

src/core/etl/src/Flow/ETL/DataFrame/GroupedDataFrame.php

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,6 @@ public function aggregate(AggregatingFunction ...$aggregations) : DataFrame
1818
{
1919
$this->groupBy->aggregate(...$aggregations);
2020

21-
return $this->toDataFrame();
22-
}
23-
24-
public function pivot(Reference $ref) : self
25-
{
26-
$this->groupBy->pivot($ref);
27-
28-
return $this;
29-
}
30-
31-
public function toDataFrame() : DataFrame
32-
{
3321
return $this->df->rebuild(function (Pipeline $pipeline, FlowContext $context) : DataFrame {
3422
return new DataFrame(
3523
new Pipeline\LinkedPipeline(new Pipeline\GroupByPipeline($this->groupBy, $pipeline)),
@@ -38,8 +26,10 @@ public function toDataFrame() : DataFrame
3826
});
3927
}
4028

41-
public function toDF() : DataFrame
29+
public function pivot(Reference $ref) : self
4230
{
43-
return $this->toDataFrame();
31+
$this->groupBy->pivot($ref);
32+
33+
return $this;
4434
}
4535
}

src/core/etl/src/Flow/ETL/Pipeline/HashJoinPipeline.php

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,6 @@ public function process(FlowContext $context) : \Generator
114114
}
115115
}
116116

117-
public function setSource(Extractor $extractor) : Pipeline
118-
{
119-
$this->extractor = $extractor;
120-
121-
return $this;
122-
}
123-
124117
public function source() : Extractor
125118
{
126119
return $this->extractor;

src/core/etl/src/Flow/ETL/Pipeline/SynchronousPipeline.php

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,6 @@ public function process(FlowContext $context) : \Generator
8282
}
8383
}
8484

85-
public function setSource(Extractor $extractor) : self
86-
{
87-
$this->extractor = $extractor;
88-
89-
return $this;
90-
}
91-
9285
public function source() : Extractor
9386
{
9487
return $this->extractor;

src/core/etl/src/Flow/ETL/Pipeline/VoidPipeline.php

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,6 @@ public function process(FlowContext $context) : \Generator
3636
yield new Rows();
3737
}
3838

39-
public function setSource(Extractor $extractor) : self
40-
{
41-
return $this;
42-
}
43-
4439
public function source() : Extractor
4540
{
4641
return $this->pipeline->source();

src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/GroupByTest.php

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,23 @@
44

55
namespace Flow\ETL\Tests\Integration\DataFrame;
66

7-
use function Flow\ETL\DSL\{average, count, df, float_entry, from_all, from_array, from_memory, from_rows, int_entry, integer_entry, lit, max, rank, ref, str_entry, sum, window};
7+
use function Flow\ETL\DSL\{average,
8+
count,
9+
df,
10+
float_entry,
11+
from_all,
12+
from_array,
13+
from_memory,
14+
from_rows,
15+
int_entry,
16+
integer_entry,
17+
lit,
18+
max,
19+
rank,
20+
ref,
21+
str_entry,
22+
sum,
23+
window};
824
use Flow\ETL\Memory\ArrayMemory;
925
use Flow\ETL\Tests\Integration\IntegrationTestCase;
1026
use Flow\ETL\{Loader, Row, Rows};
@@ -32,19 +48,20 @@ public function test_group_by_multiple_columns_and_batch_size() : void
3248
)
3349
))
3450
->groupBy('country', 'gender')
35-
->toDF()
51+
->aggregate(average(ref('age')))
52+
->withEntry('age_avg', ref('age_avg')->round(lit(2)))
3653
->batchSize(1)
3754
->write($loader)
3855
->fetch();
3956

4057
self::assertEquals(
41-
new Rows(
42-
Row::create(str_entry('country', 'PL'), str_entry('gender', 'male')),
43-
Row::create(str_entry('country', 'PL'), str_entry('gender', 'female')),
44-
Row::create(str_entry('country', 'US'), str_entry('gender', 'female')),
45-
Row::create(str_entry('country', 'US'), str_entry('gender', 'male')),
46-
),
47-
$rows
58+
[
59+
['country' => 'PL', 'gender' => 'male', 'age_avg' => 21.67],
60+
['country' => 'PL', 'gender' => 'female', 'age_avg' => 30.0],
61+
['country' => 'US', 'gender' => 'female', 'age_avg' => 42.5],
62+
['country' => 'US', 'gender' => 'male', 'age_avg' => 45],
63+
],
64+
$rows->toArray()
4865
);
4966
}
5067

@@ -125,15 +142,15 @@ public function test_group_by_single_column() : void
125142
)
126143
))
127144
->groupBy('country')
128-
->toDF()
145+
->aggregate(sum(ref('age')))
129146
->fetch();
130147

131148
self::assertEquals(
132-
new Rows(
133-
Row::create(str_entry('country', 'PL')),
134-
Row::create(str_entry('country', 'US')),
135-
),
136-
$rows
149+
[
150+
['country' => 'PL', 'age_sum' => 95],
151+
['country' => 'US', 'age_sum' => 175],
152+
],
153+
$rows->toArray()
137154
);
138155
}
139156

src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/PartitioningTest.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
namespace Flow\ETL\Tests\Integration\DataFrame;
66

77
use function Flow\ETL\Adapter\Text\{from_text, to_text};
8-
use function Flow\ETL\DSL\{df,
8+
use function Flow\ETL\DSL\{collect,
9+
df,
910
from_array,
1011
from_rows,
1112
int_entry,
@@ -195,7 +196,7 @@ public function test_pruning_single_partition() : void
195196
->select('year')
196197
->withEntry('year', ref('year')->cast('int'))
197198
->groupBy(ref('year'))
198-
->toDF()
199+
->aggregate(collect(ref('year')))
199200
->fetch();
200201

201202
self::assertCount(1, $rows);

0 commit comments

Comments
 (0)