Skip to content

Commit 7d72bdf

Browse files
committed
aggregators now always filter out nulls, not just for columns
1 parent f17929b commit 7d72bdf

File tree

6 files changed

+37
-29
lines changed

6 files changed

+37
-29
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ import kotlin.reflect.full.withNullability
1313
* The [AggregatorBase] class is a base implementation of this interface.
1414
*
1515
* @param Value The type of the values to be aggregated.
16-
* This can be nullable for [Iterables][Iterable] or not, depending on the use case.
17-
* For columns, [Value] will always be considered nullable; nulls are filtered out from columns anyway.
16+
* The input can always have nulls, they are filtered out.
1817
* @param Return The type of the resulting value. Can optionally be nullable.
1918
*/
2019
@PublishedApi
@@ -26,17 +25,18 @@ internal interface Aggregator<in Value, out Return> {
2625
/**
2726
* Base function of [Aggregator].
2827
*
29-
* Aggregates the given values, taking [type] into account, and computes a single resulting value.
28+
* Aggregates the given values, taking [type] into account,
29+
* filtering nulls, and computes a single resulting value.
3030
*
3131
* When using [AggregatorBase], this can be supplied by the [AggregatorBase.aggregator] argument.
3232
*
3333
* When the exact [type] is unknown, use [aggregateCalculatingType].
3434
*/
35-
fun aggregate(values: Iterable<Value>, type: KType): Return
35+
fun aggregate(values: Iterable<Value?>, type: KType): Return
3636

3737
/**
3838
* Aggregates the data in the given column and computes a single resulting value.
39-
* Nulls are filtered out by default, then [aggregate] (with [Iterable] and [KType]) is called.
39+
* Calls [aggregate] (with [Iterable] and [KType]).
4040
*
4141
* See [AggregatorBase.aggregate].
4242
*/
@@ -57,7 +57,7 @@ internal interface Aggregator<in Value, out Return> {
5757
* It should contain all types of [values].
5858
* If `null`, the types of [values] will be calculated at runtime (heavy!).
5959
*/
60-
fun aggregateCalculatingType(values: Iterable<Value>, valueTypes: Set<KType>? = null): Return
60+
fun aggregateCalculatingType(values: Iterable<Value?>, valueTypes: Set<KType>? = null): Return
6161

6262
/**
6363
* Function that can give the return type of [aggregate] as [KType], given the type of the input.
@@ -92,8 +92,11 @@ internal fun <Value, Return> Aggregator<*, *>.cast2(): Aggregator<Value, Return>
9292
/** Type alias for [Aggregator.calculateReturnTypeOrNull] */
9393
internal typealias CalculateReturnTypeOrNull = (type: KType, emptyInput: Boolean) -> KType?
9494

95-
/** Type alias for [Aggregator.aggregate]. */
96-
internal typealias Aggregate<Value, Return> = Iterable<Value>.(type: KType) -> Return
95+
/**
96+
* Type alias for the argument for [Aggregator.aggregate].
97+
* Nulls have already been filtered out when this argument is called.
98+
*/
99+
internal typealias Aggregate<Value, Return> = Iterable<Value & Any>.(type: KType) -> Return
97100

98101
/** Common case for [CalculateReturnTypeOrNull], preserves return type, but makes it nullable for empty inputs. */
99102
internal val preserveReturnTypeNullIfEmpty: CalculateReturnTypeOrNull = { type, emptyInput ->

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorBase.kt

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import kotlin.reflect.full.withNullability
1414
* or multiple [DataColumns][DataColumn].
1515
*
1616
* @param name The name of this aggregator.
17-
* @param aggregator Functional argument for the [aggregate] function.
17+
* @param aggregator Functional argument for the [aggregate] function. Nulls are filtered out before this is called.
1818
*/
1919
internal abstract class AggregatorBase<in Value, out Return>(
2020
override val name: String,
@@ -25,13 +25,19 @@ internal abstract class AggregatorBase<in Value, out Return>(
2525
/**
2626
* Base function of [Aggregator].
2727
*
28-
* Aggregates the given values, taking [type] into account, and computes a single resulting value.
28+
* Aggregates the given values, taking [type] into account,
29+
* filtering nulls, and computes a single resulting value.
2930
*
30-
* Uses [aggregator] to compute the result.
31+
* When using [AggregatorBase], this can be supplied by the [AggregatorBase.aggregator] argument.
3132
*
3233
* When the exact [type] is unknown, use [aggregateCalculatingType].
3334
*/
34-
override fun aggregate(values: Iterable<Value>, type: KType): Return = aggregator(values, type)
35+
@Suppress("UNCHECKED_CAST")
36+
override fun aggregate(values: Iterable<Value?>, type: KType): Return =
37+
aggregator(
38+
values.asSequence().filterNotNull().asIterable(), // TODO make dependant on type's nullability
39+
type.withNullability(false),
40+
)
3541

3642
/**
3743
* Function that can give the return type of [aggregate] as [KType], given the type of the input.
@@ -44,7 +50,7 @@ internal abstract class AggregatorBase<in Value, out Return>(
4450
* @return The return type of [aggregate] as [KType].
4551
*/
4652
override fun calculateReturnTypeOrNull(type: KType, emptyInput: Boolean): KType? =
47-
getReturnTypeOrNull(type, emptyInput)
53+
getReturnTypeOrNull(type.withNullability(false), emptyInput)
4854

4955
/**
5056
* Aggregates the data in the given column and computes a single resulting value.
@@ -54,17 +60,12 @@ internal abstract class AggregatorBase<in Value, out Return>(
5460
@Suppress("UNCHECKED_CAST")
5561
override fun aggregate(column: DataColumn<Value?>): Return =
5662
aggregate(
57-
values =
58-
if (column.hasNulls()) {
59-
column.asSequence().filterNotNull().asIterable()
60-
} else {
61-
column.asIterable() as Iterable<Value>
62-
},
63-
type = column.type().withNullability(false),
63+
values = column.asIterable(),
64+
type = column.type(),
6465
)
6566

6667
/** @include [Aggregator.aggregateCalculatingType] */
67-
override fun aggregateCalculatingType(values: Iterable<Value>, valueTypes: Set<KType>?): Return {
68+
override fun aggregateCalculatingType(values: Iterable<Value?>, valueTypes: Set<KType>?): Return {
6869
val commonType = if (valueTypes != null) {
6970
valueTypes.commonType(false)
7071
} else {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/FlatteningAggregator.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import kotlin.reflect.full.withNullability
88
/**
99
* Simple [Aggregator] implementation with flattening behavior for multiple columns.
1010
*
11-
* Nulls are filtered from columns.
11+
* Nulls are filtered out.
1212
*
1313
* When called on multiple columns,
1414
* the columns are flattened into a single list of values, filtering nulls as usual;

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/TwoStepAggregator.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import kotlin.reflect.full.withNullability
1010
/**
1111
* A slightly more advanced [Aggregator] implementation.
1212
*
13-
* Nulls are filtered from columns.
13+
* Nulls are filtered out.
1414
*
1515
* When called on multiple columns, this [Aggregator] works in two steps:
1616
* First, it aggregates within a [DataColumn]/[Iterable] ([stepOneAggregator]) with their (given) type,

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/TwoStepNumbersAggregator.kt

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ private val logger = KotlinLogging.logger { }
2424
* [Aggregator] made specifically for number calculations.
2525
* Mixed number types are [unified][UnifyingNumbers] to [primitives][PRIMITIVES_ONLY].
2626
*
27-
* Nulls are filtered from columns.
27+
* Nulls are filtered out.
2828
*
2929
* When called on multiple columns (with potentially mixed [Number] types),
3030
* this [Aggregator] works in two steps:
@@ -113,11 +113,10 @@ internal class TwoStepNumbersAggregator<out Return : Number?>(
113113
*
114114
* When the exact [type] is unknown, use [aggregateCalculatingType].
115115
*/
116-
override fun aggregate(values: Iterable<Number>, type: KType): Return {
116+
override fun aggregate(values: Iterable<Number?>, type: KType): Return {
117117
require(type.isSubtypeOf(typeOf<Number?>())) {
118118
"${TwoStepNumbersAggregator::class.simpleName}: Type $type is not a subtype of Number?"
119119
}
120-
121120
return when (type.withNullability(false)) {
122121
// If the type is not a specific number, but rather a mixed Number, we unify the types first.
123122
// This is heavy and could be avoided by calling aggregate with a specific number type
@@ -147,8 +146,8 @@ internal class TwoStepNumbersAggregator<out Return : Number?>(
147146
* If `null`, the types of [values] will be calculated at runtime (heavy!).
148147
*/
149148
@Suppress("UNCHECKED_CAST")
150-
override fun aggregateCalculatingType(values: Iterable<Number>, valueTypes: Set<KType>?): Return {
151-
val valueTypes = valueTypes ?: values.types()
149+
override fun aggregateCalculatingType(values: Iterable<Number?>, valueTypes: Set<KType>?): Return {
150+
val valueTypes = valueTypes ?: values.filterNotNull().types()
152151
val commonType = valueTypes
153152
.unifiedNumberType(PRIMITIVES_ONLY)
154153
.withNullability(false)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/ofRowExpression.kt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,18 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregateInternal
1313
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregator
1414
import org.jetbrains.kotlinx.dataframe.impl.aggregation.internal
1515
import org.jetbrains.kotlinx.dataframe.impl.emptyPath
16+
import kotlin.reflect.full.withNullability
1617
import kotlin.reflect.typeOf
1718

1819
@PublishedApi
1920
internal inline fun <C, reified V, R> Aggregator<V, R>.aggregateOf(
2021
values: Iterable<C>,
2122
noinline transform: (C) -> V,
22-
): R = aggregate(values.asSequence().map(transform).asIterable(), typeOf<V>())
23+
): R =
24+
aggregate(
25+
values = values.asSequence().mapNotNull(transform).asIterable(),
26+
type = typeOf<V>().withNullability(false),
27+
)
2328

2429
@PublishedApi
2530
internal inline fun <C, reified V, R> Aggregator<V, R>.aggregateOf(

0 commit comments

Comments
 (0)