Skip to content

Commit dc7502e

Browse files
committed
wip making Aggregators more modular
1 parent 418acfe commit dc7502e

22 files changed

+427
-393
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/mean.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import kotlin.reflect.typeOf
3434

3535
// region DataColumn
3636

37-
public fun DataColumn<Number?>.mean(skipNA: Boolean = skipNA_default): Double = Aggregators.mean(skipNA).aggregate(this)
37+
public fun DataColumn<Number?>.mean(skipNA: Boolean = skipNA_default): Double = Aggregators.mean(skipNA).aggregateSingleColumn(this)
3838

3939
public inline fun <T, reified R : Number> DataColumn<T>.meanOf(
4040
skipNA: Boolean = skipNA_default,

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt

+4-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.Refine
1313
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1414
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1515
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators
16+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.aggregateCalculatingValueType
1617
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast
1718
import org.jetbrains.kotlinx.dataframe.impl.aggregation.intraComparableColumns
1819
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll
@@ -27,7 +28,8 @@ import kotlin.reflect.KProperty
2728

2829
public fun <T : Comparable<T>> DataColumn<T?>.median(): T = medianOrNull().suggestIfNull("median")
2930

30-
public fun <T : Comparable<T>> DataColumn<T?>.medianOrNull(): T? = Aggregators.median.cast<T>().aggregate(this)
31+
public fun <T : Comparable<T>> DataColumn<T?>.medianOrNull(): T? =
32+
Aggregators.median.cast<T>().aggregateSingleColumn(this)
3133

3234
public inline fun <T, reified R : Comparable<R>> DataColumn<T>.medianOfOrNull(noinline expression: (T) -> R?): R? =
3335
Aggregators.median.cast<R?>().aggregateOf(this, expression)
@@ -40,7 +42,7 @@ public inline fun <T, reified R : Comparable<R>> DataColumn<T>.medianOf(noinline
4042
// region DataRow
4143

4244
public fun AnyRow.rowMedianOrNull(): Any? =
43-
Aggregators.median.aggregateCalculatingType(
45+
Aggregators.median.aggregateCalculatingValueType(
4446
values = values().filterIsInstance<Comparable<Any?>>().asIterable(),
4547
valueTypes = df().columns().filter { it.valuesAreComparable() }.map { it.type() }.toSet(),
4648
)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import kotlin.reflect.KProperty
2828

2929
public fun <T : Comparable<T>> DataColumn<T?>.min(): T = minOrNull().suggestIfNull("min")
3030

31-
public fun <T : Comparable<T>> DataColumn<T?>.minOrNull(): T? = Aggregators.min<T>().aggregate(this)
31+
public fun <T : Comparable<T>> DataColumn<T?>.minOrNull(): T? = Aggregators.min<T>().aggregateSingleColumn(this)
3232

3333
public inline fun <T, reified R : Comparable<R>> DataColumn<T>.minBy(noinline selector: (T) -> R?): T =
3434
minByOrNull(selector).suggestIfNull("minBy")

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ public fun <T : Comparable<T>> DataColumn<T?>.percentile(percentile: Double): T
2727
percentileOrNull(percentile).suggestIfNull("percentile")
2828

2929
public fun <T : Comparable<T>> DataColumn<T?>.percentileOrNull(percentile: Double): T? =
30-
Aggregators.percentile(percentile).cast<T>().aggregate(this)
30+
Aggregators.percentile(percentile).cast<T>().aggregateSingleColumn(this)
3131

3232
public inline fun <T, reified R : Comparable<R>> DataColumn<T>.percentileOfOrNull(
3333
percentile: Double,
@@ -44,7 +44,7 @@ public inline fun <T, reified R : Comparable<R>> DataColumn<T>.percentileOf(
4444
// region DataRow
4545

4646
public fun AnyRow.rowPercentileOrNull(percentile: Double): Any? =
47-
Aggregators.percentile(percentile).aggregate(
47+
Aggregators.percentile(percentile).aggregateSingleColumn(
4848
values().filterIsInstance<Comparable<Any?>>().toValueColumn(),
4949
)
5050

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/std.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import kotlin.reflect.typeOf
2626
// region DataColumn
2727

2828
public fun <T : Number> DataColumn<T?>.std(skipNA: Boolean = skipNA_default, ddof: Int = ddof_default): Double =
29-
Aggregators.std(skipNA, ddof).aggregate(this) ?: .0
29+
Aggregators.std(skipNA, ddof).aggregateSingleColumn(this) ?: .0
3030

3131
public inline fun <T, reified R : Number> DataColumn<T>.stdOf(
3232
skipNA: Boolean = skipNA_default,

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sum.kt

+3-3
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,14 @@ import kotlin.reflect.typeOf
4242
// region DataColumn
4343

4444
@JvmName("sumShort")
45-
public fun DataColumn<Short?>.sum(): Int = Aggregators.sum.aggregate(this) as Int
45+
public fun DataColumn<Short?>.sum(): Int = Aggregators.sum.aggregateSingleColumn(this) as Int
4646

4747
@JvmName("sumByte")
48-
public fun DataColumn<Byte?>.sum(): Int = Aggregators.sum.aggregate(this) as Int
48+
public fun DataColumn<Byte?>.sum(): Int = Aggregators.sum.aggregateSingleColumn(this) as Int
4949

5050
@Suppress("UNCHECKED_CAST")
5151
@JvmName("sumNumber")
52-
public fun <T : Number> DataColumn<T?>.sum(): T = Aggregators.sum.aggregate(this) as T
52+
public fun <T : Number> DataColumn<T?>.sum(): T = Aggregators.sum.aggregateSingleColumn(this) as T
5353

5454
@JvmName("sumOfShort")
5555
@OverloadResolutionByLambdaReturnType

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt

+50-27
Original file line numberDiff line numberDiff line change
@@ -25,63 +25,83 @@ internal interface Aggregator<in Value, out Return> {
2525
/**
2626
* Base function of [Aggregator].
2727
*
28-
* Aggregates the given values, taking [type] into account,
28+
* Aggregates the given values, taking [valueType] into account,
2929
* filtering nulls (only if [valueType.isMarkedNullable][KType.isMarkedNullable]),
3030
* and computes a single resulting value.
3131
*
32-
* When using [AggregatorBase], this can be supplied by the [AggregatorBase.aggregator] argument.
32+
* When using [AggregatorBase], this can be supplied by the [AggregatorBase.aggregateSingle] argument.
3333
*
34-
* When the exact [type] is unknown, use [aggregateCalculatingType].
34+
* When the exact [valueType] is unknown, use [aggregateCalculatingValueType].
3535
*/
36-
fun aggregate(values: Iterable<Value?>, type: KType): Return
36+
fun aggregateSingleIterable(values: Iterable<Value?>, valueType: KType): Return
37+
38+
fun calculateValueType(valueTypes: Set<KType>): KType
39+
40+
fun calculateValueType(values: Iterable<Value?>): KType
3741

3842
/**
3943
* Aggregates the data in the given column and computes a single resulting value.
40-
* Calls [aggregate] (with [Iterable] and [KType]).
44+
* Calls [aggregateSingleIterable] (with [Iterable] and [KType]).
4145
*
42-
* See [AggregatorBase.aggregate].
46+
* See [AggregatorBase.aggregateSingleIterable].
4347
*/
44-
fun aggregate(column: DataColumn<Value?>): Return
48+
fun aggregateSingleColumn(column: DataColumn<Value?>): Return
4549

4650
/**
4751
* Aggregates the data in the multiple given columns and computes a single resulting value.
4852
*/
49-
fun aggregate(columns: Iterable<DataColumn<Value?>>): Return
53+
fun aggregateMultipleColumns(columns: Iterable<DataColumn<Value?>>): Return
5054

5155
/**
52-
* Special case of [aggregate] with [Iterable] that calculates the common type of the values at runtime.
53-
* Without [valueTypes], this is a heavy operation and should be avoided when possible.
54-
*
55-
* @param values The values to be aggregated.
56-
* @param valueTypes The types of the values.
57-
* If provided, this can be used to avoid calculating the types of [values] at runtime with reflection.
58-
* It should contain all types of [values].
59-
* If `null` or empty, the types of [values] will be calculated at runtime (heavy!).
60-
*/
61-
fun aggregateCalculatingType(values: Iterable<Value?>, valueTypes: Set<KType>? = null): Return
62-
63-
/**
64-
* Function that can give the return type of [aggregate] as [KType], given the type of the input.
56+
* Function that can give the return type of [aggregateSingleIterable] as [KType], given the type of the input.
6557
* This allows aggregators to avoid runtime type calculations.
6658
*
6759
* @param type The type of the input values.
6860
* @param emptyInput If `true`, the input values are considered empty. This often affects the return type.
69-
* @return The return type of [aggregate] as [KType].
61+
* @return The return type of [aggregateSingleIterable] as [KType].
7062
*/
7163
fun calculateReturnTypeOrNull(type: KType, emptyInput: Boolean): KType?
7264

7365
/**
74-
* Function that can give the return type of [aggregate] with columns as [KType],
66+
* Function that can give the return type of [aggregateMultipleColumns] with columns as [KType],
7567
* given the multiple types of the input.
7668
* This allows aggregators to avoid runtime type calculations.
7769
*
7870
* @param colTypes The types of the input columns.
7971
* @param colsEmpty If `true`, all the input columns are considered empty. This often affects the return type.
80-
* @return The return type of [aggregate] as [KType].
72+
* @return The return type of [aggregateMultipleColumns] as [KType].
8173
*/
82-
fun calculateReturnTypeOrNull(colTypes: Set<KType>, colsEmpty: Boolean): KType?
74+
fun calculateReturnTypeMultipleColumnsOrNull(colTypes: Set<KType>, colsEmpty: Boolean): KType?
75+
76+
val ref: Aggregator<Value, Return> get() = this
8377
}
8478

79+
internal fun <Value, Return> Aggregator<Value, Return>.aggregate(values: Iterable<Value?>, valueType: KType) =
80+
aggregateSingleIterable(values, valueType)
81+
82+
internal fun <Value, Return> Aggregator<Value, Return>.calculateValueType(
83+
values: Iterable<Value?>,
84+
valueTypes: Set<KType>? = null,
85+
) = if (valueTypes != null && valueTypes.isNotEmpty()) {
86+
calculateValueType(valueTypes)
87+
} else {
88+
calculateValueType(values)
89+
}
90+
91+
internal fun <Value, Return> Aggregator<Value, Return>.aggregateCalculatingValueType(
92+
values: Iterable<Value?>,
93+
valueTypes: Set<KType>? = null,
94+
) = aggregateSingleIterable(
95+
values = values,
96+
valueType = calculateValueType(values, valueTypes),
97+
)
98+
99+
internal fun <Value, Return> Aggregator<Value, Return>.aggregate(column: DataColumn<Value?>) =
100+
aggregateSingleColumn(column)
101+
102+
internal fun <Value, Return> Aggregator<Value, Return>.aggregate(columns: Iterable<DataColumn<Value?>>) =
103+
aggregateMultipleColumns(columns)
104+
85105
@Suppress("UNCHECKED_CAST")
86106
@PublishedApi
87107
internal fun <Type> Aggregator<*, *>.cast(): Aggregator<Type, Type> = this as Aggregator<Type, Type>
@@ -90,15 +110,18 @@ internal fun <Type> Aggregator<*, *>.cast(): Aggregator<Type, Type> = this as Ag
90110
@PublishedApi
91111
internal fun <Value, Return> Aggregator<*, *>.cast2(): Aggregator<Value, Return> = this as Aggregator<Value, Return>
92112

93-
/** Type alias for [Aggregator.calculateReturnTypeOrNull] */
113+
/** Type alias for [Aggregator.calculateReturnTypeOrNull] */
94114
internal typealias CalculateReturnTypeOrNull = (type: KType, emptyInput: Boolean) -> KType?
95115

96116
/**
97-
* Type alias for the argument for [Aggregator.aggregate].
117+
* Type alias for the argument for [Aggregator.aggregateSingleIterable].
98118
* Nulls have already been filtered out when this argument is called.
99119
*/
100120
internal typealias Aggregate<Value, Return> = Iterable<Value & Any>.(type: KType) -> Return
101121

122+
internal typealias AggregateBy<Source, Value, Return> =
123+
Iterable<Source>.(sourceType: KType, valueType: KType, selector: (Source) -> Value) -> Return
124+
102125
/** Common case for [CalculateReturnTypeOrNull], preserves return type, but makes it nullable for empty inputs. */
103126
internal val preserveReturnTypeNullIfEmpty: CalculateReturnTypeOrNull = { type, emptyInput ->
104127
type.withNullability(emptyInput)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorBase.kt

+15-69
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators
22

33
import org.jetbrains.kotlinx.dataframe.DataColumn
44
import org.jetbrains.kotlinx.dataframe.api.asIterable
5-
import org.jetbrains.kotlinx.dataframe.api.asSequence
6-
import org.jetbrains.kotlinx.dataframe.impl.commonType
7-
import org.jetbrains.kotlinx.dataframe.impl.nothingType
85
import kotlin.reflect.KType
96
import kotlin.reflect.full.withNullability
107

@@ -15,100 +12,49 @@ import kotlin.reflect.full.withNullability
1512
* or multiple [DataColumns][DataColumn].
1613
*
1714
* @param name The name of this aggregator.
18-
* @param aggregator Functional argument for the [aggregate] function. Nulls are filtered out before this is called.
1915
*/
2016
internal abstract class AggregatorBase<in Value, out Return>(
2117
override val name: String,
22-
protected val getReturnTypeOrNull: CalculateReturnTypeOrNull,
23-
protected val aggregator: Aggregate<Value, Return>,
18+
private val getReturnTypeOrNull: CalculateReturnTypeOrNull,
19+
private val aggregateSingle: Aggregate<Value, Return>,
2420
) : Aggregator<Value, Return> {
2521

2622
/**
2723
* Base function of [Aggregator].
2824
*
29-
* Aggregates the given values, taking [type] into account,
25+
* Aggregates the given values, taking [valueType] into account,
3026
* filtering nulls (only if [valueType.isMarkedNullable][KType.isMarkedNullable]),
3127
* and computes a single resulting value.
3228
*
33-
* When using [AggregatorBase], this can be supplied by the [AggregatorBase.aggregator] argument.
29+
* When using [AggregatorBase], this can be supplied by the [AggregatorBase.aggregateSingle] argument.
3430
*
35-
* When the exact [type] is unknown, use [aggregateCalculatingType].
31+
* When the exact [valueType] is unknown, use [Aggregator.aggregateCalculatingValueType].
3632
*/
3733
@Suppress("UNCHECKED_CAST")
38-
override fun aggregate(values: Iterable<Value?>, type: KType): Return =
39-
aggregator(
34+
override fun aggregateSingleIterable(values: Iterable<Value?>, valueType: KType): Return =
35+
aggregateSingle(
4036
// values =
41-
if (type.isMarkedNullable) {
37+
if (valueType.isMarkedNullable) {
4238
values.asSequence().filterNotNull().asIterable()
4339
} else {
4440
values as Iterable<Value & Any>
4541
},
4642
// type =
47-
type.withNullability(false),
43+
valueType.withNullability(false),
4844
)
4945

50-
/**
51-
* Function that can give the return type of [aggregate] as [KType], given the type of the input.
52-
* This allows aggregators to avoid runtime type calculations.
53-
*
54-
* Uses [getReturnTypeOrNull] to calculate the return type.
55-
*
56-
* @param type The type of the input values.
57-
* @param emptyInput If `true`, the input values are considered empty. This often affects the return type.
58-
* @return The return type of [aggregate] as [KType].
59-
*/
60-
override fun calculateReturnTypeOrNull(type: KType, emptyInput: Boolean): KType? =
61-
getReturnTypeOrNull(type.withNullability(false), emptyInput)
62-
6346
/**
6447
* Aggregates the data in the given column and computes a single resulting value.
6548
*
66-
* Nulls are filtered out by default, then [aggregate] (with [Iterable] and [KType]) is called.
49+
* Nulls are filtered out by default, then [aggregateSingleIterable] (with [Iterable] and [KType]) is called.
6750
*/
6851
@Suppress("UNCHECKED_CAST")
69-
override fun aggregate(column: DataColumn<Value?>): Return =
70-
aggregate(
52+
final override fun aggregateSingleColumn(column: DataColumn<Value?>): Return =
53+
aggregateSingleIterable(
7154
values = column.asIterable(),
72-
type = column.type(),
55+
valueType = column.type(),
7356
)
7457

75-
/** @include [Aggregator.aggregateCalculatingType] */
76-
override fun aggregateCalculatingType(values: Iterable<Value?>, valueTypes: Set<KType>?): Return {
77-
val commonType = if (valueTypes != null && valueTypes.isNotEmpty()) {
78-
valueTypes.commonType(false)
79-
} else {
80-
var hasNulls = false
81-
val classes = values.mapNotNull {
82-
if (it == null) {
83-
hasNulls = true
84-
null
85-
} else {
86-
it.javaClass.kotlin
87-
}
88-
}
89-
if (classes.isEmpty()) {
90-
nothingType(hasNulls)
91-
} else {
92-
classes.commonType(hasNulls)
93-
}
94-
}
95-
return aggregate(values, commonType)
96-
}
97-
98-
/**
99-
* Aggregates the data in the multiple given columns and computes a single resulting value.
100-
* Must be overridden to use.
101-
*/
102-
abstract override fun aggregate(columns: Iterable<DataColumn<Value?>>): Return
103-
104-
/**
105-
* Function that can give the return type of [aggregate] with columns as [KType],
106-
* given the multiple types of the input.
107-
* This allows aggregators to avoid runtime type calculations.
108-
*
109-
* @param colTypes The types of the input columns.
110-
* @param colsEmpty If `true`, all the input columns are considered empty. This often affects the return type.
111-
* @return The return type of [aggregate] as [KType].
112-
*/
113-
abstract override fun calculateReturnTypeOrNull(colTypes: Set<KType>, colsEmpty: Boolean): KType?
58+
final override fun calculateReturnTypeOrNull(type: KType, emptyInput: Boolean): KType? =
59+
getReturnTypeOrNull(type, emptyInput)
11460
}

0 commit comments

Comments
 (0)