Skip to content

Commit c0ed438

Browse files
committed
Merge branch 'mean' into sum
2 parents 13150c4 + b42c7b4 commit c0ed438

File tree

9 files changed

+89
-46
lines changed

9 files changed

+89
-46
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ import org.jetbrains.kotlinx.dataframe.impl.UnifiedNumberTypeOptions
2121
* potentially losing a little precision, but a warning will be given.
2222
*
2323
* See [UnifiedNumberTypeOptions] for these settings.
24+
*
25+
* At the bottom of the graph is [Nothing?][Nothing]. This can be interpreted as `null`.
2426
*/
2527
internal interface UnifyingNumbers {
2628

@@ -40,6 +42,9 @@ internal interface UnifyingNumbers {
4042
* | / |
4143
* | / |
4244
* UByte Byte
45+
* \\ /
46+
* \\ /
47+
* Nothing?
4348
* ```
4449
*/
4550
interface Graph

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/NumberTypeUtils.kt

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ internal fun getUnifiedNumberTypeGraph(
8282

8383
addEdge(typeOf<Short>(), typeOf<UByte>())
8484
addEdge(typeOf<Short>(), typeOf<Byte>())
85+
86+
addEdge(typeOf<UByte>(), nothingType)
87+
addEdge(typeOf<Byte>(), nothingType)
8588
}
8689
}
8790

@@ -121,7 +124,11 @@ internal fun getUnifiedNumberType(
121124
?: error("Can not find common number type for $first and $second")
122125
}
123126

124-
return if (first.isMarkedNullable || second.isMarkedNullable) result.withNullability(true) else result
127+
return if (first.isMarkedNullable || second.isMarkedNullable) {
128+
result.withNullability(true)
129+
} else {
130+
result
131+
}
125132
}
126133

127134
/** @include [getUnifiedNumberType] */
@@ -184,7 +191,7 @@ internal fun Iterable<Number?>.convertToUnifiedNumberType(
184191
options: UnifiedNumberTypeOptions = UnifiedNumberTypeOptions.DEFAULT,
185192
commonNumberType: KType? = null,
186193
): Iterable<Number?> {
187-
val commonNumberType = commonNumberType ?: this.filterNotNull().types().unifiedNumberType(options)
194+
val commonNumberType = commonNumberType ?: this.types().unifiedNumberType(options)
188195
val converter = createConverter(typeOf<Number>(), commonNumberType)!! as (Number) -> Number?
189196
return map {
190197
if (it == null) return@map null
@@ -209,7 +216,7 @@ internal fun Sequence<Number?>.convertToUnifiedNumberType(
209216
options: UnifiedNumberTypeOptions = UnifiedNumberTypeOptions.DEFAULT,
210217
commonNumberType: KType? = null,
211218
): Sequence<Number?> {
212-
val commonNumberType = commonNumberType ?: this.filterNotNull().asIterable().types().unifiedNumberType(options)
219+
val commonNumberType = commonNumberType ?: this.asIterable().types().unifiedNumberType(options)
213220
val converter = createConverter(typeOf<Number>(), commonNumberType)!! as (Number) -> Number?
214221
return map {
215222
if (it == null) return@map null

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -667,19 +667,26 @@ internal fun Any.isBigNumber(): Boolean = this is BigInteger || this is BigDecim
667667
*
668668
* The [KClass] is determined by retrieving the runtime class of each element.
669669
*
670+
* [Nothing::class][Nothing] is used for elements that are `null`.
671+
*
670672
* @return A set of [KClass] objects representing the runtime types of elements in the iterable.
671673
*/
672-
internal fun Iterable<Any>.classes(): Set<KClass<*>> = mapTo(mutableSetOf()) { it::class }
674+
internal fun Iterable<Any?>.classes(): Set<KClass<*>> =
675+
mapTo(mutableSetOf()) {
676+
if (it == null) Nothing::class else it::class
677+
}
673678

674679
/**
675680
* Returns a set of [KType] objects representing the star-projected types of the runtime classes
676681
* of all unique elements in the iterable.
677682
*
678-
* The method internally relies on the [classes] function to collect the runtime classes of the
679-
* elements in the iterable and then maps each class to its star-projected type.
680-
*
681683
* This can be a heavy operation!
682684
*
685+
* [typeOf<Nothing?>()][nullableNothingType] is used for elements that are `null`.
686+
*
683687
* @return A set of [KType] objects corresponding to the star-projected runtime types of elements in the iterable.
684688
*/
685-
internal fun Iterable<Any>.types(): Set<KType> = classes().mapTo(mutableSetOf()) { it.createStarProjectedType(false) }
689+
internal fun Iterable<Any?>.types(): Set<KType> =
690+
mapTo(mutableSetOf()) {
691+
if (it == null) nullableNothingType else it::class.createStarProjectedType(false)
692+
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ import kotlin.reflect.full.withNullability
1313
* The [AggregatorBase] class is a base implementation of this interface.
1414
*
1515
* @param Value The type of the values to be aggregated.
16-
* This can be nullable for [Iterables][Iterable] or not, depending on the use case.
17-
* For columns, [Value] will always be considered nullable; nulls are filtered out from columns anyway.
16+
* The input may always contain nulls; they are filtered out.
1817
* @param Return The type of the resulting value. Can optionally be nullable.
1918
*/
2019
@PublishedApi
@@ -26,17 +25,19 @@ internal interface Aggregator<in Value, out Return> {
2625
/**
2726
* Base function of [Aggregator].
2827
*
29-
* Aggregates the given values, taking [type] into account, and computes a single resulting value.
28+
* Aggregates the given values, taking [type] into account,
29+
* filtering nulls (only if [type.isMarkedNullable][KType.isMarkedNullable]),
30+
* and computes a single resulting value.
3031
*
3132
* When using [AggregatorBase], this can be supplied by the [AggregatorBase.aggregator] argument.
3233
*
3334
* When the exact [type] is unknown, use [aggregateCalculatingType].
3435
*/
35-
fun aggregate(values: Iterable<Value>, type: KType): Return
36+
fun aggregate(values: Iterable<Value?>, type: KType): Return
3637

3738
/**
3839
* Aggregates the data in the given column and computes a single resulting value.
39-
* Nulls are filtered out by default, then [aggregate] (with [Iterable] and [KType]) is called.
40+
* Calls [aggregate] (with [Iterable] and [KType]).
4041
*
4142
* See [AggregatorBase.aggregate].
4243
*/
@@ -55,9 +56,9 @@ internal interface Aggregator<in Value, out Return> {
5556
* @param valueTypes The types of the values.
5657
* If provided, this can be used to avoid calculating the types of [values] at runtime with reflection.
5758
* It should contain all types of [values].
58-
* If `null`, the types of [values] will be calculated at runtime (heavy!).
59+
* If `null` or empty, the types of [values] will be calculated at runtime (heavy!).
5960
*/
60-
fun aggregateCalculatingType(values: Iterable<Value>, valueTypes: Set<KType>? = null): Return
61+
fun aggregateCalculatingType(values: Iterable<Value?>, valueTypes: Set<KType>? = null): Return
6162

6263
/**
6364
* Function that can give the return type of [aggregate] as [KType], given the type of the input.
@@ -92,8 +93,11 @@ internal fun <Value, Return> Aggregator<*, *>.cast2(): Aggregator<Value, Return>
9293
/** Type alias for [Aggregator.calculateReturnTypeOrNull] */
9394
internal typealias CalculateReturnTypeOrNull = (type: KType, emptyInput: Boolean) -> KType?
9495

95-
/** Type alias for [Aggregator.aggregate]. */
96-
internal typealias Aggregate<Value, Return> = Iterable<Value>.(type: KType) -> Return
96+
/**
97+
* Type alias for the functional argument that implements [Aggregator.aggregate].
98+
* Nulls have already been filtered out by the time this function is invoked.
99+
*/
100+
internal typealias Aggregate<Value, Return> = Iterable<Value & Any>.(type: KType) -> Return
97101

98102
/** Common case for [CalculateReturnTypeOrNull], preserves return type, but makes it nullable for empty inputs. */
99103
internal val preserveReturnTypeNullIfEmpty: CalculateReturnTypeOrNull = { type, emptyInput ->

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorBase.kt

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
44
import org.jetbrains.kotlinx.dataframe.api.asIterable
55
import org.jetbrains.kotlinx.dataframe.api.asSequence
66
import org.jetbrains.kotlinx.dataframe.impl.commonType
7+
import org.jetbrains.kotlinx.dataframe.impl.nothingType
78
import kotlin.reflect.KType
89
import kotlin.reflect.full.withNullability
910

@@ -14,7 +15,7 @@ import kotlin.reflect.full.withNullability
1415
* or multiple [DataColumns][DataColumn].
1516
*
1617
* @param name The name of this aggregator.
17-
* @param aggregator Functional argument for the [aggregate] function.
18+
* @param aggregator Functional argument for the [aggregate] function. Nulls are filtered out before this is called.
1819
*/
1920
internal abstract class AggregatorBase<in Value, out Return>(
2021
override val name: String,
@@ -25,13 +26,26 @@ internal abstract class AggregatorBase<in Value, out Return>(
2526
/**
2627
* Base function of [Aggregator].
2728
*
28-
* Aggregates the given values, taking [type] into account, and computes a single resulting value.
29+
* Aggregates the given values, taking [type] into account,
30+
* filtering nulls (only if [type.isMarkedNullable][KType.isMarkedNullable]),
31+
* and computes a single resulting value.
2932
*
30-
* Uses [aggregator] to compute the result.
33+
* Uses the [aggregator] argument of this [AggregatorBase] to compute the result.
3134
*
3235
* When the exact [type] is unknown, use [aggregateCalculatingType].
3336
*/
34-
override fun aggregate(values: Iterable<Value>, type: KType): Return = aggregator(values, type)
37+
@Suppress("UNCHECKED_CAST")
38+
override fun aggregate(values: Iterable<Value?>, type: KType): Return =
39+
aggregator(
40+
// values =
41+
if (type.isMarkedNullable) {
42+
values.asSequence().filterNotNull().asIterable()
43+
} else {
44+
values as Iterable<Value & Any>
45+
},
46+
// type =
47+
type.withNullability(false),
48+
)
3549

3650
/**
3751
* Function that can give the return type of [aggregate] as [KType], given the type of the input.
@@ -44,7 +58,7 @@ internal abstract class AggregatorBase<in Value, out Return>(
4458
* @return The return type of [aggregate] as [KType].
4559
*/
4660
override fun calculateReturnTypeOrNull(type: KType, emptyInput: Boolean): KType? =
47-
getReturnTypeOrNull(type, emptyInput)
61+
getReturnTypeOrNull(type.withNullability(false), emptyInput)
4862

4963
/**
5064
* Aggregates the data in the given column and computes a single resulting value.
@@ -54,18 +68,13 @@ internal abstract class AggregatorBase<in Value, out Return>(
5468
@Suppress("UNCHECKED_CAST")
5569
override fun aggregate(column: DataColumn<Value?>): Return =
5670
aggregate(
57-
values =
58-
if (column.hasNulls()) {
59-
column.asSequence().filterNotNull().asIterable()
60-
} else {
61-
column.asIterable() as Iterable<Value>
62-
},
63-
type = column.type().withNullability(false),
71+
values = column.asIterable(),
72+
type = column.type(),
6473
)
6574

6675
/** @include [Aggregator.aggregateCalculatingType] */
67-
override fun aggregateCalculatingType(values: Iterable<Value>, valueTypes: Set<KType>?): Return {
68-
val commonType = if (valueTypes != null) {
76+
override fun aggregateCalculatingType(values: Iterable<Value?>, valueTypes: Set<KType>?): Return {
77+
val commonType = if (valueTypes != null && valueTypes.isNotEmpty()) {
6978
valueTypes.commonType(false)
7079
} else {
7180
var hasNulls = false
@@ -77,7 +86,11 @@ internal abstract class AggregatorBase<in Value, out Return>(
7786
it.javaClass.kotlin
7887
}
7988
}
80-
classes.commonType(hasNulls)
89+
if (classes.isEmpty()) {
90+
nothingType(hasNulls)
91+
} else {
92+
classes.commonType(hasNulls)
93+
}
8194
}
8295
return aggregate(values, commonType)
8396
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/FlatteningAggregator.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import kotlin.reflect.full.withNullability
88
/**
99
* Simple [Aggregator] implementation with flattening behavior for multiple columns.
1010
*
11-
* Nulls are filtered from columns.
11+
* Nulls are filtered out.
1212
*
1313
* When called on multiple columns,
1414
* the columns are flattened into a single list of values, filtering nulls as usual;

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/TwoStepAggregator.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import kotlin.reflect.full.withNullability
1010
/**
1111
* A slightly more advanced [Aggregator] implementation.
1212
*
13-
* Nulls are filtered from columns.
13+
* Nulls are filtered out.
1414
*
1515
* When called on multiple columns, this [Aggregator] works in two steps:
1616
* First, it aggregates within a [DataColumn]/[Iterable] ([stepOneAggregator]) with their (given) type,

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/TwoStepNumbersAggregator.kt

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
77
import org.jetbrains.kotlinx.dataframe.impl.UnifiedNumberTypeOptions.Companion.PRIMITIVES_ONLY
88
import org.jetbrains.kotlinx.dataframe.impl.anyNull
99
import org.jetbrains.kotlinx.dataframe.impl.convertToUnifiedNumberType
10+
import org.jetbrains.kotlinx.dataframe.impl.isNothing
1011
import org.jetbrains.kotlinx.dataframe.impl.nothingType
1112
import org.jetbrains.kotlinx.dataframe.impl.primitiveNumberTypes
1213
import org.jetbrains.kotlinx.dataframe.impl.renderType
@@ -24,7 +25,7 @@ private val logger = KotlinLogging.logger { }
2425
* [Aggregator] made specifically for number calculations.
2526
* Mixed number types are [unified][UnifyingNumbers] to [primitives][PRIMITIVES_ONLY].
2627
*
27-
* Nulls are filtered from columns.
28+
* Nulls are filtered out.
2829
*
2930
* When called on multiple columns (with potentially mixed [Number] types),
3031
* this [Aggregator] works in two steps:
@@ -106,18 +107,19 @@ internal class TwoStepNumbersAggregator<out Return : Number?>(
106107
*
107108
* Aggregates the given values, taking [type] into account, and computes a single resulting value.
108109
*
110+
* Nulls are filtered out (only if [type.isMarkedNullable][KType.isMarkedNullable]).
111+
*
109112
* Uses [aggregator] to compute the result.
110113
*
111114
* This function is modified to call [aggregateCalculatingType] when it encounters mixed number types.
112115
* This is not optimal and should be avoided by calling [aggregateCalculatingType] with known number types directly.
113116
*
114117
* When the exact [type] is unknown, use [aggregateCalculatingType].
115118
*/
116-
override fun aggregate(values: Iterable<Number>, type: KType): Return {
119+
override fun aggregate(values: Iterable<Number?>, type: KType): Return {
117120
require(type.isSubtypeOf(typeOf<Number?>())) {
118121
"${TwoStepNumbersAggregator::class.simpleName}: Type $type is not a subtype of Number?"
119122
}
120-
121123
return when (type.withNullability(false)) {
122124
// If the type is not a specific number, but rather a mixed Number, we unify the types first.
123125
// This is heavy and could be avoided by calling aggregate with a specific number type
@@ -144,21 +146,21 @@ internal class TwoStepNumbersAggregator<out Return : Number?>(
144146
* @param valueTypes The types of the numbers.
145147
* If provided, this can be used to avoid calculating the types of [values] at runtime with reflection.
146148
* It should contain all types of [values].
147-
* If `null`, the types of [values] will be calculated at runtime (heavy!).
149+
* If `null` or empty, the types of [values] will be calculated at runtime (heavy!).
148150
*/
149151
@Suppress("UNCHECKED_CAST")
150-
override fun aggregateCalculatingType(values: Iterable<Number>, valueTypes: Set<KType>?): Return {
151-
val valueTypes = valueTypes ?: values.types()
152-
val commonType = valueTypes
153-
.unifiedNumberType(PRIMITIVES_ONLY)
154-
.withNullability(false)
152+
override fun aggregateCalculatingType(values: Iterable<Number?>, valueTypes: Set<KType>?): Return {
153+
val valueTypes = valueTypes?.takeUnless { it.isEmpty() } ?: values.types()
154+
val commonType = valueTypes.unifiedNumberType(PRIMITIVES_ONLY)
155155

156-
if (commonType == typeOf<Double>() && (typeOf<ULong>() in valueTypes || typeOf<Long>() in valueTypes)) {
156+
if (commonType.isSubtypeOf(typeOf<Double?>()) &&
157+
(typeOf<ULong>() in valueTypes || typeOf<Long>() in valueTypes)
158+
) {
157159
logger.warn {
158160
"Number unification of Long -> Double happened during aggregation. Loss of precision may have occurred."
159161
}
160162
}
161-
if (commonType !in primitiveNumberTypes && commonType != nothingType) {
163+
if (commonType.withNullability(false) !in primitiveNumberTypes && !commonType.isNothing) {
162164
throw IllegalArgumentException(
163165
"Cannot calculate $name of ${renderType(commonType)}, only primitive numbers are supported.",
164166
)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/ofRowExpression.kt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,18 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregateInternal
1313
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregator
1414
import org.jetbrains.kotlinx.dataframe.impl.aggregation.internal
1515
import org.jetbrains.kotlinx.dataframe.impl.emptyPath
16+
import kotlin.reflect.full.withNullability
1617
import kotlin.reflect.typeOf
1718

1819
@PublishedApi
1920
internal inline fun <C, reified V, R> Aggregator<V, R>.aggregateOf(
2021
values: Iterable<C>,
2122
noinline transform: (C) -> V,
22-
): R = aggregate(values.asSequence().map(transform).asIterable(), typeOf<V>())
23+
): R =
24+
aggregate(
25+
values = values.asSequence().mapNotNull(transform).asIterable(),
26+
type = typeOf<V>().withNullability(false),
27+
)
2328

2429
@PublishedApi
2530
internal inline fun <C, reified V, R> Aggregator<V, R>.aggregateOf(

0 commit comments

Comments
 (0)