Skip to content

Commit 418acfe

Browse files
committed
starting min/max
1 parent 436c442 commit 418acfe

File tree

5 files changed

+101
-24
lines changed

5 files changed

+101
-24
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt

+7-7
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public inline fun <reified T : Comparable<T>> AnyRow.rowMaxOf(): T = rowMaxOfOrN
5858
public fun <T> DataFrame<T>.max(): DataRow<T> = maxFor(intraComparableColumns())
5959

6060
public fun <T, C : Comparable<C>> DataFrame<T>.maxFor(columns: ColumnsForAggregateSelector<T, C?>): DataRow<T> =
61-
Aggregators.max.aggregateFor(this, columns)
61+
Aggregators.max<C>().aggregateFor(this, columns)
6262

6363
public fun <T> DataFrame<T>.maxFor(vararg columns: String): DataRow<T> = maxFor { columns.toComparableColumns() }
6464

@@ -84,7 +84,7 @@ public fun <T, C : Comparable<C>> DataFrame<T>.max(vararg columns: KProperty<C?>
8484
maxOrNull(*columns).suggestIfNull("max")
8585

8686
public fun <T, C : Comparable<C>> DataFrame<T>.maxOrNull(columns: ColumnsSelector<T, C?>): C? =
87-
Aggregators.max.aggregateAll(this, columns) as C?
87+
Aggregators.max<C>().aggregateAll(this, columns) as C?
8888

8989
public fun <T> DataFrame<T>.maxOrNull(vararg columns: String): Comparable<Any?>? =
9090
maxOrNull { columns.toComparableColumns() }
@@ -140,7 +140,7 @@ public fun <T> Grouped<T>.max(): DataFrame<T> = maxFor(intraComparableColumns())
140140
@Refine
141141
@Interpretable("GroupByMax0")
142142
public fun <T, C : Comparable<C>> Grouped<T>.maxFor(columns: ColumnsForAggregateSelector<T, C?>): DataFrame<T> =
143-
Aggregators.max.aggregateFor(this, columns)
143+
Aggregators.max<C>().aggregateFor(this, columns)
144144

145145
public fun <T> Grouped<T>.maxFor(vararg columns: String): DataFrame<T> = maxFor { columns.toComparableColumns() }
146146

@@ -155,7 +155,7 @@ public fun <T, C : Comparable<C>> Grouped<T>.maxFor(vararg columns: KProperty<C?
155155
@Refine
156156
@Interpretable("GroupByMax0")
157157
public fun <T, C : Comparable<C>> Grouped<T>.max(name: String? = null, columns: ColumnsSelector<T, C?>): DataFrame<T> =
158-
Aggregators.max.aggregateAll(this, name, columns)
158+
Aggregators.max<C>().aggregateAll(this, name, columns)
159159

160160
public fun <T> Grouped<T>.max(vararg columns: String, name: String? = null): DataFrame<T> =
161161
max(name) { columns.toComparableColumns() }
@@ -175,7 +175,7 @@ public fun <T, C : Comparable<C>> Grouped<T>.max(vararg columns: KProperty<C?>,
175175
public fun <T, C : Comparable<C>> Grouped<T>.maxOf(
176176
name: String? = null,
177177
expression: RowExpression<T, C>,
178-
): DataFrame<T> = Aggregators.max.aggregateOfDelegated(this, name) { maxOfOrNull(expression) }
178+
): DataFrame<T> = Aggregators.max<C>().aggregateOfDelegated(this, name) { maxOfOrNull(expression) }
179179

180180
@Interpretable("GroupByReduceExpression")
181181
public fun <T, G, R : Comparable<R>> GroupBy<T, G>.maxBy(rowExpression: RowExpression<G, R?>): ReducedGroupBy<T, G> =
@@ -256,7 +256,7 @@ public fun <T> PivotGroupBy<T>.max(separate: Boolean = false): DataFrame<T> = ma
256256
public fun <T, R : Comparable<R>> PivotGroupBy<T>.maxFor(
257257
separate: Boolean = false,
258258
columns: ColumnsForAggregateSelector<T, R?>,
259-
): DataFrame<T> = Aggregators.max.aggregateFor(this, separate, columns)
259+
): DataFrame<T> = Aggregators.max<R>().aggregateFor(this, separate, columns)
260260

261261
public fun <T> PivotGroupBy<T>.maxFor(vararg columns: String, separate: Boolean = false): DataFrame<T> =
262262
maxFor(separate) { columns.toComparableColumns() }
@@ -274,7 +274,7 @@ public fun <T, R : Comparable<R>> PivotGroupBy<T>.maxFor(
274274
): DataFrame<T> = maxFor(separate) { columns.toColumnSet() }
275275

276276
public fun <T, R : Comparable<R>> PivotGroupBy<T>.max(columns: ColumnsSelector<T, R?>): DataFrame<T> =
277-
Aggregators.max.aggregateAll(this, columns)
277+
Aggregators.max<R>().aggregateAll(this, columns)
278278

279279
public fun <T> PivotGroupBy<T>.max(vararg columns: String): DataFrame<T> = max { columns.toComparableColumns() }
280280

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt

+18-13
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
1212
import org.jetbrains.kotlinx.dataframe.annotations.Refine
1313
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1414
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
15-
import org.jetbrains.kotlinx.dataframe.columns.values
1615
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators
1716
import org.jetbrains.kotlinx.dataframe.impl.aggregation.intraComparableColumns
1817
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll
18+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateByOrNull
1919
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor
20+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf
2021
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfDelegated
2122
import org.jetbrains.kotlinx.dataframe.impl.columns.toComparableColumns
2223
import org.jetbrains.kotlinx.dataframe.impl.indexOfMin
@@ -27,17 +28,19 @@ import kotlin.reflect.KProperty
2728

2829
public fun <T : Comparable<T>> DataColumn<T?>.min(): T = minOrNull().suggestIfNull("min")
2930

30-
public fun <T : Comparable<T>> DataColumn<T?>.minOrNull(): T? = asSequence().filterNotNull().minOrNull()
31+
public fun <T : Comparable<T>> DataColumn<T?>.minOrNull(): T? = Aggregators.min<T>().aggregate(this)
3132

32-
public fun <T, R : Comparable<R>> DataColumn<T>.minBy(selector: (T) -> R): T =
33+
public inline fun <T, reified R : Comparable<R>> DataColumn<T>.minBy(noinline selector: (T) -> R?): T =
3334
minByOrNull(selector).suggestIfNull("minBy")
3435

35-
public fun <T, R : Comparable<R>> DataColumn<T>.minByOrNull(selector: (T) -> R): T? = values.minByOrNull(selector)
36+
public inline fun <T, reified R : Comparable<R>> DataColumn<T>.minByOrNull(noinline selector: (T) -> R?): T? =
37+
Aggregators.min<R>().aggregateByOrNull(this, selector)
3638

37-
public fun <T, R : Comparable<R>> DataColumn<T>.minOf(selector: (T) -> R): R =
39+
public inline fun <T, reified R : Comparable<R>> DataColumn<T>.minOf(crossinline selector: (T) -> R?): R =
3840
minOfOrNull(selector).suggestIfNull("minOf")
3941

40-
public fun <T, R : Comparable<R>> DataColumn<T>.minOfOrNull(selector: (T) -> R): R? = values.minOfOrNull(selector)
42+
public inline fun <T, reified R : Comparable<R>> DataColumn<T>.minOfOrNull(crossinline selector: (T) -> R?): R? =
43+
Aggregators.min<R>().aggregateOf(this, selector)
4144

4245
// endregion
4346

@@ -47,6 +50,8 @@ public fun AnyRow.rowMinOrNull(): Any? = values().filterIsInstance<Comparable<*>
4750

4851
public fun AnyRow.rowMin(): Any = rowMinOrNull().suggestIfNull("rowMin")
4952

53+
// todo add rowMinBy?
54+
5055
public inline fun <reified T : Comparable<T>> AnyRow.rowMinOfOrNull(): T? = values().filterIsInstance<T>().minOrNull()
5156

5257
public inline fun <reified T : Comparable<T>> AnyRow.rowMinOf(): T = rowMinOfOrNull<T>().suggestIfNull("rowMinOf")
@@ -58,7 +63,7 @@ public inline fun <reified T : Comparable<T>> AnyRow.rowMinOf(): T = rowMinOfOrN
5863
public fun <T> DataFrame<T>.min(): DataRow<T> = minFor(intraComparableColumns())
5964

6065
public fun <T, C : Comparable<C>> DataFrame<T>.minFor(columns: ColumnsForAggregateSelector<T, C?>): DataRow<T> =
61-
Aggregators.min.aggregateFor(this, columns)
66+
Aggregators.min<C>().aggregateFor(this, columns)
6267

6368
public fun <T> DataFrame<T>.minFor(vararg columns: String): DataRow<T> = minFor { columns.toComparableColumns() }
6469

@@ -84,7 +89,7 @@ public fun <T, C : Comparable<C>> DataFrame<T>.min(vararg columns: KProperty<C?>
8489
minOrNull(*columns).suggestIfNull("min")
8590

8691
public fun <T, C : Comparable<C>> DataFrame<T>.minOrNull(columns: ColumnsSelector<T, C?>): C? =
87-
Aggregators.min.aggregateAll(this, columns) as C?
92+
Aggregators.min<C>().aggregateAll(this, columns) as C?
8893

8994
public fun <T> DataFrame<T>.minOrNull(vararg columns: String): Comparable<Any?>? =
9095
minOrNull { columns.toComparableColumns() }
@@ -140,7 +145,7 @@ public fun <T> Grouped<T>.min(): DataFrame<T> = minFor(intraComparableColumns())
140145
@Refine
141146
@Interpretable("GroupByMin0")
142147
public fun <T, C : Comparable<C>> Grouped<T>.minFor(columns: ColumnsForAggregateSelector<T, C?>): DataFrame<T> =
143-
Aggregators.min.aggregateFor(this, columns)
148+
Aggregators.min<C>().aggregateFor(this, columns)
144149

145150
public fun <T> Grouped<T>.minFor(vararg columns: String): DataFrame<T> = minFor { columns.toComparableColumns() }
146151

@@ -155,7 +160,7 @@ public fun <T, C : Comparable<C>> Grouped<T>.minFor(vararg columns: KProperty<C?
155160
@Refine
156161
@Interpretable("GroupByMin0")
157162
public fun <T, C : Comparable<C>> Grouped<T>.min(name: String? = null, columns: ColumnsSelector<T, C?>): DataFrame<T> =
158-
Aggregators.min.aggregateAll(this, name, columns)
163+
Aggregators.min<C>().aggregateAll(this, name, columns)
159164

160165
public fun <T> Grouped<T>.min(vararg columns: String, name: String? = null): DataFrame<T> =
161166
min(name) { columns.toComparableColumns() }
@@ -175,7 +180,7 @@ public fun <T, C : Comparable<C>> Grouped<T>.min(vararg columns: KProperty<C?>,
175180
public fun <T, C : Comparable<C>> Grouped<T>.minOf(
176181
name: String? = null,
177182
expression: RowExpression<T, C>,
178-
): DataFrame<T> = Aggregators.min.aggregateOfDelegated(this, name) { minOfOrNull(expression) }
183+
): DataFrame<T> = Aggregators.min<C>().aggregateOfDelegated(this, name) { minOfOrNull(expression) }
179184

180185
@Interpretable("GroupByReduceExpression")
181186
public fun <T, G, R : Comparable<R>> GroupBy<T, G>.minBy(rowExpression: RowExpression<G, R?>): ReducedGroupBy<T, G> =
@@ -257,7 +262,7 @@ public fun <T> PivotGroupBy<T>.min(separate: Boolean = false): DataFrame<T> = mi
257262
public fun <T, R : Comparable<R>> PivotGroupBy<T>.minFor(
258263
separate: Boolean = false,
259264
columns: ColumnsForAggregateSelector<T, R?>,
260-
): DataFrame<T> = Aggregators.min.aggregateFor(this, separate, columns)
265+
): DataFrame<T> = Aggregators.min<R>().aggregateFor(this, separate, columns)
261266

262267
public fun <T> PivotGroupBy<T>.minFor(vararg columns: String, separate: Boolean = false): DataFrame<T> =
263268
minFor(separate) { columns.toComparableColumns() }
@@ -275,7 +280,7 @@ public fun <T, R : Comparable<R>> PivotGroupBy<T>.minFor(
275280
): DataFrame<T> = minFor(separate) { columns.toColumnSet() }
276281

277282
public fun <T, R : Comparable<R>> PivotGroupBy<T>.min(columns: ColumnsSelector<T, R?>): DataFrame<T> =
278-
Aggregators.min.aggregateAll(this, columns)
283+
Aggregators.min<R>().aggregateAll(this, columns)
279284

280285
public fun <T> PivotGroupBy<T>.min(vararg columns: String): DataFrame<T> = min { columns.toComparableColumns() }
281286

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt

+14-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators
33
import org.jetbrains.kotlinx.dataframe.math.mean
44
import org.jetbrains.kotlinx.dataframe.math.meanTypeConversion
55
import org.jetbrains.kotlinx.dataframe.math.median
6+
import org.jetbrains.kotlinx.dataframe.math.minOrNull
67
import org.jetbrains.kotlinx.dataframe.math.percentile
78
import org.jetbrains.kotlinx.dataframe.math.std
89
import org.jetbrains.kotlinx.dataframe.math.stdTypeConversion
@@ -86,13 +87,23 @@ internal object Aggregators {
8687
getAggregator: (Param1, Param2) -> AggregatorProvider<AggregatorType>,
8788
) = AggregatorOptionSwitch2.Factory(getAggregator)
8889

90+
private fun <Value, Return> AggregatorProvider<Aggregator<Value, Return>>.asByAggregator(
91+
aggregatorBy: AggregateBy<Any?, Value, Return>,
92+
) where Value : Any, Value : Comparable<Value> = ByAggregator.Factory(this, aggregatorBy)
93+
8994
// T: Comparable<T> -> T?
90-
val min by twoStepPreservingType<Comparable<Any?>> {
91-
minOrNull()
95+
/**
 * Typed accessor for the private `min` aggregator property: returns it re-typed through `cast2`
 * so that it takes values of type [T] and produces `T?`.
 */
fun <T : Comparable<T>?> min() = min.cast2<T, T?>()
96+
97+
private val min by twoStepPreservingType<Comparable<Any?>?> { type ->
98+
minOrNull(type)
99+
}.asByAggregator { sourceType, valueType, selector ->
100+
minByOrNull(selector)
92101
}
93102

94103
// T: Comparable<T> -> T?
95-
val max by twoStepPreservingType<Comparable<Any?>> {
104+
/**
 * Typed accessor for the private `max` aggregator property: returns it re-typed through `cast2`
 * so that it takes values of type [T] and produces `T?`.
 */
fun <T : Comparable<T>?> max() = max.cast2<T, T?>()
105+
106+
private val max by twoStepPreservingType<Comparable<Any?>> {
96107
maxOrNull()
97108
}
98109

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/modes/aggregateBy.kt

+42-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.jetbrains.kotlinx.dataframe.impl.aggregation.modes
22

3+
import org.jetbrains.kotlinx.dataframe.DataColumn
34
import org.jetbrains.kotlinx.dataframe.DataFrame
45
import org.jetbrains.kotlinx.dataframe.DataFrameExpression
56
import org.jetbrains.kotlinx.dataframe.DataRow
@@ -8,10 +9,13 @@ import org.jetbrains.kotlinx.dataframe.api.GroupBy
89
import org.jetbrains.kotlinx.dataframe.api.Grouped
910
import org.jetbrains.kotlinx.dataframe.api.cast
1011
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregateInternal
12+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregator
13+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators
1114
import org.jetbrains.kotlinx.dataframe.impl.namedValues
15+
import kotlin.reflect.typeOf
1216

1317
@CandidateForRemoval
14-
internal fun <T> Grouped<T>.aggregateBy(body: DataFrameExpression<T, DataRow<T>?>): DataFrame<T> {
18+
internal fun <T> Grouped<T>.aggregateByOrNull(body: DataFrameExpression<T, DataRow<T>?>): DataFrame<T> {
1519
require(this is GroupBy<*, T>)
1620
val keyColumns = keys.columnNames().toSet()
1721
return aggregateInternal {
@@ -21,3 +25,40 @@ internal fun <T> Grouped<T>.aggregateBy(body: DataFrameExpression<T, DataRow<T>?
2125
}
2226
}.cast()
2327
}
28+
29+
/**
 * Aggregates [values] using this [Aggregator] "by" the provided [selector]: instead of returning
 * the aggregated key itself, it returns the element of [values] that produced that key.
 *
 * The strategy is chosen from the aggregator's `name`:
 * - for the `min` / `max` aggregators, elements whose [selector] result is `null` are skipped and
 *   the standard library `minByOrNull` / `maxByOrNull` picks the element;
 * - for any other aggregator, the selected keys are aggregated with [aggregate] and the first
 *   element whose key equals the aggregated result is returned.
 *
 * @param values the elements to choose from.
 * @param selector maps an element to the (nullable) key it is compared by.
 * @return the chosen element, or `null` when [values] is empty or no element matches.
 */
@Suppress("UNCHECKED_CAST")
@PublishedApi
internal inline fun <C, reified V : Comparable<V>> Aggregator<V, V?>.aggregateByOrNull(
    values: Iterable<C>,
    noinline selector: (C) -> V?,
): C? =
    when (name) { // todo?
        Aggregators.min<Comparable<Any?>>().name ->
            values.asSequence()
                .filterNot { selector(it) == null }
                // the cast is safe here: elements with a null key were filtered out above
                .minByOrNull(selector as (C) -> V)

        Aggregators.max<Comparable<Any?>>().name ->
            values.asSequence()
                .filterNot { selector(it) == null }
                // the cast is safe here: elements with a null key were filtered out above
                .maxByOrNull(selector as (C) -> V)

        else -> {
            // less efficient but more generic
            val aggregateResult = aggregate(
                values = values.asSequence().map { selector(it) }.asIterable(),
                type = typeOf<V>(),
            )
            // firstOrNull rather than first: an empty input (or a result that matches no element)
            // must yield null, as the "OrNull" contract promises, instead of throwing
            // NoSuchElementException.
            values.firstOrNull { selector(it) == aggregateResult }
        }
    }
59+
60+
/**
 * Column flavour of `aggregateByOrNull`: reads the values of [column] and hands them, together
 * with the unchanged [selector], to the overload that works on an [Iterable].
 *
 * @return whatever the [Iterable] overload returns for the column's values.
 */
@PublishedApi
internal inline fun <C, reified V : Comparable<V>?> Aggregator<V, V?>.aggregateByOrNull(
    column: DataColumn<C>,
    noinline selector: (C) -> V?,
): C? {
    val columnValues = column.values()
    return aggregateByOrNull(columnValues, selector)
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package org.jetbrains.kotlinx.dataframe.math
2+
3+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.preserveReturnTypeNullIfEmpty
4+
import kotlin.reflect.KType
5+
import kotlin.reflect.full.withNullability
6+
7+
/**
 * Finds the minimum of this iterable by viewing it as a [Sequence] and delegating to the
 * sequence-based `minOrNull(type)`; [type] is forwarded unchanged.
 */
@PublishedApi
internal fun <T : Comparable<T>> Iterable<T?>.minOrNull(type: KType): T? {
    val elements: Sequence<T?> = asSequence()
    return elements.minOrNull(type)
}
9+
10+
/**
 * Returns the smallest element of this sequence, or `null` when there is nothing to compare.
 *
 * When [type] is marked nullable, `null` elements are dropped first and the search is repeated
 * with the non-nullable version of [type]. Otherwise the sequence is treated as holding only
 * non-null values and the standard library `minOrNull()` is applied directly.
 */
@Suppress("UNCHECKED_CAST")
@PublishedApi
internal fun <T : Comparable<T>> Sequence<T?>.minOrNull(type: KType): T? =
    if (type.isMarkedNullable) {
        filterNotNull().minOrNull(type.withNullability(false))
    } else {
        (this as Sequence<T>).minOrNull()
    }
18+
19+
/** T: Comparable<T> -> T(?) */
20+
internal val minTypeConversion = preserveReturnTypeNullIfEmpty

0 commit comments

Comments
 (0)