Skip to content

Commit 9b356c8

Browse files
committed
added medianBy overloads
1 parent 817d570 commit 9b356c8

File tree

2 files changed

+118
-19
lines changed
  • core/src
    • main/kotlin/org/jetbrains/kotlinx/dataframe/api
    • test/kotlin/org/jetbrains/kotlinx/dataframe/statistics

2 files changed

+118
-19
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt

+114-14
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ import kotlin.reflect.KProperty
3232
* numbers -> Double
3333
* comparable -> itself
3434
*
35-
* medianBy is new
35+
* TODO cases where the lambda dictates the return type require explicit type arguments for
36+
* non-number, comparable overloads: https://youtrack.jetbrains.com/issue/KT-76683
37+
* so, `df.median { intCol }` works, but needs `df.median<_, String> { stringCol }`
38+
*
39+
* medianBy is new for all overloads :)
3640
*/
3741

3842
// region DataColumn
@@ -65,19 +69,18 @@ public inline fun <T, reified R : Comparable<R & Any>?> DataColumn<T>.medianByOr
6569
crossinline selector: (T) -> R,
6670
): T? = Aggregators.medianCommon<R>(skipNaN).aggregateByOrNull(this, selector)
6771

68-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
72+
// TODO, requires explicit type R due to https://youtrack.jetbrains.com/issue/KT-76683
6973
@OverloadResolutionByLambdaReturnType
7074
public inline fun <T, reified R : Comparable<R & Any>?> DataColumn<T>.medianOf(
7175
crossinline expression: (T) -> R,
7276
): R & Any = medianOfOrNull(expression).suggestIfNull("medianOf")
7377

74-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
78+
// TODO, requires explicit type R due to https://youtrack.jetbrains.com/issue/KT-76683
7579
@OverloadResolutionByLambdaReturnType
7680
public inline fun <T, reified R : Comparable<R & Any>?> DataColumn<T>.medianOfOrNull(
7781
crossinline expression: (T) -> R,
7882
): R? = Aggregators.medianComparables<R>().aggregateOf(this, expression)
7983

80-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
8184
@OverloadResolutionByLambdaReturnType
8285
public inline fun <T, reified R> DataColumn<T>.medianOf(
8386
skipNaN: Boolean = skipNaNDefault,
@@ -86,7 +89,6 @@ public inline fun <T, reified R> DataColumn<T>.medianOf(
8689
where R : Comparable<R & Any>?, R : Number? =
8790
medianOfOrNull(skipNaN, expression).suggestIfNull("medianOf")
8891

89-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
9092
@OverloadResolutionByLambdaReturnType
9193
public inline fun <T, reified R> DataColumn<T>.medianOfOrNull(
9294
skipNaN: Boolean = skipNaNDefault,
@@ -149,26 +151,24 @@ public fun <T, C : Comparable<C & Any>?> DataFrame<T>.medianFor(
149151
skipNaN: Boolean = skipNaNDefault,
150152
): DataRow<T> = medianFor(skipNaN) { columns.toColumnSet() }
151153

152-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
154+
// TODO, requires explicit type C due to https://youtrack.jetbrains.com/issue/KT-76683
153155
@OverloadResolutionByLambdaReturnType
154156
public fun <T, C : Comparable<C & Any>?> DataFrame<T>.median(columns: ColumnsSelector<T, C>): C & Any =
155157
medianOrNull(columns).suggestIfNull("median")
156158

157-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
159+
// TODO, requires explicit type C due to https://youtrack.jetbrains.com/issue/KT-76683
158160
@OverloadResolutionByLambdaReturnType
159161
@Suppress("UNCHECKED_CAST")
160162
public fun <T, C : Comparable<C & Any>?> DataFrame<T>.medianOrNull(columns: ColumnsSelector<T, C>): C? =
161163
Aggregators.medianComparables<C>().aggregateAll(this, columns)
162164

163-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
164165
@OverloadResolutionByLambdaReturnType
165166
public fun <T, C> DataFrame<T>.median(
166167
skipNaN: Boolean = skipNaNDefault,
167168
columns: ColumnsSelector<T, C>,
168169
): Double
169170
where C : Number?, C : Comparable<C & Any>? = medianOrNull(skipNaN, columns).suggestIfNull("median")
170171

171-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
172172
@OverloadResolutionByLambdaReturnType
173173
@Suppress("UNCHECKED_CAST")
174174
public fun <T, C> DataFrame<T>.medianOrNull(
@@ -230,19 +230,18 @@ public fun <T, C> DataFrame<T>.medianOrNull(
230230
): Double?
231231
where C : Comparable<C & Any>?, C : Number? = medianOrNull(skipNaN) { columns.toColumnSet() }
232232

233-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
233+
// TODO, requires explicit type R due to https://youtrack.jetbrains.com/issue/KT-76683
234234
@OverloadResolutionByLambdaReturnType
235235
public inline fun <T, reified R : Comparable<R & Any>?> DataFrame<T>.medianOf(
236236
crossinline expression: RowExpression<T, R>,
237237
): R & Any = medianOfOrNull(expression).suggestIfNull("medianOf")
238238

239-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
239+
// TODO, requires explicit type R due to https://youtrack.jetbrains.com/issue/KT-76683
240240
@OverloadResolutionByLambdaReturnType
241241
public inline fun <T, reified R : Comparable<R & Any>?> DataFrame<T>.medianOfOrNull(
242242
crossinline expression: RowExpression<T, R>,
243243
): R? = Aggregators.medianComparables<R>().aggregateOf(this, expression)
244244

245-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
246245
@OverloadResolutionByLambdaReturnType
247246
public inline fun <T, reified R> DataFrame<T>.medianOf(
248247
skipNaN: Boolean = skipNaNDefault,
@@ -251,7 +250,6 @@ public inline fun <T, reified R> DataFrame<T>.medianOf(
251250
where R : Comparable<R & Any>?, R : Number? =
252251
medianOfOrNull(skipNaN, expression).suggestIfNull("medianOf")
253252

254-
// todo check overload resolution https://youtrack.jetbrains.com/issue/KT-76683
255253
@OverloadResolutionByLambdaReturnType
256254
public inline fun <T, reified R> DataFrame<T>.medianOfOrNull(
257255
skipNaN: Boolean = skipNaNDefault,
@@ -260,6 +258,46 @@ public inline fun <T, reified R> DataFrame<T>.medianOfOrNull(
260258
where R : Comparable<R & Any>?, R : Number? =
261259
Aggregators.medianNumbers<R>(skipNaN).aggregateOf(this, expression)
262260

261+
public inline fun <T, reified C : Comparable<C & Any>?> DataFrame<T>.medianBy(
262+
skipNaN: Boolean = skipNaNDefault,
263+
crossinline expression: RowExpression<T, C>,
264+
): DataRow<T> = medianByOrNull(skipNaN, expression).suggestIfNull("medianBy")
265+
266+
public fun <T> DataFrame<T>.medianBy(column: String, skipNaN: Boolean = skipNaNDefault): DataRow<T> =
267+
medianByOrNull(column, skipNaN).suggestIfNull("medianBy")
268+
269+
@AccessApiOverload
270+
public inline fun <T, reified C : Comparable<C & Any>?> DataFrame<T>.medianBy(
271+
column: ColumnReference<C>,
272+
skipNaN: Boolean = skipNaNDefault,
273+
): DataRow<T> = medianByOrNull(column, skipNaN).suggestIfNull("medianBy")
274+
275+
@AccessApiOverload
276+
public inline fun <T, reified C : Comparable<C & Any>?> DataFrame<T>.medianBy(
277+
column: KProperty<C>,
278+
skipNaN: Boolean = skipNaNDefault,
279+
): DataRow<T> = medianByOrNull(column, skipNaN).suggestIfNull("medianBy")
280+
281+
/**
 * Returns the row of this [DataFrame] that the median aggregator selects for the values
 * produced by [expression], or `null` if `aggregateByOrNull` yields no row.
 *
 * Uses `Aggregators.medianCommon`, matching the `DataColumn` `medianBy…` overload and the
 * `medianOf` overloads in this file. The earlier `Aggregators.min` call appears to be a
 * copy-paste slip from `minBy` and would presumably have selected the minimum row instead.
 *
 * @param skipNaN forwarded to the aggregator; defaults to `skipNaNDefault`.
 * @param expression row expression producing the comparable value the median is taken over.
 * @return the selected [DataRow], or `null`.
 */
public inline fun <T, reified C : Comparable<C & Any>?> DataFrame<T>.medianByOrNull(
    skipNaN: Boolean = skipNaNDefault,
    crossinline expression: RowExpression<T, C>,
): DataRow<T>? = Aggregators.medianCommon<C>(skipNaN).aggregateByOrNull(this, expression)
285+
286+
public fun <T> DataFrame<T>.medianByOrNull(column: String, skipNaN: Boolean = skipNaNDefault): DataRow<T>? =
287+
medianByOrNull(column.toColumnOf<Comparable<Any>?>(), skipNaN)
288+
289+
/**
 * Returns the row of this [DataFrame] that the median aggregator selects for the values
 * of [column], or `null` if `aggregateByOrNull` yields no row.
 *
 * Uses `Aggregators.medianCommon`, matching the `DataColumn` `medianBy…` overload in this
 * file. The earlier `Aggregators.min` call appears to be a copy-paste slip from `minBy`.
 * The `String` and `KProperty` overloads of `medianByOrNull` delegate here, so they pick
 * up the same aggregator.
 *
 * @param column reference to the comparable column the median is taken over.
 * @param skipNaN forwarded to the aggregator; defaults to `skipNaNDefault`.
 * @return the selected [DataRow], or `null`.
 */
@AccessApiOverload
public inline fun <T, reified C : Comparable<C & Any>?> DataFrame<T>.medianByOrNull(
    column: ColumnReference<C>,
    skipNaN: Boolean = skipNaNDefault,
): DataRow<T>? = Aggregators.medianCommon<C>(skipNaN).aggregateByOrNull(this, column)
294+
295+
@AccessApiOverload
296+
public inline fun <T, reified C : Comparable<C & Any>?> DataFrame<T>.medianByOrNull(
297+
column: KProperty<C>,
298+
skipNaN: Boolean = skipNaNDefault,
299+
): DataRow<T>? = medianByOrNull(column.toColumnAccessor(), skipNaN)
300+
263301
// endregion
264302

265303
// region GroupBy
@@ -325,6 +363,27 @@ public inline fun <T, reified R : Comparable<R & Any>?> Grouped<T>.medianOf(
325363
crossinline expression: RowExpression<T, R>,
326364
): DataFrame<T> = Aggregators.medianCommon<R>(skipNaN).aggregateOf(this, name, expression)
327365

366+
@Interpretable("GroupByReduceExpression") // TODO?
367+
public inline fun <T, G, reified R : Comparable<R & Any>?> GroupBy<T, G>.medianBy(
368+
skipNaN: Boolean = skipNaNDefault,
369+
crossinline rowExpression: RowExpression<G, R>,
370+
): ReducedGroupBy<T, G> = reduce { medianByOrNull(skipNaN, rowExpression) }
371+
372+
@AccessApiOverload
373+
public inline fun <T, G, reified C : Comparable<C & Any>?> GroupBy<T, G>.medianBy(
374+
column: ColumnReference<C>,
375+
skipNaN: Boolean = skipNaNDefault,
376+
): ReducedGroupBy<T, G> = reduce { medianByOrNull(column, skipNaN) }
377+
378+
public fun <T, G> GroupBy<T, G>.medianBy(column: String, skipNaN: Boolean = skipNaNDefault): ReducedGroupBy<T, G> =
379+
medianBy(column.toColumnAccessor().cast<Comparable<Any>?>(), skipNaN)
380+
381+
@AccessApiOverload
382+
public inline fun <T, G, reified C : Comparable<C & Any>?> GroupBy<T, G>.medianBy(
383+
column: KProperty<C>,
384+
skipNaN: Boolean = skipNaNDefault,
385+
): ReducedGroupBy<T, G> = medianBy(column.toColumnAccessor(), skipNaN)
386+
328387
// endregion
329388

330389
// region Pivot
@@ -383,6 +442,25 @@ public inline fun <T, reified R : Comparable<R & Any>?> Pivot<T>.medianOf(
383442
crossinline expression: RowExpression<T, R>,
384443
): DataRow<T> = delegate { medianOf(skipNaN, expression) }
385444

445+
public inline fun <T, reified R : Comparable<R & Any>?> Pivot<T>.medianBy(
446+
skipNaN: Boolean = skipNaNDefault,
447+
crossinline rowExpression: RowExpression<T, R>,
448+
): ReducedPivot<T> = reduce { medianByOrNull(skipNaN, rowExpression) }
449+
450+
@AccessApiOverload
451+
public inline fun <T, reified C : Comparable<C & Any>?> Pivot<T>.medianBy(
452+
column: ColumnReference<C>,
453+
skipNaN: Boolean = skipNaNDefault,
454+
): ReducedPivot<T> = reduce { medianByOrNull(column, skipNaN) }
455+
456+
public fun <T> Pivot<T>.medianBy(column: String, skipNaN: Boolean = skipNaNDefault): ReducedPivot<T> =
457+
medianBy(column.toColumnAccessor().cast<Comparable<Any>?>(), skipNaN)
458+
459+
@AccessApiOverload
460+
public inline fun <T, reified C : Comparable<C & Any>?> Pivot<T>.medianBy(
461+
column: KProperty<C>,
462+
skipNaN: Boolean = skipNaNDefault,
463+
): ReducedPivot<T> = medianBy(column.toColumnAccessor(), skipNaN)
386464
// endregion
387465

388466
// region PivotGroupBy
@@ -441,6 +519,28 @@ public inline fun <T, reified R : Comparable<R & Any>?> PivotGroupBy<T>.medianOf
441519
crossinline expression: RowExpression<T, R>,
442520
): DataFrame<T> = Aggregators.medianCommon<R>(skipNaN).aggregateOf(this, expression)
443521

522+
public inline fun <T, reified R : Comparable<R & Any>?> PivotGroupBy<T>.medianBy(
523+
skipNaN: Boolean = skipNaNDefault,
524+
crossinline rowExpression: RowExpression<T, R>,
525+
): ReducedPivotGroupBy<T> = reduce { medianByOrNull(skipNaN, rowExpression) }
526+
527+
@AccessApiOverload
528+
public inline fun <T, reified C : Comparable<C & Any>?> PivotGroupBy<T>.medianBy(
529+
column: ColumnReference<C>,
530+
skipNaN: Boolean = skipNaNDefault,
531+
): ReducedPivotGroupBy<T> = reduce { medianByOrNull(column, skipNaN) }
532+
533+
public fun <T> PivotGroupBy<T>.medianBy(column: String, skipNaN: Boolean = skipNaNDefault): ReducedPivotGroupBy<T> =
534+
medianBy(column.toColumnAccessor().cast<Comparable<Any>?>(), skipNaN)
535+
536+
@AccessApiOverload
537+
public inline fun <T, reified C : Comparable<C & Any>?> PivotGroupBy<T>.medianBy(
538+
column: KProperty<C>,
539+
skipNaN: Boolean = skipNaNDefault,
540+
): ReducedPivotGroupBy<T> = medianBy(column.toColumnAccessor(), skipNaN)
541+
444542
// endregion
445543

446-
// TODO more medianBy overloads
544+
// region binary compatibility
545+
// TODO
546+
// endregion

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt

+4-5
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import org.jetbrains.kotlinx.dataframe.api.median
1313
import org.jetbrains.kotlinx.dataframe.api.medianOf
1414
import org.jetbrains.kotlinx.dataframe.api.medianOrNull
1515
import org.jetbrains.kotlinx.dataframe.api.rowMedianOf
16-
import org.jetbrains.kotlinx.dataframe.statistics.myFun
1716
import org.junit.Test
1817
import kotlin.experimental.ExperimentalTypeInference
1918
import kotlin.reflect.typeOf
@@ -39,11 +38,11 @@ class MedianTests {
3938
val d = personsDf.groupBy("city").medianOf("newAge") { "age"<Int>() * 10 }
4039
d["newAge"].type() shouldBe typeOf<Int>()
4140

42-
val e = personsDf.medianOf("newAge") { "age"<Int>().toString() }
41+
val e = personsDf.medianOf<_, String> { "age"<Int>().toString() }
4342

4443
val column = personsDf[column<Int>("age")]
4544
column.medianOf { it }
46-
column.medianOf { it.toString() }
45+
column.medianOf<_, String> { it.toString() }
4746
}
4847

4948
@Test
@@ -58,8 +57,8 @@ class MedianTests {
5857
df.medianOrNull { "a"<Int>() and "b"<Int>() } shouldBe 5.0
5958
df.median("c") shouldBe "b"
6059

61-
df.median { "c"<String>() } shouldBe "b"
62-
df.medianOrNull { "c"<String>() } shouldBe "b"
60+
df.median<_, String> { "c"<String>() } shouldBe "b"
61+
df.medianOrNull<_, String> { "c"<String>() } shouldBe "b"
6362

6463
df.median({ "c"<String>() }) shouldBe "b"
6564
df.medianOrNull({ "c"<String>() }) shouldBe "b"

0 commit comments

Comments
 (0)