Skip to content

Commit e4a3af5

Browse files
committed
Merge branch 'master' into implodeFix
2 parents 3037a97 + acafb6d commit e4a3af5

File tree

33 files changed

+276
-104
lines changed

33 files changed

+276
-104
lines changed

core/build.gradle.kts

+2
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ dependencies {
7373

7474
api(libs.kotlin.datetimeJvm)
7575
implementation(libs.kotlinpoet)
76+
implementation(libs.sl4j)
77+
implementation(libs.kotlinLogging)
7678

7779
testImplementation(libs.junit)
7880
testImplementation(libs.kotestAssertions) {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt

+13-9
Original file line numberDiff line numberDiff line change
@@ -197,15 +197,19 @@ internal fun commonParent(vararg classes: KClass<*>): KClass<*>? = commonParent(
197197
internal fun Iterable<KClass<*>>.withMostSuperclasses(): KClass<*>? = maxByOrNull { it.allSuperclasses.size }
198198

199199
internal fun Iterable<KClass<*>>.createType(nullable: Boolean, upperBound: KType? = null): KType =
200-
if (upperBound == null) {
201-
(withMostSuperclasses() ?: Any::class).createStarProjectedType(nullable)
202-
} else {
203-
val upperClass = upperBound.classifier as KClass<*>
204-
val baseClass = filter { it.isSubclassOf(upperClass) }.withMostSuperclasses() ?: withMostSuperclasses()
205-
if (baseClass == null) {
206-
upperBound.withNullability(nullable)
207-
} else {
208-
upperBound.projectTo(baseClass).withNullability(nullable)
200+
when {
201+
!iterator().hasNext() -> upperBound?.withNullability(nullable) ?: nothingType(nullable)
202+
203+
upperBound == null -> (withMostSuperclasses() ?: Any::class).createStarProjectedType(nullable)
204+
205+
else -> {
206+
val upperClass = upperBound.classifier as KClass<*>
207+
val baseClass = filter { it.isSubclassOf(upperClass) }.withMostSuperclasses() ?: withMostSuperclasses()
208+
if (baseClass == null) {
209+
upperBound.withNullability(nullable)
210+
} else {
211+
upperBound.projectTo(baseClass).withNullability(nullable)
212+
}
209213
}
210214
}
211215

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt

+7-3
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ internal fun <T> Iterable<T>.anyNull(): Boolean = any { it == null }
121121
internal fun emptyPath(): ColumnPath = ColumnPath(emptyList())
122122

123123
@PublishedApi
124-
internal fun <T : Number> KClass<T>.zero(): T =
124+
internal fun <T : Number> KClass<T>.zeroOrNull(): T? =
125125
when (this) {
126126
Int::class -> 0 as T
127127
Byte::class -> 0.toByte() as T
@@ -131,10 +131,14 @@ internal fun <T : Number> KClass<T>.zero(): T =
131131
Float::class -> 0.toFloat() as T
132132
BigDecimal::class -> BigDecimal.ZERO as T
133133
BigInteger::class -> BigInteger.ZERO as T
134-
Number::class -> 0 as T
135-
else -> TODO()
134+
Number::class -> 0 as? T
135+
else -> null
136136
}
137137

138+
@PublishedApi
139+
internal fun <T : Number> KClass<T>.zero(): T =
140+
zeroOrNull() ?: throw NotImplementedError("Zero value for $this is not supported")
141+
138142
internal fun <T> catchSilent(body: () -> T): T? =
139143
try {
140144
body()

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt

+43-19
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.jetbrains.kotlinx.dataframe.impl.api
22

3+
import io.github.oshai.kotlinlogging.KotlinLogging
34
import org.jetbrains.kotlinx.dataframe.AnyFrame
45
import org.jetbrains.kotlinx.dataframe.AnyRow
56
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
@@ -11,13 +12,13 @@ import org.jetbrains.kotlinx.dataframe.api.ConvertSchemaDsl
1112
import org.jetbrains.kotlinx.dataframe.api.ConverterScope
1213
import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns
1314
import org.jetbrains.kotlinx.dataframe.api.Infer
15+
import org.jetbrains.kotlinx.dataframe.api.add
1416
import org.jetbrains.kotlinx.dataframe.api.all
1517
import org.jetbrains.kotlinx.dataframe.api.allNulls
1618
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
1719
import org.jetbrains.kotlinx.dataframe.api.concat
1820
import org.jetbrains.kotlinx.dataframe.api.convertTo
1921
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
20-
import org.jetbrains.kotlinx.dataframe.api.getColumnPaths
2122
import org.jetbrains.kotlinx.dataframe.api.isEmpty
2223
import org.jetbrains.kotlinx.dataframe.api.map
2324
import org.jetbrains.kotlinx.dataframe.api.name
@@ -29,12 +30,14 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
2930
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
3031
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
3132
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
33+
import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy
3234
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
3335
import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException
3436
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
3537
import org.jetbrains.kotlinx.dataframe.impl.emptyPath
36-
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyColumn
38+
import org.jetbrains.kotlinx.dataframe.impl.getColumnPaths
3739
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
40+
import org.jetbrains.kotlinx.dataframe.impl.schema.createNullFilledColumn
3841
import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
3942
import org.jetbrains.kotlinx.dataframe.impl.schema.render
4043
import org.jetbrains.kotlinx.dataframe.kind
@@ -45,6 +48,8 @@ import kotlin.reflect.KType
4548
import kotlin.reflect.full.withNullability
4649
import kotlin.reflect.jvm.jvmErasure
4750

51+
private val logger = KotlinLogging.logger {}
52+
4853
private open class Converter(val transform: ConverterScope.(Any?) -> Any?, val skipNulls: Boolean)
4954

5055
private class Filler(val columns: ColumnsSelector<*, *>, val expr: RowExpression<*, *>)
@@ -252,22 +257,16 @@ internal fun AnyFrame.convertToImpl(
252257
}
253258
}.toMutableList()
254259

255-
// when the target is nullable but the source does not contain a column, fill it in with nulls / empty dataframes
260+
// when the target is nullable but the source does not contain a column,
261+
// fill it in with nulls / empty dataframes
256262
val size = this.size.nrow
257263
schema.columns.forEach { (name, targetColumn) ->
258-
val isNullable =
259-
// like value column of type Int?
260-
targetColumn.nullable ||
261-
// like value column of type Int? (backup check)
262-
targetColumn.type.isMarkedNullable ||
263-
// like DataRow<Something?> for a group column (all columns in the group will be nullable)
264-
targetColumn.contentType?.isMarkedNullable == true ||
265-
// frame column can be filled with empty dataframes
266-
targetColumn.kind == ColumnKind.Frame
267-
268264
if (name !in visited) {
269-
newColumns += targetColumn.createEmptyColumn(name, size)
270-
if (!isNullable) {
265+
try {
266+
newColumns += targetColumn.createNullFilledColumn(name, size)
267+
} catch (e: IllegalStateException) {
268+
logger.debug(e) { "" }
269+
// if the column could not be created automatically, it needs to be filled manually
271270
missingPaths.add(path + name)
272271
}
273272
}
@@ -279,14 +278,39 @@ internal fun AnyFrame.convertToImpl(
279278
val marker = MarkersExtractor.get(clazz)
280279
var result = convertToSchema(marker.schema, emptyPath())
281280

281+
/*
282+
* Here we handle all registered fillers of the user.
283+
* Fillers are registered in the DSL like:
284+
* ```kt
285+
* df.convertTo<Target> {
286+
* fill { col1 and col2 }.with { something }
287+
* fill { col3 }.with { somethingElse }
288+
* }
289+
* ```
290+
* Users can use this to fill up any column that was missing during the conversion.
291+
* They can also fill up and thus overwrite any existing column here.
292+
*/
282293
dsl.fillers.forEach { filler ->
283-
val paths = result.getColumnPaths(filler.columns)
284-
missingPaths.removeAll(paths.toSet())
285-
result = result.update { paths.toColumnSet() }.with {
286-
filler.expr(this, this)
294+
// get all paths from the `fill { col1 and col2 }` part
295+
val paths = result.getColumnPaths(UnresolvedColumnsPolicy.Create, filler.columns).toSet()
296+
297+
// split the paths into those that are already in the df and those that are missing
298+
val (newPaths, existingPaths) = paths.partition { it in missingPaths }
299+
300+
// first fill cols that are already in the df using the `with {}` part of the dsl
301+
result = result.update { existingPaths.toColumnSet() }.with { filler.expr(this, this) }
302+
303+
// then create any missing ones by filling using the `with {}` part of the dsl
304+
result = newPaths.fold(result) { df, newPath ->
305+
df.add(newPath, Infer.Type) { filler.expr(this, this) }
287306
}
307+
308+
// remove the paths that are now filled
309+
missingPaths -= paths
288310
}
289311

312+
// Inform the user which target columns could not be created in the conversion
313+
// The user will need to supply extra information for these, e.g. by using `fill {}` on them.
290314
if (missingPaths.isNotEmpty()) {
291315
throw IllegalArgumentException(
292316
"The following columns were not found in DataFrame: ${

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt

+2-11
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ import org.jetbrains.kotlinx.dataframe.columns.size
2525
import org.jetbrains.kotlinx.dataframe.columns.values
2626
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
2727
import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn
28+
import org.jetbrains.kotlinx.dataframe.impl.renderType
2829
import org.jetbrains.kotlinx.dataframe.index
2930
import org.jetbrains.kotlinx.dataframe.kind
3031
import org.jetbrains.kotlinx.dataframe.type
31-
import kotlin.reflect.jvm.jvmErasure
3232

3333
internal fun describeImpl(cols: List<AnyCol>): DataFrame<ColumnDescription> {
3434
fun List<AnyCol>.collectAll(atAnyDepth: Boolean): List<AnyCol> =
@@ -65,7 +65,7 @@ internal fun describeImpl(cols: List<AnyCol>): DataFrame<ColumnDescription> {
6565
if (hasLongPaths) {
6666
ColumnDescription::path from { it.path() }
6767
}
68-
ColumnDescription::type from { buildTypeName(it) }
68+
ColumnDescription::type from { renderType(it.type) }
6969
ColumnDescription::count from { it.size }
7070
ColumnDescription::unique from { it.countDistinct() }
7171
ColumnDescription::nulls from { it.values.count { it == null } }
@@ -94,12 +94,3 @@ internal fun describeImpl(cols: List<AnyCol>): DataFrame<ColumnDescription> {
9494

9595
return df.cast()
9696
}
97-
98-
private fun buildTypeName(it: AnyCol): String {
99-
val rawJavaType = it.type.jvmErasure.simpleName.toString()
100-
return if (it.type.isMarkedNullable) {
101-
"$rawJavaType?"
102-
} else {
103-
rawJavaType
104-
}
105-
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/DataColumnImpl.kt

+30
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
package org.jetbrains.kotlinx.dataframe.impl.columns
22

3+
import org.jetbrains.kotlinx.dataframe.BuildConfig
34
import org.jetbrains.kotlinx.dataframe.DataColumn
45
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
6+
import org.jetbrains.kotlinx.dataframe.impl.isArray
7+
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveArray
8+
import kotlin.reflect.KClass
59
import kotlin.reflect.KType
10+
import kotlin.reflect.full.isSubclassOf
611

712
internal abstract class DataColumnImpl<T>(
813
protected val values: List<T>,
@@ -12,6 +17,31 @@ internal abstract class DataColumnImpl<T>(
1217
) : DataColumn<T>,
1318
DataColumnInternal<T> {
1419

20+
private infix fun <T> T?.matches(type: KType) =
21+
when {
22+
this == null -> type.isMarkedNullable
23+
24+
this.isPrimitiveArray ->
25+
type.isPrimitiveArray &&
26+
this!!::class.qualifiedName == type.classifier?.let { (it as KClass<*>).qualifiedName }
27+
28+
this.isArray -> type.isArray
29+
30+
// (refers to the array case above) the precise element type of an array cannot be checked
31+
else -> this!!::class.isSubclassOf(type.classifier as KClass<*>)
32+
}
33+
34+
init {
35+
// Check for [Issue #713](https://github.com/Kotlin/dataframe/issues/713).
36+
// This only runs with `kotlin.dataframe.debug=true` in gradle.properties.
37+
if (BuildConfig.DEBUG) {
38+
require(values.all { it matches type }) {
39+
val types = values.map { if (it == null) "Nothing?" else it!!::class.simpleName }.distinct()
40+
"Values of column '$name' have types '$types' which are not compatible given with column type '$type'"
41+
}
42+
}
43+
}
44+
1545
protected val distinct = distinct ?: lazy { values.toSet() }
1646

1747
override fun name() = name

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt

+20-9
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ internal fun AnyCol.extractSchema(): ColumnSchema =
102102
@PublishedApi
103103
internal fun getSchema(kClass: KClass<*>): DataFrameSchema = MarkersExtractor.get(kClass).schema
104104

105+
/**
106+
* Creates an "empty" column based on the top level of [this] [ColumnSchema].
107+
*/
105108
internal fun ColumnSchema.createEmptyColumn(name: String): AnyCol =
106109
when (this) {
107110
is ColumnSchema.Value -> DataColumn.createValueColumn<Any?>(name, emptyList(), type)
@@ -110,14 +113,22 @@ internal fun ColumnSchema.createEmptyColumn(name: String): AnyCol =
110113
else -> error("Unexpected ColumnSchema: $this")
111114
}
112115

113-
/** Create "empty" column, filled with either null or empty dataframes. */
114-
internal fun ColumnSchema.createEmptyColumn(name: String, numberOfRows: Int): AnyCol =
116+
/**
117+
* Creates a column based on [this] [ColumnSchema] filled with `null` or empty dataframes.
118+
* @throws IllegalStateException if the column is not nullable and [numberOfRows] is greater than 0.
119+
*/
120+
internal fun ColumnSchema.createNullFilledColumn(name: String, numberOfRows: Int): AnyCol =
115121
when (this) {
116-
is ColumnSchema.Value -> DataColumn.createValueColumn(
117-
name = name,
118-
values = List(numberOfRows) { null },
119-
type = type,
120-
)
122+
is ColumnSchema.Value -> {
123+
if (!type.isMarkedNullable && numberOfRows > 0) {
124+
error("Cannot create a null-filled value column of type $type as it's not nullable.")
125+
}
126+
DataColumn.createValueColumn(
127+
name = name,
128+
values = List(numberOfRows) { null },
129+
type = type,
130+
)
131+
}
121132

122133
is ColumnSchema.Group -> DataColumn.createColumnGroup(
123134
name = name,
@@ -130,7 +141,7 @@ internal fun ColumnSchema.createEmptyColumn(name: String, numberOfRows: Int): An
130141
schema = lazyOf(schema),
131142
)
132143

133-
else -> error("Unexpected ColumnSchema: $this")
144+
else -> error("Cannot create null-filled column of unexpected ColumnSchema: $this")
134145
}
135146

136147
internal fun DataFrameSchema.createEmptyDataFrame(): AnyFrame =
@@ -143,7 +154,7 @@ internal fun DataFrameSchema.createEmptyDataFrame(numberOfRows: Int): AnyFrame =
143154
DataFrame.empty(numberOfRows)
144155
} else {
145156
columns.map { (name, schema) ->
146-
schema.createEmptyColumn(name, numberOfRows)
157+
schema.createNullFilledColumn(name, numberOfRows)
147158
}.toDataFrame()
148159
}
149160

Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
package org.jetbrains.kotlinx.dataframe.jupyter
22

3+
import org.jetbrains.kotlinx.dataframe.BuildConfig
34
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
45

56
public class JupyterConfiguration {
67
public val display: DisplayConfiguration = DisplayConfiguration()
78

9+
/** Version of the library. */
10+
public val version: String = BuildConfig.VERSION
11+
812
/** DSL accessor. */
913
public operator fun invoke(block: JupyterConfiguration.() -> Unit): JupyterConfiguration = apply(block)
1014
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.jetbrains.kotlinx.dataframe.math
22

33
import org.jetbrains.kotlinx.dataframe.api.skipNA_default
4+
import org.jetbrains.kotlinx.dataframe.impl.renderType
45
import java.math.BigDecimal
56
import kotlin.reflect.KType
67
import kotlin.reflect.full.withNullability
@@ -31,7 +32,10 @@ internal fun <T : Number> Sequence<T>.mean(type: KType, skipNA: Boolean = skipNA
3132

3233
Number::class -> (this as Sequence<Number>).map { it.toDouble() }.mean(skipNA)
3334

34-
else -> throw IllegalArgumentException("Unable to compute mean for type $type")
35+
// this means the sequence is empty
36+
Nothing::class -> Double.NaN
37+
38+
else -> throw IllegalArgumentException("Unable to compute the mean for type ${renderType(type)}")
3539
}
3640
}
3741

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt

+7-6
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.math
22

33
import org.jetbrains.kotlinx.dataframe.api.ddof_default
44
import org.jetbrains.kotlinx.dataframe.api.skipNA_default
5+
import org.jetbrains.kotlinx.dataframe.impl.renderType
56
import java.math.BigDecimal
67
import kotlin.reflect.KType
78
import kotlin.reflect.full.withNullability
@@ -13,11 +14,10 @@ internal fun <T : Number> Iterable<T?>.std(
1314
ddof: Int = ddof_default,
1415
): Double {
1516
if (type.isMarkedNullable) {
16-
if (skipNA) {
17-
return filterNotNull().std(type.withNullability(false), true, ddof)
18-
} else {
19-
if (contains(null)) return Double.NaN
20-
return std(type.withNullability(false), skipNA, ddof)
17+
return when {
18+
skipNA -> filterNotNull().std(type = type.withNullability(false), skipNA = true, ddof = ddof)
19+
contains(null) -> Double.NaN
20+
else -> std(type = type.withNullability(false), skipNA = false, ddof = ddof)
2121
}
2222
}
2323
return when (type.classifier) {
@@ -26,7 +26,8 @@ internal fun <T : Number> Iterable<T?>.std(
2626
Int::class, Short::class, Byte::class -> (this as Iterable<Int>).std(ddof)
2727
Long::class -> (this as Iterable<Long>).std(ddof)
2828
BigDecimal::class -> (this as Iterable<BigDecimal>).std(ddof)
29-
else -> throw IllegalArgumentException("Unsupported type ${type.classifier}")
29+
Nothing::class -> Double.NaN
30+
else -> throw IllegalArgumentException("Unable to compute the std for type ${renderType(type)}")
3031
}
3132
}
3233

0 commit comments

Comments
 (0)