|
| 1 | +package org.jetbrains.kotlinx.dataframe |
| 2 | + |
| 3 | +import org.jetbrains.kotlinx.dataframe.api.Infer |
| 4 | +import org.jetbrains.kotlinx.dataframe.api.asDataColumn |
| 5 | +import org.jetbrains.kotlinx.dataframe.api.cast |
| 6 | +import org.jetbrains.kotlinx.dataframe.api.concat |
| 7 | +import org.jetbrains.kotlinx.dataframe.api.filter |
| 8 | +import org.jetbrains.kotlinx.dataframe.api.map |
| 9 | +import org.jetbrains.kotlinx.dataframe.api.schema |
| 10 | +import org.jetbrains.kotlinx.dataframe.api.take |
| 11 | +import org.jetbrains.kotlinx.dataframe.columns.BaseColumn |
| 12 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup |
| 13 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind |
| 14 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath |
| 15 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext |
| 16 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath |
| 17 | +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn |
| 18 | +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn |
| 19 | +import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl |
| 20 | +import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl |
| 21 | +import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl |
| 22 | +import org.jetbrains.kotlinx.dataframe.impl.columns.addPath |
| 23 | +import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType |
| 24 | +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind |
| 25 | +import org.jetbrains.kotlinx.dataframe.impl.getValuesType |
| 26 | +import org.jetbrains.kotlinx.dataframe.impl.splitByIndices |
| 27 | +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema |
| 28 | +import kotlin.reflect.KClass |
| 29 | +import kotlin.reflect.KProperty |
| 30 | +import kotlin.reflect.KType |
| 31 | +import kotlin.reflect.typeOf |
| 32 | + |
/**
 * A named column holding [values] of one specific [type].
 *
 * [ValueColumn] and [FrameColumn] extend this interface, whereas [ColumnGroup] does not.
 * Nevertheless, the implementations of all three [column kinds][ColumnKind] derive from DataColumn,
 * so they can be cast to it safely.
 * Column operations whose signatures would clash with the [DataFrame] API ([filter], [take], [map] etc.)
 * are declared for [DataColumn] rather than for [BaseColumn].
 *
 * @param T type of values in the column.
 */
public interface DataColumn<out T> : BaseColumn<T> {

    public companion object {

        /**
         * Builds a [ValueColumn] from the given [name], [values] and [type].
         *
         * The type stored in the column is the result of `getValuesType(values, type, infer)`.
         *
         * @param name name of the column
         * @param values list of column values
         * @param type type of the column
         * @param infer column type inference mode
         * @param defaultValue forwarded unchanged to the [ValueColumnImpl] constructor
         */
        public fun <T> createValueColumn(
            name: String,
            values: List<T>,
            type: KType,
            infer: Infer = Infer.None,
            defaultValue: T? = null,
        ): ValueColumn<T> {
            val resolvedType = getValuesType(values, type, infer)
            return ValueColumnImpl(values, name, resolvedType, defaultValue)
        }

        /**
         * Builds a [ValueColumn] from the given [name] and [values], taking the column type from the reified [T].
         *
         * The type is captured at compile time through `typeOf<T>()`, adjusted by `getValuesType` according
         * to [infer], and then handed to the [KType]-based overload.
         *
         * @param T type of the column
         * @param name name of the column
         * @param values list of column values
         * @param infer column type inference mode
         */
        public inline fun <reified T> createValueColumn(
            name: String,
            values: List<T>,
            infer: Infer = Infer.None,
        ): ValueColumn<T> {
            val resolvedType = getValuesType(values, typeOf<T>(), infer)
            return createValueColumn(name, values, resolvedType)
        }

        /**
         * Builds a [ColumnGroup] called [name] that wraps the dataframe [df].
         *
         * @param name name of the column group
         * @param df dataframe wrapped by the group
         */
        public fun <T> createColumnGroup(name: String, df: DataFrame<T>): ColumnGroup<T> {
            return ColumnGroupImpl(name, df)
        }

        /**
         * Builds a [FrameColumn] called [name] by splitting [df] at [startIndices].
         *
         * The pieces are produced by `splitByIndices`; the schema of the resulting column is taken lazily
         * from [df] itself.
         *
         * @param name name of the frame column
         * @param df dataframe to split into pieces
         * @param startIndices indices passed to `splitByIndices` to mark where [df] is split
         */
        public fun <T> createFrameColumn(
            name: String,
            df: DataFrame<T>,
            startIndices: Iterable<Int>,
        ): FrameColumn<T> {
            val groups = df.splitByIndices(startIndices.asSequence()).toList()
            return FrameColumnImpl(name, groups, lazy { df.schema() })
        }

        /**
         * Builds a [FrameColumn] called [name] from the ready-made dataframes in [groups].
         *
         * @param name name of the frame column
         * @param groups dataframes that become the values of the column
         * @param schema optional lazily evaluated schema, forwarded unchanged to [FrameColumnImpl]
         */
        public fun <T> createFrameColumn(
            name: String,
            groups: List<DataFrame<T>>,
            schema: Lazy<DataFrameSchema>? = null,
        ): FrameColumn<T> {
            return FrameColumnImpl(name, groups, schema)
        }

        /**
         * Builds a column called [name] whose kind and type are determined by `guessColumnType`.
         *
         * @param name name of the column
         * @param values list of column values
         * @param nullable forwarded to `guessColumnType`
         *   (NOTE(review): `null` presumably means "detect nullability from the values" — confirm)
         */
        public fun <T> createWithTypeInference(
            name: String,
            values: List<T>,
            nullable: Boolean? = null,
        ): DataColumn<T> {
            return guessColumnType(name, values, nullable = nullable)
        }

        /**
         * Builds a column called [name], choosing the column kind from [type].
         *
         * Depending on `type.toColumnKind()` the values are turned into a [ValueColumn], concatenated into
         * a [ColumnGroup], or used as the frames of a [FrameColumn].
         * [infer] is only taken into account when a [ValueColumn] is created.
         *
         * @param name name of the column
         * @param values list of column values
         * @param type type of the column
         * @param infer column type inference mode
         */
        public fun <T> create(
            name: String,
            values: List<T>,
            type: KType,
            infer: Infer = Infer.None,
        ): DataColumn<T> {
            val kind = type.toColumnKind()
            return when (kind) {
                ColumnKind.Value -> createValueColumn(name, values, type, infer)
                ColumnKind.Group -> createColumnGroup(name, (values as List<AnyRow?>).concat()).asDataColumn().cast()
                ColumnKind.Frame -> createFrameColumn(name, values as List<AnyFrame>).asDataColumn().cast()
            }
        }

        /**
         * Builds a column called [name], choosing the column kind from the reified [T].
         *
         * @param T type of the column
         * @param name name of the column
         * @param values list of column values
         * @param infer column type inference mode
         */
        public inline fun <reified T> create(
            name: String,
            values: List<T>,
            infer: Infer = Infer.None,
        ): DataColumn<T> {
            return create(name, values, typeOf<T>(), infer)
        }

        /**
         * Builds a [ValueColumn] of type [Unit] that contains no values.
         *
         * @param name name of the column, an empty string by default
         */
        public fun empty(name: String = ""): AnyCol {
            return createValueColumn(name, emptyList<Unit>(), typeOf<Unit>())
        }
    }

    /** Reports whether the column [type] is marked nullable. */
    public fun hasNulls(): Boolean {
        return type().isMarkedNullable
    }

    /** Overridden to narrow the result type to [DataColumn]. */
    override fun distinct(): DataColumn<T>

    /** Overridden to narrow the result type to [DataColumn]. */
    override fun get(indices: Iterable<Int>): DataColumn<T>

    /** Overridden to narrow the result type to [DataColumn]. */
    override fun rename(newName: String): DataColumn<T>

    /** Resolves to this very column with its path attached; [context] is not consulted here. */
    override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath<T>? {
        return addPath()
    }

    /** Property-delegate accessor: takes the result of the inherited implementation and casts it to [DataColumn]. */
    override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn<T> {
        val delegated = super.getValue(thisRef, property)
        return delegated as DataColumn<T>
    }

    /** Iterates over the column [values]. */
    public operator fun iterator(): Iterator<T> {
        return values().iterator()
    }

    /** Overridden to narrow the result type to [DataColumn]. */
    public override operator fun get(range: IntRange): DataColumn<T>
}
| 138 | + |
/** Property-style shortcut for `name()`. */
public val AnyCol.name: String
    get() = name()

/** Property-style shortcut for `path()`. */
public val AnyCol.path: ColumnPath
    get() = path()
| 141 | + |
/** Property-style shortcut for `values()`. */
public val <T> DataColumn<T>.values: Iterable<T>
    get() = values()

/** Property-style shortcut for `hasNulls()`. */
public val AnyCol.hasNulls: Boolean
    get() = hasNulls()

/** Property-style shortcut for `size()`. */
public val AnyCol.size: Int
    get() = size()

/** Property-style shortcut for `indices()`. */
public val AnyCol.indices: IntRange
    get() = indices()
| 146 | + |
/** Property-style shortcut for `type()`. */
public val AnyCol.type: KType
    get() = type()

/** Property-style shortcut for `kind()`. */
public val AnyCol.kind: ColumnKind
    get() = kind()
/**
 * The [KClass] acting as classifier of the column [type].
 *
 * Fails with an [IllegalStateException] (raised by [error]) when the classifier is not a [KClass].
 */
public val AnyCol.typeClass: KClass<*>
    get() {
        val kClass = type.classifier as? KClass<*>
        if (kClass != null) return kClass
        error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. Column $name: $type")
    }
| 152 | + |
/** Range of valid positions in this column: from `0` (inclusive) up to `size()` (exclusive). */
public fun AnyBaseCol.indices(): IntRange {
    val endExclusive = size()
    return 0 until endExclusive
}
0 commit comments