Skip to content

Commit a2f0eb5

Browse files
committed
ColumnDataCollector now uses ColumnDataHolder for collecting data directly into primitive arrays
1 parent 1ceb69f commit a2f0eb5

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

59 files changed

+3121
-1395
lines changed

core/build.gradle.kts

+2-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ dependencies {
8383
testImplementation(libs.kotlin.scriptingJvm)
8484
testImplementation(libs.jsoup)
8585

86-
testImplementation("org.openjdk.jol:jol-core:0.10")
86+
// testImplementation("org.openjdk.jol:jol-core:0.10")
87+
implementation("org.openjdk.jol:jol-core:0.10")
8788
implementation("it.unimi.dsi:fastutil:8.5.14")
8889
}
8990

Original file line numberDiff line numberDiff line change
@@ -1,72 +1,89 @@
1+
@file:OptIn(ExperimentalUnsignedTypes::class)
2+
13
package org.jetbrains.kotlinx.dataframe
24

5+
import org.jetbrains.kotlinx.dataframe.impl.columns.BOOLEAN
6+
import org.jetbrains.kotlinx.dataframe.impl.columns.BYTE
7+
import org.jetbrains.kotlinx.dataframe.impl.columns.CHAR
38
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnDataHolderImpl
9+
import org.jetbrains.kotlinx.dataframe.impl.columns.DOUBLE
10+
import org.jetbrains.kotlinx.dataframe.impl.columns.FLOAT
11+
import org.jetbrains.kotlinx.dataframe.impl.columns.INT
12+
import org.jetbrains.kotlinx.dataframe.impl.columns.LONG
13+
import org.jetbrains.kotlinx.dataframe.impl.columns.SHORT
14+
import org.jetbrains.kotlinx.dataframe.impl.columns.UBYTE
15+
import org.jetbrains.kotlinx.dataframe.impl.columns.UINT
16+
import org.jetbrains.kotlinx.dataframe.impl.columns.ULONG
17+
import org.jetbrains.kotlinx.dataframe.impl.columns.USHORT
18+
import org.jetbrains.kotlinx.dataframe.impl.columns.ofBoxedArray
19+
import org.jetbrains.kotlinx.dataframe.impl.columns.ofCollection
20+
import org.jetbrains.kotlinx.dataframe.impl.columns.ofPrimitiveArray
421
import kotlin.reflect.KType
522
import kotlin.reflect.typeOf
623

7-
public interface ColumnDataHolder<T> : Iterable<T> {
8-
9-
public val size: Int
24+
/**
25+
* Represents the contents of a column, regardless of how it is implemented.
26+
* The default implementation is found at [ColumnDataHolderImpl].
27+
*/
28+
public interface ColumnDataHolder<T> : List<T> {
1029

1130
public fun toSet(): Set<T>
1231

13-
public fun toList(): List<T>
14-
15-
public fun contains(value: T): Boolean
32+
public operator fun get(range: IntRange): List<T>
1633

17-
public operator fun get(index: Int): T
34+
public fun add(element: T)
1835

19-
public operator fun get(range: IntRange): List<T>
36+
public fun canAdd(element: T): Boolean
2037

2138
public val distinct: Lazy<Set<T>>
2239

2340
public companion object
2441
}
2542

2643
public fun <T> Collection<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> =
27-
ColumnDataHolderImpl.of(this, type, distinct)
44+
ColumnDataHolder.ofCollection(this, type, distinct)
2845

2946
public inline fun <reified T> Collection<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> =
3047
this.toColumnDataHolder(typeOf<T>(), distinct)
3148

3249
public fun <T> Array<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> =
33-
ColumnDataHolderImpl.of(this, type, distinct)
50+
ColumnDataHolder.ofBoxedArray(this, type, distinct)
3451

3552
public inline fun <reified T> Array<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> =
3653
this.toColumnDataHolder(typeOf<T>(), distinct)
3754

3855
public fun BooleanArray.asColumnDataHolder(distinct: Lazy<Set<Boolean>>? = null): ColumnDataHolder<Boolean> =
39-
ColumnDataHolderImpl.of(this, typeOf<Boolean>(), distinct)
56+
ColumnDataHolder.ofPrimitiveArray(this, BOOLEAN, distinct)
4057

4158
public fun ByteArray.asColumnDataHolder(distinct: Lazy<Set<Byte>>? = null): ColumnDataHolder<Byte> =
42-
ColumnDataHolderImpl.of(this, typeOf<Byte>(), distinct)
59+
ColumnDataHolder.ofPrimitiveArray(this, BYTE, distinct)
4360

4461
public fun ShortArray.asColumnDataHolder(distinct: Lazy<Set<Short>>? = null): ColumnDataHolder<Short> =
45-
ColumnDataHolderImpl.of(this, typeOf<Short>(), distinct)
62+
ColumnDataHolder.ofPrimitiveArray(this, SHORT, distinct)
4663

4764
public fun IntArray.asColumnDataHolder(distinct: Lazy<Set<Int>>? = null): ColumnDataHolder<Int> =
48-
ColumnDataHolderImpl.of(this, typeOf<Int>(), distinct)
65+
ColumnDataHolder.ofPrimitiveArray(this, INT, distinct)
4966

5067
public fun LongArray.asColumnDataHolder(distinct: Lazy<Set<Long>>? = null): ColumnDataHolder<Long> =
51-
ColumnDataHolderImpl.of(this, typeOf<Long>(), distinct)
68+
ColumnDataHolder.ofPrimitiveArray(this, LONG, distinct)
5269

5370
public fun FloatArray.asColumnDataHolder(distinct: Lazy<Set<Float>>? = null): ColumnDataHolder<Float> =
54-
ColumnDataHolderImpl.of(this, typeOf<Float>(), distinct)
71+
ColumnDataHolder.ofPrimitiveArray(this, FLOAT, distinct)
5572

5673
public fun DoubleArray.asColumnDataHolder(distinct: Lazy<Set<Double>>? = null): ColumnDataHolder<Double> =
57-
ColumnDataHolderImpl.of(this, typeOf<Double>(), distinct)
74+
ColumnDataHolder.ofPrimitiveArray(this, DOUBLE, distinct)
5875

5976
public fun CharArray.asColumnDataHolder(distinct: Lazy<Set<Char>>? = null): ColumnDataHolder<Char> =
60-
ColumnDataHolderImpl.of(this, typeOf<Char>(), distinct)
77+
ColumnDataHolder.ofPrimitiveArray(this, CHAR, distinct)
6178

6279
public fun UByteArray.asColumnDataHolder(distinct: Lazy<Set<UByte>>? = null): ColumnDataHolder<UByte> =
63-
ColumnDataHolderImpl.of(this, typeOf<UByte>(), distinct)
80+
ColumnDataHolder.ofPrimitiveArray(this, UBYTE, distinct)
6481

6582
public fun UShortArray.asColumnDataHolder(distinct: Lazy<Set<UShort>>? = null): ColumnDataHolder<UShort> =
66-
ColumnDataHolderImpl.of(this, typeOf<UShort>(), distinct)
83+
ColumnDataHolder.ofPrimitiveArray(this, USHORT, distinct)
6784

6885
public fun UIntArray.asColumnDataHolder(distinct: Lazy<Set<UInt>>? = null): ColumnDataHolder<UInt> =
69-
ColumnDataHolderImpl.of(this, typeOf<UInt>(), distinct)
86+
ColumnDataHolder.ofPrimitiveArray(this, UINT, distinct)
7087

7188
public fun ULongArray.asColumnDataHolder(distinct: Lazy<Set<ULong>>? = null): ColumnDataHolder<ULong> =
72-
ColumnDataHolderImpl.of(this, typeOf<ULong>(), distinct)
89+
ColumnDataHolder.ofPrimitiveArray(this, ULONG, distinct)

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt

+93-9
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl
2121
import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl
2222
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
2323
import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType
24+
import org.jetbrains.kotlinx.dataframe.impl.columns.ofCollection
25+
import org.jetbrains.kotlinx.dataframe.impl.columns.ofBoxedArray
2426
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
2527
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
2628
import org.jetbrains.kotlinx.dataframe.impl.splitByIndices
@@ -42,6 +44,49 @@ public interface DataColumn<out T> : BaseColumn<T> {
4244

4345
public companion object {
4446

47+
public fun <T> createValueColumn(
48+
name: String,
49+
values: ColumnDataHolder<T>,
50+
type: KType,
51+
defaultValue: T? = null,
52+
): ValueColumn<T> = ValueColumnImpl(values, name, type, defaultValue)
53+
54+
public fun createValueColumn(name: String, values: BooleanArray): ValueColumn<Boolean> =
55+
createValueColumn(name, values.asColumnDataHolder(), typeOf<Boolean>())
56+
57+
public fun createValueColumn(name: String, values: ByteArray): ValueColumn<Byte> =
58+
createValueColumn(name, values.asColumnDataHolder(), typeOf<Byte>())
59+
60+
public fun createValueColumn(name: String, values: ShortArray): ValueColumn<Short> =
61+
createValueColumn(name, values.asColumnDataHolder(), typeOf<Short>())
62+
63+
public fun createValueColumn(name: String, values: IntArray): ValueColumn<Int> =
64+
createValueColumn(name, values.asColumnDataHolder(), typeOf<Int>())
65+
66+
public fun createValueColumn(name: String, values: LongArray): ValueColumn<Long> =
67+
createValueColumn(name, values.asColumnDataHolder(), typeOf<Long>())
68+
69+
public fun createValueColumn(name: String, values: FloatArray): ValueColumn<Float> =
70+
createValueColumn(name, values.asColumnDataHolder(), typeOf<Float>())
71+
72+
public fun createValueColumn(name: String, values: DoubleArray): ValueColumn<Double> =
73+
createValueColumn(name, values.asColumnDataHolder(), typeOf<Double>())
74+
75+
public fun createValueColumn(name: String, values: CharArray): ValueColumn<Char> =
76+
createValueColumn(name, values.asColumnDataHolder(), typeOf<Char>())
77+
78+
public fun createValueColumn(name: String, values: UByteArray): ValueColumn<UByte> =
79+
createValueColumn(name, values.asColumnDataHolder(), typeOf<UByte>())
80+
81+
public fun createValueColumn(name: String, values: UShortArray): ValueColumn<UShort> =
82+
createValueColumn(name, values.asColumnDataHolder(), typeOf<UShort>())
83+
84+
public fun createValueColumn(name: String, values: UIntArray): ValueColumn<UInt> =
85+
createValueColumn(name, values.asColumnDataHolder(), typeOf<UInt>())
86+
87+
public fun createValueColumn(name: String, values: ULongArray): ValueColumn<ULong> =
88+
createValueColumn(name, values.asColumnDataHolder(), typeOf<ULong>())
89+
4590
/**
4691
* Creates [ValueColumn] using given [name], [values] and [type].
4792
*
@@ -56,7 +101,15 @@ public interface DataColumn<out T> : BaseColumn<T> {
56101
type: KType,
57102
infer: Infer = Infer.None,
58103
defaultValue: T? = null,
59-
): ValueColumn<T> = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue)
104+
): ValueColumn<T> {
105+
val valueType = getValuesType(values, type, infer)
106+
return createValueColumn(
107+
name = name,
108+
values = ColumnDataHolder.ofCollection(values, valueType),
109+
type = valueType,
110+
defaultValue = defaultValue,
111+
)
112+
}
60113

61114
/**
62115
* Creates [ValueColumn] using given [name], [values] and reified column [type].
@@ -74,25 +127,56 @@ public interface DataColumn<out T> : BaseColumn<T> {
74127
infer: Infer = Infer.None,
75128
): ValueColumn<T> =
76129
createValueColumn(
77-
name,
78-
values,
79-
getValuesType(
80-
values,
81-
typeOf<T>(),
82-
infer,
130+
name = name,
131+
values = values,
132+
type = getValuesType(
133+
values = values,
134+
type = typeOf<T>(),
135+
infer = infer,
83136
),
84137
)
85138

139+
public fun <T> createValueColumn(
140+
name: String,
141+
values: Array<T>,
142+
type: KType,
143+
infer: Infer = Infer.None,
144+
defaultValue: T? = null,
145+
): ValueColumn<T> {
146+
val valueType = getValuesType(values.asList(), type, infer)
147+
return createValueColumn(
148+
name = name,
149+
values = ColumnDataHolder.ofBoxedArray(values, valueType),
150+
type = valueType,
151+
defaultValue = defaultValue,
152+
)
153+
}
154+
155+
public inline fun <reified T> createValueColumn(
156+
name: String,
157+
values: Array<T>,
158+
infer: Infer = Infer.None,
159+
): ValueColumn<T> =
160+
createValueColumn(
161+
name = name,
162+
values = values,
163+
type = getValuesType(values.asList(), typeOf<T>(), infer),
164+
)
165+
86166
public fun <T> createColumnGroup(name: String, df: DataFrame<T>): ColumnGroup<T> = ColumnGroupImpl(name, df)
87167

88168
public fun <T> createFrameColumn(name: String, df: DataFrame<T>, startIndices: Iterable<Int>): FrameColumn<T> =
89-
FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() })
169+
FrameColumnImpl(
170+
name,
171+
df.splitByIndices(startIndices.asSequence()).toList().toColumnDataHolder(),
172+
lazy { df.schema() },
173+
)
90174

91175
public fun <T> createFrameColumn(
92176
name: String,
93177
groups: List<DataFrame<T>>,
94178
schema: Lazy<DataFrameSchema>? = null,
95-
): FrameColumn<T> = FrameColumnImpl(name, groups, schema)
179+
): FrameColumn<T> = FrameColumnImpl(name, groups.toColumnDataHolder(), schema)
96180

97181
public fun <T> createWithTypeInference(
98182
name: String,

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnSelectionDsl.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public interface ColumnSelectionDsl<out T> : ColumnsContainer<T> {
5959
/**
6060
* Retrieves the value of this [ColumnPath] from the [DataFrame].
6161
* This is a shorthand for [getColumn][ColumnsContainer.getColumn]`(myColumnPath)` and
62-
* is most often used in combination with `operator fun String.get(column: String)`,
62+
* is most often used in combination with `operator fun String.get(column: String)`,
6363
* for instance:
6464
* ```kotlin
6565
* "myColumn"["myNestedColumn"]<NestedColumnType>()

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

+21-4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
88
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
99
import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver
1010
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
11+
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl
12+
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
13+
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
14+
import org.jetbrains.kotlinx.dataframe.documentation.ExportAsHtml
15+
import org.jetbrains.kotlinx.dataframe.documentation.Indent
16+
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
17+
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
18+
import org.jetbrains.kotlinx.dataframe.impl.DataFrameReceiver
1119
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnsList
1220
import org.jetbrains.kotlinx.dataframe.util.COL_SELECT_DSL_LIST_DATACOLUMN_GET
1321
import org.jetbrains.kotlinx.dataframe.util.COL_SELECT_DSL_LIST_DATACOLUMN_GET_REPLACE
@@ -187,7 +195,7 @@ public interface ColumnsSelectionDsl<out T> : // SingleColumn<DataRow<T>>
187195
*
188196
* ### What can be called directly in the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl]:
189197
*
190-
*
198+
*
191199
* &nbsp;&nbsp;&nbsp;&nbsp;
192200
*
193201
* [`column`][org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate.ColumnDef]` `[**`..`**][org.jetbrains.kotlinx.dataframe.api.ColumnRangeColumnsSelectionDsl.rangeTo]` `[`column`][org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate.ColumnDef]
@@ -250,7 +258,7 @@ public interface ColumnsSelectionDsl<out T> : // SingleColumn<DataRow<T>>
250258
*
251259
* ### What can be called on a [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]:
252260
*
253-
*
261+
*
254262
* &nbsp;&nbsp;&nbsp;&nbsp;
255263
*
256264
* [`columnSet`][org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate.ColumnSetDef]
@@ -313,7 +321,7 @@ public interface ColumnsSelectionDsl<out T> : // SingleColumn<DataRow<T>>
313321
*
314322
* ### What can be called on a [Column Group (reference)][org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate.ColumnGroupDef]:
315323
*
316-
*
324+
*
317325
* &nbsp;&nbsp;&nbsp;&nbsp;
318326
*
319327
* [`columnGroup`][org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate.ColumnGroupDef]
@@ -391,7 +399,16 @@ public interface ColumnsSelectionDsl<out T> : // SingleColumn<DataRow<T>>
391399
*
392400
*
393401
*/
394-
public interface DslGrammar
402+
public interface DslGrammar {
403+
404+
405+
406+
407+
408+
409+
410+
411+
}
395412

396413
/**
397414
* Invokes the given [ColumnsSelector] using this [ColumnsSelectionDsl].

0 commit comments

Comments
 (0)