@@ -6,7 +6,6 @@ import org.jetbrains.kotlinx.dataframe.api.cast
6
6
import org.jetbrains.kotlinx.dataframe.api.concat
7
7
import org.jetbrains.kotlinx.dataframe.api.filter
8
8
import org.jetbrains.kotlinx.dataframe.api.map
9
- import org.jetbrains.kotlinx.dataframe.api.schema
10
9
import org.jetbrains.kotlinx.dataframe.api.take
11
10
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
12
11
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
@@ -15,16 +14,28 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
15
14
import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
16
15
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
17
16
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
17
+ import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
18
18
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
19
+ import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl
19
20
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl
20
21
import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl
21
22
import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl
22
23
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
23
- import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType
24
+ import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
24
25
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
25
26
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
26
- import org.jetbrains.kotlinx.dataframe.impl.splitByIndices
27
27
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
28
+ import org.jetbrains.kotlinx.dataframe.util.CHUNKED_IMPL_IMPORT
29
+ import org.jetbrains.kotlinx.dataframe.util.CREATE
30
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_BY_INFERENCE_IMPORT
31
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_BY_TYPE_IMPORT
32
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN
33
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_REPLACE
34
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_INLINE_REPLACE
35
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_REPLACE
36
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_WITH_TYPE_INFERENCE
37
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_WITH_TYPE_INFERENCE_REPLACE
38
+ import org.jetbrains.kotlinx.dataframe.util.TYPE_SUGGESTION_IMPORT
28
39
import kotlin.reflect.KClass
29
40
import kotlin.reflect.KProperty
30
41
import kotlin.reflect.KType
@@ -45,6 +56,9 @@ public interface DataColumn<out T> : BaseColumn<T> {
45
56
/* *
46
57
* Creates [ValueColumn] using given [name], [values] and [type].
47
58
*
59
+ * Be careful; values are NOT checked to adhere to [type] for efficiency,
60
+ * unless you specify [infer].
61
+ *
48
62
* @param name name of the column
49
63
* @param values list of column values
50
64
* @param type type of the column
@@ -56,12 +70,20 @@ public interface DataColumn<out T> : BaseColumn<T> {
56
70
type : KType ,
57
71
infer : Infer = Infer .None ,
58
72
defaultValue : T ? = null,
59
- ): ValueColumn <T > = ValueColumnImpl (values, name, getValuesType(values, type, infer), defaultValue)
73
+ ): ValueColumn <T > =
74
+ ValueColumnImpl (
75
+ values = values,
76
+ name = name,
77
+ type = getValuesType(values, type, infer),
78
+ defaultValue = defaultValue,
79
+ )
60
80
61
81
/* *
62
82
* Creates [ValueColumn] using given [name], [values] and reified column [type].
63
83
*
64
- * Note, that column [type] will be defined at compile-time using [T] argument
84
+ * The column [type] will be defined at compile-time using [T] argument.
85
+ * Be careful with casting; values are NOT checked to adhere to `reified` type [T] for efficiency,
86
+ * unless you specify [infer].
65
87
*
66
88
* @param T type of the column
67
89
* @param name name of the column
@@ -74,48 +96,187 @@ public interface DataColumn<out T> : BaseColumn<T> {
74
96
infer : Infer = Infer .None ,
75
97
): ValueColumn <T > =
76
98
createValueColumn(
77
- name,
78
- values,
79
- getValuesType(
80
- values,
81
- typeOf<T >(),
82
- infer,
83
- ),
99
+ name = name,
100
+ values = values,
101
+ type = typeOf<T >(),
102
+ infer = infer,
84
103
)
85
104
105
/**
 * Builds a [ColumnGroup] called [name] that wraps [df], the dataframe representing the group of columns.
 *
 * @param name name given to the resulting column group
 * @param df the collection of columns that makes up the column group
 * @return a [ColumnGroup] backed by [ColumnGroupImpl]
 */
public fun <T> createColumnGroup(
    name: String,
    df: DataFrame<T>,
): ColumnGroup<T> = ColumnGroupImpl(name, df)
87
112
88
- public fun <T > createFrameColumn (name : String , df : DataFrame <T >, startIndices : Iterable <Int >): FrameColumn <T > =
89
- FrameColumnImpl (name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() })
90
-
113
/**
 * Wraps the given list of dataframes [groups] in a [FrameColumn] called [name].
 *
 * A [FrameColumn] does not allow `null` values, so every element of [groups] has to be
 * an actual [DataFrame].
 * For efficiency, neither this requirement nor the validity of the supplied [schema]
 * is verified at runtime.
 *
 * @param name name of the frame column
 * @param groups the dataframes that become the values of the column
 * @param schema optional, lazily calculated [DataFrameSchema] representing
 *   the intersecting schema of [groups]
 * @return a [FrameColumn] backed by [FrameColumnImpl]
 */
public fun <T> createFrameColumn(
    name: String,
    groups: List<DataFrame<T>>,
    schema: Lazy<DataFrameSchema>? = null,
): FrameColumn<T> {
    return FrameColumnImpl(name, groups, schema)
}
96
130
97
- public fun <T > createWithTypeInference (
131
/**
 * Builds a [FrameColumn], [ColumnGroup], or [ValueColumn], picking the kind by analyzing
 * each element of [values].
 *
 * Compared to the other factory functions, this one is safer but slower.
 *
 * To unify the values, some of them are converted automatically.
 * For example, when [values] contains other [DataFrames][DataFrame], the following conversions
 * are applied so that a [FrameColumn] can be created:
 * - `null` -> [DataFrame.empty]`()`
 * - [DataRow] -> single-row [DataFrame]
 * - [List][List]`<`[DataRow][DataRow]`<*>>` -> multi-row [DataFrame]
 *
 * Further conversions exist for other types as well.
 *
 * @param name name of the column
 * @param values the values representing each row of the column
 * @param suggestedType optional hint for the type of the values; [TypeSuggestion.Infer] by default.
 *   See [TypeSuggestion] for more information.
 * @param nullable whether [values] contains nulls; when `null`, this is inferred
 */
public fun <T> createByInference(
    name: String,
    values: List<T>,
    suggestedType: TypeSuggestion = TypeSuggestion.Infer,
    nullable: Boolean? = null,
): DataColumn<T> {
    // All of the analysis is delegated to the implementation-level helper.
    return createColumnGuessingType(
        name = name,
        values = values,
        suggestedType = suggestedType,
        nullable = nullable,
    )
}
102
165
103
- public fun <T > create (
166
/**
 * Dispatches to [createValueColumn], [createColumnGroup], or [createFrameColumn], depending on
 * the column kind that [type] maps to.
 *
 * More efficient than [createByInference], but potentially unsafe:
 * the elements of [values] are NOT checked against the given [type], and unexpected nulls
 * among them are not detected either.
 *
 * Prefer calling [createValueColumn], [createColumnGroup], or [createFrameColumn] directly.
 *
 * @param name the name of the column
 * @param values the values representing each row of the column
 * @param type the (unchecked) common type of [values]
 * @param infer only relevant when a [ValueColumn] is created; controls how/whether types are inferred
 */
public fun <T> createByType(
    name: String,
    values: List<T>,
    type: KType,
    infer: Infer = Infer.None,
): DataColumn<T> {
    // Kind mapping: AnyFrame -> Frame, AnyRow? -> Group, else -> Value
    val kind = type.toColumnKind()
    return when (kind) {
        // Unchecked cast: callers promise that every value is a dataframe.
        ColumnKind.Frame -> createFrameColumn(name, values as List<AnyFrame>).asDataColumn().cast()

        // Unchecked cast: the (nullable) rows are concatenated into the dataframe backing the group.
        ColumnKind.Group -> createColumnGroup(name, (values as List<AnyRow?>).concat()).asDataColumn().cast()

        ColumnKind.Value -> createValueColumn(name, values, type, infer)
    }
}
114
195
115
- public inline fun <reified T > create (name : String , values : List <T >, infer : Infer = Infer .None ): DataColumn <T > =
116
- create(name, values, typeOf<T >(), infer)
196
/**
 * Dispatches to [createValueColumn], [createColumnGroup], or [createFrameColumn], depending on
 * the reified type [T].
 *
 * Generally safe, because the compiler can infer [T], and more efficient than [createByInference].
 *
 * Take care when casts are involved: the elements of [values] are NOT checked against
 * the given/inferred type [T], and unexpected nulls among them are not detected either.
 *
 * Prefer calling [createValueColumn], [createColumnGroup], or [createFrameColumn] directly.
 *
 * @param T the (unchecked) common type of [values]
 * @param name the name of the column
 * @param values the values representing each row of the column
 * @param infer only relevant when a [ValueColumn] is created; controls how/whether types are inferred
 */
public inline fun <reified T> createByType(
    name: String,
    values: List<T>,
    infer: Infer = Infer.None,
): DataColumn<T> =
    createByType(
        name = name,
        values = values,
        type = typeOf<T>(),
        infer = infer,
    )
117
218
219
/**
 * Creates an empty [DataColumn] with the given [name].
 *
 * The column is produced by [createValueColumn] from an empty list of [Unit] values.
 *
 * @param name name of the column; an empty string by default
 */
public fun empty(name: String = ""): AnyCol =
    createValueColumn(name = name, values = emptyList<Unit>(), type = typeOf<Unit>())
221
+
222
+ // region deprecated
223
+
224
/**
 * Deprecated overload kept for backward compatibility.
 *
 * Simply forwards to `chunkedImpl` on [df], handing over [startIndices] and [name].
 *
 * @param name name of the resulting frame column
 * @param df the dataframe to build the frame column from
 * @param startIndices indices passed on to `chunkedImpl`
 *   (presumably the row index at which each chunk starts; confirm against `chunkedImpl`)
 */
@Deprecated(
    message = CREATE_FRAME_COLUMN,
    replaceWith = ReplaceWith(CREATE_FRAME_COLUMN_REPLACE, CHUNKED_IMPL_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> createFrameColumn(
    name: String,
    df: DataFrame<T>,
    startIndices: Iterable<Int>,
): FrameColumn<T> {
    return df.chunkedImpl(startIndices = startIndices, name = name)
}
231
+
232
/**
 * Deprecated alias of [createByInference], kept for backward compatibility.
 *
 * Forwards all arguments and always uses [TypeSuggestion.Infer] as the suggested type.
 *
 * @param name name of the column
 * @param values the values representing each row of the column
 * @param nullable whether [values] contains nulls; passed on unchanged to [createByInference]
 */
@Deprecated(
    message = CREATE_WITH_TYPE_INFERENCE,
    replaceWith = ReplaceWith(
        CREATE_WITH_TYPE_INFERENCE_REPLACE,
        CREATE_BY_INFERENCE_IMPORT,
        TYPE_SUGGESTION_IMPORT,
    ),
    level = DeprecationLevel.WARNING,
)
public fun <T> createWithTypeInference(
    name: String,
    values: List<T>,
    nullable: Boolean? = null,
): DataColumn<T> = createByInference(name, values, TypeSuggestion.Infer, nullable)
252
+
253
/**
 * Deprecated alias of [createByType], kept for backward compatibility.
 *
 * Forwards all arguments unchanged.
 *
 * @param name the name of the column
 * @param values the values representing each row of the column
 * @param type the common type of [values], passed on as-is
 * @param infer passed on as-is to [createByType]
 */
@Deprecated(
    message = CREATE,
    replaceWith = ReplaceWith(CREATE_REPLACE, CREATE_BY_TYPE_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> create(
    name: String,
    values: List<T>,
    type: KType,
    infer: Infer = Infer.None,
): DataColumn<T> = createByType(name, values, type, infer)
270
+
271
/**
 * Deprecated alias of the [KType]-based [createByType], kept for backward compatibility.
 *
 * The column type is taken from the reified type argument [T].
 *
 * @param T the common type of [values]
 * @param name the name of the column
 * @param values the values representing each row of the column
 * @param infer passed on as-is to [createByType]
 */
@Deprecated(
    message = CREATE,
    replaceWith = ReplaceWith(CREATE_INLINE_REPLACE, CREATE_BY_TYPE_IMPORT),
    level = DeprecationLevel.WARNING,
)
public inline fun <reified T> create(
    name: String,
    values: List<T>,
    infer: Infer = Infer.None,
): DataColumn<T> {
    val columnType = typeOf<T>()
    return createByType(name = name, values = values, type = columnType, infer = infer)
}
278
+
279
+ // endregion
119
280
}
120
281
121
282
/** Returns `true` when the type reported by `type()` for this column is marked nullable. */
public fun hasNulls(): Boolean {
    return type().isMarkedNullable
}
0 commit comments