Skip to content

Commit 639156b

Browse files
committed
Merge branch 'master' into new-csv-implementation
# Conflicts: # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/insert.kt # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/BaseColumn.kt # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/insert.kt # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt # core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt # core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt # core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt
2 parents cc32153 + f410705 commit 639156b

File tree

70 files changed

+2740
-289
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+2740
-289
lines changed

core/build.gradle.kts

+4-2
Original file line number | Diff line number | Diff line change
@@ -268,8 +268,10 @@ tasks.withType<Jar> {
268268
}
269269

270270
// modify all publishing tasks to depend on `changeJarTask` so the sources are swapped out with generated sources
271-
tasks.named { it.startsWith("publish") }.configureEach {
272-
dependsOn(processKDocsMain, changeJarTask)
271+
tasks.configureEach {
    // Only tasks whose name starts with "publish" get the extra dependencies; all others are left untouched.
    if (!name.startsWith("publish")) return@configureEach
    dependsOn(processKDocsMain, changeJarTask)
}
274276

275277
// Exclude the generated/processed sources from the IDE
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,45 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
4+
import org.jetbrains.kotlinx.dataframe.DataFrame
5+
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
6+
import org.jetbrains.kotlinx.dataframe.annotations.Refine
7+
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
8+
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
9+
import org.jetbrains.kotlinx.dataframe.impl.api.flattenImpl
10+
import kotlin.reflect.KProperty
11+
12+
// region DataFrame
13+
14+
/**
 * Overload of [flatten] that selects every column: forwards [keepParentNameForColumns] and [separator]
 * to the selector-based overload, using `all()` as the column selection.
 */
@Refine
@Interpretable("FlattenDefault")
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, separator: String = "_"): DataFrame<T> {
    return flatten(keepParentNameForColumns = keepParentNameForColumns, separator = separator) { all() }
}
18+
19+
/**
 * Selector-based [flatten]: hands the selected [columns] together with [keepParentNameForColumns]
 * and [separator] to `flattenImpl`, which performs the actual work.
 *
 * @param keepParentNameForColumns forwarded unchanged to `flattenImpl`; defaults to `false`.
 * @param separator forwarded unchanged to `flattenImpl`; defaults to `"_"`.
 * @param columns selector of the columns to operate on.
 * @return the [DataFrame] produced by `flattenImpl`.
 */
@Refine
@Interpretable("Flatten0")
public fun <T, C> DataFrame<T>.flatten(
    keepParentNameForColumns: Boolean = false,
    separator: String = "_",
    columns: ColumnsSelector<T, C>,
): DataFrame<T> {
    return flattenImpl(columns, keepParentNameForColumns, separator)
}
26+
27+
/**
 * [flatten] overload addressing columns by name: the names are turned into a column set with
 * `toColumnSet()` and passed to the selector-based overload.
 */
public fun <T> DataFrame<T>.flatten(
    vararg columns: String,
    keepParentNameForColumns: Boolean = false,
    separator: String = "_",
): DataFrame<T> {
    return flatten(keepParentNameForColumns, separator) { columns.toColumnSet() }
}
32+
33+
/**
 * [flatten] overload addressing columns by [ColumnReference]: the references are turned into a
 * column set with `toColumnSet()` and passed to the selector-based overload.
 */
public fun <T, C> DataFrame<T>.flatten(
    vararg columns: ColumnReference<C>,
    keepParentNameForColumns: Boolean = false,
    separator: String = "_",
): DataFrame<T> {
    return flatten(keepParentNameForColumns, separator) { columns.toColumnSet() }
}
38+
39+
/**
 * [flatten] overload addressing columns by [KProperty]: the properties are turned into a
 * column set with `toColumnSet()` and passed to the selector-based overload.
 */
public fun <T, C> DataFrame<T>.flatten(
    vararg columns: KProperty<C>,
    keepParentNameForColumns: Boolean = false,
    separator: String = "_",
): DataFrame<T> {
    return flatten(keepParentNameForColumns, separator) { columns.toColumnSet() }
}
44+
45+
// endregion
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,99 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import org.jetbrains.kotlinx.dataframe.AnyCol
4+
import org.jetbrains.kotlinx.dataframe.ColumnSelector
5+
import org.jetbrains.kotlinx.dataframe.DataColumn
6+
import org.jetbrains.kotlinx.dataframe.DataFrame
7+
import org.jetbrains.kotlinx.dataframe.RowExpression
8+
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
9+
import org.jetbrains.kotlinx.dataframe.annotations.Refine
10+
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
11+
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
12+
import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl
13+
import org.jetbrains.kotlinx.dataframe.impl.columnName
14+
import org.jetbrains.kotlinx.dataframe.impl.removeAt
15+
import kotlin.reflect.KProperty
16+
17+
// region DataFrame
18+
19+
// region insert
20+
21+
/**
 * Starts an insert operation for an existing [column]: wraps this [DataFrame] and the column in an
 * [InsertClause]. The position is chosen by a follow-up call on the clause (`under`, `after` or `at`).
 */
@Interpretable("Insert0")
public fun <T, C> DataFrame<T>.insert(column: DataColumn<C>): InsertClause<T> {
    return InsertClause(this, column)
}
23+
24+
/**
 * Starts an insert operation for a column that is computed from the rows of this [DataFrame].
 *
 * The column is built with `mapToColumn(name, infer, expression)` and then passed to the
 * [DataColumn]-based [insert] overload.
 *
 * @param name name given to the new column.
 * @param infer forwarded to `mapToColumn`; defaults to [Infer.Nulls].
 * @param expression row expression producing the values of the new column.
 */
@Interpretable("Insert1")
public inline fun <T, reified R> DataFrame<T>.insert(
    name: String,
    infer: Infer = Infer.Nulls,
    noinline expression: RowExpression<T, R>,
): InsertClause<T> {
    val newColumn = mapToColumn(name, infer, expression)
    return insert(newColumn)
}
30+
31+
/**
 * [insert] overload that takes the new column's name from a [ColumnAccessor] (`column.name()`)
 * and delegates to the name-based overload.
 */
@Interpretable("Insert2")
public inline fun <T, reified R> DataFrame<T>.insert(
    column: ColumnAccessor<R>,
    infer: Infer = Infer.Nulls,
    noinline expression: RowExpression<T, R>,
): InsertClause<T> {
    return insert(name = column.name(), infer = infer, expression = expression)
}
37+
38+
/**
 * [insert] overload that takes the new column's name from a [KProperty] (`column.columnName`)
 * and delegates to the name-based overload.
 */
@Interpretable("Insert3")
public inline fun <T, reified R> DataFrame<T>.insert(
    column: KProperty<R>,
    infer: Infer = Infer.Nulls,
    noinline expression: RowExpression<T, R>,
): InsertClause<T> {
    return insert(name = column.columnName, infer = infer, expression = expression)
}
44+
45+
// endregion
46+
47+
/**
 * Intermediate state of an insert operation: the target [df] together with the [column] to insert.
 * The final position is chosen with one of the `under`, `after` or `at` extensions.
 */
public class InsertClause<T>(internal val df: DataFrame<T>, internal val column: AnyCol) {
    override fun toString(): String =
        buildString {
            append("InsertClause(df=")
            append(df)
            append(", column=")
            append(column)
            append(")")
        }
}
50+
51+
// region under
52+
53+
/**
 * Completes the insert under the column chosen by the [column] selector: the selector is resolved to
 * a [ColumnPath] via `df.getColumnPath` and handed to the path-based [under] overload.
 */
@Refine
@Interpretable("Under0")
public fun <T> InsertClause<T>.under(column: ColumnSelector<T, *>): DataFrame<T> {
    val parentPath = df.getColumnPath(column)
    return under(parentPath)
}
56+
57+
/**
 * Completes the insert under [columnPath]: the destination is [columnPath] extended with the name of
 * the pending column, and the insertion itself is done by `insertImpl`.
 */
@Refine
@Interpretable("Under1")
public fun <T> InsertClause<T>.under(columnPath: ColumnPath): DataFrame<T> {
    val destination = columnPath + column.name
    return df.insertImpl(destination, column)
}
61+
62+
/** Completes the insert under the column referenced by [column], using its `path()`. */
@Refine
@Interpretable("Under2")
public fun <T> InsertClause<T>.under(column: ColumnAccessor<*>): DataFrame<T> {
    return under(column.path())
}
65+
66+
/** Completes the insert under the column named after the property [column] (`column.columnName`). */
@Refine
@Interpretable("Under3")
public fun <T> InsertClause<T>.under(column: KProperty<*>): DataFrame<T> {
    return under(column.columnName)
}
69+
70+
/** Completes the insert under the column called [column]; the name is wrapped with `pathOf`. */
@Refine
@Interpretable("Under4")
public fun <T> InsertClause<T>.under(column: String): DataFrame<T> {
    return under(pathOf(column))
}
73+
74+
// endregion
75+
76+
// region after
77+
78+
/**
 * Completes the insert right after the column chosen by the [column] selector: the selector is resolved
 * to a [ColumnPath] via `df.getColumnPath` and handed to the path-based [after] overload.
 */
public fun <T> InsertClause<T>.after(column: ColumnSelector<T, *>): DataFrame<T> {
    val anchorPath = df.getColumnPath(column)
    return after(anchorPath)
}
79+
80+
/**
 * Completes the insert after the column called [column]: the pending column is first added to the
 * data frame and then moved after the named column.
 */
public fun <T> InsertClause<T>.after(column: String): DataFrame<T> {
    // The parameter shadows the clause's own `column`, so capture the pending column explicitly.
    val pending = this.column
    return df.add(pending).move(pending).after(column)
}
81+
82+
/** Completes the insert right after the column referenced by [column], using its `path()`. */
public fun <T> InsertClause<T>.after(column: ColumnAccessor<*>): DataFrame<T> {
    return after(column.path())
}
83+
84+
/** Completes the insert after the column named after the property [column] (`column.columnName`). */
public fun <T> InsertClause<T>.after(column: KProperty<*>): DataFrame<T> {
    return after(column.columnName)
}
85+
86+
/**
 * Completes the insert right after the column at [columnPath].
 *
 * The pending column is inserted as a sibling of the anchor (the anchor's path with its last segment
 * replaced by the pending column's name) and then moved directly after the anchor.
 */
public fun <T> InsertClause<T>.after(columnPath: ColumnPath): DataFrame<T> {
    // Path of the anchor's parent: everything except the last segment.
    val parentSegments = columnPath.removeAt(columnPath.size - 1)
    val dstPath = ColumnPath(parentSegments + column.name())
    val withInserted = df.insertImpl(dstPath, column)
    return withInserted.move { dstPath }.after { columnPath }
}
90+
91+
// endregion
92+
93+
// region at
94+
95+
/**
 * Completes the insert at the given [position]: the pending column is added to the data frame
 * and then moved to [position].
 */
public fun <T> InsertClause<T>.at(position: Int): DataFrame<T> {
    val withColumn = df.add(column)
    return withColumn.move(column).to(position)
}
96+
97+
// endregion
98+
99+
// endregion
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,97 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import org.jetbrains.kotlinx.dataframe.AnyFrame
4+
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
5+
import org.jetbrains.kotlinx.dataframe.DataColumn
6+
import org.jetbrains.kotlinx.dataframe.DataFrame
7+
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
8+
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
9+
import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
10+
import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
11+
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
12+
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
13+
import org.jetbrains.kotlinx.dataframe.typeClass
14+
import java.time.format.DateTimeFormatter
15+
import java.util.Locale
16+
import kotlin.reflect.KProperty
17+
18+
/** Access point for the parser settings; always returns the `Parsers` object. */
public val DataFrame.Companion.parser: GlobalParserOptions
    get() {
        return Parsers
    }
19+
20+
/**
 * Parses the columns chosen by [columns]; the work is done by `parseImpl`.
 *
 * @param options optional parsing options, forwarded unchanged (`null` by default).
 * @param columns selector of the columns to parse.
 */
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null, columns: ColumnsSelector<T, Any?>): DataFrame<T> {
    return parseImpl(options, columns)
}
22+
23+
/**
 * [parse] overload addressing columns by name: the names are turned into a column set with
 * `toColumnSet()` and passed to the selector-based overload.
 */
public fun <T> DataFrame<T>.parse(vararg columns: String, options: ParserOptions? = null): DataFrame<T> {
    return parse(options) { columns.toColumnSet() }
}
25+
26+
/**
 * [parse] overload addressing columns by [ColumnReference]: the references are turned into a
 * column set with `toColumnSet()` and passed to the selector-based overload.
 */
public fun <T, C> DataFrame<T>.parse(vararg columns: ColumnReference<C>, options: ParserOptions? = null): DataFrame<T> {
    return parse(options) { columns.toColumnSet() }
}
28+
29+
/**
 * [parse] overload addressing columns by [KProperty]: the properties are turned into a
 * column set with `toColumnSet()` and passed to the selector-based overload.
 */
public fun <T, C> DataFrame<T>.parse(vararg columns: KProperty<C>, options: ParserOptions? = null): DataFrame<T> {
    return parse(options) { columns.toColumnSet() }
}
31+
32+
/**
 * Contract of the settings object returned by `DataFrame.parser`.
 *
 * NOTE(review): the member descriptions below are inferred from the member names only;
 * confirm them against the implementing `Parsers` object.
 */
public interface GlobalParserOptions {

    /** Locale setting; readable and writable. Presumably the default locale used while parsing — TODO confirm. */
    public var locale: Locale

    /** Presumably registers [pattern] as an additional date-time pattern to try — TODO confirm. */
    public fun addDateTimePattern(pattern: String)

    /** Presumably registers [str] as an additional string to be read as `null` — TODO confirm. */
    public fun addNullString(str: String)

    /** Presumably restores the initial settings — TODO confirm. */
    public fun resetToDefault()
}
42+
43+
/**
 * Per-call parsing options. Every property is optional and defaults to `null`.
 *
 * @property locale locale combined with [dateTimePattern] when a formatter has to be built.
 * @property dateTimeFormatter ready-made formatter; takes precedence over [dateTimePattern].
 * @property dateTimePattern pattern from which a formatter is built when [dateTimeFormatter] is absent.
 * @property nullStrings optional set of strings; not used inside this class.
 */
public data class ParserOptions(
    val locale: Locale? = null,
    // TODO, migrate to kotlinx.datetime.format.DateTimeFormat? https://github.com/Kotlin/dataframe/issues/876
    val dateTimeFormatter: DateTimeFormatter? = null,
    val dateTimePattern: String? = null,
    val nullStrings: Set<String>? = null,
) {
    /**
     * Resolves the formatter described by these options:
     * the explicit [dateTimeFormatter] if present, otherwise one built from [dateTimePattern]
     * (with [locale] when it is set), otherwise `null`.
     */
    internal fun getDateTimeFormatter(): DateTimeFormatter? {
        if (dateTimeFormatter != null) return dateTimeFormatter
        val pattern = dateTimePattern ?: return null
        return if (locale != null) {
            DateTimeFormatter.ofPattern(pattern, locale)
        } else {
            DateTimeFormatter.ofPattern(pattern)
        }
    }
}
58+
59+
/**
 * Attempts to convert a column of strings into a column of another type.
 *
 * The parsers in [Parsers][org.jetbrains.kotlinx.dataframe.impl.api.Parsers] are tried one after another.
 * A parser is accepted only if it parses every value of the column successfully; as soon as it fails on
 * any value, the next parser is tried. When all others fail, the last parser simply returns the original
 * string, so the column is left unchanged.
 *
 * Parsers that are [covered by][org.jetbrains.kotlinx.dataframe.impl.api.StringParser.coveredBy]
 * other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
 * @return a new column with parsed values
 */
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
    return tryParseImpl(options)
}
71+
72+
/**
 * Parses this [DataFrame] without an explicit column selection: the selector-based overload is called
 * with every column, at any depth, that is not a column group.
 *
 * @param options optional parsing options, forwarded unchanged (`null` by default).
 */
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> {
    return parse(options) {
        colsAtAnyDepth { col -> !col.isColumnGroup() }
    }
}
76+
77+
/**
 * Converts a column of strings into a column of another type.
 *
 * The parsers in [Parsers] are tried one after another. A parser is accepted only if it parses every
 * value of the column successfully; as soon as it fails on any value, the next parser is tried.
 *
 * If all of them fail, an [IllegalStateException] is thrown: this function calls [tryParse] and raises
 * the error when the result is still a `String` column. Use [tryParse] directly if you don't want
 * this exception to be thrown.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @throws IllegalStateException if no valid parser is found
 * @return a new column with parsed values
 */
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> {
    val parsed = tryParse(options)
    // A result that is still a String column means no parser accepted the values.
    if (parsed.typeClass == String::class) error("Can't guess column type")
    return parsed
}
94+
95+
/**
 * Parses every non-null data frame held in this column by calling `parse(options)` on it;
 * `null` entries stay `null`.
 */
@JvmName("parseAnyFrameNullable")
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> {
    return map { frame -> frame?.parse(options) }
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,102 @@
1+
package org.jetbrains.kotlinx.dataframe.columns
2+
3+
import org.jetbrains.kotlinx.dataframe.AnyBaseCol
4+
import org.jetbrains.kotlinx.dataframe.AnyCol
5+
import org.jetbrains.kotlinx.dataframe.AnyRow
6+
import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl
7+
import org.jetbrains.kotlinx.dataframe.impl.asList
8+
import org.jetbrains.kotlinx.dataframe.impl.columnName
9+
import org.jetbrains.kotlinx.dataframe.impl.columns.DataColumnInternal
10+
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
11+
import kotlin.reflect.KProperty
12+
import kotlin.reflect.KType
13+
14+
/**
 * Column with [type], [name]/[path] and [values].
 *
 * Base interface for all three kinds of columns: [ValueColumn], [ColumnGroup] and [FrameColumn].
 * Column operations that don't clash by signature with [DataFrame] operations can be defined for [BaseColumn].
 *
 * @param T type of values contained in column.
 */
public interface BaseColumn<out T> : ColumnReference<T> {

    // region info

    public fun size(): Int

    public fun kind(): ColumnKind

    public fun type(): KType

    // TODO: remove
    public fun defaultValue(): T?

    // endregion

    // region get

    /**
     * Returns the value stored at the given row [index].
     *
     * NOTE: This doesn't work in the [ColumnsSelectionDsl], use [ColumnsSelectionDsl.col] to select a column by index.
     */
    public operator fun get(index: Int): T

    /**
     * Returns a column holding the rows at [firstIndex] followed by [otherIndices];
     * delegates to the `Iterable<Int>` overload.
     *
     * NOTE: This doesn't work in the [ColumnsSelectionDsl], use [ColumnsSelectionDsl.cols] to select columns by index.
     */
    public operator fun get(firstIndex: Int, vararg otherIndices: Int): BaseColumn<T> {
        val indices: Iterable<Int> = headPlusIterable(firstIndex, otherIndices.asIterable())
        return get(indices)
    }

    /** Returns the value of this column for [row], looked up by `row.index()`. */
    public operator fun get(row: AnyRow): T {
        return get(row.index())
    }

    /**
     * Returns a column holding the rows within the given [range] of indices.
     *
     * NOTE: This doesn't work in the [ColumnsSelectionDsl], use [ColumnsSelectionDsl.cols] to select columns by range.
     */
    public operator fun get(range: IntRange): BaseColumn<T>

    /**
     * Returns a column holding the rows at the given [indices].
     *
     * NOTE: This doesn't work in the [ColumnsSelectionDsl], use [ColumnsSelectionDsl.cols] to select columns by index.
     */
    public operator fun get(indices: Iterable<Int>): BaseColumn<T>

    public operator fun get(columnName: String): AnyCol

    // endregion

    // region values

    public fun values(): Iterable<T>

    /** Returns [values] as a [List], converted with `asList()`. */
    public fun toList(): List<T> {
        return values().asList()
    }

    public fun toSet(): Set<T>

    public fun distinct(): BaseColumn<T>

    public fun countDistinct(): Int

    public operator fun contains(value: @UnsafeVariance T): Boolean

    // endregion

    override fun rename(newName: String): BaseColumn<T>

    /**
     * Property-delegate support: yields this column renamed to the column name of the delegated
     * [property], after calling `forceResolve()` on the renamed column.
     */
    public override operator fun getValue(thisRef: Any?, property: KProperty<*>): BaseColumn<T> {
        val renamed = (this as DataColumnInternal<*>).rename(property.columnName)
        return renamed.forceResolve() as BaseColumn<T>
    }
}
99+
100+
/** Property-style shortcut for [BaseColumn.values]. */
internal val <T> BaseColumn<T>.values: Iterable<T>
    get() {
        return values()
    }
101+
102+
/** Property-style shortcut for [BaseColumn.size]. */
internal val AnyBaseCol.size: Int
    get() {
        return size()
    }

0 commit comments

Comments
 (0)