Skip to content

Commit 41577df

Browse files
authored
Add separator parameter to DataFrame.flatten (#667)
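Added a 'separator' parameter (default ".") to the DataFrame.flatten function to customize the separator used in column names when 'keepParentNameForColumns' is true. This allows greater flexibility in formatting column names. Note that this commit also reverses the order of the generated name: the parent group name now comes first, followed by the separator and the column name (e.g. "age.mean" becomes "mean.age"). Tests have been updated accordingly to check both the new ordering and a custom separator.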
1 parent bcf6e64 commit 41577df

File tree

6 files changed

+84
-28
lines changed

6 files changed

+84
-28
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

+28-11
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,33 @@ import kotlin.reflect.KProperty
99

1010
// region DataFrame
1111

12-
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { all() }
13-
14-
public fun <T, C> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns, keepParentNameForColumns)
15-
16-
public fun <T> DataFrame<T>.flatten(vararg columns: String, keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { columns.toColumnSet() }
17-
18-
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
19-
flatten(keepParentNameForColumns) { columns.toColumnSet() }
20-
21-
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
22-
flatten(keepParentNameForColumns) { columns.toColumnSet() }
12+
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, separator: String = "."): DataFrame<T> =
13+
flatten(keepParentNameForColumns, separator) { all() }
14+
15+
public fun <T, C> DataFrame<T>.flatten(
16+
keepParentNameForColumns: Boolean = false,
17+
separator: String = ".",
18+
columns: ColumnsSelector<T, C>
19+
): DataFrame<T> = flattenImpl(columns, keepParentNameForColumns, separator)
20+
21+
public fun <T> DataFrame<T>.flatten(
22+
vararg columns: String,
23+
keepParentNameForColumns: Boolean = false,
24+
separator: String = "."
25+
): DataFrame<T> = flatten(keepParentNameForColumns, separator) { columns.toColumnSet() }
26+
27+
public fun <T, C> DataFrame<T>.flatten(
28+
vararg columns: ColumnReference<C>,
29+
keepParentNameForColumns: Boolean = false,
30+
separator: String = "."
31+
): DataFrame<T> =
32+
flatten(keepParentNameForColumns, separator) { columns.toColumnSet() }
33+
34+
public fun <T, C> DataFrame<T>.flatten(
35+
vararg columns: KProperty<C>,
36+
keepParentNameForColumns: Boolean = false,
37+
separator: String = "."
38+
): DataFrame<T> =
39+
flatten(keepParentNameForColumns, separator) { columns.toColumnSet() }
2340

2441
// endregion

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt

+3-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet
1414

1515
internal fun <T, C> DataFrame<T>.flattenImpl(
1616
columns: ColumnsSelector<T, C>,
17-
keepParentNameForColumns: Boolean = false
17+
keepParentNameForColumns: Boolean = false,
18+
separator: String = ".",
1819
): DataFrame<T> {
1920
val rootColumns = getColumnsWithPaths {
2021
columns.toColumnSet().filter { it.isColumnGroup() }.simplify()
@@ -32,7 +33,7 @@ internal fun <T, C> DataFrame<T>.flattenImpl(
3233
.into {
3334
val targetPath = getRootPrefix(it.path).dropLast(1)
3435
val nameGen = nameGenerators[targetPath]!!
35-
val preferredName = if (keepParentNameForColumns) "${it.name()}.${it.parentName}" else it.name()
36+
val preferredName = if (keepParentNameForColumns) "${it.parentName}${separator}${it.name()}" else it.name()
3637
val name = nameGen.addUnique(preferredName)
3738
targetPath + name
3839
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

+11-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,17 @@ class FlattenTests {
9595

9696
aggregate
9797
.flatten(keepParentNameForColumns = true)
98-
.columnNames() shouldBe listOf("city", "age.mean", "weight.mean", "age.std", "weight.std")
98+
.columnNames() shouldBe listOf("city", "mean.age", "mean.weight", "std.age", "std.weight")
99+
100+
aggregate
101+
.flatten(keepParentNameForColumns = true, separator = "_happy_separator_")
102+
.columnNames() shouldBe listOf(
103+
"city",
104+
"mean_happy_separator_age",
105+
"mean_happy_separator_weight",
106+
"std_happy_separator_age",
107+
"std_happy_separator_weight"
108+
)
99109
}
100110

101111
@DataSchema

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

+28-11
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,33 @@ import kotlin.reflect.KProperty
99

1010
// region DataFrame
1111

12-
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { all() }
13-
14-
public fun <T, C> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns, keepParentNameForColumns)
15-
16-
public fun <T> DataFrame<T>.flatten(vararg columns: String, keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { columns.toColumnSet() }
17-
18-
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
19-
flatten(keepParentNameForColumns) { columns.toColumnSet() }
20-
21-
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
22-
flatten(keepParentNameForColumns) { columns.toColumnSet() }
12+
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, separator: String = "."): DataFrame<T> =
13+
flatten(keepParentNameForColumns, separator) { all() }
14+
15+
public fun <T, C> DataFrame<T>.flatten(
16+
keepParentNameForColumns: Boolean = false,
17+
separator: String = ".",
18+
columns: ColumnsSelector<T, C>
19+
): DataFrame<T> = flattenImpl(columns, keepParentNameForColumns, separator)
20+
21+
public fun <T> DataFrame<T>.flatten(
22+
vararg columns: String,
23+
keepParentNameForColumns: Boolean = false,
24+
separator: String = "."
25+
): DataFrame<T> = flatten(keepParentNameForColumns, separator) { columns.toColumnSet() }
26+
27+
public fun <T, C> DataFrame<T>.flatten(
28+
vararg columns: ColumnReference<C>,
29+
keepParentNameForColumns: Boolean = false,
30+
separator: String = "."
31+
): DataFrame<T> =
32+
flatten(keepParentNameForColumns, separator) { columns.toColumnSet() }
33+
34+
public fun <T, C> DataFrame<T>.flatten(
35+
vararg columns: KProperty<C>,
36+
keepParentNameForColumns: Boolean = false,
37+
separator: String = "."
38+
): DataFrame<T> =
39+
flatten(keepParentNameForColumns, separator) { columns.toColumnSet() }
2340

2441
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt

+3-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet
1414

1515
internal fun <T, C> DataFrame<T>.flattenImpl(
1616
columns: ColumnsSelector<T, C>,
17-
keepParentNameForColumns: Boolean = false
17+
keepParentNameForColumns: Boolean = false,
18+
separator: String = ".",
1819
): DataFrame<T> {
1920
val rootColumns = getColumnsWithPaths {
2021
columns.toColumnSet().filter { it.isColumnGroup() }.simplify()
@@ -32,7 +33,7 @@ internal fun <T, C> DataFrame<T>.flattenImpl(
3233
.into {
3334
val targetPath = getRootPrefix(it.path).dropLast(1)
3435
val nameGen = nameGenerators[targetPath]!!
35-
val preferredName = if (keepParentNameForColumns) "${it.name()}.${it.parentName}" else it.name()
36+
val preferredName = if (keepParentNameForColumns) "${it.parentName}${separator}${it.name()}" else it.name()
3637
val name = nameGen.addUnique(preferredName)
3738
targetPath + name
3839
}

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

+11-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,17 @@ class FlattenTests {
9595

9696
aggregate
9797
.flatten(keepParentNameForColumns = true)
98-
.columnNames() shouldBe listOf("city", "age.mean", "weight.mean", "age.std", "weight.std")
98+
.columnNames() shouldBe listOf("city", "mean.age", "mean.weight", "std.age", "std.weight")
99+
100+
aggregate
101+
.flatten(keepParentNameForColumns = true, separator = "_happy_separator_")
102+
.columnNames() shouldBe listOf(
103+
"city",
104+
"mean_happy_separator_age",
105+
"mean_happy_separator_weight",
106+
"std_happy_separator_age",
107+
"std_happy_separator_weight"
108+
)
99109
}
100110

101111
@DataSchema

0 commit comments

Comments
 (0)