Skip to content

Commit 48370b5

Browse files
committed
Replace calls to toList() on arrays with asList() to improve the performance of the affected functions by avoiding the overhead of copying the array elements into a new list
1 parent d787f9d commit 48370b5

File tree

4 files changed

+7
-7
lines changed

4 files changed

+7
-7
lines changed

kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt

+4-4
Original file line numberDiff line numberDiff line change
@@ -61,19 +61,19 @@ inline fun <reified T> SparkSession.toDF(list: List<T>, vararg colNames: String)
6161
* Utility method to create dataset from *array or vararg arguments
6262
*/
6363
inline fun <reified T> SparkSession.dsOf(vararg t: T): Dataset<T> =
64-
createDataset(t.toList(), encoder<T>())
64+
createDataset(t.asList(), encoder<T>())
6565

6666
/**
6767
* Utility method to create dataframe from *array or vararg arguments
6868
*/
6969
inline fun <reified T> SparkSession.dfOf(vararg t: T): Dataset<Row> =
70-
createDataset(t.toList(), encoder<T>()).toDF()
70+
createDataset(t.asList(), encoder<T>()).toDF()
7171

7272
/**
7373
* Utility method to create dataframe from *array or vararg arguments with given column names
7474
*/
7575
inline fun <reified T> SparkSession.dfOf(colNames: Array<String>, vararg t: T): Dataset<Row> =
76-
createDataset(t.toList(), encoder<T>())
76+
createDataset(t.asList(), encoder<T>())
7777
.run { if (colNames.isEmpty()) toDF() else toDF(*colNames) }
7878

7979
/**
@@ -92,7 +92,7 @@ inline fun <reified T> List<T>.toDF(spark: SparkSession, vararg colNames: String
9292
* Utility method to create dataset from list
9393
*/
9494
inline fun <reified T> Array<T>.toDS(spark: SparkSession): Dataset<T> =
95-
toList().toDS(spark)
95+
asList().toDS(spark)
9696

9797
/**
9898
* Utility method to create dataframe from list

kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Rdd.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import java.io.Serializable
1111
fun <T> JavaSparkContext.rddOf(
1212
vararg elements: T,
1313
numSlices: Int = defaultParallelism(),
14-
): JavaRDD<T> = parallelize(elements.toList(), numSlices)
14+
): JavaRDD<T> = parallelize(elements.asList(), numSlices)
1515

1616
/**
1717
* Utility method to create an RDD from a list.

kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ class KSparkSession(val spark: SparkSession) {
114114
* NOTE: [T] must be [Serializable].
115115
*/
116116
fun <T> rddOf(vararg elements: T, numSlices: Int = sc.defaultParallelism()): JavaRDD<T> =
117-
sc.toRDD(elements.toList(), numSlices)
117+
sc.toRDD(elements.asList(), numSlices)
118118

119119
/**
120120
* A collection of methods for registering user-defined functions (UDF).

kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ class TypeInferenceTest : ShouldSpec({
215215
should("generate valid serializer schema") {
216216
expect(encoder<Sample>().schema()) {
217217
this
218-
.feature("data type", { this.fields()?.toList() }) {
218+
.feature("data type", { this.fields()?.asList() }) {
219219
this.notToEqualNull().toContain.inOrder.only.entry {
220220
this
221221
.feature("element name", { name() }) { toEqual("optionList") }

0 commit comments

Comments
 (0)