Skip to content

Commit 920c1a0

Browse files
committed
adding benchmarking test and fixed some compilation issues
1 parent 8c2d501 commit 920c1a0

File tree

14 files changed

+93370
-10
lines changed

14 files changed

+93370
-10
lines changed

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DoubleParserTests.kt

+2
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ class DoubleParserTests {
120120
"100123.35",
121121
"-204,235.23",
122122
"1.234e3",
123+
"3e-04", // failed with old double parser
123124
)
124125

125126
val expectedDoubles = listOf(
@@ -128,6 +129,7 @@ class DoubleParserTests {
128129
100_123.35,
129130
-204_235.23,
130131
1.234e3,
132+
3e-04,
131133
)
132134

133135
// CharSequence

dataframe-csv/build.gradle.kts

+8-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ plugins {
1111
alias(ktlint)
1212
alias(jupyter.api)
1313
alias(docProcessor)
14+
alias(kotlinx.benchmark)
1415
}
1516
idea
1617
}
@@ -39,12 +40,19 @@ dependencies {
3940
implementation(libs.kotlin.coroutinesCore)
4041

4142
testApi(project(":core"))
43+
testImplementation(libs.kotlinx.benchmark.runtime)
4244
testImplementation(libs.junit)
4345
testImplementation(libs.kotestAssertions) {
4446
exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
4547
}
4648
}
4749

50+
benchmark {
51+
targets {
52+
register("test")
53+
}
54+
}
55+
4856
val generatedSourcesFolderName = "generated-sources"
4957

5058
// Backup the kotlin source files location
@@ -155,7 +163,6 @@ kotlin {
155163
explicitApi()
156164
sourceSets.all {
157165
languageSettings {
158-
enableLanguageFeature("ExplicitBackingFields")
159166
}
160167
}
161168
}

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/ListSink.kt

+6-7
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,10 @@ internal class ListSink(val columnIndex: Int, val dataType: DataType) : SinkSour
6666
)
6767
}
6868

69-
@Suppress("MUST_BE_INITIALIZED_OR_BE_ABSTRACT", "EXPLICIT_BACKING_FIELDS_UNSUPPORTED")
69+
private val _data: MutableList<Any?> = mutableListOf()
70+
7071
val data: List<Any?>
71-
field = mutableListOf()
72+
get() = _data
7273

7374
var hasNulls: Boolean = false
7475
private set
@@ -121,13 +122,12 @@ internal class ListSink(val columnIndex: Int, val dataType: DataType) : SinkSour
121122
destEnd: Int,
122123
isNull: BooleanArray,
123124
) {
124-
data as MutableList<Any?>
125125
while (data.size < destBegin) {
126-
data += null
126+
_data += null
127127
hasNulls = true
128128
}
129129
for ((srcIndex, _) in (destBegin..<destEnd).withIndex()) {
130-
data += getValue(src, srcIndex, isNull)
130+
_data += getValue(src, srcIndex, isNull)
131131
}
132132
}
133133

@@ -137,9 +137,8 @@ internal class ListSink(val columnIndex: Int, val dataType: DataType) : SinkSour
137137
destEnd: Int,
138138
isNull: BooleanArray,
139139
) {
140-
data as MutableList<Any?>
141140
for ((srcIndex, destIndex) in (destBegin..<destEnd).withIndex()) {
142-
data[destIndex] = getValue(src, srcIndex, isNull)
141+
_data[destIndex] = getValue(src, srcIndex, isNull)
143142
}
144143
}
145144

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
@file:JvmName("CsvDeephavenKt")
2+
13
package org.jetbrains.kotlinx.dataframe.io
24

35
import org.jetbrains.kotlinx.dataframe.DataFrame
4-
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
56
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
67
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
78
import org.jetbrains.kotlinx.dataframe.impl.io.DelimParams

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsv.kt

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
@file:JvmName("ReadCsvDeephavenKt")
2+
13
package org.jetbrains.kotlinx.dataframe.io
24

35
import io.deephaven.csv.CsvSpecs

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDelim.kt

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
@file:JvmName("ReadDelimDeephavenKt")
2+
13
package org.jetbrains.kotlinx.dataframe.io
24

35
import io.deephaven.csv.CsvSpecs

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readTsv.kt

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
@file:JvmName("ReadTsvDeephavenKt")
2+
13
package org.jetbrains.kotlinx.dataframe.io
24

35
import io.deephaven.csv.CsvSpecs

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
@file:JvmName("TsvDeephavenKt")
2+
13
package org.jetbrains.kotlinx.dataframe.io
24

35
import org.jetbrains.kotlinx.dataframe.DataFrame

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeCsv.kt

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
@file:JvmName("WriteCsvDeephavenKt")
2+
13
package org.jetbrains.kotlinx.dataframe.io
24

35
import org.apache.commons.csv.CSVFormat

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/writeTsv.kt

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
@file:JvmName("WriteTsvDeephavenKt")
2+
13
package org.jetbrains.kotlinx.dataframe.io
24

35
import org.apache.commons.csv.CSVFormat
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package org.jetbrains.kotlinx.dataframe.io
2+
3+
import org.jetbrains.kotlinx.dataframe.DataFrame
4+
import org.openjdk.jmh.annotations.Benchmark
5+
import org.openjdk.jmh.annotations.BenchmarkMode
6+
import org.openjdk.jmh.annotations.Measurement
7+
import org.openjdk.jmh.annotations.Mode
8+
import org.openjdk.jmh.annotations.Param
9+
import org.openjdk.jmh.annotations.Scope
10+
import org.openjdk.jmh.annotations.Setup
11+
import org.openjdk.jmh.annotations.State
12+
import org.openjdk.jmh.annotations.TearDown
13+
import org.openjdk.jmh.annotations.Warmup
14+
import java.io.File
15+
import java.util.concurrent.TimeUnit
16+
17+
/**
 * JMH benchmark that reads the same CSV input with two readers:
 * `DataFrame.readCSV` (see [apache]) and the experimental `DataFrame.readCsv` (see [deephaven]).
 *
 * The input is selected by the [type] parameter, so every benchmark method is run once per
 * input size ("small", "medium", "large").
 *
 * NOTE(review): in [Mode.SingleShotTime] each iteration is a single invocation, so the `time`
 * value on [Warmup] presumably has no effect on iteration length — confirm against the JMH docs.
 */
@BenchmarkMode(Mode.SingleShotTime)
@Warmup(iterations = 10, time = 5, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 20, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
open class BenchmarkTest {

    /** Name of the input size to benchmark; injected by JMH from the [Param] values. */
    @Param("small", "medium", "large")
    var type = ""

    /** Input file for the current [type]; set in [setup] and cleared in [tearDown]. */
    var file: File? = null

    /**
     * Resolves [file] for the current [type].
     *
     * The path is relative, so it is resolved against the working directory of the
     * benchmark process.
     *
     * @throws IllegalArgumentException if [type] is not one of the known input sizes.
     */
    @Setup
    fun setup() {
        System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info")
        val fileName = when (type) {
            "small" -> "testCSV.csv"
            "medium" -> "gross-domestic-product-june-2024-quarter.csv"
            "large" -> "largeCsv.csv.gz"
            else -> throw IllegalArgumentException("Invalid type")
        }
        file = File("src/test/resources/$fileName")
    }

    /** Drops the reference to the input file once the benchmark is finished. */
    @TearDown
    fun tearDown() {
        file = null
    }

    /**
     * Benchmarks `DataFrame.readCSV`.
     *
     * NOTE(review): the method name suggests this reader is the Apache Commons CSV based
     * implementation — confirm.
     *
     * The parsed frame is returned rather than discarded so that JMH consumes the result.
     */
    @Benchmark
    fun apache(): DataFrame<*> = DataFrame.readCSV(inputFile())

    /**
     * Benchmarks the experimental `DataFrame.readCsv`.
     *
     * The parsed frame is returned rather than discarded so that JMH consumes the result.
     */
    @OptIn(ExperimentalCsv::class)
    @Benchmark
    fun deephaven(): DataFrame<*> = DataFrame.readCsv(inputFile())

    /** Returns the prepared input file, failing with a clear message if [setup] has not run. */
    private fun inputFile(): File =
        checkNotNull(file) { "Benchmark input file is not initialised; setup() must run first (type='$type')" }
}

0 commit comments

Comments
 (0)