Skip to content

Commit 22fa5ae

Browse files
committed
added spark connect example which does not yet work with kotlin-spark-api
1 parent eae0196 commit 22fa5ae

File tree

9 files changed

+98
-7
lines changed

9 files changed

+98
-7
lines changed

Diff for: buildSrc/src/main/kotlin/Dependencies.kt

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ object Dependencies : Dsl<Dependencies> {
44
inline val scalaLibrary get() = "org.scala-lang:scala-library:${Versions.scala}"
55
inline val kotlinxHtml get() = "org.jetbrains.kotlinx:kotlinx-html-jvm:${Versions.kotlinxHtml}"
66
inline val sparkSql get() = "org.apache.spark:spark-sql_${Versions.scalaCompat}:${Versions.spark}"
7+
inline val sparkSqlApi get() = "org.apache.spark:spark-sql-api_${Versions.scalaCompat}:${Versions.spark}"
8+
inline val sparkConnectClient get() = "org.apache.spark:spark-connect-client-jvm_${Versions.scalaCompat}:${Versions.spark}"
79
inline val sparkMl get() = "org.apache.spark:spark-mllib_${Versions.scalaCompat}:${Versions.spark}"
810
inline val sparkStreaming get() = "org.apache.spark:spark-streaming_${Versions.scalaCompat}:${Versions.spark}"
911
inline val hadoopClient get() = "org.apache.hadoop:hadoop-client:${Versions.hadoop}"

Diff for: gradle/bootstraps/compiler-plugin.jar

1 Byte
Binary file not shown.

Diff for: gradle/bootstraps/gradle-plugin.jar

0 Bytes
Binary file not shown.

Diff for: jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt

+3-2
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,11 @@ abstract class Integration(private val notebook: Notebook, private val options:
162162
}
163163

164164
beforeCellExecution {
165-
if (scalaCompatVersion.toDouble() >= 2.13)
165+
if (scalaCompatVersion.toDouble() >= 2.13) {
166166
execute("scala.`Console\$`.`MODULE\$`.setOutDirect(System.out)")
167-
else
167+
} else {
168168
execute("""scala.Console.setOut(System.out)""")
169+
}
169170

170171
beforeCellExecution()
171172
}

Diff for: kotlin-spark-api/build.gradle.kts

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ dependencies {
4242
// https://github.com/FasterXML/jackson-bom/issues/52
4343
if (Versions.spark == "3.3.1") implementation(jacksonDatabind)
4444

45-
if (Versions.sparkConnect) TODO("unsupported for now")
45+
// if (Versions.sparkConnect) TODO("unsupported for now")
4646

4747
implementation(
4848
kotlinStdLib,

Diff for: kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
package org.jetbrains.kotlinx.spark.api
22

33
import io.kotest.core.spec.style.ShouldSpec
4-
import io.kotest.core.spec.style.Test
5-
import io.kotest.core.test.TestScope
64
import io.kotest.matchers.collections.shouldContainAll
75
import io.kotest.matchers.shouldBe
86
import org.apache.spark.api.java.JavaRDD
9-
import org.jetbrains.kotlinx.spark.api.tuples.*
7+
import org.jetbrains.kotlinx.spark.api.tuples.X
8+
import org.jetbrains.kotlinx.spark.api.tuples.t
109
import scala.Tuple2
11-
import java.io.Serializable
1210

1311
class RddTest : ShouldSpec({
1412
context("RDD extension functions") {

Diff for: settings.gradle.kts

+3
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@ gradleEnterprise {
2121
val spark: String by settings
2222
val scala: String by settings
2323
val skipScalaOnlyDependent: String by settings
24+
val sparkConnect: String by settings
2425
System.setProperty("spark", spark)
2526
System.setProperty("scala", scala)
2627
System.setProperty("skipScalaOnlyDependent", skipScalaOnlyDependent)
28+
System.setProperty("sparkConnect", sparkConnect)
2729

2830
val scalaCompat
2931
get() = scala.substringBeforeLast('.')
@@ -37,6 +39,7 @@ include("scala-tuples-in-kotlin")
3739
include("kotlin-spark-api")
3840
include("jupyter")
3941
include("examples")
42+
include("spark-connect-examples")
4043
include("compiler-plugin")
4144
include("gradle-plugin")
4245

Diff for: spark-connect-examples/build.gradle.kts

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
2+
3+
plugins {
4+
// Needs to be installed in the local maven repository or have the bootstrap jar on the classpath
5+
id("org.jetbrains.kotlinx.spark.api")
6+
kotlin("jvm")
7+
application
8+
}
9+
10+
// run with `./gradlew run`
11+
application {
12+
mainClass = "org.jetbrains.kotlinx.spark.examples.MainKt"
13+
14+
// workaround for java 17
15+
applicationDefaultJvmArgs = listOf("--add-opens", "java.base/java.nio=ALL-UNNAMED")
16+
}
17+
18+
kotlinSparkApi {
19+
enabled = true
20+
sparkifyAnnotationFqNames = listOf("org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify")
21+
}
22+
23+
group = Versions.groupID
24+
version = Versions.project
25+
26+
repositories {
27+
mavenLocal()
28+
mavenCentral()
29+
}
30+
31+
dependencies {
32+
Projects {
33+
implementation(
34+
// TODO kotlinSparkApi,
35+
)
36+
}
37+
38+
Dependencies {
39+
40+
// IMPORTANT!
41+
compileOnly(sparkSqlApi)
42+
implementation(sparkConnectClient)
43+
}
44+
}
45+
46+
// spark-connect seems to work well with java 17 as client and java 1.8 as server
47+
// also set gradle and your project sdk to java 17
48+
kotlin {
49+
jvmToolchain {
50+
languageVersion = JavaLanguageVersion.of(17)
51+
}
52+
compilerOptions {
53+
jvmTarget = JvmTarget.JVM_17
54+
}
55+
}
56+
57+
tasks.withType<JavaCompile> {
58+
sourceCompatibility = JavaVersion.VERSION_17.toString()
59+
targetCompatibility = JavaVersion.VERSION_17.toString()
60+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package org.jetbrains.kotlinx.spark.examples
2+
3+
import org.apache.spark.sql.SparkSession
4+
import org.apache.spark.sql.connect.client.REPLClassDirMonitor
5+
6+
// run with `./gradlew run` or set VM options: "--add-opens=java.base/java.nio=ALL-UNNAMED" in the IDE
7+
fun main() {
8+
val spark =
9+
SparkSession
10+
.builder()
11+
.remote("sc://localhost")
12+
.create()
13+
14+
val classFinder = REPLClassDirMonitor("/mnt/data/Projects/kotlin-spark-api/spark-connect-examples/build/classes")
15+
spark.registerClassFinder(classFinder)
16+
spark.addArtifact("/mnt/data/Projects/kotlin-spark-api/spark-connect-examples/build/libs/spark-connect-examples-2.0.0-SNAPSHOT.jar")
17+
18+
spark.sql("select 1").show()
19+
20+
spark.stop()
21+
}
22+
23+
//@Sparkify
24+
//data class Person(
25+
// val name: String,
26+
// val age: Int,
27+
//)

0 commit comments

Comments
 (0)