diff --git a/buildSrc/src/main/kotlin/Dependencies.kt b/buildSrc/src/main/kotlin/Dependencies.kt index 20fce75d..d19181c8 100644 --- a/buildSrc/src/main/kotlin/Dependencies.kt +++ b/buildSrc/src/main/kotlin/Dependencies.kt @@ -4,6 +4,8 @@ object Dependencies : Dsl { inline val scalaLibrary get() = "org.scala-lang:scala-library:${Versions.scala}" inline val kotlinxHtml get() = "org.jetbrains.kotlinx:kotlinx-html-jvm:${Versions.kotlinxHtml}" inline val sparkSql get() = "org.apache.spark:spark-sql_${Versions.scalaCompat}:${Versions.spark}" + inline val sparkSqlApi get() = "org.apache.spark:spark-sql-api_${Versions.scalaCompat}:${Versions.spark}" + inline val sparkConnectClient get() = "org.apache.spark:spark-connect-client-jvm_${Versions.scalaCompat}:${Versions.spark}" inline val sparkMl get() = "org.apache.spark:spark-mllib_${Versions.scalaCompat}:${Versions.spark}" inline val sparkStreaming get() = "org.apache.spark:spark-streaming_${Versions.scalaCompat}:${Versions.spark}" inline val hadoopClient get() = "org.apache.hadoop:hadoop-client:${Versions.hadoop}" diff --git a/gradle/bootstraps/compiler-plugin.jar b/gradle/bootstraps/compiler-plugin.jar index fa1826de..6de5e469 100644 Binary files a/gradle/bootstraps/compiler-plugin.jar and b/gradle/bootstraps/compiler-plugin.jar differ diff --git a/gradle/bootstraps/gradle-plugin.jar b/gradle/bootstraps/gradle-plugin.jar index b470374e..740a990d 100644 Binary files a/gradle/bootstraps/gradle-plugin.jar and b/gradle/bootstraps/gradle-plugin.jar differ diff --git a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt index 715fa94a..30b9b27b 100644 --- a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt +++ b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt @@ -162,10 +162,11 @@ abstract class Integration(private val notebook: Notebook, private val options: } beforeCellExecution { - if (scalaCompatVersion.toDouble() >= 2.13) + if (scalaCompatVersion.toDouble() >= 2.13) { execute("scala.`Console\$`.`MODULE\$`.setOutDirect(System.out)") - else + } else { execute("""scala.Console.setOut(System.out)""") + } beforeCellExecution() } diff --git a/kotlin-spark-api/build.gradle.kts b/kotlin-spark-api/build.gradle.kts index 812af551..9e0097d7 100644 --- a/kotlin-spark-api/build.gradle.kts +++ b/kotlin-spark-api/build.gradle.kts @@ -42,7 +42,7 @@ dependencies { // https://github.com/FasterXML/jackson-bom/issues/52 if (Versions.spark == "3.3.1") implementation(jacksonDatabind) - if (Versions.sparkConnect) TODO("unsupported for now") + // if (Versions.sparkConnect) TODO("unsupported for now") implementation( kotlinStdLib, diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt index 7bd1ca7b..51a97c3d 100644 --- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt +++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt @@ -1,14 +1,12 @@ package org.jetbrains.kotlinx.spark.api import io.kotest.core.spec.style.ShouldSpec -import io.kotest.core.spec.style.Test -import io.kotest.core.test.TestScope import io.kotest.matchers.collections.shouldContainAll import io.kotest.matchers.shouldBe import org.apache.spark.api.java.JavaRDD -import org.jetbrains.kotlinx.spark.api.tuples.* +import org.jetbrains.kotlinx.spark.api.tuples.X +import org.jetbrains.kotlinx.spark.api.tuples.t import scala.Tuple2 -import java.io.Serializable class RddTest : ShouldSpec({ context("RDD extension functions") { diff --git a/settings.gradle.kts b/settings.gradle.kts index 98776e06..07822dec 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -21,9 +21,11 @@ gradleEnterprise { val spark: String by settings val scala: String by settings val skipScalaOnlyDependent: String by settings +val sparkConnect: String by settings System.setProperty("spark", spark) System.setProperty("scala", scala) System.setProperty("skipScalaOnlyDependent", skipScalaOnlyDependent) +System.setProperty("sparkConnect", sparkConnect) val scalaCompat get() = scala.substringBeforeLast('.') @@ -37,6 +39,7 @@ include("scala-tuples-in-kotlin") include("kotlin-spark-api") include("jupyter") include("examples") +include("spark-connect-examples") include("compiler-plugin") include("gradle-plugin") diff --git a/spark-connect-examples/build.gradle.kts b/spark-connect-examples/build.gradle.kts new file mode 100644 index 00000000..c1f20c0a --- /dev/null +++ b/spark-connect-examples/build.gradle.kts @@ -0,0 +1,60 @@ +import org.jetbrains.kotlin.gradle.dsl.JvmTarget + +plugins { + // Needs to be installed in the local maven repository or have the bootstrap jar on the classpath + id("org.jetbrains.kotlinx.spark.api") + kotlin("jvm") + application +} + +// run with `./gradlew run` +application { + mainClass = "org.jetbrains.kotlinx.spark.examples.MainKt" + + // workaround for java 17 + applicationDefaultJvmArgs = listOf("--add-opens", "java.base/java.nio=ALL-UNNAMED") +} + +kotlinSparkApi { + enabled = true + sparkifyAnnotationFqNames = listOf("org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify") +} + +group = Versions.groupID +version = Versions.project + +repositories { + mavenLocal() + mavenCentral() +} + +dependencies { + Projects { + implementation( + // TODO kotlinSparkApi, + ) + } + + Dependencies { + + // IMPORTANT! + compileOnly(sparkSqlApi) + implementation(sparkConnectClient) + } +} + +// spark-connect seems to work well with java 17 as client and java 1.8 as server +// also set gradle and your project sdk to java 17 +kotlin { + jvmToolchain { + languageVersion = JavaLanguageVersion.of(17) + } + compilerOptions { + jvmTarget = JvmTarget.JVM_17 + } +} + +tasks.withType { + sourceCompatibility = JavaVersion.VERSION_17.toString() + targetCompatibility = JavaVersion.VERSION_17.toString() +} diff --git a/spark-connect-examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt b/spark-connect-examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt new file mode 100644 index 00000000..790bad24 --- /dev/null +++ b/spark-connect-examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt @@ -0,0 +1,27 @@ +package org.jetbrains.kotlinx.spark.examples + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connect.client.REPLClassDirMonitor + +// run with `./gradlew run` or set VM options: "--add-opens=java.base/java.nio=ALL-UNNAMED" in the IDE +fun main() { + val spark = + SparkSession + .builder() + .remote("sc://localhost") + .create() + + val classFinder = REPLClassDirMonitor("/mnt/data/Projects/kotlin-spark-api/spark-connect-examples/build/classes") + spark.registerClassFinder(classFinder) + spark.addArtifact("/mnt/data/Projects/kotlin-spark-api/spark-connect-examples/build/libs/spark-connect-examples-2.0.0-SNAPSHOT.jar") + + spark.sql("select 1").show() + + spark.stop() +} + +//@Sparkify +//data class Person( +// val name: String, +// val age: Int, +//)