
Commit c28a6fa

srowen authored and dongjoon-hyun committed
[SPARK-29292][SQL][ML] Update rest of default modules (Hive, ML, etc) for Scala 2.13 compilation
### What changes were proposed in this pull request?

Same as apache#29078 and apache#28971. This makes the rest of the default modules (i.e. those you get without specifying `-Pyarn` etc.) compile under Scala 2.13. As a result, it does not close the JIRA, and of course it does not yet demonstrate that tests pass under 2.13. Note that this does not fix the `repl` module; that's separate.

### Why are the changes needed?

Eventually, we need to support a Scala 2.13 build, perhaps in Spark 3.1.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing tests. (2.13 was not tested; this is about getting it to compile without breaking 2.12.)

Closes apache#29111 from srowen/SPARK-29292.3.

Authored-by: Sean Owen <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent: b05f309

File tree: 36 files changed (+106, −102 lines)


examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java

Lines changed: 2 additions & 2 deletions

@@ -23,7 +23,7 @@
 import java.util.Arrays;
 import java.util.List;

-import scala.collection.mutable.WrappedArray;
+import scala.collection.mutable.Seq;

 import org.apache.spark.ml.feature.RegexTokenizer;
 import org.apache.spark.ml.feature.Tokenizer;
@@ -69,7 +69,7 @@ public static void main(String[] args) {
         .setPattern("\\W");  // alternatively .setPattern("\\w+").setGaps(false);

     spark.udf().register(
-      "countTokens", (WrappedArray<?> words) -> words.size(), DataTypes.IntegerType);
+      "countTokens", (Seq<?> words) -> words.size(), DataTypes.IntegerType);

     Dataset<Row> tokenized = tokenizer.transform(sentenceDataFrame);
     tokenized.select("sentence", "words")
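Note: `WrappedArray` is deprecated in 2.13 (it becomes an alias for `mutable.ArraySeq`), so the example is retyped against the common supertype `mutable.Seq`, which both versions' array wrappers extend. A minimal Scala sketch of the same idea, with illustrative names:

```scala
import scala.collection.mutable

// Typed against mutable.Seq, the common supertype of 2.12's WrappedArray
// and 2.13's mutable.ArraySeq, so the same code compiles on both versions.
object SeqCompatSketch {
  def countTokens(words: mutable.Seq[_]): Int = words.size

  def main(args: Array[String]): Unit = {
    // mutable.ArraySeq exists on both 2.12 and 2.13.
    val words: mutable.Seq[String] = mutable.ArraySeq("Hi", "I", "heard")
    println(countTokens(words))  // 3
  }
}
```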

examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala

Lines changed: 7 additions & 1 deletion

@@ -82,7 +82,7 @@ object SparkKMeans {
     while(tempDist > convergeDist) {
       val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

-      val pointStats = closest.reduceByKey{case ((p1, c1), (p2, c2)) => (p1 + p2, c1 + c2)}
+      val pointStats = closest.reduceByKey(mergeResults)

       val newPoints = pointStats.map {pair =>
         (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()
@@ -102,5 +102,11 @@
     kPoints.foreach(println)
     spark.stop()
   }
+
+  private def mergeResults(
+      a: (Vector[Double], Int),
+      b: (Vector[Double], Int)): (Vector[Double], Int) = {
+    (a._1 + b._1, a._2 + b._2)
+  }
 }
 // scalastyle:on println
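The `case`-destructuring closure is extracted into the named, explicitly typed `mergeResults`, a move this commit repeats in GaussianMixture below; a named method leaves 2.13's stricter typing of pattern-matching anonymous functions nothing to infer at the `reduceByKey` call site. A sketch of the same shape on plain collections (hypothetical names; `groupMapReduce` is 2.13-only and stands in for `RDD.reduceByKey`):

```scala
object MergeSketch {
  // Same merge as above, monomorphic and explicitly typed: (sum, count) pairs.
  private def mergeResults(a: (Double, Int), b: (Double, Int)): (Double, Int) =
    (a._1 + b._1, a._2 + b._2)

  def main(args: Array[String]): Unit = {
    val stats = Seq("k" -> (1.0, 1), "k" -> (2.0, 1), "j" -> (4.0, 1))
    // Group by key, project the value, merge with the named method.
    val merged = stats.groupMapReduce(_._1)(_._2)(mergeResults)
    println(merged)  // Map(k -> (3.0,2), j -> (4.0,1)), in some order
  }
}
```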

external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala

Lines changed: 2 additions & 2 deletions

@@ -85,7 +85,7 @@ object SchemaConverters {
           StructField(f.name, schemaType.dataType, schemaType.nullable)
         }

-        SchemaType(StructType(fields), nullable = false)
+        SchemaType(StructType(fields.toSeq), nullable = false)

     case ARRAY =>
       val schemaType = toSqlTypeHelper(avroSchema.getElementType, existingRecordNames)
@@ -126,7 +126,7 @@
             StructField(s"member$i", schemaType.dataType, nullable = true)
           }

-          SchemaType(StructType(fields), nullable = false)
+          SchemaType(StructType(fields.toSeq), nullable = false)
       }

     case other => throw new IncompatibleSchemaException(s"Unsupported type $other")
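Here `fields` presumably comes from mapping over the Avro schema's Java field list, which yields a mutable collection after conversion; on 2.13 `scala.Seq` is an alias for `immutable.Seq`, so that no longer satisfies `StructType`'s `Seq[StructField]` parameter without `.toSeq`. A self-contained sketch of the mismatch (hypothetical names):

```scala
import scala.collection.mutable

// On 2.13, scala.Seq is immutable.Seq, so a mutable Buffer (e.g. the result
// of a Java-to-Scala conversion) no longer conforms; .toSeq works on both.
object ToSeqSketch {
  def takesSeq(xs: Seq[Int]): Int = xs.sum

  def main(args: Array[String]): Unit = {
    val fields = mutable.Buffer(1, 2, 3)
    // takesSeq(fields)              // compiles on 2.12, type error on 2.13
    println(takesSeq(fields.toSeq)) // 6
  }
}
```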

external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala

Lines changed: 1 addition & 1 deletion

@@ -336,7 +336,7 @@ private[kafka010] class KafkaOffsetReader(
         }
       })
     }
-    incorrectOffsets
+    incorrectOffsets.toSeq
   }

   // Retry to fetch latest offsets when detecting incorrect offsets. We don't use
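The same `scala.Seq`-is-now-immutable change bites in the other direction here: a method declared to return `Seq` builds what is presumably a mutable buffer internally, so the conversion happens once at the return site. A sketch (hypothetical names):

```scala
import scala.collection.mutable.ArrayBuffer

// A method declared Seq can no longer return the ArrayBuffer it builds on
// 2.13; .toSeq at the return is a no-op on 2.12 and a copy on 2.13.
object BufferReturnSketch {
  def negatives(xs: Seq[Int]): Seq[Int] = {
    val found = ArrayBuffer.empty[Int]
    xs.foreach(x => if (x < 0) found += x)
    found.toSeq
  }

  def main(args: Array[String]): Unit =
    println(negatives(Seq(1, -2, 3, -4)).mkString(", "))  // -2, -4
}
```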

external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala

Lines changed: 2 additions & 2 deletions

@@ -1540,8 +1540,8 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest {
       makeSureGetOffsetCalled,
       Execute { q =>
         // wait to reach the last offset in every partition
-        q.awaitOffset(
-          0, KafkaSourceOffset(partitionOffsets.mapValues(_ => 3L)), streamingTimeout.toMillis)
+        q.awaitOffset(0,
+          KafkaSourceOffset(partitionOffsets.mapValues(_ => 3L).toMap), streamingTimeout.toMillis)
       },
       CheckAnswer(-20, -21, -22, 0, 1, 2, 11, 12, 22),
       StopStream,
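On 2.13, `Map#mapValues` returns a lazy `MapView` rather than a `Map`, so call sites that need a real `Map`, as `KafkaSourceOffset` apparently does here, add `.toMap`; the same code still compiles on 2.12. A sketch:

```scala
object MapValuesSketch {
  def highest(offsets: Map[Int, Long]): Long = offsets.values.max

  def main(args: Array[String]): Unit = {
    val partitionOffsets = Map(0 -> 1L, 1 -> 2L)
    // highest(partitionOffsets.mapValues(_ => 3L))  // type error on 2.13: MapView
    println(highest(partitionOffsets.mapValues(_ => 3L).toMap))  // 3
  }
}
```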

mllib/src/main/scala/org/apache/spark/ml/Estimator.scala

Lines changed: 1 addition & 1 deletion

@@ -76,7 +76,7 @@ abstract class Estimator[M <: Model[M]] extends PipelineStage {
    * @return fitted models, matching the input parameter maps
    */
   @Since("2.0.0")
-  def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[M] = {
+  def fit(dataset: Dataset[_], paramMaps: Seq[ParamMap]): Seq[M] = {
     paramMaps.map(fit(dataset, _))
   }
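Broadening the parameter from `Array[ParamMap]` to `Seq[ParamMap]`: on 2.13 `Array` relates to `Seq` only through an implicit that copies into an `immutable.ArraySeq`, and `Array.map` yields an `Array`, so declaring `Seq` avoids leaning on that conversion (presumably the motivation; the commit message doesn't say). A sketch with hypothetical names:

```scala
object SeqParamSketch {
  // Declared against Seq; Array callers can pass arr.toSeq explicitly.
  def fitAll(paramMaps: Seq[String]): Seq[String] =
    paramMaps.map(_ + "-fitted")

  def main(args: Array[String]): Unit = {
    val arr = Array("lr", "rf")
    // fitAll(arr) also compiles via the implicit Array-to-Seq conversion,
    // but on 2.13 that conversion copies; arr.toSeq makes the cost explicit.
    println(fitAll(arr.toSeq).mkString(", "))  // lr-fitted, rf-fitted
  }
}
```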

mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala

Lines changed: 14 additions & 14 deletions

@@ -492,12 +492,7 @@
           (i, (agg.means(i), agg.covs(i), agg.weights(i), ws))
         }
       } else Iterator.empty
-    }.reduceByKey { case ((mean1, cov1, w1, ws1), (mean2, cov2, w2, ws2)) =>
-      // update the weights, means and covariances for i-th distributions
-      BLAS.axpy(1.0, mean2, mean1)
-      BLAS.axpy(1.0, cov2, cov1)
-      (mean1, cov1, w1 + w2, ws1 + ws2)
-    }.mapValues { case (mean, cov, w, ws) =>
+    }.reduceByKey(GaussianMixture.mergeWeightsMeans).mapValues { case (mean, cov, w, ws) =>
       // Create new distributions based on the partial assignments
       // (often referred to as the "M" step in literature)
       GaussianMixture.updateWeightsAndGaussians(mean, cov, w, ws)
@@ -560,12 +555,7 @@
         agg.meanIter.zip(agg.covIter).zipWithIndex
           .map { case ((mean, cov), i) => (i, (mean, cov, agg.weights(i), ws)) }
       } else Iterator.empty
-    }.reduceByKey { case ((mean1, cov1, w1, ws1), (mean2, cov2, w2, ws2)) =>
-      // update the weights, means and covariances for i-th distributions
-      BLAS.axpy(1.0, mean2, mean1)
-      BLAS.axpy(1.0, cov2, cov1)
-      (mean1, cov1, w1 + w2, ws1 + ws2)
-    }.mapValues { case (mean, cov, w, ws) =>
+    }.reduceByKey(GaussianMixture.mergeWeightsMeans).mapValues { case (mean, cov, w, ws) =>
       // Create new distributions based on the partial assignments
       // (often referred to as the "M" step in literature)
       GaussianMixture.updateWeightsAndGaussians(mean, cov, w, ws)
@@ -624,8 +614,8 @@
     val gaussians = Array.tabulate(numClusters) { i =>
       val start = i * numSamples
       val end = start + numSamples
-      val sampleSlice = samples.view(start, end)
-      val weightSlice = sampleWeights.view(start, end)
+      val sampleSlice = samples.view.slice(start, end)
+      val weightSlice = sampleWeights.view.slice(start, end)
       val localWeightSum = weightSlice.sum
       weights(i) = localWeightSum / weightSum
@@ -691,6 +681,16 @@ object GaussianMixture extends DefaultParamsReadable[GaussianMixture] {
     new DenseMatrix(n, n, symmetricValues)
   }

+  private def mergeWeightsMeans(
+      a: (DenseVector, DenseVector, Double, Double),
+      b: (DenseVector, DenseVector, Double, Double)): (DenseVector, DenseVector, Double, Double) =
+  {
+    // update the weights, means and covariances for i-th distributions
+    BLAS.axpy(1.0, b._1, a._1)
+    BLAS.axpy(1.0, b._2, a._2)
+    (a._1, a._2, a._3 + b._3, a._4 + b._4)
+  }
+
   /**
    * Update the weight, mean and covariance of gaussian distribution.
    *
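Two 2.13 fixes in one file: the repeated `reduceByKey` merge closure becomes the named `mergeWeightsMeans` (the same move as in SparkKMeans above), and `view(start, end)` becomes `view.slice(start, end)`, since the two-argument `view(from, until)` form is deprecated in 2.13 in favor of `view.slice`. A sketch of the slice form, valid on both versions:

```scala
object ViewSliceSketch {
  def main(args: Array[String]): Unit = {
    val samples = Array.tabulate(10)(_.toDouble)
    // Lazy window over the array; no copy, works on 2.12 and 2.13.
    val window = samples.view.slice(2, 5)
    println(window.sum)  // 2.0 + 3.0 + 4.0 = 9.0
  }
}
```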

mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala

Lines changed: 2 additions & 2 deletions

@@ -201,7 +201,7 @@ object RobustScaler extends DefaultParamsReadable[RobustScaler] {
         }
         Iterator.tabulate(numFeatures)(i => (i, summaries(i).compress))
       } else Iterator.empty
-    }.reduceByKey { case (s1, s2) => s1.merge(s2) }
+    }.reduceByKey { (s1, s2) => s1.merge(s2) }
   } else {
     val scale = math.max(math.ceil(math.sqrt(vectors.getNumPartitions)).toInt, 2)
     vectors.mapPartitionsWithIndex { case (pid, iter) =>
@@ -214,7 +214,7 @@
         seqOp = (s, v) => s.insert(v),
         combOp = (s1, s2) => s1.compress.merge(s2.compress)
       ).map { case ((_, i), s) => (i, s)
-      }.reduceByKey { case (s1, s2) => s1.compress.merge(s2.compress) }
+      }.reduceByKey { (s1, s2) => s1.compress.merge(s2.compress) }
     }
   }
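Here the closure bodies never destructure their arguments, so the `case` form is dropped for a plain two-parameter function literal: identical in behavior, but typed directly as the `(V, V) => V` that `reduceByKey` expects, rather than going through the pattern-matching anonymous function expansion the commit avoids at these call sites. A sketch of the preferred spelling:

```scala
object LiteralVsCaseSketch {
  // A stand-in for reduceByKey's (V, V) => V parameter.
  def reduce(xs: Seq[Double])(f: (Double, Double) => Double): Double =
    xs.reduce(f)

  def main(args: Array[String]): Unit = {
    // Plain literal: unambiguous on both 2.12 and 2.13.
    println(reduce(Seq(1.0, 2.0, 3.0)) { (s1, s2) => s1 + s2 })  // 6.0
  }
}
```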

mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala

Lines changed: 1 addition & 1 deletion

@@ -291,7 +291,7 @@ class Word2VecModel private[ml] (
     val outputSchema = transformSchema(dataset.schema, logging = true)
     val vectors = wordVectors.getVectors
       .mapValues(vv => Vectors.dense(vv.map(_.toDouble)))
-      .map(identity) // mapValues doesn't return a serializable map (SI-7005)
+      .map(identity).toMap // mapValues doesn't return a serializable map (SI-7005)
     val bVectors = dataset.sparkSession.sparkContext.broadcast(vectors)
     val d = $(vectorSize)
     val emptyVec = Vectors.sparse(d, Array.emptyIntArray, Array.emptyDoubleArray)
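The `.map(identity)` was a long-standing workaround for `mapValues` returning a non-serializable view (SI-7005); on 2.13, `mapValues` returns an explicit `MapView` and mapping a view is still lazy, so the added `.toMap` is what actually materializes a strict, serializable `Map` on both versions. A sketch:

```scala
object StrictMapSketch {
  def main(args: Array[String]): Unit = {
    val raw = Map("hi" -> Array(1.0f, 2.0f))
    val dense = raw
      .mapValues(v => v.map(_.toDouble)) // lazy view on 2.13 (and deprecated there)
      .map(identity).toMap               // strict Map on both 2.12 and 2.13
    println(dense("hi").sum)  // 3.0
  }
}
```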

mllib/src/main/scala/org/apache/spark/ml/param/params.scala

Lines changed: 1 addition & 1 deletion

@@ -937,7 +937,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any])

   /** Put param pairs with a `java.util.List` of values for Python. */
   private[ml] def put(paramPairs: JList[ParamPair[_]]): this.type = {
-    put(paramPairs.asScala: _*)
+    put(paramPairs.asScala.toSeq: _*)
   }

   /**
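Vararg expansion `xs: _*` requires a `scala.Seq`, which on 2.13 means an immutable `Seq`; `asScala` on a `java.util.List` produces a mutable `Buffer`, hence the `.toSeq`. A sketch (using `JavaConverters`, which exists on both versions, though 2.13 prefers `scala.jdk.CollectionConverters`):

```scala
import scala.collection.JavaConverters._

object VarargsSketch {
  def put(values: Int*): Int = values.sum

  def main(args: Array[String]): Unit = {
    val jlist = java.util.Arrays.asList(1, 2, 3)
    // put(jlist.asScala: _*)             // compiles on 2.12, not on 2.13
    println(put(jlist.asScala.toSeq: _*)) // 6
  }
}
```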
