
Commit c28a6fa

srowen authored and dongjoon-hyun committed
[SPARK-29292][SQL][ML] Update rest of default modules (Hive, ML, etc) for Scala 2.13 compilation
### What changes were proposed in this pull request?

Same as apache#29078 and apache#28971. This makes the rest of the default modules (i.e. those you get without specifying `-Pyarn` etc.) compile under Scala 2.13. As a result, it does not close the JIRA, and it also, of course, does not demonstrate that tests pass yet in 2.13. Note that this does not fix the `repl` module; that's separate.

### Why are the changes needed?

Eventually, we need to support a Scala 2.13 build, perhaps in Spark 3.1.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing tests. (2.13 was not tested; this is about getting it to compile without breaking 2.12.)

Closes apache#29111 from srowen/SPARK-29292.3.

Authored-by: Sean Owen <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent b05f309 commit c28a6fa


36 files changed: +106 -102 lines changed


examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java

+2-2
@@ -23,7 +23,7 @@
 import java.util.Arrays;
 import java.util.List;

-import scala.collection.mutable.WrappedArray;
+import scala.collection.mutable.Seq;

 import org.apache.spark.ml.feature.RegexTokenizer;
 import org.apache.spark.ml.feature.Tokenizer;
@@ -69,7 +69,7 @@ public static void main(String[] args) {
       .setPattern("\\W");  // alternatively .setPattern("\\w+").setGaps(false);

     spark.udf().register(
-        "countTokens", (WrappedArray<?> words) -> words.size(), DataTypes.IntegerType);
+        "countTokens", (Seq<?> words) -> words.size(), DataTypes.IntegerType);

     Dataset<Row> tokenized = tokenizer.transform(sentenceDataFrame);
     tokenized.select("sentence", "words")
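A note on the change above (my own illustration, not part of the commit): the example used to type the UDF parameter as `WrappedArray<?>`, and `WrappedArray` is superseded by `mutable.ArraySeq` on Scala 2.13. Both are subtypes of `scala.collection.mutable.Seq`, which is presumably why typing the parameter as `Seq<?>` compiles on either version. A minimal Scala sketch of that relationship:

```scala
import scala.collection.mutable

// Wrapping an Array yields mutable.WrappedArray on 2.12 and mutable.ArraySeq on
// 2.13; both conform to mutable.Seq, the type the example now uses.
val words: mutable.Seq[String] = Array("a", "b", "c") // implicit array wrapper
println(words.size) // 3
```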

examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala

+7-1
@@ -82,7 +82,7 @@ object SparkKMeans {
     while(tempDist > convergeDist) {
       val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

-      val pointStats = closest.reduceByKey{case ((p1, c1), (p2, c2)) => (p1 + p2, c1 + c2)}
+      val pointStats = closest.reduceByKey(mergeResults)

       val newPoints = pointStats.map {pair =>
         (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()
@@ -102,5 +102,11 @@ object SparkKMeans {
     kPoints.foreach(println)
     spark.stop()
   }
+
+  private def mergeResults(
+      a: (Vector[Double], Int),
+      b: (Vector[Double], Int)): (Vector[Double], Int) = {
+    (a._1 + b._1, a._2 + b._2)
+  }
 }
 // scalastyle:on println
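The extracted `mergeResults` above just sums the per-cluster point vectors and counts; presumably a named, fully annotated method sidesteps differences in how 2.12 and 2.13 type-check the inline pattern-matching function previously passed to `reduceByKey`. A standalone sketch of the same merge logic (my own example, assuming breeze on the classpath as the example itself does):

```scala
import breeze.linalg.{DenseVector, Vector}

// Same merge logic as the method added above: element-wise vector sum plus count sum.
def mergeResults(
    a: (Vector[Double], Int),
    b: (Vector[Double], Int)): (Vector[Double], Int) =
  (a._1 + b._1, a._2 + b._2)

val merged = mergeResults((DenseVector(1.0, 2.0), 1), (DenseVector(3.0, 4.0), 1))
println(merged) // (DenseVector(4.0, 6.0),2)
```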

external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala

+2-2
@@ -85,7 +85,7 @@ object SchemaConverters {
          StructField(f.name, schemaType.dataType, schemaType.nullable)
        }

-        SchemaType(StructType(fields), nullable = false)
+        SchemaType(StructType(fields.toSeq), nullable = false)

      case ARRAY =>
        val schemaType = toSqlTypeHelper(avroSchema.getElementType, existingRecordNames)
@@ -126,7 +126,7 @@ object SchemaConverters {
            StructField(s"member$i", schemaType.dataType, nullable = true)
        }

-        SchemaType(StructType(fields), nullable = false)
+        SchemaType(StructType(fields.toSeq), nullable = false)
      }

    case other => throw new IncompatibleSchemaException(s"Unsupported type $other")
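The `.toSeq` calls here (and in several files below) follow from a 2.13 change worth summarizing as background: `scala.Seq` is now an alias for `scala.collection.immutable.Seq`, so a mutable buffer (e.g. the result of `asScala` or of mapping over one) no longer satisfies a `Seq` parameter without an explicit conversion. A minimal sketch, my own example:

```scala
import scala.collection.mutable.ArrayBuffer

def needsSeq(xs: Seq[Int]): Int = xs.sum

val buf = ArrayBuffer(1, 2, 3)
// needsSeq(buf)     // compiles on 2.12 (Seq = collection.Seq) but not on 2.13
needsSeq(buf.toSeq)  // compiles on both; on 2.13 this copies into an immutable Seq
```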

external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala

+1-1
@@ -336,7 +336,7 @@ private[kafka010] class KafkaOffsetReader(
         }
       })
     }
-    incorrectOffsets
+    incorrectOffsets.toSeq
   }

   // Retry to fetch latest offsets when detecting incorrect offsets. We don't use

external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala

+2-2
@@ -1540,8 +1540,8 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest {
       makeSureGetOffsetCalled,
       Execute { q =>
         // wait to reach the last offset in every partition
-        q.awaitOffset(
-          0, KafkaSourceOffset(partitionOffsets.mapValues(_ => 3L)), streamingTimeout.toMillis)
+        q.awaitOffset(0,
+          KafkaSourceOffset(partitionOffsets.mapValues(_ => 3L).toMap), streamingTimeout.toMillis)
       },
       CheckAnswer(-20, -21, -22, 0, 1, 2, 11, 12, 22),
       StopStream,
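The added `.toMap` reflects another 2.13 difference: `Map#mapValues` now returns a lazy `MapView` rather than a `Map` (and on 2.12 it returned a non-serializable view, the SI-7005 issue noted elsewhere in this commit). A small sketch, my own example:

```scala
val partitionOffsets = Map(0 -> 10L, 1 -> 20L)

// Strict Map on both 2.12 and 2.13; without .toMap the result is a MapView on
// 2.13 and would not satisfy call sites that expect a Map.
val capped: Map[Int, Long] = partitionOffsets.mapValues(_ => 3L).toMap
println(capped) // Map(0 -> 3, 1 -> 3)
```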

mllib/src/main/scala/org/apache/spark/ml/Estimator.scala

+1-1
@@ -76,7 +76,7 @@ abstract class Estimator[M <: Model[M]] extends PipelineStage {
    * @return fitted models, matching the input parameter maps
    */
   @Since("2.0.0")
-  def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[M] = {
+  def fit(dataset: Dataset[_], paramMaps: Seq[ParamMap]): Seq[M] = {
     paramMaps.map(fit(dataset, _))
   }
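Changing the parameter from `Array[ParamMap]` to `Seq[ParamMap]` keeps the body's result type lining up on 2.13 (this is my assumption about the motivation, not stated in the diff): mapping a `Seq` yields a `Seq` on both versions, whereas mapping an `Array` yields an `Array`, which on 2.13 only conforms to the now-immutable `Seq` return type through a deprecated copying conversion. Sketch with hypothetical helpers:

```scala
// Hypothetical helpers for illustration only.
def fitAllFromSeq(params: Seq[String]): Seq[String] =
  params.map(_.toUpperCase) // a Seq maps to a Seq on 2.12 and 2.13 alike

def fitAllFromArray(params: Array[String]): Seq[String] =
  params.map(_.toUpperCase).toSeq // explicit conversion keeps 2.13 warning-free
```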

mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala

+14-14
@@ -492,12 +492,7 @@ class GaussianMixture @Since("2.0.0") (
           (i, (agg.means(i), agg.covs(i), agg.weights(i), ws))
         }
       } else Iterator.empty
-    }.reduceByKey { case ((mean1, cov1, w1, ws1), (mean2, cov2, w2, ws2)) =>
-      // update the weights, means and covariances for i-th distributions
-      BLAS.axpy(1.0, mean2, mean1)
-      BLAS.axpy(1.0, cov2, cov1)
-      (mean1, cov1, w1 + w2, ws1 + ws2)
-    }.mapValues { case (mean, cov, w, ws) =>
+    }.reduceByKey(GaussianMixture.mergeWeightsMeans).mapValues { case (mean, cov, w, ws) =>
       // Create new distributions based on the partial assignments
       // (often referred to as the "M" step in literature)
       GaussianMixture.updateWeightsAndGaussians(mean, cov, w, ws)
@@ -560,12 +555,7 @@ class GaussianMixture @Since("2.0.0") (
         agg.meanIter.zip(agg.covIter).zipWithIndex
           .map { case ((mean, cov), i) => (i, (mean, cov, agg.weights(i), ws)) }
       } else Iterator.empty
-    }.reduceByKey { case ((mean1, cov1, w1, ws1), (mean2, cov2, w2, ws2)) =>
-      // update the weights, means and covariances for i-th distributions
-      BLAS.axpy(1.0, mean2, mean1)
-      BLAS.axpy(1.0, cov2, cov1)
-      (mean1, cov1, w1 + w2, ws1 + ws2)
-    }.mapValues { case (mean, cov, w, ws) =>
+    }.reduceByKey(GaussianMixture.mergeWeightsMeans).mapValues { case (mean, cov, w, ws) =>
       // Create new distributions based on the partial assignments
       // (often referred to as the "M" step in literature)
       GaussianMixture.updateWeightsAndGaussians(mean, cov, w, ws)
@@ -624,8 +614,8 @@ class GaussianMixture @Since("2.0.0") (
     val gaussians = Array.tabulate(numClusters) { i =>
       val start = i * numSamples
       val end = start + numSamples
-      val sampleSlice = samples.view(start, end)
-      val weightSlice = sampleWeights.view(start, end)
+      val sampleSlice = samples.view.slice(start, end)
+      val weightSlice = sampleWeights.view.slice(start, end)
       val localWeightSum = weightSlice.sum
       weights(i) = localWeightSum / weightSum

@@ -691,6 +681,16 @@ object GaussianMixture extends DefaultParamsReadable[GaussianMixture] {
     new DenseMatrix(n, n, symmetricValues)
   }

+  private def mergeWeightsMeans(
+      a: (DenseVector, DenseVector, Double, Double),
+      b: (DenseVector, DenseVector, Double, Double)): (DenseVector, DenseVector, Double, Double) =
+  {
+    // update the weights, means and covariances for i-th distributions
+    BLAS.axpy(1.0, b._1, a._1)
+    BLAS.axpy(1.0, b._2, a._2)
+    (a._1, a._2, a._3 + b._3, a._4 + b._4)
+  }
+
   /**
    * Update the weight, mean and covariance of gaussian distribution.
    *
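The `view(start, end)` to `view.slice(start, end)` rewrites here (and in the impurity classes below) track a collection-API change: the two-argument `view(from, until)` form is deprecated or gone in 2.13, while taking the full `view` and then `slice` works on both versions. A sketch, my own example:

```scala
val samples = Array(1.0, 2.0, 3.0, 4.0, 5.0)

// Lazy window over elements 1..3 without copying the array; valid on 2.12 and 2.13.
val window = samples.view.slice(1, 4)
println(window.sum) // 9.0
```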

mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala

+2-2
@@ -201,7 +201,7 @@ object RobustScaler extends DefaultParamsReadable[RobustScaler] {
           }
           Iterator.tabulate(numFeatures)(i => (i, summaries(i).compress))
         } else Iterator.empty
-      }.reduceByKey { case (s1, s2) => s1.merge(s2) }
+      }.reduceByKey { (s1, s2) => s1.merge(s2) }
     } else {
       val scale = math.max(math.ceil(math.sqrt(vectors.getNumPartitions)).toInt, 2)
       vectors.mapPartitionsWithIndex { case (pid, iter) =>
@@ -214,7 +214,7 @@ object RobustScaler extends DefaultParamsReadable[RobustScaler] {
         seqOp = (s, v) => s.insert(v),
         combOp = (s1, s2) => s1.compress.merge(s2.compress)
       ).map { case ((_, i), s) => (i, s)
-      }.reduceByKey { case (s1, s2) => s1.compress.merge(s2.compress) }
+      }.reduceByKey { (s1, s2) => s1.compress.merge(s2.compress) }
     }
   }

mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala

+1-1
@@ -291,7 +291,7 @@ class Word2VecModel private[ml] (
     val outputSchema = transformSchema(dataset.schema, logging = true)
     val vectors = wordVectors.getVectors
       .mapValues(vv => Vectors.dense(vv.map(_.toDouble)))
-      .map(identity) // mapValues doesn't return a serializable map (SI-7005)
+      .map(identity).toMap // mapValues doesn't return a serializable map (SI-7005)
     val bVectors = dataset.sparkSession.sparkContext.broadcast(vectors)
     val d = $(vectorSize)
     val emptyVec = Vectors.sparse(d, Array.emptyIntArray, Array.emptyDoubleArray)

mllib/src/main/scala/org/apache/spark/ml/param/params.scala

+1-1
@@ -937,7 +937,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any])

   /** Put param pairs with a `java.util.List` of values for Python. */
   private[ml] def put(paramPairs: JList[ParamPair[_]]): this.type = {
-    put(paramPairs.asScala: _*)
+    put(paramPairs.asScala.toSeq: _*)
   }

   /**
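The `asScala.toSeq: _*` pattern (also used in PythonMLLibAPI below) is the varargs flavor of the same `Seq` change: `asScala` on a `java.util.List` yields a mutable `Buffer`, and on 2.13 expanding varargs with `: _*` expects an immutable `Seq`. A sketch, my own example:

```scala
import java.util.Arrays
import scala.collection.JavaConverters._ // still compiles on 2.13, though deprecated there

def register(names: String*): Int = names.length

val jList: java.util.List[String] = Arrays.asList("a", "b", "c")
println(register(jList.asScala.toSeq: _*)) // 3; without .toSeq this does not compile on 2.13
```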

mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala

+4-4
@@ -1223,28 +1223,28 @@ private[python] class PythonMLLibAPI extends Serializable {
    * Python-friendly version of [[MLUtils.convertVectorColumnsToML()]].
    */
   def convertVectorColumnsToML(dataset: DataFrame, cols: JArrayList[String]): DataFrame = {
-    MLUtils.convertVectorColumnsToML(dataset, cols.asScala: _*)
+    MLUtils.convertVectorColumnsToML(dataset, cols.asScala.toSeq: _*)
   }

   /**
    * Python-friendly version of [[MLUtils.convertVectorColumnsFromML()]]
    */
   def convertVectorColumnsFromML(dataset: DataFrame, cols: JArrayList[String]): DataFrame = {
-    MLUtils.convertVectorColumnsFromML(dataset, cols.asScala: _*)
+    MLUtils.convertVectorColumnsFromML(dataset, cols.asScala.toSeq: _*)
   }

   /**
    * Python-friendly version of [[MLUtils.convertMatrixColumnsToML()]].
    */
   def convertMatrixColumnsToML(dataset: DataFrame, cols: JArrayList[String]): DataFrame = {
-    MLUtils.convertMatrixColumnsToML(dataset, cols.asScala: _*)
+    MLUtils.convertMatrixColumnsToML(dataset, cols.asScala.toSeq: _*)
   }

   /**
    * Python-friendly version of [[MLUtils.convertMatrixColumnsFromML()]]
    */
   def convertMatrixColumnsFromML(dataset: DataFrame, cols: JArrayList[String]): DataFrame = {
-    MLUtils.convertMatrixColumnsFromML(dataset, cols.asScala: _*)
+    MLUtils.convertMatrixColumnsFromML(dataset, cols.asScala.toSeq: _*)
   }
 }

mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala

+1-1
@@ -225,7 +225,7 @@ class BisectingKMeans private (
           divisibleIndices.contains(parentIndex(index))
         }
         newClusters = summarize(d, newAssignments, dMeasure)
-        newClusterCenters = newClusters.mapValues(_.center).map(identity)
+        newClusterCenters = newClusters.mapValues(_.center).map(identity).toMap
       }
       if (preIndices != null) {
         preIndices.unpersist()

mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala

+4-6
@@ -17,8 +17,6 @@

 package org.apache.spark.mllib.clustering

-import scala.collection.mutable.IndexedSeq
-
 import breeze.linalg.{diag, DenseMatrix => BreezeMatrix, DenseVector => BDV, Vector => BV}

 import org.apache.spark.annotation.Since
@@ -189,8 +187,8 @@ class GaussianMixture private (
       case None =>
         val samples = breezeData.takeSample(withReplacement = true, k * nSamples, seed)
         (Array.fill(k)(1.0 / k), Array.tabulate(k) { i =>
-          val slice = samples.view(i * nSamples, (i + 1) * nSamples)
-          new MultivariateGaussian(vectorMean(slice), initCovariance(slice))
+          val slice = samples.view.slice(i * nSamples, (i + 1) * nSamples)
+          new MultivariateGaussian(vectorMean(slice.toSeq), initCovariance(slice.toSeq))
         })
     }

@@ -259,7 +257,7 @@ class GaussianMixture private (
   }

   /** Average of dense breeze vectors */
-  private def vectorMean(x: IndexedSeq[BV[Double]]): BDV[Double] = {
+  private def vectorMean(x: Seq[BV[Double]]): BDV[Double] = {
     val v = BDV.zeros[Double](x(0).length)
     x.foreach(xi => v += xi)
     v / x.length.toDouble
@@ -269,7 +267,7 @@ class GaussianMixture private (
    * Construct matrix where diagonal entries are element-wise
    * variance of input vectors (computes biased variance)
    */
-  private def initCovariance(x: IndexedSeq[BV[Double]]): BreezeMatrix[Double] = {
+  private def initCovariance(x: Seq[BV[Double]]): BreezeMatrix[Double] = {
     val mu = vectorMean(x)
     val ss = BDV.zeros[Double](x(0).length)
     x.foreach { xi =>

mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala

+1-1
@@ -335,7 +335,7 @@ object PrefixSpan extends Logging {
       largePrefixes = newLargePrefixes
     }

-    var freqPatterns = sc.parallelize(localFreqPatterns, 1)
+    var freqPatterns = sc.parallelize(localFreqPatterns.toSeq, 1)

     val numSmallPrefixes = smallPrefixes.size
     logInfo(s"number of small prefixes for local processing: $numSmallPrefixes")

mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala

+1-1
@@ -97,7 +97,7 @@ class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int
       }
       if (sizes(i) + tail.length >= offset + windowSize) {
         partitions +=
-          new SlidingRDDPartition[T](partitionIndex, parentPartitions(i), tail, offset)
+          new SlidingRDDPartition[T](partitionIndex, parentPartitions(i), tail.toSeq, offset)
         partitionIndex += 1
       }
     }

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala

+1-1
@@ -112,7 +112,7 @@ private[spark] class EntropyAggregator(numClasses: Int)
    * @param offset Start index of stats for this (node, feature, bin).
    */
   def getCalculator(allStats: Array[Double], offset: Int): EntropyCalculator = {
-    new EntropyCalculator(allStats.view(offset, offset + statsSize - 1).toArray,
+    new EntropyCalculator(allStats.view.slice(offset, offset + statsSize - 1).toArray,
       allStats(offset + statsSize - 1).toLong)
   }
 }

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala

+1-1
@@ -107,7 +107,7 @@ private[spark] class GiniAggregator(numClasses: Int)
    * @param offset Start index of stats for this (node, feature, bin).
    */
   def getCalculator(allStats: Array[Double], offset: Int): GiniCalculator = {
-    new GiniCalculator(allStats.view(offset, offset + statsSize - 1).toArray,
+    new GiniCalculator(allStats.view.slice(offset, offset + statsSize - 1).toArray,
       allStats(offset + statsSize - 1).toLong)
   }
 }

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala

+1-1
@@ -95,7 +95,7 @@ private[spark] class VarianceAggregator()
    * @param offset Start index of stats for this (node, feature, bin).
    */
   def getCalculator(allStats: Array[Double], offset: Int): VarianceCalculator = {
-    new VarianceCalculator(allStats.view(offset, offset + statsSize - 1).toArray,
+    new VarianceCalculator(allStats.view.slice(offset, offset + statsSize - 1).toArray,
       allStats(offset + statsSize - 1).toLong)
   }
 }

mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala

+4-4
@@ -85,10 +85,10 @@ private[mllib] object NumericParser {
     while (parsing && tokenizer.hasMoreTokens()) {
       token = tokenizer.nextToken()
       if (token == "(") {
-        items.append(parseTuple(tokenizer))
+        items += parseTuple(tokenizer)
         allowComma = true
       } else if (token == "[") {
-        items.append(parseArray(tokenizer))
+        items += parseArray(tokenizer)
         allowComma = true
       } else if (token == ",") {
         if (allowComma) {
@@ -102,14 +102,14 @@ private[mllib] object NumericParser {
         // ignore whitespaces between delim chars, e.g. ", ["
       } else {
         // expecting a number
-        items.append(parseDouble(token))
+        items += parseDouble(token)
         allowComma = true
       }
     }
     if (parsing) {
       throw new SparkException(s"A tuple must end with ')'.")
     }
-    items
+    items.toSeq
   }

   private def parseDouble(s: String): Double = {

mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala

+2-2
@@ -219,7 +219,7 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest {

     model1.clusterCenters.forall(Vectors.norm(_, 2) == 1.0)

-    val df2 = spark.createDataFrame(spark.sparkContext.parallelize(Array(
+    val df2 = spark.createDataFrame(spark.sparkContext.parallelize(Seq(
       (Vectors.dense(1.0, 1.0), 2.0), (Vectors.dense(10.0, 10.0), 2.0),
       (Vectors.dense(1.0, 0.5), 2.0), (Vectors.dense(10.0, 4.4), 2.0),
       (Vectors.dense(-1.0, 1.0), 2.0), (Vectors.dense(-100.0, 90.0), 2.0))))
@@ -286,7 +286,7 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest {

     model1.clusterCenters.forall(Vectors.norm(_, 2) == 1.0)

-    val df2 = spark.createDataFrame(spark.sparkContext.parallelize(Array(
+    val df2 = spark.createDataFrame(spark.sparkContext.parallelize(Seq(
       (Vectors.dense(1.0, 1.0), 1.0), (Vectors.dense(10.0, 10.0), 2.0),
       (Vectors.dense(1.0, 0.5), 2.0), (Vectors.dense(10.0, 4.4), 3.0),
       (Vectors.dense(-1.0, 1.0), 3.0), (Vectors.dense(-100.0, 90.0), 4.0))))
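Switching the literals from `Array(...)` to `Seq(...)` is the test-side face of the same issue: `SparkContext.parallelize` takes a `Seq`, and on 2.13 passing an `Array` there leans on a deprecated copying conversion to an immutable `Seq`. A sketch, assuming a local `SparkSession` named `spark` as in the test:

```scala
// Passing a Seq directly avoids the deprecated Array-to-immutable-Seq implicit on 2.13.
val rdd = spark.sparkContext.parallelize(Seq(
  (1.0, "a"), (2.0, "b"), (3.0, "c")))
println(rdd.count()) // 3
```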
