
Commit 94287f1
Update Spark to 2.1.0
1 parent ca93e31

32 files changed: +266 −176 lines

assembly/pom.xml

+3 −3

@@ -20,11 +20,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>com.github.cloudml.zen</groupId>
-    <artifactId>zen-parent_2.10</artifactId>
-    <version>0.3-SNAPSHOT</version>
+    <artifactId>zen-parent_2.11</artifactId>
+    <version>0.4-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
-  <artifactId>zen-assembly_2.10</artifactId>
+  <artifactId>zen-assembly_2.11</artifactId>
   <name>Zen Project Assembly</name>
   <url>https://github.com/cloudml/zen/</url>
   <packaging>pom</packaging>

examples/pom.xml

+3 −3

@@ -20,11 +20,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>com.github.cloudml.zen</groupId>
-    <artifactId>zen-parent_2.10</artifactId>
-    <version>0.3-SNAPSHOT</version>
+    <artifactId>zen-parent_2.11</artifactId>
+    <version>0.4-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
-  <artifactId>zen-examples_2.10</artifactId>
+  <artifactId>zen-examples_2.11</artifactId>
   <name>Zen Project Examples</name>
   <url>https://github.com/cloudml/zen/</url>
   <properties>

examples/src/main/scala/com/github/cloudml/zen/examples/ml/LDADriver.scala

−2

@@ -22,7 +22,6 @@ import scala.annotation.tailrec
 import com.github.cloudml.zen.ml.clustering.LDA
 import com.github.cloudml.zen.ml.clustering.LDADefines._
 import com.github.cloudml.zen.ml.clustering.algorithm.LDATrainer
-import com.github.cloudml.zen.ml.util.SparkHacker
 import com.github.cloudml.zen.ml.util.SparkUtils

 import org.apache.hadoop.fs.Path
@@ -131,7 +130,6 @@ object LDADriver {
       alphaAS: Double,
       algo: LDATrainer,
       storageLevel: StorageLevel): Double = {
-    SparkHacker.gcCleaner(30 * 60, 30 * 60, "LDA_gcCleaner")
     val trainingStartedTime = System.nanoTime()
     val termModel = LDA.train(docs, totalIter, numTopics, alpha, beta, alphaAS, algo, storageLevel)
     val trainingEndedTime = System.nanoTime()
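Note on the removed call: SparkHacker.gcCleaner(30 * 60, 30 * 60, "LDA_gcCleaner") is a project-internal helper, not a Spark API, and its source is not part of this diff. Judging by the imports dropped elsewhere in this commit (java.util.{Timer, TimerTask}, java.lang.ref.WeakReference in MovieLensUtils.scala), it presumably scheduled periodic JVM garbage collection. A purely hypothetical sketch of such a helper, for orientation only, not the project's actual SparkHacker implementation:

import java.util.{Timer, TimerTask}

// Hypothetical stand-in for the removed SparkHacker.gcCleaner helper:
// after an initial delay, trigger a full GC on a fixed period (both in seconds).
object GcCleanerSketch {
  def gcCleaner(delaySeconds: Long, periodSeconds: Long, name: String): Timer = {
    val timer = new Timer(name, true) // daemon timer thread
    timer.schedule(new TimerTask {
      override def run(): Unit = System.gc()
    }, delaySeconds * 1000L, periodSeconds * 1000L)
    timer
  }
}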

examples/src/main/scala/com/github/cloudml/zen/examples/ml/MovieLensBSFM.scala

+2 −3

@@ -17,10 +17,10 @@
 package com.github.cloudml.zen.examples.ml

 import com.github.cloudml.zen.ml.recommendation.{BSFMModel, BSFMRegression}
-import com.github.cloudml.zen.ml.util.SparkHacker
+import com.github.cloudml.zen.ml.util.Logging
 import org.apache.spark.graphx2.GraphXUtils
 import org.apache.spark.storage.StorageLevel
-import org.apache.spark.{Logging, SparkConf, SparkContext}
+import org.apache.spark.{SparkConf, SparkContext}
 import scopt.OptionParser

 object MovieLensBSFM extends Logging {
@@ -114,7 +114,6 @@ object MovieLensBSFM extends Logging {
     val sc = new SparkContext(conf)
     val checkpointDir = s"$out/checkpoint"
     sc.setCheckpointDir(checkpointDir)
-    SparkHacker.gcCleaner(60 * 10, 60 * 10, "MovieLensBSFM")
     val (trainSet, testSet, views) = if (useSVDPlusPlus) {
       MovieLensUtils.genSamplesSVDPlusPlus(sc, input, numPartitions, storageLevel)
     }
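Throughout this commit, org.apache.spark.Logging is swapped for com.github.cloudml.zen.ml.util.Logging, consistent with Spark 2.x no longer exposing its Logging trait as public API. The project-local trait itself is not shown in the diff; a minimal sketch of what such a replacement typically looks like, assuming an SLF4J backend (the body below is illustrative, not the actual zen implementation):

import org.slf4j.{Logger, LoggerFactory}

// Illustrative drop-in replacement for the removed org.apache.spark.Logging trait.
trait Logging {
  @transient private var _log: Logger = _

  protected def log: Logger = {
    if (_log == null) {
      _log = LoggerFactory.getLogger(getClass.getName.stripSuffix("$"))
    }
    _log
  }

  protected def logInfo(msg: => String): Unit = if (log.isInfoEnabled) log.info(msg)
  protected def logWarning(msg: => String): Unit = if (log.isWarnEnabled) log.warn(msg)
  protected def logError(msg: => String): Unit = if (log.isErrorEnabled) log.error(msg)
}

With a trait of this shape, objects such as MovieLensBSFM keep their "extends Logging" declaration unchanged and only the import moves.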

examples/src/main/scala/com/github/cloudml/zen/examples/ml/MovieLensFM.scala

+3 −4

@@ -16,9 +16,9 @@
  */
 package com.github.cloudml.zen.examples.ml

-import com.github.cloudml.zen.ml.recommendation.{FMRegression, FMModel, FMClassification, FM}
-import com.github.cloudml.zen.ml.util.SparkHacker
-import org.apache.spark.{Logging, SparkConf, SparkContext}
+import com.github.cloudml.zen.ml.recommendation.{FMModel, FMRegression}
+import com.github.cloudml.zen.ml.util.Logging
+import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.graphx2.GraphXUtils
 import org.apache.spark.storage.StorageLevel
 import scopt.OptionParser
@@ -110,7 +110,6 @@ object MovieLensFM extends Logging {
     val sc = new SparkContext(conf)
     val checkpointDir = s"$out/checkpoint"
     sc.setCheckpointDir(checkpointDir)
-    SparkHacker.gcCleaner(60 * 10, 60 * 10, "MovieLensFM")
     val (trainSet, testSet, _) = if (useSVDPlusPlus) {
       MovieLensUtils.genSamplesSVDPlusPlus(sc, input, numPartitions, storageLevel)
     }

examples/src/main/scala/com/github/cloudml/zen/examples/ml/MovieLensMVM.scala

+2 −3

@@ -17,10 +17,10 @@
 package com.github.cloudml.zen.examples.ml

 import com.github.cloudml.zen.ml.recommendation.{MVMModel, MVMRegression}
-import com.github.cloudml.zen.ml.util.SparkHacker
+import com.github.cloudml.zen.ml.util.Logging
 import org.apache.spark.graphx2.GraphXUtils
 import org.apache.spark.storage.StorageLevel
-import org.apache.spark.{Logging, SparkConf, SparkContext}
+import org.apache.spark.{SparkConf, SparkContext}
 import scopt.OptionParser

 object MovieLensMVM extends Logging {
@@ -108,7 +108,6 @@ object MovieLensMVM extends Logging {
     }
     val sc = new SparkContext(conf)
     sc.setCheckpointDir(checkpointDir)
-    SparkHacker.gcCleaner(60 * 10, 60 * 10, "MovieLensMVM")
     val (trainSet, testSet, views) = if (useSVDPlusPlus) {
       MovieLensUtils.genSamplesSVDPlusPlus(sc, input, numPartitions, storageLevel)
     } else {

examples/src/main/scala/com/github/cloudml/zen/examples/ml/MovieLensUtils.scala

+2 −3

@@ -17,10 +17,9 @@

 package com.github.cloudml.zen.examples.ml

-import java.util.{Timer, TimerTask}
-import java.lang.ref.WeakReference
 import breeze.linalg.{SparseVector => BSV}
-import org.apache.spark.{Logging, SparkContext}
+import com.github.cloudml.zen.ml.util.Logging
+import org.apache.spark.SparkContext
 import org.apache.spark.mllib.linalg.{SparseVector => SSV}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.rdd.RDD

examples/src/main/scala/com/github/cloudml/zen/examples/ml/NetflixPrizeFM.scala

+2 −12

@@ -16,21 +16,12 @@
  */
 package com.github.cloudml.zen.examples.ml

-import java.text.SimpleDateFormat
-import java.util.{TimeZone, Locale}
-
-import breeze.linalg.{SparseVector => BSV}
 import com.github.cloudml.zen.ml.recommendation._
-import com.github.cloudml.zen.ml.util.SparkHacker
+import com.github.cloudml.zen.ml.util.Logging
 import org.apache.spark.graphx2.GraphXUtils
-import org.apache.spark.mllib.linalg.{SparseVector => SSV}
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.storage.StorageLevel
-import org.apache.spark.{Logging, SparkConf, SparkContext}
+import org.apache.spark.{SparkConf, SparkContext}
 import scopt.OptionParser

-import scala.collection.mutable.ArrayBuffer
-
 object NetflixPrizeFM extends Logging {

   case class Params(
@@ -113,7 +104,6 @@ object NetflixPrizeFM extends Logging {
     }
     val sc = new SparkContext(conf)
     sc.setCheckpointDir(checkpointDir)
-    SparkHacker.gcCleaner(60 * 10, 60 * 10, "NetflixPrizeFM")
     val (trainSet, testSet, _) = NetflixPrizeUtils.genSamplesWithTime(sc, input, numPartitions)
     val model = FM.trainRegression(trainSet, numIterations, stepSize, l2, rank, useAdaGrad, 1.0)
     model.save(sc, out)

examples/src/main/scala/com/github/cloudml/zen/examples/ml/NetflixPrizeMVM.scala

+2 −11

@@ -16,21 +16,13 @@
  */
 package com.github.cloudml.zen.examples.ml

-import java.text.SimpleDateFormat
-import java.util.{TimeZone, Locale}
-
-import breeze.linalg.{SparseVector => BSV}
 import com.github.cloudml.zen.ml.recommendation._
-import com.github.cloudml.zen.ml.util.SparkHacker
+import com.github.cloudml.zen.ml.util.Logging
 import org.apache.spark.graphx2.GraphXUtils
-import org.apache.spark.mllib.linalg.{SparseVector => SSV}
-import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.storage.StorageLevel
-import org.apache.spark.{Logging, SparkConf, SparkContext}
+import org.apache.spark.{SparkConf, SparkContext}
 import scopt.OptionParser

-import scala.collection.mutable.ArrayBuffer
-
 object NetflixPrizeMVM extends Logging {

   case class Params(
@@ -112,7 +104,6 @@ object NetflixPrizeMVM extends Logging {
     }
     val sc = new SparkContext(conf)
     sc.setCheckpointDir(checkpointDir)
-    SparkHacker.gcCleaner(60 * 10, 60 * 10, "NetflixPrizeMVM")
     val (trainSet, testSet, views) = NetflixPrizeUtils.genSamplesWithTime(sc, input, numPartitions)
     val fm = new MVMRegression(trainSet, stepSize, views, regular, 0.0, rank,
       useAdaGrad, useWeightedLambda, 1.0, StorageLevel.MEMORY_AND_DISK)

ml/pom.xml

+3 −3

@@ -20,11 +20,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>com.github.cloudml.zen</groupId>
-    <artifactId>zen-parent_2.10</artifactId>
-    <version>0.3-SNAPSHOT</version>
+    <artifactId>zen-parent_2.11</artifactId>
+    <version>0.4-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
-  <artifactId>zen-ml_2.10</artifactId>
+  <artifactId>zen-ml_2.11</artifactId>
   <name>Zen Project ML Library</name>
   <url>https://github.com/cloudml/zen/</url>
   <properties>

ml/src/main/scala/com/github/cloudml/zen/ml/linalg/BLAS.scala

+2 −3

@@ -17,10 +17,9 @@

 package com.github.cloudml.zen.ml.linalg

-import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS}
+import com.github.cloudml.zen.ml.util.Logging
+import com.github.fommil.netlib.{F2jBLAS, BLAS => NetlibBLAS}
 import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS}
-
-import org.apache.spark.Logging
 import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector}

 /**

ml/src/main/scala/com/github/cloudml/zen/ml/neuralNetwork/DBN.scala

+1 −1

@@ -17,7 +17,7 @@

 package com.github.cloudml.zen.ml.neuralNetwork

-import org.apache.spark.Logging
+import com.github.cloudml.zen.ml.util.Logging
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg.{Vector => SV}
 import org.apache.spark.rdd.RDD

ml/src/main/scala/com/github/cloudml/zen/ml/neuralNetwork/Layer.scala

+2 −2

@@ -19,9 +19,9 @@ package com.github.cloudml.zen.ml.neuralNetwork

 import java.util.Random

-import breeze.linalg.{DenseVector => BDV, DenseMatrix => BDM, Axis => brzAxis, sum => brzSum, max => brzMax}
-import org.apache.spark.Logging
+import breeze.linalg.{Axis => brzAxis, DenseMatrix => BDM, DenseVector => BDV, max => brzMax, sum => brzSum}
 import NNUtil._
+import com.github.cloudml.zen.ml.util.Logging

 private[ml] trait Layer extends Serializable {

ml/src/main/scala/com/github/cloudml/zen/ml/neuralNetwork/MLP.scala

+4 −8

@@ -17,18 +17,14 @@

 package com.github.cloudml.zen.ml.neuralNetwork

-import java.util.Random
-
-import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV, argmax => brzArgMax,
-  axpy => brzAxpy, max => brzMax, norm => brzNorm, sum => brzSum}
-import breeze.numerics.signum
+import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV, argmax => brzArgMax, axpy => brzAxpy}
 import com.github.cloudml.zen.ml.linalg.BLAS
-import com.github.cloudml.zen.ml.util.{LoaderUtils, SparkUtils}
+import com.github.cloudml.zen.ml.util.{LoaderUtils, Logging, SparkUtils}
 import com.github.cloudml.zen.ml.optimization._
-import org.apache.spark.{SparkContext, Logging}
+import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg.{DenseVector => SDV, SparseVector => SSV, Vector => SV}
-import org.apache.spark.mllib.util.{Loader, Saveable}
+import org.apache.spark.mllib.util.Loader
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 import org.json4s.DefaultFormats

ml/src/main/scala/com/github/cloudml/zen/ml/neuralNetwork/MLPModel.scala

+2 −1

@@ -21,9 +21,10 @@ import java.util.Random

 import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
 import breeze.numerics.signum
+import com.github.cloudml.zen.ml.util.Logging
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.util.Saveable
-import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.SparkContext

 @Experimental
 class MLPModel(

ml/src/main/scala/com/github/cloudml/zen/ml/neuralNetwork/RBM.scala

+2 −3

@@ -17,12 +17,12 @@

 package com.github.cloudml.zen.ml.neuralNetwork

-import breeze.linalg.{Axis => BrzAxis, DenseMatrix => BDM, DenseVector => BDV, axpy => brzAxpy, sum => brzSum}
+import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, axpy => brzAxpy}
 import com.github.cloudml.zen.ml.linalg.BLAS
 import com.github.cloudml.zen.ml.util._
 import com.github.cloudml.zen.ml.optimization._
 import org.apache.spark.mllib.util.Loader
-import org.apache.spark.{SparkContext, Logging}
+import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg.{DenseVector => SDV, Vector => SV}
 import org.apache.spark.rdd.RDD
@@ -31,7 +31,6 @@ import org.json4s.DefaultFormats
 import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._

-
 @Experimental
 object RBM extends Logging with Loader[RBMModel] {

ml/src/main/scala/com/github/cloudml/zen/ml/neuralNetwork/RBMModel.scala

+8 −2

@@ -23,7 +23,7 @@ import breeze.linalg.{Axis => BrzAxis, DenseMatrix => BDM, DenseVector => BDV, s
 import com.github.cloudml.zen.ml.util._
 import org.apache.commons.math3.random.JDKRandomGenerator
 import org.apache.spark.mllib.util.Saveable
-import org.apache.spark.{SparkContext, Logging}
+import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Experimental

 @Experimental
@@ -38,13 +38,19 @@ class RBMModel(
   def this(
       numIn: Int,
       numOut: Int,
-      dropout: Double = 0.5D) {
+      dropout: Double) {
     this(NNUtil.initUniformDistWeight(numIn, numOut, math.sqrt(6D / (numIn + numOut))),
       NNUtil.initializeBias(numIn),
       NNUtil.initializeBias(numOut),
       dropout)
   }

+  def this(
+      numIn: Int,
+      numOut: Int) {
+    this(numIn, numOut, 0.5D)
+  }
+
   require(dropoutRate >= 0 && dropoutRate < 1)
   @transient protected lazy val rand: Random = new JDKRandomGenerator()
   @transient protected[ml] lazy val visibleLayer: Layer = {
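The default value dropout: Double = 0.5D is replaced here by an explicit auxiliary constructor, so existing two-argument call sites keep compiling while the three-argument constructor no longer carries a default. A small usage sketch of the resulting call shapes (assuming zen-ml 0.4-SNAPSHOT on the classpath; the object name and layer sizes below are illustrative):

import com.github.cloudml.zen.ml.neuralNetwork.RBMModel

object RBMModelCtorExample {
  // Two-argument form resolves to the new auxiliary constructor, dropout = 0.5.
  val withDefaultDropout = new RBMModel(784, 500)
  // Three-argument form now requires the dropout value explicitly.
  val withExplicitDropout = new RBMModel(784, 500, 0.3D)
}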

ml/src/main/scala/com/github/cloudml/zen/ml/neuralNetwork/StackedRBM.scala

+3 −3

@@ -17,12 +17,12 @@

 package com.github.cloudml.zen.ml.neuralNetwork

-import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, sum => brzSum}
+import breeze.linalg.{DenseMatrix => BDM}
+import com.github.cloudml.zen.ml.util.Logging
 import com.github.cloudml.zen.ml.util.SparkUtils._
-import org.apache.spark.Logging
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.broadcast.Broadcast
-import org.apache.spark.mllib.linalg.{DenseMatrix => SDM, DenseVector => SDV, Matrix => SM, SparseMatrix => SSM, SparseVector => SSV, Vector => SV}
+import org.apache.spark.mllib.linalg.{Vector => SV}
 import org.apache.spark.rdd.RDD

 @Experimental

ml/src/main/scala/com/github/cloudml/zen/ml/optimization/GradientDescent.scala

+3 −4

@@ -18,12 +18,11 @@
 package com.github.cloudml.zen.ml.optimization

 import scala.collection.mutable.ArrayBuffer
-
 import com.github.cloudml.zen.ml.linalg.BLAS
-import org.apache.spark.annotation.{Experimental, DeveloperApi}
-import org.apache.spark.Logging
+import com.github.cloudml.zen.ml.util.Logging
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.rdd.RDD
-import org.apache.spark.mllib.linalg.{Vectors, Vector}
+import org.apache.spark.mllib.linalg.{Vector, Vectors}

 /**
  * Class used to solve an optimization problem using Gradient Descent.

ml/src/main/scala/com/github/cloudml/zen/ml/optimization/LBFGS.scala

+1 −5

@@ -17,15 +17,11 @@

 package com.github.cloudml.zen.ml.optimization

-import com.github.cloudml.zen.ml.util.SparkUtils
+import com.github.cloudml.zen.ml.util.{Logging, SparkUtils}

 import scala.collection.mutable
-import scala.collection.mutable.ArrayBuffer
-
 import breeze.linalg.{DenseVector => BDV}
 import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS}
-
-import org.apache.spark.Logging
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import com.github.cloudml.zen.ml.linalg.BLAS.axpy

ml/src/main/scala/com/github/cloudml/zen/ml/recommendation/BSFM.scala

+3 −2

@@ -19,10 +19,11 @@ package com.github.cloudml.zen.ml.recommendation

 import com.github.cloudml.zen.ml.partitioner._
 import java.util.{Random => JavaRandom}
+
 import com.github.cloudml.zen.ml.recommendation.BSFM._
 import com.github.cloudml.zen.ml.util.SparkUtils._
-import com.github.cloudml.zen.ml.util.{XORShiftRandom, Utils}
-import org.apache.spark.{SparkContext, Logging}
+import com.github.cloudml.zen.ml.util.{Logging, Utils, XORShiftRandom}
+import org.apache.spark.SparkContext
 import org.apache.spark.graphx2._
 import org.apache.spark.graphx2.impl.{EdgeRDDImpl, GraphImpl}
 import org.apache.spark.mllib.linalg.{Vector => SV}
