Skip to content

Commit cda07ed

Browse files
author
Bhoppi Chaw
committed Sep 2, 2015
remove graphx dependency, migrate to graphx2
1 parent 5d8f4cb commit cda07ed

File tree

14 files changed

+49
-45
lines changed

14 files changed

+49
-45
lines changed
 

‎examples/src/main/scala/com/github/cloudml/zen/examples/ml/LDADriver.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import com.github.cloudml.zen.ml.util.SparkHacker
2323
import breeze.linalg.{SparseVector => BSV}
2424
import org.apache.hadoop.fs.Path
2525
import org.apache.spark.deploy.SparkHadoopUtil
26-
import org.apache.spark.graphx.GraphXUtils
26+
import org.apache.spark.graphx2.GraphXUtils
2727
import org.apache.spark.rdd.RDD
2828
import org.apache.spark.storage.StorageLevel
2929
import org.apache.spark.{SparkContext, SparkConf}

‎ml/src/main/scala/com/github/cloudml/zen/ml/clustering/LDA.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ import LDADefines._
2525
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, sum => brzSum}
2626
import com.github.cloudml.zen.ml.partitioner._
2727
import com.github.cloudml.zen.ml.util.XORShiftRandom
28-
import org.apache.spark.graphx._
29-
import org.apache.spark.graphx.impl.GraphImpl
28+
import org.apache.spark.graphx2._
29+
import org.apache.spark.graphx2.impl.GraphImpl
3030
import org.apache.spark.mllib.linalg.{SparseVector => SSV, Vector => SV}
3131
import org.apache.spark.mllib.linalg.distributed.{MatrixEntry, RowMatrix}
3232
import org.apache.spark.rdd.RDD

‎ml/src/main/scala/com/github/cloudml/zen/ml/clustering/LDAAlgorithm.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import java.util.concurrent.CountDownLatch
2424
import LDADefines._
2525
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
2626
import com.github.cloudml.zen.ml.util._
27-
import org.apache.spark.graphx._
28-
import org.apache.spark.graphx.impl.GraphImpl
27+
import org.apache.spark.graphx2._
28+
import org.apache.spark.graphx2.impl.GraphImpl
2929
import org.apache.spark.util.collection.AppendOnlyMap
3030

3131

‎ml/src/main/scala/com/github/cloudml/zen/ml/clustering/LDADefines.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import com.github.cloudml.zen.ml.partitioner._
2323
import com.github.cloudml.zen.ml.util.{FTree, AliasTable, XORShiftRandom}
2424
import breeze.linalg.{SparseVector => BSV, DenseVector => BDV}
2525
import org.apache.spark.SparkConf
26-
import org.apache.spark.graphx._
26+
import org.apache.spark.graphx2._
2727

2828

2929
object LDADefines {

‎ml/src/main/scala/com/github/cloudml/zen/ml/clustering/LDAModel.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import com.google.common.base.Charsets
2828
import com.google.common.io.Files
2929
import org.apache.hadoop.io.{NullWritable, Text}
3030
import org.apache.hadoop.mapred.TextOutputFormat
31-
import org.apache.spark.graphx._
31+
import org.apache.spark.graphx2._
3232
import org.apache.spark.mllib.util.{Loader, Saveable}
3333
import org.apache.spark.rdd.RDD
3434
import org.apache.spark.storage.StorageLevel

‎ml/src/main/scala/com/github/cloudml/zen/ml/partitioner/BBRPartitioner.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ import scala.reflect.ClassTag
2222
import com.github.cloudml.zen.ml.util.{AliasTable, XORShiftRandom}
2323
import breeze.linalg.{SparseVector => BSV}
2424
import org.apache.spark.Partitioner
25-
import org.apache.spark.graphx._
26-
import org.apache.spark.graphx.impl.GraphImpl
25+
import org.apache.spark.graphx2._
26+
import org.apache.spark.graphx2.impl.GraphImpl
2727
import org.apache.spark.storage.StorageLevel
2828

2929

‎ml/src/main/scala/com/github/cloudml/zen/ml/partitioner/DBHPartitioner.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ package com.github.cloudml.zen.ml.partitioner
2020
import scala.math._
2121
import scala.reflect.ClassTag
2222
import org.apache.spark.Partitioner
23-
import org.apache.spark.graphx._
24-
import org.apache.spark.graphx.impl.GraphImpl
23+
import org.apache.spark.graphx2._
24+
import org.apache.spark.graphx2.impl.GraphImpl
2525
import org.apache.spark.storage.StorageLevel
2626

2727
/**

‎ml/src/main/scala/com/github/cloudml/zen/ml/partitioner/VSDLPPartitioner.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ import scala.reflect.ClassTag
2121
import breeze.linalg.{DenseMatrix, SparseVector => BSV}
2222
import com.github.cloudml.zen.ml.util.{FTree, XORShiftRandom}
2323
import org.apache.spark.Partitioner
24-
import org.apache.spark.graphx._
25-
import org.apache.spark.graphx.impl.GraphImpl
24+
import org.apache.spark.graphx2._
25+
import org.apache.spark.graphx2.impl.GraphImpl
2626
import org.apache.spark.storage.StorageLevel
2727

2828

‎ml/src/main/scala/com/github/cloudml/zen/ml/recommendation/BSFM.scala

+5-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package com.github.cloudml.zen.ml.recommendation
1919

20-
import com.github.cloudml.zen.ml.partitioner.DBHPartitioner
20+
import com.github.cloudml.zen.ml.DBHPartitioner
2121
import com.github.cloudml.zen.ml.recommendation.BSFM._
2222
import com.github.cloudml.zen.ml.util.SparkUtils._
2323
import com.github.cloudml.zen.ml.util.Utils
@@ -113,10 +113,10 @@ private[ml] abstract class BSFM extends Serializable with Logging {
113113
primes = Primes.nextPrime(primes + 1)
114114
val startedAt = System.nanoTime()
115115
val previousVertices = vertices
116-
val margin = forward(iter)
117-
var gradient = backward(margin, iter)
118-
gradient = updateGradientSum(gradient, iter)
119-
vertices = updateWeight(gradient, iter)
116+
val margin = forward(innerIter)
117+
var gradient = backward(margin, innerIter)
118+
gradient = updateGradientSum(gradient, innerIter)
119+
vertices = updateWeight(gradient, innerIter)
120120
checkpointVertices()
121121
vertices.count()
122122
dataSet = GraphImpl.fromExistingRDDs(vertices, edges)

‎ml/src/main/scala/com/github/cloudml/zen/ml/recommendation/FM.scala

+6-5
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
package com.github.cloudml.zen.ml.recommendation
1919

2020
import java.util.{Random => JavaRandom}
21-
import com.github.cloudml.zen.ml.partitioner.DBHPartitioner
21+
22+
import com.github.cloudml.zen.ml.DBHPartitioner
2223
import com.github.cloudml.zen.ml.recommendation.FM._
2324
import com.github.cloudml.zen.ml.util.SparkUtils._
2425
import com.github.cloudml.zen.ml.util.Utils
@@ -111,10 +112,10 @@ private[ml] abstract class FM extends Serializable with Logging {
111112
logInfo(s"Start train (Iteration $iter/$iterations)")
112113
val startedAt = System.nanoTime()
113114
val previousVertices = vertices
114-
val margin = forward(iter)
115-
var (_, rmse, gradient) = backward(margin, iter)
116-
gradient = updateGradientSum(gradient, iter)
117-
vertices = updateWeight(gradient, iter)
115+
val margin = forward(innerIter)
116+
var (_, rmse, gradient) = backward(margin, innerIter)
117+
gradient = updateGradientSum(gradient, innerIter)
118+
vertices = updateWeight(gradient, innerIter)
118119
checkpointVertices()
119120
vertices.count()
120121
dataSet = GraphImpl.fromExistingRDDs(vertices, edges)

‎ml/src/main/scala/com/github/cloudml/zen/ml/recommendation/MVM.scala

+6-5
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
package com.github.cloudml.zen.ml.recommendation
1919

2020
import java.util.{Random => JavaRandom}
21-
import com.github.cloudml.zen.ml.partitioner.DBHPartitioner
21+
22+
import com.github.cloudml.zen.ml.DBHPartitioner
2223
import com.github.cloudml.zen.ml.recommendation.MVM._
2324
import com.github.cloudml.zen.ml.util.SparkUtils._
2425
import com.github.cloudml.zen.ml.util.Utils
@@ -118,10 +119,10 @@ private[ml] abstract class MVM extends Serializable with Logging {
118119
logInfo(s"Start train (Iteration $iter/$iterations)")
119120
val startedAt = System.nanoTime()
120121
val previousVertices = vertices
121-
val margin = forward(iter)
122-
var (_, rmse, gradient) = backward(margin, iter)
123-
gradient = updateGradientSum(gradient, iter)
124-
vertices = updateWeight(gradient, iter)
122+
val margin = forward(innerIter)
123+
var (_, rmse, gradient) = backward(margin, innerIter)
124+
gradient = updateGradientSum(gradient, innerIter)
125+
vertices = updateWeight(gradient, innerIter)
125126
checkpointVertices()
126127
vertices.count()
127128
dataSet = GraphImpl.fromExistingRDDs(vertices, edges)

‎ml/src/main/scala/org/apache/spark/graphx2/Edge.scala

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ case class Edge[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED]
5757
}
5858

5959
object Edge {
60+
// scalastyle:off
6061
def lexicographicOrdering[ED] = new Ordering[Edge[ED]] {
6162
override def compare(a: Edge[ED], b: Edge[ED]): Int = {
6263
if (a.srcId == b.srcId) {

‎ml/src/main/scala/org/apache/spark/graphx2/impl/EdgePartitionBuilder.scala

+1
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ private[impl] case class EdgeWithLocalIds[@specialized ED](
128128
srcId: VertexId, dstId: VertexId, localSrcId: Int, localDstId: Int, attr: ED)
129129

130130
private[impl] object EdgeWithLocalIds {
131+
// scalastyle:off
131132
implicit def lexicographicOrdering[ED]: Ordering[EdgeWithLocalIds[ED]] =
132133
new Ordering[EdgeWithLocalIds[ED]] {
133134
override def compare(a: EdgeWithLocalIds[ED], b: EdgeWithLocalIds[ED]): Int = {

‎scalastyle-config.xml

+17-17
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
<scalastyle>
2626
<name>Scalastyle standard configuration</name>
27-
<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
27+
<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"/>
2828
<!-- <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="true"> -->
2929
<!-- <parameters> -->
3030
<!-- <parameter name="maxFileLength"><![CDATA[800]]></parameter> -->
@@ -50,9 +50,9 @@
5050
*/]]></parameter>
5151
</parameters>
5252
</check>
53-
<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
54-
<check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="false"></check>
55-
<check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
53+
<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"/>
54+
<check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="false"/>
55+
<check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"/>
5656
<check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
5757
<parameters>
5858
<parameter name="maxLineLength"><![CDATA[120]]></parameter>
@@ -62,20 +62,20 @@
6262
</check>
6363
<check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
6464
<parameters>
65-
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
65+
<parameter name="regex"><![CDATA[[A-Z][A-Za-z0-9]*]]></parameter>
6666
</parameters>
6767
</check>
6868
<check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
6969
<parameters>
70-
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
70+
<parameter name="regex"><![CDATA[[A-Z][A-Za-z0-9]*]]></parameter>
7171
</parameters>
7272
</check>
7373
<check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
7474
<parameters>
75-
<parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter>
75+
<parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
7676
</parameters>
7777
</check>
78-
<check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="false"></check>
78+
<check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="false"/>
7979
<!-- <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true"> -->
8080
<!-- <parameters> -->
8181
<!-- <parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter> -->
@@ -91,8 +91,8 @@
9191
<!-- <parameter name="ignore"><![CDATA[-1,0,1,2,3]]></parameter> -->
9292
<!-- </parameters> -->
9393
<!-- </check> -->
94-
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
95-
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
94+
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"/>
95+
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"/>
9696
<!-- <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="true"></check> -->
9797
<!-- <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="true"></check> -->
9898
<!-- <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="true"></check> -->
@@ -114,8 +114,8 @@
114114
<!-- <parameter name="maximum"><![CDATA[10]]></parameter> -->
115115
<!-- </parameters> -->
116116
<!-- </check> -->
117-
<check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
118-
<check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
117+
<check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"/>
118+
<check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"/>
119119
<check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
120120
<parameters>
121121
<parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
@@ -137,9 +137,9 @@
137137
<!-- <parameter name="maxMethods"><![CDATA[30]]></parameter> -->
138138
<!-- </parameters> -->
139139
<!-- </check> -->
140-
<check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
141-
<check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
142-
<check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
143-
<check level="error" class="org.apache.spark.scalastyle.NonASCIICharacterChecker" enabled="false"></check>
144-
<check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
140+
<check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"/>
141+
<check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"/>
142+
<check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"/>
143+
<check level="error" class="org.apache.spark.scalastyle.NonASCIICharacterChecker" enabled="false"/>
144+
<check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"/>
145145
</scalastyle>

0 commit comments

Comments
 (0)
Please sign in to comment.