Commit 102e401

Ground work for making mleap scala 2.13 compatible (#864)
1. Using sbt 1.9.
2. Using the most recent versions of the Scala libraries where possible.
3. Replaced scala-arm with the more idiomatic Scala 2.13+ `scala.util.Using` (scala-arm had not been updated for a long time); see the sketch below.
4. Updated scalatest.
5. Updated ClassloaderUtils to use Java 9-compatible code.
6. Replaced commons-io with Java 9+ code.
1 parent d1d4afd commit 102e401
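
For context on point 3, scala-arm's `managed` blocks map onto the Scala 2.13 standard library as below; a minimal sketch of the pattern (the file path is illustrative):

```scala
import java.io.{BufferedWriter, FileWriter}
import scala.util.{Try, Using}

// scala-arm (pre-2.13 style):
//   (for (out <- managed(new BufferedWriter(new FileWriter("/tmp/example.txt")))) yield {
//     out.write("hello")
//   }).tried
// Scala 2.13 standard library: Using opens the resource, runs the block,
// closes the resource even on failure, and wraps the outcome in a Try.
val result: Try[Unit] =
  Using(new BufferedWriter(new FileWriter("/tmp/example.txt"))) { out =>
    out.write("hello")
  }
```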

File tree

227 files changed: +1093 −1010 lines


.travis.yml

Lines changed: 8 additions & 8 deletions
@@ -1,6 +1,6 @@
 # Use container-based infrastructure
 os: linux
-dist: focal
+dist: jammy
 
 # Set default python env
 # because the xgboost-spark library when running training code, it will
@@ -22,7 +22,7 @@ services:
 
 language: scala
 scala:
-  - 2.12.13
+  - 2.12.18
 jdk:
   - openjdk11
 
@@ -43,27 +43,27 @@ jobs:
 
     - name: "Python 3.7 tests"
       language: python
-      python: 3.7.9
+      python: 3.7.15
      install:
        - pip install tox
      before_script:
        - >
          curl
-         --create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar
-         https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar
+         --create-dirs -L -o /home/travis/.sbt/launchers/1.9.4/sbt-launch.jar
+         https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.9.4/sbt-launch-1.9.4.jar
      script:
        - make test_python37
 
    - name: "Python 3.8 tests"
      language: python
-      python: 3.8.15
+      python: 3.8.16
      install:
        - pip install tox
      before_script:
        - >
          curl
-         --create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar
-         https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar
+         --create-dirs -L -o /home/travis/.sbt/launchers/1.9.4/sbt-launch.jar
+         https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.9.4/sbt-launch-1.9.4.jar
      script:
        - make test_python38
 

README.md

Lines changed: 5 additions & 5 deletions
@@ -117,7 +117,7 @@ import org.apache.spark.ml.bundle.SparkBundleContext
 import org.apache.spark.ml.feature.{Binarizer, StringIndexer}
 import org.apache.spark.sql._
 import org.apache.spark.sql.functions._
-import resource._
+import scala.util.Using
 
 val datasetName = "./examples/spark-demo.csv"
 
@@ -143,7 +143,7 @@ import resource._
 
 // then serialize pipeline
 val sbc = SparkBundleContext().withDataset(pipeline.transform(dataframe))
-for(bf <- managed(BundleFile("jar:file:/tmp/simple-spark-pipeline.zip"))) {
+Using(BundleFile("jar:file:/tmp/simple-spark-pipeline.zip")) { bf =>
   pipeline.writeBundle.save(bf)(sbc).get
 }
 ```
@@ -215,9 +215,9 @@ Because we export Spark and Scikit-learn pipelines to a standard format, we can
 ```scala
 import ml.combust.bundle.BundleFile
 import ml.combust.mleap.runtime.MleapSupport._
-import resource._
+import scala.util.Using
 // load the Spark pipeline we saved in the previous section
-val bundle = (for(bundleFile <- managed(BundleFile("jar:file:/tmp/simple-spark-pipeline.zip"))) yield {
+val bundle = Using(BundleFile("jar:file:/tmp/simple-spark-pipeline.zip")) { bundleFile =>
   bundleFile.loadMleapBundle().get
 }).opt.get
 
@@ -271,7 +271,7 @@ For more documentation, please see our [documentation](https://combust.github.io
 
 ## Building
 
-Please ensure you have sbt 1.9.3, java 11, scala 2.12.13
+Please ensure you have sbt 1.9.3, java 11, scala 2.12.13
 
 1. Initialize the git submodules `git submodule update --init --recursive`
 2. Run `sbt compile`
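
Note that `scala.util.Using` returns a `scala.util.Try`, so callers unwrap the result rather than using scala-arm's `.opt`; a minimal self-contained sketch (the file path is illustrative):

```scala
import java.io.{BufferedReader, FileReader}
import scala.util.{Failure, Success, Using}

// Using yields a Try, so the caller decides how to unwrap it:
// .get to throw on failure, .toOption for an Option, or a match.
val firstLine = Using(new BufferedReader(new FileReader("/tmp/spark-demo.csv"))) { reader =>
  reader.readLine()
}

firstLine match {
  case Success(line) => println(s"read: $line")
  case Failure(err)  => println(s"failed: $err")
}
```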

bundle-hdfs/src/test/scala/ml/bundle/hdfs/HadoopBundleFileSystemSpec.scala

Lines changed: 2 additions & 2 deletions
@@ -5,9 +5,9 @@ import java.nio.file.{Files, Paths}
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
-import org.scalatest.FunSpec
+import org.scalatest.funspec.AnyFunSpec
 
-class HadoopBundleFileSystemSpec extends FunSpec {
+class HadoopBundleFileSystemSpec extends org.scalatest.funspec.AnyFunSpec {
   private val fs = FileSystem.get(new Configuration())
   private val bundleFs = new HadoopBundleFileSystem(fs)
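
ScalaTest 3.1+ moved the spec styles into dedicated packages, so `org.scalatest.FunSpec` becomes `org.scalatest.funspec.AnyFunSpec`; a minimal sketch of the new style using a hypothetical spec:

```scala
import org.scalatest.funspec.AnyFunSpec

// ScalaTest 3.1+ style: AnyFunSpec replaces the removed org.scalatest.FunSpec.
class ExampleSpec extends AnyFunSpec {
  describe("a list") {
    it("knows its size") {
      assert(List(1, 2, 3).size == 3)
    }
  }
}
```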

bundle-ml/src/main/scala/ml/combust/bundle/BundleFile.scala

Lines changed: 17 additions & 9 deletions
@@ -4,17 +4,15 @@ import java.io.{Closeable, File}
 import java.net.URI
 import java.nio.file.{FileSystem, FileSystems, Files, Path}
 import java.util.stream.Collectors
-
 import ml.combust.bundle.dsl.{Bundle, BundleInfo}
 import ml.combust.bundle.fs.BundleFileSystem
+import ml.combust.bundle.json.JsonSupport.bundleBundleInfoFormat
 import ml.combust.bundle.serializer.BundleSerializer
-import ml.combust.bundle.json.JsonSupport._
 import spray.json._
-import resource._
 
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
 import scala.language.implicitConversions
-import scala.util.Try
+import scala.util.{Try, Using}
 
 /**
   * Created by hollinwilkins on 12/24/16.
@@ -40,6 +38,14 @@ object BundleFile {
     apply(new URI(unbackslash(uri)))
   }
 
+  implicit def apply(path: Path): BundleFile = {
+    if(path.getFileName.toString.endsWith("zip")) {
+      apply(s"jar:${path.toUri}")
+    } else {
+      apply(path.toUri)
+    }
+  }
+
   implicit def apply(file: File): BundleFile = {
     val uri: String = if (file.getPath.endsWith(".zip")) {
       s"jar:${file.toURI.toString}"
@@ -101,10 +107,12 @@ case class BundleFile(fs: FileSystem,
 
   def writeNote(name: String, note: String): Try[String] = {
     Files.createDirectories(fs.getPath(path.toString, "notes"))
-    (for(out <- managed(Files.newOutputStream(fs.getPath(path.toString, "notes", name)))) yield {
-      out.write(note.getBytes)
-      note
-    }).tried
+    Using(Files.newOutputStream(fs.getPath(path.toString, "notes", name))) {
+      out => {
+        out.write(note.getBytes)
+        note
+      }
+    }
   }
 
   def readNote(name: String): String = {
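
With the new `apply(path: Path)` overload, a `java.nio.file.Path` converts straight to a `BundleFile`; a usage sketch based on the overload added above (the paths are illustrative):

```scala
import java.nio.file.Paths
import ml.combust.bundle.BundleFile

// A path ending in "zip" is opened through a "jar:file:..." URI,
// any other path through its plain file URI.
val zipBundle: BundleFile = BundleFile(Paths.get("/tmp/simple-spark-pipeline.zip"))
val dirBundle: BundleFile = BundleFile(Paths.get("/tmp/simple-spark-pipeline"))
```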

bundle-ml/src/main/scala/ml/combust/bundle/BundleRegistry.scala

Lines changed: 2 additions & 2 deletions
@@ -7,7 +7,7 @@ import ml.combust.bundle.fs.BundleFileSystem
 import ml.combust.bundle.op.{OpModel, OpNode}
 import ml.combust.mleap.ClassLoaderUtil
 
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
 
 /** Trait for classes that contain a bundle registry.
   *
@@ -39,7 +39,7 @@ object BundleRegistry {
 
     val br = ops.foldLeft(Map[String, OpNode[_, _, _]]()) {
       (m, opClass) =>
-        val opNode = cl.loadClass(opClass).newInstance().asInstanceOf[OpNode[_, _, _]]
+        val opNode = cl.loadClass(opClass).getDeclaredConstructor().newInstance().asInstanceOf[OpNode[_, _, _]]
         m + (opNode.Model.opName -> opNode)
     }.values.foldLeft(BundleRegistry(cl)) {
       (br, opNode) => br.register(opNode)
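
`Class.newInstance()` has been deprecated since Java 9; the replacement reflects the no-arg constructor explicitly. A minimal sketch of the pattern:

```scala
// Java 9+ replacement for the deprecated Class.newInstance():
// fetch the no-arg constructor, then instantiate through it.
val clazz = Class.forName("java.lang.StringBuilder")
val instance = clazz.getDeclaredConstructor().newInstance()
```

The import change above is likewise a rename: `scala.jdk.CollectionConverters._` provides the same `.asScala`/`.asJava` extension methods as the deprecated `scala.collection.JavaConverters._`.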

bundle-ml/src/main/scala/ml/combust/bundle/BundleWriter.scala

Lines changed: 6 additions & 9 deletions
@@ -2,13 +2,10 @@ package ml.combust.bundle
 
 import java.net.URI
 import java.nio.file.{Files, Paths}
-
 import ml.combust.bundle.dsl.Bundle
-import ml.combust.bundle.fs.BundleFileSystem
-import ml.combust.bundle.serializer.{BundleSerializer, SerializationFormat}
+import ml.combust.bundle.serializer.{BundleSerializer, SerializationFormat}
 
-import scala.util.Try
-import resource._
+import scala.util.{Try, Using}
 
 /**
   * Created by hollinwilkins on 12/24/16.
@@ -37,16 +34,16 @@ Transformer <: AnyRef](root: Transformer,
   def save(uri: URI)
           (implicit context: Context): Try[Bundle[Transformer]] = uri.getScheme match {
     case "jar" | "file" =>
-      (for (bf <- managed(BundleFile(uri))) yield {
+      Using(BundleFile(uri)) { bf =>
         save(bf).get
-      }).tried
+      }
     case _ =>
       val tmpDir = Files.createTempDirectory("bundle")
       val tmp = Paths.get(tmpDir.toString, "tmp.zip")
 
-      (for (bf <- managed(BundleFile(tmp.toFile))) yield {
+      Using(BundleFile(tmp.toFile)) { bf =>
         save(bf).get
-      }).tried.map {
+      }.map {
         r =>
           context.bundleRegistry.fileSystemForUri(uri).save(uri, tmp.toFile)
           r
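
Where scala-arm composed several `managed` resources in one for-comprehension, the standard library offers `Using.Manager`; a minimal sketch (the paths and `copy` helper are illustrative):

```scala
import java.io.{FileInputStream, FileOutputStream}
import scala.util.{Try, Using}

// Using.Manager acquires any number of resources and releases them
// in reverse order of acquisition, failures included.
def copy(src: String, dst: String): Try[Unit] = Using.Manager { use =>
  val in  = use(new FileInputStream(src))
  val out = use(new FileOutputStream(dst))
  in.transferTo(out)  // java.io.InputStream.transferTo, Java 9+
  ()
}
```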

bundle-ml/src/main/scala/ml/combust/bundle/serializer/BundleSerializer.scala

Lines changed: 8 additions & 11 deletions
@@ -2,14 +2,12 @@ package ml.combust.bundle.serializer
 
 import java.io.Closeable
 import java.nio.file.Files
-
 import ml.combust.bundle.{BundleContext, BundleFile, HasBundleRegistry}
 import ml.combust.bundle.dsl.Bundle
 import ml.combust.bundle.json.JsonSupport._
 import spray.json._
-import resource._
 
-import scala.util.Try
+import scala.util.{Try, Using}
 
 /** Class for serializing/deserializing Bundle.ML [[ml.combust.bundle.dsl.Bundle]] objects.
   *
@@ -28,18 +26,17 @@ case class BundleSerializer[Context](context: Context,
     */
   def write[Transformer <: AnyRef](bundle: Bundle[Transformer]): Try[Bundle[Transformer]] = Try {
     val bundleContext = bundle.bundleContext(context, hr.bundleRegistry, file.fs, file.path)
-    implicit val format = bundleContext.format
-
     Files.createDirectories(file.path)
     NodeSerializer(bundleContext.bundleContext("root")).write(bundle.root).flatMap {
       _ =>
-        (for (out <- managed(Files.newOutputStream(bundleContext.file(Bundle.bundleJson)))) yield {
-          val json = bundle.info.asBundle.toJson.prettyPrint.getBytes("UTF-8")
-          out.write(json)
-          bundle
-        }).tried
+        Using(Files.newOutputStream(bundleContext.file(Bundle.bundleJson))) {
+          out =>
+            val json = bundle.info.asBundle.toJson.prettyPrint.getBytes("UTF-8")
+            out.write(json)
+            bundle
+        }
     }
-  }.flatMap(identity)
+  }.flatten
 
   /** Read a bundle from the path.
     *
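
`Try { ... }` wrapped around code that itself returns a `Try` produces a `Try[Try[A]]`; `.flatten` collapses the nesting directly, replacing the older `.flatMap(identity)` idiom (the same change appears in NodeSerializer below). A minimal sketch:

```scala
import scala.util.Try

// Wrapping Try-returning code in Try(...) nests the type; .flatten
// collapses Try[Try[Int]] to Try[Int], like the old .flatMap(identity).
val nested: Try[Try[Int]] = Try(Try("42".toInt))
val flat: Try[Int] = nested.flatten
```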
bundle-ml/src/main/scala/ml/combust/bundle/serializer/FileUtil.scala

Lines changed: 18 additions & 54 deletions
@@ -1,76 +1,40 @@
 package ml.combust.bundle.serializer
 
-import java.io.{File, FileInputStream, FileOutputStream}
-import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}
-
-import resource._
+import java.io.File
+import java.util.zip.{ZipInputStream, ZipOutputStream}
 
 /**
   * Created by hollinwilkins on 9/11/16.
   */
+@deprecated("Prefer ml.combust.bundle.util.FileUtil object.")
 case class FileUtil() {
+  import ml.combust.bundle.util.{FileUtil => FileUtils}
+  @deprecated("use FileUtil.rmRF(Path).")
   def rmRF(path: File): Array[(String, Boolean)] = {
-    Option(path.listFiles).map(_.flatMap(f => rmRF(f))).getOrElse(Array()) :+ (path.getPath -> path.delete)
+    FileUtils.rmRF(path.toPath)
   }
 
+  @deprecated("use extract(Path, Path).")
   def extract(source: File, dest: File): Unit = {
-    dest.mkdirs()
-    for(in <- managed(new ZipInputStream(new FileInputStream(source)))) {
-      extract(in, dest)
-    }
+    FileUtils.extract(source.toPath, dest.toPath)
   }
 
+  @deprecated("use extract(ZipInputStream, Path).")
   def extract(in: ZipInputStream, dest: File): Unit = {
-    dest.mkdirs()
-    val buffer = new Array[Byte](1024 * 1024)
-
-    var entry = in.getNextEntry
-    while(entry != null) {
-      if(entry.isDirectory) {
-        new File(dest, entry.getName).mkdirs()
-      } else {
-        val filePath = new File(dest, entry.getName)
-        for(out <- managed(new FileOutputStream(filePath))) {
-          var len = in.read(buffer)
-          while(len > 0) {
-            out.write(buffer, 0, len)
-            len = in.read(buffer)
-          }
-        }
-      }
-      entry = in.getNextEntry
-    }
+    FileUtils.extract(in, dest.toPath)
   }
 
+  @deprecated("use FileUtil.zip(Path, Path).")
   def zip(source: File, dest: File): Unit = {
-    for(out <- managed(new ZipOutputStream(new FileOutputStream(dest)))) {
-      zip(source, out)
-    }
+    FileUtils.zip(source.toPath, dest.toPath)
   }
 
-  def zip(source: File, dest: ZipOutputStream): Unit = zip(source, source, dest)
+  @deprecated("use FileUtil.zip(Path, ZipOutputStream).")
+  def zip(source: File, dest: ZipOutputStream): Unit = FileUtils.zip(source.toPath, source.toPath, dest)
 
+  @deprecated("use FileUtil.zip(Path, Path, ZipOutputStream).")
   def zip(base: File, source: File, dest: ZipOutputStream): Unit = {
-    val buffer = new Array[Byte](1024 * 1024)
-
-    for(files <- Option(source.listFiles);
-        file <- files) {
-      val name = file.toString.substring(base.toString.length + 1)
-
-      if(file.isDirectory) {
-        dest.putNextEntry(new ZipEntry(s"$name/"))
-        zip(base, file, dest)
-      } else {
-        dest.putNextEntry(new ZipEntry(name))
-
-        for (in <- managed(new FileInputStream(file))) {
-          var read = in.read(buffer)
-          while (read > 0) {
-            dest.write(buffer, 0, read)
-            read = in.read(buffer)
-          }
-        }
-      }
-    }
+    FileUtils.zip(base.toPath, source.toPath, dest)
   }
-}
+
+}
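
A usage sketch of the Path-based replacement, with signatures inferred from the deprecation shims above (the paths are illustrative):

```scala
import java.nio.file.Paths
import ml.combust.bundle.util.FileUtil

// Path-based equivalents of the deprecated File-based helpers.
val dir = Paths.get("/tmp/bundle-dir")
val zip = Paths.get("/tmp/bundle.zip")
FileUtil.zip(dir, zip)                        // zip a directory
FileUtil.extract(zip, Paths.get("/tmp/out"))  // extract it again
FileUtil.rmRF(dir)                            // recursive delete
```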

bundle-ml/src/main/scala/ml/combust/bundle/serializer/NodeSerializer.scala

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ case class NodeSerializer[Context](bundleContext: BundleContext[Context]) {
         val shape = op.shape(obj)(bundleContext)
         Node(name = name, shape = shape)
       }
-    }.flatMap(identity).flatMap {
+    }.flatten.flatMap {
       node => Try(FormatNodeSerializer.serializer.write(bundleContext.file(Bundle.nodeFile), node))
     }
