Commit aeed545

add JMH benchmark for Parquet
1 parent be75cc3 commit aeed545

2 files changed, +62 −2 lines

Diff for: build.sbt (+7 −1)
@@ -708,6 +708,7 @@ lazy val jmh: Project = project
       cats % Test,
       datastore % Test,
       guava % Test,
+      parquet % "test->test",
       protobuf % "test->test",
       scalacheck % Test,
       tensorflow % Test,
@@ -727,7 +728,12 @@ lazy val jmh: Project = project
       "com.google.apis" % "google-api-services-bigquery" % bigqueryVersion % Test,
       "com.google.cloud.datastore" % "datastore-v1-proto-client" % datastoreVersion % Test,
       "org.apache.avro" % "avro" % avroVersion % Test,
-      "org.tensorflow" % "tensorflow-core-api" % tensorflowVersion % Test
+      "org.tensorflow" % "tensorflow-core-api" % tensorflowVersion % Test,
+      "org.apache.parquet" % "parquet-avro" % parquetVersion % Test,
+      "org.apache.parquet" % "parquet-column" % parquetVersion % Test,
+      "org.apache.parquet" % "parquet-hadoop" % parquetVersion % Test,
+      "org.apache.hadoop" % "hadoop-common" % hadoopVersion % Test,
+      "org.apache.hadoop" % "hadoop-mapreduce-client-core" % hadoopVersion % Test
     )
   )
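
Usage note (not part of this commit): assuming the jmh project is wired up with the sbt-jmh plugin, as the JMH annotations below suggest, the new benchmarks can typically be launched with a class-name filter, for example:

    sbt "jmh/Jmh/run .*ParquetBench.*"

The project path and the Jmh/run task name here are assumptions about this build, not something added by the diff.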

Diff for: jmh/src/test/scala/magnolify/jmh/MagnolifyBench.scala (+55 −1)
@@ -16,10 +16,13 @@
 
 package magnolify.jmh
 
-import java.util.concurrent.TimeUnit
+import magnolify.jmh.MagnolifyBench.nested
+import magnolify.parquet.{ParquetType, TestInputFile, TestOutputFile}
 
+import java.util.concurrent.TimeUnit
 import magnolify.scalacheck.auto._
 import magnolify.test.Simple._
+import org.apache.parquet.hadoop.{ParquetReader, ParquetWriter}
 import org.scalacheck._
 import org.openjdk.jmh.annotations._
 
@@ -87,6 +90,57 @@ class AvroBench {
   @Benchmark def avroSchema: Schema = AvroType[Nested].schema
 }
 
+@State(Scope.Benchmark)
+class ParquetReadState {
+  private val parquetType = ParquetType[Nested]
+  var out: TestOutputFile = null
+  var reader: ParquetReader[Nested] = null
+
+  @Setup(Level.Invocation)
+  def setup(): Unit = {
+    out = new TestOutputFile
+    val writer = parquetType.writeBuilder(out).build()
+    writer.write(nested)
+    writer.close()
+
+    val in = new TestInputFile(out.getBytes)
+    reader = parquetType.readBuilder(in).build()
+  }
+
+  @TearDown(Level.Invocation)
+  def tearDown(): Unit = {
+    reader.close()
+  }
+}
+
+@State(Scope.Benchmark)
+class ParquetWriteState {
+  private val parquetType = ParquetType[Nested]
+  var writer: ParquetWriter[Nested] = null
+
+  @Setup(Level.Invocation)
+  def setup(): Unit = {
+    val out = new TestOutputFile
+    writer = parquetType.writeBuilder(out).build()
+  }
+
+  @TearDown(Level.Invocation)
+  def tearDown(): Unit = {
+    writer.close()
+  }
+}
+
+
+@BenchmarkMode(Array(Mode.AverageTime))
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+class ParquetBench {
+  import MagnolifyBench._
+
+  @Benchmark def parquetWrite(state: ParquetWriteState): Unit = state.writer.write(nested)
+  @Benchmark def parquetRead(state: ParquetReadState): Nested = state.reader.read()
+}
+
 @BenchmarkMode(Array(Mode.AverageTime))
 @OutputTimeUnit(TimeUnit.NANOSECONDS)
 @State(Scope.Thread)
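
For reference, the write/read round trip that these benchmarks measure can be reproduced outside JMH using only the calls that appear in the diff above. This is a minimal sketch, assuming MagnolifyBench.nested and the in-memory TestInputFile/TestOutputFile helpers are visible from ordinary test code:

    import magnolify.jmh.MagnolifyBench.nested
    import magnolify.parquet.{ParquetType, TestInputFile, TestOutputFile}
    import magnolify.test.Simple.Nested

    object ParquetRoundTrip {
      def main(args: Array[String]): Unit = {
        val parquetType = ParquetType[Nested]

        // Write a single generated record to an in-memory output file.
        val out = new TestOutputFile
        val writer = parquetType.writeBuilder(out).build()
        writer.write(nested)
        writer.close()

        // Read the record back from the captured bytes.
        val in = new TestInputFile(out.getBytes)
        val reader = parquetType.readBuilder(in).build()
        val copy: Nested = reader.read()
        reader.close()

        println(copy)
      }
    }

Because the state classes use Level.Invocation, each benchmark call gets a fresh writer (or a freshly written file and reader), so parquetWrite and parquetRead measure a single-record write and read rather than reuse of warm objects.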
