Skip to content

Commit 947370a

Browse files
authored
tech.ml.dataset example for graalvm-clojure (#39)
* Adding vanilla deps-based tmd pathway. * Removing large file
1 parent 62d835e commit 947370a

File tree

8 files changed

+142
-0
lines changed

8 files changed

+142
-0
lines changed

Diff for: tech.ml.dataset/.gitignore

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.cpcache
2+
*.crc
3+
*.parquet
4+
ds-graal
5+
graalvm*
6+
pom.xml
7+
target
8+
hello-tmd

Diff for: tech.ml.dataset/README.md

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Graal Native And Data Frames
2+
3+
This repository is a simple example of how to build a graal native application that
4+
processes a CSV and both reads and writes to parquet. With graal native you
5+
lose some level of performance but you get a single packaged executable
6+
with very few dependencies and of course instant startup time.
7+
8+
9+
## Building
10+
11+
```console
12+
scripts/compile
13+
```
14+
15+
16+
## Running
17+
18+
```console
19+
./hello-tmd
20+
```

Diff for: tech.ml.dataset/deps.edn

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{:paths ["src" "classes" "resources"]
2+
:deps {org.clojure/clojure {:mvn/version "1.10.2"}
3+
techascent/tech.ml.dataset {:mvn/version "5.19"}
4+
5+
org.apache.parquet/parquet-hadoop {:mvn/version "1.12.0"
6+
:exclusions [org.slf4j/slf4j-log4j12]}
7+
org.apache.hadoop/hadoop-common {:mvn/version "3.3.0"
8+
:exclusions [org.slf4j/slf4j-log4j12]}
9+
;; We literally need this for 1 POJO formatting object.
10+
org.apache.hadoop/hadoop-mapreduce-client-core {:mvn/version "3.3.0"
11+
:exclusions [org.slf4j/slf4j-log4j12]}}
12+
:aliases
13+
{:depstar
14+
{:replace-deps {com.github.seancorfield/depstar {:mvn/version "2.0.193"}}
15+
:ns-default hf.depstar
16+
:exec-fn hf.depstar/uberjar
17+
:exec-args {:group-id "graalvm-clojure"
18+
:artifact-id "hello-tmd"
19+
:version "1.00-beta-1"
20+
:sync-pom true
21+
:aot true
22+
:compile-ns [hello-tmd.main]
23+
:main-class hello-tmd.main
24+
:jar "target/hello-tmd.jar"
25+
;;Disable tensor code generation and ensure direct linking.
26+
:jvm-opts ["-Dtech.v3.datatype.graal-native=true"
27+
"-Dclojure.compiler.direct-linking=true"
28+
"-Dclojure.spec.skip-macros=true"]}}}}

Diff for: tech.ml.dataset/resources/logback.xml

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<configuration debug="false">
2+
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
3+
<!-- encoders are assigned the type
4+
ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
5+
<encoder>
6+
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
7+
</encoder>
8+
</appender>
9+
10+
<root level="info">
11+
<appender-ref ref="STDOUT" />
12+
</root>
13+
</configuration>

Diff for: tech.ml.dataset/scripts/activate-graal

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
3+
if [ ! -e graalvm ]; then
4+
scripts/get-graal
5+
fi
6+
7+
export GRAALVM_HOME="$(pwd)/graalvm"
8+
export PATH="$(pwd)/graalvm/bin:$PATH"
9+
export JAVA_HOME="$(pwd)/graalvm"

Diff for: tech.ml.dataset/scripts/compile

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/bash
2+
3+
set -e
4+
5+
source scripts/activate-graal
6+
7+
rm -rf classes && mkdir classes
8+
echo "Building uberjar"
9+
clojure -X:depstar
10+
11+
# Build the native executable "hello-tmd" from the uberjar; the nested-class flag is single-quoted so bash does not expand $Java9Crc32CFactory as a variable.
12+
graalvm/bin/native-image \
13+
--report-unsupported-elements-at-runtime \
14+
--initialize-at-build-time \
15+
--no-fallback \
16+
--no-server \
17+
-H:+ReportExceptionStackTraces \
18+
--enable-https \
19+
--allow-incomplete-classpath \
20+
'--initialize-at-run-time=org.apache.hadoop.util.DataChecksum$Java9Crc32CFactory' \
21+
--initialize-at-run-time=org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawDecoder \
22+
--initialize-at-run-time=org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawEncoder \
23+
--initialize-at-run-time=org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawEncoder \
24+
--initialize-at-run-time=org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawDecoder \
25+
-J-Dclojure.spec.skip-macros=true \
26+
-J-Dclojure.compiler.direct-linking=true \
27+
-J-Dtech.v3.datatype.graal-native=true \
28+
-jar target/hello-tmd.jar hello-tmd

Diff for: tech.ml.dataset/scripts/get-graal

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash
2+
# Download GraalVM CE, link it as ./graalvm and install native-image; stop at the first failing step so a bad download never leaves a dangling link.
3+
set -e
4+
GRAAL_VERSION="21.0.0.2"
5+
JDK_VERSION="11"
6+
7+
wget https://github.com/graalvm/graalvm-ce-builds/releases/download/vm-$GRAAL_VERSION/graalvm-ce-java$JDK_VERSION-linux-amd64-$GRAAL_VERSION.tar.gz
8+
tar -xvzf graalvm-ce-java$JDK_VERSION-linux-amd64-$GRAAL_VERSION.tar.gz
9+
ln -sfn "$(pwd)/graalvm-ce-java$JDK_VERSION-$GRAAL_VERSION" "$(pwd)/graalvm"
10+
rm graalvm-ce-java$JDK_VERSION-linux-amd64-$GRAAL_VERSION.tar.gz
11+
graalvm/bin/gu install native-image
12+
13+
## Dev builds
14+
15+
# wget https://github.com/graalvm/graalvm-ce-dev-builds/releases/download/21.1.0-dev-20210325_0249/graalvm-ce-java16-linux-amd64-dev.tar.gz
16+
# tar -xvzf graalvm-ce-java16-linux-amd64-dev.tar.gz
17+
# ln -s "$(pwd)/graalvm-ce-java16-21.1.0-dev" "$(pwd)/graalvm"
18+
# rm -f graalvm-ce-java16-linux-amd64-dev.tar.gz
19+
# graalvm/bin/gu install native-image

Diff for: tech.ml.dataset/src/hello_tmd/main.clj

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
(ns hello-tmd.main
2+
(:require [tech.v3.dataset :as ds]
3+
[tech.v3.datatype.functional :as dfn]
4+
[tech.v3.libs.parquet :as parquet])
5+
(:gen-class))
6+
;; Entry point: loads the stocks CSV over HTTPS, prints the dataset and its mean price,
7+
;; then writes it to stocks.parquet, reads the first dataset back and prints that mean. Returns 0.
8+
(defn -main
9+
[& args]
10+
(let [test-ds (ds/->dataset "https://raw.githubusercontent.com/techascent/tech.ml.dataset/master/test/data/stocks.csv" {:key-fn keyword})]
11+
(println test-ds)
12+
(println "price mean:" (dfn/mean (test-ds :price)))
13+
(parquet/ds->parquet test-ds "stocks.parquet")
14+
(println "successfully wrote stocks.parquet")
15+
(let [pds (first (parquet/parquet->ds-seq "stocks.parquet"))]
16+
(println "price mean:" (dfn/mean (pds "price"))))
17+
0))

0 commit comments

Comments
 (0)