Skip to content

Commit a84eff9

Browse files
authored
GPU accelerated encoder (Layr-Labs#895)
1 parent a36ddb9 commit a84eff9

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

80 files changed

+1940
-803
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,5 @@ lightnode/docker/args.sh
1616
.idea
1717
.env
1818
.vscode
19+
20+
icicle/*

api/clients/retrieval_client_test.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,15 @@ func makeTestComponents() (encoding.Prover, encoding.Verifier, error) {
3535
SRSOrder: 3000,
3636
SRSNumberToLoad: 3000,
3737
NumWorker: uint64(runtime.GOMAXPROCS(0)),
38+
LoadG2Points: true,
3839
}
3940

40-
p, err := prover.NewProver(config, true)
41+
p, err := prover.NewProver(config, nil)
4142
if err != nil {
4243
return nil, nil, err
4344
}
4445

45-
v, err := verifier.NewVerifier(config, true)
46+
v, err := verifier.NewVerifier(config, nil)
4647
if err != nil {
4748
return nil, nil, err
4849
}

core/test/core_test.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,15 @@ func makeTestComponents() (encoding.Prover, encoding.Verifier, error) {
5151
SRSOrder: 3000,
5252
SRSNumberToLoad: 3000,
5353
NumWorker: uint64(runtime.GOMAXPROCS(0)),
54+
LoadG2Points: true,
5455
}
5556

56-
p, err := prover.NewProver(config, true)
57+
p, err := prover.NewProver(config, nil)
5758
if err != nil {
5859
return nil, nil, err
5960
}
6061

61-
v, err := verifier.NewVerifier(config, true)
62+
v, err := verifier.NewVerifier(config, nil)
6263
if err != nil {
6364
return nil, nil, err
6465
}

core/v2/core_test.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,15 @@ func makeTestComponents() (encoding.Prover, encoding.Verifier, error) {
7979
SRSOrder: 8192,
8080
SRSNumberToLoad: 8192,
8181
NumWorker: uint64(runtime.GOMAXPROCS(0)),
82+
LoadG2Points: true,
8283
}
8384

84-
p, err := prover.NewProver(config, true)
85+
p, err := prover.NewProver(config, nil)
8586
if err != nil {
8687
return nil, nil, err
8788
}
8889

89-
v, err := verifier.NewVerifier(config, true)
90+
v, err := verifier.NewVerifier(config, nil)
9091
if err != nil {
9192
return nil, nil, err
9293
}

disperser/apiserver/server_test.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -652,8 +652,10 @@ func setup() {
652652
SRSOrder: 8192,
653653
SRSNumberToLoad: 8192,
654654
NumWorker: uint64(runtime.GOMAXPROCS(0)),
655+
LoadG2Points: true,
655656
}
656-
prover, err = p.NewProver(config, true)
657+
658+
prover, err = p.NewProver(config, nil)
657659
if err != nil {
658660
teardown()
659661
panic(fmt.Sprintf("failed to initialize KZG prover: %s", err.Error()))

disperser/batcher/batcher_test.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,10 @@ func makeTestProver() (encoding.Prover, error) {
5858
SRSOrder: 3000,
5959
SRSNumberToLoad: 3000,
6060
NumWorker: uint64(runtime.GOMAXPROCS(0)),
61+
LoadG2Points: true,
6162
}
6263

63-
return prover.NewProver(config, true)
64+
return prover.NewProver(config, nil)
6465
}
6566

6667
func makeTestBlob(securityParams []*core.SecurityParam) core.Blob {

disperser/cmd/apiserver/main.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,8 @@ func RunDisperserServer(ctx *cli.Context) error {
162162
bucketName := config.BlobstoreConfig.BucketName
163163
logger.Info("Blob store", "bucket", bucketName)
164164
if config.DisperserVersion == V2 {
165-
prover, err := prover.NewProver(&config.EncodingConfig, true)
165+
config.EncodingConfig.LoadG2Points = true
166+
prover, err := prover.NewProver(&config.EncodingConfig, nil)
166167
if err != nil {
167168
return fmt.Errorf("failed to create encoder: %w", err)
168169
}

disperser/cmd/encoder/config.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ type Config struct {
2828
EncoderConfig kzg.KzgConfig
2929
LoggerConfig common.LoggerConfig
3030
ServerConfig *encoder.ServerConfig
31-
MetricsConfig encoder.MetrisConfig
31+
MetricsConfig *encoder.MetricsConfig
3232
}
3333

3434
func NewConfig(ctx *cli.Context) (Config, error) {
@@ -58,10 +58,12 @@ func NewConfig(ctx *cli.Context) (Config, error) {
5858
RequestPoolSize: ctx.GlobalInt(flags.RequestPoolSizeFlag.Name),
5959
EnableGnarkChunkEncoding: ctx.Bool(flags.EnableGnarkChunkEncodingFlag.Name),
6060
PreventReencoding: ctx.Bool(flags.PreventReencodingFlag.Name),
61+
Backend: ctx.String(flags.BackendFlag.Name),
62+
GPUEnable: ctx.Bool(flags.GPUEnableFlag.Name),
6163
PprofHttpPort: ctx.GlobalString(flags.PprofHttpPort.Name),
6264
EnablePprof: ctx.GlobalBool(flags.EnablePprof.Name),
6365
},
64-
MetricsConfig: encoder.MetrisConfig{
66+
MetricsConfig: &encoder.MetricsConfig{
6567
HTTPPort: ctx.GlobalString(flags.MetricsHTTPPort.Name),
6668
EnableMetrics: ctx.GlobalBool(flags.EnableMetrics.Name),
6769
},

disperser/cmd/encoder/flags/flags.go

+16
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package flags
33
import (
44
"github.com/Layr-Labs/eigenda/common"
55
"github.com/Layr-Labs/eigenda/common/aws"
6+
"github.com/Layr-Labs/eigenda/encoding"
67
"github.com/Layr-Labs/eigenda/encoding/kzg"
78
"github.com/urfave/cli"
89
)
@@ -67,6 +68,19 @@ var (
6768
Required: false,
6869
EnvVar: common.PrefixEnvVar(envVarPrefix, "ENABLE_GNARK_CHUNK_ENCODING"),
6970
}
71+
GPUEnableFlag = cli.BoolFlag{
72+
Name: common.PrefixFlag(FlagPrefix, "gpu-enable"),
73+
Usage: "Enable GPU, falls back to CPU if not available",
74+
Required: false,
75+
EnvVar: common.PrefixEnvVar(envVarPrefix, "GPU_ENABLE"),
76+
}
77+
BackendFlag = cli.StringFlag{
78+
Name: common.PrefixFlag(FlagPrefix, "backend"),
79+
Usage: "Backend to use for encoding",
80+
Required: false,
81+
Value: string(encoding.GnarkBackend),
82+
EnvVar: common.PrefixEnvVar(envVarPrefix, "BACKEND"),
83+
}
7084
PreventReencodingFlag = cli.BoolTFlag{
7185
Name: common.PrefixFlag(FlagPrefix, "prevent-reencoding"),
7286
Usage: "if true, will prevent reencoding of chunks by checking if the chunk already exists in the chunk store",
@@ -100,6 +114,8 @@ var optionalFlags = []cli.Flag{
100114
EnableGnarkChunkEncodingFlag,
101115
EncoderVersionFlag,
102116
S3BucketNameFlag,
117+
GPUEnableFlag,
118+
BackendFlag,
103119
PreventReencodingFlag,
104120
PprofHttpPort,
105121
EnablePprof,
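(file path missing from this capture — a newly added Dockerfile for the icicle/CUDA encoder build, presumably under disperser/cmd/encoder/; confirm against the commit)

+62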
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 AS builder
2+
3+
# Install Go
4+
ENV GOLANG_VERSION=1.21.1
5+
ENV GOLANG_SHA256=b3075ae1ce5dab85f89bc7905d1632de23ca196bd8336afd93fa97434cfa55ae
6+
7+
ADD https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz /tmp/go.tar.gz
8+
RUN echo "${GOLANG_SHA256} /tmp/go.tar.gz" | sha256sum -c - && \
9+
tar -C /usr/local -xzf /tmp/go.tar.gz && \
10+
rm /tmp/go.tar.gz
11+
ENV PATH="/usr/local/go/bin:${PATH}"
12+
13+
# Set up the working directory
14+
WORKDIR /app
15+
16+
# Copy go.mod and go.sum first to leverage Docker cache
17+
COPY go.mod go.sum ./
18+
19+
# Download dependencies
20+
RUN go mod download
21+
22+
# Copy the source code
23+
COPY ./disperser /app/disperser
24+
COPY common /app/common
25+
COPY contracts /app/contracts
26+
COPY core /app/core
27+
COPY api /app/api
28+
COPY indexer /app/indexer
29+
COPY encoding /app/encoding
30+
COPY relay /app/relay
31+
32+
# Define Icicle versions and checksums
33+
ENV ICICLE_VERSION=3.1.0
34+
ENV ICICLE_BASE_SHA256=2e4e33b8bc3e335b2dd33dcfb10a9aaa18717885509614a24f492f47a2e4f4b1
35+
ENV ICICLE_CUDA_SHA256=cdba907eac6297445a6c128081ebba5c711d352003f69310145406a8fd781647
36+
37+
# Download Icicle tarballs
38+
ADD https://github.com/ingonyama-zk/icicle/releases/download/v${ICICLE_VERSION}/icicle_${ICICLE_VERSION//./_}-ubuntu22.tar.gz /tmp/icicle.tar.gz
39+
ADD https://github.com/ingonyama-zk/icicle/releases/download/v${ICICLE_VERSION}/icicle_${ICICLE_VERSION//./_}-ubuntu22-cuda122.tar.gz /tmp/icicle-cuda.tar.gz
40+
41+
# Verify checksums and install Icicle
42+
RUN echo "${ICICLE_BASE_SHA256} /tmp/icicle.tar.gz" | sha256sum -c - && \
43+
echo "${ICICLE_CUDA_SHA256} /tmp/icicle-cuda.tar.gz" | sha256sum -c - && \
44+
tar xzf /tmp/icicle.tar.gz && \
45+
cp -r ./icicle/lib/* /usr/lib/ && \
46+
cp -r ./icicle/include/icicle/ /usr/local/include/ && \
47+
tar xzf /tmp/icicle-cuda.tar.gz -C /opt && \
48+
rm /tmp/icicle.tar.gz /tmp/icicle-cuda.tar.gz
49+
50+
# Build the server with icicle backend
51+
WORKDIR /app/disperser
52+
RUN go build -tags=icicle -o ./bin/server ./cmd/encoder
53+
54+
# Start a new stage for the base image
55+
FROM nvidia/cuda:12.2.2-base-ubuntu22.04
56+
57+
COPY --from=builder /app/disperser/bin/server /usr/local/bin/server
58+
COPY --from=builder /usr/lib/libicicle* /usr/lib/
59+
COPY --from=builder /usr/local/include/icicle /usr/local/include/icicle
60+
COPY --from=builder /opt/icicle /opt/icicle
61+
62+
ENTRYPOINT ["server"]

disperser/cmd/encoder/main.go

+25-6
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@ import (
1010
"github.com/Layr-Labs/eigenda/common/aws/s3"
1111
"github.com/Layr-Labs/eigenda/disperser/cmd/encoder/flags"
1212
blobstorev2 "github.com/Layr-Labs/eigenda/disperser/common/v2/blobstore"
13-
grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus"
1413
"github.com/Layr-Labs/eigenda/disperser/encoder"
14+
"github.com/Layr-Labs/eigenda/encoding"
1515
"github.com/Layr-Labs/eigenda/encoding/kzg/prover"
16-
"github.com/prometheus/client_golang/prometheus"
1716
"github.com/Layr-Labs/eigenda/relay/chunkstore"
17+
grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus"
18+
"github.com/prometheus/client_golang/prometheus"
1819
"github.com/urfave/cli"
1920
)
2021

@@ -69,9 +70,23 @@ func RunEncoderServer(ctx *cli.Context) error {
6970
reg.MustRegister(grpcMetrics)
7071
}
7172

73+
backendType, err := encoding.ParseBackendType(config.ServerConfig.Backend)
74+
if err != nil {
75+
return err
76+
}
77+
78+
// Set the encoding config
79+
encodingConfig := &encoding.Config{
80+
BackendType: backendType,
81+
GPUEnable: config.ServerConfig.GPUEnable,
82+
NumWorker: config.EncoderConfig.NumWorker,
83+
}
84+
7285
if config.EncoderVersion == V2 {
73-
// We no longer compute the commitments in the encoder, so we don't need to load the G2 points
74-
prover, err := prover.NewProver(&config.EncoderConfig, false)
86+
// We no longer load the G2 points in V2 because the KZG commitments are computed
87+
// on the API server side.
88+
config.EncoderConfig.LoadG2Points = false
89+
prover, err := prover.NewProver(&config.EncoderConfig, encodingConfig)
7590
if err != nil {
7691
return fmt.Errorf("failed to create encoder: %w", err)
7792
}
@@ -82,6 +97,10 @@ func RunEncoderServer(ctx *cli.Context) error {
8297
}
8398

8499
blobStoreBucketName := config.BlobStoreConfig.BucketName
100+
if blobStoreBucketName == "" {
101+
return fmt.Errorf("blob store bucket name is required")
102+
}
103+
85104
blobStore := blobstorev2.NewBlobStore(blobStoreBucketName, s3Client, logger)
86105
logger.Info("Blob store", "bucket", blobStoreBucketName)
87106

@@ -101,13 +120,13 @@ func RunEncoderServer(ctx *cli.Context) error {
101120
return server.Start()
102121
}
103122

104-
prover, err := prover.NewProver(&config.EncoderConfig, true)
123+
config.EncoderConfig.LoadG2Points = true
124+
prover, err := prover.NewProver(&config.EncoderConfig, encodingConfig)
105125
if err != nil {
106126
return fmt.Errorf("failed to create encoder: %w", err)
107127
}
108128

109129
server := encoder.NewEncoderServer(*config.ServerConfig, logger, prover, metrics, grpcMetrics)
110130

111131
return server.Start()
112-
113132
}

disperser/encoder/config.go

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ type ServerConfig struct {
1010
RequestPoolSize int
1111
EnableGnarkChunkEncoding bool
1212
PreventReencoding bool
13+
Backend string
14+
GPUEnable bool
1315
PprofHttpPort string
1416
EnablePprof bool
1517
}

disperser/encoder/metrics.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313
"github.com/prometheus/client_golang/prometheus/promhttp"
1414
)
1515

16-
type MetrisConfig struct {
16+
type MetricsConfig struct {
1717
HTTPPort string
1818
EnableMetrics bool
1919
}

disperser/encoder/server.go

+7-8
Original file line numberDiff line numberDiff line change
@@ -99,13 +99,6 @@ func (s *EncoderServer) Start() error {
9999
return gs.Serve(listener)
100100
}
101101

102-
func (s *EncoderServer) Close() {
103-
if s.close == nil {
104-
return
105-
}
106-
s.close()
107-
}
108-
109102
func (s *EncoderServer) EncodeBlob(ctx context.Context, req *pb.EncodeBlobRequest) (*pb.EncodeBlobReply, error) {
110103
startTime := time.Now()
111104
blobSize := len(req.GetData())
@@ -193,7 +186,6 @@ func (s *EncoderServer) handleEncoding(ctx context.Context, req *pb.EncodeBlobRe
193186
}
194187

195188
var chunksData [][]byte
196-
197189
var format pb.ChunkEncodingFormat
198190
if s.config.EnableGnarkChunkEncoding {
199191
format = pb.ChunkEncodingFormat_GNARK
@@ -228,3 +220,10 @@ func (s *EncoderServer) handleEncoding(ctx context.Context, req *pb.EncodeBlobRe
228220
ChunkEncodingFormat: format,
229221
}, nil
230222
}
223+
224+
func (s *EncoderServer) Close() {
225+
if s.close == nil {
226+
return
227+
}
228+
s.close()
229+
}

disperser/encoder/server_test.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,10 @@ func makeTestProver(numPoint uint64) (encoding.Prover, ServerConfig) {
4242
SRSOrder: 3000,
4343
SRSNumberToLoad: numPoint,
4444
NumWorker: uint64(runtime.GOMAXPROCS(0)),
45+
LoadG2Points: true,
4546
}
4647

47-
p, _ := prover.NewProver(kzgConfig, true)
48+
p, _ := prover.NewProver(kzgConfig, nil)
4849
encoderServerConfig := ServerConfig{
4950
GrpcPort: "3000",
5051
MaxConcurrentRequests: 16,

disperser/encoder/server_v2.go

+7
Original file line numberDiff line numberDiff line change
@@ -249,3 +249,10 @@ func extractProofsAndCoeffs(frames []*encoding.Frame) ([]*encoding.Proof, []*rs.
249249
}
250250
return proofs, coeffs
251251
}
252+
253+
func (s *EncoderServerV2) Close() {
254+
if s.close == nil {
255+
return
256+
}
257+
s.close()
258+
}

disperser/encoder/server_v2_test.go

+9-3
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,17 @@ func makeTestProver(numPoint uint64) (encoding.Prover, error) {
4949
SRSOrder: 300000,
5050
SRSNumberToLoad: numPoint,
5151
NumWorker: uint64(runtime.GOMAXPROCS(0)),
52+
LoadG2Points: false,
5253
}
53-
54-
p, err := prover.NewProver(kzgConfig, false)
54+
p, err := prover.NewProver(kzgConfig, nil)
5555

5656
return p, err
5757
}
5858

5959
func TestEncodeBlob(t *testing.T) {
6060
const (
6161
testDataSize = 16 * 1024
62-
timeoutSeconds = 30
62+
timeoutSeconds = 60
6363
randSeed = uint64(42)
6464
)
6565

@@ -176,6 +176,12 @@ func TestEncodeBlob(t *testing.T) {
176176
// Create and execute encoding request again
177177
resp, err := server.EncodeBlob(ctx, req)
178178
assert.NoError(t, err)
179+
180+
if !assert.NotNil(t, resp, "Response should not be nil") {
181+
t.FailNow() // Stop the test here to prevent nil pointer panic
182+
return
183+
}
184+
179185
assert.Equal(t, uint32(294916), resp.FragmentInfo.TotalChunkSizeBytes, "Unexpected total chunk size")
180186
assert.Equal(t, uint32(512*1024), resp.FragmentInfo.FragmentSizeBytes, "Unexpected fragment size")
181187
assert.Equal(t, c.s3Client.Called["UploadObject"], expectedUploadCalls)

0 commit comments

Comments
 (0)