
Commit 1f8d94b

inference: Return memory requirement in estimation error
Signed-off-by: Piotr Stankiewicz <[email protected]>
1 parent: 2d039ec

File tree

8 files changed: +27 −26 lines


pkg/inference/backend.go
Lines changed: 1 addition & 1 deletion

```diff
@@ -91,5 +91,5 @@ type Backend interface {
 	GetDiskUsage() (int64, error)
 	// GetRequiredMemoryForModel returns the required working memory for a given
 	// model.
-	GetRequiredMemoryForModel(ctx context.Context, model string, config *BackendConfiguration) (*RequiredMemory, error)
+	GetRequiredMemoryForModel(ctx context.Context, model string, config *BackendConfiguration) (RequiredMemory, error)
 }
```
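The interface now returns `RequiredMemory` by value instead of by pointer. The practical effect, visible throughout the rest of this commit, is that error paths hand back a zero value rather than `nil`, so callers can never dereference a nil estimate. A minimal sketch of the pattern (the `RequiredMemory` field names come from this diff; the field types and byte units are assumptions for illustration):

```go
package main

import (
	"errors"
	"fmt"
)

// RequiredMemory mirrors the struct used in this commit; uint64 byte
// counts are an assumption, not the repository's actual definition.
type RequiredMemory struct {
	RAM  uint64
	VRAM uint64
}

// estimate returns a zero RequiredMemory alongside the error, so the
// caller never holds a nil pointer on the failure path.
func estimate(fail bool) (RequiredMemory, error) {
	if fail {
		return RequiredMemory{}, errors.New("estimation failed")
	}
	return RequiredMemory{RAM: 8 << 30, VRAM: 4 << 30}, nil
}

func main() {
	req, err := estimate(false)
	if err != nil {
		fmt.Println("estimate failed:", err)
		return
	}
	fmt.Printf("need %d bytes RAM, %d bytes VRAM\n", req.RAM, req.VRAM)
}
```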

pkg/inference/backends/llamacpp/llamacpp.go
Lines changed: 5 additions & 5 deletions

```diff
@@ -230,22 +230,22 @@ func (l *llamaCpp) GetDiskUsage() (int64, error) {
 	return size, nil
 }
 
-func (l *llamaCpp) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (*inference.RequiredMemory, error) {
+func (l *llamaCpp) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (inference.RequiredMemory, error) {
 	var mdlGguf *parser.GGUFFile
 	var mdlConfig types.Config
 	inStore, err := l.modelManager.IsModelInStore(model)
 	if err != nil {
-		return nil, fmt.Errorf("checking if model is in local store: %w", err)
+		return inference.RequiredMemory{}, fmt.Errorf("checking if model is in local store: %w", err)
 	}
 	if inStore {
 		mdlGguf, mdlConfig, err = l.parseLocalModel(model)
 		if err != nil {
-			return nil, &inference.ErrGGUFParse{Err: err}
+			return inference.RequiredMemory{}, &inference.ErrGGUFParse{Err: err}
 		}
 	} else {
 		mdlGguf, mdlConfig, err = l.parseRemoteModel(ctx, model)
 		if err != nil {
-			return nil, &inference.ErrGGUFParse{Err: err}
+			return inference.RequiredMemory{}, &inference.ErrGGUFParse{Err: err}
 		}
 	}
 
@@ -278,7 +278,7 @@ func (l *llamaCpp) GetRequiredMemoryForModel(ctx context.Context, model string,
 		vram = 1
 	}
 
-	return &inference.RequiredMemory{
+	return inference.RequiredMemory{
 		RAM:  ram,
 		VRAM: vram,
 	}, nil
```

pkg/inference/backends/mlx/mlx.go
Lines changed: 2 additions & 2 deletions

```diff
@@ -63,6 +63,6 @@ func (m *mlx) GetDiskUsage() (int64, error) {
 	return 0, nil
 }
 
-func (m *mlx) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (*inference.RequiredMemory, error) {
-	return nil, errors.New("not implemented")
+func (m *mlx) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (inference.RequiredMemory, error) {
+	return inference.RequiredMemory{}, errors.New("not implemented")
 }
```

pkg/inference/backends/vllm/vllm.go
Lines changed: 2 additions & 2 deletions

```diff
@@ -63,6 +63,6 @@ func (v *vLLM) GetDiskUsage() (int64, error) {
 	return 0, nil
 }
 
-func (v *vLLM) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (*inference.RequiredMemory, error) {
-	return nil, errors.New("not implemented")
+func (v *vLLM) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (inference.RequiredMemory, error) {
+	return inference.RequiredMemory{}, errors.New("not implemented")
 }
```

pkg/inference/memory/estimator.go
Lines changed: 8 additions & 8 deletions

```diff
@@ -10,12 +10,12 @@ import (
 
 type MemoryEstimator interface {
 	SetDefaultBackend(MemoryEstimatorBackend)
-	GetRequiredMemoryForModel(context.Context, string, *inference.BackendConfiguration) (*inference.RequiredMemory, error)
-	HaveSufficientMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (bool, error)
+	GetRequiredMemoryForModel(context.Context, string, *inference.BackendConfiguration) (inference.RequiredMemory, error)
+	HaveSufficientMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (bool, inference.RequiredMemory, inference.RequiredMemory, error)
 }
 
 type MemoryEstimatorBackend interface {
-	GetRequiredMemoryForModel(context.Context, string, *inference.BackendConfiguration) (*inference.RequiredMemory, error)
+	GetRequiredMemoryForModel(context.Context, string, *inference.BackendConfiguration) (inference.RequiredMemory, error)
 }
 
 type memoryEstimator struct {
@@ -31,18 +31,18 @@ func (m *memoryEstimator) SetDefaultBackend(backend MemoryEstimatorBackend) {
 	m.defaultBackend = backend
 }
 
-func (m *memoryEstimator) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (*inference.RequiredMemory, error) {
+func (m *memoryEstimator) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (inference.RequiredMemory, error) {
 	if m.defaultBackend == nil {
-		return nil, errors.New("default backend not configured")
+		return inference.RequiredMemory{}, errors.New("default backend not configured")
 	}
 
 	return m.defaultBackend.GetRequiredMemoryForModel(ctx, model, config)
 }
 
-func (m *memoryEstimator) HaveSufficientMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (bool, error) {
+func (m *memoryEstimator) HaveSufficientMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (bool, inference.RequiredMemory, inference.RequiredMemory, error) {
 	req, err := m.GetRequiredMemoryForModel(ctx, model, config)
 	if err != nil {
-		return false, fmt.Errorf("estimating required memory for model: %w", err)
+		return false, inference.RequiredMemory{}, inference.RequiredMemory{}, fmt.Errorf("estimating required memory for model: %w", err)
 	}
-	return m.systemMemoryInfo.HaveSufficientMemory(*req), nil
+	return m.systemMemoryInfo.HaveSufficientMemory(req), req, m.systemMemoryInfo.GetTotalMemory(), nil
 }
```
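`HaveSufficientMemoryForModel` now returns the estimate and the system totals alongside the verdict, which is what lets the manager below report concrete numbers instead of a bare refusal. A hypothetical caller sketch (the local stand-in types mirror the signatures in this diff so the snippet compiles on its own; the real types live in pkg/inference and pkg/inference/memory, and the `any` config parameter stands in for `*inference.BackendConfiguration`):

```go
package sketch

import (
	"context"
	"fmt"
)

// Local stand-ins mirroring the shapes in this diff.
type RequiredMemory struct{ RAM, VRAM uint64 }

type MemoryEstimator interface {
	HaveSufficientMemoryForModel(ctx context.Context, model string, config any) (bool, RequiredMemory, RequiredMemory, error)
}

// checkFit shows how a caller consumes the widened four-value return:
// verdict, required memory, total system memory, and error.
func checkFit(ctx context.Context, est MemoryEstimator, model string) error {
	proceed, req, total, err := est.HaveSufficientMemoryForModel(ctx, model, nil)
	if err != nil {
		return fmt.Errorf("estimating memory for %q: %w", model, err)
	}
	if !proceed {
		return fmt.Errorf("%q needs %d RAM / %d VRAM; system has %d RAM / %d VRAM",
			model, req.RAM, req.VRAM, total.RAM, total.VRAM)
	}
	return nil
}
```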

pkg/inference/models/manager.go
Lines changed: 4 additions & 3 deletions

```diff
@@ -163,15 +163,16 @@ func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) {
 	// besides pulling (such as model building).
 	if !request.IgnoreRuntimeMemoryCheck {
 		m.log.Infof("Will estimate memory required for %q", request.From)
-		proceed, err := m.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
+		proceed, req, totalMem, err := m.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
 		if err != nil {
 			m.log.Warnf("Failed to calculate memory required for model %q: %s", request.From, err)
 			// Prefer staying functional in case of unexpected estimation errors.
 			proceed = true
 		}
 		if !proceed {
-			m.log.Warnf("Runtime memory requirement for model %q exceeds total system memory", request.From)
-			http.Error(w, "Runtime memory requirement for model exceeds total system memory", http.StatusInsufficientStorage)
+			errstr := fmt.Sprintf("Runtime memory requirement for model %q exceeds total system memory: required %d RAM %d VRAM, system %d RAM %d VRAM", request.From, req.RAM, req.VRAM, totalMem.RAM, totalMem.VRAM)
+			m.log.Warnf(errstr)
+			http.Error(w, errstr, http.StatusInsufficientStorage)
 			return
 		}
 	}
```
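The 507 response body now carries the same figures as the log line, so a client pulling an oversized model can see exactly how far over budget it is. A tiny sketch of the message construction (model name and byte counts are invented):

```go
package main

import "fmt"

func main() {
	// Invented example values; the real numbers come from the estimator.
	from := "ai/example-70b"
	req := struct{ RAM, VRAM int64 }{RAM: 42 << 30, VRAM: 0}
	sys := struct{ RAM, VRAM int64 }{RAM: 16 << 30, VRAM: 0}
	errstr := fmt.Sprintf(
		"Runtime memory requirement for model %q exceeds total system memory: required %d RAM %d VRAM, system %d RAM %d VRAM",
		from, req.RAM, req.VRAM, sys.RAM, sys.VRAM)
	fmt.Println(errstr) // this string is both logged and written to the 507 body
}
```

One small caveat in the hunk above: `m.log.Warnf(errstr)` passes a dynamic string as the format argument, so a model name containing `%` would garble the log line; `m.log.Warnf("%s", errstr)` would avoid that.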

pkg/inference/models/manager_test.go
Lines changed: 4 additions & 4 deletions

```diff
@@ -26,12 +26,12 @@ type mockMemoryEstimator struct{}
 
 func (me *mockMemoryEstimator) SetDefaultBackend(_ memory.MemoryEstimatorBackend) {}
 
-func (me *mockMemoryEstimator) GetRequiredMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (*inference.RequiredMemory, error) {
-	return &inference.RequiredMemory{RAM: 0, VRAM: 0}, nil
+func (me *mockMemoryEstimator) GetRequiredMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (inference.RequiredMemory, error) {
+	return inference.RequiredMemory{RAM: 0, VRAM: 0}, nil
 }
 
-func (me *mockMemoryEstimator) HaveSufficientMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (bool, error) {
-	return true, nil
+func (me *mockMemoryEstimator) HaveSufficientMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (bool, inference.RequiredMemory, inference.RequiredMemory, error) {
+	return true, inference.RequiredMemory{}, inference.RequiredMemory{}, nil
 }
 
 // getProjectRoot returns the absolute path to the project root directory
```

pkg/inference/scheduling/loader.go
Lines changed: 1 addition & 1 deletion

```diff
@@ -401,7 +401,7 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
 	// e.g. model is too new for gguf-parser-go to know. We should provide a cleaner
 	// way to bypass these checks.
 	l.log.Warnf("Could not parse model(%s), memory checks will be ignored for it. Error: %s", modelID, parseErr)
-	memory = &inference.RequiredMemory{
+	memory = inference.RequiredMemory{
 		RAM:  0,
 		VRAM: 0,
 	}
```
