From 47066c526d80dfcdcebbac801f4d037733caf14d Mon Sep 17 00:00:00 2001 From: wanxiangchwng Date: Mon, 25 Nov 2024 16:55:10 +0800 Subject: [PATCH 01/56] chore: fix some function names in comment (#3272) Signed-off-by: wanxiangchwng --- core/ai.go | 2 +- core/ai_worker.go | 2 +- crypto/verify.go | 2 +- pm/broker.go | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/ai.go b/core/ai.go index 871b99b446..25ad3c2470 100644 --- a/core/ai.go +++ b/core/ai.go @@ -48,7 +48,7 @@ func (s JSONRat) String() string { return s.FloatString(2) } -// parsePipelineFromModelID converts a pipeline name to a capability enum. +// PipelineToCapability converts a pipeline name to a capability enum. func PipelineToCapability(pipeline string) (Capability, error) { if pipeline == "" { return Capability_Unused, errPipelineNotAvailable diff --git a/core/ai_worker.go b/core/ai_worker.go index 8498191d97..48b76c84bc 100644 --- a/core/ai_worker.go +++ b/core/ai_worker.go @@ -241,7 +241,7 @@ func (rwm *RemoteAIWorkerManager) workerHasCapacity(pipeline, modelID string) bo return false } -// completeRequestSessions end a AI request session for a remote ai worker +// completeAIRequest end a AI request session for a remote ai worker // caller should hold the mutex lock func (rwm *RemoteAIWorkerManager) completeAIRequest(requestID, pipeline, modelID string) { rwm.RWmutex.Lock() diff --git a/crypto/verify.go b/crypto/verify.go index 6595616c78..39e163f18e 100644 --- a/crypto/verify.go +++ b/crypto/verify.go @@ -14,7 +14,7 @@ var ( secp256k1halfN = new(big.Int).Div(secp256k1N, big.NewInt(2)) ) -// Verify verifies that a ETH ECDSA signature over a given message +// VerifySig verifies that a ETH ECDSA signature over a given message // is produced by a given ETH address func VerifySig(addr ethcommon.Address, msg, sig []byte) bool { recovered, err := ecrecover(msg, sig) diff --git a/pm/broker.go b/pm/broker.go index fba8277227..1b2d55b463 100644 --- a/pm/broker.go +++ b/pm/broker.go @@ 
-76,11 +76,11 @@ type TimeManager interface { PreLastInitializedL1BlockHash() [32]byte // GetTranscoderPoolSize returns the size of the active transcoder set for a round GetTranscoderPoolSize() *big.Int - // LastSeenBlock returns the last seen block number + // LastSeenL1Block returns the last seen block number LastSeenL1Block() *big.Int // SubscribeRounds allows one to subscribe to new round events SubscribeRounds(sink chan<- types.Log) event.Subscription - // SubscribeBlocks allows one to subscribe to newly seen block numbers + // SubscribeL1Blocks allows one to subscribe to newly seen block numbers SubscribeL1Blocks(sink chan<- *big.Int) event.Subscription } From b6c4487ba62041517087a2b2a1e03cc93cd37566 Mon Sep 17 00:00:00 2001 From: Marco van Dijk Date: Mon, 25 Nov 2024 11:16:50 +0100 Subject: [PATCH 02/56] Bump LPMS (#3269) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tibia2000 <149428838+Tibia2000@users.noreply.github.com> Co-authored-by: Rafał Leszko --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 65d7a5bcaf..bd6662f427 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/livepeer/ai-worker v0.12.3 github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18 - github.com/livepeer/lpms v0.0.0-20240909171057-fe5aff1fa6a2 + github.com/livepeer/lpms v0.0.0-20241122145837-7b07ba3a2204 github.com/livepeer/m3u8 v0.11.1 github.com/mattn/go-sqlite3 v1.14.18 github.com/oapi-codegen/nethttp-middleware v1.0.1 diff --git a/go.sum b/go.sum index 6019486ec7..8e91ed4962 100644 --- a/go.sum +++ b/go.sum @@ -613,8 +613,8 @@ github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded h1:ZQlvR5RB4nfT+cO github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded/go.mod h1:xkDdm+akniYxVT9KW1Y2Y7Hso6aW+rZObz3nrA9yTHw= github.com/livepeer/livepeer-data 
v0.7.5-0.20231004073737-06f1f383fb18 h1:4oH3NqV0NvcdS44Ld3zK2tO8IUiNozIggm74yobQeZg= github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18/go.mod h1:Jpf4jHK+fbWioBHRDRM1WadNT1qmY27g2YicTdO0Rtc= -github.com/livepeer/lpms v0.0.0-20240909171057-fe5aff1fa6a2 h1:UYVfhBuJ2h6eYOCBaCzjoWoj3onhZ+6wFhXNllELYDA= -github.com/livepeer/lpms v0.0.0-20240909171057-fe5aff1fa6a2/go.mod h1:z5ROP1l5OzAKSoqVRLc34MjUdueil6wHSecQYV7llIw= +github.com/livepeer/lpms v0.0.0-20241122145837-7b07ba3a2204 h1:YalnQu8BB9vRh+7gcEjfzfHNl9NEwagBTHQqnlUYDrA= +github.com/livepeer/lpms v0.0.0-20241122145837-7b07ba3a2204/go.mod h1:z5ROP1l5OzAKSoqVRLc34MjUdueil6wHSecQYV7llIw= github.com/livepeer/m3u8 v0.11.1 h1:VkUJzfNTyjy9mqsgp5JPvouwna8wGZMvd/gAfT5FinU= github.com/livepeer/m3u8 v0.11.1/go.mod h1:IUqAtwWPAG2CblfQa4SVzTQoDcEMPyfNOaBSxqHMS04= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= From 3ea003f112cd01ba5c3a8ab410e359675bcface8 Mon Sep 17 00:00:00 2001 From: Max Holland Date: Mon, 25 Nov 2024 10:22:29 +0000 Subject: [PATCH 03/56] Introduce new config param for AI webhook auth (#3271) * Introduce new config param for AI webhook auth * Set default * Fix auth --- cmd/livepeer/livepeer.go | 1 + cmd/livepeer/starter/starter.go | 22 +++++++++++++++++----- server/ai_mediaserver.go | 4 ++-- server/auth.go | 15 +++++++++++++-- server/mediaserver.go | 1 + 5 files changed, 34 insertions(+), 9 deletions(-) diff --git a/cmd/livepeer/livepeer.go b/cmd/livepeer/livepeer.go index 2675bc7858..4bfd274635 100755 --- a/cmd/livepeer/livepeer.go +++ b/cmd/livepeer/livepeer.go @@ -221,6 +221,7 @@ func parseLivepeerConfig() starter.LivepeerConfig { cfg.FVfailGsKey = flag.String("FVfailGskey", *cfg.FVfailGsKey, "Google Cloud Storage private key file name or key in JSON format for accessing FVfailGsBucket") // API cfg.AuthWebhookURL = flag.String("authWebhookUrl", *cfg.AuthWebhookURL, "RTMP authentication webhook URL") + cfg.LiveAIAuthWebhookURL 
= flag.String("liveAIAuthWebhookUrl", "", "Live AI RTMP authentication webhook URL") // flags cfg.TestOrchAvail = flag.Bool("startupAvailabilityCheck", *cfg.TestOrchAvail, "Set to false to disable the startup Orchestrator availability check on the configured serviceAddr") diff --git a/cmd/livepeer/starter/starter.go b/cmd/livepeer/starter/starter.go index 0e91a7f322..5bc6d820c9 100755 --- a/cmd/livepeer/starter/starter.go +++ b/cmd/livepeer/starter/starter.go @@ -156,6 +156,7 @@ type LivepeerConfig struct { FVfailGsBucket *string FVfailGsKey *string AuthWebhookURL *string + LiveAIAuthWebhookURL *string OrchWebhookURL *string OrchBlacklist *string OrchMinLivepeerVersion *string @@ -209,6 +210,7 @@ func DefaultLivepeerConfig() LivepeerConfig { defaultAIModels := "" defaultAIModelsDir := "" defaultAIRunnerImage := "livepeer/ai-runner:latest" + defaultLiveAIAuthWebhookURL := "" // Onchain: defaultEthAcctAddr := "" @@ -310,11 +312,12 @@ func DefaultLivepeerConfig() LivepeerConfig { TestTranscoder: &defaultTestTranscoder, // AI: - AIServiceRegistry: &defaultAIServiceRegistry, - AIWorker: &defaultAIWorker, - AIModels: &defaultAIModels, - AIModelsDir: &defaultAIModelsDir, - AIRunnerImage: &defaultAIRunnerImage, + AIServiceRegistry: &defaultAIServiceRegistry, + AIWorker: &defaultAIWorker, + AIModels: &defaultAIModels, + AIModelsDir: &defaultAIModelsDir, + AIRunnerImage: &defaultAIRunnerImage, + LiveAIAuthWebhookURL: &defaultLiveAIAuthWebhookURL, // Onchain: EthAcctAddr: &defaultEthAcctAddr, @@ -1378,6 +1381,15 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) { server.AuthWebhookURL = parsedUrl } + if *cfg.LiveAIAuthWebhookURL != "" { + parsedUrl, err := validateURL(*cfg.LiveAIAuthWebhookURL) + if err != nil { + glog.Exit("Error setting live AI auth webhook URL ", err) + } + glog.Info("Using live AI auth webhook URL ", parsedUrl.Redacted()) + server.LiveAIAuthWebhookURL = parsedUrl + } + httpIngest := true if n.NodeType == core.BroadcasterNode { diff --git 
a/server/ai_mediaserver.go b/server/ai_mediaserver.go index 42ee541066..d0996fac4e 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -432,8 +432,8 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { } } - if AuthWebhookURL != nil { - authResp, err := authenticateAIStream(AuthWebhookURL, AIAuthRequest{ + if LiveAIAuthWebhookURL != nil { + authResp, err := authenticateAIStream(LiveAIAuthWebhookURL, AIAuthRequest{ Stream: streamName, Type: sourceTypeStr, QueryParams: queryParams, diff --git a/server/auth.go b/server/auth.go index 6f53ae0317..27f8468285 100644 --- a/server/auth.go +++ b/server/auth.go @@ -8,6 +8,7 @@ import ( "io" "net/http" "net/url" + "os" "time" "github.com/golang/glog" @@ -98,7 +99,8 @@ func (a authWebhookResponse) areProfilesEqual(b authWebhookResponse) bool { type AIAuthRequest struct { // Stream name or stream key - Stream string `json:"stream"` + Stream string `json:"stream"` + StreamKey string `json:"stream_key"` // Stream type, eg RTMP or WHIP Type string `json:"type"` @@ -123,6 +125,7 @@ type AIAuthResponse struct { } func authenticateAIStream(authURL *url.URL, req AIAuthRequest) (*AIAuthResponse, error) { + req.StreamKey = req.Stream if authURL == nil { return nil, fmt.Errorf("No auth URL configured") } @@ -133,7 +136,15 @@ func authenticateAIStream(authURL *url.URL, req AIAuthRequest) (*AIAuthResponse, return nil, err } - resp, err := http.Post(authURL.String(), "application/json", bytes.NewBuffer(jsonValue)) + request, err := http.NewRequest("POST", authURL.String(), bytes.NewBuffer(jsonValue)) + if err != nil { + return nil, err + } + + request.Header.Set("Content-Type", "application/json") + request.Header.Set("x-api-key", os.Getenv("SHOWCASE_API_KEY")) + + resp, err := http.DefaultClient.Do(request) if err != nil { return nil, err } diff --git a/server/mediaserver.go b/server/mediaserver.go index 9714ba464f..3a95ec72a3 100644 --- a/server/mediaserver.go +++ b/server/mediaserver.go @@ -67,6 +67,7 @@ const 
AISessionManagerTTL = 10 * time.Minute var BroadcastJobVideoProfiles = []ffmpeg.VideoProfile{ffmpeg.P240p30fps4x3, ffmpeg.P360p30fps16x9} var AuthWebhookURL *url.URL +var LiveAIAuthWebhookURL *url.URL func PixelFormatNone() ffmpeg.PixelFormat { return ffmpeg.PixelFormat{RawValue: ffmpeg.PixelFormatNone} From 77f803b9620846d3fc564593a254d95ca13906da Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Mon, 25 Nov 2024 19:20:54 -0300 Subject: [PATCH 04/56] ai: Handle live-video-to-video requests and start runner (#3263) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * pass model to orchestrator from request * (wip) pass trickle url to ai-runner * go.mod: Update ai-worker lib * ai_worker: Handle live-video-to-video jobs * (wip) pass trickle url to ai-runner * (wip) sending trickle server url to ai-runner * swap urls for ai-runner, add control-url * handoff trickle url and params to ai-runner * go fmt, cleanup * remove unused code * code cleanup, comments * disable verification on video-to-video in handleAIrequest * fix variable name * cleanup params from rebase * develop gateway handler for live-video-video requests * go fmt * go fmt and cleanup * Revert "disable verification on video-to-video in handleAIrequest" This reverts commit f1034e1ce3aae5e49ef7ebf82c283f4dada537b0. * re-enable verifySegCreds in handleAiRequest * remove un-used code * only create trickle channels when job accepted by ai-runner * go fmt * update go.mod * remove unused code * improve trickle url handoff, use reliable host value * Update ai-worker version * go mod tidy * update control api to be not nullable, fix trickle url swap, update go mod * change info to debug for max buf log Co-authored-by: Victor Elias * (wip-testing) ffmpeg encoding to gateway * Revert "(wip-testing) ffmpeg encoding to gateway" This reverts commit 89d2ea432dcf1cf1243325afb1988e51853dd0f5. 
* improve trickle hostname, fix params input * update go mod * go fmt, update comment * server: Fix trickle URL and error handling * DEV-WIP: Use system ffmpeg while LPMS doesn't work * update go.mod, controlUrl * core: Fix tests * Revert "DEV-WIP: Use system ffmpeg while LPMS doesn't work" This reverts commit 42b16bceaccb9ab27461e2e146eb529e8e6a2ed3. * go.mod: Update to ai-worker w/ fixed lifecyle * go.mod: Update ai-worker for noop image * server: Make noop image the default --------- Co-authored-by: John | Elite Encoder Co-authored-by: Rafał Leszko --- common/util.go | 24 +++++++++++++ core/ai.go | 1 + core/ai_test.go | 4 +++ core/ai_worker.go | 37 +++++++++++++++++++ go.mod | 12 +++---- go.sum | 32 ++++++++--------- media/rtmp2segment.go | 2 +- server/ai_http.go | 78 ++++++++++++++++++++++++++++++++-------- server/ai_live_video.go | 6 ++-- server/ai_process.go | 39 +++++++------------- server/ai_worker_test.go | 9 +++++ server/rpc.go | 1 + server/rpc_test.go | 7 ++++ 13 files changed, 185 insertions(+), 67 deletions(-) diff --git a/common/util.go b/common/util.go index dd6e73b872..4a408ba6c8 100644 --- a/common/util.go +++ b/common/util.go @@ -497,3 +497,27 @@ func MimeTypeToExtension(mimeType string) (string, error) { } return "", ErrNoExtensionsForType } + +func AppendHostname(urlPath string, host string) (*url.URL, error) { + if urlPath == "" { + return nil, fmt.Errorf("invalid url from orch") + } + pu, err := url.Parse(urlPath) + if err != nil { + return nil, err + } + if pu.Hostname() != "" { + // url has a hostname already so use it + return pu, nil + } else { + // no hostname, so append one + if !strings.HasPrefix(urlPath, "/") { + urlPath = "/" + urlPath + } + u, err := url.Parse(host + urlPath) + if err != nil { + return nil, err + } + return u, nil + } +} diff --git a/core/ai.go b/core/ai.go index 25ad3c2470..348ffd21c7 100644 --- a/core/ai.go +++ b/core/ai.go @@ -27,6 +27,7 @@ type AI interface { SegmentAnything2(context.Context, 
worker.GenSegmentAnything2MultipartRequestBody) (*worker.MasksResponse, error) ImageToText(context.Context, worker.GenImageToTextMultipartRequestBody) (*worker.ImageToTextResponse, error) TextToSpeech(context.Context, worker.GenTextToSpeechJSONRequestBody) (*worker.AudioResponse, error) + LiveVideoToVideo(context.Context, worker.GenLiveVideoToVideoJSONRequestBody) (*worker.LiveVideoToVideoResponse, error) Warm(context.Context, string, string, worker.RunnerEndpoint, worker.OptimizationFlags) error Stop(context.Context) error HasCapacity(pipeline, modelID string) bool diff --git a/core/ai_test.go b/core/ai_test.go index dc924760ee..e82b658ba5 100644 --- a/core/ai_test.go +++ b/core/ai_test.go @@ -663,6 +663,10 @@ func (a *stubAIWorker) TextToSpeech(ctx context.Context, req worker.GenTextToSpe return &worker.AudioResponse{Audio: worker.MediaURL{Url: "http://example.com/audio.wav"}}, nil } +func (a *stubAIWorker) LiveVideoToVideo(ctx context.Context, req worker.GenLiveVideoToVideoJSONRequestBody) (*worker.LiveVideoToVideoResponse, error) { + return &worker.LiveVideoToVideoResponse{}, nil +} + func (a *stubAIWorker) Warm(ctx context.Context, arg1, arg2 string, endpoint worker.RunnerEndpoint, flags worker.OptimizationFlags) error { return nil } diff --git a/core/ai_worker.go b/core/ai_worker.go index 48b76c84bc..235338fca8 100644 --- a/core/ai_worker.go +++ b/core/ai_worker.go @@ -549,6 +549,39 @@ func (orch *orchestrator) TextToImage(ctx context.Context, requestID string, req return res.Results, nil } +func (orch *orchestrator) LiveVideoToVideo(ctx context.Context, requestID string, req worker.GenLiveVideoToVideoJSONRequestBody) (interface{}, error) { + // local AIWorker processes job if combined orchestrator/ai worker + if orch.node.AIWorker != nil { + workerResp, err := orch.node.LiveVideoToVideo(ctx, req) + if err == nil { + return orch.node.saveLocalAIWorkerResults(ctx, *workerResp, requestID, "application/json") + } else { + clog.Errorf(ctx, "Error processing with 
local ai worker err=%q", err) + if monitor.Enabled { + monitor.AIResultSaveError(ctx, "live-video-to-video", *req.ModelId, string(monitor.SegmentUploadErrorUnknown)) + } + return nil, err + } + } + + // remote ai worker processes job + res, err := orch.node.AIWorkerManager.Process(ctx, requestID, "live-video-to-video", *req.ModelId, "", AIJobRequestData{Request: req}) + if err != nil { + return nil, err + } + + res, err = orch.node.saveRemoteAIWorkerResults(ctx, res, requestID) + if err != nil { + clog.Errorf(ctx, "Error saving remote ai result err=%q", err) + if monitor.Enabled { + monitor.AIResultSaveError(ctx, "live-video-to-video", *req.ModelId, string(monitor.SegmentUploadErrorUnknown)) + } + return nil, err + } + + return res.Results, nil +} + func (orch *orchestrator) ImageToImage(ctx context.Context, requestID string, req worker.GenImageToImageMultipartRequestBody) (interface{}, error) { // local AIWorker processes job if combined orchestrator/ai worker if orch.node.AIWorker != nil { @@ -1025,6 +1058,10 @@ func (n *LivepeerNode) TextToSpeech(ctx context.Context, req worker.GenTextToSpe return n.AIWorker.TextToSpeech(ctx, req) } +func (n *LivepeerNode) LiveVideoToVideo(ctx context.Context, req worker.GenLiveVideoToVideoJSONRequestBody) (*worker.LiveVideoToVideoResponse, error) { + return n.AIWorker.LiveVideoToVideo(ctx, req) +} + // transcodeFrames converts a series of image URLs into a video segment for the image-to-video pipeline. 
func (n *LivepeerNode) transcodeFrames(ctx context.Context, sessionID string, urls []string, inProfile ffmpeg.VideoProfile, outProfile ffmpeg.VideoProfile) *TranscodeResult { ctx = clog.AddOrchSessionID(ctx, sessionID) diff --git a/go.mod b/go.mod index bd6662f427..1764ebb0a7 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/uuid v1.6.0 github.com/jaypipes/ghw v0.10.0 github.com/jaypipes/pcidb v1.0.0 - github.com/livepeer/ai-worker v0.12.3 + github.com/livepeer/ai-worker v0.12.4-0.20241125220901-b9bb93fec45d github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18 github.com/livepeer/lpms v0.0.0-20241122145837-7b07ba3a2204 @@ -34,7 +34,7 @@ require ( go.opencensus.io v0.24.0 go.uber.org/goleak v1.3.0 golang.org/x/net v0.28.0 - golang.org/x/sys v0.26.0 + golang.org/x/sys v0.27.0 google.golang.org/grpc v1.65.0 google.golang.org/protobuf v1.34.1 pgregory.net/rapid v1.1.0 @@ -226,11 +226,11 @@ require ( github.com/whyrusleeping/cbor-gen v0.0.0-20230418232409-daab9ece03a0 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect github.com/yusufpapurcu/wmi v1.2.3 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 // indirect - go.opentelemetry.io/otel v1.31.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 // indirect + go.opentelemetry.io/otel v1.32.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect - go.opentelemetry.io/otel/metric v1.31.0 // indirect - go.opentelemetry.io/otel/trace v1.31.0 // indirect + go.opentelemetry.io/otel/metric v1.32.0 // indirect + go.opentelemetry.io/otel/trace v1.32.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/go.sum b/go.sum index 8e91ed4962..bcbe5de0ae 100644 --- a/go.sum +++ b/go.sum @@ -605,8 
+605,8 @@ github.com/libp2p/go-netroute v0.2.0 h1:0FpsbsvuSnAhXFnCY0VLFbJOzaK0VnP0r1QT/o4n github.com/libp2p/go-netroute v0.2.0/go.mod h1:Vio7LTzZ+6hoT4CMZi5/6CpY3Snzh2vgZhWgxMNwlQI= github.com/libp2p/go-openssl v0.1.0 h1:LBkKEcUv6vtZIQLVTegAil8jbNpJErQ9AnT+bWV+Ooo= github.com/libp2p/go-openssl v0.1.0/go.mod h1:OiOxwPpL3n4xlenjx2h7AwSGaFSC/KZvf6gNdOBQMtc= -github.com/livepeer/ai-worker v0.12.3 h1:LoSfYlh66clbG5j2Q0WLvyGDM6HKCWeHHmzrxT6lMnQ= -github.com/livepeer/ai-worker v0.12.3/go.mod h1:/Deme7XXRP4BiYXt/j694Ygw+dh8rWJdikJsKY64sjE= +github.com/livepeer/ai-worker v0.12.4-0.20241125220901-b9bb93fec45d h1:4Ab7zR46jOfTX2vPQnrxdO0bJEe4azzhFeoPpMvBYM8= +github.com/livepeer/ai-worker v0.12.4-0.20241125220901-b9bb93fec45d/go.mod h1:pfWCS5v8TIWNImxAZ6ikhiJW9Re88rsDnlW5Ktn7r2k= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b h1:VQcnrqtCA2UROp7q8ljkh2XA/u0KRgVv0S1xoUvOweE= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b/go.mod h1:hwJ5DKhl+pTanFWl+EUpw1H7ukPO/H+MFpgA7jjshzw= github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded h1:ZQlvR5RB4nfT+cOQee+WqmaDOgGtP2oDMhcVvR4L0yA= @@ -977,20 +977,20 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= -go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= -go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 h1:DheMAlT6POBP+gh8RUH19EOTnQIor5QE0uSRPtzCpSw= 
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0/go.mod h1:wZcGmeVO9nzP67aYSLDqXNWK87EZWhi7JWj1v7ZXf94= +go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= +go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 h1:lUsI2TYsQw2r1IASwoROaCnjdj2cvC2+Jbxvk6nHnWU= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0/go.mod h1:2HpZxxQurfGxJlJDblybejHB6RX6pmExPNe517hREw4= -go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= -go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= -go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk= -go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0= -go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= -go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.32.0 h1:cMyu9O88joYEaI47CnQkxO1XZdpoTF9fEnW2duIddhw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.32.0/go.mod h1:6Am3rn7P9TVVeXYG+wtcGE7IE1tsQ+bP3AuWcKt/gOI= +go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= +go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= +go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= +go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= +go.opentelemetry.io/otel/trace v1.32.0 
h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= +go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= @@ -1215,8 +1215,8 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= diff --git a/media/rtmp2segment.go b/media/rtmp2segment.go index 42c207a9a1..44db3a8c11 100644 --- a/media/rtmp2segment.go +++ b/media/rtmp2segment.go @@ -252,7 +252,7 @@ func readSegment(segmentHandler SegmentHandler, file *os.File, pipeName string) } if n == len(buf) && n < 1024*1024 { newLen := int(float64(len(buf)) * 1.5) - slog.Info("Max buf hit, increasing", "oldSize", humanBytes(int64(len(buf))), "newSize", humanBytes(int64(newLen))) + slog.Debug("Max buf hit, increasing", "oldSize", humanBytes(int64(len(buf))), "newSize", humanBytes(int64(newLen))) buf = make([]byte, newLen) } diff --git 
a/server/ai_http.go b/server/ai_http.go index 388b1b64df..be4682d2c6 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -18,6 +18,7 @@ import ( "time" "unicode/utf8" + ethcommon "github.com/ethereum/go-ethereum/common" "github.com/getkin/kin-openapi/openapi3filter" "github.com/golang/glog" "github.com/livepeer/ai-worker/worker" @@ -67,7 +68,6 @@ func startAIServer(lp lphttp) error { lp.transRPC.Handle("/segment-anything-2", oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenSegmentAnything2MultipartRequestBody]))) lp.transRPC.Handle("/image-to-text", oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenImageToTextMultipartRequestBody]))) lp.transRPC.Handle("/text-to-speech", oapiReqValidator(aiHttpHandle(&lp, jsonDecoder[worker.GenTextToSpeechJSONRequestBody]))) - lp.transRPC.Handle("/live-video-to-video", oapiReqValidator(lp.StartLiveVideoToVideo())) // Additionally, there is the '/aiResults' endpoint registered in server/rpc.go @@ -96,26 +96,41 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { remoteAddr := getRemoteAddr(r) ctx := clog.AddVal(r.Context(), clog.ClientIP, remoteAddr) + requestID := string(core.RandomManifestID()) + + var req worker.GenLiveVideoToVideoJSONRequestBody + if err := jsonDecoder[worker.GenLiveVideoToVideoJSONRequestBody](&req, r); err != nil { + respondWithError(w, err.Error(), http.StatusBadRequest) + return + } - // skipping handleAIRequest for now until we have payments + orch := h.orchestrator + pipeline := "live-video-to-video" + cap := core.Capability_LiveVideoToVideo + modelID := *req.ModelId + clog.V(common.VERBOSE).Infof(ctx, "Received request id=%v cap=%v modelID=%v", requestID, cap, modelID) + // Create storage for the request (for AI Workers, must run before CheckAICapacity) + err := orch.CreateStorageForRequest(requestID) + if err != nil { + respondWithError(w, "Could not create storage to receive results", http.StatusInternalServerError) + } + + // Check if there is capacity for the 
request + if !orch.CheckAICapacity(pipeline, modelID) { + respondWithError(w, fmt.Sprintf("Insufficient capacity for pipeline=%v modelID=%v", pipeline, modelID), http.StatusServiceUnavailable) + return + } + + // Start trickle server for live-video var ( - mid = string(core.RandomManifestID()) - pubUrl = TrickleHTTPPath + mid + mid = requestID // Request ID is used for the manifest ID + pubUrl = orch.ServiceURI().JoinPath(TrickleHTTPPath, mid).String() subUrl = pubUrl + "-out" controlUrl = pubUrl + "-control" ) - jsonData, err := json.Marshal( - &worker.LiveVideoToVideoResponse{ - PublishUrl: pubUrl, - SubscribeUrl: subUrl, - ControlUrl: controlUrl, - }) - if err != nil { - respondWithError(w, err.Error(), http.StatusInternalServerError) - return - } + //If successful, then create the trickle channels // Precreate the channels to avoid race conditions // TODO get the expected mime type from the request pubCh := trickle.NewLocalPublisher(h.trickleSrv, mid, "video/MP2T") @@ -139,8 +154,41 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { } }() - // TODO subscribe to the subscribeUrl for output monitoring + // Prepare request to worker + workerReq := worker.LiveVideoToVideoParams{ + ModelId: req.ModelId, + PublishUrl: subUrl, + SubscribeUrl: pubUrl, + ControlUrl: &controlUrl, + Params: req.Params, + } + + // Send request to the worker + _, err = orch.LiveVideoToVideo(ctx, requestID, workerReq) + if err != nil { + if monitor.Enabled { + monitor.AIProcessingError(err.Error(), pipeline, modelID, ethcommon.Address{}.String()) + } + + pubCh.Close() + subCh.Close() + controlPubCh.Close() + respondWithError(w, err.Error(), http.StatusInternalServerError) + return + } + + // Prepare the response + jsonData, err := json.Marshal(&worker.LiveVideoToVideoResponse{ + PublishUrl: pubUrl, + SubscribeUrl: subUrl, + ControlUrl: controlUrl, + }) + if err != nil { + respondWithError(w, err.Error(), http.StatusInternalServerError) + return + } + clog.Infof(ctx, "Processed 
request id=%v cap=%v modelID=%v took=%v", requestID, cap, modelID) respondJsonOk(w, jsonData) }) } diff --git a/server/ai_live_video.go b/server/ai_live_video.go index f2f26dcac8..f3eade7af1 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -57,8 +57,10 @@ func startTrickleSubscribe(url *url.URL, params aiRequestParams) { return } defer segment.Body.Close() - // TODO send this into ffmpeg - io.Copy(w, segment.Body) + if _, err = io.Copy(w, segment.Body); err != nil { + slog.Info("Error copying to ffmpeg stdin", "url", url, "err", err) + return + } } }() diff --git a/server/ai_process.go b/server/ai_process.go index c532891f9b..12337ca3ea 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -12,7 +12,6 @@ import ( "math" "math/big" "net/http" - "net/url" "path/filepath" "strconv" "strings" @@ -37,7 +36,7 @@ const defaultAudioToTextModelID = "openai/whisper-large-v3" const defaultLLMModelID = "meta-llama/llama-3.1-8B-Instruct" const defaultSegmentAnything2ModelID = "facebook/sam2-hiera-large" const defaultImageToTextModelID = "Salesforce/blip-image-captioning-large" -const defaultLiveVideoToVideoModelID = "cumulo-autumn/stream-diffusion" +const defaultLiveVideoToVideoModelID = "noop" const defaultTextToSpeechModelID = "parler-tts/parler-tts-large-v1" var errWrongFormat = fmt.Errorf("result not in correct format") @@ -1013,41 +1012,27 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A } return nil, err } + + // Send request to orchestrator resp, err := client.GenLiveVideoToVideoWithResponse(ctx, req) if err != nil { return nil, err } + if resp.JSON200 != nil { - // append orch hostname to the given url if necessary - appendHostname := func(urlPath string) (*url.URL, error) { - if urlPath == "" { - return nil, fmt.Errorf("invalid url from orch") - } - pu, err := url.Parse(urlPath) - if err != nil { - return nil, err - } - if pu.Hostname() != "" { - // url has a hostname already so use it - return pu, nil - 
} - // no hostname, so append one - u := sess.Transcoder() + urlPath - return url.Parse(u) - } - pub, err := appendHostname(resp.JSON200.PublishUrl) + host := sess.Transcoder() + pub, err := common.AppendHostname(resp.JSON200.PublishUrl, host) if err != nil { - return nil, fmt.Errorf("pub url - %w", err) + return nil, fmt.Errorf("invalid publish URL: %w", err) } - sub, err := appendHostname(resp.JSON200.SubscribeUrl) + sub, err := common.AppendHostname(resp.JSON200.SubscribeUrl, host) if err != nil { - return nil, fmt.Errorf("sub url %w", err) + return nil, fmt.Errorf("invalid subscribe URL: %w", err) } - control, err := appendHostname(resp.JSON200.ControlUrl) + control, err := common.AppendHostname(resp.JSON200.ControlUrl, host) if err != nil { - return nil, fmt.Errorf("control pub url - %w", err) + return nil, fmt.Errorf("invalid control URL: %w", err) } - clog.V(common.VERBOSE).Infof(ctx, "pub %s sub %s control %s", pub, sub, control) startTricklePublish(pub, params) startTrickleSubscribe(sub, params) startControlPublish(control, params) @@ -1457,7 +1442,7 @@ func processAIRequest(ctx context.Context, params aiRequestParams, req interface } clog.Infof(ctx, "Error submitting request modelID=%v try=%v orch=%v err=%v", modelID, tries, sess.Transcoder(), err) - params.sessManager.Remove(ctx, sess) + params.sessManager.Remove(ctx, sess) //TODO: Improve session selection logic for live-video-to-video if errors.Is(err, common.ErrAudioDurationCalculation) { return nil, &BadRequestError{err} diff --git a/server/ai_worker_test.go b/server/ai_worker_test.go index 4ecbd8768e..602c01fed9 100644 --- a/server/ai_worker_test.go +++ b/server/ai_worker_test.go @@ -626,6 +626,15 @@ func (a *stubAIWorker) TextToSpeech(ctx context.Context, req worker.GenTextToSpe } } +func (a *stubAIWorker) LiveVideoToVideo(ctx context.Context, req worker.GenLiveVideoToVideoJSONRequestBody) (*worker.LiveVideoToVideoResponse, error) { + a.Called++ + if a.Err != nil { + return nil, a.Err + } else { + 
return &worker.LiveVideoToVideoResponse{}, nil + } +} + func (a *stubAIWorker) Warm(ctx context.Context, arg1, arg2 string, endpoint worker.RunnerEndpoint, flags worker.OptimizationFlags) error { a.Called++ return nil diff --git a/server/rpc.go b/server/rpc.go index ca9ca05f74..4ae04a48f4 100644 --- a/server/rpc.go +++ b/server/rpc.go @@ -78,6 +78,7 @@ type Orchestrator interface { SegmentAnything2(ctx context.Context, requestID string, req worker.GenSegmentAnything2MultipartRequestBody) (interface{}, error) ImageToText(ctx context.Context, requestID string, req worker.GenImageToTextMultipartRequestBody) (interface{}, error) TextToSpeech(ctx context.Context, requestID string, req worker.GenTextToSpeechJSONRequestBody) (interface{}, error) + LiveVideoToVideo(ctx context.Context, requestID string, req worker.GenLiveVideoToVideoJSONRequestBody) (interface{}, error) } // Balance describes methods for a session's balance maintenance diff --git a/server/rpc_test.go b/server/rpc_test.go index 162dacc2e2..43ec1a3045 100644 --- a/server/rpc_test.go +++ b/server/rpc_test.go @@ -223,6 +223,10 @@ func (r *stubOrchestrator) TextToSpeech(ctx context.Context, requestID string, r return nil, nil } +func (r *stubOrchestrator) LiveVideoToVideo(ctx context.Context, requestID string, req worker.GenLiveVideoToVideoJSONRequestBody) (interface{}, error) { + return nil, nil +} + func (r *stubOrchestrator) CheckAICapacity(pipeline, modelID string) bool { return true } @@ -1425,6 +1429,9 @@ func (r *mockOrchestrator) ImageToText(ctx context.Context, requestID string, re func (r *mockOrchestrator) TextToSpeech(ctx context.Context, requestID string, req worker.GenTextToSpeechJSONRequestBody) (interface{}, error) { return nil, nil } +func (r *mockOrchestrator) LiveVideoToVideo(ctx context.Context, requestID string, req worker.GenLiveVideoToVideoJSONRequestBody) (interface{}, error) { + return nil, nil +} func (r *mockOrchestrator) CheckAICapacity(pipeline, modelID string) bool { return true } 
From 976b115cda586f232350c003d09fb314fc8bf54a Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Wed, 27 Nov 2024 06:53:39 -0300 Subject: [PATCH 05/56] Use system ffmpeg for streaming trickle into RTMP (#3276) --- docker/Dockerfile | 2 ++ server/ai_live_video.go | 30 ++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 120dc74761..d9a2635917 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -68,4 +68,6 @@ COPY --from=build /usr/bin/grpc_health_probe /usr/local/bin/grpc_health_probe COPY --from=build /src/tasmodel.pb /tasmodel.pb COPY --from=build /usr/share/misc/pci.ids /usr/share/misc/pci.ids +RUN apt update && apt install -yqq ffmpeg + ENTRYPOINT ["/usr/local/bin/livepeer"] diff --git a/server/ai_live_video.go b/server/ai_live_video.go index f3eade7af1..c43a92d2b1 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -8,11 +8,12 @@ import ( "net/http" "net/url" "os" + "os/exec" + "time" "github.com/livepeer/go-livepeer/core" "github.com/livepeer/go-livepeer/media" "github.com/livepeer/go-livepeer/trickle" - "github.com/livepeer/lpms/ffmpeg" ) func startTricklePublish(url *url.URL, params aiRequestParams) { @@ -64,16 +65,25 @@ func startTrickleSubscribe(url *url.URL, params aiRequestParams) { } }() - // lpms + // TODO: Change this to LPMS go func() { - ffmpeg.Transcode3(&ffmpeg.TranscodeOptionsIn{ - Fname: fmt.Sprintf("pipe:%d", r.Fd()), - }, []ffmpeg.TranscodeOptions{{ - Oname: params.liveParams.outputRTMPURL, - AudioEncoder: ffmpeg.ComponentOptions{Name: "copy"}, - VideoEncoder: ffmpeg.ComponentOptions{Name: "copy"}, - Muxer: ffmpeg.ComponentOptions{Name: "flv"}, - }}) + defer r.Close() + for { + cmd := exec.Command("ffmpeg", + "-i", "pipe:0", + "-c:a", "copy", + "-c:v", "copy", + "-f", "flv", + params.liveParams.outputRTMPURL, + ) + cmd.Stdin = r + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + slog.Info("Error 
running ffmpeg command", "err", err, "url", url) + } + time.Sleep(5 * time.Second) + } }() } From 15a37cf53e68764c090614c328259f8bb5bd3980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Leszko?= Date: Wed, 27 Nov 2024 10:54:21 +0100 Subject: [PATCH 06/56] Add -liveAITrickleHostForRunner flag (#3281) --- cmd/livepeer/livepeer.go | 1 + cmd/livepeer/starter/starter.go | 182 ++++++++++++++++---------------- core/livepeernode.go | 3 +- go.mod | 2 +- go.sum | 4 +- server/ai_http.go | 24 ++++- 6 files changed, 120 insertions(+), 96 deletions(-) diff --git a/cmd/livepeer/livepeer.go b/cmd/livepeer/livepeer.go index 4bfd274635..aee8f2b324 100755 --- a/cmd/livepeer/livepeer.go +++ b/cmd/livepeer/livepeer.go @@ -164,6 +164,7 @@ func parseLivepeerConfig() starter.LivepeerConfig { // Live AI Media Server: cfg.MediaMTXApiPassword = flag.String("mediaMTXApiPassword", "", "HTTP basic auth password for MediaMTX API requests") + cfg.LiveAITrickleHostForRunner = flag.String("liveAITrickleHostForRunner", "", "Trickle Host used by AI Runner; It's used to overwrite the publicly available Trickle Host") // Onchain: cfg.EthAcctAddr = flag.String("ethAcctAddr", *cfg.EthAcctAddr, "Existing Eth account address. 
For use when multiple ETH accounts exist in the keystore directory") diff --git a/cmd/livepeer/starter/starter.go b/cmd/livepeer/starter/starter.go index 5bc6d820c9..2de0a1e673 100755 --- a/cmd/livepeer/starter/starter.go +++ b/cmd/livepeer/starter/starter.go @@ -78,95 +78,96 @@ const ( ) type LivepeerConfig struct { - Network *string - RtmpAddr *string - CliAddr *string - HttpAddr *string - ServiceAddr *string - OrchAddr *string - VerifierURL *string - EthController *string - VerifierPath *string - LocalVerify *bool - HttpIngest *bool - Orchestrator *bool - Transcoder *bool - AIServiceRegistry *bool - AIWorker *bool - Gateway *bool - Broadcaster *bool - OrchSecret *string - TranscodingOptions *string - AIModels *string - MaxAttempts *int - SelectRandWeight *float64 - SelectStakeWeight *float64 - SelectPriceWeight *float64 - SelectPriceExpFactor *float64 - OrchPerfStatsURL *string - Region *string - MaxPricePerUnit *string - MaxPricePerCapability *string - IgnoreMaxPriceIfNeeded *bool - MinPerfScore *float64 - DiscoveryTimeout *time.Duration - MaxSessions *string - CurrentManifest *bool - Nvidia *string - Netint *string - HevcDecoding *bool - TestTranscoder *bool - EthAcctAddr *string - EthPassword *string - EthKeystorePath *string - EthOrchAddr *string - EthUrl *string - TxTimeout *time.Duration - MaxTxReplacements *int - GasLimit *int - MinGasPrice *int64 - MaxGasPrice *int - InitializeRound *bool - InitializeRoundMaxDelay *time.Duration - TicketEV *string - MaxFaceValue *string - MaxTicketEV *string - MaxTotalEV *string - DepositMultiplier *int - PricePerUnit *string - PixelsPerUnit *string - PriceFeedAddr *string - AutoAdjustPrice *bool - PricePerGateway *string - PricePerBroadcaster *string - BlockPollingInterval *int - Redeemer *bool - RedeemerAddr *string - Reward *bool - Monitor *bool - MetricsPerStream *bool - MetricsExposeClientIP *bool - MetadataQueueUri *string - MetadataAmqpExchange *string - MetadataPublishTimeout *time.Duration - Datadir *string - 
AIModelsDir *string - Objectstore *string - Recordstore *string - FVfailGsBucket *string - FVfailGsKey *string - AuthWebhookURL *string - LiveAIAuthWebhookURL *string - OrchWebhookURL *string - OrchBlacklist *string - OrchMinLivepeerVersion *string - TestOrchAvail *bool - AIRunnerImage *string - KafkaBootstrapServers *string - KafkaUsername *string - KafkaPassword *string - KafkaGatewayTopic *string - MediaMTXApiPassword *string + Network *string + RtmpAddr *string + CliAddr *string + HttpAddr *string + ServiceAddr *string + OrchAddr *string + VerifierURL *string + EthController *string + VerifierPath *string + LocalVerify *bool + HttpIngest *bool + Orchestrator *bool + Transcoder *bool + AIServiceRegistry *bool + AIWorker *bool + Gateway *bool + Broadcaster *bool + OrchSecret *string + TranscodingOptions *string + AIModels *string + MaxAttempts *int + SelectRandWeight *float64 + SelectStakeWeight *float64 + SelectPriceWeight *float64 + SelectPriceExpFactor *float64 + OrchPerfStatsURL *string + Region *string + MaxPricePerUnit *string + MaxPricePerCapability *string + IgnoreMaxPriceIfNeeded *bool + MinPerfScore *float64 + DiscoveryTimeout *time.Duration + MaxSessions *string + CurrentManifest *bool + Nvidia *string + Netint *string + HevcDecoding *bool + TestTranscoder *bool + EthAcctAddr *string + EthPassword *string + EthKeystorePath *string + EthOrchAddr *string + EthUrl *string + TxTimeout *time.Duration + MaxTxReplacements *int + GasLimit *int + MinGasPrice *int64 + MaxGasPrice *int + InitializeRound *bool + InitializeRoundMaxDelay *time.Duration + TicketEV *string + MaxFaceValue *string + MaxTicketEV *string + MaxTotalEV *string + DepositMultiplier *int + PricePerUnit *string + PixelsPerUnit *string + PriceFeedAddr *string + AutoAdjustPrice *bool + PricePerGateway *string + PricePerBroadcaster *string + BlockPollingInterval *int + Redeemer *bool + RedeemerAddr *string + Reward *bool + Monitor *bool + MetricsPerStream *bool + MetricsExposeClientIP *bool + 
MetadataQueueUri *string + MetadataAmqpExchange *string + MetadataPublishTimeout *time.Duration + Datadir *string + AIModelsDir *string + Objectstore *string + Recordstore *string + FVfailGsBucket *string + FVfailGsKey *string + AuthWebhookURL *string + LiveAIAuthWebhookURL *string + LiveAITrickleHostForRunner *string + OrchWebhookURL *string + OrchBlacklist *string + OrchMinLivepeerVersion *string + TestOrchAvail *bool + AIRunnerImage *string + KafkaBootstrapServers *string + KafkaUsername *string + KafkaPassword *string + KafkaGatewayTopic *string + MediaMTXApiPassword *string } // DefaultLivepeerConfig creates LivepeerConfig exactly the same as when no flags are passed to the livepeer process. @@ -1555,6 +1556,9 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) { if cfg.MediaMTXApiPassword != nil { n.MediaMTXApiPassword = *cfg.MediaMTXApiPassword } + if cfg.LiveAITrickleHostForRunner != nil { + n.LiveAITrickleHostForRunner = *cfg.LiveAITrickleHostForRunner + } //Create Livepeer Node diff --git a/core/livepeernode.go b/core/livepeernode.go index 1323523882..fb1ceeeb8f 100644 --- a/core/livepeernode.go +++ b/core/livepeernode.go @@ -153,7 +153,8 @@ type LivepeerNode struct { LivePipelines map[string]*LivePipeline LiveMu *sync.RWMutex - MediaMTXApiPassword string + MediaMTXApiPassword string + LiveAITrickleHostForRunner string } type LivePipeline struct { diff --git a/go.mod b/go.mod index 1764ebb0a7..c0c352d580 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/uuid v1.6.0 github.com/jaypipes/ghw v0.10.0 github.com/jaypipes/pcidb v1.0.0 - github.com/livepeer/ai-worker v0.12.4-0.20241125220901-b9bb93fec45d + github.com/livepeer/ai-worker v0.12.4 github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18 github.com/livepeer/lpms v0.0.0-20241122145837-7b07ba3a2204 diff --git a/go.sum b/go.sum index bcbe5de0ae..d6bba2ec19 100644 --- a/go.sum +++ 
b/go.sum @@ -605,8 +605,8 @@ github.com/libp2p/go-netroute v0.2.0 h1:0FpsbsvuSnAhXFnCY0VLFbJOzaK0VnP0r1QT/o4n github.com/libp2p/go-netroute v0.2.0/go.mod h1:Vio7LTzZ+6hoT4CMZi5/6CpY3Snzh2vgZhWgxMNwlQI= github.com/libp2p/go-openssl v0.1.0 h1:LBkKEcUv6vtZIQLVTegAil8jbNpJErQ9AnT+bWV+Ooo= github.com/libp2p/go-openssl v0.1.0/go.mod h1:OiOxwPpL3n4xlenjx2h7AwSGaFSC/KZvf6gNdOBQMtc= -github.com/livepeer/ai-worker v0.12.4-0.20241125220901-b9bb93fec45d h1:4Ab7zR46jOfTX2vPQnrxdO0bJEe4azzhFeoPpMvBYM8= -github.com/livepeer/ai-worker v0.12.4-0.20241125220901-b9bb93fec45d/go.mod h1:pfWCS5v8TIWNImxAZ6ikhiJW9Re88rsDnlW5Ktn7r2k= +github.com/livepeer/ai-worker v0.12.4 h1:RuCZP/JUEOo/q10Ry+s0oOr06DOSnpEDTE6y/NqXFxs= +github.com/livepeer/ai-worker v0.12.4/go.mod h1:pfWCS5v8TIWNImxAZ6ikhiJW9Re88rsDnlW5Ktn7r2k= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b h1:VQcnrqtCA2UROp7q8ljkh2XA/u0KRgVv0S1xoUvOweE= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b/go.mod h1:hwJ5DKhl+pTanFWl+EUpw1H7ukPO/H+MFpgA7jjshzw= github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded h1:ZQlvR5RB4nfT+cOQee+WqmaDOgGtP2oDMhcVvR4L0yA= diff --git a/server/ai_http.go b/server/ai_http.go index be4682d2c6..9d0dcf29a3 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -10,9 +10,11 @@ import ( "fmt" "image" "io" + "log/slog" "mime" "mime/multipart" "net/http" + url2 "net/url" "strconv" "strings" "time" @@ -155,11 +157,12 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { }() // Prepare request to worker + controlUrlOverwrite := overwriteHost(h.node.LiveAITrickleHostForRunner, controlUrl) workerReq := worker.LiveVideoToVideoParams{ ModelId: req.ModelId, - PublishUrl: subUrl, - SubscribeUrl: pubUrl, - ControlUrl: &controlUrl, + PublishUrl: overwriteHost(h.node.LiveAITrickleHostForRunner, subUrl), + SubscribeUrl: overwriteHost(h.node.LiveAITrickleHostForRunner, pubUrl), + ControlUrl: &controlUrlOverwrite, Params: req.Params, } @@ -193,6 +196,21 @@ func (h *lphttp) 
StartLiveVideoToVideo() http.Handler { }) } +// overwriteHost is used to overwrite the trickle host, because it may be different for runner +// runner may run inside Docker container, in a different network, or even on a different machine +func overwriteHost(hostOverwrite, url string) string { + if hostOverwrite == "" { + return url + } + u, err := url2.ParseRequestURI(url) + if err != nil { + slog.Warn("Couldn't parse url to overwrite for worker, using original url", "url", url, "err", err) + return url + } + u.Host = hostOverwrite + return u.String() +} + func handleAIRequest(ctx context.Context, w http.ResponseWriter, r *http.Request, orch Orchestrator, req interface{}) { payment, err := getPayment(r.Header.Get(paymentHeader)) if err != nil { From c5729cafefb6745470f6c7c429283d39a09a1f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Wilczy=C5=84ski?= Date: Thu, 28 Nov 2024 08:36:34 +0100 Subject: [PATCH 07/56] Dockerfile.mediamtx remove custom mediamtx.yml config --- docker/Dockerfile.mediamtx | 4 +- docker/mediamtx.yml | 705 ------------------------------------- 2 files changed, 1 insertion(+), 708 deletions(-) delete mode 100644 docker/mediamtx.yml diff --git a/docker/Dockerfile.mediamtx b/docker/Dockerfile.mediamtx index 6c26fdfe94..6c01819286 100644 --- a/docker/Dockerfile.mediamtx +++ b/docker/Dockerfile.mediamtx @@ -1,8 +1,8 @@ FROM ubuntu:24.04 -ARG TARGETARCH ENV MEDIAMTX_VERSION="v1.9.3" +# we need curl in the image as it's later used in the runOnReady command RUN apt-get update \ && apt-get install -y \ ca-certificates \ @@ -14,6 +14,4 @@ RUN curl -L https://github.com/bluenviron/mediamtx/releases/download/${MEDIAMTX_ && tar xzvf /mediamtx.tar.gz \ && rm /mediamtx.tar.gz -COPY ./mediamtx.yml /mediamtx.yml - ENTRYPOINT ["/mediamtx"] diff --git a/docker/mediamtx.yml b/docker/mediamtx.yml deleted file mode 100644 index aeb14dd066..0000000000 --- a/docker/mediamtx.yml +++ /dev/null @@ -1,705 +0,0 @@ -############################################### 
-# Global settings - -# Settings in this section are applied anywhere. - -############################################### -# Global settings -> General - -# Verbosity of the program; available values are "error", "warn", "info", "debug". -logLevel: debug -# Destinations of log messages; available values are "stdout", "file" and "syslog". -logDestinations: [ stdout ] -# If "file" is in logDestinations, this is the file which will receive the logs. -logFile: mediamtx.log - -# Timeout of read operations. -readTimeout: 10s -# Timeout of write operations. -writeTimeout: 10s -# Size of the queue of outgoing packets. -# A higher value allows to increase throughput, a lower value allows to save RAM. -writeQueueSize: 512 -# Maximum size of outgoing UDP packets. -# This can be decreased to avoid fragmentation on networks with a low UDP MTU. -udpMaxPayloadSize: 1472 - -# Command to run when a client connects to the server. -# This is terminated with SIGINT when a client disconnects from the server. -# The following environment variables are available: -# * RTSP_PORT: RTSP server port -# * MTX_CONN_TYPE: connection type -# * MTX_CONN_ID: connection ID -runOnConnect: -# Restart the command if it exits. -runOnConnectRestart: no -# Command to run when a client disconnects from the server. -# Environment variables are the same of runOnConnect. -runOnDisconnect: - -############################################### -# Global settings -> Authentication - -# Authentication method. Available values are: -# * internal: users are stored in the configuration file -# * http: an external HTTP URL is contacted to perform authentication -# * jwt: an external identity server provides authentication through JWTs -authMethod: internal - -# Internal authentication. -# list of users. -authInternalUsers: - # Default unprivileged user. - # Username. 'any' means any user, including anonymous ones. - - user: any - # Password. Not used in case of 'any' user. 
- pass: - # IPs or networks allowed to use this user. An empty list means any IP. - ips: [ ] - # List of permissions. - permissions: - # Available actions are: publish, read, playback, api, metrics, pprof. - - action: publish - # Paths can be set to further restrict access to a specific path. - # An empty path means any path. - # Regular expressions can be used by using a tilde as prefix. - path: - - action: read - path: - - action: playback - path: - - # Default administrator. - # This allows to use API, metrics and PPROF without authentication, - # if the IP is localhost. - - user: any - pass: - ips: [ '127.0.0.1', '::1' ] - permissions: - - action: api - - action: metrics - - action: pprof - -# HTTP-based authentication. -# URL called to perform authentication. Every time a user wants -# to authenticate, the server calls this URL with the POST method -# and a body containing: -# { -# "user": "user", -# "password": "password", -# "ip": "ip", -# "action": "publish|read|playback|api|metrics|pprof", -# "path": "path", -# "protocol": "rtsp|rtmp|hls|webrtc|srt", -# "id": "id", -# "query": "query" -# } -# If the response code is 20x, authentication is accepted, otherwise -# it is discarded. -authHTTPAddress: -# Actions to exclude from HTTP-based authentication. -# Format is the same as the one of user permissions. -authHTTPExclude: - - action: api - - action: metrics - - action: pprof - -# JWT-based authentication. -# Users have to login through an external identity server and obtain a JWT. -# This JWT must contain the claim "mediamtx_permissions" with permissions, -# for instance: -# { -# ... -# "mediamtx_permissions": [ -# { -# "action": "publish", -# "path": "somepath" -# } -# ] -# } -# Users are expected to pass the JWT in the Authorization header or as a query parameter. -# This is the JWKS URL that will be used to pull (once) the public key that allows -# to validate JWTs. -authJWTJWKS: -# name of the claim that contains permissions. 
-authJWTClaimKey: mediamtx_permissions - -############################################### -# Global settings -> Control API - -# Enable controlling the server through the Control API. -api: yes -# Address of the Control API listener. -apiAddress: :9997 -# Enable TLS/HTTPS on the Control API server. -apiEncryption: no -# Path to the server key. This is needed only when encryption is yes. -# This can be generated with: -# openssl genrsa -out server.key 2048 -# openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650 -apiServerKey: server.key -# Path to the server certificate. -apiServerCert: server.crt -# Value of the Access-Control-Allow-Origin header provided in every HTTP response. -apiAllowOrigin: '*' -# List of IPs or CIDRs of proxies placed before the HTTP server. -# If the server receives a request from one of these entries, IP in logs -# will be taken from the X-Forwarded-For header. -apiTrustedProxies: [ ] - -############################################### -# Global settings -> Metrics - -# Enable Prometheus-compatible metrics. -metrics: no -# Address of the metrics HTTP listener. -metricsAddress: :9998 -# Enable TLS/HTTPS on the Metrics server. -metricsEncryption: no -# Path to the server key. This is needed only when encryption is yes. -# This can be generated with: -# openssl genrsa -out server.key 2048 -# openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650 -metricsServerKey: server.key -# Path to the server certificate. -metricsServerCert: server.crt -# Value of the Access-Control-Allow-Origin header provided in every HTTP response. -metricsAllowOrigin: '*' -# List of IPs or CIDRs of proxies placed before the HTTP server. -# If the server receives a request from one of these entries, IP in logs -# will be taken from the X-Forwarded-For header. -metricsTrustedProxies: [ ] - -############################################### -# Global settings -> PPROF - -# Enable pprof-compatible endpoint to monitor performances. 
-pprof: no -# Address of the pprof listener. -pprofAddress: :9999 -# Enable TLS/HTTPS on the pprof server. -pprofEncryption: no -# Path to the server key. This is needed only when encryption is yes. -# This can be generated with: -# openssl genrsa -out server.key 2048 -# openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650 -pprofServerKey: server.key -# Path to the server certificate. -pprofServerCert: server.crt -# Value of the Access-Control-Allow-Origin header provided in every HTTP response. -pprofAllowOrigin: '*' -# List of IPs or CIDRs of proxies placed before the HTTP server. -# If the server receives a request from one of these entries, IP in logs -# will be taken from the X-Forwarded-For header. -pprofTrustedProxies: [ ] - -############################################### -# Global settings -> Playback server - -# Enable downloading recordings from the playback server. -playback: no -# Address of the playback server listener. -playbackAddress: :9996 -# Enable TLS/HTTPS on the playback server. -playbackEncryption: no -# Path to the server key. This is needed only when encryption is yes. -# This can be generated with: -# openssl genrsa -out server.key 2048 -# openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650 -playbackServerKey: server.key -# Path to the server certificate. -playbackServerCert: server.crt -# Value of the Access-Control-Allow-Origin header provided in every HTTP response. -playbackAllowOrigin: '*' -# List of IPs or CIDRs of proxies placed before the HTTP server. -# If the server receives a request from one of these entries, IP in logs -# will be taken from the X-Forwarded-For header. -playbackTrustedProxies: [ ] - -############################################### -# Global settings -> RTSP server - -# Enable publishing and reading streams with the RTSP protocol. -rtsp: yes -# List of enabled RTSP transport protocols. 
-# UDP is the most performant, but doesn't work when there's a NAT/firewall between -# server and clients, and doesn't support encryption. -# UDP-multicast allows to save bandwidth when clients are all in the same LAN. -# TCP is the most versatile, and does support encryption. -# The handshake is always performed with TCP. -protocols: [ udp, multicast, tcp ] -# Encrypt handshakes and TCP streams with TLS (RTSPS). -# Available values are "no", "strict", "optional". -encryption: "no" -# Address of the TCP/RTSP listener. This is needed only when encryption is "no" or "optional". -rtspAddress: :8554 -# Address of the TCP/TLS/RTSPS listener. This is needed only when encryption is "strict" or "optional". -rtspsAddress: :8322 -# Address of the UDP/RTP listener. This is needed only when "udp" is in protocols. -rtpAddress: :8000 -# Address of the UDP/RTCP listener. This is needed only when "udp" is in protocols. -rtcpAddress: :8001 -# IP range of all UDP-multicast listeners. This is needed only when "multicast" is in protocols. -multicastIPRange: 224.1.0.0/16 -# Port of all UDP-multicast/RTP listeners. This is needed only when "multicast" is in protocols. -multicastRTPPort: 8002 -# Port of all UDP-multicast/RTCP listeners. This is needed only when "multicast" is in protocols. -multicastRTCPPort: 8003 -# Path to the server key. This is needed only when encryption is "strict" or "optional". -# This can be generated with: -# openssl genrsa -out server.key 2048 -# openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650 -serverKey: server.key -# Path to the server certificate. This is needed only when encryption is "strict" or "optional". -serverCert: server.crt -# Authentication methods. Available are "basic" and "digest". -# "digest" doesn't provide any additional security and is available for compatibility only. 
-rtspAuthMethods: [ basic ] - -############################################### -# Global settings -> RTMP server - -# Enable publishing and reading streams with the RTMP protocol. -rtmp: yes -# Address of the RTMP listener. This is needed only when encryption is "no" or "optional". -rtmpAddress: :1935 -# Encrypt connections with TLS (RTMPS). -# Available values are "no", "strict", "optional". -rtmpEncryption: "no" -# Address of the RTMPS listener. This is needed only when encryption is "strict" or "optional". -rtmpsAddress: :1936 -# Path to the server key. This is needed only when encryption is "strict" or "optional". -# This can be generated with: -# openssl genrsa -out server.key 2048 -# openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650 -rtmpServerKey: server.key -# Path to the server certificate. This is needed only when encryption is "strict" or "optional". -rtmpServerCert: server.crt - -############################################### -# Global settings -> HLS server - -# Enable reading streams with the HLS protocol. -hls: yes -# Address of the HLS listener. -hlsAddress: :8888 -# Enable TLS/HTTPS on the HLS server. -# This is required for Low-Latency HLS. -hlsEncryption: no -# Path to the server key. This is needed only when encryption is yes. -# This can be generated with: -# openssl genrsa -out server.key 2048 -# openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650 -hlsServerKey: server.key -# Path to the server certificate. -hlsServerCert: server.crt -# Value of the Access-Control-Allow-Origin header provided in every HTTP response. -# This allows to play the HLS stream from an external website. -hlsAllowOrigin: '*' -# List of IPs or CIDRs of proxies placed before the HLS server. -# If the server receives a request from one of these entries, IP in logs -# will be taken from the X-Forwarded-For header. -hlsTrustedProxies: [ ] -# By default, HLS is generated only when requested by a user. 
-# This option allows to generate it always, avoiding the delay between request and generation. -hlsAlwaysRemux: no -# Variant of the HLS protocol to use. Available options are: -# * mpegts - uses MPEG-TS segments, for maximum compatibility. -# * fmp4 - uses fragmented MP4 segments, more efficient. -# * lowLatency - uses Low-Latency HLS. -hlsVariant: lowLatency -# Number of HLS segments to keep on the server. -# Segments allow to seek through the stream. -# Their number doesn't influence latency. -hlsSegmentCount: 7 -# Minimum duration of each segment. -# A player usually puts 3 segments in a buffer before reproducing the stream. -# The final segment duration is also influenced by the interval between IDR frames, -# since the server changes the duration in order to include at least one IDR frame -# in each segment. -hlsSegmentDuration: 1s -# Minimum duration of each part. -# A player usually puts 3 parts in a buffer before reproducing the stream. -# Parts are used in Low-Latency HLS in place of segments. -# Part duration is influenced by the distance between video/audio samples -# and is adjusted in order to produce segments with a similar duration. -hlsPartDuration: 200ms -# Maximum size of each segment. -# This prevents RAM exhaustion. -hlsSegmentMaxSize: 50M -# Directory in which to save segments, instead of keeping them in the RAM. -# This decreases performance, since reading from disk is less performant than -# reading from RAM, but allows to save RAM. -hlsDirectory: '' -# The muxer will be closed when there are no -# reader requests and this amount of time has passed. -hlsMuxerCloseAfter: 60s - -############################################### -# Global settings -> WebRTC server - -# Enable publishing and reading streams with the WebRTC protocol. -webrtc: yes -# Address of the WebRTC HTTP listener. -webrtcAddress: :8889 -# Enable TLS/HTTPS on the WebRTC server. -webrtcEncryption: no -# Path to the server key. 
-# This can be generated with: -# openssl genrsa -out server.key 2048 -# openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650 -webrtcServerKey: server.key -# Path to the server certificate. -webrtcServerCert: server.crt -# Value of the Access-Control-Allow-Origin header provided in every HTTP response. -# This allows to play the WebRTC stream from an external website. -webrtcAllowOrigin: '*' -# List of IPs or CIDRs of proxies placed before the WebRTC server. -# If the server receives a request from one of these entries, IP in logs -# will be taken from the X-Forwarded-For header. -webrtcTrustedProxies: [ ] -# Address of a local UDP listener that will receive connections. -# Use a blank string to disable. -webrtcLocalUDPAddress: :8189 -# Address of a local TCP listener that will receive connections. -# This is disabled by default since TCP is less efficient than UDP and -# introduces a progressive delay when network is congested. -webrtcLocalTCPAddress: '' -# WebRTC clients need to know the IP of the server. -# Gather IPs from interfaces and send them to clients. -webrtcIPsFromInterfaces: yes -# List of interfaces whose IPs will be sent to clients. -# An empty value means to use all available interfaces. -webrtcIPsFromInterfacesList: [ ] -# List of additional hosts or IPs to send to clients. -webrtcAdditionalHosts: [ ] -# ICE servers. Needed only when local listeners can't be reached by clients. -# STUN servers allows to obtain and share the public IP of the server. -# TURN/TURNS servers forces all traffic through them. -webrtcICEServers2: - - url: stun:stun.l.google.com:19302 - # if user is "AUTH_SECRET", then authentication is secret based. - # the secret must be inserted into the password field. - # username: '' - # password: '' - # clientOnly: false -# Time to wait for the WebRTC handshake to complete. -webrtcHandshakeTimeout: 10s -# Maximum time to gather video tracks. 
-webrtcTrackGatherTimeout: 2s - -############################################### -# Global settings -> SRT server - -# Enable publishing and reading streams with the SRT protocol. -srt: yes -# Address of the SRT listener. -srtAddress: :8890 - -############################################### -# Default path settings - -# Settings in "pathDefaults" are applied anywhere, -# unless they are overridden in "paths". -pathDefaults: - - ############################################### - # Default path settings -> General - - # Source of the stream. This can be: - # * publisher -> the stream is provided by a RTSP, RTMP, WebRTC or SRT client - # * rtsp://existing-url -> the stream is pulled from another RTSP server / camera - # * rtsps://existing-url -> the stream is pulled from another RTSP server / camera with RTSPS - # * rtmp://existing-url -> the stream is pulled from another RTMP server / camera - # * rtmps://existing-url -> the stream is pulled from another RTMP server / camera with RTMPS - # * http://existing-url/stream.m3u8 -> the stream is pulled from another HLS server / camera - # * https://existing-url/stream.m3u8 -> the stream is pulled from another HLS server / camera with HTTPS - # * udp://ip:port -> the stream is pulled with UDP, by listening on the specified IP and port - # * srt://existing-url -> the stream is pulled from another SRT server / camera - # * whep://existing-url -> the stream is pulled from another WebRTC server / camera - # * wheps://existing-url -> the stream is pulled from another WebRTC server / camera with HTTPS - # * redirect -> the stream is provided by another path or server - # * rpiCamera -> the stream is provided by a Raspberry Pi Camera - # The following variables can be used in the source string: - # * $MTX_QUERY: query parameters (passed by first reader) - # * $G1, $G2, ...: regular expression groups, if path name is - # a regular expression. 
- source: publisher - # If the source is a URL, and the source certificate is self-signed - # or invalid, you can provide the fingerprint of the certificate in order to - # validate it anyway. It can be obtained by running: - # openssl s_client -connect source_ip:source_port /dev/null | sed -n '/BEGIN/,/END/p' > server.crt - # openssl x509 -in server.crt -noout -fingerprint -sha256 | cut -d "=" -f2 | tr -d ':' - sourceFingerprint: - # If the source is a URL, it will be pulled only when at least - # one reader is connected, saving bandwidth. - sourceOnDemand: no - # If sourceOnDemand is "yes", readers will be put on hold until the source is - # ready or until this amount of time has passed. - sourceOnDemandStartTimeout: 10s - # If sourceOnDemand is "yes", the source will be closed when there are no - # readers connected and this amount of time has passed. - sourceOnDemandCloseAfter: 10s - # Maximum number of readers. Zero means no limit. - maxReaders: 0 - # SRT encryption passphrase require to read from this path - srtReadPassphrase: - # If the stream is not available, redirect readers to this path. - # It can be can be a relative path (i.e. /otherstream) or an absolute RTSP URL. - fallback: - - ############################################### - # Default path settings -> Record - - # Record streams to disk. - record: no - # Path of recording segments. - # Extension is added automatically. - # Available variables are %path (path name), %Y %m %d %H %M %S %f %s (time in strftime format) - recordPath: ./recordings/%path/%Y-%m-%d_%H-%M-%S-%f - # Format of recorded segments. - # Available formats are "fmp4" (fragmented MP4) and "mpegts" (MPEG-TS). - recordFormat: fmp4 - # fMP4 segments are concatenation of small MP4 files (parts), each with this duration. - # MPEG-TS segments are concatenation of 188-bytes packets, flushed to disk with this period. - # When a system failure occurs, the last part gets lost. 
- # Therefore, the part duration is equal to the RPO (recovery point objective). - recordPartDuration: 1s - # Minimum duration of each segment. - recordSegmentDuration: 1h - # Delete segments after this timespan. - # Set to 0s to disable automatic deletion. - recordDeleteAfter: 24h - - ############################################### - # Default path settings -> Publisher source (when source is "publisher") - - # Allow another client to disconnect the current publisher and publish in its place. - overridePublisher: yes - # SRT encryption passphrase required to publish to this path - srtPublishPassphrase: - - ############################################### - # Default path settings -> RTSP source (when source is a RTSP or a RTSPS URL) - - # Transport protocol used to pull the stream. available values are "automatic", "udp", "multicast", "tcp". - rtspTransport: automatic - # Support sources that don't provide server ports or use random server ports. This is a security issue - # and must be used only when interacting with sources that require it. - rtspAnyPort: no - # Range header to send to the source, in order to start streaming from the specified offset. - # available values: - # * clock: Absolute time - # * npt: Normal Play Time - # * smpte: SMPTE timestamps relative to the start of the recording - rtspRangeType: - # Available values: - # * clock: UTC ISO 8601 combined date and time string, e.g. 20230812T120000Z - # * npt: duration such as "300ms", "1.5m" or "2h45m", valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h" - # * smpte: duration such as "300ms", "1.5m" or "2h45m", valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h" - rtspRangeStart: - - ############################################### - # Default path settings -> Redirect source (when source is "redirect") - - # RTSP URL which clients will be redirected to. 
- sourceRedirect: - - ############################################### - # Default path settings -> Raspberry Pi Camera source (when source is "rpiCamera") - - # ID of the camera - rpiCameraCamID: 0 - # Width of frames - rpiCameraWidth: 1920 - # Height of frames - rpiCameraHeight: 1080 - # Flip horizontally - rpiCameraHFlip: false - # Flip vertically - rpiCameraVFlip: false - # Brightness [-1, 1] - rpiCameraBrightness: 0 - # Contrast [0, 16] - rpiCameraContrast: 1 - # Saturation [0, 16] - rpiCameraSaturation: 1 - # Sharpness [0, 16] - rpiCameraSharpness: 1 - # Exposure mode. - # values: normal, short, long, custom - rpiCameraExposure: normal - # Auto-white-balance mode. - # values: auto, incandescent, tungsten, fluorescent, indoor, daylight, cloudy, custom - rpiCameraAWB: auto - # Auto-white-balance fixed gains. This can be used in place of rpiCameraAWB. - # format: [red,blue] - rpiCameraAWBGains: [ 0, 0 ] - # Denoise operating mode. - # values: off, cdn_off, cdn_fast, cdn_hq - rpiCameraDenoise: "off" - # Fixed shutter speed, in microseconds. - rpiCameraShutter: 0 - # Metering mode of the AEC/AGC algorithm. - # values: centre, spot, matrix, custom - rpiCameraMetering: centre - # Fixed gain - rpiCameraGain: 0 - # EV compensation of the image [-10, 10] - rpiCameraEV: 0 - # Region of interest, in format x,y,width,height - rpiCameraROI: - # Whether to enable HDR on Raspberry Camera 3. - rpiCameraHDR: false - # Tuning file - rpiCameraTuningFile: - # Sensor mode, in format [width]:[height]:[bit-depth]:[packing] - # bit-depth and packing are optional. 
- rpiCameraMode: - # frames per second - rpiCameraFPS: 30 - # Autofocus mode - # values: auto, manual, continuous - rpiCameraAfMode: continuous - # Autofocus range - # values: normal, macro, full - rpiCameraAfRange: normal - # Autofocus speed - # values: normal, fast - rpiCameraAfSpeed: normal - # Lens position (for manual autofocus only), will be set to focus to a specific distance - # calculated by the following formula: d = 1 / value - # Examples: 0 moves the lens to infinity. - # 0.5 moves the lens to focus on objects 2m away. - # 2 moves the lens to focus on objects 50cm away. - rpiCameraLensPosition: 0.0 - # Specifies the autofocus window, in the form x,y,width,height where the coordinates - # are given as a proportion of the entire image. - rpiCameraAfWindow: - # Manual flicker correction period, in microseconds. - rpiCameraFlickerPeriod: 0 - # Enables printing text on each frame. - rpiCameraTextOverlayEnable: false - # Text that is printed on each frame. - # format is the one of the strftime() function. - rpiCameraTextOverlay: '%Y-%m-%d %H:%M:%S - MediaMTX' - # Codec. Available values: auto, hardwareH264, softwareH264 - rpiCameraCodec: auto - # Period between IDR frames - rpiCameraIDRPeriod: 60 - # Bitrate - rpiCameraBitrate: 1000000 - # H264 profile - rpiCameraProfile: main - # H264 level - rpiCameraLevel: '4.1' - - ############################################### - # Default path settings -> Hooks - - # Command to run when this path is initialized. - # This can be used to publish a stream when the server is launched. - # This is terminated with SIGINT when the program closes. - # The following environment variables are available: - # * MTX_PATH: path name - # * RTSP_PORT: RTSP server port - # * G1, G2, ...: regular expression groups, if path name is - # a regular expression. - runOnInit: - # Restart the command if it exits. - runOnInitRestart: no - - # Command to run when this path is requested by a reader - # and no one is publishing to this path yet. 
- # This can be used to publish a stream on demand. - # This is terminated with SIGINT when there are no readers anymore. - # The following environment variables are available: - # * MTX_PATH: path name - # * MTX_QUERY: query parameters (passed by first reader) - # * RTSP_PORT: RTSP server port - # * G1, G2, ...: regular expression groups, if path name is - # a regular expression. - runOnDemand: - # Restart the command if it exits. - runOnDemandRestart: no - # Readers will be put on hold until the runOnDemand command starts publishing - # or until this amount of time has passed. - runOnDemandStartTimeout: 10s - # The command will be closed when there are no - # readers connected and this amount of time has passed. - runOnDemandCloseAfter: 10s - # Command to run when there are no readers anymore. - # Environment variables are the same of runOnDemand. - runOnUnDemand: - - # Command to run when the stream is ready to be read, whenever it is - # published by a client or pulled from a server / camera. - # This is terminated with SIGINT when the stream is not ready anymore. - # The following environment variables are available: - # * MTX_PATH: path name - # * MTX_QUERY: query parameters (passed by publisher) - # * RTSP_PORT: RTSP server port - # * G1, G2, ...: regular expression groups, if path name is - # a regular expression. - # * MTX_SOURCE_TYPE: source type - # * MTX_SOURCE_ID: source ID - runOnReady: curl http://localhost:5936/live/video-to-video/$MTX_PATH/start -F callback=$HOSTNAME -F source_id=$MTX_SOURCE_ID -F source_type=$MTX_SOURCE_TYPE -F query=$MTX_QUERY - # Restart the command if it exits. - runOnReadyRestart: no - # Command to run when the stream is not available anymore. - # Environment variables are the same of runOnReady. - runOnNotReady: - - # Command to run when a client starts reading. - # This is terminated with SIGINT when a client stops reading. 
- # The following environment variables are available: - # * MTX_PATH: path name - # * MTX_QUERY: query parameters (passed by reader) - # * RTSP_PORT: RTSP server port - # * G1, G2, ...: regular expression groups, if path name is - # a regular expression. - # * MTX_READER_TYPE: reader type - # * MTX_READER_ID: reader ID - runOnRead: - # Restart the command if it exits. - runOnReadRestart: no - # Command to run when a client stops reading. - # Environment variables are the same of runOnRead. - runOnUnread: - - # Command to run when a recording segment is created. - # The following environment variables are available: - # * MTX_PATH: path name - # * RTSP_PORT: RTSP server port - # * G1, G2, ...: regular expression groups, if path name is - # a regular expression. - # * MTX_SEGMENT_PATH: segment file path - runOnRecordSegmentCreate: - - # Command to run when a recording segment is complete. - # The following environment variables are available: - # * MTX_PATH: path name - # * RTSP_PORT: RTSP server port - # * G1, G2, ...: regular expression groups, if path name is - # a regular expression. - # * MTX_SEGMENT_PATH: segment file path - # * MTX_SEGMENT_DURATION: segment duration - runOnRecordSegmentComplete: - -############################################### -# Path settings - -# Settings in "paths" are applied to specific paths, and the map key -# is the name of the path. -# Any setting in "pathDefaults" can be overridden here. -# It's possible to use regular expressions by using a tilde as prefix, -# for example "~^(test1|test2)$" will match both "test1" and "test2", -# for example "~^prefix" will match all paths that start with "prefix". -paths: - # example: - # my_camera: - # source: rtsp://my_camera - - # Settings under path "all_others" are applied to all paths that - # do not match another entry. 
- all_others: From de9b4477adac501fd7d2346b597f8924c0fa1429 Mon Sep 17 00:00:00 2001 From: Max Holland Date: Fri, 29 Nov 2024 07:53:36 +0000 Subject: [PATCH 08/56] Handle webrtc path prefix (#3282) * Handle webrtc path prefix * fix * Contextual logging * debug logs * segmentation retries * Fix logging --------- Co-authored-by: Thom Shutt --- media/rtmp2segment.go | 31 ++++++++++++++++++++----------- server/ai_live_video.go | 30 +++++++++++++++++++----------- server/ai_mediaserver.go | 9 ++++++++- server/ai_process.go | 2 +- 4 files changed, 48 insertions(+), 24 deletions(-) diff --git a/media/rtmp2segment.go b/media/rtmp2segment.go index 44db3a8c11..ef9b67b48b 100644 --- a/media/rtmp2segment.go +++ b/media/rtmp2segment.go @@ -4,6 +4,7 @@ package media import ( "bufio" + "context" "encoding/base32" "fmt" "io" @@ -16,6 +17,7 @@ import ( "syscall" "time" + "github.com/livepeer/go-livepeer/clog" "github.com/livepeer/lpms/ffmpeg" "golang.org/x/sys/unix" ) @@ -36,17 +38,22 @@ func (ms *MediaSegmenter) RunSegmentation(in string, segmentHandler SegmentHandl processSegments(segmentHandler, outFilePattern, completionSignal) }() - ffmpeg.FfmpegSetLogLevel(ffmpeg.FFLogWarning) - _, err := ffmpeg.Transcode3(&ffmpeg.TranscodeOptionsIn{ - Fname: in, - }, []ffmpeg.TranscodeOptions{{ - Oname: outFilePattern, - AudioEncoder: ffmpeg.ComponentOptions{Name: "copy"}, - VideoEncoder: ffmpeg.ComponentOptions{Name: "copy"}, - Muxer: ffmpeg.ComponentOptions{Name: "segment"}, - }}) - if err != nil { - slog.Error("Failed to run segmentation", "in", in, "err", err) + retryCount := 0 + for retryCount < 5 { + ffmpeg.FfmpegSetLogLevel(ffmpeg.FFLogWarning) + _, err := ffmpeg.Transcode3(&ffmpeg.TranscodeOptionsIn{ + Fname: in, + }, []ffmpeg.TranscodeOptions{{ + Oname: outFilePattern, + AudioEncoder: ffmpeg.ComponentOptions{Name: "copy"}, + VideoEncoder: ffmpeg.ComponentOptions{Name: "copy"}, + Muxer: ffmpeg.ComponentOptions{Name: "segment"}, + }}) + if err != nil { + slog.Error("Failed to run 
segmentation", "in", in, "err", err) + } + retryCount++ + time.Sleep(5 * time.Second) } completionSignal <- true slog.Info("sent completion signal, now waiting") @@ -265,6 +272,8 @@ func readSegment(segmentHandler SegmentHandler, file *os.File, pipeName string) break } } + clog.V(8).Infof(context.Background(), "read segment. totalRead=%s", humanBytes(totalBytesRead)) + } func randomString() string { diff --git a/server/ai_live_video.go b/server/ai_live_video.go index c43a92d2b1..ca75bb51d7 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -1,6 +1,7 @@ package server import ( + "context" "errors" "fmt" "io" @@ -11,6 +12,7 @@ import ( "os/exec" "time" + "github.com/livepeer/go-livepeer/clog" "github.com/livepeer/go-livepeer/core" "github.com/livepeer/go-livepeer/media" "github.com/livepeer/go-livepeer/trickle" @@ -30,6 +32,7 @@ func startTricklePublish(url *url.URL, params aiRequestParams) { return } go func() { + clog.V(8).Infof(context.Background(), "publishing trickle. url=%s", url.Redacted()) // TODO this blocks! very bad! 
if err := publisher.Write(reader); err != nil { slog.Info("Error writing to trickle publisher", "err", err) @@ -39,13 +42,14 @@ func startTricklePublish(url *url.URL, params aiRequestParams) { slog.Info("trickle pub", "url", url) } -func startTrickleSubscribe(url *url.URL, params aiRequestParams) { +func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestParams) { // subscribe to the outputs and send them into LPMS subscriber := trickle.NewTrickleSubscriber(url.String()) r, w, err := os.Pipe() if err != nil { slog.Info("error getting pipe for trickle-ffmpeg", "url", url, "err", err) } + ctx = clog.AddVal(ctx, "url", url.Redacted()) // read segments from trickle subscription go func() { @@ -54,12 +58,12 @@ func startTrickleSubscribe(url *url.URL, params aiRequestParams) { segment, err := subscriber.Read() if err != nil { // TODO if not EOS then signal a new orchestrator is needed - slog.Info("Error reading trickle subscription", "url", url, "err", err) + clog.Infof(ctx, "Error reading trickle subscription: %s", err) return } defer segment.Body.Close() if _, err = io.Copy(w, segment.Body); err != nil { - slog.Info("Error copying to ffmpeg stdin", "url", url, "err", err) + clog.Infof(ctx, "Error copying to ffmpeg stdin: %s", err) return } } @@ -68,7 +72,8 @@ func startTrickleSubscribe(url *url.URL, params aiRequestParams) { // TODO: Change this to LPMS go func() { defer r.Close() - for { + retryCount := 0 + for retryCount < 10 { cmd := exec.Command("ffmpeg", "-i", "pipe:0", "-c:a", "copy", @@ -80,8 +85,9 @@ func startTrickleSubscribe(url *url.URL, params aiRequestParams) { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr if err := cmd.Run(); err != nil { - slog.Info("Error running ffmpeg command", "err", err, "url", url) + clog.Infof(ctx, "Error running trickle subscribe ffmpeg: %s", err) } + retryCount++ time.Sleep(5 * time.Second) } }() @@ -89,9 +95,9 @@ func startTrickleSubscribe(url *url.URL, params aiRequestParams) { func 
mediamtxSourceTypeToString(s string) (string, error) { switch s { - case "webrtcSession": + case mediaMTXWebrtcSession: return "whip", nil - case "rtmpConn": + case mediaMTXRtmpConn: return "rtmp", nil default: return "", errors.New("unknown media source") @@ -111,16 +117,18 @@ func startControlPublish(control *url.URL, params aiRequestParams) { } const ( - mediaMTXControlPort = "9997" - mediaMTXControlUser = "admin" + mediaMTXControlPort = "9997" + mediaMTXControlUser = "admin" + mediaMTXWebrtcSession = "webrtcSession" + mediaMTXRtmpConn = "rtmpConn" ) func (ls *LivepeerServer) kickInputConnection(mediaMTXHost, sourceID, sourceType string) error { var apiPath string switch sourceType { - case "webrtcSession": + case mediaMTXWebrtcSession: apiPath = "webrtcsessions" - case "rtmpConn": + case mediaMTXRtmpConn: apiPath = "rtmpconns" default: return fmt.Errorf("invalid sourceType: %s", sourceType) diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index d0996fac4e..95ba032b6f 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -82,6 +82,7 @@ func startAIMediaServer(ls *LivepeerServer) error { // This is called by the media server when the stream is ready ls.HTTPMux.Handle("/live/video-to-video/{stream}/start", ls.StartLiveVideo()) + ls.HTTPMux.Handle("/live/video-to-video/{prefix}/{stream}/start", ls.StartLiveVideo()) ls.HTTPMux.Handle("/live/video-to-video/{stream}/update", ls.UpdateLiveVideo()) return nil @@ -367,6 +368,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { http.Error(w, "Missing stream name", http.StatusBadRequest) return } + ctx = clog.AddVal(ctx, "stream", streamName) sourceID := r.FormValue("source_id") if sourceID == "" { @@ -468,8 +470,13 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { // Kick off the RTMP pull and segmentation as soon as possible ssr := media.NewSwitchableSegmentReader() go func() { + // Currently for webrtc we need to add a path prefix due to the ingress setup + 
mediaMTXStreamPrefix := r.PathValue("prefix") + if mediaMTXStreamPrefix != "" { + mediaMTXStreamPrefix = mediaMTXStreamPrefix + "/" + } ms := media.MediaSegmenter{Workdir: ls.LivepeerNode.WorkDir} - ms.RunSegmentation(fmt.Sprintf("rtmp://%s/%s", remoteHost, streamName), ssr.Read) + ms.RunSegmentation(fmt.Sprintf("rtmp://%s/%s%s", remoteHost, mediaMTXStreamPrefix, streamName), ssr.Read) ssr.Close() ls.cleanupLive(streamName) }() diff --git a/server/ai_process.go b/server/ai_process.go index 12337ca3ea..5f7c9ce2c9 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1034,7 +1034,7 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A return nil, fmt.Errorf("invalid control URL: %w", err) } startTricklePublish(pub, params) - startTrickleSubscribe(sub, params) + startTrickleSubscribe(ctx, sub, params) startControlPublish(control, params) } return resp, nil From 1d55e9bd438fe33b36a59c3d89dd67d8a4484a51 Mon Sep 17 00:00:00 2001 From: hjpotter92 Date: Sat, 30 Nov 2024 13:07:36 +0530 Subject: [PATCH 09/56] mediamtx: Publish deployed version if runtime args are present (#3285) * mediamtx: Publish deployed version if runtime args are present * dockerfile.mediamtx: Make use of layer caching when building docker image * mediamtx-entry.bash: Use exec to hand off the script --- .editorconfig | 6 ++--- .github/labeler.yml | 5 ++++ .github/workflows/docker-mediamtx.yaml | 9 +++++--- docker/Dockerfile.mediamtx | 32 +++++++++++++++----------- docker/mediamtx-entry.bash | 19 +++++++++++++++ 5 files changed, 52 insertions(+), 19 deletions(-) create mode 100755 docker/mediamtx-entry.bash diff --git a/.editorconfig b/.editorconfig index 6687730d94..886cff6345 100644 --- a/.editorconfig +++ b/.editorconfig @@ -23,12 +23,12 @@ end_of_line = unset insert_final_newline = unset # Makefiles/Dockerfile/golang files -[{Makefile,Dockerfile{,.debian},*.go}] +[{Makefile,Dockerfile{,.cuda-base,.mediamtx},*.go}] indent_style = tab indent_size = 8 -# 
YAML/JSON Files -[{.ecrc,*.{yml,yaml,sh,json}}] +# YAML/JSON/sh Files +[{.ecrc,*.{yml,yaml,sh,json,bash}}] indent_size = 2 [{server/handlers_test,eth/accountmanager_test}.go] diff --git a/.github/labeler.yml b/.github/labeler.yml index 16b62af25c..8d012f381b 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -10,6 +10,11 @@ docker: - any-glob-to-any-file: - "docker/**" +docs: + - changed-files: + - any-glob-to-any-file: + - "doc/**" + github_actions: - changed-files: - any-glob-to-any-file: diff --git a/.github/workflows/docker-mediamtx.yaml b/.github/workflows/docker-mediamtx.yaml index d6540b4e55..dee3a31d1a 100644 --- a/.github/workflows/docker-mediamtx.yaml +++ b/.github/workflows/docker-mediamtx.yaml @@ -2,12 +2,15 @@ name: MediaMTX Docker build on: pull_request: + branches: + - master + paths: + - 'docker/*mediamtx*' push: branches: - master paths: - - 'docker/Dockerfile.mediamtx' - - 'docker/mediamtx.yml' + - 'docker/*mediamtx*' concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -20,7 +23,7 @@ jobs: permissions: packages: write contents: read - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Check out code uses: actions/checkout@v4.1.7 diff --git a/docker/Dockerfile.mediamtx b/docker/Dockerfile.mediamtx index 6c01819286..a6646a23de 100644 --- a/docker/Dockerfile.mediamtx +++ b/docker/Dockerfile.mediamtx @@ -1,17 +1,23 @@ -FROM ubuntu:24.04 - -ENV MEDIAMTX_VERSION="v1.9.3" +FROM ubuntu:24.04 # we need curl in the image as it's later used in the runOnReady command -RUN apt-get update \ - && apt-get install -y \ - ca-certificates \ - curl \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +RUN apt-get update \ + && apt-get install -y \ + ca-certificates \ + curl \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ENV MEDIAMTX_VERSION="1.9.3" + +ADD "https://github.com/bluenviron/mediamtx/releases/download/v${MEDIAMTX_VERSION}/mediamtx_v${MEDIAMTX_VERSION}_linux_amd64.tar.gz" 
/opt/mediamtx.tar.gz + +RUN tar xzf /opt/mediamtx.tar.gz -C /opt/ \ + && mkdir -p /usr/local/bin /etc/mediamtx/ \ + && mv /opt/mediamtx /usr/local/bin/mediamtx \ + && mv /opt/mediamtx.yml /etc/mediamtx/mediamtx.yml \ + && rm -rf /opt/ -RUN curl -L https://github.com/bluenviron/mediamtx/releases/download/${MEDIAMTX_VERSION}/mediamtx_${MEDIAMTX_VERSION}_linux_amd64.tar.gz -o /mediamtx.tar.gz \ - && tar xzvf /mediamtx.tar.gz \ - && rm /mediamtx.tar.gz +COPY mediamtx-entry.bash / -ENTRYPOINT ["/mediamtx"] +ENTRYPOINT ["/mediamtx-entry.bash"] diff --git a/docker/mediamtx-entry.bash b/docker/mediamtx-entry.bash new file mode 100755 index 0000000000..fc70db141d --- /dev/null +++ b/docker/mediamtx-entry.bash @@ -0,0 +1,19 @@ +#!/bin/bash + +[ -v DEBUG ] && set -x + +set -euo pipefail + +if [ -v LP_PUBLISH_MEDIAMTX_METRICS ]; then + if [ -z "$LP_PUBLISH_MEDIAMTX_METRICS_ENDPOINT" ]; then + echo >&2 "No endpoint specified for publishing mediamtx metrics." + fi + echo < Date: Mon, 2 Dec 2024 09:02:53 +0100 Subject: [PATCH 10/56] AI Live Video: Local CPU setup (#3280) * Local dev setup * Local dev setup * Victor's workaround for ffmpeg * Add -liveAITrickleHostForRunner flag * Allow local CPU setup * Update dep --- cmd/livepeer/starter/starter.go | 4 ++++ go.mod | 4 ++-- go.sum | 8 ++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cmd/livepeer/starter/starter.go b/cmd/livepeer/starter/starter.go index 2de0a1e673..8f3254f0b8 100755 --- a/cmd/livepeer/starter/starter.go +++ b/cmd/livepeer/starter/starter.go @@ -1183,6 +1183,10 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) { glog.Errorf("Error parsing -nvidia for devices: %v", err) return } + } else { + glog.Warningf("!!! 
No GPU discovered, using CPU for AIWorker !!!") + // Create 2 fake GPU instances, intended for the local non-GPU setup + gpus = []string{"emulated-0", "emulated-1"} } modelsDir := *cfg.AIModelsDir diff --git a/go.mod b/go.mod index c0c352d580..f92d9a716f 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/uuid v1.6.0 github.com/jaypipes/ghw v0.10.0 github.com/jaypipes/pcidb v1.0.0 - github.com/livepeer/ai-worker v0.12.4 + github.com/livepeer/ai-worker v0.12.6 github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18 github.com/livepeer/lpms v0.0.0-20241122145837-7b07ba3a2204 @@ -47,7 +47,7 @@ require ( cloud.google.com/go/storage v1.30.1 // indirect dario.cat/mergo v1.0.0 // indirect github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect - github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect + github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect github.com/DataDog/zstd v1.4.5 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/StackExchange/wmi v1.2.1 // indirect diff --git a/go.sum b/go.sum index d6bba2ec19..d8412d273b 100644 --- a/go.sum +++ b/go.sum @@ -46,8 +46,8 @@ dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7 github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= -github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= -github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= 
+github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= +github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8= @@ -605,8 +605,8 @@ github.com/libp2p/go-netroute v0.2.0 h1:0FpsbsvuSnAhXFnCY0VLFbJOzaK0VnP0r1QT/o4n github.com/libp2p/go-netroute v0.2.0/go.mod h1:Vio7LTzZ+6hoT4CMZi5/6CpY3Snzh2vgZhWgxMNwlQI= github.com/libp2p/go-openssl v0.1.0 h1:LBkKEcUv6vtZIQLVTegAil8jbNpJErQ9AnT+bWV+Ooo= github.com/libp2p/go-openssl v0.1.0/go.mod h1:OiOxwPpL3n4xlenjx2h7AwSGaFSC/KZvf6gNdOBQMtc= -github.com/livepeer/ai-worker v0.12.4 h1:RuCZP/JUEOo/q10Ry+s0oOr06DOSnpEDTE6y/NqXFxs= -github.com/livepeer/ai-worker v0.12.4/go.mod h1:pfWCS5v8TIWNImxAZ6ikhiJW9Re88rsDnlW5Ktn7r2k= +github.com/livepeer/ai-worker v0.12.6 h1:1RN7eYy4C3D+iVaK5WuUu8Jgm7hTQ08J8EBeRekGJSo= +github.com/livepeer/ai-worker v0.12.6/go.mod h1:ZibfmZQQh6jFvnPLHeIPInghfX5ln+JpN845nS3GuyM= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b h1:VQcnrqtCA2UROp7q8ljkh2XA/u0KRgVv0S1xoUvOweE= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b/go.mod h1:hwJ5DKhl+pTanFWl+EUpw1H7ukPO/H+MFpgA7jjshzw= github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded h1:ZQlvR5RB4nfT+cOQee+WqmaDOgGtP2oDMhcVvR4L0yA= From fc69945480fdf62cbeef7bdefde65983ea4fc160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Wilczy=C5=84ski?= Date: Fri, 29 Nov 2024 16:43:54 +0100 Subject: [PATCH 11/56] github workflow - use go.mod for go version --- .github/workflows/build.yaml | 4 ++-- .github/workflows/test.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 
114051556b..0b8997096d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -72,7 +72,7 @@ jobs: id: go uses: actions/setup-go@v5 with: - go-version: 1.23.2 + go-version-file: './go.mod' cache: true cache-dependency-path: go.sum @@ -174,7 +174,7 @@ jobs: id: go uses: actions/setup-go@v5 with: - go-version: 1.23.2 + go-version-file: './go.mod' cache: true cache-dependency-path: go.sum diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 92beb4cc8c..7088c768ec 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -46,7 +46,7 @@ jobs: id: go uses: actions/setup-go@v5 with: - go-version: 1.23.2 + go-version-file: './go.mod' cache: true cache-dependency-path: go.sum From 3b60038947d9815968f2640edb03d8c351ddd1e9 Mon Sep 17 00:00:00 2001 From: Max Holland Date: Mon, 2 Dec 2024 12:23:43 +0000 Subject: [PATCH 12/56] Move Live Auth api key to a config param (#3277) * Move Live Auth api key to a config param * fix after merge conflicts --- cmd/livepeer/livepeer.go | 5 +++-- cmd/livepeer/starter/starter.go | 4 ++++ core/livepeernode.go | 1 + server/ai_mediaserver.go | 2 +- server/auth.go | 5 ++--- server/auth_test.go | 2 +- server/mediaserver.go | 2 ++ 7 files changed, 14 insertions(+), 7 deletions(-) diff --git a/cmd/livepeer/livepeer.go b/cmd/livepeer/livepeer.go index aee8f2b324..d7866ba78e 100755 --- a/cmd/livepeer/livepeer.go +++ b/cmd/livepeer/livepeer.go @@ -162,9 +162,11 @@ func parseLivepeerConfig() starter.LivepeerConfig { cfg.AIModelsDir = flag.String("aiModelsDir", *cfg.AIModelsDir, "Set directory where AI model weights are stored") cfg.AIRunnerImage = flag.String("aiRunnerImage", *cfg.AIRunnerImage, "Set the docker image for the AI runner: Example - livepeer/ai-runner:0.0.1") - // Live AI Media Server: + // Live AI: cfg.MediaMTXApiPassword = flag.String("mediaMTXApiPassword", "", "HTTP basic auth password for MediaMTX API requests") cfg.LiveAITrickleHostForRunner = 
flag.String("liveAITrickleHostForRunner", "", "Trickle Host used by AI Runner; It's used to overwrite the publicly available Trickle Host") + cfg.LiveAIAuthApiKey = flag.String("liveAIAuthApiKey", "", "API key to use for Live AI authentication requests") + cfg.LiveAIAuthWebhookURL = flag.String("liveAIAuthWebhookUrl", "", "Live AI RTMP authentication webhook URL") // Onchain: cfg.EthAcctAddr = flag.String("ethAcctAddr", *cfg.EthAcctAddr, "Existing Eth account address. For use when multiple ETH accounts exist in the keystore directory") @@ -222,7 +224,6 @@ func parseLivepeerConfig() starter.LivepeerConfig { cfg.FVfailGsKey = flag.String("FVfailGskey", *cfg.FVfailGsKey, "Google Cloud Storage private key file name or key in JSON format for accessing FVfailGsBucket") // API cfg.AuthWebhookURL = flag.String("authWebhookUrl", *cfg.AuthWebhookURL, "RTMP authentication webhook URL") - cfg.LiveAIAuthWebhookURL = flag.String("liveAIAuthWebhookUrl", "", "Live AI RTMP authentication webhook URL") // flags cfg.TestOrchAvail = flag.Bool("startupAvailabilityCheck", *cfg.TestOrchAvail, "Set to false to disable the startup Orchestrator availability check on the configured serviceAddr") diff --git a/cmd/livepeer/starter/starter.go b/cmd/livepeer/starter/starter.go index 8f3254f0b8..645522403d 100755 --- a/cmd/livepeer/starter/starter.go +++ b/cmd/livepeer/starter/starter.go @@ -168,6 +168,7 @@ type LivepeerConfig struct { KafkaPassword *string KafkaGatewayTopic *string MediaMTXApiPassword *string + LiveAIAuthApiKey *string } // DefaultLivepeerConfig creates LivepeerConfig exactly the same as when no flags are passed to the livepeer process. 
@@ -1560,6 +1561,9 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) { if cfg.MediaMTXApiPassword != nil { n.MediaMTXApiPassword = *cfg.MediaMTXApiPassword } + if cfg.LiveAIAuthApiKey != nil { + n.LiveAIAuthApiKey = *cfg.LiveAIAuthApiKey + } if cfg.LiveAITrickleHostForRunner != nil { n.LiveAITrickleHostForRunner = *cfg.LiveAITrickleHostForRunner } diff --git a/core/livepeernode.go b/core/livepeernode.go index fb1ceeeb8f..06853f20e2 100644 --- a/core/livepeernode.go +++ b/core/livepeernode.go @@ -155,6 +155,7 @@ type LivepeerNode struct { MediaMTXApiPassword string LiveAITrickleHostForRunner string + LiveAIAuthApiKey string } type LivePipeline struct { diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index 95ba032b6f..8c7024feba 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -435,7 +435,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { } if LiveAIAuthWebhookURL != nil { - authResp, err := authenticateAIStream(LiveAIAuthWebhookURL, AIAuthRequest{ + authResp, err := authenticateAIStream(LiveAIAuthWebhookURL, ls.liveAIAuthApiKey, AIAuthRequest{ Stream: streamName, Type: sourceTypeStr, QueryParams: queryParams, diff --git a/server/auth.go b/server/auth.go index 27f8468285..7a63198d68 100644 --- a/server/auth.go +++ b/server/auth.go @@ -8,7 +8,6 @@ import ( "io" "net/http" "net/url" - "os" "time" "github.com/golang/glog" @@ -124,7 +123,7 @@ type AIAuthResponse struct { paramsMap map[string]interface{} // unmarshaled params } -func authenticateAIStream(authURL *url.URL, req AIAuthRequest) (*AIAuthResponse, error) { +func authenticateAIStream(authURL *url.URL, apiKey string, req AIAuthRequest) (*AIAuthResponse, error) { req.StreamKey = req.Stream if authURL == nil { return nil, fmt.Errorf("No auth URL configured") @@ -142,7 +141,7 @@ func authenticateAIStream(authURL *url.URL, req AIAuthRequest) (*AIAuthResponse, } request.Header.Set("Content-Type", "application/json") - request.Header.Set("x-api-key", 
os.Getenv("SHOWCASE_API_KEY")) + request.Header.Set("x-api-key", apiKey) resp, err := http.DefaultClient.Do(request) if err != nil { diff --git a/server/auth_test.go b/server/auth_test.go index f0c9ca70ed..f7ef445751 100644 --- a/server/auth_test.go +++ b/server/auth_test.go @@ -72,7 +72,7 @@ func TestAILiveAuthSucceeds(t *testing.T) { s, serverURL := stubAuthServer(t, http.StatusOK, `{}`) defer s.Close() - resp, err := authenticateAIStream(serverURL, AIAuthRequest{ + resp, err := authenticateAIStream(serverURL, "", AIAuthRequest{ Stream: "stream", }) require.NoError(t, err) diff --git a/server/mediaserver.go b/server/mediaserver.go index 3a95ec72a3..1f31d7bf35 100644 --- a/server/mediaserver.go +++ b/server/mediaserver.go @@ -127,6 +127,7 @@ type LivepeerServer struct { serverLock *sync.RWMutex mediaMTXApiPassword string + liveAIAuthApiKey string } func (s *LivepeerServer) SetContextFromUnitTest(c context.Context) { @@ -193,6 +194,7 @@ func NewLivepeerServer(rtmpAddr string, lpNode *core.LivepeerNode, httpIngest bo recordingsAuthResponses: cache.New(time.Hour, 2*time.Hour), AISessionManager: NewAISessionManager(lpNode, AISessionManagerTTL), mediaMTXApiPassword: lpNode.MediaMTXApiPassword, + liveAIAuthApiKey: lpNode.LiveAIAuthApiKey, } if lpNode.NodeType == core.BroadcasterNode && httpIngest { opts.HttpMux.HandleFunc("/live/", ls.HandlePush) From 442325439c70c330bd145cfb4c55a4c5be48b512 Mon Sep 17 00:00:00 2001 From: hjpotter92 Date: Mon, 2 Dec 2024 18:12:46 +0530 Subject: [PATCH 13/56] mediamtx: Add metric labels for mediamtx (#3287) Fix `echo` to `cat` for curl to be able to process stdin input --- docker/mediamtx-entry.bash | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/mediamtx-entry.bash b/docker/mediamtx-entry.bash index fc70db141d..ffc3a0cb1e 100755 --- a/docker/mediamtx-entry.bash +++ b/docker/mediamtx-entry.bash @@ -8,10 +8,10 @@ if [ -v LP_PUBLISH_MEDIAMTX_METRICS ]; then if [ -z "$LP_PUBLISH_MEDIAMTX_METRICS_ENDPOINT" ]; 
then echo >&2 "No endpoint specified for publishing mediamtx metrics." fi - echo < Date: Tue, 3 Dec 2024 14:08:02 +0100 Subject: [PATCH 14/56] Live Video Payments (#3261) --- cmd/livepeer/livepeer.go | 1 + cmd/livepeer/starter/starter.go | 4 + core/livepeernode.go | 1 + server/ai_http.go | 64 ++++++++++++- server/ai_live_video.go | 31 ++++++- server/ai_process.go | 20 ++++- server/live_payment.go | 17 +++- server/live_payment_processor.go | 148 +++++++++++++++++++++++++++++++ server/live_payment_test.go | 9 +- server/mediaserver.go | 2 + server/segment_rpc.go | 20 +++-- server/segment_rpc_test.go | 5 ++ 12 files changed, 298 insertions(+), 24 deletions(-) create mode 100644 server/live_payment_processor.go diff --git a/cmd/livepeer/livepeer.go b/cmd/livepeer/livepeer.go index d7866ba78e..9881014192 100755 --- a/cmd/livepeer/livepeer.go +++ b/cmd/livepeer/livepeer.go @@ -167,6 +167,7 @@ func parseLivepeerConfig() starter.LivepeerConfig { cfg.LiveAITrickleHostForRunner = flag.String("liveAITrickleHostForRunner", "", "Trickle Host used by AI Runner; It's used to overwrite the publicly available Trickle Host") cfg.LiveAIAuthApiKey = flag.String("liveAIAuthApiKey", "", "API key to use for Live AI authentication requests") cfg.LiveAIAuthWebhookURL = flag.String("liveAIAuthWebhookUrl", "", "Live AI RTMP authentication webhook URL") + cfg.LivePaymentInterval = flag.Duration("livePaymentInterval", *cfg.LivePaymentInterval, "Interval to pay process Gateway <> Orchestrator Payments for Live AI Video") // Onchain: cfg.EthAcctAddr = flag.String("ethAcctAddr", *cfg.EthAcctAddr, "Existing Eth account address. 
For use when multiple ETH accounts exist in the keystore directory") diff --git a/cmd/livepeer/starter/starter.go b/cmd/livepeer/starter/starter.go index 645522403d..b7a11b70d6 100755 --- a/cmd/livepeer/starter/starter.go +++ b/cmd/livepeer/starter/starter.go @@ -169,6 +169,7 @@ type LivepeerConfig struct { KafkaGatewayTopic *string MediaMTXApiPassword *string LiveAIAuthApiKey *string + LivePaymentInterval *time.Duration } // DefaultLivepeerConfig creates LivepeerConfig exactly the same as when no flags are passed to the livepeer process. @@ -213,6 +214,7 @@ func DefaultLivepeerConfig() LivepeerConfig { defaultAIModelsDir := "" defaultAIRunnerImage := "livepeer/ai-runner:latest" defaultLiveAIAuthWebhookURL := "" + defaultLivePaymentInterval := 5 * time.Second // Onchain: defaultEthAcctAddr := "" @@ -320,6 +322,7 @@ func DefaultLivepeerConfig() LivepeerConfig { AIModelsDir: &defaultAIModelsDir, AIRunnerImage: &defaultAIRunnerImage, LiveAIAuthWebhookURL: &defaultLiveAIAuthWebhookURL, + LivePaymentInterval: &defaultLivePaymentInterval, // Onchain: EthAcctAddr: &defaultEthAcctAddr, @@ -1564,6 +1567,7 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) { if cfg.LiveAIAuthApiKey != nil { n.LiveAIAuthApiKey = *cfg.LiveAIAuthApiKey } + n.LivePaymentInterval = *cfg.LivePaymentInterval if cfg.LiveAITrickleHostForRunner != nil { n.LiveAITrickleHostForRunner = *cfg.LiveAITrickleHostForRunner } diff --git a/core/livepeernode.go b/core/livepeernode.go index 06853f20e2..6bfddc71c9 100644 --- a/core/livepeernode.go +++ b/core/livepeernode.go @@ -156,6 +156,7 @@ type LivepeerNode struct { MediaMTXApiPassword string LiveAITrickleHostForRunner string LiveAIAuthApiKey string + LivePaymentInterval time.Duration } type LivePipeline struct { diff --git a/server/ai_http.go b/server/ai_http.go index 9d0dcf29a3..d694679987 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -132,6 +132,28 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { controlUrl = pubUrl + 
"-control" ) + // Handle initial payment, the rest of the payments are done separately from the stream processing + // Note that this payment is debit from the balance and acts as a buffer for the AI Realtime Video processing + payment, err := getPayment(r.Header.Get(paymentHeader)) + if err != nil { + respondWithError(w, err.Error(), http.StatusPaymentRequired) + return + } + sender := getPaymentSender(payment) + _, ctx, err = verifySegCreds(ctx, h.orchestrator, r.Header.Get(segmentHeader), sender) + if err != nil { + respondWithError(w, err.Error(), http.StatusForbidden) + return + } + if err := orch.ProcessPayment(ctx, payment, core.ManifestID(mid)); err != nil { + respondWithError(w, err.Error(), http.StatusBadRequest) + return + } + if payment.GetExpectedPrice().GetPricePerUnit() > 0 && !orch.SufficientBalance(sender, core.ManifestID(mid)) { + respondWithError(w, "Insufficient balance", http.StatusBadRequest) + return + } + //If successful, then create the trickle channels // Precreate the channels to avoid race conditions // TODO get the expected mime type from the request @@ -142,7 +164,39 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { controlPubCh := trickle.NewLocalPublisher(h.trickleSrv, mid+"-control", "application/json") controlPubCh.CreateChannel() - // Subscribe to the publishUrl for payments monitoring + // Start payment receiver which accounts the payments and stops the stream if the payment is insufficient + priceInfo, err := h.orchestrator.PriceInfo(sender, core.ManifestID(mid)) + if err != nil { + respondWithError(w, err.Error(), http.StatusInternalServerError) + return + + } + var paymentProcessor *LivePaymentProcessor + ctx, cancel := context.WithCancel(context.Background()) + if priceInfo != nil { + paymentReceiver := livePaymentReceiver{orchestrator: h.orchestrator} + accountPaymentFunc := func(inPixels int64) error { + err := paymentReceiver.AccountPayment(context.Background(), &SegmentInfoReceiver{ + sender: sender, + inPixels: 
inPixels, + priceInfo: priceInfo, + sessionID: mid, + }) + if err != nil { + slog.Warn("Error accounting payment, stopping stream processing", "err", err) + pubCh.Close() + subCh.Close() + controlPubCh.Close() + cancel() + } + return err + } + paymentProcessor = NewLivePaymentProcessor(ctx, h.node.LivePaymentInterval, accountPaymentFunc) + } else { + clog.Warningf(ctx, "No price info found for model %v, Orchestrator will not charge for video processing", modelID) + } + + // Subscribe to the publishUrl for payments monitoring and payment processing go func() { sub := trickle.NewLocalSubscriber(h.trickleSrv, mid) for { @@ -151,8 +205,11 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { clog.Infof(ctx, "Error getting local trickle segment err=%v", err) return } - // We can do something with the segment data here - io.Copy(io.Discard, segment.Reader) + reader := segment.Reader + if paymentProcessor != nil { + reader = paymentProcessor.process(segment.Reader) + } + io.Copy(io.Discard, reader) } }() @@ -176,6 +233,7 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { pubCh.Close() subCh.Close() controlPubCh.Close() + cancel() respondWithError(w, err.Error(), http.StatusInternalServerError) return } diff --git a/server/ai_live_video.go b/server/ai_live_video.go index ca75bb51d7..c2da1af8f8 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -18,23 +18,50 @@ import ( "github.com/livepeer/go-livepeer/trickle" ) -func startTricklePublish(url *url.URL, params aiRequestParams) { +func startTricklePublish(url *url.URL, params aiRequestParams, sess *AISession) { publisher, err := trickle.NewTricklePublisher(url.String()) if err != nil { slog.Info("error publishing trickle", "err", err) } + + // Start payments which probes a segment every "paymentProcessInterval" and sends a payment + ctx, cancel := context.WithCancel(context.Background()) + priceInfo := sess.OrchestratorInfo.PriceInfo + var paymentProcessor *LivePaymentProcessor + if priceInfo 
!= nil { + paymentSender := livePaymentSender{} + sendPaymentFunc := func(inPixels int64) error { + return paymentSender.SendPayment(context.Background(), &SegmentInfoSender{ + sess: sess.BroadcastSession, + inPixels: inPixels, + priceInfo: priceInfo, + mid: extractMid(url.Path), + }) + } + paymentProcessor = NewLivePaymentProcessor(ctx, params.liveParams.paymentProcessInterval, sendPaymentFunc) + } else { + clog.Warningf(ctx, "No price info found from Orchestrator, Gateway will not send payments for the video processing") + } + params.liveParams.segmentReader.SwitchReader(func(reader io.Reader) { // check for end of stream if _, eos := reader.(*media.EOSReader); eos { if err := publisher.Close(); err != nil { slog.Info("Error closing trickle publisher", "err", err) } + cancel() return } go func() { clog.V(8).Infof(context.Background(), "publishing trickle. url=%s", url.Redacted()) + + r := reader + if paymentProcessor != nil { + r = paymentProcessor.process(reader) + } + // TODO this blocks! very bad! - if err := publisher.Write(reader); err != nil { + if err := publisher.Write(r); err != nil { slog.Info("Error writing to trickle publisher", "err", err) } }() diff --git a/server/ai_process.go b/server/ai_process.go index 5f7c9ce2c9..bb9df55a69 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -93,6 +93,8 @@ type liveRequestParams struct { segmentReader *media.SwitchableSegmentReader outputRTMPURL string stream string + + paymentProcessInterval time.Duration } // CalculateTextToImageLatencyScore computes the time taken per pixel for an text-to-image request. 
@@ -1004,6 +1006,8 @@ func submitAudioToText(ctx context.Context, params aiRequestParams, sess *AISess return &res, nil } +const initPixelsToPay = 30 * 30 * 1280 * 720 // 30 seconds, 30fps, 720p + func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *AISession, req worker.GenLiveVideoToVideoJSONRequestBody) (any, error) { client, err := worker.NewClientWithResponses(sess.Transcoder(), worker.WithHTTPClient(httpClient)) if err != nil { @@ -1012,9 +1016,11 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A } return nil, err } + setHeaders, balUpdate, err := prepareAIPayment(ctx, sess, initPixelsToPay) + defer completeBalanceUpdate(sess.BroadcastSession, balUpdate) // Send request to orchestrator - resp, err := client.GenLiveVideoToVideoWithResponse(ctx, req) + resp, err := client.GenLiveVideoToVideoWithResponse(ctx, req, setHeaders) if err != nil { return nil, err } @@ -1033,13 +1039,23 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A if err != nil { return nil, fmt.Errorf("invalid control URL: %w", err) } - startTricklePublish(pub, params) + clog.V(common.VERBOSE).Infof(ctx, "pub %s sub %s control %s", pub, sub, control) + startTricklePublish(pub, params, sess) startTrickleSubscribe(ctx, sub, params) startControlPublish(control, params) } return resp, nil } +// extractMid extracts the mid (manifest ID) from the publish URL +// e.g. 
public URL passed from orchestrator: /live/manifest/123456, then mid is 123456 +// we can consider improving it and passing mid directly in the JSON response from Orchestrator, +// but currently it would require changing the OpenAPI schema in livepeer/ai-worker repo +func extractMid(path string) string { + pubSplit := strings.Split(path, "/") + return pubSplit[len(pubSplit)-1] +} + func CalculateLLMLatencyScore(took time.Duration, tokensUsed int) float64 { if tokensUsed <= 0 { return 0 diff --git a/server/live_payment.go b/server/live_payment.go index 0e20f48216..d529ee2bc1 100644 --- a/server/live_payment.go +++ b/server/live_payment.go @@ -24,6 +24,7 @@ type SegmentInfoSender struct { sess *BroadcastSession inPixels int64 priceInfo *net.PriceInfo + mid string } type SegmentInfoReceiver struct { @@ -46,7 +47,6 @@ type LivePaymentReceiver interface { } type livePaymentSender struct { - segmentsToPayUpfront int64 } type livePaymentReceiver struct { @@ -54,17 +54,22 @@ type livePaymentReceiver struct { } func (r *livePaymentSender) SendPayment(ctx context.Context, segmentInfo *SegmentInfoSender) error { + if segmentInfo.priceInfo == nil || segmentInfo.priceInfo.PricePerUnit == 0 { + clog.V(common.DEBUG).Infof(ctx, "Skipping sending payment, priceInfo not set for requestID=%s, ", segmentInfo.mid) + return nil + } sess := segmentInfo.sess if err := refreshSessionIfNeeded(ctx, sess); err != nil { return err } + sess.lock.Lock() + sess.Params.ManifestID = core.ManifestID(segmentInfo.mid) + sess.lock.Unlock() fee := calculateFee(segmentInfo.inPixels, segmentInfo.priceInfo) - // We pay a few segments upfront to avoid race condition between payment and segment processing - minCredit := new(big.Rat).Mul(fee, new(big.Rat).SetInt64(r.segmentsToPayUpfront)) - balUpdate, err := newBalanceUpdate(sess, minCredit) + balUpdate, err := newBalanceUpdate(sess, fee) if err != nil { return err } @@ -135,6 +140,10 @@ func (r *livePaymentSender) SendPayment(ctx context.Context, segmentInfo 
*Segmen func (r *livePaymentReceiver) AccountPayment( ctx context.Context, segmentInfo *SegmentInfoReceiver) error { + if segmentInfo.priceInfo == nil || segmentInfo.priceInfo.PricePerUnit == 0 { + clog.V(common.DEBUG).Infof(ctx, "Skipping accounting, priceInfo not set for sessionID=%s, ", segmentInfo.sessionID) + return nil + } fee := calculateFee(segmentInfo.inPixels, segmentInfo.priceInfo) balance := r.orchestrator.Balance(segmentInfo.sender, core.ManifestID(segmentInfo.sessionID)) diff --git a/server/live_payment_processor.go b/server/live_payment_processor.go new file mode 100644 index 0000000000..dc902bf011 --- /dev/null +++ b/server/live_payment_processor.go @@ -0,0 +1,148 @@ +package server + +import ( + "bytes" + "context" + "fmt" + "github.com/livepeer/lpms/ffmpeg" + "io" + "log/slog" + "os" + "sync" + "time" +) + +type LivePaymentProcessor struct { + processInterval time.Duration + + lastProcessedAt time.Time + lastProcessedMu sync.RWMutex + segCh chan *segment + + processSegmentFunc func(inPixels int64) error +} + +type segment struct { + timestamp time.Time + segData []byte +} + +func NewLivePaymentProcessor(ctx context.Context, processInterval time.Duration, processSegmentFunc func(inPixels int64) error) *LivePaymentProcessor { + pp := &LivePaymentProcessor{ + processInterval: processInterval, + segCh: make(chan *segment, 1), + processSegmentFunc: processSegmentFunc, + lastProcessedAt: time.Now(), + } + pp.start(ctx) + return pp +} + +func (p *LivePaymentProcessor) start(ctx context.Context) { + go func() { + for { + select { + case seg := <-p.segCh: + p.processSegment(seg) + case <-ctx.Done(): + slog.Info("Done processing payments for session") + return + } + + } + }() +} + +func (p *LivePaymentProcessor) processSegment(seg *segment) { + if p.shouldSkip(seg.timestamp) { + return + } + + info, err := probeSegment(seg) + if err != nil { + slog.Error("Error probing segment", "err", err) + return + } + + pixelsPerSec := float64(info.Height) * 
float64(info.Width) * float64(info.FPS) + secSinceLastProcessed := seg.timestamp.Sub(p.lastProcessedAt).Seconds() + pixelsSinceLastProcessed := pixelsPerSec * secSinceLastProcessed + + err = p.processSegmentFunc(int64(pixelsSinceLastProcessed)) + if err != nil { + slog.Error("Error processing payment", "err", err) + return + } + + p.lastProcessedMu.Lock() + defer p.lastProcessedMu.Unlock() + p.lastProcessedAt = seg.timestamp +} + +func (p *LivePaymentProcessor) process(reader io.Reader) io.Reader { + timestamp := time.Now() + if p.shouldSkip(timestamp) { + // We don't process every segment, because it's too compute-expensive + return reader + } + + pipeReader, pipeWriter, err := os.Pipe() + if err != nil { + slog.Error("Error creating pipe", "err", err) + return reader + } + + resReader := io.TeeReader(reader, pipeWriter) + go func() { + // read the segment into the buffer, because the direct use of the reader causes Broken pipe + // it's probably related to different pace of reading by trickle and ffmpeg.GetCodecInfo() + defer pipeReader.Close() + segData, err := io.ReadAll(pipeReader) + if err != nil { + slog.Error("Error reading segment data", "err", err) + return + } + + select { + case p.segCh <- &segment{timestamp: timestamp, segData: segData}: + default: + // We process one segment at the time, no need to buffer them + } + }() + + return resReader +} + +func (p *LivePaymentProcessor) shouldSkip(timestamp time.Time) bool { + p.lastProcessedMu.RLock() + lastProcessedAt := p.lastProcessedAt + p.lastProcessedMu.RUnlock() + if lastProcessedAt.Add(p.processInterval).After(timestamp) { + // We don't process every segment, because it's too compute-expensive + return true + } + return false +} + +func probeSegment(seg *segment) (ffmpeg.MediaFormatInfo, error) { + pipeReader, pipeWriter, err := os.Pipe() + if err != nil { + return ffmpeg.MediaFormatInfo{}, err + } + + go func() { + defer pipeWriter.Close() + io.Copy(pipeWriter, bytes.NewReader(seg.segData)) + }() + + 
fname := fmt.Sprintf("pipe:%d", pipeReader.Fd()) + status, info, err := ffmpeg.GetCodecInfo(fname) + if err != nil { + return ffmpeg.MediaFormatInfo{}, err + } + if status != ffmpeg.CodecStatusOk { + slog.Error("Invalid CodecStatus while probing segment", "status", status) + return ffmpeg.MediaFormatInfo{}, fmt.Errorf("invalid CodecStatus while probing segment, status=%d", status) + } + return info, nil +} diff --git a/server/live_payment_test.go b/server/live_payment_test.go index 4d93f30ad4..bd4620a95c 100644 --- a/server/live_payment_test.go +++ b/server/live_payment_test.go @@ -46,9 +46,7 @@ func TestSendPayment(t *testing.T) { sess.Balance = core.NewBalance(ethcommon.BytesToAddress(sess.OrchestratorInfo.Address), core.ManifestID(sess.OrchestratorInfo.AuthToken.SessionId), sess.Balances) // Create Payment sender and segment info - paymentSender := livePaymentSender{ - segmentsToPayUpfront: 10, - } + paymentSender := livePaymentSender{} segmentInfo := &SegmentInfoSender{ sess: sess, inPixels: 1000000, @@ -64,11 +62,10 @@ func TestSendPayment(t *testing.T) { // then require.Nil(err) // One segment costs 1000000 - // Paid upfront for 10 segments => 10000000 // Spent cost for 1 segment => 1000000 - // The balance should be 9000000 + // The balance should be 0 balance := sess.Balances.Balance(ethcommon.BytesToAddress(sess.OrchestratorInfo.Address), core.ManifestID(sess.OrchestratorInfo.AuthToken.SessionId)) - require.Equal(new(big.Rat).SetInt64(9000000), balance) + require.Equal(new(big.Rat).SetInt64(0), balance) } func mockSender() pm.Sender { diff --git a/server/mediaserver.go b/server/mediaserver.go index 1f31d7bf35..aaec9210f6 100644 --- a/server/mediaserver.go +++ b/server/mediaserver.go @@ -128,6 +128,7 @@ type LivepeerServer struct { mediaMTXApiPassword string liveAIAuthApiKey string + livePaymentInterval time.Duration } func (s *LivepeerServer) SetContextFromUnitTest(c context.Context) { @@ -195,6 +196,7 @@ func NewLivepeerServer(rtmpAddr string, lpNode 
*core.LivepeerNode, httpIngest bo AISessionManager: NewAISessionManager(lpNode, AISessionManagerTTL), mediaMTXApiPassword: lpNode.MediaMTXApiPassword, liveAIAuthApiKey: lpNode.LiveAIAuthApiKey, + livePaymentInterval: lpNode.LivePaymentInterval, } if lpNode.NodeType == core.BroadcasterNode && httpIngest { opts.HttpMux.HandleFunc("/live/", ls.HandlePush) diff --git a/server/segment_rpc.go b/server/segment_rpc.go index d416e0348c..0dd96adad1 100644 --- a/server/segment_rpc.go +++ b/server/segment_rpc.go @@ -73,6 +73,12 @@ func (h *lphttp) ServeSegment(w http.ResponseWriter, r *http.Request) { return } + if err := h.orchestrator.ProcessPayment(ctx, payment, core.ManifestID(segData.AuthToken.SessionId)); err != nil { + clog.Errorf(ctx, "error processing payment: %v", err) + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + ctx = clog.AddSeqNo(ctx, uint64(segData.Seq)) clog.V(common.VERBOSE).Infof(ctx, "Received segment dur=%v", segData.Duration) if monitor.Enabled { @@ -226,6 +232,12 @@ func (h *lphttp) Payment(w http.ResponseWriter, r *http.Request) { return } + if err := h.orchestrator.ProcessPayment(ctx, payment, segData.ManifestID); err != nil { + clog.Errorf(ctx, "error processing payment: %v", err) + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + buf, err := proto.Marshal(&net.PaymentResult{Info: oInfo}) if err != nil { clog.Errorf(ctx, "Unable to marshal transcode result err=%q", err) @@ -240,7 +252,7 @@ func currentBalanceLog(h *lphttp, payment net.Payment, segData *core.SegTranscod if h == nil || h.node == nil || h.node.Balances == nil || segData == nil || segData.AuthToken == nil { return "invalid configuration" } - currentBalance := h.node.Balances.Balance(getPaymentSender(payment), core.ManifestID(segData.AuthToken.SessionId)) + currentBalance := h.node.Balances.Balance(getPaymentSender(payment), segData.ManifestID) if currentBalance == nil { return "no balance available" } @@ -273,12 +285,6 @@ func (h *lphttp) 
processPaymentAndSegmentHeaders(w http.ResponseWriter, r *http. return net.Payment{}, nil, nil, ctx, err } - if err := orch.ProcessPayment(ctx, payment, core.ManifestID(segData.AuthToken.SessionId)); err != nil { - clog.Errorf(ctx, "error processing payment: %v", err) - http.Error(w, err.Error(), http.StatusBadRequest) - return net.Payment{}, nil, nil, ctx, err - } - oInfo, err := orchestratorInfo(orch, sender, orch.ServiceURI().String(), core.ManifestID(segData.AuthToken.SessionId)) if err != nil { clog.Errorf(ctx, "Error updating orchestrator info - err=%q", err) diff --git a/server/segment_rpc_test.go b/server/segment_rpc_test.go index a1f54ce822..73a266f48a 100644 --- a/server/segment_rpc_test.go +++ b/server/segment_rpc_test.go @@ -875,8 +875,13 @@ func TestServeSegment_ProcessPaymentError(t *testing.T) { require := require.New(t) assert := assert.New(t) + drivers.NodeStorage = drivers.NewMemoryDriver(nil) orch.On("VerifySig", mock.Anything, mock.Anything, mock.Anything).Return(true) orch.On("AuthToken", mock.Anything, mock.Anything).Return(stubAuthToken) + orch.On("ServiceURI").Return(url.Parse("http://someuri.com")) + orch.On("PriceInfo", mock.Anything).Return(&net.PriceInfo{}, nil) + orch.On("TicketParams", mock.Anything, mock.Anything).Return(&net.TicketParams{}, nil) + orch.On("Address").Return(ethcommon.Address{}) s := &BroadcastSession{ Broadcaster: stubBroadcaster2(), From ad019df831fbb83ba84d408c98839f54ec756564 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Leszko?= Date: Tue, 3 Dec 2024 15:02:10 +0100 Subject: [PATCH 15/56] Revert "Live Video Payments (#3261)" (#3292) This reverts commit 5f3d211095bff4a97b2b7c863b81af20757890e1. 
--- cmd/livepeer/livepeer.go | 1 - cmd/livepeer/starter/starter.go | 4 - core/livepeernode.go | 1 - server/ai_http.go | 64 +------------ server/ai_live_video.go | 31 +------ server/ai_process.go | 20 +---- server/live_payment.go | 17 +--- server/live_payment_processor.go | 148 ------------------------------- server/live_payment_test.go | 9 +- server/mediaserver.go | 2 - server/segment_rpc.go | 20 ++--- server/segment_rpc_test.go | 5 -- 12 files changed, 24 insertions(+), 298 deletions(-) delete mode 100644 server/live_payment_processor.go diff --git a/cmd/livepeer/livepeer.go b/cmd/livepeer/livepeer.go index 9881014192..d7866ba78e 100755 --- a/cmd/livepeer/livepeer.go +++ b/cmd/livepeer/livepeer.go @@ -167,7 +167,6 @@ func parseLivepeerConfig() starter.LivepeerConfig { cfg.LiveAITrickleHostForRunner = flag.String("liveAITrickleHostForRunner", "", "Trickle Host used by AI Runner; It's used to overwrite the publicly available Trickle Host") cfg.LiveAIAuthApiKey = flag.String("liveAIAuthApiKey", "", "API key to use for Live AI authentication requests") cfg.LiveAIAuthWebhookURL = flag.String("liveAIAuthWebhookUrl", "", "Live AI RTMP authentication webhook URL") - cfg.LivePaymentInterval = flag.Duration("livePaymentInterval", *cfg.LivePaymentInterval, "Interval to pay process Gateway <> Orchestrator Payments for Live AI Video") // Onchain: cfg.EthAcctAddr = flag.String("ethAcctAddr", *cfg.EthAcctAddr, "Existing Eth account address. For use when multiple ETH accounts exist in the keystore directory") diff --git a/cmd/livepeer/starter/starter.go b/cmd/livepeer/starter/starter.go index b7a11b70d6..645522403d 100755 --- a/cmd/livepeer/starter/starter.go +++ b/cmd/livepeer/starter/starter.go @@ -169,7 +169,6 @@ type LivepeerConfig struct { KafkaGatewayTopic *string MediaMTXApiPassword *string LiveAIAuthApiKey *string - LivePaymentInterval *time.Duration } // DefaultLivepeerConfig creates LivepeerConfig exactly the same as when no flags are passed to the livepeer process. 
@@ -214,7 +213,6 @@ func DefaultLivepeerConfig() LivepeerConfig { defaultAIModelsDir := "" defaultAIRunnerImage := "livepeer/ai-runner:latest" defaultLiveAIAuthWebhookURL := "" - defaultLivePaymentInterval := 5 * time.Second // Onchain: defaultEthAcctAddr := "" @@ -322,7 +320,6 @@ func DefaultLivepeerConfig() LivepeerConfig { AIModelsDir: &defaultAIModelsDir, AIRunnerImage: &defaultAIRunnerImage, LiveAIAuthWebhookURL: &defaultLiveAIAuthWebhookURL, - LivePaymentInterval: &defaultLivePaymentInterval, // Onchain: EthAcctAddr: &defaultEthAcctAddr, @@ -1567,7 +1564,6 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) { if cfg.LiveAIAuthApiKey != nil { n.LiveAIAuthApiKey = *cfg.LiveAIAuthApiKey } - n.LivePaymentInterval = *cfg.LivePaymentInterval if cfg.LiveAITrickleHostForRunner != nil { n.LiveAITrickleHostForRunner = *cfg.LiveAITrickleHostForRunner } diff --git a/core/livepeernode.go b/core/livepeernode.go index 6bfddc71c9..06853f20e2 100644 --- a/core/livepeernode.go +++ b/core/livepeernode.go @@ -156,7 +156,6 @@ type LivepeerNode struct { MediaMTXApiPassword string LiveAITrickleHostForRunner string LiveAIAuthApiKey string - LivePaymentInterval time.Duration } type LivePipeline struct { diff --git a/server/ai_http.go b/server/ai_http.go index d694679987..9d0dcf29a3 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -132,28 +132,6 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { controlUrl = pubUrl + "-control" ) - // Handle initial payment, the rest of the payments are done separately from the stream processing - // Note that this payment is debit from the balance and acts as a buffer for the AI Realtime Video processing - payment, err := getPayment(r.Header.Get(paymentHeader)) - if err != nil { - respondWithError(w, err.Error(), http.StatusPaymentRequired) - return - } - sender := getPaymentSender(payment) - _, ctx, err = verifySegCreds(ctx, h.orchestrator, r.Header.Get(segmentHeader), sender) - if err != nil { - respondWithError(w, 
err.Error(), http.StatusForbidden) - return - } - if err := orch.ProcessPayment(ctx, payment, core.ManifestID(mid)); err != nil { - respondWithError(w, err.Error(), http.StatusBadRequest) - return - } - if payment.GetExpectedPrice().GetPricePerUnit() > 0 && !orch.SufficientBalance(sender, core.ManifestID(mid)) { - respondWithError(w, "Insufficient balance", http.StatusBadRequest) - return - } - //If successful, then create the trickle channels // Precreate the channels to avoid race conditions // TODO get the expected mime type from the request @@ -164,39 +142,7 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { controlPubCh := trickle.NewLocalPublisher(h.trickleSrv, mid+"-control", "application/json") controlPubCh.CreateChannel() - // Start payment receiver which accounts the payments and stops the stream if the payment is insufficient - priceInfo, err := h.orchestrator.PriceInfo(sender, core.ManifestID(mid)) - if err != nil { - respondWithError(w, err.Error(), http.StatusInternalServerError) - return - - } - var paymentProcessor *LivePaymentProcessor - ctx, cancel := context.WithCancel(context.Background()) - if priceInfo != nil { - paymentReceiver := livePaymentReceiver{orchestrator: h.orchestrator} - accountPaymentFunc := func(inPixels int64) error { - err := paymentReceiver.AccountPayment(context.Background(), &SegmentInfoReceiver{ - sender: sender, - inPixels: inPixels, - priceInfo: priceInfo, - sessionID: mid, - }) - if err != nil { - slog.Warn("Error accounting payment, stopping stream processing", "err", err) - pubCh.Close() - subCh.Close() - controlPubCh.Close() - cancel() - } - return err - } - paymentProcessor = NewLivePaymentProcessor(ctx, h.node.LivePaymentInterval, accountPaymentFunc) - } else { - clog.Warningf(ctx, "No price info found for model %v, Orchestrator will not charge for video processing", modelID) - } - - // Subscribe to the publishUrl for payments monitoring and payment processing + // Subscribe to the publishUrl for payments 
monitoring go func() { sub := trickle.NewLocalSubscriber(h.trickleSrv, mid) for { @@ -205,11 +151,8 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { clog.Infof(ctx, "Error getting local trickle segment err=%v", err) return } - reader := segment.Reader - if paymentProcessor != nil { - reader = paymentProcessor.process(segment.Reader) - } - io.Copy(io.Discard, reader) + // We can do something with the segment data here + io.Copy(io.Discard, segment.Reader) } }() @@ -233,7 +176,6 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { pubCh.Close() subCh.Close() controlPubCh.Close() - cancel() respondWithError(w, err.Error(), http.StatusInternalServerError) return } diff --git a/server/ai_live_video.go b/server/ai_live_video.go index c2da1af8f8..ca75bb51d7 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -18,50 +18,23 @@ import ( "github.com/livepeer/go-livepeer/trickle" ) -func startTricklePublish(url *url.URL, params aiRequestParams, sess *AISession) { +func startTricklePublish(url *url.URL, params aiRequestParams) { publisher, err := trickle.NewTricklePublisher(url.String()) if err != nil { slog.Info("error publishing trickle", "err", err) } - - // Start payments which probes a segment every "paymentProcessInterval" and sends a payment - ctx, cancel := context.WithCancel(context.Background()) - priceInfo := sess.OrchestratorInfo.PriceInfo - var paymentProcessor *LivePaymentProcessor - if priceInfo != nil { - paymentSender := livePaymentSender{} - sendPaymentFunc := func(inPixels int64) error { - return paymentSender.SendPayment(context.Background(), &SegmentInfoSender{ - sess: sess.BroadcastSession, - inPixels: inPixels, - priceInfo: priceInfo, - mid: extractMid(url.Path), - }) - } - paymentProcessor = NewLivePaymentProcessor(ctx, params.liveParams.paymentProcessInterval, sendPaymentFunc) - } else { - clog.Warningf(ctx, "No price info found from Orchestrator, Gateway will not send payments for the video processing") - } - 
params.liveParams.segmentReader.SwitchReader(func(reader io.Reader) { // check for end of stream if _, eos := reader.(*media.EOSReader); eos { if err := publisher.Close(); err != nil { slog.Info("Error closing trickle publisher", "err", err) } - cancel() return } go func() { clog.V(8).Infof(context.Background(), "publishing trickle. url=%s", url.Redacted()) - - r := reader - if paymentProcessor != nil { - r = paymentProcessor.process(reader) - } - // TODO this blocks! very bad! - if err := publisher.Write(r); err != nil { + if err := publisher.Write(reader); err != nil { slog.Info("Error writing to trickle publisher", "err", err) } }() diff --git a/server/ai_process.go b/server/ai_process.go index bb9df55a69..5f7c9ce2c9 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -93,8 +93,6 @@ type liveRequestParams struct { segmentReader *media.SwitchableSegmentReader outputRTMPURL string stream string - - paymentProcessInterval time.Duration } // CalculateTextToImageLatencyScore computes the time taken per pixel for an text-to-image request. 
@@ -1006,8 +1004,6 @@ func submitAudioToText(ctx context.Context, params aiRequestParams, sess *AISess return &res, nil } -const initPixelsToPay = 30 * 30 * 1280 * 720 // 30 seconds, 30fps, 720p - func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *AISession, req worker.GenLiveVideoToVideoJSONRequestBody) (any, error) { client, err := worker.NewClientWithResponses(sess.Transcoder(), worker.WithHTTPClient(httpClient)) if err != nil { @@ -1016,11 +1012,9 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A } return nil, err } - setHeaders, balUpdate, err := prepareAIPayment(ctx, sess, initPixelsToPay) - defer completeBalanceUpdate(sess.BroadcastSession, balUpdate) // Send request to orchestrator - resp, err := client.GenLiveVideoToVideoWithResponse(ctx, req, setHeaders) + resp, err := client.GenLiveVideoToVideoWithResponse(ctx, req) if err != nil { return nil, err } @@ -1039,23 +1033,13 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A if err != nil { return nil, fmt.Errorf("invalid control URL: %w", err) } - clog.V(common.VERBOSE).Infof(ctx, "pub %s sub %s control %s", pub, sub, control) - startTricklePublish(pub, params, sess) + startTricklePublish(pub, params) startTrickleSubscribe(ctx, sub, params) startControlPublish(control, params) } return resp, nil } -// extractMid extracts the mid (manifest ID) from the publish URL -// e.g. 
public URL passed from orchestrator: /live/manifest/123456, then mid is 123456 -// we can consider improving it and passing mid directly in the JSON response from Orchestrator, -// but currently it would require changing the OpenAPI schema in livepeer/ai-worker repo -func extractMid(path string) string { - pubSplit := strings.Split(path, "/") - return pubSplit[len(pubSplit)-1] -} - func CalculateLLMLatencyScore(took time.Duration, tokensUsed int) float64 { if tokensUsed <= 0 { return 0 diff --git a/server/live_payment.go b/server/live_payment.go index d529ee2bc1..0e20f48216 100644 --- a/server/live_payment.go +++ b/server/live_payment.go @@ -24,7 +24,6 @@ type SegmentInfoSender struct { sess *BroadcastSession inPixels int64 priceInfo *net.PriceInfo - mid string } type SegmentInfoReceiver struct { @@ -47,6 +46,7 @@ type LivePaymentReceiver interface { } type livePaymentSender struct { + segmentsToPayUpfront int64 } type livePaymentReceiver struct { @@ -54,22 +54,17 @@ type livePaymentReceiver struct { } func (r *livePaymentSender) SendPayment(ctx context.Context, segmentInfo *SegmentInfoSender) error { - if segmentInfo.priceInfo == nil || segmentInfo.priceInfo.PricePerUnit == 0 { - clog.V(common.DEBUG).Infof(ctx, "Skipping sending payment, priceInfo not set for requestID=%s, ", segmentInfo.mid) - return nil - } sess := segmentInfo.sess if err := refreshSessionIfNeeded(ctx, sess); err != nil { return err } - sess.lock.Lock() - sess.Params.ManifestID = core.ManifestID(segmentInfo.mid) - sess.lock.Unlock() fee := calculateFee(segmentInfo.inPixels, segmentInfo.priceInfo) - balUpdate, err := newBalanceUpdate(sess, fee) + // We pay a few segments upfront to avoid race condition between payment and segment processing + minCredit := new(big.Rat).Mul(fee, new(big.Rat).SetInt64(r.segmentsToPayUpfront)) + balUpdate, err := newBalanceUpdate(sess, minCredit) if err != nil { return err } @@ -140,10 +135,6 @@ func (r *livePaymentSender) SendPayment(ctx context.Context, segmentInfo 
*Segmen func (r *livePaymentReceiver) AccountPayment( ctx context.Context, segmentInfo *SegmentInfoReceiver) error { - if segmentInfo.priceInfo == nil || segmentInfo.priceInfo.PricePerUnit == 0 { - clog.V(common.DEBUG).Infof(ctx, "Skipping accounting, priceInfo not set for sessionID=%s, ", segmentInfo.sessionID) - return nil - } fee := calculateFee(segmentInfo.inPixels, segmentInfo.priceInfo) balance := r.orchestrator.Balance(segmentInfo.sender, core.ManifestID(segmentInfo.sessionID)) diff --git a/server/live_payment_processor.go b/server/live_payment_processor.go deleted file mode 100644 index dc902bf011..0000000000 --- a/server/live_payment_processor.go +++ /dev/null @@ -1,148 +0,0 @@ -package server - -import ( - "bytes" - "context" - "fmt" - "github.com/livepeer/lpms/ffmpeg" - "io" - "log/slog" - "os" - "sync" - "time" -) - -type LivePaymentProcessor struct { - processInterval time.Duration - - lastProcessedAt time.Time - lastProcessedMu sync.RWMutex - segCh chan *segment - - processSegmentFunc func(inPixels int64) error -} - -type segment struct { - timestamp time.Time - segData []byte -} - -func NewLivePaymentProcessor(ctx context.Context, processInterval time.Duration, processSegmentFunc func(inPixels int64) error) *LivePaymentProcessor { - pp := &LivePaymentProcessor{ - processInterval: processInterval, - segCh: make(chan *segment, 1), - processSegmentFunc: processSegmentFunc, - lastProcessedAt: time.Now(), - } - pp.start(ctx) - return pp -} - -func (p *LivePaymentProcessor) start(ctx context.Context) { - go func() { - for { - select { - case seg := <-p.segCh: - p.processSegment(seg) - case <-ctx.Done(): - slog.Info("Done processing payments for session") - return - } - - } - }() -} - -func (p *LivePaymentProcessor) processSegment(seg *segment) { - if p.shouldSkip(seg.timestamp) { - return - } - - info, err := probeSegment(seg) - if err != nil { - slog.Error("Error probing segment", "err", err) - return - } - - pixelsPerSec := float64(info.Height) * 
float64(info.Width) * float64(info.FPS) - secSinceLastProcessed := seg.timestamp.Sub(p.lastProcessedAt).Seconds() - pixelsSinceLastProcessed := pixelsPerSec * secSinceLastProcessed - - err = p.processSegmentFunc(int64(pixelsSinceLastProcessed)) - if err != nil { - slog.Error("Error processing payment", "err", err) - return - } - - p.lastProcessedMu.Lock() - defer p.lastProcessedMu.Unlock() - p.lastProcessedAt = seg.timestamp -} - -func (p *LivePaymentProcessor) process(reader io.Reader) io.Reader { - timestamp := time.Now() - if p.shouldSkip(timestamp) { - // We don't process every segment, because it's too compute-expensive - return reader - } - - pipeReader, pipeWriter, err := os.Pipe() - if err != nil { - slog.Error("Error creating pipe", "err", err) - return reader - } - - resReader := io.TeeReader(reader, pipeWriter) - go func() { - // read the segment into the buffer, because the direct use of the reader causes Broken pipe - // it's probably related to different pace of reading by trickle and ffmpeg.GetCodecInfo() - defer pipeReader.Close() - segData, err := io.ReadAll(pipeReader) - if err != nil { - slog.Error("Error reading segment data", "err", err) - return - } - - select { - case p.segCh <- &segment{timestamp: timestamp, segData: segData}: - default: - // We process one segment at the time, no need to buffer them - } - }() - - return resReader -} - -func (p *LivePaymentProcessor) shouldSkip(timestamp time.Time) bool { - p.lastProcessedMu.RLock() - lastProcessedAt := p.lastProcessedAt - p.lastProcessedMu.RUnlock() - if lastProcessedAt.Add(p.processInterval).After(timestamp) { - // We don't process every segment, because it's too compute-expensive - return true - } - return false -} - -func probeSegment(seg *segment) (ffmpeg.MediaFormatInfo, error) { - pipeReader, pipeWriter, err := os.Pipe() - if err != nil { - return ffmpeg.MediaFormatInfo{}, err - } - - go func() { - defer pipeWriter.Close() - io.Copy(pipeWriter, bytes.NewReader(seg.segData)) - }() - - 
fname := fmt.Sprintf("pipe:%d", pipeReader.Fd()) - status, info, err := ffmpeg.GetCodecInfo(fname) - if err != nil { - return ffmpeg.MediaFormatInfo{}, err - } - if status != ffmpeg.CodecStatusOk { - slog.Error("Invalid CodecStatus while probing segment", "status", status) - return ffmpeg.MediaFormatInfo{}, fmt.Errorf("invalid CodecStatus while probing segment, status=%d", status) - } - return info, nil -} diff --git a/server/live_payment_test.go b/server/live_payment_test.go index bd4620a95c..4d93f30ad4 100644 --- a/server/live_payment_test.go +++ b/server/live_payment_test.go @@ -46,7 +46,9 @@ func TestSendPayment(t *testing.T) { sess.Balance = core.NewBalance(ethcommon.BytesToAddress(sess.OrchestratorInfo.Address), core.ManifestID(sess.OrchestratorInfo.AuthToken.SessionId), sess.Balances) // Create Payment sender and segment info - paymentSender := livePaymentSender{} + paymentSender := livePaymentSender{ + segmentsToPayUpfront: 10, + } segmentInfo := &SegmentInfoSender{ sess: sess, inPixels: 1000000, @@ -62,10 +64,11 @@ func TestSendPayment(t *testing.T) { // then require.Nil(err) // One segment costs 1000000 + // Paid upfront for 10 segments => 10000000 // Spent cost for 1 segment => 1000000 - // The balance should be 0 + // The balance should be 9000000 balance := sess.Balances.Balance(ethcommon.BytesToAddress(sess.OrchestratorInfo.Address), core.ManifestID(sess.OrchestratorInfo.AuthToken.SessionId)) - require.Equal(new(big.Rat).SetInt64(0), balance) + require.Equal(new(big.Rat).SetInt64(9000000), balance) } func mockSender() pm.Sender { diff --git a/server/mediaserver.go b/server/mediaserver.go index aaec9210f6..1f31d7bf35 100644 --- a/server/mediaserver.go +++ b/server/mediaserver.go @@ -128,7 +128,6 @@ type LivepeerServer struct { mediaMTXApiPassword string liveAIAuthApiKey string - livePaymentInterval time.Duration } func (s *LivepeerServer) SetContextFromUnitTest(c context.Context) { @@ -196,7 +195,6 @@ func NewLivepeerServer(rtmpAddr string, lpNode 
*core.LivepeerNode, httpIngest bo AISessionManager: NewAISessionManager(lpNode, AISessionManagerTTL), mediaMTXApiPassword: lpNode.MediaMTXApiPassword, liveAIAuthApiKey: lpNode.LiveAIAuthApiKey, - livePaymentInterval: lpNode.LivePaymentInterval, } if lpNode.NodeType == core.BroadcasterNode && httpIngest { opts.HttpMux.HandleFunc("/live/", ls.HandlePush) diff --git a/server/segment_rpc.go b/server/segment_rpc.go index 0dd96adad1..d416e0348c 100644 --- a/server/segment_rpc.go +++ b/server/segment_rpc.go @@ -73,12 +73,6 @@ func (h *lphttp) ServeSegment(w http.ResponseWriter, r *http.Request) { return } - if err := h.orchestrator.ProcessPayment(ctx, payment, core.ManifestID(segData.AuthToken.SessionId)); err != nil { - clog.Errorf(ctx, "error processing payment: %v", err) - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - ctx = clog.AddSeqNo(ctx, uint64(segData.Seq)) clog.V(common.VERBOSE).Infof(ctx, "Received segment dur=%v", segData.Duration) if monitor.Enabled { @@ -232,12 +226,6 @@ func (h *lphttp) Payment(w http.ResponseWriter, r *http.Request) { return } - if err := h.orchestrator.ProcessPayment(ctx, payment, segData.ManifestID); err != nil { - clog.Errorf(ctx, "error processing payment: %v", err) - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - buf, err := proto.Marshal(&net.PaymentResult{Info: oInfo}) if err != nil { clog.Errorf(ctx, "Unable to marshal transcode result err=%q", err) @@ -252,7 +240,7 @@ func currentBalanceLog(h *lphttp, payment net.Payment, segData *core.SegTranscod if h == nil || h.node == nil || h.node.Balances == nil || segData == nil || segData.AuthToken == nil { return "invalid configuration" } - currentBalance := h.node.Balances.Balance(getPaymentSender(payment), segData.ManifestID) + currentBalance := h.node.Balances.Balance(getPaymentSender(payment), core.ManifestID(segData.AuthToken.SessionId)) if currentBalance == nil { return "no balance available" } @@ -285,6 +273,12 @@ func (h *lphttp) 
processPaymentAndSegmentHeaders(w http.ResponseWriter, r *http. return net.Payment{}, nil, nil, ctx, err } + if err := orch.ProcessPayment(ctx, payment, core.ManifestID(segData.AuthToken.SessionId)); err != nil { + clog.Errorf(ctx, "error processing payment: %v", err) + http.Error(w, err.Error(), http.StatusBadRequest) + return net.Payment{}, nil, nil, ctx, err + } + oInfo, err := orchestratorInfo(orch, sender, orch.ServiceURI().String(), core.ManifestID(segData.AuthToken.SessionId)) if err != nil { clog.Errorf(ctx, "Error updating orchestrator info - err=%q", err) diff --git a/server/segment_rpc_test.go b/server/segment_rpc_test.go index 73a266f48a..a1f54ce822 100644 --- a/server/segment_rpc_test.go +++ b/server/segment_rpc_test.go @@ -875,13 +875,8 @@ func TestServeSegment_ProcessPaymentError(t *testing.T) { require := require.New(t) assert := assert.New(t) - drivers.NodeStorage = drivers.NewMemoryDriver(nil) orch.On("VerifySig", mock.Anything, mock.Anything, mock.Anything).Return(true) orch.On("AuthToken", mock.Anything, mock.Anything).Return(stubAuthToken) - orch.On("ServiceURI").Return(url.Parse("http://someuri.com")) - orch.On("PriceInfo", mock.Anything).Return(&net.PriceInfo{}, nil) - orch.On("TicketParams", mock.Anything, mock.Anything).Return(&net.TicketParams{}, nil) - orch.On("Address").Return(ethcommon.Address{}) s := &BroadcastSession{ Broadcaster: stubBroadcaster2(), From 302a4e238e643b50a1bd2c6e8ba73c68cbb7631b Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Tue, 3 Dec 2024 09:53:58 -0800 Subject: [PATCH 16/56] live-ai: Update to use FLV-enabled LPMS (#3288) Updates LPMS to use the FLV muxer introduced in livepeer/lpms#428 which should resolve issues with getting RTMP output. 
--- docker/Dockerfile | 2 -- go.mod | 2 +- go.sum | 4 ++-- install_ffmpeg.sh | 2 +- server/ai_live_video.go | 27 +++++++++++++-------------- 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index d9a2635917..120dc74761 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -68,6 +68,4 @@ COPY --from=build /usr/bin/grpc_health_probe /usr/local/bin/grpc_health_probe COPY --from=build /src/tasmodel.pb /tasmodel.pb COPY --from=build /usr/share/misc/pci.ids /usr/share/misc/pci.ids -RUN apt update && apt install -yqq ffmpeg - ENTRYPOINT ["/usr/local/bin/livepeer"] diff --git a/go.mod b/go.mod index f92d9a716f..54317943fc 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/livepeer/ai-worker v0.12.6 github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18 - github.com/livepeer/lpms v0.0.0-20241122145837-7b07ba3a2204 + github.com/livepeer/lpms v0.0.0-20241203012405-fc96cadb6393 github.com/livepeer/m3u8 v0.11.1 github.com/mattn/go-sqlite3 v1.14.18 github.com/oapi-codegen/nethttp-middleware v1.0.1 diff --git a/go.sum b/go.sum index d8412d273b..6aaffa9b62 100644 --- a/go.sum +++ b/go.sum @@ -613,8 +613,8 @@ github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded h1:ZQlvR5RB4nfT+cO github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded/go.mod h1:xkDdm+akniYxVT9KW1Y2Y7Hso6aW+rZObz3nrA9yTHw= github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18 h1:4oH3NqV0NvcdS44Ld3zK2tO8IUiNozIggm74yobQeZg= github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18/go.mod h1:Jpf4jHK+fbWioBHRDRM1WadNT1qmY27g2YicTdO0Rtc= -github.com/livepeer/lpms v0.0.0-20241122145837-7b07ba3a2204 h1:YalnQu8BB9vRh+7gcEjfzfHNl9NEwagBTHQqnlUYDrA= -github.com/livepeer/lpms v0.0.0-20241122145837-7b07ba3a2204/go.mod h1:z5ROP1l5OzAKSoqVRLc34MjUdueil6wHSecQYV7llIw= +github.com/livepeer/lpms v0.0.0-20241203012405-fc96cadb6393 
h1:aoDFI66Kj1pQueka93PLY59WlnI7jy4cJUfPxteIgCE= +github.com/livepeer/lpms v0.0.0-20241203012405-fc96cadb6393/go.mod h1:z5ROP1l5OzAKSoqVRLc34MjUdueil6wHSecQYV7llIw= github.com/livepeer/m3u8 v0.11.1 h1:VkUJzfNTyjy9mqsgp5JPvouwna8wGZMvd/gAfT5FinU= github.com/livepeer/m3u8 v0.11.1/go.mod h1:IUqAtwWPAG2CblfQa4SVzTQoDcEMPyfNOaBSxqHMS04= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= diff --git a/install_ffmpeg.sh b/install_ffmpeg.sh index 6cb8815582..a170cc7e1e 100755 --- a/install_ffmpeg.sh +++ b/install_ffmpeg.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash echo 'WARNING: downloading and executing lpms/install_ffmpeg.sh, use it directly in case of issues' -curl https://raw.githubusercontent.com/livepeer/lpms/ffde2327537517b3345162e9544704571bc58a34/install_ffmpeg.sh | bash -s $1 +curl https://raw.githubusercontent.com/livepeer/lpms/b33cac634b43d2ecd160224417daf8e920b0f500/install_ffmpeg.sh | bash -s $1 diff --git a/server/ai_live_video.go b/server/ai_live_video.go index ca75bb51d7..71cf169b88 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -9,13 +9,13 @@ import ( "net/http" "net/url" "os" - "os/exec" "time" "github.com/livepeer/go-livepeer/clog" "github.com/livepeer/go-livepeer/core" "github.com/livepeer/go-livepeer/media" "github.com/livepeer/go-livepeer/trickle" + "github.com/livepeer/lpms/ffmpeg" ) func startTricklePublish(url *url.URL, params aiRequestParams) { @@ -69,23 +69,22 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa } }() - // TODO: Change this to LPMS go func() { defer r.Close() retryCount := 0 + // TODO check whether stream is actually terminated + // so we aren't just looping unnecessarily for retryCount < 10 { - cmd := exec.Command("ffmpeg", - "-i", "pipe:0", - "-c:a", "copy", - "-c:v", "copy", - "-f", "flv", - params.liveParams.outputRTMPURL, - ) - cmd.Stdin = r - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != 
nil { - clog.Infof(ctx, "Error running trickle subscribe ffmpeg: %s", err) + _, err := ffmpeg.Transcode3(&ffmpeg.TranscodeOptionsIn{ + Fname: fmt.Sprintf("pipe:%d", r.Fd()), + }, []ffmpeg.TranscodeOptions{{ + Oname: params.liveParams.outputRTMPURL, + AudioEncoder: ffmpeg.ComponentOptions{Name: "copy"}, + VideoEncoder: ffmpeg.ComponentOptions{Name: "copy"}, + Muxer: ffmpeg.ComponentOptions{Name: "flv"}, + }}) + if err != nil { + clog.Infof(ctx, "Error sending RTMP out: %s", err) } retryCount++ time.Sleep(5 * time.Second) From b97f6cf3f169cbc5727faa5393f7b9dd16afada9 Mon Sep 17 00:00:00 2001 From: Max Holland Date: Wed, 4 Dec 2024 12:09:48 +0000 Subject: [PATCH 17/56] Better retry logic for mediaserver pull and small refactor (#3290) * Refactor mediamtx api calls into separate package * Contextual logging * Stop retrying if stream does not exist * fix build * Move to media package --- media/mediamtx.go | 78 +++++++++++++++++++++++++++++++++++ media/rtmp2segment.go | 42 ++++++++++++------- media/rtmp2segment_windows.go | 8 +++- server/ai_live_video.go | 52 ++++------------------- server/ai_mediaserver.go | 6 +-- server/ai_process.go | 2 +- server/mediaserver.go | 7 ++-- 7 files changed, 127 insertions(+), 68 deletions(-) create mode 100644 media/mediamtx.go diff --git a/media/mediamtx.go b/media/mediamtx.go new file mode 100644 index 0000000000..e51eca1318 --- /dev/null +++ b/media/mediamtx.go @@ -0,0 +1,78 @@ +package media + +import ( + "fmt" + "io" + "net/http" +) + +type MediaMTXClient struct { + apiPassword string +} + +func NewMediaMTXClient(apiPassword string) *MediaMTXClient { + return &MediaMTXClient{apiPassword: apiPassword} +} + +const ( + mediaMTXControlPort = "9997" + mediaMTXControlUser = "admin" + MediaMTXWebrtcSession = "webrtcSession" + MediaMTXRtmpConn = "rtmpConn" +) + +func getApiPath(sourceType string) (string, error) { + var apiPath string + switch sourceType { + case MediaMTXWebrtcSession: + apiPath = "webrtcsessions" + case MediaMTXRtmpConn: + 
apiPath = "rtmpconns" + default: + return "", fmt.Errorf("invalid sourceType: %s", sourceType) + } + return apiPath, nil +} + +func (mc *MediaMTXClient) KickInputConnection(mediaMTXHost, sourceID, sourceType string) error { + apiPath, err := getApiPath(sourceType) + if err != nil { + return err + } + + req, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://%s:%s/v3/%s/kick/%s", mediaMTXHost, mediaMTXControlPort, apiPath, sourceID), nil) + if err != nil { + return fmt.Errorf("failed to create kick request: %w", err) + } + req.SetBasicAuth(mediaMTXControlUser, mc.apiPassword) + resp, err := http.DefaultClient.Do(req) + if err != nil { + return fmt.Errorf("failed to kick connection: %w", err) + } + if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("kick connection failed with status code: %d body: %s", resp.StatusCode, body) + } + return nil +} + +func (mc *MediaMTXClient) StreamExists(mediaMTXHost, sourceID, sourceType string) (bool, error) { + apiPath, err := getApiPath(sourceType) + if err != nil { + return false, err + } + req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("http://%s:%s/v3/%s/get/%s", mediaMTXHost, mediaMTXControlPort, apiPath, sourceID), nil) + if err != nil { + return false, fmt.Errorf("failed to create get stream request: %w", err) + } + req.SetBasicAuth(mediaMTXControlUser, mc.apiPassword) + resp, err := http.DefaultClient.Do(req) + if err != nil { + return false, fmt.Errorf("failed to get stream: %w", err) + } + if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest { + body, _ := io.ReadAll(resp.Body) + return false, fmt.Errorf("get stream failed with status code: %d body: %s", resp.StatusCode, body) + } + return true, nil +} diff --git a/media/rtmp2segment.go b/media/rtmp2segment.go index ef9b67b48b..9f9e3c5719 100644 --- a/media/rtmp2segment.go +++ b/media/rtmp2segment.go @@ -25,23 +25,33 @@ import ( var waitTimeout 
= 20 * time.Second type MediaSegmenter struct { - Workdir string + Workdir string + MediaMTXClient *MediaMTXClient + MediaMTXHost string } -func (ms *MediaSegmenter) RunSegmentation(in string, segmentHandler SegmentHandler) { +func (ms *MediaSegmenter) RunSegmentation(ctx context.Context, in string, segmentHandler SegmentHandler, id, sourceType string) { outFilePattern := filepath.Join(ms.Workdir, randomString()+"-%d.ts") completionSignal := make(chan bool, 1) wg := &sync.WaitGroup{} wg.Add(1) go func() { defer wg.Done() - processSegments(segmentHandler, outFilePattern, completionSignal) + processSegments(ctx, segmentHandler, outFilePattern, completionSignal) }() retryCount := 0 - for retryCount < 5 { + for { + streamExists, err := ms.MediaMTXClient.StreamExists(ms.MediaMTXHost, id, sourceType) + if err != nil { + clog.Errorf(ctx, "StreamExists check failed. err=%s", err) + } + if retryCount > 20 && !streamExists { + clog.Errorf(ctx, "Stopping segmentation, input stream does not exist. in=%s err=%s", in, err) + break + } ffmpeg.FfmpegSetLogLevel(ffmpeg.FFLogWarning) - _, err := ffmpeg.Transcode3(&ffmpeg.TranscodeOptionsIn{ + _, err = ffmpeg.Transcode3(&ffmpeg.TranscodeOptionsIn{ Fname: in, }, []ffmpeg.TranscodeOptions{{ Oname: outFilePattern, @@ -50,13 +60,13 @@ func (ms *MediaSegmenter) RunSegmentation(in string, segmentHandler SegmentHandl Muxer: ffmpeg.ComponentOptions{Name: "segment"}, }}) if err != nil { - slog.Error("Failed to run segmentation", "in", in, "err", err) + clog.Errorf(ctx, "Failed to run segmentation. 
in=%s err=%s", in, err) } retryCount++ time.Sleep(5 * time.Second) } completionSignal <- true - slog.Info("sent completion signal, now waiting") + clog.Infof(ctx, "sent completion signal, now waiting") wg.Wait() } @@ -160,7 +170,7 @@ func openNonBlockingWithRetry(name string, timeout time.Duration, completed <-ch } } -func processSegments(segmentHandler SegmentHandler, outFilePattern string, completionSignal <-chan bool) { +func processSegments(ctx context.Context, segmentHandler SegmentHandler, outFilePattern string, completionSignal <-chan bool) { // things protected by the mutex mu mu := &sync.Mutex{} @@ -208,7 +218,7 @@ func processSegments(segmentHandler SegmentHandler, outFilePattern string, compl mu.Unlock() // Handle the reading process - readSegment(segmentHandler, file, pipeName) + readSegment(ctx, segmentHandler, file, pipeName) // Increment to the next pipe pipeNum++ @@ -228,7 +238,7 @@ func processSegments(segmentHandler SegmentHandler, outFilePattern string, compl } } -func readSegment(segmentHandler SegmentHandler, file *os.File, pipeName string) { +func readSegment(ctx context.Context, segmentHandler SegmentHandler, file *os.File, pipeName string) { defer file.Close() reader := bufio.NewReader(file) @@ -246,33 +256,33 @@ func readSegment(segmentHandler SegmentHandler, file *os.File, pipeName string) n, err := reader.Read(buf) if n > 0 { if !firstByteRead { - slog.Debug("First byte read", "pipeName", pipeName) + clog.V(7).Infof(ctx, "First byte read. pipeName=%s", pipeName) firstByteRead = true } totalBytesRead += int64(n) if _, err := interfaceWriter.Write(buf[:n]); err != nil { if err != io.EOF { - slog.Error("Error writing", "pipeName", pipeName, "err", err) + clog.Errorf(ctx, "Error writing. 
pipeName=%s err=%s", pipeName, err) } } } if n == len(buf) && n < 1024*1024 { newLen := int(float64(len(buf)) * 1.5) - slog.Debug("Max buf hit, increasing", "oldSize", humanBytes(int64(len(buf))), "newSize", humanBytes(int64(newLen))) + clog.V(7).Infof(ctx, "Max buf hit, increasing. oldSize=%s newSize=%s", humanBytes(int64(len(buf))), humanBytes(int64(newLen))) buf = make([]byte, newLen) } if err != nil { if err.Error() == "EOF" { - slog.Debug("Last byte read", "pipeName", pipeName, "totalRead", humanBytes(totalBytesRead)) + clog.V(7).Infof(ctx, "Last byte read. pipeName=%s totalRead=%s", pipeName, humanBytes(totalBytesRead)) } else { - slog.Error("Error reading", "pipeName", pipeName, "err", err) + clog.Errorf(ctx, "Error reading. pipeName=%s err=%s", pipeName, err) } break } } - clog.V(8).Infof(context.Background(), "read segment. totalRead=%s", humanBytes(totalBytesRead)) + clog.V(8).Infof(ctx, "read segment. totalRead=%s", humanBytes(totalBytesRead)) } diff --git a/media/rtmp2segment_windows.go b/media/rtmp2segment_windows.go index 926fac8d46..321ab9257f 100644 --- a/media/rtmp2segment_windows.go +++ b/media/rtmp2segment_windows.go @@ -2,10 +2,14 @@ package media +import "context" + type MediaSegmenter struct { - Workdir string + Workdir string + MediaMTXClient *MediaMTXClient + MediaMTXHost string } -func (ms *MediaSegmenter) RunSegmentation(in string, segmentHandler SegmentHandler) { +func (ms *MediaSegmenter) RunSegmentation(ctx context.Context, in string, segmentHandler SegmentHandler, id, sourceType string) { // Not supported for Windows } diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 71cf169b88..c53883f71f 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "log/slog" - "net/http" "net/url" "os" "time" @@ -18,28 +17,29 @@ import ( "github.com/livepeer/lpms/ffmpeg" ) -func startTricklePublish(url *url.URL, params aiRequestParams) { +func startTricklePublish(ctx context.Context, url 
*url.URL, params aiRequestParams) { + ctx = clog.AddVal(ctx, "url", url.Redacted()) publisher, err := trickle.NewTricklePublisher(url.String()) if err != nil { - slog.Info("error publishing trickle", "err", err) + clog.Infof(ctx, "error publishing trickle. err=%s", err) } params.liveParams.segmentReader.SwitchReader(func(reader io.Reader) { // check for end of stream if _, eos := reader.(*media.EOSReader); eos { if err := publisher.Close(); err != nil { - slog.Info("Error closing trickle publisher", "err", err) + clog.Infof(ctx, "Error closing trickle publisher. err=%s", err) } return } go func() { - clog.V(8).Infof(context.Background(), "publishing trickle. url=%s", url.Redacted()) + clog.V(8).Infof(ctx, "trickle publish writing data") // TODO this blocks! very bad! if err := publisher.Write(reader); err != nil { - slog.Info("Error writing to trickle publisher", "err", err) + clog.Infof(ctx, "Error writing to trickle publisher. err=%s", err) } }() }) - slog.Info("trickle pub", "url", url) + clog.Infof(ctx, "trickle pub") } func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestParams) { @@ -94,9 +94,9 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa func mediamtxSourceTypeToString(s string) (string, error) { switch s { - case mediaMTXWebrtcSession: + case media.MediaMTXWebrtcSession: return "whip", nil - case mediaMTXRtmpConn: + case media.MediaMTXRtmpConn: return "rtmp", nil default: return "", errors.New("unknown media source") @@ -114,37 +114,3 @@ func startControlPublish(control *url.URL, params aiRequestParams) { defer params.node.LiveMu.Unlock() params.node.LivePipelines[stream] = &core.LivePipeline{ControlPub: controlPub} } - -const ( - mediaMTXControlPort = "9997" - mediaMTXControlUser = "admin" - mediaMTXWebrtcSession = "webrtcSession" - mediaMTXRtmpConn = "rtmpConn" -) - -func (ls *LivepeerServer) kickInputConnection(mediaMTXHost, sourceID, sourceType string) error { - var apiPath string - switch 
sourceType { - case mediaMTXWebrtcSession: - apiPath = "webrtcsessions" - case mediaMTXRtmpConn: - apiPath = "rtmpconns" - default: - return fmt.Errorf("invalid sourceType: %s", sourceType) - } - - req, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://%s:%s/v3/%s/kick/%s", mediaMTXHost, mediaMTXControlPort, apiPath, sourceID), nil) - if err != nil { - return fmt.Errorf("failed to create kick request: %w", err) - } - req.SetBasicAuth(mediaMTXControlUser, ls.mediaMTXApiPassword) - resp, err := http.DefaultClient.Do(req) - if err != nil { - return fmt.Errorf("failed to kick connection: %w", err) - } - if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest { - body, _ := io.ReadAll(resp.Body) - return fmt.Errorf("kick connection failed with status code: %d body: %s", resp.StatusCode, body) - } - return nil -} diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index 8c7024feba..c077cbd1aa 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -441,7 +441,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { QueryParams: queryParams, }) if err != nil { - kickErr := ls.kickInputConnection(remoteHost, sourceID, sourceType) + kickErr := ls.mediaMTXClient.KickInputConnection(remoteHost, sourceID, sourceType) if kickErr != nil { clog.Errorf(ctx, "failed to kick input connection: %s", kickErr.Error()) } @@ -475,8 +475,8 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { if mediaMTXStreamPrefix != "" { mediaMTXStreamPrefix = mediaMTXStreamPrefix + "/" } - ms := media.MediaSegmenter{Workdir: ls.LivepeerNode.WorkDir} - ms.RunSegmentation(fmt.Sprintf("rtmp://%s/%s%s", remoteHost, mediaMTXStreamPrefix, streamName), ssr.Read) + ms := media.MediaSegmenter{Workdir: ls.LivepeerNode.WorkDir, MediaMTXClient: ls.mediaMTXClient, MediaMTXHost: remoteHost} + ms.RunSegmentation(ctx, fmt.Sprintf("rtmp://%s/%s%s", remoteHost, mediaMTXStreamPrefix, streamName), ssr.Read, sourceID, sourceType) ssr.Close() 
ls.cleanupLive(streamName) }() diff --git a/server/ai_process.go b/server/ai_process.go index 5f7c9ce2c9..209be2ed44 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1033,7 +1033,7 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A if err != nil { return nil, fmt.Errorf("invalid control URL: %w", err) } - startTricklePublish(pub, params) + startTricklePublish(ctx, pub, params) startTrickleSubscribe(ctx, sub, params) startControlPublish(control, params) } diff --git a/server/mediaserver.go b/server/mediaserver.go index 1f31d7bf35..0e82cdf5b6 100644 --- a/server/mediaserver.go +++ b/server/mediaserver.go @@ -28,6 +28,7 @@ import ( "time" "github.com/livepeer/go-livepeer/clog" + "github.com/livepeer/go-livepeer/media" "github.com/livepeer/go-livepeer/monitor" "github.com/livepeer/go-livepeer/pm" "github.com/livepeer/go-tools/drivers" @@ -126,8 +127,8 @@ type LivepeerServer struct { connectionLock *sync.RWMutex serverLock *sync.RWMutex - mediaMTXApiPassword string - liveAIAuthApiKey string + mediaMTXClient *media.MediaMTXClient + liveAIAuthApiKey string } func (s *LivepeerServer) SetContextFromUnitTest(c context.Context) { @@ -193,7 +194,7 @@ func NewLivepeerServer(rtmpAddr string, lpNode *core.LivepeerNode, httpIngest bo internalManifests: make(map[core.ManifestID]core.ManifestID), recordingsAuthResponses: cache.New(time.Hour, 2*time.Hour), AISessionManager: NewAISessionManager(lpNode, AISessionManagerTTL), - mediaMTXApiPassword: lpNode.MediaMTXApiPassword, + mediaMTXClient: media.NewMediaMTXClient(lpNode.MediaMTXApiPassword), liveAIAuthApiKey: lpNode.LiveAIAuthApiKey, } if lpNode.NodeType == core.BroadcasterNode && httpIngest { From ff2d395275175d7804900ed0e3b98a4d6d3298f5 Mon Sep 17 00:00:00 2001 From: Rick Staa Date: Wed, 4 Dec 2024 17:04:25 +0100 Subject: [PATCH 18/56] feat(ai): enable worker image pulling (#3279) This commit ensures that the docker containers the worker uses are pulled during startup. 
They use the new changes implemented in https://github.com/livepeer/ai-worker/pull/200. --- CHANGELOG_PENDING.md | 2 ++ cmd/livepeer/starter/starter.go | 6 ++++++ core/ai.go | 3 ++- core/ai_test.go | 4 ++++ server/ai_worker_test.go | 5 +++++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 22f1b28ed7..0f597d50c7 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -2,6 +2,8 @@ ## v0.X.X +- [#3279](https://github.com/livepeer/go-livepeer/pull/3279) - Enable automatic worker image pulling. + ### Breaking Changes 🚨🚨 ### Features ⚒ diff --git a/cmd/livepeer/starter/starter.go b/cmd/livepeer/starter/starter.go index 645522403d..e29633e586 100755 --- a/cmd/livepeer/starter/starter.go +++ b/cmd/livepeer/starter/starter.go @@ -1242,6 +1242,12 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) { modelConstraint.Capacity = config.Capacity } + // Ensure the AI worker has the image needed to serve the job. + err := n.AIWorker.EnsureImageAvailable(ctx, config.Pipeline, config.ModelID) + if err != nil { + glog.Errorf("Error ensuring AI worker image available for %v: %v", config.Pipeline, err) + } + if config.Warm || config.URL != "" { // Register external container endpoint if URL is provided. endpoint := worker.RunnerEndpoint{URL: config.URL, Token: config.Token} diff --git a/core/ai.go b/core/ai.go index 348ffd21c7..a9eeae9f72 100644 --- a/core/ai.go +++ b/core/ai.go @@ -30,7 +30,8 @@ type AI interface { LiveVideoToVideo(context.Context, worker.GenLiveVideoToVideoJSONRequestBody) (*worker.LiveVideoToVideoResponse, error) Warm(context.Context, string, string, worker.RunnerEndpoint, worker.OptimizationFlags) error Stop(context.Context) error - HasCapacity(pipeline, modelID string) bool + HasCapacity(string, string) bool + EnsureImageAvailable(context.Context, string, string) error } // Custom type to parse a big.Rat from a JSON number. 
diff --git a/core/ai_test.go b/core/ai_test.go index e82b658ba5..3e4ab8207b 100644 --- a/core/ai_test.go +++ b/core/ai_test.go @@ -679,6 +679,10 @@ func (a *stubAIWorker) HasCapacity(pipeline, modelID string) bool { return true } +func (a *stubAIWorker) EnsureImageAvailable(ctx context.Context, pipeline string, modelID string) error { + return nil +} + type StubAIWorkerServer struct { manager *RemoteAIWorkerManager SendError error diff --git a/server/ai_worker_test.go b/server/ai_worker_test.go index 602c01fed9..ab31a3e712 100644 --- a/server/ai_worker_test.go +++ b/server/ai_worker_test.go @@ -649,3 +649,8 @@ func (a *stubAIWorker) HasCapacity(pipeline, modelID string) bool { a.Called++ return true } + +func (a *stubAIWorker) EnsureImageAvailable(ctx context.Context, pipeline string, modelID string) error { + a.Called++ + return nil +} From 16fe45d5bd03dbf6ed356c69da28eb84e0a8a182 Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Wed, 4 Dec 2024 11:19:52 -0800 Subject: [PATCH 19/56] trickle: Publisher error handling (#3259) This allows us to propagate normal HTTP errors down to the caller of publisher.Write() Currently we don't distinguish between 400/500 status codes (except for 404s which we do explicitly handle) but there is room to handle this in the future via separate error types. 
--- trickle/trickle_publisher.go | 66 +++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/trickle/trickle_publisher.go b/trickle/trickle_publisher.go index c6c41e1cb5..a6ca937ff8 100644 --- a/trickle/trickle_publisher.go +++ b/trickle/trickle_publisher.go @@ -2,6 +2,7 @@ package trickle import ( "crypto/tls" + "errors" "fmt" "io" "log/slog" @@ -9,6 +10,8 @@ import ( "sync" ) +var StreamNotFoundErr = errors.New("stream not found") + // TricklePublisher represents a trickle streaming client type TricklePublisher struct { baseURL string @@ -18,10 +21,21 @@ type TricklePublisher struct { contentType string } +// HTTPError gets returned with a >=400 status code (non-400) +type HTTPError struct { + Code int + Body string +} + +func (e *HTTPError) Error() string { + return fmt.Sprintf("Status code %d - %s", e.Code, e.Body) +} + // pendingPost represents a pre-initialized POST request waiting for data type pendingPost struct { index int writer *io.PipeWriter + errCh chan error } // NewTricklePublisher creates a new trickle stream client @@ -48,6 +62,7 @@ func (c *TricklePublisher) preconnect() (*pendingPost, error) { slog.Debug("Preconnecting", "url", url) + errCh := make(chan error, 1) pr, pw := io.Pipe() req, err := http.NewRequest("POST", url, pr) if err != nil { @@ -57,9 +72,7 @@ func (c *TricklePublisher) preconnect() (*pendingPost, error) { req.Header.Set("Content-Type", c.contentType) // Start the POST request in a background goroutine - // TODO error handling for these go func() { - slog.Debug("Initiailzing http client", "idx", index) // Createa new client to prevent connection reuse client := http.Client{Transport: &http.Transport{ // ignore orch certs for now @@ -68,25 +81,38 @@ func (c *TricklePublisher) preconnect() (*pendingPost, error) { resp, err := client.Do(req) if err != nil { slog.Error("Failed to complete POST for segment", "url", url, "err", err) + errCh <- err return } body, err := io.ReadAll(resp.Body) 
if err != nil { slog.Error("Error reading body", "url", url, "err", err) + errCh <- err + return } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { slog.Error("Failed POST segment", "url", url, "status_code", resp.StatusCode, "msg", string(body)) + if resp.StatusCode == http.StatusNotFound { + errCh <- StreamNotFoundErr + return + } + if resp.StatusCode >= 400 { + errCh <- &HTTPError{Code: resp.StatusCode, Body: string(body)} + return + } } else { slog.Debug("Uploaded segment", "url", url) } + errCh <- nil }() c.index += 1 return &pendingPost{ writer: pw, index: index, + errCh: errCh, }, nil } @@ -128,6 +154,7 @@ func (c *TricklePublisher) Write(data io.Reader) error { } writer := pp.writer index := pp.index + errCh := pp.errCh // Set up the next connection nextPost, err := c.preconnect() @@ -140,16 +167,39 @@ func (c *TricklePublisher) Write(data io.Reader) error { // Now unlock so the copy does not block c.writeLock.Unlock() + // before writing, check for error from preconnects + select { + case err := <-errCh: + return err + default: + // no error, continue + } + // Start streaming data to the current POST request - n, err := io.Copy(writer, data) - if err != nil { - return fmt.Errorf("error streaming data to segment %d: %w", index, err) + n, ioError := io.Copy(writer, data) + + // if no io errors, close the writer + var closeErr error + if ioError == nil { + slog.Debug("Completed writing", "idx", index, "totalBytes", humanBytes(n)) + + // Close the pipe writer to signal end of data for the current POST request + closeErr = writer.Close() } - slog.Debug("Completed writing", "idx", index, "totalBytes", humanBytes(n)) + // check for errors after write, eg >=400 status codes + // these typically do not result in io errors eg, with io.Copy + // also prioritize errors over this channel compared to io errors + // such as "read/write on closed pipe" + if err := <-errCh; err != nil { + return err + } + + if ioError != nil { + return fmt.Errorf("error 
streaming data to segment %d: %w", index, err) + } - // Close the pipe writer to signal end of data for the current POST request - if err := writer.Close(); err != nil { + if closeErr != nil { return fmt.Errorf("error closing writer for segment %d: %w", index, err) } From 8643503ceec45dabfd32f5910492b5ad7ef953ce Mon Sep 17 00:00:00 2001 From: John | Elite Encoder Date: Wed, 4 Dec 2024 18:01:33 -0500 Subject: [PATCH 20/56] Update ControlUrl to pointer (#3295) * update controlUrl to use pointer * check for nil ControlUrl * update go mod --- go.mod | 2 +- go.sum | 4 ++-- server/ai_http.go | 2 +- server/ai_process.go | 6 +++++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 54317943fc..16ae8e03a4 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/uuid v1.6.0 github.com/jaypipes/ghw v0.10.0 github.com/jaypipes/pcidb v1.0.0 - github.com/livepeer/ai-worker v0.12.6 + github.com/livepeer/ai-worker v0.12.7-0.20241204213602-1021eaf4c373 github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18 github.com/livepeer/lpms v0.0.0-20241203012405-fc96cadb6393 diff --git a/go.sum b/go.sum index 6aaffa9b62..22e4917c85 100644 --- a/go.sum +++ b/go.sum @@ -605,8 +605,8 @@ github.com/libp2p/go-netroute v0.2.0 h1:0FpsbsvuSnAhXFnCY0VLFbJOzaK0VnP0r1QT/o4n github.com/libp2p/go-netroute v0.2.0/go.mod h1:Vio7LTzZ+6hoT4CMZi5/6CpY3Snzh2vgZhWgxMNwlQI= github.com/libp2p/go-openssl v0.1.0 h1:LBkKEcUv6vtZIQLVTegAil8jbNpJErQ9AnT+bWV+Ooo= github.com/libp2p/go-openssl v0.1.0/go.mod h1:OiOxwPpL3n4xlenjx2h7AwSGaFSC/KZvf6gNdOBQMtc= -github.com/livepeer/ai-worker v0.12.6 h1:1RN7eYy4C3D+iVaK5WuUu8Jgm7hTQ08J8EBeRekGJSo= -github.com/livepeer/ai-worker v0.12.6/go.mod h1:ZibfmZQQh6jFvnPLHeIPInghfX5ln+JpN845nS3GuyM= +github.com/livepeer/ai-worker v0.12.7-0.20241204213602-1021eaf4c373 h1:+IepZubsJ1NeYcgoa+7tk8ycOh5DaRZ14I+SxtAbsZ0= +github.com/livepeer/ai-worker 
v0.12.7-0.20241204213602-1021eaf4c373/go.mod h1:ZibfmZQQh6jFvnPLHeIPInghfX5ln+JpN845nS3GuyM= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b h1:VQcnrqtCA2UROp7q8ljkh2XA/u0KRgVv0S1xoUvOweE= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b/go.mod h1:hwJ5DKhl+pTanFWl+EUpw1H7ukPO/H+MFpgA7jjshzw= github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded h1:ZQlvR5RB4nfT+cOQee+WqmaDOgGtP2oDMhcVvR4L0yA= diff --git a/server/ai_http.go b/server/ai_http.go index 9d0dcf29a3..ce79564bff 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -184,7 +184,7 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { jsonData, err := json.Marshal(&worker.LiveVideoToVideoResponse{ PublishUrl: pubUrl, SubscribeUrl: subUrl, - ControlUrl: controlUrl, + ControlUrl: &controlUrl, }) if err != nil { respondWithError(w, err.Error(), http.StatusInternalServerError) diff --git a/server/ai_process.go b/server/ai_process.go index 209be2ed44..c3d9d1d409 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1020,6 +1020,10 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A } if resp.JSON200 != nil { + if resp.JSON200.ControlUrl == nil { + return nil, errors.New("control URL is missing") + } + host := sess.Transcoder() pub, err := common.AppendHostname(resp.JSON200.PublishUrl, host) if err != nil { @@ -1029,7 +1033,7 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A if err != nil { return nil, fmt.Errorf("invalid subscribe URL: %w", err) } - control, err := common.AppendHostname(resp.JSON200.ControlUrl, host) + control, err := common.AppendHostname(*resp.JSON200.ControlUrl, host) if err != nil { return nil, fmt.Errorf("invalid control URL: %w", err) } From 2c3794f18babc580600cab09bb6b9f2106787d4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Leszko?= Date: Thu, 5 Dec 2024 10:34:43 +0100 Subject: [PATCH 21/56] release v0.8.1 (#3297) --- CHANGELOG.md | 14 ++++++++++++++ 
CHANGELOG_PENDING.md | 2 -- VERSION | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ca0d2d14b..bf002df3bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## v0.8.1 + +- [#3279](https://github.com/livepeer/go-livepeer/pull/3279) Enable automatic worker image pulling. +- Live Video AI features +- [#3249](https://github.com/livepeer/go-livepeer/pull/3249) Add Gateway ETH Address to Kafka events + +### Features ⚒ + +#### Broadcaster + +### Bug Fixes 🐞 + +#### Broadcaster + ## v0.8.0 - [#2959](https://github.com/livepeer/go-livepeer/pull/2959) Add Livepeer AI Subnet features diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 0f597d50c7..22f1b28ed7 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -2,8 +2,6 @@ ## v0.X.X -- [#3279](https://github.com/livepeer/go-livepeer/pull/3279) - Enable automatic worker image pulling. - ### Breaking Changes 🚨🚨 ### Features ⚒ diff --git a/VERSION b/VERSION index 8adc70fdd9..c18d72be30 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.8.0 \ No newline at end of file +0.8.1 \ No newline at end of file From b971acc2318695e02ca3461fb5eab2bd20817783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Leszko?= Date: Thu, 5 Dec 2024 10:53:26 +0100 Subject: [PATCH 22/56] Re-apply "Live Video Payments" (#3293) --- cmd/livepeer/livepeer.go | 1 + cmd/livepeer/starter/starter.go | 4 + core/accounting.go | 6 + core/livepeernode.go | 1 + server/ai_http.go | 59 ++++++++- server/ai_live_video.go | 30 ++++- server/ai_mediaserver.go | 7 +- server/ai_process.go | 19 ++- server/live_payment.go | 25 +++- server/live_payment_processor.go | 200 +++++++++++++++++++++++++++++++ server/live_payment_test.go | 9 +- server/mediaserver.go | 6 +- server/segment_rpc.go | 63 ++++++---- server/segment_rpc_test.go | 5 + 14 files changed, 389 insertions(+), 46 deletions(-) create mode 100644 server/live_payment_processor.go diff --git a/cmd/livepeer/livepeer.go 
b/cmd/livepeer/livepeer.go index d7866ba78e..9881014192 100755 --- a/cmd/livepeer/livepeer.go +++ b/cmd/livepeer/livepeer.go @@ -167,6 +167,7 @@ func parseLivepeerConfig() starter.LivepeerConfig { cfg.LiveAITrickleHostForRunner = flag.String("liveAITrickleHostForRunner", "", "Trickle Host used by AI Runner; It's used to overwrite the publicly available Trickle Host") cfg.LiveAIAuthApiKey = flag.String("liveAIAuthApiKey", "", "API key to use for Live AI authentication requests") cfg.LiveAIAuthWebhookURL = flag.String("liveAIAuthWebhookUrl", "", "Live AI RTMP authentication webhook URL") + cfg.LivePaymentInterval = flag.Duration("livePaymentInterval", *cfg.LivePaymentInterval, "Interval to pay process Gateway <> Orchestrator Payments for Live AI Video") // Onchain: cfg.EthAcctAddr = flag.String("ethAcctAddr", *cfg.EthAcctAddr, "Existing Eth account address. For use when multiple ETH accounts exist in the keystore directory") diff --git a/cmd/livepeer/starter/starter.go b/cmd/livepeer/starter/starter.go index e29633e586..9489d28070 100755 --- a/cmd/livepeer/starter/starter.go +++ b/cmd/livepeer/starter/starter.go @@ -169,6 +169,7 @@ type LivepeerConfig struct { KafkaGatewayTopic *string MediaMTXApiPassword *string LiveAIAuthApiKey *string + LivePaymentInterval *time.Duration } // DefaultLivepeerConfig creates LivepeerConfig exactly the same as when no flags are passed to the livepeer process. 
@@ -213,6 +214,7 @@ func DefaultLivepeerConfig() LivepeerConfig { defaultAIModelsDir := "" defaultAIRunnerImage := "livepeer/ai-runner:latest" defaultLiveAIAuthWebhookURL := "" + defaultLivePaymentInterval := 5 * time.Second // Onchain: defaultEthAcctAddr := "" @@ -320,6 +322,7 @@ func DefaultLivepeerConfig() LivepeerConfig { AIModelsDir: &defaultAIModelsDir, AIRunnerImage: &defaultAIRunnerImage, LiveAIAuthWebhookURL: &defaultLiveAIAuthWebhookURL, + LivePaymentInterval: &defaultLivePaymentInterval, // Onchain: EthAcctAddr: &defaultEthAcctAddr, @@ -1570,6 +1573,7 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) { if cfg.LiveAIAuthApiKey != nil { n.LiveAIAuthApiKey = *cfg.LiveAIAuthApiKey } + n.LivePaymentInterval = *cfg.LivePaymentInterval if cfg.LiveAITrickleHostForRunner != nil { n.LiveAITrickleHostForRunner = *cfg.LiveAITrickleHostForRunner } diff --git a/core/accounting.go b/core/accounting.go index d6d972b0ee..ccb035b65d 100644 --- a/core/accounting.go +++ b/core/accounting.go @@ -1,11 +1,13 @@ package core import ( + "context" "math/big" "sync" "time" ethcommon "github.com/ethereum/go-ethereum/common" + "github.com/livepeer/go-livepeer/clog" ) // Balance holds the credit balance for a broadcast session @@ -41,6 +43,10 @@ func (b *Balance) StageUpdate(minCredit, ev *big.Rat) (int, *big.Rat, *big.Rat) } creditGap := new(big.Rat).Sub(minCredit, existingCredit) + if ev == nil || ev.Cmp(big.NewRat(0, 1)) == 0 { + clog.Warningf(context.Background(), "Error calculating tickets: ev is nil or zero") + return 0, big.NewRat(0, 1), existingCredit + } sizeRat := creditGap.Quo(creditGap, ev) res := sizeRat.Num() if !sizeRat.IsInt() { diff --git a/core/livepeernode.go b/core/livepeernode.go index 06853f20e2..6bfddc71c9 100644 --- a/core/livepeernode.go +++ b/core/livepeernode.go @@ -156,6 +156,7 @@ type LivepeerNode struct { MediaMTXApiPassword string LiveAITrickleHostForRunner string LiveAIAuthApiKey string + LivePaymentInterval time.Duration } type 
LivePipeline struct { diff --git a/server/ai_http.go b/server/ai_http.go index ce79564bff..7ee482c67a 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -132,6 +132,28 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { controlUrl = pubUrl + "-control" ) + // Handle initial payment, the rest of the payments are done separately from the stream processing + // Note that this payment is debit from the balance and acts as a buffer for the AI Realtime Video processing + payment, err := getPayment(r.Header.Get(paymentHeader)) + if err != nil { + respondWithError(w, err.Error(), http.StatusPaymentRequired) + return + } + sender := getPaymentSender(payment) + _, ctx, err = verifySegCreds(ctx, h.orchestrator, r.Header.Get(segmentHeader), sender) + if err != nil { + respondWithError(w, err.Error(), http.StatusForbidden) + return + } + if err := orch.ProcessPayment(ctx, payment, core.ManifestID(mid)); err != nil { + respondWithError(w, err.Error(), http.StatusBadRequest) + return + } + if payment.GetExpectedPrice().GetPricePerUnit() > 0 && !orch.SufficientBalance(sender, core.ManifestID(mid)) { + respondWithError(w, "Insufficient balance", http.StatusBadRequest) + return + } + //If successful, then create the trickle channels // Precreate the channels to avoid race conditions // TODO get the expected mime type from the request @@ -142,7 +164,34 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { controlPubCh := trickle.NewLocalPublisher(h.trickleSrv, mid+"-control", "application/json") controlPubCh.CreateChannel() - // Subscribe to the publishUrl for payments monitoring + // Start payment receiver which accounts the payments and stops the stream if the payment is insufficient + priceInfo := payment.GetExpectedPrice() + var paymentProcessor *LivePaymentProcessor + ctx, cancel := context.WithCancel(context.Background()) + if priceInfo != nil && priceInfo.PricePerUnit != 0 { + paymentReceiver := livePaymentReceiver{orchestrator: h.orchestrator} + 
accountPaymentFunc := func(inPixels int64) error { + err := paymentReceiver.AccountPayment(context.Background(), &SegmentInfoReceiver{ + sender: sender, + inPixels: inPixels, + priceInfo: priceInfo, + sessionID: mid, + }) + if err != nil { + slog.Warn("Error accounting payment, stopping stream processing", "err", err) + pubCh.Close() + subCh.Close() + controlPubCh.Close() + cancel() + } + return err + } + paymentProcessor = NewLivePaymentProcessor(ctx, h.node.LivePaymentInterval, accountPaymentFunc) + } else { + clog.Warningf(ctx, "No price info found for model %v, Orchestrator will not charge for video processing", modelID) + } + + // Subscribe to the publishUrl for payments monitoring and payment processing go func() { sub := trickle.NewLocalSubscriber(h.trickleSrv, mid) for { @@ -151,8 +200,11 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { clog.Infof(ctx, "Error getting local trickle segment err=%v", err) return } - // We can do something with the segment data here - io.Copy(io.Discard, segment.Reader) + reader := segment.Reader + if paymentProcessor != nil { + reader = paymentProcessor.process(segment.Reader) + } + io.Copy(io.Discard, reader) } }() @@ -176,6 +228,7 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { pubCh.Close() subCh.Close() controlPubCh.Close() + cancel() respondWithError(w, err.Error(), http.StatusInternalServerError) return } diff --git a/server/ai_live_video.go b/server/ai_live_video.go index c53883f71f..cc0dcda07f 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -17,24 +17,50 @@ import ( "github.com/livepeer/lpms/ffmpeg" ) -func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestParams) { +func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestParams, sess *AISession) { ctx = clog.AddVal(ctx, "url", url.Redacted()) publisher, err := trickle.NewTricklePublisher(url.String()) if err != nil { clog.Infof(ctx, "error publishing trickle. 
err=%s", err) } + + // Start payments which probes a segment every "paymentProcessInterval" and sends a payment + ctx, cancel := context.WithCancel(context.Background()) + priceInfo := sess.OrchestratorInfo.PriceInfo + var paymentProcessor *LivePaymentProcessor + if priceInfo != nil && priceInfo.PricePerUnit != 0 { + paymentSender := livePaymentSender{} + sendPaymentFunc := func(inPixels int64) error { + return paymentSender.SendPayment(context.Background(), &SegmentInfoSender{ + sess: sess.BroadcastSession, + inPixels: inPixels, + priceInfo: priceInfo, + mid: extractMid(url.Path), + }) + } + paymentProcessor = NewLivePaymentProcessor(ctx, params.liveParams.paymentProcessInterval, sendPaymentFunc) + } else { + clog.Warningf(ctx, "No price info found from Orchestrator, Gateway will not send payments for the video processing") + } + params.liveParams.segmentReader.SwitchReader(func(reader io.Reader) { // check for end of stream if _, eos := reader.(*media.EOSReader); eos { if err := publisher.Close(); err != nil { clog.Infof(ctx, "Error closing trickle publisher. err=%s", err) } + cancel() return } go func() { + r := reader + if paymentProcessor != nil { + r = paymentProcessor.process(reader) + } + clog.V(8).Infof(ctx, "trickle publish writing data") // TODO this blocks! very bad! - if err := publisher.Write(reader); err != nil { + if err := publisher.Write(r); err != nil { clog.Infof(ctx, "Error writing to trickle publisher. 
err=%s", err) } }() diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index c077cbd1aa..5b65d483dc 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -487,9 +487,10 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { sessManager: ls.AISessionManager, liveParams: liveRequestParams{ - segmentReader: ssr, - outputRTMPURL: outputURL, - stream: streamName, + segmentReader: ssr, + outputRTMPURL: outputURL, + stream: streamName, + paymentProcessInterval: ls.livePaymentInterval, }, } diff --git a/server/ai_process.go b/server/ai_process.go index c3d9d1d409..25f90af2f9 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -93,6 +93,8 @@ type liveRequestParams struct { segmentReader *media.SwitchableSegmentReader outputRTMPURL string stream string + + paymentProcessInterval time.Duration } // CalculateTextToImageLatencyScore computes the time taken per pixel for an text-to-image request. @@ -1004,6 +1006,8 @@ func submitAudioToText(ctx context.Context, params aiRequestParams, sess *AISess return &res, nil } +const initPixelsToPay = 45 * 30 * 1280 * 720 // 45 seconds, 30fps, 720p + func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *AISession, req worker.GenLiveVideoToVideoJSONRequestBody) (any, error) { client, err := worker.NewClientWithResponses(sess.Transcoder(), worker.WithHTTPClient(httpClient)) if err != nil { @@ -1012,9 +1016,11 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A } return nil, err } + setHeaders, balUpdate, err := prepareAIPayment(ctx, sess, initPixelsToPay) + defer completeBalanceUpdate(sess.BroadcastSession, balUpdate) // Send request to orchestrator - resp, err := client.GenLiveVideoToVideoWithResponse(ctx, req) + resp, err := client.GenLiveVideoToVideoWithResponse(ctx, req, setHeaders) if err != nil { return nil, err } @@ -1037,13 +1043,22 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A if err != nil 
{ return nil, fmt.Errorf("invalid control URL: %w", err) } - startTricklePublish(ctx, pub, params) + startTricklePublish(ctx, pub, params, sess) startTrickleSubscribe(ctx, sub, params) startControlPublish(control, params) } return resp, nil } +// extractMid extracts the mid (manifest ID) from the publish URL +// e.g. public URL passed from orchestrator: /live/manifest/123456, then mid is 123456 +// we can consider improving it and passing mid directly in the JSON response from Orchestrator, +// but currently it would require changing the OpenAPI schema in livepeer/ai-worker repo +func extractMid(path string) string { + pubSplit := strings.Split(path, "/") + return pubSplit[len(pubSplit)-1] +} + func CalculateLLMLatencyScore(took time.Duration, tokensUsed int) float64 { if tokensUsed <= 0 { return 0 diff --git a/server/live_payment.go b/server/live_payment.go index 0e20f48216..5fd42a45eb 100644 --- a/server/live_payment.go +++ b/server/live_payment.go @@ -2,7 +2,7 @@ package server import ( "context" - "errors" + "fmt" "io" "math/big" "net/http" @@ -24,6 +24,7 @@ type SegmentInfoSender struct { sess *BroadcastSession inPixels int64 priceInfo *net.PriceInfo + mid string } type SegmentInfoReceiver struct { @@ -46,7 +47,6 @@ type LivePaymentReceiver interface { } type livePaymentSender struct { - segmentsToPayUpfront int64 } type livePaymentReceiver struct { @@ -54,17 +54,22 @@ type livePaymentReceiver struct { } func (r *livePaymentSender) SendPayment(ctx context.Context, segmentInfo *SegmentInfoSender) error { + if segmentInfo.priceInfo == nil || segmentInfo.priceInfo.PricePerUnit == 0 { + clog.V(common.DEBUG).Infof(ctx, "Skipping sending payment, priceInfo not set for requestID=%s, ", segmentInfo.mid) + return nil + } sess := segmentInfo.sess if err := refreshSessionIfNeeded(ctx, sess); err != nil { return err } + sess.lock.Lock() + sess.Params.ManifestID = core.ManifestID(segmentInfo.mid) + sess.lock.Unlock() fee := calculateFee(segmentInfo.inPixels, 
segmentInfo.priceInfo) - // We pay a few segments upfront to avoid race condition between payment and segment processing - minCredit := new(big.Rat).Mul(fee, new(big.Rat).SetInt64(r.segmentsToPayUpfront)) - balUpdate, err := newBalanceUpdate(sess, minCredit) + balUpdate, err := newBalanceUpdate(sess, fee) if err != nil { return err } @@ -135,11 +140,19 @@ func (r *livePaymentSender) SendPayment(ctx context.Context, segmentInfo *Segmen func (r *livePaymentReceiver) AccountPayment( ctx context.Context, segmentInfo *SegmentInfoReceiver) error { + if segmentInfo.priceInfo == nil || segmentInfo.priceInfo.PricePerUnit == 0 { + clog.V(common.DEBUG).Infof(ctx, "Skipping accounting, priceInfo not set for sessionID=%s, ", segmentInfo.sessionID) + return nil + } fee := calculateFee(segmentInfo.inPixels, segmentInfo.priceInfo) balance := r.orchestrator.Balance(segmentInfo.sender, core.ManifestID(segmentInfo.sessionID)) if balance == nil || balance.Cmp(fee) < 0 { - return errors.New("insufficient balance") + balanceStr := "nil" + if balance != nil { + balanceStr = balance.FloatString(0) + } + return fmt.Errorf("insufficient balance, mid=%s, fee=%s, balance=%s", segmentInfo.sessionID, fee.FloatString(0), balanceStr) } r.orchestrator.DebitFees(segmentInfo.sender, core.ManifestID(segmentInfo.sessionID), segmentInfo.priceInfo, segmentInfo.inPixels) clog.V(common.DEBUG).Infof(ctx, "Accounted payment for sessionID=%s, fee=%s", segmentInfo.sessionID, fee.FloatString(0)) diff --git a/server/live_payment_processor.go b/server/live_payment_processor.go new file mode 100644 index 0000000000..48bfe99193 --- /dev/null +++ b/server/live_payment_processor.go @@ -0,0 +1,200 @@ +package server + +import ( + "bytes" + "context" + "fmt" + "github.com/livepeer/go-livepeer/clog" + "github.com/livepeer/lpms/ffmpeg" + "io" + "log/slog" + "os" + "sync" + "time" +) + +type LivePaymentProcessor struct { + interval time.Duration + + lastProcessedAt time.Time + lastProcessedMu sync.RWMutex + processCh 
chan time.Time + + lastProbedAt time.Time + lastProbedSegInfoMu sync.RWMutex + lastProbedSegInfo *ffmpeg.MediaFormatInfo + probeSegCh chan *segment + + processSegmentFunc func(inPixels int64) error +} + +type segment struct { + timestamp time.Time + segData []byte +} + +func NewLivePaymentProcessor(ctx context.Context, processInterval time.Duration, processSegmentFunc func(inPixels int64) error) *LivePaymentProcessor { + defaultSegInfo := &ffmpeg.MediaFormatInfo{Height: 480, Width: 640, FPS: 30.0} + pp := &LivePaymentProcessor{ + interval: processInterval, + + processCh: make(chan time.Time, 1), + processSegmentFunc: processSegmentFunc, + lastProcessedAt: time.Now(), + + lastProbedAt: time.Now(), + lastProbedSegInfo: defaultSegInfo, + probeSegCh: make(chan *segment, 1), + } + pp.start(ctx) + return pp +} + +func (p *LivePaymentProcessor) start(ctx context.Context) { + go func() { + for { + select { + case timestamp := <-p.processCh: + p.processOne(timestamp) + case <-ctx.Done(): + slog.Info("Done processing payments for session") + return + } + + } + }() + go func() { + for { + select { + case seg := <-p.probeSegCh: + p.probeOne(seg) + case <-ctx.Done(): + slog.Info("Done probing segments for session") + return + } + + } + }() +} + +func (p *LivePaymentProcessor) processOne(timestamp time.Time) { + if p.shouldSkip(timestamp) { + return + } + + p.lastProbedSegInfoMu.RLock() + info := p.lastProbedSegInfo + p.lastProbedSegInfoMu.RUnlock() + + pixelsPerSec := float64(info.Height) * float64(info.Width) * float64(info.FPS) + secSinceLastProcessed := timestamp.Sub(p.lastProcessedAt).Seconds() + pixelsSinceLastProcessed := pixelsPerSec * secSinceLastProcessed + clog.V(6).Infof(context.Background(), "Processing live payment: secSinceLastProcessed=%v, pixelsSinceLastProcessed=%v, height=%d, width=%d, FPS=%v", secSinceLastProcessed, pixelsSinceLastProcessed, info.Height, info.Width, info.FPS) + + err := p.processSegmentFunc(int64(pixelsSinceLastProcessed)) + if err != nil { + 
slog.Error("Error processing payment", "err", err) + return + } + + p.lastProcessedMu.Lock() + defer p.lastProcessedMu.Unlock() + p.lastProcessedAt = timestamp +} + +func (p *LivePaymentProcessor) process(reader io.Reader) io.Reader { + timestamp := time.Now() + if p.shouldSkip(timestamp) { + // We don't process every segment, because it's too compute-expensive + return reader + } + + pipeReader, pipeWriter, err := os.Pipe() + if err != nil { + slog.Error("Error creating pipe", "err", err) + return reader + } + + resReader := io.TeeReader(reader, pipeWriter) + go func() { + select { + case p.processCh <- timestamp: + default: + // We process one segment at the time, no need to buffer them + } + + // read the segment into the buffer, because the direct use of the reader causes Broken pipe + // it's probably related to different pace of reading by trickle and ffmpeg.GetCodecInfo() + defer pipeReader.Close() + segData, err := io.ReadAll(pipeReader) + if err != nil { + slog.Error("Error reading segment data", "err", err) + return + } + + select { + case p.probeSegCh <- &segment{timestamp: timestamp, segData: segData}: + default: + // We process one segment at the time, no need to buffer them + } + }() + + return resReader +} + +func (p *LivePaymentProcessor) shouldSkip(timestamp time.Time) bool { + p.lastProcessedMu.RLock() + lastProcessedAt := p.lastProcessedAt + p.lastProcessedMu.RUnlock() + if lastProcessedAt.Add(p.interval).After(timestamp) { + // We don't process every segment, because it's too compute-expensive + return true + } + return false +} + +func (p *LivePaymentProcessor) probeOne(seg *segment) { + if p.lastProbedAt.Add(p.interval).After(seg.timestamp) { + // We don't probe every segment, because it's too compute-expensive + return + } + + info, err := probeSegment(seg) + if err != nil { + clog.Warningf(context.Background(), "Error probing segment, err=%v", err) + return + } + clog.V(6).Infof(context.Background(), "Probed segment: height=%d, width=%d, 
FPS=%v", info.Height, info.Width, info.FPS) + + p.lastProbedSegInfoMu.Lock() + defer p.lastProbedSegInfoMu.Unlock() + p.lastProbedSegInfo = &info + p.lastProbedAt = seg.timestamp +} + +func probeSegment(seg *segment) (ffmpeg.MediaFormatInfo, error) { + pipeReader, pipeWriter, err := os.Pipe() + if err != nil { + return ffmpeg.MediaFormatInfo{}, err + } + + go func() { + defer pipeWriter.Close() + io.Copy(pipeWriter, bytes.NewReader(seg.segData)) + }() + + fname := fmt.Sprintf("pipe:%d", pipeReader.Fd()) + status, info, err := ffmpeg.GetCodecInfo(fname) + if err != nil { + return ffmpeg.MediaFormatInfo{}, err + } + if status != ffmpeg.CodecStatusOk { + slog.Error("Invalid CodecStatus while probing segment", "status", status) + return ffmpeg.MediaFormatInfo{}, fmt.Errorf("invalid CodecStatus while probing segment, status=%d", status) + } + + // For WebRTC the probing sometimes returns FPS=90000, which is incorrect and causes issues with payment, + // so as a hack let's hardcode FPS to 30 + info.FPS = 30.0 + return info, nil +} diff --git a/server/live_payment_test.go b/server/live_payment_test.go index 4d93f30ad4..bd4620a95c 100644 --- a/server/live_payment_test.go +++ b/server/live_payment_test.go @@ -46,9 +46,7 @@ func TestSendPayment(t *testing.T) { sess.Balance = core.NewBalance(ethcommon.BytesToAddress(sess.OrchestratorInfo.Address), core.ManifestID(sess.OrchestratorInfo.AuthToken.SessionId), sess.Balances) // Create Payment sender and segment info - paymentSender := livePaymentSender{ - segmentsToPayUpfront: 10, - } + paymentSender := livePaymentSender{} segmentInfo := &SegmentInfoSender{ sess: sess, inPixels: 1000000, @@ -64,11 +62,10 @@ func TestSendPayment(t *testing.T) { // then require.Nil(err) // One segment costs 1000000 - // Paid upfront for 10 segments => 10000000 // Spent cost for 1 segment => 1000000 - // The balance should be 9000000 + // The balance should be 0 balance := 
sess.Balances.Balance(ethcommon.BytesToAddress(sess.OrchestratorInfo.Address), core.ManifestID(sess.OrchestratorInfo.AuthToken.SessionId)) - require.Equal(new(big.Rat).SetInt64(9000000), balance) + require.Equal(new(big.Rat).SetInt64(0), balance) } func mockSender() pm.Sender { diff --git a/server/mediaserver.go b/server/mediaserver.go index 0e82cdf5b6..68d0fb3acc 100644 --- a/server/mediaserver.go +++ b/server/mediaserver.go @@ -127,8 +127,9 @@ type LivepeerServer struct { connectionLock *sync.RWMutex serverLock *sync.RWMutex - mediaMTXClient *media.MediaMTXClient - liveAIAuthApiKey string + mediaMTXClient *media.MediaMTXClient + liveAIAuthApiKey string + livePaymentInterval time.Duration } func (s *LivepeerServer) SetContextFromUnitTest(c context.Context) { @@ -196,6 +197,7 @@ func NewLivepeerServer(rtmpAddr string, lpNode *core.LivepeerNode, httpIngest bo AISessionManager: NewAISessionManager(lpNode, AISessionManagerTTL), mediaMTXClient: media.NewMediaMTXClient(lpNode.MediaMTXApiPassword), liveAIAuthApiKey: lpNode.LiveAIAuthApiKey, + livePaymentInterval: lpNode.LivePaymentInterval, } if lpNode.NodeType == core.BroadcasterNode && httpIngest { opts.HttpMux.HandleFunc("/live/", ls.HandlePush) diff --git a/server/segment_rpc.go b/server/segment_rpc.go index d416e0348c..21207bb1eb 100644 --- a/server/segment_rpc.go +++ b/server/segment_rpc.go @@ -68,11 +68,26 @@ var httpClient = &http.Client{ } func (h *lphttp) ServeSegment(w http.ResponseWriter, r *http.Request) { - payment, segData, oInfo, ctx, err := h.processPaymentAndSegmentHeaders(w, r) + payment, segData, ctx, err := h.processPaymentAndSegmentHeaders(w, r) if err != nil { return } + oInfo, err := orchestratorInfo(h.orchestrator, getPaymentSender(payment), h.orchestrator.ServiceURI().String(), core.ManifestID(segData.AuthToken.SessionId)) + if err != nil { + clog.Errorf(ctx, "Error updating orchestrator info - err=%q", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + return + } + 
// Use existing auth token because new auth tokens should only be sent out in GetOrchestrator() RPC calls + oInfo.AuthToken = segData.AuthToken + + if err := h.orchestrator.ProcessPayment(ctx, payment, core.ManifestID(segData.AuthToken.SessionId)); err != nil { + clog.Errorf(ctx, "error processing payment: %v", err) + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + ctx = clog.AddSeqNo(ctx, uint64(segData.Seq)) clog.V(common.VERBOSE).Infof(ctx, "Received segment dur=%v", segData.Duration) if monitor.Enabled { @@ -221,11 +236,30 @@ func (h *lphttp) ServeSegment(w http.ResponseWriter, r *http.Request) { // Payment receives payment from Gateway and adds it into the orchestrator's balance func (h *lphttp) Payment(w http.ResponseWriter, r *http.Request) { - payment, segData, oInfo, ctx, err := h.processPaymentAndSegmentHeaders(w, r) + payment, segData, ctx, err := h.processPaymentAndSegmentHeaders(w, r) if err != nil { return } + var netCaps *net.Capabilities + if segData != nil && segData.Caps != nil { + netCaps = segData.Caps.ToNetCapabilities() + } + oInfo, err := orchestratorInfoWithCaps(h.orchestrator, getPaymentSender(payment), h.orchestrator.ServiceURI().String(), core.ManifestID(segData.AuthToken.SessionId), netCaps) + if err != nil { + clog.Errorf(ctx, "Error updating orchestrator info - err=%q", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + return + } + // Use existing auth token because new auth tokens should only be sent out in GetOrchestrator() RPC calls + oInfo.AuthToken = segData.AuthToken + + if err := h.orchestrator.ProcessPayment(ctx, payment, segData.ManifestID); err != nil { + clog.Errorf(ctx, "error processing payment: %v", err) + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + buf, err := proto.Marshal(&net.PaymentResult{Info: oInfo}) if err != nil { clog.Errorf(ctx, "Unable to marshal transcode result err=%q", err) @@ -240,14 +274,14 @@ func currentBalanceLog(h *lphttp, payment 
net.Payment, segData *core.SegTranscod if h == nil || h.node == nil || h.node.Balances == nil || segData == nil || segData.AuthToken == nil { return "invalid configuration" } - currentBalance := h.node.Balances.Balance(getPaymentSender(payment), core.ManifestID(segData.AuthToken.SessionId)) + currentBalance := h.node.Balances.Balance(getPaymentSender(payment), segData.ManifestID) if currentBalance == nil { return "no balance available" } return currentBalance.FloatString(0) } -func (h *lphttp) processPaymentAndSegmentHeaders(w http.ResponseWriter, r *http.Request) (net.Payment, *core.SegTranscodingMetadata, *net.OrchestratorInfo, context.Context, error) { +func (h *lphttp) processPaymentAndSegmentHeaders(w http.ResponseWriter, r *http.Request) (net.Payment, *core.SegTranscodingMetadata, context.Context, error) { orch := h.orchestrator remoteAddr := getRemoteAddr(r) @@ -257,7 +291,7 @@ func (h *lphttp) processPaymentAndSegmentHeaders(w http.ResponseWriter, r *http. if err != nil { clog.Errorf(ctx, "Could not parse payment") http.Error(w, err.Error(), http.StatusPaymentRequired) - return net.Payment{}, nil, nil, ctx, err + return net.Payment{}, nil, ctx, err } sender := getPaymentSender(payment) @@ -270,25 +304,10 @@ func (h *lphttp) processPaymentAndSegmentHeaders(w http.ResponseWriter, r *http. 
if err != nil { clog.Errorf(ctx, "Could not verify segment creds err=%q", err) http.Error(w, err.Error(), http.StatusForbidden) - return net.Payment{}, nil, nil, ctx, err + return net.Payment{}, nil, ctx, err } - if err := orch.ProcessPayment(ctx, payment, core.ManifestID(segData.AuthToken.SessionId)); err != nil { - clog.Errorf(ctx, "error processing payment: %v", err) - http.Error(w, err.Error(), http.StatusBadRequest) - return net.Payment{}, nil, nil, ctx, err - } - - oInfo, err := orchestratorInfo(orch, sender, orch.ServiceURI().String(), core.ManifestID(segData.AuthToken.SessionId)) - if err != nil { - clog.Errorf(ctx, "Error updating orchestrator info - err=%q", err) - http.Error(w, "Internal Server Error", http.StatusInternalServerError) - return net.Payment{}, nil, nil, ctx, err - } - // Use existing auth token because new auth tokens should only be sent out in GetOrchestrator() RPC calls - oInfo.AuthToken = segData.AuthToken - - return payment, segData, oInfo, ctx, nil + return payment, segData, ctx, nil } func getPayment(header string) (net.Payment, error) { diff --git a/server/segment_rpc_test.go b/server/segment_rpc_test.go index a1f54ce822..73a266f48a 100644 --- a/server/segment_rpc_test.go +++ b/server/segment_rpc_test.go @@ -875,8 +875,13 @@ func TestServeSegment_ProcessPaymentError(t *testing.T) { require := require.New(t) assert := assert.New(t) + drivers.NodeStorage = drivers.NewMemoryDriver(nil) orch.On("VerifySig", mock.Anything, mock.Anything, mock.Anything).Return(true) orch.On("AuthToken", mock.Anything, mock.Anything).Return(stubAuthToken) + orch.On("ServiceURI").Return(url.Parse("http://someuri.com")) + orch.On("PriceInfo", mock.Anything).Return(&net.PriceInfo{}, nil) + orch.On("TicketParams", mock.Anything, mock.Anything).Return(&net.TicketParams{}, nil) + orch.On("Address").Return(ethcommon.Address{}) s := &BroadcastSession{ Broadcaster: stubBroadcaster2(), From 0e1c676707c57d1022f4003b4a8aae25ac1d5116 Mon Sep 17 00:00:00 2001 From: 
hjpotter92 Date: Fri, 6 Dec 2024 14:58:20 +0530 Subject: [PATCH 23/56] mediamtx: Add cronjob to publish mediamtx metrics to server (#3298) * mediamtx: Add cronjob to publish mediamtx metrics to server * dockerfile.mediamtx: Replace `ENTRYPOINT` with `CMD` * mediamtx: Fix cron environment issues --- docker/Dockerfile.mediamtx | 32 ++++++++++++------- docker/crontab | 4 +++ ...iamtx-entry.bash => mediamtx-metrics.bash} | 17 ++++++---- 3 files changed, 34 insertions(+), 19 deletions(-) create mode 100644 docker/crontab rename docker/{mediamtx-entry.bash => mediamtx-metrics.bash} (72%) diff --git a/docker/Dockerfile.mediamtx b/docker/Dockerfile.mediamtx index a6646a23de..80abe0dcc6 100644 --- a/docker/Dockerfile.mediamtx +++ b/docker/Dockerfile.mediamtx @@ -1,23 +1,31 @@ FROM ubuntu:24.04 # we need curl in the image as it's later used in the runOnReady command -RUN apt-get update \ - && apt-get install -y \ +RUN apt update \ + && apt install -yqq \ ca-certificates \ curl \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + cron \ + && apt clean \ + && rm -rf /var/lib/apt/lists/* /etc/cron.* + +COPY --chmod=0644 crontab /etc/crontab + +# Setup cron job for publishing metrics +RUN mkdir -p /var/log/ \ + && crontab /etc/crontab \ + && touch /var/log/cron.log + +COPY --chmod=0755 mediamtx-metrics.bash /opt/mediamtx-metrics.bash ENV MEDIAMTX_VERSION="1.9.3" -ADD "https://github.com/bluenviron/mediamtx/releases/download/v${MEDIAMTX_VERSION}/mediamtx_v${MEDIAMTX_VERSION}_linux_amd64.tar.gz" /opt/mediamtx.tar.gz +ADD "https://github.com/bluenviron/mediamtx/releases/download/v${MEDIAMTX_VERSION}/mediamtx_v${MEDIAMTX_VERSION}_linux_amd64.tar.gz" /opt/mediamtx/mediamtx.tar.gz -RUN tar xzf /opt/mediamtx.tar.gz -C /opt/ \ +RUN tar xzf /opt/mediamtx/mediamtx.tar.gz -C /opt/mediamtx/ \ && mkdir -p /usr/local/bin /etc/mediamtx/ \ - && mv /opt/mediamtx /usr/local/bin/mediamtx \ - && mv /opt/mediamtx.yml /etc/mediamtx/mediamtx.yml \ - && rm -rf /opt/ - -COPY mediamtx-entry.bash / + 
&& mv /opt/mediamtx/mediamtx /usr/local/bin/mediamtx \ + && mv /opt/mediamtx/mediamtx.yml /etc/mediamtx/mediamtx.yml \ + && rm -rf /opt/mediamtx/ -ENTRYPOINT ["/mediamtx-entry.bash"] +CMD [ "/bin/bash", "-c", "declare -p >> /etc/environment && cron && /usr/local/bin/mediamtx" ] diff --git a/docker/crontab b/docker/crontab new file mode 100644 index 0000000000..17b0b60d60 --- /dev/null +++ b/docker/crontab @@ -0,0 +1,4 @@ +SHELL=/bin/bash +BASH_ENV=/etc/environment + +*/5 * * * * /opt/mediamtx-metrics.bash >> /var/log/cron.log 2>&1 diff --git a/docker/mediamtx-entry.bash b/docker/mediamtx-metrics.bash similarity index 72% rename from docker/mediamtx-entry.bash rename to docker/mediamtx-metrics.bash index ffc3a0cb1e..7fda6bc4d7 100755 --- a/docker/mediamtx-entry.bash +++ b/docker/mediamtx-metrics.bash @@ -5,15 +5,18 @@ set -euo pipefail if [ -v LP_PUBLISH_MEDIAMTX_METRICS ]; then - if [ -z "$LP_PUBLISH_MEDIAMTX_METRICS_ENDPOINT" ]; then - echo >&2 "No endpoint specified for publishing mediamtx metrics." - fi - cat <&2 "No endpoint specified for publishing mediamtx metrics." 
+ exit 1 + fi + echo "$METRIC_DATA" | curl -X POST --data-binary @- "$LP_PUBLISH_MEDIAMTX_METRICS_ENDPOINT" +fi From 53e9fbe2238a4def8a7b6912ad15837381036918 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Wilczy=C5=84ski?= Date: Fri, 6 Dec 2024 16:43:54 +0100 Subject: [PATCH 24/56] darwin-amd64 build issue - hack fix --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0b8997096d..2ad270a290 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -196,7 +196,7 @@ jobs: - name: Install dependencies run: | brew update - brew upgrade + # brew upgrade # temporarily disabled because of the issues it's causing brew uninstall --ignore-dependencies --force pkg-config@0.29.2 brew install coreutils pkgconf From 86cd1e95c0ea604c2a609d9f3e1164ad465fdb62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Leszko?= Date: Mon, 9 Dec 2024 11:24:04 +0100 Subject: [PATCH 25/56] Fix "Insufficient Balance" bug when sometimes starting the AI Live Video pipeline (#3304) --- server/ai_process.go | 5 +++++ server/broadcast.go | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/server/ai_process.go b/server/ai_process.go index 25f90af2f9..dbc9dd5b8a 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1009,6 +1009,11 @@ func submitAudioToText(ctx context.Context, params aiRequestParams, sess *AISess const initPixelsToPay = 45 * 30 * 1280 * 720 // 45 seconds, 30fps, 720p func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *AISession, req worker.GenLiveVideoToVideoJSONRequestBody) (any, error) { + // Live Video should not reuse the existing session balance, because it could lead to not sending the init + // payment, which in turns may cause "Insufficient Balance" on the Orchestrator's side. + // It works differently than other AI Jobs, because Live Video is accounted by mid on the Orchestrator's side. 
+ clearSessionBalance(sess.BroadcastSession, core.RandomManifestID()) + client, err := worker.NewClientWithResponses(sess.Transcoder(), worker.WithHTTPClient(httpClient)) if err != nil { if monitor.Enabled { diff --git a/server/broadcast.go b/server/broadcast.go index ec18c53ee4..dbc1675d27 100755 --- a/server/broadcast.go +++ b/server/broadcast.go @@ -1571,6 +1571,12 @@ func updateSession(sess *BroadcastSession, res *ReceivedTranscodeResult) { } } +func clearSessionBalance(sess *BroadcastSession, id core.ManifestID) { + sess.lock.Lock() + defer sess.lock.Unlock() + sess.Balance = core.NewBalance(ethcommon.BytesToAddress(sess.OrchestratorInfo.TicketParams.Recipient), id, sess.Balances) +} + func refreshSessionIfNeeded(ctx context.Context, sess *BroadcastSession) error { shouldRefresh, err := shouldRefreshSession(ctx, sess) if err != nil { From ba63fe46ed59874b9003f2dd1cb86979043171a9 Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Mon, 9 Dec 2024 12:21:14 -0800 Subject: [PATCH 26/56] ai/live: Keepalive for control channel. (#3299) The control channel only sees messages sporadically, so send down a keepalive periodically to prevent clients from timing out. This also prevents the server from sweeping idle channels. --- core/livepeernode.go | 3 ++- server/ai_live_video.go | 33 ++++++++++++++++++++++++++++++++- server/ai_mediaserver.go | 1 + 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/core/livepeernode.go b/core/livepeernode.go index 6bfddc71c9..2e9f2d7f05 100644 --- a/core/livepeernode.go +++ b/core/livepeernode.go @@ -160,7 +160,8 @@ type LivepeerNode struct { } type LivePipeline struct { - ControlPub *trickle.TricklePublisher + ControlPub *trickle.TricklePublisher + StopControl func() } // NewLivepeerNode creates a new Livepeer Node. Eth can be nil. 
diff --git a/server/ai_live_video.go b/server/ai_live_video.go index cc0dcda07f..9f99335a81 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -8,6 +8,7 @@ import ( "log/slog" "net/url" "os" + "strings" "time" "github.com/livepeer/go-livepeer/clog" @@ -138,5 +139,35 @@ func startControlPublish(control *url.URL, params aiRequestParams) { } params.node.LiveMu.Lock() defer params.node.LiveMu.Unlock() - params.node.LivePipelines[stream] = &core.LivePipeline{ControlPub: controlPub} + + ticker := time.NewTicker(10 * time.Second) + done := make(chan bool, 1) + stop := func() { + ticker.Stop() + done <- true + } + + params.node.LivePipelines[stream] = &core.LivePipeline{ + ControlPub: controlPub, + StopControl: stop, + } + + // send a keepalive periodically to keep both ends of the connection alive + go func() { + for { + select { + case <-ticker.C: + const msg = `{"keep":"alive"}` + err := controlPub.Write(strings.NewReader(msg)) + if err == trickle.StreamNotFoundErr { + // the channel doesn't exist anymore, so stop + stop() + continue // loop back to consume the `done` chan + } + // if there was another type of error, we'll just retry anyway + case <-done: + return + } + } + }() } diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index 5b65d483dc..fd3aef1c23 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -559,5 +559,6 @@ func (ls *LivepeerServer) cleanupLive(stream string) { if err := pub.ControlPub.Close(); err != nil { slog.Info("Error closing trickle publisher", "err", err) } + pub.StopControl() } } From 51fd7134bd925cbd1bc8f36eab2c85e970d6d9c0 Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Mon, 9 Dec 2024 18:15:57 -0300 Subject: [PATCH 27/56] server/ai: Fix memory leak on trickle subscribe (#3303) --- server/ai_live_video.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 9f99335a81..cc7421229b 100644 --- 
a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -88,8 +88,9 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa clog.Infof(ctx, "Error reading trickle subscription: %s", err) return } - defer segment.Body.Close() - if _, err = io.Copy(w, segment.Body); err != nil { + _, err = io.Copy(w, segment.Body) + segment.Body.Close() + if err != nil { clog.Infof(ctx, "Error copying to ffmpeg stdin: %s", err) return } From bc854041df789e0976b760c9a44bfedee436fa1d Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Mon, 9 Dec 2024 13:54:17 -0800 Subject: [PATCH 28/56] trickle: Sweep idle channels on the server. (#3296) --- server/ai_http.go | 20 ++++---- server/rpc.go | 5 +- trickle/trickle_server.go | 96 +++++++++++++++++++++++++++++++++------ 3 files changed, 95 insertions(+), 26 deletions(-) diff --git a/server/ai_http.go b/server/ai_http.go index 7ee482c67a..e46821b170 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -36,7 +36,7 @@ var MaxAIRequestSize = 3000000000 // 3GB var TrickleHTTPPath = "/ai/trickle/" -func startAIServer(lp lphttp) error { +func startAIServer(lp *lphttp) error { swagger, err := worker.GetSwagger() if err != nil { return err @@ -61,15 +61,15 @@ func startAIServer(lp lphttp) error { BasePath: TrickleHTTPPath, }) - lp.transRPC.Handle("/text-to-image", oapiReqValidator(aiHttpHandle(&lp, jsonDecoder[worker.GenTextToImageJSONRequestBody]))) - lp.transRPC.Handle("/image-to-image", oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenImageToImageMultipartRequestBody]))) - lp.transRPC.Handle("/image-to-video", oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenImageToVideoMultipartRequestBody]))) - lp.transRPC.Handle("/upscale", oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenUpscaleMultipartRequestBody]))) - lp.transRPC.Handle("/audio-to-text", oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenAudioToTextMultipartRequestBody]))) - lp.transRPC.Handle("/llm", 
oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenLLMFormdataRequestBody]))) - lp.transRPC.Handle("/segment-anything-2", oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenSegmentAnything2MultipartRequestBody]))) - lp.transRPC.Handle("/image-to-text", oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenImageToTextMultipartRequestBody]))) - lp.transRPC.Handle("/text-to-speech", oapiReqValidator(aiHttpHandle(&lp, jsonDecoder[worker.GenTextToSpeechJSONRequestBody]))) + lp.transRPC.Handle("/text-to-image", oapiReqValidator(aiHttpHandle(lp, jsonDecoder[worker.GenTextToImageJSONRequestBody]))) + lp.transRPC.Handle("/image-to-image", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenImageToImageMultipartRequestBody]))) + lp.transRPC.Handle("/image-to-video", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenImageToVideoMultipartRequestBody]))) + lp.transRPC.Handle("/upscale", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenUpscaleMultipartRequestBody]))) + lp.transRPC.Handle("/audio-to-text", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenAudioToTextMultipartRequestBody]))) + lp.transRPC.Handle("/llm", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenLLMFormdataRequestBody]))) + lp.transRPC.Handle("/segment-anything-2", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenSegmentAnything2MultipartRequestBody]))) + lp.transRPC.Handle("/image-to-text", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenImageToTextMultipartRequestBody]))) + lp.transRPC.Handle("/text-to-speech", oapiReqValidator(aiHttpHandle(lp, jsonDecoder[worker.GenTextToSpeechJSONRequestBody]))) lp.transRPC.Handle("/live-video-to-video", oapiReqValidator(lp.StartLiveVideoToVideo())) // Additionally, there is the '/aiResults' endpoint registered in server/rpc.go diff --git a/server/rpc.go b/server/rpc.go index 4ae04a48f4..cb4b34d466 100644 --- a/server/rpc.go +++ b/server/rpc.go @@ -222,7 +222,7 
@@ func StartTranscodeServer(orch Orchestrator, bind string, mux *http.ServeMux, wo lp.transRPC.HandleFunc("/transcodeResults", lp.TranscodeResults) } - startAIServer(lp) + startAIServer(&lp) if acceptRemoteAIWorkers { net.RegisterAIWorkerServer(s, &lp) lp.transRPC.Handle("/aiResults", lp.AIResults()) @@ -233,6 +233,9 @@ func StartTranscodeServer(orch Orchestrator, bind string, mux *http.ServeMux, wo return err } + stopTrickle := lp.trickleSrv.Start() + defer stopTrickle() + glog.Info("Listening for RPC on ", bind) srv := http.Server{ Addr: bind, diff --git a/trickle/trickle_server.go b/trickle/trickle_server.go index 3c80238787..46808aaddb 100644 --- a/trickle/trickle_server.go +++ b/trickle/trickle_server.go @@ -7,14 +7,15 @@ import ( "fmt" "io" "log/slog" + "maps" "net/http" + "slices" "strconv" + "strings" "sync" "time" ) -// TODO sweep idle streams connections - const CHANGEFEED = "_changes" type TrickleServerConfig struct { @@ -29,15 +30,21 @@ type TrickleServerConfig struct { // Whether to auto-create channels on first publish (default false) Autocreate bool + + // Amount of time a channel has no new segments before being swept (default 5 minutes) + IdleTimeout time.Duration + + // How often to sweep for idle channels (default 1 minute) + SweepInterval time.Duration } type Server struct { mutex sync.RWMutex streams map[string]*Stream + config TrickleServerConfig + // for internal channels - changefeed bool - autocreate bool internalPub *TrickleLocalPublisher } @@ -47,6 +54,7 @@ type Stream struct { latestWrite int name string mimeType string + writeTime time.Time } type Segment struct { @@ -78,33 +86,60 @@ func applyDefaults(config *TrickleServerConfig) { if config.Mux == nil { config.Mux = http.DefaultServeMux } + if config.IdleTimeout == 0 { + config.IdleTimeout = 5 * time.Minute + } + if config.SweepInterval == 0 { + config.SweepInterval = time.Minute + } } func ConfigureServer(config TrickleServerConfig) *Server { streamManager := &Server{ - streams: 
make(map[string]*Stream), - changefeed: config.Changefeed, - autocreate: config.Autocreate, + streams: make(map[string]*Stream), + config: config, } // set up changefeed - if streamManager.changefeed { + if config.Changefeed { streamManager.internalPub = NewLocalPublisher(streamManager, CHANGEFEED, "application/json") streamManager.internalPub.CreateChannel() } - applyDefaults(&config) + applyDefaults(&streamManager.config) var ( - mux = config.Mux - basePath = config.BasePath + mux = streamManager.config.Mux + basePath = streamManager.config.BasePath ) + mux.HandleFunc("POST "+basePath+"{streamName}", streamManager.handleCreate) mux.HandleFunc("GET "+basePath+"{streamName}/{idx}", streamManager.handleGet) mux.HandleFunc("POST "+basePath+"{streamName}/{idx}", streamManager.handlePost) mux.HandleFunc("DELETE "+basePath+"{streamName}", streamManager.handleDelete) return streamManager } +func (sm *Server) Start() func() { + ticker := time.NewTicker(sm.config.SweepInterval) + done := make(chan bool) + stop := func() { + ticker.Stop() + done <- true + } + go func() { + for { + select { + case <-ticker.C: + sm.sweepIdleChannels() + case <-done: + sm.clearAllStreams() + return + } + } + }() + return stop +} + func (sm *Server) getStream(streamName string) (*Stream, bool) { sm.mutex.RLock() defer sm.mutex.RUnlock() @@ -116,7 +151,7 @@ func (sm *Server) getOrCreateStream(streamName, mimeType string, isLocal bool) * sm.mutex.Lock() stream, exists := sm.streams[streamName] - if !exists && (isLocal || sm.autocreate) { + if !exists && (isLocal || sm.config.Autocreate) { stream = &Stream{ segments: make([]*Segment, maxSegmentsPerStream), name: streamName, @@ -133,7 +168,7 @@ func (sm *Server) getOrCreateStream(streamName, mimeType string, isLocal bool) * } // update changefeed - if !exists && sm.changefeed { + if !exists && sm.config.Changefeed { jb, _ := json.Marshal(&Changefeed{ Added: []string{streamName}, }) @@ -154,6 +189,29 @@ func (sm *Server) clearAllStreams() { 
sm.streams = make(map[string]*Stream) } +func (sm *Server) sweepIdleChannels() { + sm.mutex.Lock() + streams := slices.Collect(maps.Values(sm.streams)) + sm.mutex.Unlock() + now := time.Now() + for _, s := range streams { + // skip internal channels for now, eg changefeed + if strings.HasPrefix(s.name, "_") { + continue + } + s.mutex.Lock() + writeTime := s.writeTime + s.mutex.Unlock() + if now.Sub(writeTime) > sm.config.IdleTimeout { + if err := sm.closeStream(s.name); err != nil { + slog.Warn("Could not close idle channel", "channel", s.name, "err", err) + } else { + slog.Info("Closed idle channel", "channel", s.name) + } + } + } +} + func (s *Stream) clear() { s.mutex.Lock() defer s.mutex.Unlock() @@ -178,7 +236,7 @@ func (sm *Server) closeStream(streamName string) error { slog.Info("Deleted stream", "streamName", streamName) // update changefeed if needed - if !sm.changefeed { + if !sm.config.Changefeed { return nil } jb, err := json.Marshal(&Changefeed{ @@ -199,6 +257,14 @@ func (sm *Server) handleDelete(w http.ResponseWriter, r *http.Request) { } } +func (sm *Server) handleCreate(w http.ResponseWriter, r *http.Request) { + stream := sm.getOrCreateStream(r.PathValue("streamName"), r.Header.Get("Expect-Content"), false) + if stream == nil { + http.Error(w, "Stream not found", http.StatusNotFound) + return + } +} + func (sm *Server) handlePost(w http.ResponseWriter, r *http.Request) { stream := sm.getOrCreateStream(r.PathValue("streamName"), r.Header.Get("Content-Type"), false) if stream == nil { @@ -266,7 +332,6 @@ func (s *Stream) handlePost(w http.ResponseWriter, r *http.Request, idx int) { if exists { slog.Warn("Overwriting existing entry", "idx", idx) // Overwrite anything that exists now. TODO figure out a safer behavior? 
- http.Error(w, "Entry already exists for this index", http.StatusBadRequest) return } @@ -319,6 +384,7 @@ func (s *Stream) getForWrite(idx int) (*Segment, bool) { } else { s.latestWrite = idx } + s.writeTime = time.Now() slog.Info("POST segment", "stream", s.name, "idx", idx, "latest", s.latestWrite) segmentPos := idx % maxSegmentsPerStream if segment := s.segments[segmentPos]; segment != nil { From 3988ebcf14de9e20e77b1e71664174c0151cfeb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Leszko?= Date: Tue, 10 Dec 2024 09:13:25 +0100 Subject: [PATCH 29/56] Fix panic in clearSessionBalance for offchain (#3306) --- server/broadcast.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/broadcast.go b/server/broadcast.go index dbc1675d27..9a2f515550 100755 --- a/server/broadcast.go +++ b/server/broadcast.go @@ -1574,7 +1574,9 @@ func updateSession(sess *BroadcastSession, res *ReceivedTranscodeResult) { func clearSessionBalance(sess *BroadcastSession, id core.ManifestID) { sess.lock.Lock() defer sess.lock.Unlock() - sess.Balance = core.NewBalance(ethcommon.BytesToAddress(sess.OrchestratorInfo.TicketParams.Recipient), id, sess.Balances) + if sess.Balances != nil && sess.OrchestratorInfo != nil && sess.OrchestratorInfo.TicketParams != nil { + sess.Balance = core.NewBalance(ethcommon.BytesToAddress(sess.OrchestratorInfo.TicketParams.Recipient), id, sess.Balances) + } } func refreshSessionIfNeeded(ctx context.Context, sess *BroadcastSession) error { From 4a4dc37c568a384b160ebbb0c647aa96034cd9de Mon Sep 17 00:00:00 2001 From: Max Holland Date: Tue, 10 Dec 2024 11:46:24 +0000 Subject: [PATCH 30/56] Check input stream exists in output stream retry logic (#3294) * Refactor mediamtx client * Check input stream exists in output stream retry logic * log line for reading data in trickle subscribe * Check if stream exists in local state rather than calling mediamtx * Add outputRTMPURL to logs * Reduce retries since this keeps the pipeline running for 
too long --- media/mediamtx.go | 36 +++++++++++++++++++++++++++-------- media/rtmp2segment.go | 7 +++---- media/rtmp2segment_windows.go | 3 +-- server/ai_live_video.go | 36 +++++++++++++++++------------------ server/ai_mediaserver.go | 11 ++++++----- server/ai_process.go | 1 + server/mediaserver.go | 5 ++--- 7 files changed, 58 insertions(+), 41 deletions(-) diff --git a/media/mediamtx.go b/media/mediamtx.go index e51eca1318..29756e11ce 100644 --- a/media/mediamtx.go +++ b/media/mediamtx.go @@ -1,17 +1,26 @@ package media import ( + "errors" "fmt" "io" "net/http" ) type MediaMTXClient struct { + host string apiPassword string + sourceID string + sourceType string } -func NewMediaMTXClient(apiPassword string) *MediaMTXClient { - return &MediaMTXClient{apiPassword: apiPassword} +func NewMediaMTXClient(host, apiPassword, sourceID, sourceType string) *MediaMTXClient { + return &MediaMTXClient{ + host: host, + apiPassword: apiPassword, + sourceID: sourceID, + sourceType: sourceType, + } } const ( @@ -21,6 +30,17 @@ const ( MediaMTXRtmpConn = "rtmpConn" ) +func MediamtxSourceTypeToString(s string) (string, error) { + switch s { + case MediaMTXWebrtcSession: + return "whip", nil + case MediaMTXRtmpConn: + return "rtmp", nil + default: + return "", errors.New("unknown media source") + } +} + func getApiPath(sourceType string) (string, error) { var apiPath string switch sourceType { @@ -34,13 +54,13 @@ func getApiPath(sourceType string) (string, error) { return apiPath, nil } -func (mc *MediaMTXClient) KickInputConnection(mediaMTXHost, sourceID, sourceType string) error { - apiPath, err := getApiPath(sourceType) +func (mc *MediaMTXClient) KickInputConnection() error { + apiPath, err := getApiPath(mc.sourceType) if err != nil { return err } - req, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://%s:%s/v3/%s/kick/%s", mediaMTXHost, mediaMTXControlPort, apiPath, sourceID), nil) + req, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://%s:%s/v3/%s/kick/%s", 
mc.host, mediaMTXControlPort, apiPath, mc.sourceID), nil) if err != nil { return fmt.Errorf("failed to create kick request: %w", err) } @@ -56,12 +76,12 @@ func (mc *MediaMTXClient) KickInputConnection(mediaMTXHost, sourceID, sourceType return nil } -func (mc *MediaMTXClient) StreamExists(mediaMTXHost, sourceID, sourceType string) (bool, error) { - apiPath, err := getApiPath(sourceType) +func (mc *MediaMTXClient) StreamExists() (bool, error) { + apiPath, err := getApiPath(mc.sourceType) if err != nil { return false, err } - req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("http://%s:%s/v3/%s/get/%s", mediaMTXHost, mediaMTXControlPort, apiPath, sourceID), nil) + req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("http://%s:%s/v3/%s/get/%s", mc.host, mediaMTXControlPort, apiPath, mc.sourceID), nil) if err != nil { return false, fmt.Errorf("failed to create get stream request: %w", err) } diff --git a/media/rtmp2segment.go b/media/rtmp2segment.go index 9f9e3c5719..57f2095321 100644 --- a/media/rtmp2segment.go +++ b/media/rtmp2segment.go @@ -27,10 +27,9 @@ var waitTimeout = 20 * time.Second type MediaSegmenter struct { Workdir string MediaMTXClient *MediaMTXClient - MediaMTXHost string } -func (ms *MediaSegmenter) RunSegmentation(ctx context.Context, in string, segmentHandler SegmentHandler, id, sourceType string) { +func (ms *MediaSegmenter) RunSegmentation(ctx context.Context, in string, segmentHandler SegmentHandler) { outFilePattern := filepath.Join(ms.Workdir, randomString()+"-%d.ts") completionSignal := make(chan bool, 1) wg := &sync.WaitGroup{} @@ -42,11 +41,11 @@ func (ms *MediaSegmenter) RunSegmentation(ctx context.Context, in string, segmen retryCount := 0 for { - streamExists, err := ms.MediaMTXClient.StreamExists(ms.MediaMTXHost, id, sourceType) + streamExists, err := ms.MediaMTXClient.StreamExists() if err != nil { clog.Errorf(ctx, "StreamExists check failed. 
err=%s", err) } - if retryCount > 20 && !streamExists { + if retryCount > 2 && !streamExists { clog.Errorf(ctx, "Stopping segmentation, input stream does not exist. in=%s err=%s", in, err) break } diff --git a/media/rtmp2segment_windows.go b/media/rtmp2segment_windows.go index 321ab9257f..444943a990 100644 --- a/media/rtmp2segment_windows.go +++ b/media/rtmp2segment_windows.go @@ -7,9 +7,8 @@ import "context" type MediaSegmenter struct { Workdir string MediaMTXClient *MediaMTXClient - MediaMTXHost string } -func (ms *MediaSegmenter) RunSegmentation(ctx context.Context, in string, segmentHandler SegmentHandler, id, sourceType string) { +func (ms *MediaSegmenter) RunSegmentation(ctx context.Context, in string, segmentHandler SegmentHandler) { // Not supported for Windows } diff --git a/server/ai_live_video.go b/server/ai_live_video.go index cc7421229b..85c2c6f27a 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -2,10 +2,10 @@ package server import ( "context" - "errors" "fmt" "io" "log/slog" + "net/http" "net/url" "os" "strings" @@ -77,6 +77,7 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa slog.Info("error getting pipe for trickle-ffmpeg", "url", url, "err", err) } ctx = clog.AddVal(ctx, "url", url.Redacted()) + ctx = clog.AddVal(ctx, "outputRTMPURL", params.liveParams.outputRTMPURL) // read segments from trickle subscription go func() { @@ -88,9 +89,9 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa clog.Infof(ctx, "Error reading trickle subscription: %s", err) return } - _, err = io.Copy(w, segment.Body) - segment.Body.Close() - if err != nil { + clog.V(8).Infof(ctx, "trickle subscribe read data") + + if err = copySegment(segment, w); err != nil { clog.Infof(ctx, "Error copying to ffmpeg stdin: %s", err) return } @@ -99,11 +100,14 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa go func() { defer r.Close() - retryCount := 0 - // TODO check 
whether stream is actually terminated - // so we aren't just looping unnecessarily - for retryCount < 10 { - _, err := ffmpeg.Transcode3(&ffmpeg.TranscodeOptionsIn{ + for { + _, ok := params.node.LivePipelines[params.liveParams.stream] + if !ok { + clog.Errorf(ctx, "Stopping output rtmp stream, input stream does not exist. err=%s", err) + break + } + + _, err = ffmpeg.Transcode3(&ffmpeg.TranscodeOptionsIn{ Fname: fmt.Sprintf("pipe:%d", r.Fd()), }, []ffmpeg.TranscodeOptions{{ Oname: params.liveParams.outputRTMPURL, @@ -114,21 +118,15 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa if err != nil { clog.Infof(ctx, "Error sending RTMP out: %s", err) } - retryCount++ time.Sleep(5 * time.Second) } }() } -func mediamtxSourceTypeToString(s string) (string, error) { - switch s { - case media.MediaMTXWebrtcSession: - return "whip", nil - case media.MediaMTXRtmpConn: - return "rtmp", nil - default: - return "", errors.New("unknown media source") - } +func copySegment(segment *http.Response, w io.Writer) error { + defer segment.Body.Close() + _, err := io.Copy(w, segment.Body) + return err } func startControlPublish(control *url.URL, params aiRequestParams) { diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index fd3aef1c23..5f5eb0fdf9 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -383,7 +383,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { http.Error(w, "Missing source_type", http.StatusBadRequest) return } - sourceTypeStr, err := mediamtxSourceTypeToString(sourceType) + sourceTypeStr, err := media.MediamtxSourceTypeToString(sourceType) if err != nil { clog.Errorf(ctx, "Invalid source type %s", sourceType) http.Error(w, err.Error(), http.StatusBadRequest) @@ -433,6 +433,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { return } } + mediaMTXClient := media.NewMediaMTXClient(remoteHost, ls.mediaMTXApiPassword, sourceID, sourceType) if LiveAIAuthWebhookURL != nil { authResp, 
err := authenticateAIStream(LiveAIAuthWebhookURL, ls.liveAIAuthApiKey, AIAuthRequest{ @@ -441,7 +442,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { QueryParams: queryParams, }) if err != nil { - kickErr := ls.mediaMTXClient.KickInputConnection(remoteHost, sourceID, sourceType) + kickErr := mediaMTXClient.KickInputConnection() if kickErr != nil { clog.Errorf(ctx, "failed to kick input connection: %s", kickErr.Error()) } @@ -465,7 +466,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { requestID := string(core.RandomManifestID()) ctx = clog.AddVal(ctx, "request_id", requestID) - clog.Infof(ctx, "Received live video AI request for %s", streamName) + clog.Infof(ctx, "Received live video AI request for %s. pipelineParams=%v", streamName, pipelineParams) // Kick off the RTMP pull and segmentation as soon as possible ssr := media.NewSwitchableSegmentReader() @@ -475,8 +476,8 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { if mediaMTXStreamPrefix != "" { mediaMTXStreamPrefix = mediaMTXStreamPrefix + "/" } - ms := media.MediaSegmenter{Workdir: ls.LivepeerNode.WorkDir, MediaMTXClient: ls.mediaMTXClient, MediaMTXHost: remoteHost} - ms.RunSegmentation(ctx, fmt.Sprintf("rtmp://%s/%s%s", remoteHost, mediaMTXStreamPrefix, streamName), ssr.Read, sourceID, sourceType) + ms := media.MediaSegmenter{Workdir: ls.LivepeerNode.WorkDir, MediaMTXClient: mediaMTXClient} + ms.RunSegmentation(ctx, fmt.Sprintf("rtmp://%s/%s%s", remoteHost, mediaMTXStreamPrefix, streamName), ssr.Read) ssr.Close() ls.cleanupLive(streamName) }() diff --git a/server/ai_process.go b/server/ai_process.go index dbc9dd5b8a..f69c2a5784 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1048,6 +1048,7 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A if err != nil { return nil, fmt.Errorf("invalid control URL: %w", err) } + // TODO any errors from these funcs should we kill the input stream? 
startTricklePublish(ctx, pub, params, sess) startTrickleSubscribe(ctx, sub, params) startControlPublish(control, params) diff --git a/server/mediaserver.go b/server/mediaserver.go index 68d0fb3acc..aaec9210f6 100644 --- a/server/mediaserver.go +++ b/server/mediaserver.go @@ -28,7 +28,6 @@ import ( "time" "github.com/livepeer/go-livepeer/clog" - "github.com/livepeer/go-livepeer/media" "github.com/livepeer/go-livepeer/monitor" "github.com/livepeer/go-livepeer/pm" "github.com/livepeer/go-tools/drivers" @@ -127,7 +126,7 @@ type LivepeerServer struct { connectionLock *sync.RWMutex serverLock *sync.RWMutex - mediaMTXClient *media.MediaMTXClient + mediaMTXApiPassword string liveAIAuthApiKey string livePaymentInterval time.Duration } @@ -195,7 +194,7 @@ func NewLivepeerServer(rtmpAddr string, lpNode *core.LivepeerNode, httpIngest bo internalManifests: make(map[core.ManifestID]core.ManifestID), recordingsAuthResponses: cache.New(time.Hour, 2*time.Hour), AISessionManager: NewAISessionManager(lpNode, AISessionManagerTTL), - mediaMTXClient: media.NewMediaMTXClient(lpNode.MediaMTXApiPassword), + mediaMTXApiPassword: lpNode.MediaMTXApiPassword, liveAIAuthApiKey: lpNode.LiveAIAuthApiKey, livePaymentInterval: lpNode.LivePaymentInterval, } From 33dc743a32872d926596da3b055d0e38d505901c Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Tue, 10 Dec 2024 09:33:46 -0800 Subject: [PATCH 31/56] ai/live: Set write time when creating stream (#3307) Not setting the write time caused some premature sweeping --- trickle/trickle_server.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/trickle/trickle_server.go b/trickle/trickle_server.go index 46808aaddb..ddeb6679ff 100644 --- a/trickle/trickle_server.go +++ b/trickle/trickle_server.go @@ -153,9 +153,10 @@ func (sm *Server) getOrCreateStream(streamName, mimeType string, isLocal bool) * stream, exists := sm.streams[streamName] if !exists && (isLocal || sm.config.Autocreate) { stream = &Stream{ - segments: 
make([]*Segment, maxSegmentsPerStream), - name: streamName, - mimeType: mimeType, + segments: make([]*Segment, maxSegmentsPerStream), + name: streamName, + mimeType: mimeType, + writeTime: time.Now(), } sm.streams[streamName] = stream slog.Info("Creating stream", "stream", streamName) From 8c7b755f444cfa9a2a454dc60656ea1adc3e9580 Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Tue, 10 Dec 2024 13:37:35 -0800 Subject: [PATCH 32/56] ai/live: Add buffered segment read/writes (#3305) Replaces most of the old pipe logic in the RTMP segmenter which didn't really do anything except some logging. This change allows for segment writes to an io.Writer (from RTMP) to proceed without being blocked by reads (from trickle publish). This avoids some potential back-pressure with MediaMTX in case publishes are slow in going out for whatever reason. This will also allow us to 1. have concurrent segments in-flight, and 2. track how many segments are in-flight as a proxy for "is this orchestrator slow" so we can switch orchestrators if necessary, since we should not have more than 1 segment in-flight (sometimes 2 for a very brief overlap). This logic will be added in a follow-up PR. The reader interface here also has a Clone method (also non-blocking) which has the potential to simplify some of the reader handling around payments, but payments is not changed right now. Most of the code here was borrowed from the trickle server but generalized into a proper reader/writer interface. Eventually we'll back-port this code into the trickle server but it's not needed there right now. 
--- media/rtmp2segment.go | 60 ++-------------------- media/rw.go | 110 ++++++++++++++++++++++++++++++++++++++++ media/segment_reader.go | 15 +++--- server/ai_live_video.go | 4 +- 4 files changed, 124 insertions(+), 65 deletions(-) create mode 100644 media/rw.go diff --git a/media/rtmp2segment.go b/media/rtmp2segment.go index 57f2095321..db1d5a8eb0 100644 --- a/media/rtmp2segment.go +++ b/media/rtmp2segment.go @@ -239,50 +239,11 @@ func processSegments(ctx context.Context, segmentHandler SegmentHandler, outFile func readSegment(ctx context.Context, segmentHandler SegmentHandler, file *os.File, pipeName string) { defer file.Close() - reader := bufio.NewReader(file) - firstByteRead := false - totalBytesRead := int64(0) - - buf := make([]byte, 32*1024) - - // TODO should be explicitly buffered for better management - interfaceReader, interfaceWriter := io.Pipe() - defer interfaceWriter.Close() - segmentHandler(interfaceReader) - - for { - n, err := reader.Read(buf) - if n > 0 { - if !firstByteRead { - clog.V(7).Infof(ctx, "First byte read. pipeName=%s", pipeName) - firstByteRead = true - - } - totalBytesRead += int64(n) - if _, err := interfaceWriter.Write(buf[:n]); err != nil { - if err != io.EOF { - clog.Errorf(ctx, "Error writing. pipeName=%s err=%s", pipeName, err) - } - } - } - if n == len(buf) && n < 1024*1024 { - newLen := int(float64(len(buf)) * 1.5) - clog.V(7).Infof(ctx, "Max buf hit, increasing. oldSize=%s newSize=%s", humanBytes(int64(len(buf))), humanBytes(int64(newLen))) - buf = make([]byte, newLen) - } - - if err != nil { - if err.Error() == "EOF" { - clog.V(7).Infof(ctx, "Last byte read. pipeName=%s totalRead=%s", pipeName, humanBytes(totalBytesRead)) - } else { - clog.Errorf(ctx, "Error reading. pipeName=%s err=%s", pipeName, err) - } - break - } - } - clog.V(8).Infof(ctx, "read segment. 
totalRead=%s", humanBytes(totalBytesRead)) - + writer := NewMediaWriter() + segmentHandler(writer.MakeReader()) + io.Copy(writer, reader) + writer.Close() } func randomString() string { @@ -293,16 +254,3 @@ func randomString() string { } return strings.TrimRight(base32.StdEncoding.EncodeToString(b), "=") } - -func humanBytes(bytes int64) string { - var unit int64 = 1024 - if bytes < unit { - return fmt.Sprintf("%d B", bytes) - } - div, exp := unit, 0 - for n := bytes / unit; n >= unit; n /= unit { - div *= unit - exp++ - } - return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp]) -} diff --git a/media/rw.go b/media/rw.go new file mode 100644 index 0000000000..f66509ddfb --- /dev/null +++ b/media/rw.go @@ -0,0 +1,110 @@ +package media + +import ( + "bytes" + "io" + "log/slog" + "sync" +) + +type CloneableReader interface { + io.Reader + Clone() CloneableReader +} + +type MediaWriter struct { + mu *sync.Mutex + cond *sync.Cond + buffer *bytes.Buffer + closed bool +} + +type MediaReader struct { + source *MediaWriter + readPos int +} + +func NewMediaWriter() *MediaWriter { + mu := &sync.Mutex{} + return &MediaWriter{ + buffer: new(bytes.Buffer), + cond: sync.NewCond(mu), + mu: mu, + } +} + +func (mw *MediaWriter) Write(data []byte) (int, error) { + mw.mu.Lock() + defer mw.mu.Unlock() + + // Write to buffer + n, err := mw.buffer.Write(data) + + // Signal waiting readers + mw.cond.Broadcast() + + return n, err +} + +func (mw *MediaWriter) readData(startPos int) ([]byte, bool) { + mw.mu.Lock() + defer mw.mu.Unlock() + for { + totalLen := mw.buffer.Len() + if startPos < totalLen { + data := mw.buffer.Bytes()[startPos:totalLen] + return data, mw.closed + } + if startPos > totalLen { + slog.Info("Invalid start pos, invoking eof") + return nil, true + } + if mw.closed { + return nil, true + } + // Wait for new data + mw.cond.Wait() + } +} + +func (mw *MediaWriter) Close() { + if mw == nil { + return // sometimes happens, weird + } + mw.mu.Lock() + defer 
mw.mu.Unlock() + if !mw.closed { + mw.closed = true + mw.cond.Broadcast() + } +} + +func (mw *MediaWriter) MakeReader() CloneableReader { + return &MediaReader{ + source: mw, + } +} + +func (mr *MediaReader) Read(p []byte) (int, error) { + data, eof := mr.source.readData(mr.readPos) + toRead := len(p) + if len(data) < toRead { + toRead = len(data) + } + + copy(p, data[:toRead]) + mr.readPos += toRead + + var err error = nil + if eof { + err = io.EOF + } + + return toRead, err +} + +func (mr *MediaReader) Clone() CloneableReader { + return &MediaReader{ + source: mr.source, + } +} diff --git a/media/segment_reader.go b/media/segment_reader.go index f059e7491b..5804cc583c 100644 --- a/media/segment_reader.go +++ b/media/segment_reader.go @@ -5,19 +5,20 @@ import ( "sync" ) -type SegmentHandler func(reader io.Reader) +type SegmentHandler func(reader CloneableReader) -func NoopReader(reader io.Reader) { - go func() { - io.Copy(io.Discard, reader) - }() +func NoopReader(reader CloneableReader) { + // don't have to do anything here } type EOSReader struct{} -func (r EOSReader) Read(p []byte) (n int, err error) { +func (r *EOSReader) Read(p []byte) (n int, err error) { return 0, io.EOF } +func (r *EOSReader) Clone() CloneableReader { + return r +} type SwitchableSegmentReader struct { mu sync.RWMutex @@ -36,7 +37,7 @@ func (sr *SwitchableSegmentReader) SwitchReader(newReader SegmentHandler) { sr.reader = newReader } -func (sr *SwitchableSegmentReader) Read(reader io.Reader) { +func (sr *SwitchableSegmentReader) Read(reader CloneableReader) { sr.mu.RLock() defer sr.mu.RUnlock() sr.reader(reader) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 85c2c6f27a..7a367050ab 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -44,7 +44,7 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara clog.Warningf(ctx, "No price info found from Orchestrator, Gateway will not send payments for the video processing") } - 
params.liveParams.segmentReader.SwitchReader(func(reader io.Reader) { + params.liveParams.segmentReader.SwitchReader(func(reader media.CloneableReader) { // check for end of stream if _, eos := reader.(*media.EOSReader); eos { if err := publisher.Close(); err != nil { @@ -54,7 +54,7 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara return } go func() { - r := reader + var r io.Reader = reader if paymentProcessor != nil { r = paymentProcessor.process(reader) } From abd997dbd1fba0668c5e2170c4313f78f8064d19 Mon Sep 17 00:00:00 2001 From: gioelecerati <50955448+gioelecerati@users.noreply.github.com> Date: Wed, 11 Dec 2024 15:21:44 +0100 Subject: [PATCH 33/56] monitoring: add events trickle api (#3300) * monitoring: add events trickle api * add logs for debugging * add close channel on ai processing error * fix events subscribe & make it async * remove ctx update from events subscribe * server: Ignore context for cancellation on events trickle sub * parse pipeline status incoming from worker and send it to kafka * in-memory latest pipeline status * fix serializing * debug logs * removed debug logs * update pipeline status schema * address comments --------- Co-authored-by: Victor Elias --- core/livepeernode.go | 1 + go.mod | 2 +- go.sum | 4 +-- monitor/kafka.go | 17 ++++++++++++ monitor/pipeline_status.go | 30 ++++++++++++++++++++ server/ai_http.go | 15 ++++++++-- server/ai_live_video.go | 56 ++++++++++++++++++++++++++++++++++++++ server/ai_process.go | 7 +++++ 8 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 monitor/pipeline_status.go diff --git a/core/livepeernode.go b/core/livepeernode.go index 2e9f2d7f05..93f79b3ab2 100644 --- a/core/livepeernode.go +++ b/core/livepeernode.go @@ -162,6 +162,7 @@ type LivepeerNode struct { type LivePipeline struct { ControlPub *trickle.TricklePublisher StopControl func() + EventsPub *trickle.TricklePublisher } // NewLivepeerNode creates a new Livepeer Node. Eth can be nil. 
diff --git a/go.mod b/go.mod index 16ae8e03a4..2c35deb7ff 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/uuid v1.6.0 github.com/jaypipes/ghw v0.10.0 github.com/jaypipes/pcidb v1.0.0 - github.com/livepeer/ai-worker v0.12.7-0.20241204213602-1021eaf4c373 + github.com/livepeer/ai-worker v0.12.7-0.20241205213704-87d6efe82510 github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18 github.com/livepeer/lpms v0.0.0-20241203012405-fc96cadb6393 diff --git a/go.sum b/go.sum index 22e4917c85..c6ffdd039a 100644 --- a/go.sum +++ b/go.sum @@ -605,8 +605,8 @@ github.com/libp2p/go-netroute v0.2.0 h1:0FpsbsvuSnAhXFnCY0VLFbJOzaK0VnP0r1QT/o4n github.com/libp2p/go-netroute v0.2.0/go.mod h1:Vio7LTzZ+6hoT4CMZi5/6CpY3Snzh2vgZhWgxMNwlQI= github.com/libp2p/go-openssl v0.1.0 h1:LBkKEcUv6vtZIQLVTegAil8jbNpJErQ9AnT+bWV+Ooo= github.com/libp2p/go-openssl v0.1.0/go.mod h1:OiOxwPpL3n4xlenjx2h7AwSGaFSC/KZvf6gNdOBQMtc= -github.com/livepeer/ai-worker v0.12.7-0.20241204213602-1021eaf4c373 h1:+IepZubsJ1NeYcgoa+7tk8ycOh5DaRZ14I+SxtAbsZ0= -github.com/livepeer/ai-worker v0.12.7-0.20241204213602-1021eaf4c373/go.mod h1:ZibfmZQQh6jFvnPLHeIPInghfX5ln+JpN845nS3GuyM= +github.com/livepeer/ai-worker v0.12.7-0.20241205213704-87d6efe82510 h1:rPMpkf43tOa8eixmQkBvYbgGleRWPEpKu3P7FKgtPnc= +github.com/livepeer/ai-worker v0.12.7-0.20241205213704-87d6efe82510/go.mod h1:ZibfmZQQh6jFvnPLHeIPInghfX5ln+JpN845nS3GuyM= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b h1:VQcnrqtCA2UROp7q8ljkh2XA/u0KRgVv0S1xoUvOweE= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b/go.mod h1:hwJ5DKhl+pTanFWl+EUpw1H7ukPO/H+MFpgA7jjshzw= github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded h1:ZQlvR5RB4nfT+cOQee+WqmaDOgGtP2oDMhcVvR4L0yA= diff --git a/monitor/kafka.go b/monitor/kafka.go index a6aa21ab70..bf31d293b4 100644 --- a/monitor/kafka.go +++ b/monitor/kafka.go @@ -35,6 +35,23 @@ type 
GatewayEvent struct { Data interface{} `json:"data"` } +type PipelineStatus struct { + Pipeline string `json:"pipeline"` + StartTime float64 `json:"start_time"` + LastParamsUpdateTime float64 `json:"last_params_update_time"` + LastParams interface{} `json:"last_params"` + LastParamsHash string `json:"last_params_hash"` + InputFPS float64 `json:"input_fps"` + OutputFPS float64 `json:"output_fps"` + LastInputTime float64 `json:"last_input_time"` + LastOutputTime float64 `json:"last_output_time"` + RestartCount int `json:"restart_count"` + LastRestartTime float64 `json:"last_restart_time"` + LastRestartLogs []string `json:"last_restart_logs"` + LastError *string `json:"last_error"` + StreamID *string `json:"stream_id"` +} + var kafkaProducer *KafkaProducer func InitKafkaProducer(bootstrapServers, user, password, topic, gatewayAddress string) error { diff --git a/monitor/pipeline_status.go b/monitor/pipeline_status.go new file mode 100644 index 0000000000..648d56c50a --- /dev/null +++ b/monitor/pipeline_status.go @@ -0,0 +1,30 @@ +package monitor + +import ( + "sync" +) + +var ( + // pipelineStatusMap stores the latest pipeline status for each stream + pipelineStatusMap = make(map[string]PipelineStatus) + pipelineStatusMu sync.RWMutex +) + +func UpdatePipelineStatus(stream string, status PipelineStatus) { + pipelineStatusMu.Lock() + defer pipelineStatusMu.Unlock() + pipelineStatusMap[stream] = status +} + +func GetPipelineStatus(stream string) (PipelineStatus, bool) { + pipelineStatusMu.RLock() + defer pipelineStatusMu.RUnlock() + status, exists := pipelineStatusMap[stream] + return status, exists +} + +func DeletePipelineStatus(stream string) { + pipelineStatusMu.Lock() + defer pipelineStatusMu.Unlock() + delete(pipelineStatusMap, stream) +} diff --git a/server/ai_http.go b/server/ai_http.go index e46821b170..c2937b2ce1 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -130,6 +130,7 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { pubUrl = 
orch.ServiceURI().JoinPath(TrickleHTTPPath, mid).String() subUrl = pubUrl + "-out" controlUrl = pubUrl + "-control" + eventsUrl = pubUrl + "-events" ) // Handle initial payment, the rest of the payments are done separately from the stream processing @@ -163,6 +164,8 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { subCh.CreateChannel() controlPubCh := trickle.NewLocalPublisher(h.trickleSrv, mid+"-control", "application/json") controlPubCh.CreateChannel() + eventsCh := trickle.NewLocalPublisher(h.trickleSrv, mid+"-events", "application/json") + eventsCh.CreateChannel() // Start payment receiver which accounts the payments and stops the stream if the payment is insufficient priceInfo := payment.GetExpectedPrice() @@ -181,6 +184,7 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { slog.Warn("Error accounting payment, stopping stream processing", "err", err) pubCh.Close() subCh.Close() + eventsCh.Close() controlPubCh.Close() cancel() } @@ -210,10 +214,15 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { // Prepare request to worker controlUrlOverwrite := overwriteHost(h.node.LiveAITrickleHostForRunner, controlUrl) + eventsUrlOverwrite := overwriteHost(h.node.LiveAITrickleHostForRunner, eventsUrl) + subscribeUrlOverwrite := overwriteHost(h.node.LiveAITrickleHostForRunner, pubUrl) + publishUrlOverwrite := overwriteHost(h.node.LiveAITrickleHostForRunner, subUrl) + workerReq := worker.LiveVideoToVideoParams{ ModelId: req.ModelId, - PublishUrl: overwriteHost(h.node.LiveAITrickleHostForRunner, subUrl), - SubscribeUrl: overwriteHost(h.node.LiveAITrickleHostForRunner, pubUrl), + PublishUrl: publishUrlOverwrite, + SubscribeUrl: subscribeUrlOverwrite, + EventsUrl: &eventsUrlOverwrite, ControlUrl: &controlUrlOverwrite, Params: req.Params, } @@ -228,6 +237,7 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { pubCh.Close() subCh.Close() controlPubCh.Close() + eventsCh.Close() cancel() respondWithError(w, err.Error(), 
http.StatusInternalServerError) return @@ -238,6 +248,7 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { PublishUrl: pubUrl, SubscribeUrl: subUrl, ControlUrl: &controlUrl, + EventsUrl: &eventsUrl, }) if err != nil { respondWithError(w, err.Error(), http.StatusInternalServerError) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 7a367050ab..8241c851f8 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -2,6 +2,7 @@ package server import ( "context" + "encoding/json" "fmt" "io" "log/slog" @@ -14,6 +15,7 @@ import ( "github.com/livepeer/go-livepeer/clog" "github.com/livepeer/go-livepeer/core" "github.com/livepeer/go-livepeer/media" + "github.com/livepeer/go-livepeer/monitor" "github.com/livepeer/go-livepeer/trickle" "github.com/livepeer/lpms/ffmpeg" ) @@ -170,3 +172,57 @@ func startControlPublish(control *url.URL, params aiRequestParams) { } }() } + +func startEventsSubscribe(ctx context.Context, url *url.URL, params aiRequestParams) { + subscriber := trickle.NewTrickleSubscriber(url.String()) + + clog.Infof(ctx, "Starting event subscription for URL: %s", url.String()) + + go func() { + for { + clog.Infof(ctx, "Attempting to read from event subscription for URL: %s", url.String()) + segment, err := subscriber.Read() + if err != nil { + clog.Infof(ctx, "Error reading events subscription: %s", err) + // TODO + // monitor.DeletePipelineStatus(params.liveParams.stream) + return + } + + clog.Infof(ctx, "Successfully read segment from event subscription for URL: %s", url.String()) + + body, err := io.ReadAll(segment.Body) + segment.Body.Close() + + if err != nil { + clog.Infof(ctx, "Error reading events subscription body: %s", err) + continue + } + + stream := params.liveParams.stream + + if stream == "" { + clog.Infof(ctx, "Stream ID is missing") + continue + } + + var status monitor.PipelineStatus + if err := json.Unmarshal(body, &status); err != nil { + clog.Infof(ctx, "Failed to parse JSON from events subscription: %s", 
err) + continue + } + + status.StreamID = &stream + + // TODO: update the in-memory pipeline status + // monitor.UpdatePipelineStatus(stream, status) + + clog.Infof(ctx, "Received event for stream=%s status=%+v", stream, status) + + monitor.SendQueueEventAsync( + "stream_status", + status, + ) + } + }() +} diff --git a/server/ai_process.go b/server/ai_process.go index f69c2a5784..99bdb582de 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1049,9 +1049,16 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A return nil, fmt.Errorf("invalid control URL: %w", err) } // TODO any errors from these funcs should we kill the input stream? + + events, err := common.AppendHostname(*resp.JSON200.EventsUrl, host) + if err != nil { + return nil, fmt.Errorf("invalid events URL: %w", err) + } + clog.V(common.VERBOSE).Infof(ctx, "pub %s sub %s control %s events %s", pub, sub, control, events) startTricklePublish(ctx, pub, params, sess) startTrickleSubscribe(ctx, sub, params) startControlPublish(control, params) + startEventsSubscribe(ctx, events, params) } return resp, nil } From a367c90e534232d8e65b234969a8e979491635f8 Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Thu, 12 Dec 2024 01:33:23 -0800 Subject: [PATCH 34/56] ai/live: Fix early EOFs (#3314) Sometimes if the reader is slow to pull data (eg, during a retry) and the buffer grows larger than a read then closes, the reader may incorrectly signal EOF and return incomplete data. Don't do that. 
--- media/rw.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/media/rw.go b/media/rw.go index f66509ddfb..544a4eb407 100644 --- a/media/rw.go +++ b/media/rw.go @@ -88,8 +88,11 @@ func (mw *MediaWriter) MakeReader() CloneableReader { func (mr *MediaReader) Read(p []byte) (int, error) { data, eof := mr.source.readData(mr.readPos) toRead := len(p) - if len(data) < toRead { + if len(data) <= toRead { toRead = len(data) + } else { + // there is more data to read + eof = false } copy(p, data[:toRead]) From 77711d4c72fedf45e7c1b218649be18828074602 Mon Sep 17 00:00:00 2001 From: Max Holland Date: Thu, 12 Dec 2024 11:04:52 +0000 Subject: [PATCH 35/56] Fix race condition with stream existence check (#3309) * Fix race condition with stream existence check * Simplify, just call startControlPublish first --- server/ai_live_video.go | 2 +- server/ai_process.go | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 8241c851f8..6c1c482278 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -132,8 +132,8 @@ func copySegment(segment *http.Response, w io.Writer) error { } func startControlPublish(control *url.URL, params aiRequestParams) { - controlPub, err := trickle.NewTricklePublisher(control.String()) stream := params.liveParams.stream + controlPub, err := trickle.NewTricklePublisher(control.String()) if err != nil { slog.Info("error starting control publisher", "stream", stream, "err", err) return diff --git a/server/ai_process.go b/server/ai_process.go index 99bdb582de..41948251b2 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1048,16 +1048,15 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A if err != nil { return nil, fmt.Errorf("invalid control URL: %w", err) } - // TODO any errors from these funcs should we kill the input stream? 
- events, err := common.AppendHostname(*resp.JSON200.EventsUrl, host) if err != nil { return nil, fmt.Errorf("invalid events URL: %w", err) } clog.V(common.VERBOSE).Infof(ctx, "pub %s sub %s control %s events %s", pub, sub, control, events) + + startControlPublish(control, params) startTricklePublish(ctx, pub, params, sess) startTrickleSubscribe(ctx, sub, params) - startControlPublish(control, params) startEventsSubscribe(ctx, events, params) } return resp, nil From 183b8bca2362c90701b21918d3212b658a91c6a5 Mon Sep 17 00:00:00 2001 From: Max Holland Date: Thu, 12 Dec 2024 11:06:46 +0000 Subject: [PATCH 36/56] Record current number of live pipelines running (#3312) --- monitor/census.go | 32 +++++++++++++++++++++++--------- server/ai_live_video.go | 3 +++ server/ai_mediaserver.go | 4 ++++ 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/monitor/census.go b/monitor/census.go index d4d52b317b..0993a7f74f 100644 --- a/monitor/census.go +++ b/monitor/census.go @@ -195,15 +195,16 @@ type ( mSceneClassification *stats.Int64Measure // Metrics for AI jobs - mAIModelsRequested *stats.Int64Measure - mAIRequestLatencyScore *stats.Float64Measure - mAIRequestPrice *stats.Float64Measure - mAIRequestError *stats.Int64Measure - mAIResultDownloaded *stats.Int64Measure - mAIResultDownloadTime *stats.Float64Measure - mAIResultUploaded *stats.Int64Measure - mAIResultUploadTime *stats.Float64Measure - mAIResultSaveFailed *stats.Int64Measure + mAIModelsRequested *stats.Int64Measure + mAIRequestLatencyScore *stats.Float64Measure + mAIRequestPrice *stats.Float64Measure + mAIRequestError *stats.Int64Measure + mAIResultDownloaded *stats.Int64Measure + mAIResultDownloadTime *stats.Float64Measure + mAIResultUploaded *stats.Int64Measure + mAIResultUploadTime *stats.Float64Measure + mAIResultSaveFailed *stats.Int64Measure + mAICurrentLivePipelines *stats.Int64Measure lock sync.Mutex emergeTimes map[uint64]map[uint64]time.Time // nonce:seqNo @@ -373,6 +374,7 @@ func 
InitCensus(nodeType NodeType, version string) { census.mAIResultUploaded = stats.Int64("ai_result_uploaded_total", "AIResultUploaded", "tot") census.mAIResultUploadTime = stats.Float64("ai_result_upload_time_seconds", "Upload (to Orchestrator) time", "sec") census.mAIResultSaveFailed = stats.Int64("ai_result_upload_failed_total", "AIResultUploadFailed", "tot") + census.mAICurrentLivePipelines = stats.Int64("ai_current_live_pipelines", "Number of live AI pipelines currently running", "tot") glog.Infof("Compiler: %s Arch %s OS %s Go version %s", runtime.Compiler, runtime.GOARCH, runtime.GOOS, runtime.Version()) glog.Infof("Livepeer version: %s", version) @@ -973,6 +975,13 @@ func InitCensus(nodeType NodeType, version string) { TagKeys: append([]tag.Key{census.kOrchestratorURI, census.kPipeline, census.kModelName}, baseTags...), Aggregation: view.Distribution(0, .10, .20, .50, .100, .150, .200, .500, .1000, .5000, 10.000), }, + { + Name: "ai_current_live_pipelines", + Measure: census.mAICurrentLivePipelines, + Description: "Number of live AI pipelines currently running", + TagKeys: append([]tag.Key{census.kOrchestratorURI, census.kPipeline, census.kModelName}, baseTags...), + Aggregation: view.LastValue(), + }, } // Register the views @@ -1006,6 +1015,7 @@ func InitCensus(nodeType NodeType, version string) { stats.Record(census.ctx, census.mWinningTicketsRecv.M(int64(0))) stats.Record(census.ctx, census.mCurrentSessions.M(int64(0))) stats.Record(census.ctx, census.mValueRedeemed.M(float64(0))) + stats.Record(census.ctx, census.mAICurrentLivePipelines.M(int64(0))) } /* @@ -1901,6 +1911,10 @@ func AIRequestError(code string, pipeline string, model string, orchInfo *lpnet. } } +func AICurrentLiveSessions(currentPipelines int) { + stats.Record(census.ctx, census.mAICurrentLivePipelines.M(int64(currentPipelines))) +} + // AIJobProcessed records orchestrator AI job processing metrics. 
func AIJobProcessed(ctx context.Context, pipeline string, model string, jobInfo AIJobInfo) { census.recordModelRequested(pipeline, model) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 6c1c482278..1390797f0f 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -152,6 +152,9 @@ func startControlPublish(control *url.URL, params aiRequestParams) { ControlPub: controlPub, StopControl: stop, } + if monitor.Enabled { + monitor.AICurrentLiveSessions(len(params.node.LivePipelines)) + } // send a keepalive periodically to keep both ends of the connection alive go func() { diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index 5f5eb0fdf9..09a9381944 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/livepeer/go-livepeer/monitor" "io" "log/slog" "net/http" @@ -555,6 +556,9 @@ func (ls *LivepeerServer) cleanupLive(stream string) { pub, ok := ls.LivepeerNode.LivePipelines[stream] delete(ls.LivepeerNode.LivePipelines, stream) ls.LivepeerNode.LiveMu.Unlock() + if monitor.Enabled { + monitor.AICurrentLiveSessions(len(ls.LivepeerNode.LivePipelines)) + } if ok && pub != nil && pub.ControlPub != nil { if err := pub.ControlPub.Close(); err != nil { From 75af747c460e486d5d5b2c16485bdbb19fa6a4a0 Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Thu, 12 Dec 2024 09:01:50 -0800 Subject: [PATCH 37/56] ai/live: Set the default model on the request if needed (#3313) Without it, the O does not have a model ID to initialize the job --- server/ai_process.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/ai_process.go b/server/ai_process.go index 41948251b2..f31a71c249 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1424,6 +1424,9 @@ func processAIRequest(ctx context.Context, params aiRequestParams, req interface modelID = defaultLiveVideoToVideoModelID if v.ModelId != nil && *v.ModelId != "" { modelID = *v.ModelId + } else { 
+ // set default model + v.ModelId = &modelID } submitFn = func(ctx context.Context, params aiRequestParams, sess *AISession) (interface{}, error) { return submitLiveVideoToVideo(ctx, params, sess, v) From ae0cb358e1567316cf2ceb2bbdf3af43ad036865 Mon Sep 17 00:00:00 2001 From: gioelecerati <50955448+gioelecerati@users.noreply.github.com> Date: Thu, 12 Dec 2024 20:34:53 +0100 Subject: [PATCH 38/56] gateway/events: Create status API with stream/pipeline ID info (#3310) * gateway: events: better parsing and latest last_params * add api endpoint * leave log for received event * move defer clear in-memory inside goroutine * remove types, use interface, improvements * remove payload from stream status * added request id to live params * address comments * fetch stream_id * add pipeline_id coming from showcase * refactor * events and status event type * comment * refactor stream status pipeline * update * store by streamid and not stream --- monitor/pipeline_status.go | 30 -------------------- server/ai_http.go | 33 ++++++++++++++++++++++ server/ai_live_video.go | 53 +++++++++++++++++++++--------------- server/ai_mediaserver.go | 17 ++++++++++-- server/ai_pipeline_status.go | 34 +++++++++++++++++++++++ server/ai_process.go | 3 ++ server/auth.go | 6 ++++ 7 files changed, 122 insertions(+), 54 deletions(-) delete mode 100644 monitor/pipeline_status.go create mode 100644 server/ai_pipeline_status.go diff --git a/monitor/pipeline_status.go b/monitor/pipeline_status.go deleted file mode 100644 index 648d56c50a..0000000000 --- a/monitor/pipeline_status.go +++ /dev/null @@ -1,30 +0,0 @@ -package monitor - -import ( - "sync" -) - -var ( - // pipelineStatusMap stores the latest pipeline status for each stream - pipelineStatusMap = make(map[string]PipelineStatus) - pipelineStatusMu sync.RWMutex -) - -func UpdatePipelineStatus(stream string, status PipelineStatus) { - pipelineStatusMu.Lock() - defer pipelineStatusMu.Unlock() - pipelineStatusMap[stream] = status -} - -func 
GetPipelineStatus(stream string) (PipelineStatus, bool) { - pipelineStatusMu.RLock() - defer pipelineStatusMu.RUnlock() - status, exists := pipelineStatusMap[stream] - return status, exists -} - -func DeletePipelineStatus(stream string) { - pipelineStatusMu.Lock() - defer pipelineStatusMu.Unlock() - delete(pipelineStatusMap, stream) -} diff --git a/server/ai_http.go b/server/ai_http.go index c2937b2ce1..edb35664e0 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -73,6 +73,9 @@ func startAIServer(lp *lphttp) error { lp.transRPC.Handle("/live-video-to-video", oapiReqValidator(lp.StartLiveVideoToVideo())) // Additionally, there is the '/aiResults' endpoint registered in server/rpc.go + // This endpoint is used to get the latest status of a live-video-to-video stream + lp.transRPC.HandleFunc("/stream-status/{streamID}", lp.handleStreamStatus()) + return nil } @@ -801,3 +804,33 @@ func parseMultiPartResult(body io.Reader, boundary string, pipeline string) core return wkrResult } + +// handleStreamStatus returns the latest available status of a live-video-to-video stream +func (h *lphttp) handleStreamStatus() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + streamID := strings.TrimPrefix(r.URL.Path, "/stream-status/") + if streamID == "" { + respondWithError(w, "stream ID is required", http.StatusBadRequest) + return + } + + // Get status for specific stream + status, exists := StreamStatusStore.Get(streamID) + if !exists { + respondWithError(w, "Stream status not found", http.StatusNotFound) + return + } + + jsonData, err := json.Marshal(status) + if err != nil { + respondWithError(w, "Failed to marshal status", http.StatusInternalServerError) + return + } + respondJsonOk(w, jsonData) + } +} diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 1390797f0f..a6f2b6bce2 100644 --- a/server/ai_live_video.go 
+++ b/server/ai_live_video.go @@ -178,22 +178,21 @@ func startControlPublish(control *url.URL, params aiRequestParams) { func startEventsSubscribe(ctx context.Context, url *url.URL, params aiRequestParams) { subscriber := trickle.NewTrickleSubscriber(url.String()) + stream := params.liveParams.stream + streamId := params.liveParams.streamID clog.Infof(ctx, "Starting event subscription for URL: %s", url.String()) go func() { + defer StreamStatusStore.Clear(streamId) for { - clog.Infof(ctx, "Attempting to read from event subscription for URL: %s", url.String()) + clog.Infof(ctx, "Reading from event subscription for URL: %s", url.String()) segment, err := subscriber.Read() if err != nil { clog.Infof(ctx, "Error reading events subscription: %s", err) - // TODO - // monitor.DeletePipelineStatus(params.liveParams.stream) return } - clog.Infof(ctx, "Successfully read segment from event subscription for URL: %s", url.String()) - body, err := io.ReadAll(segment.Body) segment.Body.Close() @@ -202,30 +201,40 @@ func startEventsSubscribe(ctx context.Context, url *url.URL, params aiRequestPar continue } - stream := params.liveParams.stream - - if stream == "" { - clog.Infof(ctx, "Stream ID is missing") - continue - } - - var status monitor.PipelineStatus - if err := json.Unmarshal(body, &status); err != nil { + var event map[string]interface{} + if err := json.Unmarshal(body, &event); err != nil { clog.Infof(ctx, "Failed to parse JSON from events subscription: %s", err) continue } - status.StreamID = &stream + event["stream_id"] = streamId + event["request_id"] = params.liveParams.requestID + event["pipeline_id"] = params.liveParams.pipelineID - // TODO: update the in-memory pipeline status - // monitor.UpdatePipelineStatus(stream, status) + clog.Infof(ctx, "Received event for stream=%s event=%+v", stream, event) - clog.Infof(ctx, "Received event for stream=%s status=%+v", stream, status) + eventType, ok := event["type"].(string) + if !ok { + eventType = "unknown" + 
clog.Warningf(ctx, "Received event without a type stream=%s event=%+v", stream, event) + } + + queueEventType := "ai_stream_events" + if eventType == "status" { + queueEventType = "ai_stream_status" + // The large logs and params fields are only sent once and then cleared to save bandwidth. So coalesce the + // incoming status with the last non-null value that we received on such fields for the status API. + lastStreamStatus, _ := StreamStatusStore.Get(streamId) + if logs, ok := event["last_restart_logs"]; !ok || logs == nil { + event["last_restart_logs"] = lastStreamStatus["last_restart_logs"] + } + if params, ok := event["last_params"]; !ok || params == nil { + event["last_params"] = lastStreamStatus["last_params"] + } + StreamStatusStore.Store(streamId, event) + } - monitor.SendQueueEventAsync( - "stream_status", - status, - ) + monitor.SendQueueEventAsync(queueEventType, event) } }() } diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index 09a9381944..ddd040cc8d 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -6,7 +6,6 @@ import ( "encoding/json" "errors" "fmt" - "github.com/livepeer/go-livepeer/monitor" "io" "log/slog" "net/http" @@ -14,6 +13,8 @@ import ( "strings" "time" + "github.com/livepeer/go-livepeer/monitor" + "github.com/getkin/kin-openapi/openapi3filter" "github.com/livepeer/ai-worker/worker" "github.com/livepeer/go-livepeer/clog" @@ -426,6 +427,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { // if auth webhook returns pipeline config these will be replaced pipeline := qp.Get("pipeline") rawParams := qp.Get("params") + var streamID, pipelineID string var pipelineParams map[string]interface{} if rawParams != "" { if err := json.Unmarshal([]byte(rawParams), &pipelineParams); err != nil { @@ -463,11 +465,19 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { if len(authResp.paramsMap) > 0 { pipelineParams = authResp.paramsMap } + + if authResp.StreamID != "" { + streamID = authResp.StreamID 
+ } + + if authResp.PipelineID != "" { + pipelineID = authResp.PipelineID + } } requestID := string(core.RandomManifestID()) ctx = clog.AddVal(ctx, "request_id", requestID) - clog.Infof(ctx, "Received live video AI request for %s. pipelineParams=%v", streamName, pipelineParams) + clog.Infof(ctx, "Received live video AI request for %s. pipelineParams=%v streamID=%s", streamName, pipelineParams, streamID) // Kick off the RTMP pull and segmentation as soon as possible ssr := media.NewSwitchableSegmentReader() @@ -493,6 +503,9 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { outputRTMPURL: outputURL, stream: streamName, paymentProcessInterval: ls.livePaymentInterval, + requestID: requestID, + streamID: streamID, + pipelineID: pipelineID, }, } diff --git a/server/ai_pipeline_status.go b/server/ai_pipeline_status.go new file mode 100644 index 0000000000..8bc951607d --- /dev/null +++ b/server/ai_pipeline_status.go @@ -0,0 +1,34 @@ +package server + +import ( + "sync" +) + +type streamStatusStore struct { + store map[string]map[string]interface{} + mu sync.RWMutex +} + +var StreamStatusStore = streamStatusStore{store: make(map[string]map[string]interface{})} + +// StoreStreamStatus updates the status for a stream +func (s *streamStatusStore) Store(streamID string, status map[string]interface{}) { + s.mu.Lock() + s.store[streamID] = status + s.mu.Unlock() +} + +// ClearStreamStatus removes a stream's status from the store +func (s *streamStatusStore) Clear(streamID string) { + s.mu.Lock() + delete(s.store, streamID) + s.mu.Unlock() +} + +// GetStreamStatus returns the current status for a stream +func (s *streamStatusStore) Get(streamID string) (map[string]interface{}, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + status, exists := s.store[streamID] + return status, exists +} diff --git a/server/ai_process.go b/server/ai_process.go index f31a71c249..801147852c 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -93,6 +93,9 @@ type liveRequestParams 
struct { segmentReader *media.SwitchableSegmentReader outputRTMPURL string stream string + requestID string + streamID string + pipelineID string paymentProcessInterval time.Duration } diff --git a/server/auth.go b/server/auth.go index 7a63198d68..5d768f97b1 100644 --- a/server/auth.go +++ b/server/auth.go @@ -118,6 +118,12 @@ type AIAuthResponse struct { // Name of the pipeline to run Pipeline string `json:"pipeline"` + // ID of the pipeline to run + PipelineID string `json:"pipeline_id"` + + // ID of the stream + StreamID string `json:"stream_id"` + // Parameters for the pipeline PipelineParams json.RawMessage `json:"pipeline_parameters"` paramsMap map[string]interface{} // unmarshaled params From ee8c7097513323ef2c9e2a10e0aca07d3e75a5c9 Mon Sep 17 00:00:00 2001 From: Max Holland Date: Fri, 13 Dec 2024 14:20:27 +0000 Subject: [PATCH 39/56] Improve trickle subscribe error handling (#3302) * Check for insufficient capacity * Mirror the error logic of the other pipelines Unless there is a 200 then we need to handle the error in some way * Improve trickle subscribe error handling * Only call stopPipeline when we really need to * Reduce retry pause time --- media/mediamtx.go | 6 +++- server/ai_live_video.go | 39 +++++++++++++++++++----- server/ai_mediaserver.go | 21 +++++++++++-- server/ai_process.go | 64 ++++++++++++++++++++++++---------------- server/rpc.go | 5 +++- 5 files changed, 97 insertions(+), 38 deletions(-) diff --git a/media/mediamtx.go b/media/mediamtx.go index 29756e11ce..3588519e2a 100644 --- a/media/mediamtx.go +++ b/media/mediamtx.go @@ -1,10 +1,13 @@ package media import ( + "context" "errors" "fmt" "io" "net/http" + + "github.com/livepeer/go-livepeer/clog" ) type MediaMTXClient struct { @@ -54,7 +57,8 @@ func getApiPath(sourceType string) (string, error) { return apiPath, nil } -func (mc *MediaMTXClient) KickInputConnection() error { +func (mc *MediaMTXClient) KickInputConnection(ctx context.Context) error { + clog.V(8).Infof(ctx, "Kicking mediamtx 
input connection") apiPath, err := getApiPath(mc.sourceType) if err != nil { return err diff --git a/server/ai_live_video.go b/server/ai_live_video.go index a6f2b6bce2..2dc84604a0 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -3,6 +3,7 @@ package server import ( "context" "encoding/json" + "errors" "fmt" "io" "log/slog" @@ -76,25 +77,48 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa subscriber := trickle.NewTrickleSubscriber(url.String()) r, w, err := os.Pipe() if err != nil { - slog.Info("error getting pipe for trickle-ffmpeg", "url", url, "err", err) + params.liveParams.stopPipeline(fmt.Errorf("error getting pipe for trickle-ffmpeg. url=%s %w", url, err)) + return } ctx = clog.AddVal(ctx, "url", url.Redacted()) ctx = clog.AddVal(ctx, "outputRTMPURL", params.liveParams.outputRTMPURL) // read segments from trickle subscription go func() { + var err error defer w.Close() + retries := 0 + // we're trying to keep (retryPause x maxRetries) duration to fall within one output GOP length + const retryPause = 300 * time.Millisecond + const maxRetries = 5 for { - segment, err := subscriber.Read() + if !params.inputStreamExists() { + clog.Infof(ctx, "trickle subscribe stopping, input stream does not exist.") + break + } + var segment *http.Response + segment, err = subscriber.Read() if err != nil { + if errors.Is(err, trickle.EOS) { + params.liveParams.stopPipeline(fmt.Errorf("trickle subscribe end of stream: %w", err)) + return + } // TODO if not EOS then signal a new orchestrator is needed - clog.Infof(ctx, "Error reading trickle subscription: %s", err) - return + err = fmt.Errorf("trickle subscribe error reading: %w", err) + clog.Infof(ctx, "%s", err) + if retries > maxRetries { + params.liveParams.stopPipeline(err) + return + } + retries++ + time.Sleep(retryPause) + continue } + retries = 0 clog.V(8).Infof(ctx, "trickle subscribe read data") if err = copySegment(segment, w); err != nil { - clog.Infof(ctx, 
"Error copying to ffmpeg stdin: %s", err) + params.liveParams.stopPipeline(fmt.Errorf("trickle subscribe error copying: %w", err)) return } } @@ -103,9 +127,8 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa go func() { defer r.Close() for { - _, ok := params.node.LivePipelines[params.liveParams.stream] - if !ok { - clog.Errorf(ctx, "Stopping output rtmp stream, input stream does not exist. err=%s", err) + if !params.inputStreamExists() { + clog.Errorf(ctx, "Stopping output rtmp stream, input stream does not exist.") break } diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index ddd040cc8d..c3042a130c 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -445,7 +445,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { QueryParams: queryParams, }) if err != nil { - kickErr := mediaMTXClient.KickInputConnection() + kickErr := mediaMTXClient.KickInputConnection(ctx) if kickErr != nil { clog.Errorf(ctx, "failed to kick input connection: %s", kickErr.Error()) } @@ -493,6 +493,19 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { ls.cleanupLive(streamName) }() + // this function is called when the pipeline hits a fatal error, we kick the input connection to allow + // the client to reconnect and restart the pipeline + stopPipeline := func(err error) { + if err == nil { + return + } + clog.Errorf(ctx, "Live video pipeline stopping: %s", err) + err = mediaMTXClient.KickInputConnection(ctx) + if err != nil { + clog.Errorf(ctx, "Failed to kick input connection: %s", err) + } + } + params := aiRequestParams{ node: ls.LivepeerNode, os: drivers.NodeStorage.NewSession(requestID), @@ -506,6 +519,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { requestID: requestID, streamID: streamID, pipelineID: pipelineID, + stopPipeline: stopPipeline, }, } @@ -513,7 +527,10 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { ModelId: &pipeline, Params: &pipelineParams, } - 
processAIRequest(ctx, params, req) + _, err = processAIRequest(ctx, params, req) + if err != nil { + stopPipeline(err) + } }) } diff --git a/server/ai_process.go b/server/ai_process.go index 801147852c..1caa6f4d9e 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -88,6 +88,14 @@ type aiRequestParams struct { liveParams liveRequestParams } +func (a aiRequestParams) inputStreamExists() bool { + if a.node == nil { + return false + } + _, ok := a.node.LivePipelines[a.liveParams.stream] + return ok +} + // For live video pipelines type liveRequestParams struct { segmentReader *media.SwitchableSegmentReader @@ -98,6 +106,7 @@ type liveRequestParams struct { pipelineID string paymentProcessInterval time.Duration + stopPipeline func(error) } // CalculateTextToImageLatencyScore computes the time taken per pixel for an text-to-image request. @@ -1033,35 +1042,38 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A return nil, err } - if resp.JSON200 != nil { - if resp.JSON200.ControlUrl == nil { - return nil, errors.New("control URL is missing") - } + if resp.JSON200 == nil { + // TODO: Replace trim newline with better error spec from O + return nil, errors.New(strings.TrimSuffix(string(resp.Body), "\n")) + } - host := sess.Transcoder() - pub, err := common.AppendHostname(resp.JSON200.PublishUrl, host) - if err != nil { - return nil, fmt.Errorf("invalid publish URL: %w", err) - } - sub, err := common.AppendHostname(resp.JSON200.SubscribeUrl, host) - if err != nil { - return nil, fmt.Errorf("invalid subscribe URL: %w", err) - } - control, err := common.AppendHostname(*resp.JSON200.ControlUrl, host) - if err != nil { - return nil, fmt.Errorf("invalid control URL: %w", err) - } - events, err := common.AppendHostname(*resp.JSON200.EventsUrl, host) - if err != nil { - return nil, fmt.Errorf("invalid events URL: %w", err) - } - clog.V(common.VERBOSE).Infof(ctx, "pub %s sub %s control %s events %s", pub, sub, control, events) + if 
resp.JSON200.ControlUrl == nil { + return nil, errors.New("control URL is missing") + } - startControlPublish(control, params) - startTricklePublish(ctx, pub, params, sess) - startTrickleSubscribe(ctx, sub, params) - startEventsSubscribe(ctx, events, params) + host := sess.Transcoder() + pub, err := common.AppendHostname(resp.JSON200.PublishUrl, host) + if err != nil { + return nil, fmt.Errorf("invalid publish URL: %w", err) + } + sub, err := common.AppendHostname(resp.JSON200.SubscribeUrl, host) + if err != nil { + return nil, fmt.Errorf("invalid subscribe URL: %w", err) + } + control, err := common.AppendHostname(*resp.JSON200.ControlUrl, host) + if err != nil { + return nil, fmt.Errorf("invalid control URL: %w", err) } + events, err := common.AppendHostname(*resp.JSON200.EventsUrl, host) + if err != nil { + return nil, fmt.Errorf("invalid events URL: %w", err) + } + clog.V(common.VERBOSE).Infof(ctx, "pub %s sub %s control %s events %s", pub, sub, control, events) + + startControlPublish(control, params) + startTricklePublish(ctx, pub, params, sess) + startTrickleSubscribe(ctx, sub, params) + startEventsSubscribe(ctx, events, params) return resp, nil } diff --git a/server/rpc.go b/server/rpc.go index cb4b34d466..7223c56a98 100644 --- a/server/rpc.go +++ b/server/rpc.go @@ -222,7 +222,10 @@ func StartTranscodeServer(orch Orchestrator, bind string, mux *http.ServeMux, wo lp.transRPC.HandleFunc("/transcodeResults", lp.TranscodeResults) } - startAIServer(&lp) + err := startAIServer(&lp) + if err != nil { + return err + } if acceptRemoteAIWorkers { net.RegisterAIWorkerServer(s, &lp) lp.transRPC.Handle("/aiResults", lp.AIResults()) From 8b2cd992e4415b0da929d558728d7b57f3f53cdd Mon Sep 17 00:00:00 2001 From: gioelecerati <50955448+gioelecerati@users.noreply.github.com> Date: Fri, 13 Dec 2024 15:55:34 +0100 Subject: [PATCH 40/56] gateway: fix stream-status route (#3316) * gateway: fix stream-status route * fix response * update return type * stream-id * fix wildcard name 
--- server/ai_http.go | 33 --------------------------------- server/ai_mediaserver.go | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/server/ai_http.go b/server/ai_http.go index edb35664e0..c2937b2ce1 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -73,9 +73,6 @@ func startAIServer(lp *lphttp) error { lp.transRPC.Handle("/live-video-to-video", oapiReqValidator(lp.StartLiveVideoToVideo())) // Additionally, there is the '/aiResults' endpoint registered in server/rpc.go - // This endpoint is used to get the latest status of a live-video-to-video stream - lp.transRPC.HandleFunc("/stream-status/{streamID}", lp.handleStreamStatus()) - return nil } @@ -804,33 +801,3 @@ func parseMultiPartResult(body io.Reader, boundary string, pipeline string) core return wkrResult } - -// handleStreamStatus returns the latest available status of a live-video-to-video stream -func (h *lphttp) handleStreamStatus() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodGet { - http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) - return - } - - streamID := strings.TrimPrefix(r.URL.Path, "/stream-status/") - if streamID == "" { - respondWithError(w, "stream ID is required", http.StatusBadRequest) - return - } - - // Get status for specific stream - status, exists := StreamStatusStore.Get(streamID) - if !exists { - respondWithError(w, "Stream status not found", http.StatusNotFound) - return - } - - jsonData, err := json.Marshal(status) - if err != nil { - respondWithError(w, "Failed to marshal status", http.StatusInternalServerError) - return - } - respondJsonOk(w, jsonData) - } -} diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index c3042a130c..5e0ccc2af7 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -87,6 +87,9 @@ func startAIMediaServer(ls *LivepeerServer) error { 
ls.HTTPMux.Handle("/live/video-to-video/{prefix}/{stream}/start", ls.StartLiveVideo()) ls.HTTPMux.Handle("/live/video-to-video/{stream}/update", ls.UpdateLiveVideo()) + // Stream status + ls.HTTPMux.Handle("/live/video-to-video/{streamId}/status", ls.GetLiveVideoToVideoStatus()) + return nil } @@ -581,6 +584,33 @@ func (ls *LivepeerServer) UpdateLiveVideo() http.Handler { }) } +func (ls *LivepeerServer) GetLiveVideoToVideoStatus() http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + streamId := r.PathValue("streamId") + if streamId == "" { + http.Error(w, "stream id is required", http.StatusBadRequest) + return + } + + ctx := r.Context() + ctx = clog.AddVal(ctx, "stream", streamId) + + // Get status for specific stream + status, exists := StreamStatusStore.Get(streamId) + if !exists { + http.Error(w, "Stream not found", http.StatusNotFound) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(status); err != nil { + clog.Errorf(ctx, "Failed to encode stream status err=%v", err) + http.Error(w, "Failed to encode status", http.StatusInternalServerError) + return + } + }) +} + func (ls *LivepeerServer) cleanupLive(stream string) { ls.LivepeerNode.LiveMu.Lock() pub, ok := ls.LivepeerNode.LivePipelines[stream] From e19235d543df61f29c3de74e3d895d5d4cd8b22f Mon Sep 17 00:00:00 2001 From: gioelecerati <50955448+gioelecerati@users.noreply.github.com> Date: Fri, 13 Dec 2024 21:27:21 +0100 Subject: [PATCH 41/56] gateway: stream-status: nest last_restart_logs under inference_status (#3317) --- server/ai_live_video.go | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 2dc84604a0..aca5939281 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -248,12 +248,24 @@ func startEventsSubscribe(ctx context.Context, url *url.URL, params aiRequestPar // The large logs and params fields are 
only sent once and then cleared to save bandwidth. So coalesce the // incoming status with the last non-null value that we received on such fields for the status API. lastStreamStatus, _ := StreamStatusStore.Get(streamId) - if logs, ok := event["last_restart_logs"]; !ok || logs == nil { - event["last_restart_logs"] = lastStreamStatus["last_restart_logs"] - } - if params, ok := event["last_params"]; !ok || params == nil { - event["last_params"] = lastStreamStatus["last_params"] + + // Check if inference_status exists in both current and last status + inferenceStatus, hasInference := event["inference_status"].(map[string]interface{}) + lastInferenceStatus, hasLastInference := lastStreamStatus["inference_status"].(map[string]interface{}) + + if hasInference { + if logs, ok := inferenceStatus["last_restart_logs"]; !ok || logs == nil { + if hasLastInference { + inferenceStatus["last_restart_logs"] = lastInferenceStatus["last_restart_logs"] + } + } + if params, ok := inferenceStatus["last_params"]; !ok || params == nil { + if hasLastInference { + inferenceStatus["last_params"] = lastInferenceStatus["last_params"] + } + } } + StreamStatusStore.Store(streamId, event) } From e497fab91105ecd8a900a1f00b2a4648a9dfeda5 Mon Sep 17 00:00:00 2001 From: Max Holland Date: Fri, 13 Dec 2024 21:02:39 +0000 Subject: [PATCH 42/56] Publish pipeline stream events on errors (#3318) * Publish pipeline stream events on errors * review suggestion --- server/ai_mediaserver.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index 5e0ccc2af7..5191c412f1 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -503,6 +503,18 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { return } clog.Errorf(ctx, "Live video pipeline stopping: %s", err) + + capability := clog.GetVal(ctx, "capability") + monitor.SendQueueEventAsync("ai_stream_events", map[string]string{ + "type": "error", + "request_id": requestID, + 
"capability": capability, + "message": err.Error(), + "stream_id": streamID, + "pipeline_id": pipelineID, + "pipeline": pipeline, + }) + err = mediaMTXClient.KickInputConnection(ctx) if err != nil { clog.Errorf(ctx, "Failed to kick input connection: %s", err) From df2ed58edb8b6704acad77766c981f71e965d289 Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Mon, 16 Dec 2024 16:51:30 -0300 Subject: [PATCH 43/56] server: Add orch info on AI events (#3321) * server: Add orch info on AI events * Change log --- CHANGELOG_PENDING.md | 1 + server/ai_live_video.go | 8 +++++++- server/ai_process.go | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 22f1b28ed7..dba2dfde60 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -9,6 +9,7 @@ #### General #### Broadcaster +- [#3321](https://github.com/livepeer/go-livepeer/pull/3321) Add orchestrator info on live AI monitoring events #### Orchestrator diff --git a/server/ai_live_video.go b/server/ai_live_video.go index aca5939281..1069fd52d3 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -199,7 +199,7 @@ func startControlPublish(control *url.URL, params aiRequestParams) { }() } -func startEventsSubscribe(ctx context.Context, url *url.URL, params aiRequestParams) { +func startEventsSubscribe(ctx context.Context, url *url.URL, params aiRequestParams, sess *AISession) { subscriber := trickle.NewTrickleSubscriber(url.String()) stream := params.liveParams.stream streamId := params.liveParams.streamID @@ -233,6 +233,12 @@ func startEventsSubscribe(ctx context.Context, url *url.URL, params aiRequestPar event["stream_id"] = streamId event["request_id"] = params.liveParams.requestID event["pipeline_id"] = params.liveParams.pipelineID + if sess != nil { + event["orchestrator_info"] = map[string]interface{}{ + "address": sess.Address(), + "url": sess.Transcoder(), + } + } clog.Infof(ctx, "Received event for stream=%s event=%+v", stream, event) 
diff --git a/server/ai_process.go b/server/ai_process.go index 1caa6f4d9e..39f28340b0 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1073,7 +1073,7 @@ func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *A startControlPublish(control, params) startTricklePublish(ctx, pub, params, sess) startTrickleSubscribe(ctx, sub, params) - startEventsSubscribe(ctx, events, params) + startEventsSubscribe(ctx, events, params, sess) return resp, nil } From 7fe7949af9f75d5f74a4d3b6bb6e5ad1f758acbe Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Mon, 16 Dec 2024 14:51:47 -0800 Subject: [PATCH 44/56] ai/live: Slow orchestrator detection (#3308) Detect 'slow' orchs by keeping track of in-flight segments. Count the difference between segments produced and segments completed. Allow up to 3 segments in flight for now. --- server/ai_live_video.go | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 1069fd52d3..bd96128dcb 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -11,6 +11,7 @@ import ( "net/url" "os" "strings" + "sync" "time" "github.com/livepeer/go-livepeer/clog" @@ -47,6 +48,8 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara clog.Warningf(ctx, "No price info found from Orchestrator, Gateway will not send payments for the video processing") } + slowOrchChecker := &SlowOrchChecker{} + params.liveParams.segmentReader.SwitchReader(func(reader media.CloneableReader) { // check for end of stream if _, eos := reader.(*media.EOSReader); eos { @@ -56,7 +59,15 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara cancel() return } + if _, atMax := slowOrchChecker.BeginSegment(); atMax { + clog.Infof(ctx, "Orchestrator is slow - terminating") + cancel() + return + // TODO kill the rest of the processing, including ingest + // TODO switch orchestrators + } go func() { 
+ defer slowOrchChecker.EndSegment() var r io.Reader = reader if paymentProcessor != nil { r = paymentProcessor.process(reader) @@ -279,3 +290,41 @@ func startEventsSubscribe(ctx context.Context, url *url.URL, params aiRequestPar } }() } + +// Detect 'slow' orchs by keeping track of in-flight segments +// Count the difference between segments produced and segments completed +type SlowOrchChecker struct { + mu sync.Mutex + segmentCount int + completeCount int +} + +// Number of in flight segments to allow. +// Should generally not be less than 1, because +// sometimes the beginning of the current segment +// may briefly overlap with the end of the previous segment +const maxInflightSegments = 3 + +// Returns the number of segments begun so far and +// whether the max number of inflight segments was hit. +// Number of segments is not incremented if inflight max is hit. +// If inflight max is hit, returns true, false otherwise. +func (s *SlowOrchChecker) BeginSegment() (int, bool) { + // Returns `false` if there are multiple segments in-flight + // this means the orchestrator is slow reading them + // If all-OK, returns `true` + s.mu.Lock() + defer s.mu.Unlock() + if s.segmentCount >= s.completeCount+maxInflightSegments { + // There is > 1 segment in flight ... 
orchestrator is slow reading + return s.segmentCount, true + } + s.segmentCount += 1 + return s.segmentCount, false +} + +func (s *SlowOrchChecker) EndSegment() { + s.mu.Lock() + defer s.mu.Unlock() + s.completeCount += 1 +} From a842a0d3bf911ef9e918468ada6f8db4ee94e899 Mon Sep 17 00:00:00 2001 From: gioelecerati <50955448+gioelecerati@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:14:42 +0100 Subject: [PATCH 45/56] gateway: ai authWebhook: send gatewayHost to auth webhook (#3320) * gateway: AIauthWebhook: send additional node info to webhook * use gateway host * remove region from livepeernode * descriptive cfg * formatting * Improve new flag description * remove todo comment --------- Co-authored-by: Thom Shutt --- cmd/livepeer/livepeer.go | 1 + cmd/livepeer/starter/starter.go | 7 +++++++ core/livepeernode.go | 3 +++ server/ai_mediaserver.go | 1 + server/auth.go | 3 +++ 5 files changed, 15 insertions(+) diff --git a/cmd/livepeer/livepeer.go b/cmd/livepeer/livepeer.go index 9881014192..fc2c9d4a58 100755 --- a/cmd/livepeer/livepeer.go +++ b/cmd/livepeer/livepeer.go @@ -139,6 +139,7 @@ func parseLivepeerConfig() starter.LivepeerConfig { cfg.IgnoreMaxPriceIfNeeded = flag.Bool("ignoreMaxPriceIfNeeded", *cfg.IgnoreMaxPriceIfNeeded, "Set to true to allow exceeding max price condition if there is no O that meets this requirement") cfg.MinPerfScore = flag.Float64("minPerfScore", *cfg.MinPerfScore, "The minimum orchestrator's performance score a broadcaster is willing to accept") cfg.DiscoveryTimeout = flag.Duration("discoveryTimeout", *cfg.DiscoveryTimeout, "Time to wait for orchestrators to return info to be included in transcoding sessions for manifest (default = 500ms)") + cfg.GatewayHost = flag.String("gatewayHost", *cfg.GatewayHost, "External hostname on which the Gateway node is running. 
Used when telling external services how to reach the node.") // Transcoding: cfg.Orchestrator = flag.Bool("orchestrator", *cfg.Orchestrator, "Set to true to be an orchestrator") diff --git a/cmd/livepeer/starter/starter.go b/cmd/livepeer/starter/starter.go index 9489d28070..78dfc96f46 100755 --- a/cmd/livepeer/starter/starter.go +++ b/cmd/livepeer/starter/starter.go @@ -116,6 +116,7 @@ type LivepeerConfig struct { Netint *string HevcDecoding *bool TestTranscoder *bool + GatewayHost *string EthAcctAddr *string EthPassword *string EthKeystorePath *string @@ -215,6 +216,7 @@ func DefaultLivepeerConfig() LivepeerConfig { defaultAIRunnerImage := "livepeer/ai-runner:latest" defaultLiveAIAuthWebhookURL := "" defaultLivePaymentInterval := 5 * time.Second + defaultGatewayHost := "" // Onchain: defaultEthAcctAddr := "" @@ -323,6 +325,7 @@ func DefaultLivepeerConfig() LivepeerConfig { AIRunnerImage: &defaultAIRunnerImage, LiveAIAuthWebhookURL: &defaultLiveAIAuthWebhookURL, LivePaymentInterval: &defaultLivePaymentInterval, + GatewayHost: &defaultGatewayHost, // Onchain: EthAcctAddr: &defaultEthAcctAddr, @@ -1414,6 +1417,10 @@ func StartLivepeer(ctx context.Context, cfg LivepeerConfig) { *cfg.HttpAddr = defaultAddr(*cfg.HttpAddr, "127.0.0.1", BroadcasterRpcPort) *cfg.CliAddr = defaultAddr(*cfg.CliAddr, "127.0.0.1", BroadcasterCliPort) + if *cfg.GatewayHost != "" { + n.GatewayHost = *cfg.GatewayHost + } + bcast := core.NewBroadcaster(n) orchBlacklist := parseOrchBlacklist(cfg.OrchBlacklist) if *cfg.OrchPerfStatsURL != "" && *cfg.Region != "" { diff --git a/core/livepeernode.go b/core/livepeernode.go index 93f79b3ab2..40cfa53fe4 100644 --- a/core/livepeernode.go +++ b/core/livepeernode.go @@ -157,6 +157,9 @@ type LivepeerNode struct { LiveAITrickleHostForRunner string LiveAIAuthApiKey string LivePaymentInterval time.Duration + + // Gateway + GatewayHost string } type LivePipeline struct { diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index 
5191c412f1..dc1a43c8d8 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -446,6 +446,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { Stream: streamName, Type: sourceTypeStr, QueryParams: queryParams, + GatewayHost: ls.LivepeerNode.GatewayHost, }) if err != nil { kickErr := mediaMTXClient.KickInputConnection(ctx) diff --git a/server/auth.go b/server/auth.go index 5d768f97b1..400e29742a 100644 --- a/server/auth.go +++ b/server/auth.go @@ -107,6 +107,9 @@ type AIAuthRequest struct { // Query parameters that came with the stream, if any QueryParams string `json:"query_params,omitempty"` + // Gateway host + GatewayHost string `json:"gateway_host"` + // TODO not sure what params we need yet } From 3ac597235b1458a6f3154458b4000e09d8dfa2ec Mon Sep 17 00:00:00 2001 From: Max Holland Date: Tue, 17 Dec 2024 15:49:37 +0000 Subject: [PATCH 46/56] Fix context for logging (#3322) --- server/ai_live_video.go | 2 +- server/ai_mediaserver.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index bd96128dcb..3f1a59aacc 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -30,7 +30,7 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara } // Start payments which probes a segment every "paymentProcessInterval" and sends a payment - ctx, cancel := context.WithCancel(context.Background()) + ctx, cancel := context.WithCancel(ctx) priceInfo := sess.OrchestratorInfo.PriceInfo var paymentProcessor *LivePaymentProcessor if priceInfo != nil && priceInfo.PricePerUnit != 0 { diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index dc1a43c8d8..cca993883f 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -481,7 +481,8 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { requestID := string(core.RandomManifestID()) ctx = clog.AddVal(ctx, "request_id", requestID) - clog.Infof(ctx, "Received live video AI 
request for %s. pipelineParams=%v streamID=%s", streamName, pipelineParams, streamID) + ctx = clog.AddVal(ctx, "stream_id", streamID) + clog.Infof(ctx, "Received live video AI request for %s. pipelineParams=%v", streamName, pipelineParams) // Kick off the RTMP pull and segmentation as soon as possible ssr := media.NewSwitchableSegmentReader() From 706fda152df193947892bd4664cedda55da97f3d Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Tue, 17 Dec 2024 13:03:18 -0800 Subject: [PATCH 47/56] ai/live: Add limited publish retries (#3315) Only retry if the error occurs before sending any data, and if the next segment hasn't arrived yet. --- server/ai_live_video.go | 49 ++++++++++++++++++----- trickle/trickle_publisher.go | 77 ++++++++++++++++++++++++++++-------- trickle/trickle_server.go | 6 ++- 3 files changed, 106 insertions(+), 26 deletions(-) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 3f1a59aacc..c37eddb5fd 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -54,31 +54,56 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara // check for end of stream if _, eos := reader.(*media.EOSReader); eos { if err := publisher.Close(); err != nil { - clog.Infof(ctx, "Error closing trickle publisher. err=%s", err) + clog.Infof(ctx, "Error closing trickle publisher. err=%v", err) } cancel() return } - if _, atMax := slowOrchChecker.BeginSegment(); atMax { + thisSeq, atMax := slowOrchChecker.BeginSegment() + if atMax { clog.Infof(ctx, "Orchestrator is slow - terminating") cancel() return // TODO kill the rest of the processing, including ingest // TODO switch orchestrators } - go func() { + go func(seq int) { defer slowOrchChecker.EndSegment() var r io.Reader = reader if paymentProcessor != nil { r = paymentProcessor.process(reader) } - clog.V(8).Infof(ctx, "trickle publish writing data") - // TODO this blocks! very bad! 
- if err := publisher.Write(r); err != nil { - clog.Infof(ctx, "Error writing to trickle publisher. err=%s", err) + clog.V(8).Infof(ctx, "trickle publish writing data seq=%d", seq) + segment, err := publisher.Next() + if err != nil { + clog.Infof(ctx, "error getting next publish handle; dropping segment err=%v", err) + return } - }() + for { + currentSeq := slowOrchChecker.GetCount() + if seq != currentSeq { + clog.Infof(ctx, "Next segment has already started; skipping this one seq=%d currentSeq=%d", seq, currentSeq) + return + } + n, err := segment.Write(r) + if err == nil { + // no error, all done, let's leave + return + } + // Retry segment only if nothing has been sent yet + // and the next segment has not yet started + // otherwise drop + if n > 0 { + clog.Infof(ctx, "Error publishing segment; dropping remainder wrote=%d err=%v", n, err) + return + } + clog.Infof(ctx, "Error publishing segment before writing; retrying err=%v", err) + // Clone in case read head was incremented somewhere, which cloning ressets + r = reader.Clone() + time.Sleep(250 * time.Millisecond) + } + }(thisSeq) }) clog.Infof(ctx, "trickle pub") } @@ -251,7 +276,7 @@ func startEventsSubscribe(ctx context.Context, url *url.URL, params aiRequestPar } } - clog.Infof(ctx, "Received event for stream=%s event=%+v", stream, event) + clog.V(8).Infof(ctx, "Received event for stream=%s event=%+v", stream, event) eventType, ok := event["type"].(string) if !ok { @@ -328,3 +353,9 @@ func (s *SlowOrchChecker) EndSegment() { defer s.mu.Unlock() s.completeCount += 1 } + +func (s *SlowOrchChecker) GetCount() int { + s.mu.Lock() + defer s.mu.Unlock() + return s.segmentCount +} diff --git a/trickle/trickle_publisher.go b/trickle/trickle_publisher.go index a6ca937ff8..2caae4459f 100644 --- a/trickle/trickle_publisher.go +++ b/trickle/trickle_publisher.go @@ -36,6 +36,10 @@ type pendingPost struct { index int writer *io.PipeWriter errCh chan error + + // needed to help with reconnects + written bool + client 
*TricklePublisher } // NewTricklePublisher creates a new trickle stream client @@ -53,7 +57,6 @@ func NewTricklePublisher(url string) (*TricklePublisher, error) { return c, nil } -// Acquire lock to manage access to pendingPost and index // NB expects to have the lock already since we mutate the index func (c *TricklePublisher) preconnect() (*pendingPost, error) { @@ -113,6 +116,7 @@ func (c *TricklePublisher) preconnect() (*pendingPost, error) { writer: pw, index: index, errCh: errCh, + client: c, }, nil } @@ -136,11 +140,10 @@ func (c *TricklePublisher) Close() error { return nil } -// Write sends data to the current segment, sets up the next segment concurrently, and blocks until completion -func (c *TricklePublisher) Write(data io.Reader) error { - +func (c *TricklePublisher) Next() (*pendingPost, error) { // Acquire lock to manage access to pendingPost and index c.writeLock.Lock() + defer c.writeLock.Unlock() // Get the writer to use pp := c.pendingPost @@ -148,29 +151,61 @@ func (c *TricklePublisher) Write(data io.Reader) error { p, err := c.preconnect() if err != nil { c.writeLock.Unlock() - return err + return nil, err } pp = p } - writer := pp.writer - index := pp.index - errCh := pp.errCh // Set up the next connection nextPost, err := c.preconnect() if err != nil { c.writeLock.Unlock() - return err + return nil, err } c.pendingPost = nextPost - // Now unlock so the copy does not block - c.writeLock.Unlock() + return pp, nil +} + +func (p *pendingPost) reconnect() (*pendingPost, error) { + // This is a little gnarly but works for now: + // Set the publisher's sequence sequence to the intended reconnect + // Call publisher's preconnect (which increments its sequence) + // then reset publisher's sequence back to the original + //slog.Info("Re-connecting", "url", p.client.baseURL, "seq", p.client.index) + p.client.writeLock.Lock() + defer p.client.writeLock.Unlock() + currentSeq := p.client.index + p.client.index = p.index + pp, err := p.client.preconnect() + 
p.client.index = currentSeq + return pp, err +} + +func (p *pendingPost) Write(data io.Reader) (int64, error) { + + // If writing multiple times, reconnect + if p.written { + pp, err := p.reconnect() + if err != nil { + return 0, err + } + p = pp + } + + var ( + writer = p.writer + index = p.index + errCh = p.errCh + ) + + // Mark as written + p.written = true // before writing, check for error from preconnects select { case err := <-errCh: - return err + return 0, err default: // no error, continue } @@ -192,18 +227,28 @@ func (c *TricklePublisher) Write(data io.Reader) error { // also prioritize errors over this channel compared to io errors // such as "read/write on closed pipe" if err := <-errCh; err != nil { - return err + return n, err } if ioError != nil { - return fmt.Errorf("error streaming data to segment %d: %w", index, err) + return n, fmt.Errorf("error streaming data to segment %d: %w", index, ioError) } if closeErr != nil { - return fmt.Errorf("error closing writer for segment %d: %w", index, err) + return n, fmt.Errorf("error closing writer for segment %d: %w", index, closeErr) } - return nil + return n, nil +} + +// Write sends data to the current segment, sets up the next segment concurrently, and blocks until completion +func (c *TricklePublisher) Write(data io.Reader) error { + pp, err := c.Next() + if err != nil { + return err + } + _, err = pp.Write(data) + return err } func humanBytes(bytes int64) string { diff --git a/trickle/trickle_server.go b/trickle/trickle_server.go index ddeb6679ff..785cd38590 100644 --- a/trickle/trickle_server.go +++ b/trickle/trickle_server.go @@ -333,7 +333,9 @@ func (s *Stream) handlePost(w http.ResponseWriter, r *http.Request, idx int) { if exists { slog.Warn("Overwriting existing entry", "idx", idx) // Overwrite anything that exists now. TODO figure out a safer behavior? 
- return + // TODO fix concurrent writes to the same segment; would be very bad + segment.buffer.Reset() + segment.closed = false } // Wrap the request body with the custom timeoutReader so we can send @@ -527,6 +529,8 @@ func (s *Segment) readData(startPos int) ([]byte, bool) { } if startPos > totalLen { slog.Info("Invalid start pos, invoking eof") + // This might happen if the buffer was reset + // eg because of a repeated POST return nil, true } if s.closed { From d924c181285f1f23fee0fc6f88c1a7c689fe50ac Mon Sep 17 00:00:00 2001 From: Max Holland Date: Wed, 18 Dec 2024 23:07:39 +0000 Subject: [PATCH 48/56] Add trickle subscribe log line (#3326) --- server/ai_live_video.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index c37eddb5fd..9e5dc5ec64 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -133,6 +133,7 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa break } var segment *http.Response + clog.V(8).Infof(ctx, "trickle subscribe read data begin") segment, err = subscriber.Read() if err != nil { if errors.Is(err, trickle.EOS) { @@ -151,7 +152,7 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa continue } retries = 0 - clog.V(8).Infof(ctx, "trickle subscribe read data") + clog.V(8).Infof(ctx, "trickle subscribe read data end") if err = copySegment(segment, w); err != nil { params.liveParams.stopPipeline(fmt.Errorf("trickle subscribe error copying: %w", err)) From fa50112c23bbe4e8243380245e13f68bfc28b311 Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Wed, 18 Dec 2024 16:44:22 -0800 Subject: [PATCH 49/56] ai/live: Emit events for trickle errors (#3324) Also clean up the stream properly on a couple of publish errors. 
--- server/ai_live_video.go | 18 +++++++++++++++++- server/ai_mediaserver.go | 20 ++++++++++---------- server/ai_process.go | 7 ++++++- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index 9e5dc5ec64..edaebdd6eb 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "log/slog" + "maps" "net/http" "net/url" "os" @@ -27,6 +28,8 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara publisher, err := trickle.NewTricklePublisher(url.String()) if err != nil { clog.Infof(ctx, "error publishing trickle. err=%s", err) + params.liveParams.stopPipeline(fmt.Errorf("Error publishing trickle %w", err)) + return } // Start payments which probes a segment every "paymentProcessInterval" and sends a payment @@ -62,9 +65,9 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara thisSeq, atMax := slowOrchChecker.BeginSegment() if atMax { clog.Infof(ctx, "Orchestrator is slow - terminating") + params.liveParams.stopPipeline(fmt.Errorf("slow orchestrator")) cancel() return - // TODO kill the rest of the processing, including ingest // TODO switch orchestrators } go func(seq int) { @@ -78,12 +81,14 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara segment, err := publisher.Next() if err != nil { clog.Infof(ctx, "error getting next publish handle; dropping segment err=%v", err) + params.liveParams.sendErrorEvent(fmt.Errorf("Missing next handle %v", err)) return } for { currentSeq := slowOrchChecker.GetCount() if seq != currentSeq { clog.Infof(ctx, "Next segment has already started; skipping this one seq=%d currentSeq=%d", seq, currentSeq) + params.liveParams.sendErrorEvent(fmt.Errorf("Next segment has started")) return } n, err := segment.Write(r) @@ -96,6 +101,7 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara // otherwise drop if n > 0 { 
clog.Infof(ctx, "Error publishing segment; dropping remainder wrote=%d err=%v", n, err) + params.liveParams.sendErrorEvent(fmt.Errorf("Error publishing, wrote %d dropping %v", n, err)) return } clog.Infof(ctx, "Error publishing segment before writing; retrying err=%v", err) @@ -148,6 +154,7 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa return } retries++ + params.liveParams.sendErrorEvent(err) time.Sleep(retryPause) continue } @@ -360,3 +367,12 @@ func (s *SlowOrchChecker) GetCount() int { defer s.mu.Unlock() return s.segmentCount } + +func LiveErrorEventSender(ctx context.Context, event map[string]string) func(err error) { + return func(err error) { + ev := maps.Clone(event) + ev["capability"] = clog.GetVal(ctx, "capability") + ev["message"] = err.Error() + monitor.SendQueueEventAsync("ai_stream_events", ev) + } +} diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index cca993883f..4099a8e1dc 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -498,6 +498,14 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { ls.cleanupLive(streamName) }() + sendErrorEvent := LiveErrorEventSender(ctx, map[string]string{ + "type": "error", + "request_id": requestID, + "stream_id": streamID, + "pipeline_id": pipelineID, + "pipeline": pipeline, + }) + // this function is called when the pipeline hits a fatal error, we kick the input connection to allow // the client to reconnect and restart the pipeline stopPipeline := func(err error) { @@ -506,16 +514,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { } clog.Errorf(ctx, "Live video pipeline stopping: %s", err) - capability := clog.GetVal(ctx, "capability") - monitor.SendQueueEventAsync("ai_stream_events", map[string]string{ - "type": "error", - "request_id": requestID, - "capability": capability, - "message": err.Error(), - "stream_id": streamID, - "pipeline_id": pipelineID, - "pipeline": pipeline, - }) + sendErrorEvent(err) err = 
mediaMTXClient.KickInputConnection(ctx) if err != nil { @@ -537,6 +536,7 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { streamID: streamID, pipelineID: pipelineID, stopPipeline: stopPipeline, + sendErrorEvent: sendErrorEvent, }, } diff --git a/server/ai_process.go b/server/ai_process.go index 39f28340b0..e25763a404 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -106,7 +106,12 @@ type liveRequestParams struct { pipelineID string paymentProcessInterval time.Duration - stopPipeline func(error) + + // Stops the pipeline with an error. Also kicks the input + stopPipeline func(error) + + // Report an error event + sendErrorEvent func(error) } // CalculateTextToImageLatencyScore computes the time taken per pixel for an text-to-image request. From e781cacaa32583e68328d3b8831df65e3253b6cd Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Thu, 19 Dec 2024 02:21:11 -0800 Subject: [PATCH 50/56] ai/live: Signal "stream exists but segment doesn't" with status 470 (#3327) Clients can use this information to jump to the leading edge if there are gaps in the sequence or they have fallen behind the window of available segments. 
--- trickle/local_subscriber.go | 7 ++++--- trickle/trickle_server.go | 15 +++++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/trickle/local_subscriber.go b/trickle/local_subscriber.go index 55f6a8dba8..a7590b2d5b 100644 --- a/trickle/local_subscriber.go +++ b/trickle/local_subscriber.go @@ -39,7 +39,7 @@ func (c *TrickleLocalSubscriber) Read() (*TrickleData, error) { } c.mu.Lock() defer c.mu.Unlock() - segment, exists := stream.getForRead(c.seq) + segment, latestSeq, exists := stream.getForRead(c.seq) if !exists { return nil, errors.New("seq not found") } @@ -70,8 +70,9 @@ func (c *TrickleLocalSubscriber) Read() (*TrickleData, error) { return &TrickleData{ Reader: r, Metadata: map[string]string{ - "Lp-Trickle-Seq": strconv.Itoa(segment.idx), - "Content-Type": stream.mimeType, + "Lp-Trickle-Latest": strconv.Itoa(latestSeq), + "Lp-Trickle-Seq": strconv.Itoa(segment.idx), + "Content-Type": stream.mimeType, }, // TODO take more metadata from http headers }, nil } diff --git a/trickle/trickle_server.go b/trickle/trickle_server.go index 785cd38590..eecc509d6d 100644 --- a/trickle/trickle_server.go +++ b/trickle/trickle_server.go @@ -403,7 +403,7 @@ func (s *Stream) getForWrite(idx int) (*Segment, bool) { return segment, false } -func (s *Stream) getForRead(idx int) (*Segment, bool) { +func (s *Stream) getForRead(idx int) (*Segment, int, bool) { s.mutex.RLock() defer s.mutex.RUnlock() exists := func(seg *Segment, i int) bool { @@ -421,7 +421,7 @@ func (s *Stream) getForRead(idx int) (*Segment, bool) { slog.Info("GET precreating", "stream", s.name, "idx", idx, "latest", s.latestWrite) } slog.Info("GET segment", "stream", s.name, "idx", idx, "latest", s.latestWrite, "exists?", exists(segment, idx)) - return segment, exists(segment, idx) + return segment, s.latestWrite, exists(segment, idx) } func (sm *Server) handleGet(w http.ResponseWriter, r *http.Request) { @@ -439,9 +439,13 @@ func (sm *Server) handleGet(w http.ResponseWriter, r 
*http.Request) { } func (s *Stream) handleGet(w http.ResponseWriter, r *http.Request, idx int) { - segment, exists := s.getForRead(idx) + segment, latestSeq, exists := s.getForRead(idx) if !exists { - http.Error(w, "Entry not found", http.StatusNotFound) + // Special status to indicate "stream exists but segment doesn't" + w.Header().Set("Lp-Trickle-Latest", strconv.Itoa(latestSeq)) + w.Header().Set("Lp-Trickle-Seq", strconv.Itoa(idx)) + w.WriteHeader(470) + w.Write([]byte("Entry not found")) return } @@ -469,6 +473,9 @@ func (s *Stream) handleGet(w http.ResponseWriter, r *http.Request, idx int) { data, eof := subscriber.readData() if len(data) > 0 { if totalWrites <= 0 { + if segment.idx != latestSeq { + w.Header().Set("Lp-Trickle-Latest", strconv.Itoa(latestSeq)) + } w.Header().Set("Lp-Trickle-Seq", strconv.Itoa(segment.idx)) w.Header().Set("Content-Type", s.mimeType) } From 7639a96b23de5a3e51fc12d8693166a42121f58a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Leszko?= Date: Thu, 19 Dec 2024 14:11:58 +0100 Subject: [PATCH 51/56] Increase the initial fee (#3328) We've seen a few "insufficient balance" in staging caused by high-resolution streams, so better to increase the initial fee which acts as payment buffer --- server/ai_process.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/ai_process.go b/server/ai_process.go index e25763a404..ea15bd43ea 100644 --- a/server/ai_process.go +++ b/server/ai_process.go @@ -1023,7 +1023,7 @@ func submitAudioToText(ctx context.Context, params aiRequestParams, sess *AISess return &res, nil } -const initPixelsToPay = 45 * 30 * 1280 * 720 // 45 seconds, 30fps, 720p +const initPixelsToPay = 30 * 30 * 3200 * 1800 // 30 seconds, 30fps, 1800p func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *AISession, req worker.GenLiveVideoToVideoJSONRequestBody) (any, error) { // Live Video should not reuse the existing session balance, because it could lead to not sending the init From 
def771268d0851182d39b9258c0a55b77d8cf46a Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Thu, 19 Dec 2024 11:44:52 -0300 Subject: [PATCH 52/56] go.mod: Update ai-worker lib for stability improvements (#3329) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 2c35deb7ff..2b7806e0c0 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/uuid v1.6.0 github.com/jaypipes/ghw v0.10.0 github.com/jaypipes/pcidb v1.0.0 - github.com/livepeer/ai-worker v0.12.7-0.20241205213704-87d6efe82510 + github.com/livepeer/ai-worker v0.12.7-0.20241219141308-c19289d128a3 github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18 github.com/livepeer/lpms v0.0.0-20241203012405-fc96cadb6393 diff --git a/go.sum b/go.sum index c6ffdd039a..14a5b8a051 100644 --- a/go.sum +++ b/go.sum @@ -605,8 +605,8 @@ github.com/libp2p/go-netroute v0.2.0 h1:0FpsbsvuSnAhXFnCY0VLFbJOzaK0VnP0r1QT/o4n github.com/libp2p/go-netroute v0.2.0/go.mod h1:Vio7LTzZ+6hoT4CMZi5/6CpY3Snzh2vgZhWgxMNwlQI= github.com/libp2p/go-openssl v0.1.0 h1:LBkKEcUv6vtZIQLVTegAil8jbNpJErQ9AnT+bWV+Ooo= github.com/libp2p/go-openssl v0.1.0/go.mod h1:OiOxwPpL3n4xlenjx2h7AwSGaFSC/KZvf6gNdOBQMtc= -github.com/livepeer/ai-worker v0.12.7-0.20241205213704-87d6efe82510 h1:rPMpkf43tOa8eixmQkBvYbgGleRWPEpKu3P7FKgtPnc= -github.com/livepeer/ai-worker v0.12.7-0.20241205213704-87d6efe82510/go.mod h1:ZibfmZQQh6jFvnPLHeIPInghfX5ln+JpN845nS3GuyM= +github.com/livepeer/ai-worker v0.12.7-0.20241219141308-c19289d128a3 h1:uutmGZq2YdIKnKhn6QGHtGnKfBGYAUMMOr44LXYs23w= +github.com/livepeer/ai-worker v0.12.7-0.20241219141308-c19289d128a3/go.mod h1:ZibfmZQQh6jFvnPLHeIPInghfX5ln+JpN845nS3GuyM= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b h1:VQcnrqtCA2UROp7q8ljkh2XA/u0KRgVv0S1xoUvOweE= github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b/go.mod 
h1:hwJ5DKhl+pTanFWl+EUpw1H7ukPO/H+MFpgA7jjshzw= github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded h1:ZQlvR5RB4nfT+cOQee+WqmaDOgGtP2oDMhcVvR4L0yA= From 5a756c44abade6e3bf7b789e6f4a4a840a22c6dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Leszko?= Date: Thu, 19 Dec 2024 18:26:04 +0100 Subject: [PATCH 53/56] Disable payment check on the O's side (#3330) * Disable payment check on the O's side We encounter "insufficient balance" error from time to time on prod. This needs investigation, but since we're not using public Os, let's temporarily not stop streams even if not enough payment * Disable payment check on the O's side We encounter "insufficient balance" error from time to time on prod. This needs investigation, but since we're not using public Os, let's temporarily not stop streams even if not enough payment --- server/ai_http.go | 16 ++++++++++------ server/live_payment_processor.go | 3 ++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/server/ai_http.go b/server/ai_http.go index c2937b2ce1..f738f3df0c 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -181,12 +181,16 @@ func (h *lphttp) StartLiveVideoToVideo() http.Handler { sessionID: mid, }) if err != nil { - slog.Warn("Error accounting payment, stopping stream processing", "err", err) - pubCh.Close() - subCh.Close() - eventsCh.Close() - controlPubCh.Close() - cancel() + slog.Warn("Error accounting payment", "err", err) + // We encounter "insufficient balance" error from time to time on prod.
+ // This needs investigation, but since we're not using public Os, + // let's temporarily not stop streams even if not enough payment + //slog.Warn("Error accounting payment, stopping stream processing", "err", err) + //pubCh.Close() + //subCh.Close() + //eventsCh.Close() + //controlPubCh.Close() + //cancel() } return err } diff --git a/server/live_payment_processor.go b/server/live_payment_processor.go index 48bfe99193..c73ad59b1a 100644 --- a/server/live_payment_processor.go +++ b/server/live_payment_processor.go @@ -94,7 +94,8 @@ func (p *LivePaymentProcessor) processOne(timestamp time.Time) { err := p.processSegmentFunc(int64(pixelsSinceLastProcessed)) if err != nil { slog.Error("Error processing payment", "err", err) - return + // Temporarily ignore failing payments, because they are not critical while we're using our own Os + // return } p.lastProcessedMu.Lock() From 2ff0054681c0d867229cbc245921d3b86ff64556 Mon Sep 17 00:00:00 2001 From: Dmytrol <46675332+Dimitrolito@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:39:28 +0200 Subject: [PATCH 54/56] Fix Typographical Errors in Documentation and Code Comments (#3325) * typos util.go * typos README.md * typos wizard_transcoder.go * typos httpcli.md * typos rtmpwebhookauth.md * typos stub.go * Fix bad grammar --------- Co-authored-by: Thom Shutt --- cmd/devtool/README.md | 2 +- cmd/livepeer_cli/wizard_transcoder.go | 2 +- common/util.go | 2 +- doc/httpcli.md | 2 +- doc/rtmpwebhookauth.md | 4 ++-- pm/stub.go | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmd/devtool/README.md b/cmd/devtool/README.md index 7710a0a078..63eea3a48d 100644 --- a/cmd/devtool/README.md +++ b/cmd/devtool/README.md @@ -27,7 +27,7 @@ docker run -p 8545:8545 -p 8546:8546 --name geth-with-livepeer-protocol livepeer This command will submit the setup transactions for a broadcaster and generate the Bash script `run_broadcaster_.sh` which can be used to start a broadcaster node.
-### Step 3: Set up a orchestrator/transcoder +### Step 3: Set up an orchestrator/transcoder `go run cmd/devtool/devtool.go setup transcoder` diff --git a/cmd/livepeer_cli/wizard_transcoder.go b/cmd/livepeer_cli/wizard_transcoder.go index 2416aadbd0..262a9b9140 100644 --- a/cmd/livepeer_cli/wizard_transcoder.go +++ b/cmd/livepeer_cli/wizard_transcoder.go @@ -108,7 +108,7 @@ func (w *wizard) activateOrchestrator() { val := w.getOrchestratorConfigFormValues() if d.BondedAmount.Cmp(big.NewInt(0)) <= 0 || d.DelegateAddress != d.Address { - fmt.Printf("You must bond to yourself in order to become a orchestrator\n") + fmt.Printf("You must bond to yourself in order to become an orchestrator\n") rebond := false diff --git a/common/util.go b/common/util.go index 4a408ba6c8..19fb0b6a9c 100644 --- a/common/util.go +++ b/common/util.go @@ -272,7 +272,7 @@ func PriceToFixed(price *big.Rat) (int64, error) { return ratToFixed(price, priceScalingFactor) } -// FixedToPrice converts an fixed point number with 3 decimal places represented as in int64 into a big.Rat +// FixedToPrice converts a fixed point number with 3 decimal places represented as in int64 into a big.Rat func FixedToPrice(price int64) *big.Rat { return big.NewRat(price, priceScalingFactor) } diff --git a/doc/httpcli.md b/doc/httpcli.md index 2a5d9e255d..8ff77761d0 100644 --- a/doc/httpcli.md +++ b/doc/httpcli.md @@ -1,6 +1,6 @@ # HTTP endpoint -The Livepeer node exposes a HTTP interface for monitoring and managing the node. This is how the `livepeer_cli` tool interfaces with a running node. +The Livepeer node exposes an HTTP interface for monitoring and managing the node. This is how the `livepeer_cli` tool interfaces with a running node. By default, the CLI listens to localhost:7935. This can be adjusted with the -cliAddr `:` flag. 
## Available endpoints: diff --git a/doc/rtmpwebhookauth.md b/doc/rtmpwebhookauth.md index 1b9bdafea3..63e30b521e 100644 --- a/doc/rtmpwebhookauth.md +++ b/doc/rtmpwebhookauth.md @@ -2,7 +2,7 @@ Incoming streams can be authenticated using webhooks on both orchestrator and broadcaster nodes. To use these webhooks, node operators must implement their own web service / endpoint to be accessed only by the Livepeer node. As new streams appear, the Livepeer node will call this endpoint to determine whether the given stream is allowed. -The webhook server should respond with HTTP status code `200` in order to authenticate / authorize the stream. A response with a HTTP status code other than `200` will cause the Livepeer node to disconnect the stream. +The webhook server should respond with HTTP status code `200` in order to authenticate / authorize the stream. A response with an HTTP status code other than `200` will cause the Livepeer node to disconnect the stream. To enable webhook authentication functionality, the Livepeer node should be started with the `-authWebhookUrl` flag, along with the webhook endpoint URL. @@ -74,4 +74,4 @@ If a valid `priceInfo` object is provided in the response the orchestrator will "pixelsPerUnit": number } } -``` \ No newline at end of file +``` diff --git a/pm/stub.go b/pm/stub.go index 44b8ab0961..33ba0352d1 100644 --- a/pm/stub.go +++ b/pm/stub.go @@ -453,7 +453,7 @@ func (m *MockRecipient) ReceiveTicket(ticket *Ticket, sig []byte, seed *big.Int) } // RedeemWinningTickets redeems all winning tickets with the broker -// for a all sessionIDs +// for all sessionIDs func (m *MockRecipient) RedeemWinningTickets(sessionIDs []string) error { args := m.Called(sessionIDs) return args.Error(0) From 0bfd28b38a0cccb2d55bcc8eaff4a73f92f1c09e Mon Sep 17 00:00:00 2001 From: Josh Allmann Date: Fri, 20 Dec 2024 08:51:53 -0800 Subject: [PATCH 55/56] ai/live: Handle more special cases in trickle. 
(#3332) * ai/live: More error handling for gotrickle clients. Handles additional stopping conditions in publisher such as 404 Handles "stream exists but segment doesn't" condition in subscriber These lead to: 1. more timely shutdowns when necessary, and 2. faster recovery if a subscription falls behind * ai/live: Close a segment if it will be dropped. This is being polite to notify any subscribers that might be waiting for the segment that the segment will not be around. Not required if the segment is written normally, and subscribers will still work fine without; it would just take longer to happen when the segment drops out of the window of active segments. --- server/ai_live_video.go | 14 ++++++++++- trickle/trickle_publisher.go | 33 +++++++++++++++++++++++++ trickle/trickle_server.go | 45 +++++++++++++++++++++++++++++++---- trickle/trickle_subscriber.go | 40 +++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+), 5 deletions(-) diff --git a/server/ai_live_video.go b/server/ai_live_video.go index edaebdd6eb..01d35f3b03 100644 --- a/server/ai_live_video.go +++ b/server/ai_live_video.go @@ -89,6 +89,7 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara if seq != currentSeq { clog.Infof(ctx, "Next segment has already started; skipping this one seq=%d currentSeq=%d", seq, currentSeq) params.liveParams.sendErrorEvent(fmt.Errorf("Next segment has started")) + segment.Close() return } n, err := segment.Write(r) @@ -96,12 +97,18 @@ func startTricklePublish(ctx context.Context, url *url.URL, params aiRequestPara // no error, all done, let's leave return } + if errors.Is(err, trickle.StreamNotFoundErr) { + clog.Infof(ctx, "Stream no longer exists on orchestrator; terminating") + params.liveParams.stopPipeline(fmt.Errorf("Stream does not exist")) + return + } // Retry segment only if nothing has been sent yet // and the next segment has not yet started // otherwise drop if n > 0 { clog.Infof(ctx, "Error publishing segment; 
dropping remainder wrote=%d err=%v", n, err) params.liveParams.sendErrorEvent(fmt.Errorf("Error publishing, wrote %d dropping %v", n, err)) + segment.Close() return } clog.Infof(ctx, "Error publishing segment before writing; retrying err=%v", err) @@ -142,10 +149,15 @@ func startTrickleSubscribe(ctx context.Context, url *url.URL, params aiRequestPa clog.V(8).Infof(ctx, "trickle subscribe read data begin") segment, err = subscriber.Read() if err != nil { - if errors.Is(err, trickle.EOS) { + if errors.Is(err, trickle.EOS) || errors.Is(err, trickle.StreamNotFoundErr) { params.liveParams.stopPipeline(fmt.Errorf("trickle subscribe end of stream: %w", err)) return } + var sequenceNonexistent *trickle.SequenceNonexistent + if errors.As(err, &sequenceNonexistent) { + // stream exists but segment doesn't, so skip to leading edge + subscriber.SetSeq(sequenceNonexistent.Latest) + } // TODO if not EOS then signal a new orchestrator is needed err = fmt.Errorf("trickle subscribe error reading: %w", err) clog.Infof(ctx, "%s", err) diff --git a/trickle/trickle_publisher.go b/trickle/trickle_publisher.go index 2caae4459f..0ad6987859 100644 --- a/trickle/trickle_publisher.go +++ b/trickle/trickle_publisher.go @@ -241,6 +241,39 @@ func (p *pendingPost) Write(data io.Reader) (int64, error) { return n, nil } +/* +Close a segment. This is a polite action to notify any +subscribers that might be waiting for this segment. + +Only needed if the segment is dropped or otherwise errored; +not required if the segment is written normally. + +Note that subscribers still work fine even without this call; +it would just take longer for them to stop waiting when +the current segment drops out of the window of active segments. 
+*/ +func (p *pendingPost) Close() error { + p.writer.Close() + url := fmt.Sprintf("%s/%d", p.client.baseURL, p.index) + req, err := http.NewRequest("DELETE", url, nil) + if err != nil { + return err + } + resp, err := (&http.Client{Transport: &http.Transport{ + // ignore orch certs for now + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }}).Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return &HTTPError{Code: resp.StatusCode, Body: string(body)} + } + return nil +} + // Write sends data to the current segment, sets up the next segment concurrently, and blocks until completion func (c *TricklePublisher) Write(data io.Reader) error { pp, err := c.Next() diff --git a/trickle/trickle_server.go b/trickle/trickle_server.go index eecc509d6d..e7d2b148a3 100644 --- a/trickle/trickle_server.go +++ b/trickle/trickle_server.go @@ -55,6 +55,7 @@ type Stream struct { name string mimeType string writeTime time.Time + closed bool } type Segment struct { @@ -115,6 +116,7 @@ func ConfigureServer(config TrickleServerConfig) *Server { mux.HandleFunc("POST "+basePath+"{streamName}", streamManager.handleCreate) mux.HandleFunc("GET "+basePath+"{streamName}/{idx}", streamManager.handleGet) mux.HandleFunc("POST "+basePath+"{streamName}/{idx}", streamManager.handlePost) + mux.HandleFunc("DELETE "+basePath+"{streamName}/{idx}", streamManager.closeSeq) mux.HandleFunc("DELETE "+basePath+"{streamName}", streamManager.handleDelete) return streamManager } @@ -185,7 +187,7 @@ func (sm *Server) clearAllStreams() { // TODO update changefeed for _, stream := range sm.streams { - stream.clear() + stream.close() } sm.streams = make(map[string]*Stream) } @@ -213,13 +215,14 @@ func (sm *Server) sweepIdleChannels() { } } -func (s *Stream) clear() { +func (s *Stream) close() { s.mutex.Lock() defer s.mutex.Unlock() for _, segment := range s.segments { segment.close() } s.segments = 
make([]*Segment, maxSegmentsPerStream) + s.closed = true } func (sm *Server) closeStream(streamName string) error { @@ -230,7 +233,7 @@ func (sm *Server) closeStream(streamName string) error { // TODO there is a bit of an issue around session reuse - stream.clear() + stream.close() sm.mutex.Lock() delete(sm.streams, streamName) sm.mutex.Unlock() @@ -258,6 +261,28 @@ func (sm *Server) handleDelete(w http.ResponseWriter, r *http.Request) { } } +func (sm *Server) closeSeq(w http.ResponseWriter, r *http.Request) { + s, exists := sm.getStream(r.PathValue("streamName")) + if !exists { + http.Error(w, "Stream not found", http.StatusNotFound) + return + } + idx, err := strconv.Atoi(r.PathValue("idx")) + if err != nil { + http.Error(w, "Invalid idx", http.StatusBadRequest) + return + } + slog.Info("DELETE closing seq", "channel", s.name, "seq", idx) + s.mutex.RLock() + seg := s.segments[idx%maxSegmentsPerStream] + s.mutex.RUnlock() + if seg == nil || seg.idx != idx { + http.Error(w, "Nonexistent segment", http.StatusBadRequest) + return + } + seg.close() +} + func (sm *Server) handleCreate(w http.ResponseWriter, r *http.Request) { stream := sm.getOrCreateStream(r.PathValue("streamName"), r.Header.Get("Expect-Content"), false) if stream == nil { @@ -489,8 +514,20 @@ func (s *Stream) handleGet(w http.ResponseWriter, r *http.Request, idx int) { } if eof { if totalWrites <= 0 { + // check if the channel was closed; sometimes we drop / skip a segment + s.mutex.RLock() + closed := s.closed + latestSeq := s.latestWrite + s.mutex.RUnlock() w.Header().Set("Lp-Trickle-Seq", strconv.Itoa(segment.idx)) - w.Header().Set("Lp-Trickle-Closed", "terminated") + if closed { + w.Header().Set("Lp-Trickle-Closed", "terminated") + } else { + // if the segment was dropped, it's probably slow + // send over latest seq so the client can grab leading edge + w.Header().Set("Lp-Trickle-Latest", strconv.Itoa(latestSeq)) + w.WriteHeader(470) + } } return totalWrites, nil } diff --git
a/trickle/trickle_subscriber.go b/trickle/trickle_subscriber.go index 95127a9e96..7730bdf451 100644 --- a/trickle/trickle_subscriber.go +++ b/trickle/trickle_subscriber.go @@ -15,6 +15,15 @@ import ( var EOS = errors.New("End of stream") +type SequenceNonexistent struct { + Latest int + Seq int +} + +func (e *SequenceNonexistent) Error() string { + return fmt.Sprintf("Channel exists but sequence does not: requested %d latest %d", e.Seq, e.Latest) +} + const preconnectRefreshTimeout = 20 * time.Second // TrickleSubscriber represents a trickle streaming reader that always fetches from index -1 @@ -51,10 +60,28 @@ func GetSeq(resp *http.Response) int { return i } +func GetLatest(resp *http.Response) int { + if resp == nil { + return -99 // TODO hmm + } + v := resp.Header.Get("Lp-Trickle-Latest") + i, err := strconv.Atoi(v) + if err != nil { + return -1 // Use the latest index on the server + } + return i +} + func IsEOS(resp *http.Response) bool { return resp.Header.Get("Lp-Trickle-Closed") != "" } +func (c *TrickleSubscriber) SetSeq(seq int) { + c.mu.Lock() + defer c.mu.Unlock() + c.idx = seq +} + func (c *TrickleSubscriber) connect(ctx context.Context) (*http.Response, error) { url := fmt.Sprintf("%s/%d", c.url, c.idx) slog.Debug("preconnecting", "url", url) @@ -76,6 +103,9 @@ func (c *TrickleSubscriber) connect(ctx context.Context) (*http.Response, error) if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) resp.Body.Close() // Ensure we close the body to avoid leaking connections + if resp.StatusCode == http.StatusNotFound || resp.StatusCode == 470 { + return resp, nil + } return nil, fmt.Errorf("failed GET segment, status code: %d, msg: %s", resp.StatusCode, string(body)) } @@ -152,9 +182,19 @@ func (c *TrickleSubscriber) Read() (*http.Response, error) { c.pendingGet = nil if IsEOS(conn) { + conn.Body.Close() // because this is a 200; maybe use a custom status code return nil, EOS } + if conn.StatusCode == http.StatusNotFound { + return nil, 
StreamNotFoundErr + } + + if conn.StatusCode == 470 { + // stream exists but segment doesn't + return nil, &SequenceNonexistent{Seq: GetSeq(conn), Latest: GetLatest(conn)} + } + + // Set to use the next index for the next (pre-)connection idx := GetSeq(conn) if idx >= 0 { From 1dfe3fd03841ab632a9557f8018cc2fad6f6c7e1 Mon Sep 17 00:00:00 2001 From: Max Holland Date: Mon, 23 Dec 2024 16:08:30 +0000 Subject: [PATCH 56/56] Temporary hack to fix comfyui prompt (#3333) * Temporary hack to fix comfyui prompt For stream manager streams the pipelines app is not currently wrapping the comfyui json in a 'prompt' field so we need to handle this for now * fix --- server/ai_mediaserver.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go index 4099a8e1dc..80f92948b7 100644 --- a/server/ai_mediaserver.go +++ b/server/ai_mediaserver.go @@ -467,7 +467,11 @@ func (ls *LivepeerServer) StartLiveVideo() http.Handler { } if len(authResp.paramsMap) > 0 { - pipelineParams = authResp.paramsMap + if _, ok := authResp.paramsMap["prompt"]; !ok && pipeline == "comfyui" { + pipelineParams = map[string]interface{}{"prompt": authResp.paramsMap} + } else { + pipelineParams = authResp.paramsMap + } } if authResp.StreamID != "" {