From d9577d53b74765e0a39227fbcdda8290057ed808 Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer <vertex-sdk-bot@google.com>
Date: Tue, 11 Feb 2025 14:40:11 -0800
Subject: [PATCH] fix: prediction stream raw predict url for non dedicated
 endpoint

PiperOrigin-RevId: 725777307
---
 google/cloud/aiplatform/models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py
index 8b13f95502..e46915bf28 100644
--- a/google/cloud/aiplatform/models.py
+++ b/google/cloud/aiplatform/models.py
@@ -2583,6 +2583,7 @@ def stream_raw_predict(
         timeout: Optional[float] = None,
     ) -> Iterator[requests.models.Response]:
         """Makes a streaming prediction request using arbitrary headers.
+        For custom models, this method is only supported on dedicated endpoints.
 
         Example usage:
             ```
@@ -2632,7 +2633,7 @@ def stream_raw_predict(
         if self.stream_raw_predict_request_url is None:
             self.stream_raw_predict_request_url = f"https://{self.location}-{constants.base.API_BASE_PATH}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:streamRawPredict"
 
-        url = self.raw_predict_request_url
+        url = self.stream_raw_predict_request_url
 
         if use_dedicated_endpoint:
             self._sync_gca_resource_if_skipped()