Skip to content

Commit 4d54557

Browse files
Allow per request proxy decision
Signed-off-by: Vikrant Puppala <[email protected]>
1 parent 2b228c3 commit 4d54557

File tree

5 files changed

+110
-47
lines changed

5 files changed

+110
-47
lines changed

src/databricks/sql/auth/thrift_http_client.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from databricks.sql.types import SSLOptions
1818
from databricks.sql.common.http_utils import (
1919
detect_and_parse_proxy,
20-
create_connection_pool,
2120
)
2221

2322
logger = logging.getLogger(__name__)

src/databricks/sql/backend/sea/utils/http_client.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
)
1818
from databricks.sql.common.http_utils import (
1919
detect_and_parse_proxy,
20-
create_connection_pool,
2120
)
2221

2322
logger = logging.getLogger(__name__)

src/databricks/sql/common/http_utils.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,17 @@
99
from databricks.sql.auth.retry import DatabricksRetryPolicy
1010
from databricks.sql.types import SSLOptions
1111

12+
1213
def detect_and_parse_proxy(
13-
scheme: str, host: str
14+
scheme: str, host: str = None, skip_bypass: bool = False
1415
) -> Tuple[Optional[str], Optional[Dict[str, str]]]:
1516
"""
1617
Detect system proxy and return proxy URI and headers using standardized logic.
1718
1819
Args:
1920
scheme: URL scheme (http/https)
20-
host: Target hostname
21+
host: Target hostname (optional, only needed for bypass checking)
22+
skip_bypass: If True, skip proxy bypass checking and return proxy config if found
2123
2224
Returns:
2325
Tuple of (proxy_uri, proxy_headers) or (None, None) if no proxy
@@ -30,8 +32,8 @@ def detect_and_parse_proxy(
3032
# No proxy found or getproxies() failed - disable proxy
3133
proxy = None
3234
else:
33-
# Proxy found, but check if this host should bypass proxy
34-
if host and urllib.request.proxy_bypass(host):
35+
# Proxy found, but check if this host should bypass proxy (unless skipped)
36+
if not skip_bypass and host and urllib.request.proxy_bypass(host):
3537
proxy = None # Host bypasses proxy per system rules
3638

3739
if not proxy:
@@ -55,4 +57,4 @@ def create_basic_proxy_auth_headers(parsed_proxy) -> Optional[Dict[str, str]]:
5557
if parsed_proxy is None or not parsed_proxy.username:
5658
return None
5759
ap = f"{urllib.parse.unquote(parsed_proxy.username)}:{urllib.parse.unquote(parsed_proxy.password)}"
58-
return make_headers(proxy_basic_auth=ap)
60+
return make_headers(proxy_basic_auth=ap)

src/databricks/sql/common/unified_http_client.py

Lines changed: 100 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import ssl
33
import urllib.parse
4+
import urllib.request
45
from contextlib import contextmanager
56
from typing import Dict, Any, Optional, Generator
67

@@ -14,7 +15,7 @@
1415
from databricks.sql.common.http import HttpMethod
1516
from databricks.sql.common.http_utils import (
1617
detect_and_parse_proxy,
17-
create_connection_pool,
18+
create_basic_proxy_auth_headers,
1819
)
1920

2021
logger = logging.getLogger(__name__)
@@ -27,6 +28,10 @@ class UnifiedHttpClient:
2728
This client uses urllib3 for robust HTTP communication with retry policies,
2829
connection pooling, SSL support, and proxy support. It replaces the various
2930
singleton HTTP clients and direct requests usage throughout the codebase.
31+
32+
The client supports per-request proxy decisions, automatically routing requests
33+
through proxy or direct connections based on system proxy bypass rules and
34+
the target hostname of each request.
3035
"""
3136

3237
def __init__(self, client_context):
@@ -37,12 +42,17 @@ def __init__(self, client_context):
3742
client_context: ClientContext instance containing HTTP configuration
3843
"""
3944
self.config = client_context
40-
self._pool_manager = None
45+
# Since the unified http client is used for all requests, we need to have proxy and direct pool managers
46+
# for per-request proxy decisions.
47+
self._direct_pool_manager = None
48+
self._proxy_pool_manager = None
4149
self._retry_policy = None
42-
self._setup_pool_manager()
50+
self._proxy_uri = None
51+
self._proxy_auth = None
52+
self._setup_pool_managers()
4353

44-
def _setup_pool_manager(self):
45-
"""Set up the urllib3 PoolManager with configuration from ClientContext."""
54+
def _setup_pool_managers(self):
55+
"""Set up both direct and proxy pool managers for per-request proxy decisions."""
4656

4757
# SSL context setup
4858
ssl_context = None
@@ -89,11 +99,6 @@ def _setup_pool_manager(self):
8999
self._retry_policy._command_type = None
90100
self._retry_policy._retry_start_time = None
91101

92-
parsed_url = urllib.parse.urlparse(self.config.hostname)
93-
self.scheme = parsed_url.scheme
94-
self.host = parsed_url.hostname
95-
self.port = parsed_url.port
96-
97102
# Common pool manager kwargs
98103
pool_kwargs = {
99104
"num_pools": self.config.pool_connections,
@@ -107,31 +112,83 @@ def _setup_pool_manager(self):
107112
"ssl_context": ssl_context,
108113
}
109114

110-
# Detect proxy using shared utility
111-
proxy_uri, proxy_auth = detect_and_parse_proxy(
112-
self.scheme, self.config.hostname
113-
)
115+
# Always create a direct pool manager
116+
self._direct_pool_manager = PoolManager(**pool_kwargs)
114117

115-
if proxy_uri:
116-
parsed_proxy = urllib.parse.urlparse(proxy_uri)
117-
# realhost and realport are the host and port of the actual request
118-
self.realhost = self.host
119-
self.realport = self.port
120-
# this is passed to ProxyManager
121-
self.proxy_uri: str = proxy_uri
122-
self.host = parsed_proxy.hostname
123-
self.port = parsed_proxy.port
124-
self.proxy_auth = proxy_auth
125-
else:
126-
self.realhost = self.realport = self.proxy_auth = self.proxy_uri = None
118+
# Detect system proxy configuration
119+
# We use 'https' as default scheme since most requests will be HTTPS
120+
parsed_url = urllib.parse.urlparse(self.config.hostname)
121+
self.scheme = parsed_url.scheme or "https"
127122

128-
# Create proxy or regular pool manager
129-
if self.using_proxy():
130-
self._pool_manager = ProxyManager(
131-
self.config.http_proxy, proxy_headers=proxy_auth, **pool_kwargs
123+
# Check if system has proxy configured for our scheme
124+
try:
125+
# Use shared proxy detection logic, skipping bypass since we handle that per-request
126+
proxy_url, proxy_auth = detect_and_parse_proxy(
127+
self.scheme, skip_bypass=True
132128
)
129+
130+
if proxy_url:
131+
# Store proxy configuration for per-request decisions
132+
self._proxy_uri = proxy_url
133+
self._proxy_auth = proxy_auth
134+
135+
# Create proxy pool manager
136+
self._proxy_pool_manager = ProxyManager(
137+
proxy_url, proxy_headers=proxy_auth, **pool_kwargs
138+
)
139+
logger.debug("Initialized with proxy support: %s", proxy_url)
140+
else:
141+
self._proxy_pool_manager = None
142+
logger.debug("No system proxy detected, using direct connections only")
143+
144+
except Exception as e:
145+
# If proxy detection fails, fall back to direct connections only
146+
logger.debug("Error detecting system proxy configuration: %s", e)
147+
self._proxy_pool_manager = None
148+
149+
def _should_use_proxy(self, target_host: str) -> bool:
150+
"""
151+
Determine if a request to the target host should use proxy.
152+
153+
Args:
154+
target_host: The hostname of the target URL
155+
156+
Returns:
157+
True if proxy should be used, False for direct connection
158+
"""
159+
# If no proxy is configured, always use direct connection
160+
if not self._proxy_pool_manager or not self._proxy_uri:
161+
return False
162+
163+
# Check system proxy bypass rules for this specific host
164+
try:
165+
# proxy_bypass returns True if the host should BYPASS the proxy
166+
# We want the opposite - True if we should USE the proxy
167+
return not urllib.request.proxy_bypass(target_host)
168+
except Exception as e:
169+
# If proxy_bypass fails, default to using proxy (safer choice)
170+
logger.debug("Error checking proxy bypass for host %s: %s", target_host, e)
171+
return True
172+
173+
def _get_pool_manager_for_url(self, url: str) -> urllib3.PoolManager:
174+
"""
175+
Get the appropriate pool manager for the given URL.
176+
177+
Args:
178+
url: The target URL
179+
180+
Returns:
181+
PoolManager instance (either direct or proxy)
182+
"""
183+
parsed_url = urllib.parse.urlparse(url)
184+
target_host = parsed_url.hostname
185+
186+
if target_host and self._should_use_proxy(target_host):
187+
logger.debug("Using proxy for request to %s", target_host)
188+
return self._proxy_pool_manager
133189
else:
134-
self._pool_manager = PoolManager(**pool_kwargs)
190+
logger.debug("Using direct connection for request to %s", target_host)
191+
return self._direct_pool_manager
135192

136193
def _prepare_headers(
137194
self, headers: Optional[Dict[str, str]] = None
@@ -184,10 +241,13 @@ def request_context(
184241
# Prepare retry policy for this request
185242
self._prepare_retry_policy()
186243

244+
# Select appropriate pool manager based on target URL
245+
pool_manager = self._get_pool_manager_for_url(url)
246+
187247
response = None
188248

189249
try:
190-
response = self._pool_manager.request(
250+
response = pool_manager.request(
191251
method=method.value, url=url, headers=request_headers, **kwargs
192252
)
193253
yield response
@@ -227,14 +287,17 @@ def request(
227287
return response
228288

229289
def using_proxy(self) -> bool:
230-
"""Check if proxy is being used."""
231-
return self.realhost is not None
290+
"""Check if proxy support is available (not whether it's being used for a specific request)."""
291+
return self._proxy_pool_manager is not None
232292

233293
def close(self):
234294
"""Close the underlying connection pools."""
235-
if self._pool_manager:
236-
self._pool_manager.clear()
237-
self._pool_manager = None
295+
if self._direct_pool_manager:
296+
self._direct_pool_manager.clear()
297+
self._direct_pool_manager = None
298+
if self._proxy_pool_manager:
299+
self._proxy_pool_manager.clear()
300+
self._proxy_pool_manager = None
238301

239302
def __enter__(self):
240303
return self

tests/unit/test_telemetry.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def mock_telemetry_client():
2727
client_context = MagicMock()
2828

2929
# Patch the _setup_pool_manager method to avoid SSL file loading
30-
with patch('databricks.sql.common.unified_http_client.UnifiedHttpClient._setup_pool_manager'):
30+
with patch('databricks.sql.common.unified_http_client.UnifiedHttpClient._setup_pool_managers'):
3131
return TelemetryClient(
3232
telemetry_enabled=True,
3333
session_id_hex=session_id,
@@ -221,7 +221,7 @@ def test_client_lifecycle_flow(self):
221221
client_context = MagicMock()
222222

223223
# Initialize enabled client
224-
with patch('databricks.sql.common.unified_http_client.UnifiedHttpClient._setup_pool_manager'):
224+
with patch('databricks.sql.common.unified_http_client.UnifiedHttpClient._setup_pool_managers'):
225225
TelemetryClientFactory.initialize_telemetry_client(
226226
telemetry_enabled=True,
227227
session_id_hex=session_id_hex,
@@ -289,7 +289,7 @@ def test_factory_shutdown_flow(self):
289289
client_context = MagicMock()
290290

291291
# Initialize multiple clients
292-
with patch('databricks.sql.common.unified_http_client.UnifiedHttpClient._setup_pool_manager'):
292+
with patch('databricks.sql.common.unified_http_client.UnifiedHttpClient._setup_pool_managers'):
293293
for session in [session1, session2]:
294294
TelemetryClientFactory.initialize_telemetry_client(
295295
telemetry_enabled=True,

0 commit comments

Comments
 (0)