1
1
import logging
2
2
import ssl
3
3
import urllib .parse
4
+ import urllib .request
4
5
from contextlib import contextmanager
5
6
from typing import Dict , Any , Optional , Generator
6
7
14
15
from databricks .sql .common .http import HttpMethod
15
16
from databricks .sql .common .http_utils import (
16
17
detect_and_parse_proxy ,
17
- create_connection_pool ,
18
+ create_basic_proxy_auth_headers ,
18
19
)
19
20
20
21
logger = logging .getLogger (__name__ )
@@ -27,6 +28,10 @@ class UnifiedHttpClient:
27
28
This client uses urllib3 for robust HTTP communication with retry policies,
28
29
connection pooling, SSL support, and proxy support. It replaces the various
29
30
singleton HTTP clients and direct requests usage throughout the codebase.
31
+
32
+ The client supports per-request proxy decisions, automatically routing requests
33
+ through proxy or direct connections based on system proxy bypass rules and
34
+ the target hostname of each request.
30
35
"""
31
36
32
37
def __init__ (self , client_context ):
@@ -37,12 +42,17 @@ def __init__(self, client_context):
37
42
client_context: ClientContext instance containing HTTP configuration
38
43
"""
39
44
self .config = client_context
40
- self ._pool_manager = None
45
+ # Since the unified http client is used for all requests, we need to have proxy and direct pool managers
46
+ # for per-request proxy decisions.
47
+ self ._direct_pool_manager = None
48
+ self ._proxy_pool_manager = None
41
49
self ._retry_policy = None
42
- self ._setup_pool_manager ()
50
+ self ._proxy_uri = None
51
+ self ._proxy_auth = None
52
+ self ._setup_pool_managers ()
43
53
44
- def _setup_pool_manager (self ):
45
- """Set up the urllib3 PoolManager with configuration from ClientContext ."""
54
+ def _setup_pool_managers (self ):
55
+ """Set up both direct and proxy pool managers for per-request proxy decisions ."""
46
56
47
57
# SSL context setup
48
58
ssl_context = None
@@ -89,11 +99,6 @@ def _setup_pool_manager(self):
89
99
self ._retry_policy ._command_type = None
90
100
self ._retry_policy ._retry_start_time = None
91
101
92
- parsed_url = urllib .parse .urlparse (self .config .hostname )
93
- self .scheme = parsed_url .scheme
94
- self .host = parsed_url .hostname
95
- self .port = parsed_url .port
96
-
97
102
# Common pool manager kwargs
98
103
pool_kwargs = {
99
104
"num_pools" : self .config .pool_connections ,
@@ -107,31 +112,83 @@ def _setup_pool_manager(self):
107
112
"ssl_context" : ssl_context ,
108
113
}
109
114
110
- # Detect proxy using shared utility
111
- proxy_uri , proxy_auth = detect_and_parse_proxy (
112
- self .scheme , self .config .hostname
113
- )
115
+ # Always create a direct pool manager
116
+ self ._direct_pool_manager = PoolManager (** pool_kwargs )
114
117
115
- if proxy_uri :
116
- parsed_proxy = urllib .parse .urlparse (proxy_uri )
117
- # realhost and realport are the host and port of the actual request
118
- self .realhost = self .host
119
- self .realport = self .port
120
- # this is passed to ProxyManager
121
- self .proxy_uri : str = proxy_uri
122
- self .host = parsed_proxy .hostname
123
- self .port = parsed_proxy .port
124
- self .proxy_auth = proxy_auth
125
- else :
126
- self .realhost = self .realport = self .proxy_auth = self .proxy_uri = None
118
+ # Detect system proxy configuration
119
+ # We use 'https' as default scheme since most requests will be HTTPS
120
+ parsed_url = urllib .parse .urlparse (self .config .hostname )
121
+ self .scheme = parsed_url .scheme or "https"
127
122
128
- # Create proxy or regular pool manager
129
- if self .using_proxy ():
130
- self ._pool_manager = ProxyManager (
131
- self .config .http_proxy , proxy_headers = proxy_auth , ** pool_kwargs
123
+ # Check if system has proxy configured for our scheme
124
+ try :
125
+ # Use shared proxy detection logic, skipping bypass since we handle that per-request
126
+ proxy_url , proxy_auth = detect_and_parse_proxy (
127
+ self .scheme , skip_bypass = True
132
128
)
129
+
130
+ if proxy_url :
131
+ # Store proxy configuration for per-request decisions
132
+ self ._proxy_uri = proxy_url
133
+ self ._proxy_auth = proxy_auth
134
+
135
+ # Create proxy pool manager
136
+ self ._proxy_pool_manager = ProxyManager (
137
+ proxy_url , proxy_headers = proxy_auth , ** pool_kwargs
138
+ )
139
+ logger .debug ("Initialized with proxy support: %s" , proxy_url )
140
+ else :
141
+ self ._proxy_pool_manager = None
142
+ logger .debug ("No system proxy detected, using direct connections only" )
143
+
144
+ except Exception as e :
145
+ # If proxy detection fails, fall back to direct connections only
146
+ logger .debug ("Error detecting system proxy configuration: %s" , e )
147
+ self ._proxy_pool_manager = None
148
+
149
+ def _should_use_proxy (self , target_host : str ) -> bool :
150
+ """
151
+ Determine if a request to the target host should use proxy.
152
+
153
+ Args:
154
+ target_host: The hostname of the target URL
155
+
156
+ Returns:
157
+ True if proxy should be used, False for direct connection
158
+ """
159
+ # If no proxy is configured, always use direct connection
160
+ if not self ._proxy_pool_manager or not self ._proxy_uri :
161
+ return False
162
+
163
+ # Check system proxy bypass rules for this specific host
164
+ try :
165
+ # proxy_bypass returns True if the host should BYPASS the proxy
166
+ # We want the opposite - True if we should USE the proxy
167
+ return not urllib .request .proxy_bypass (target_host )
168
+ except Exception as e :
169
+ # If proxy_bypass fails, default to using proxy (safer choice)
170
+ logger .debug ("Error checking proxy bypass for host %s: %s" , target_host , e )
171
+ return True
172
+
173
+ def _get_pool_manager_for_url (self , url : str ) -> urllib3 .PoolManager :
174
+ """
175
+ Get the appropriate pool manager for the given URL.
176
+
177
+ Args:
178
+ url: The target URL
179
+
180
+ Returns:
181
+ PoolManager instance (either direct or proxy)
182
+ """
183
+ parsed_url = urllib .parse .urlparse (url )
184
+ target_host = parsed_url .hostname
185
+
186
+ if target_host and self ._should_use_proxy (target_host ):
187
+ logger .debug ("Using proxy for request to %s" , target_host )
188
+ return self ._proxy_pool_manager
133
189
else :
134
- self ._pool_manager = PoolManager (** pool_kwargs )
190
+ logger .debug ("Using direct connection for request to %s" , target_host )
191
+ return self ._direct_pool_manager
135
192
136
193
def _prepare_headers (
137
194
self , headers : Optional [Dict [str , str ]] = None
@@ -184,10 +241,13 @@ def request_context(
184
241
# Prepare retry policy for this request
185
242
self ._prepare_retry_policy ()
186
243
244
+ # Select appropriate pool manager based on target URL
245
+ pool_manager = self ._get_pool_manager_for_url (url )
246
+
187
247
response = None
188
248
189
249
try :
190
- response = self . _pool_manager .request (
250
+ response = pool_manager .request (
191
251
method = method .value , url = url , headers = request_headers , ** kwargs
192
252
)
193
253
yield response
@@ -227,14 +287,17 @@ def request(
227
287
return response
228
288
229
289
def using_proxy (self ) -> bool :
230
- """Check if proxy is being used."""
231
- return self .realhost is not None
290
+ """Check if proxy support is available (not whether it's being used for a specific request) ."""
291
+ return self ._proxy_pool_manager is not None
232
292
233
293
def close (self ):
234
294
"""Close the underlying connection pools."""
235
- if self ._pool_manager :
236
- self ._pool_manager .clear ()
237
- self ._pool_manager = None
295
+ if self ._direct_pool_manager :
296
+ self ._direct_pool_manager .clear ()
297
+ self ._direct_pool_manager = None
298
+ if self ._proxy_pool_manager :
299
+ self ._proxy_pool_manager .clear ()
300
+ self ._proxy_pool_manager = None
238
301
239
302
def __enter__ (self ):
240
303
return self
0 commit comments