Skip to content

Commit c367cd0

Browse files
authored
Honor retry delay on Azure/OpenAI status codes 429 and 503 (#795)
## Motivation and Context (Why the change? What's the scenario?) When sending too many requests to Azure / OpenAI and receiving status code 429, the response includes how long to wait before retrying. The same might be true for status code 503. KM default retry strategy ignores this information, often retrying too soon, and causing unnecessary errors and potential pipeline failures. ## High level description (Approach, Design) Fix the retry policy to honor these headers in case of 429 and 503: * retry-after-ms * x-ms-retry-after-ms * Retry-After
1 parent ccfb815 commit c367cd0

12 files changed

+199
-23
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System.Net.Http.Headers;
4+
5+
namespace Microsoft.KernelMemory.Utils;
6+
7+
#pragma warning disable CA1303
8+
#pragma warning disable CA1812
9+
10+
// TMP workaround for Azure SDK bug
11+
// See https://github.com/Azure/azure-sdk-for-net/issues/46109
12+
internal sealed class AuthFixHandler : DelegatingHandler
13+
{
14+
protected override Task<HttpResponseMessage> SendAsync(
15+
HttpRequestMessage request, CancellationToken cancellationToken)
16+
{
17+
if (request.Headers.TryGetValues("Authorization", out var headers) && headers.Count() > 1)
18+
{
19+
request.Headers.Authorization = new AuthenticationHeaderValue(
20+
request.Headers.Authorization!.Scheme,
21+
request.Headers.Authorization.Parameter);
22+
}
23+
24+
return base.SendAsync(request, cancellationToken);
25+
}
26+
}
27+
28+
internal sealed class HttpLogger : DelegatingHandler
29+
{
30+
protected async override Task<HttpResponseMessage> SendAsync(
31+
HttpRequestMessage request, CancellationToken cancellationToken)
32+
{
33+
// Log the request
34+
Console.WriteLine("## Request:");
35+
Console.WriteLine(request.ToString());
36+
if (request.Content != null)
37+
{
38+
Console.WriteLine(await request.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false));
39+
}
40+
41+
Console.WriteLine("Headers");
42+
foreach (var h in request.Headers)
43+
{
44+
foreach (string x in h.Value)
45+
{
46+
Console.WriteLine($"{h.Key}: {x}");
47+
}
48+
}
49+
50+
Console.WriteLine();
51+
52+
// Proceed with the request
53+
HttpResponseMessage response = await base.SendAsync(request, cancellationToken).ConfigureAwait(false);
54+
55+
// Log the response
56+
Console.WriteLine("\n\n## Response:");
57+
Console.WriteLine(response.ToString());
58+
if (response.Content != null)
59+
{
60+
Console.WriteLine(await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false));
61+
}
62+
63+
Console.WriteLine();
64+
65+
return response;
66+
}
67+
}

extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public AzureOpenAITextEmbeddingGenerator(
4444
HttpClient? httpClient = null)
4545
: this(
4646
config,
47-
AzureOpenAIClientBuilder.Build(config, httpClient),
47+
AzureOpenAIClientBuilder.Build(config, httpClient, loggerFactory),
4848
textTokenizer,
4949
loggerFactory)
5050
{

extensions/AzureOpenAI/AzureOpenAITextGenerator.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public AzureOpenAITextGenerator(
3939
HttpClient? httpClient = null)
4040
: this(
4141
config,
42-
AzureOpenAIClientBuilder.Build(config, httpClient),
42+
AzureOpenAIClientBuilder.Build(config, httpClient, loggerFactory),
4343
textTokenizer,
4444
loggerFactory)
4545
{

extensions/AzureOpenAI/Internals/AzureOpenAIClientBuilder.cs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,17 @@
66
using Azure;
77
using Azure.AI.OpenAI;
88
using Azure.Identity;
9+
using Microsoft.Extensions.Logging;
910
using Microsoft.KernelMemory.Diagnostics;
1011

1112
namespace Microsoft.KernelMemory.AI.AzureOpenAI.Internals;
1213

1314
internal static class AzureOpenAIClientBuilder
1415
{
15-
internal static AzureOpenAIClient Build(AzureOpenAIConfig config, HttpClient? httpClient = null)
16+
internal static AzureOpenAIClient Build(
17+
AzureOpenAIConfig config,
18+
HttpClient? httpClient = null,
19+
ILoggerFactory? loggerFactory = null)
1620
{
1721
if (string.IsNullOrEmpty(config.Endpoint))
1822
{
@@ -21,7 +25,7 @@ internal static AzureOpenAIClient Build(AzureOpenAIConfig config, HttpClient? ht
2125

2226
AzureOpenAIClientOptions options = new()
2327
{
24-
RetryPolicy = new ClientSequentialRetryPolicy(maxRetries: Math.Max(0, config.MaxRetries)),
28+
RetryPolicy = new ClientSequentialRetryPolicy(maxRetries: Math.Max(0, config.MaxRetries), loggerFactory),
2529
ApplicationId = Telemetry.HttpUserAgent,
2630
};
2731

extensions/AzureOpenAI/Internals/ClientSequentialRetryPolicy.cs

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22

33
using System;
44
using System.ClientModel.Primitives;
5+
using Microsoft.Extensions.Logging;
6+
using Microsoft.KernelMemory.Diagnostics;
57

68
namespace Microsoft.KernelMemory.AI.AzureOpenAI.Internals;
79

810
internal sealed class ClientSequentialRetryPolicy : ClientRetryPolicy
911
{
10-
private static readonly TimeSpan[] s_pollingSequence =
12+
private static readonly TimeSpan[] s_retryDelaySequence =
1113
{
1214
TimeSpan.FromSeconds(1),
1315
TimeSpan.FromSeconds(1),
@@ -19,15 +21,65 @@ internal sealed class ClientSequentialRetryPolicy : ClientRetryPolicy
1921
TimeSpan.FromSeconds(8)
2022
};
2123

22-
private static readonly TimeSpan s_maxDelay = s_pollingSequence[^1];
24+
private static readonly TimeSpan s_maxDelay = s_retryDelaySequence[^1];
2325

24-
public ClientSequentialRetryPolicy(int maxRetries = 3) : base(maxRetries)
26+
private readonly ILogger<ClientSequentialRetryPolicy> _log;
27+
28+
public ClientSequentialRetryPolicy(
29+
int maxRetries = 3,
30+
ILoggerFactory? loggerFactory = null) : base(maxRetries)
2531
{
32+
this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger<ClientSequentialRetryPolicy>();
2633
}
2734

2835
protected override TimeSpan GetNextDelay(PipelineMessage message, int tryCount)
2936
{
37+
// Check if the remote service specified how long to wait before retrying
38+
if (this.TryGetDelayFromResponse(message.Response, out TimeSpan delay))
39+
{
40+
this._log.LogWarning("Delay extracted from HTTP response: {0} msecs", delay.TotalMilliseconds);
41+
return delay;
42+
}
43+
44+
// Use predefined delay, increasing on each attempt up to a max value
3045
int index = Math.Max(0, tryCount - 1);
31-
return index >= s_pollingSequence.Length ? s_maxDelay : s_pollingSequence[index];
46+
return index >= s_retryDelaySequence.Length ? s_maxDelay : s_retryDelaySequence[index];
47+
}
48+
49+
private bool TryGetDelayFromResponse(PipelineResponse? response, out TimeSpan delay)
50+
{
51+
delay = TimeSpan.Zero;
52+
53+
if (response == null || (response.Status != 429 && response.Status != 503)) { return false; }
54+
55+
delay = this.TryGetTimeSpanFromHeader(response, "retry-after-ms")
56+
?? this.TryGetTimeSpanFromHeader(response, "x-ms-retry-after-ms")
57+
?? this.TryGetTimeSpanFromHeader(response, "Retry-After", msecsMultiplier: 1000, allowDateTimeOffset: true)
58+
?? TimeSpan.Zero;
59+
60+
return delay > TimeSpan.Zero;
61+
}
62+
63+
private TimeSpan? TryGetTimeSpanFromHeader(
64+
PipelineResponse response,
65+
string headerName,
66+
int msecsMultiplier = 1,
67+
bool allowDateTimeOffset = false)
68+
{
69+
if (double.TryParse(
70+
response.Headers.TryGetValue(headerName, out string? strValue) ? strValue : null,
71+
out double doubleValue))
72+
{
73+
this._log.LogWarning("Header {0} found, value {1}", headerName, doubleValue);
74+
return TimeSpan.FromMilliseconds(msecsMultiplier * doubleValue);
75+
}
76+
77+
if (allowDateTimeOffset && DateTimeOffset.TryParse(headerName, out DateTimeOffset delayUntil))
78+
{
79+
this._log.LogWarning("Header {0} found, value {1}", headerName, delayUntil);
80+
return delayUntil - DateTimeOffset.UtcNow;
81+
}
82+
83+
return null;
3284
}
3385
}

extensions/OpenAI/OpenAI/Internals/.editorconfig

Lines changed: 0 additions & 3 deletions
This file was deleted.

extensions/OpenAI/OpenAI/Internals/ChangeEndpointPolicy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
using System.Collections.Generic;
66
using System.Threading.Tasks;
77

8-
namespace Microsoft.KernelMemory.AI.OpenAI;
8+
namespace Microsoft.KernelMemory.AI.OpenAI.Internals;
99

1010
internal sealed class ChangeEndpointPolicy : PipelinePolicy
1111
{

extensions/OpenAI/OpenAI/Internals/ClientSequentialRetryPolicy.cs

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22

33
using System;
44
using System.ClientModel.Primitives;
5+
using Microsoft.Extensions.Logging;
6+
using Microsoft.KernelMemory.Diagnostics;
57

6-
namespace Microsoft.KernelMemory.AI.OpenAI;
8+
namespace Microsoft.KernelMemory.AI.OpenAI.Internals;
79

810
internal sealed class ClientSequentialRetryPolicy : ClientRetryPolicy
911
{
10-
private static readonly TimeSpan[] s_pollingSequence =
12+
private static readonly TimeSpan[] s_retryDelaySequence =
1113
{
1214
TimeSpan.FromSeconds(1),
1315
TimeSpan.FromSeconds(1),
@@ -19,15 +21,65 @@ internal sealed class ClientSequentialRetryPolicy : ClientRetryPolicy
1921
TimeSpan.FromSeconds(8)
2022
};
2123

22-
private static readonly TimeSpan s_maxDelay = s_pollingSequence[^1];
24+
private static readonly TimeSpan s_maxDelay = s_retryDelaySequence[^1];
2325

24-
public ClientSequentialRetryPolicy(int maxRetries = 3) : base(maxRetries)
26+
private readonly ILogger<ClientSequentialRetryPolicy> _log;
27+
28+
public ClientSequentialRetryPolicy(
29+
int maxRetries = 3,
30+
ILoggerFactory? loggerFactory = null) : base(maxRetries)
2531
{
32+
this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger<ClientSequentialRetryPolicy>();
2633
}
2734

2835
protected override TimeSpan GetNextDelay(PipelineMessage message, int tryCount)
2936
{
37+
// Check if the remote service specified how long to wait before retrying
38+
if (this.TryGetDelayFromResponse(message.Response, out TimeSpan delay))
39+
{
40+
this._log.LogWarning("Delay extracted from HTTP response: {0} msecs", delay.TotalMilliseconds);
41+
return delay;
42+
}
43+
44+
// Use predefined delay, increasing on each attempt up to a max value
3045
int index = Math.Max(0, tryCount - 1);
31-
return index >= s_pollingSequence.Length ? s_maxDelay : s_pollingSequence[index];
46+
return index >= s_retryDelaySequence.Length ? s_maxDelay : s_retryDelaySequence[index];
47+
}
48+
49+
private bool TryGetDelayFromResponse(PipelineResponse? response, out TimeSpan delay)
50+
{
51+
delay = TimeSpan.Zero;
52+
53+
if (response == null || (response.Status != 429 && response.Status != 503)) { return false; }
54+
55+
delay = this.TryGetTimeSpanFromHeader(response, "retry-after-ms")
56+
?? this.TryGetTimeSpanFromHeader(response, "x-ms-retry-after-ms")
57+
?? this.TryGetTimeSpanFromHeader(response, "Retry-After", msecsMultiplier: 1000, allowDateTimeOffset: true)
58+
?? TimeSpan.Zero;
59+
60+
return delay > TimeSpan.Zero;
61+
}
62+
63+
private TimeSpan? TryGetTimeSpanFromHeader(
64+
PipelineResponse response,
65+
string headerName,
66+
int msecsMultiplier = 1,
67+
bool allowDateTimeOffset = false)
68+
{
69+
if (double.TryParse(
70+
response.Headers.TryGetValue(headerName, out string? strValue) ? strValue : null,
71+
out double doubleValue))
72+
{
73+
this._log.LogWarning("Header {0} found, value {1}", headerName, doubleValue);
74+
return TimeSpan.FromMilliseconds(msecsMultiplier * doubleValue);
75+
}
76+
77+
if (allowDateTimeOffset && DateTimeOffset.TryParse(headerName, out DateTimeOffset delayUntil))
78+
{
79+
this._log.LogWarning("Header {0} found, value {1}", headerName, delayUntil);
80+
return delayUntil - DateTimeOffset.UtcNow;
81+
}
82+
83+
return null;
3284
}
3385
}

extensions/OpenAI/OpenAI/Internals/OpenAIClientBuilder.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,22 @@
33
using System;
44
using System.ClientModel.Primitives;
55
using System.Net.Http;
6+
using Microsoft.Extensions.Logging;
67
using Microsoft.KernelMemory.Diagnostics;
78
using OpenAI;
89

9-
namespace Microsoft.KernelMemory.AI.OpenAI;
10+
namespace Microsoft.KernelMemory.AI.OpenAI.Internals;
1011

1112
internal static class OpenAIClientBuilder
1213
{
1314
internal static OpenAIClient Build(
1415
OpenAIConfig config,
15-
HttpClient? httpClient = null)
16+
HttpClient? httpClient = null,
17+
ILoggerFactory? loggerFactory = null)
1618
{
1719
OpenAIClientOptions options = new()
1820
{
19-
RetryPolicy = new ClientSequentialRetryPolicy(maxRetries: Math.Max(0, config.MaxRetries)),
21+
RetryPolicy = new ClientSequentialRetryPolicy(maxRetries: Math.Max(0, config.MaxRetries), loggerFactory),
2022
ApplicationId = Telemetry.HttpUserAgent,
2123
};
2224

extensions/OpenAI/OpenAI/Internals/SkClientBuilder.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
using Microsoft.SemanticKernel.Connectors.OpenAI;
66
using OpenAI;
77

8-
namespace Microsoft.KernelMemory.AI.OpenAI;
8+
namespace Microsoft.KernelMemory.AI.OpenAI.Internals;
99

1010
internal static class SkClientBuilder
1111
{

extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System.Threading;
88
using System.Threading.Tasks;
99
using Microsoft.Extensions.Logging;
10+
using Microsoft.KernelMemory.AI.OpenAI.Internals;
1011
using Microsoft.KernelMemory.Diagnostics;
1112
using Microsoft.SemanticKernel.AI.Embeddings;
1213
using Microsoft.SemanticKernel.Embeddings;
@@ -45,7 +46,7 @@ public OpenAITextEmbeddingGenerator(
4546
HttpClient? httpClient = null)
4647
: this(
4748
config,
48-
OpenAIClientBuilder.Build(config, httpClient),
49+
OpenAIClientBuilder.Build(config, httpClient, loggerFactory),
4950
textTokenizer,
5051
loggerFactory)
5152
{

extensions/OpenAI/OpenAI/OpenAITextGenerator.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Runtime.CompilerServices;
77
using System.Threading;
88
using Microsoft.Extensions.Logging;
9+
using Microsoft.KernelMemory.AI.OpenAI.Internals;
910
using Microsoft.KernelMemory.Diagnostics;
1011
using Microsoft.SemanticKernel;
1112
using Microsoft.SemanticKernel.Connectors.OpenAI;
@@ -41,7 +42,7 @@ public OpenAITextGenerator(
4142
HttpClient? httpClient = null)
4243
: this(
4344
config,
44-
OpenAIClientBuilder.Build(config, httpClient),
45+
OpenAIClientBuilder.Build(config, httpClient, loggerFactory),
4546
textTokenizer,
4647
loggerFactory)
4748
{

0 commit comments

Comments
 (0)