From ad6cf0f578f768689b1e2898bbb1c2f8d0f8e755 Mon Sep 17 00:00:00 2001 From: Praveen Kolluri Date: Wed, 5 Nov 2025 18:13:56 -0800 Subject: [PATCH 1/6] Update safe retry detection using document service request resource type and operation type. --- Microsoft.Azure.Cosmos.sln | 16 ++- .../src/HttpClient/CosmosHttpClientCore.cs | 12 +- .../src/HttpClient/HttpTimeoutPolicy.cs | 58 +++++++++- .../HttpTimeoutPolicyControlPlaneRead.cs | 6 - ...meoutPolicyControlPlaneRetriableHotPath.cs | 12 -- .../HttpClient/HttpTimeoutPolicyDefault.cs | 7 -- .../HttpTimeoutPolicyForPartitionFailover.cs | 9 +- .../HttpTimeoutPolicyForThinClient.cs | 7 +- .../HttpClient/HttpTimeoutPolicyNoRetry.cs | 2 +- .../CosmosHttpClientCoreTests.cs | 105 ++++++++++++++++-- 10 files changed, 172 insertions(+), 62 deletions(-) diff --git a/Microsoft.Azure.Cosmos.sln b/Microsoft.Azure.Cosmos.sln index cec2d9a5f4..5dd1a4d66a 100644 --- a/Microsoft.Azure.Cosmos.sln +++ b/Microsoft.Azure.Cosmos.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.29123.88 +# Visual Studio Version 17 +VisualStudioVersion = 17.14.36623.8 d17.14 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Azure.Cosmos", "Microsoft.Azure.Cosmos\src\Microsoft.Azure.Cosmos.csproj", "{36F6F6A8-CEC8-4261-9948-903495BC3C25}" EndProject @@ -181,6 +181,18 @@ Global {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|Any CPU.Build.0 = Release|Any CPU {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|x64.ActiveCfg = Release|Any CPU {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|x64.Build.0 = Release|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Cover|Any CPU.ActiveCfg = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Cover|Any CPU.Build.0 = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Cover|x64.ActiveCfg = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Cover|x64.Build.0 = Debug|Any CPU + 
{D744906A-1091-403F-B0B6-794DE045169A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Debug|x64.ActiveCfg = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Debug|x64.Build.0 = Debug|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Release|Any CPU.Build.0 = Release|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Release|x64.ActiveCfg = Release|Any CPU + {D744906A-1091-403F-B0B6-794DE045169A}.Release|x64.Build.0 = Release|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|Any CPU.ActiveCfg = Debug|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|Any CPU.Build.0 = Debug|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|x64.ActiveCfg = Debug|Any CPU diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs index 25b46ad0a6..06b9d9d9f6 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs @@ -388,7 +388,7 @@ private async Task SendHttpHelperAsync( return responseMessage; } - bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutPolicy, startDateTimeUtc, timeoutEnumerator); + bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutEnumerator); if (isOutOfRetries) { return responseMessage; @@ -402,7 +402,7 @@ private async Task SendHttpHelperAsync( datum.RecordHttpException(requestMessage, e, resourceType, requestStartTime); trace = datum.Trace; } - bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutPolicy, startDateTimeUtc, timeoutEnumerator); + bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutEnumerator); switch (e) { @@ -415,7 +415,7 @@ private async Task SendHttpHelperAsync( // Convert OperationCanceledException to 408 when the HTTP client 
throws it. This makes it clear that the // the request timed out and was not user canceled operation. - if (isOutOfRetries || !timeoutPolicy.IsSafeToRetry(requestMessage.Method)) + if (isOutOfRetries || !timeoutPolicy.IsSafeToRetry(documentServiceRequest)) { // throw current exception (caught in transport handler) string message = @@ -440,14 +440,14 @@ private async Task SendHttpHelperAsync( break; case WebException webException: - if (isOutOfRetries || (!timeoutPolicy.IsSafeToRetry(requestMessage.Method) && !WebExceptionUtility.IsWebExceptionRetriable(webException))) + if (isOutOfRetries || (!timeoutPolicy.IsSafeToRetry(documentServiceRequest) && !WebExceptionUtility.IsWebExceptionRetriable(webException))) { throw; } break; case HttpRequestException httpRequestException: - if (isOutOfRetries || !timeoutPolicy.IsSafeToRetry(requestMessage.Method)) + if (isOutOfRetries || !timeoutPolicy.IsSafeToRetry(documentServiceRequest)) { throw; } @@ -493,8 +493,6 @@ private async Task SendHttpHelperAsync( } private static bool IsOutOfRetries( - HttpTimeoutPolicy timeoutPolicy, - DateTime startDateTimeUtc, IEnumerator<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> timeoutEnumerator) { return !timeoutEnumerator.MoveNext(); // No more retries are configured diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs index 1620e1b36c..7753e9f443 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs @@ -13,12 +13,68 @@ internal abstract class HttpTimeoutPolicy public abstract string TimeoutPolicyName { get; } public abstract int TotalRetryCount { get; } public abstract IEnumerator<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> GetTimeoutEnumerator(); - public abstract bool IsSafeToRetry(HttpMethod httpMethod); public abstract bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage 
responseMessage); public virtual bool ShouldThrow503OnTimeout => false; + public bool IsSafeToRetry(DocumentServiceRequest documentServiceRequest) + { + if (this is HttpTimeoutPolicyNoRetry) + { + return (this as HttpTimeoutPolicyNoRetry).IsSafeToRetry(); + } + if (documentServiceRequest != null) + { + //Query Plan Requests + if (documentServiceRequest.ResourceType == ResourceType.Document + && documentServiceRequest.OperationType == OperationType.QueryPlan) + { + return true; + } + + //Get Partition Key Range Requests + if (documentServiceRequest.ResourceType == ResourceType.PartitionKeyRange + && documentServiceRequest.OperationType == OperationType.ReadFeed) + { + return true; + } + + //Get Addresses Requests + if (documentServiceRequest.ResourceType == ResourceType.Address) + { + return true; + } + + //Meta Data Read + if (HttpTimeoutPolicy.IsMetaData(documentServiceRequest) && documentServiceRequest.IsReadOnlyRequest) + { + return true; + } + + //Data Plane Operations + if (!HttpTimeoutPolicy.IsMetaData(documentServiceRequest)) + { + if (documentServiceRequest.IsReadOnlyRequest) + { + if (this is HttpTimeoutPolicyForThinClient) + { + return (this as HttpTimeoutPolicyForThinClient).shouldRetry; + } + else + { + return true; + } + } + else + { + return false; + } + } + } + return false; + } + public static HttpTimeoutPolicy GetTimeoutPolicy( DocumentServiceRequest documentServiceRequest, bool isPartitionLevelFailoverEnabled = false, diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRead.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRead.cs index 9f85ba33ab..02a29b99ad 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRead.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRead.cs @@ -32,12 +32,6 @@ private HttpTimeoutPolicyControlPlaneRead() return this.TimeoutsAndDelays.GetEnumerator(); } - // This is for control plane reads which should always be safe 
to retry on. - public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return true; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { return false; diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs index 53a7c78e9e..8143b49015 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs @@ -36,13 +36,6 @@ private HttpTimeoutPolicyControlPlaneRetriableHotPath(bool shouldThrow503OnTimeo return this.TimeoutsAndDelays.GetEnumerator(); } - // The hot path should always be safe to retires since it should be retrieving meta data - // information that is not idempotent. - public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return true; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { if (responseMessage == null) @@ -55,11 +48,6 @@ public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, Ht return false; } - if (!this.IsSafeToRetry(requestHttpMethod)) - { - return false; - } - return true; } diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyDefault.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyDefault.cs index 69a83707a1..15d4aaa6c3 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyDefault.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyDefault.cs @@ -36,13 +36,6 @@ private HttpTimeoutPolicyDefault(bool shouldThrow503OnTimeout) return this.TimeoutsAndDelays.GetEnumerator(); } - // Assume that it is not safe to retry unless it is a get method. - // Create and other operations could have succeeded even though a timeout occurred. 
- public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return httpMethod == HttpMethod.Get; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { return false; diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs index 3e623a5aed..a949040698 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs @@ -21,9 +21,9 @@ private HttpTimeoutPolicyForPartitionFailover(bool shouldThrow503OnTimeout) private readonly IReadOnlyList<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> TimeoutsAndDelays = new List<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)>() { - (TimeSpan.FromSeconds(.5), TimeSpan.Zero), (TimeSpan.FromSeconds(.5), TimeSpan.Zero), (TimeSpan.FromSeconds(1), TimeSpan.Zero), + (TimeSpan.FromSeconds(5), TimeSpan.Zero), }; public override string TimeoutPolicyName => HttpTimeoutPolicyForPartitionFailover.Name; @@ -35,13 +35,6 @@ private HttpTimeoutPolicyForPartitionFailover(bool shouldThrow503OnTimeout) return this.TimeoutsAndDelays.GetEnumerator(); } - // Assume that it is not safe to retry unless it is a get method. - // Create and other operations could have succeeded even though a timeout occurred. 
- public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return httpMethod == HttpMethod.Get; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { return false; diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForThinClient.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForThinClient.cs index 91201a8538..4f1d9f598f 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForThinClient.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForThinClient.cs @@ -39,11 +39,6 @@ private HttpTimeoutPolicyForThinClient( return this.TimeoutsAndDelays.GetEnumerator(); } - public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return this.shouldRetry; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { if (responseMessage == null) @@ -56,7 +51,7 @@ public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, Ht return false; } - if (!this.IsSafeToRetry(requestHttpMethod)) + if (!this.shouldRetry) { return false; } diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs index 65ea12144b..6553d929e1 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs @@ -32,7 +32,7 @@ private HttpTimeoutPolicyNoRetry() } // Always Unsafe to retry - public override bool IsSafeToRetry(HttpMethod httpMethod) + public bool IsSafeToRetry() { return false; } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs index 9411ade3eb..685f816a6e 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs +++ 
b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs @@ -123,7 +123,8 @@ async Task sendFunc(HttpRequestMessage request, Cancellatio resourceType: ResourceType.Collection, timeoutPolicy: currentTimeoutPolicy.Key, clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: CreateDocumentServiceRequestByOperation(ResourceType.Collection, OperationType.Read)); Assert.AreEqual(HttpStatusCode.OK, responseMessage.StatusCode); } @@ -266,7 +267,8 @@ Task sendFunc(HttpRequestMessage request, CancellationToken resourceType: ResourceType.Collection, timeoutPolicy: HttpTimeoutPolicyDefault.Instance, clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: CreateDocumentServiceRequestByOperation(ResourceType.Collection, OperationType.Read)); } } catch (Exception) @@ -281,7 +283,7 @@ Task sendFunc(HttpRequestMessage request, CancellationToken public async Task HttpTimeoutThrow503TestAsync() { - async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int expectedNumberOfRetrys) + async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, OperationType operationType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int expectedNumberOfRetrys) { int count = 0; Task sendFunc(HttpRequestMessage request, CancellationToken cancellationToken) @@ -306,7 +308,8 @@ Task sendFunc(HttpRequestMessage request, CancellationToken resourceType: resourceType, timeoutPolicy: timeoutPolicy, clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: 
CreateDocumentServiceRequestByOperation(resourceType, operationType)); } } catch (Exception e) @@ -328,19 +331,19 @@ Task sendFunc(HttpRequestMessage request, CancellationToken } //Data plane read - await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.Read, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); //Data plane write (Should throw a 408 OperationCanceledException rather than a 503) - await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, HttpTimeoutPolicyDefault.Instance, typeof(TaskCanceledException), 1); + await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, HttpTimeoutPolicyDefault.Instance, typeof(TaskCanceledException), 1); //Meta data read - await TestScenarioAsync(HttpMethod.Get, ResourceType.Database, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + await TestScenarioAsync(HttpMethod.Get, ResourceType.Database, OperationType.Read, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); //Query plan read (note all query plan operations are reads). 
- await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.Read, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); //Metadata Write (Should throw a 408 OperationCanceledException rather than a 503) - await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, HttpTimeoutPolicyDefault.Instance, typeof(TaskCanceledException), 1); + await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, HttpTimeoutPolicyDefault.Instance, typeof(TaskCanceledException), 1); } [TestMethod] @@ -433,7 +436,8 @@ async Task sendFunc(HttpRequestMessage request, Cancellatio resourceType: ResourceType.Document, timeoutPolicy: HttpTimeoutPolicyControlPlaneRetriableHotPath.Instance, clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: documentServiceRequest); Assert.AreEqual(HttpStatusCode.OK, responseMessage.StatusCode); } @@ -492,7 +496,7 @@ public void CreateHttpClientHandlerCreatesCorrectValueType() public async Task HttpTimeoutPolicyForThinClientOn503TestAsync() { - async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int expectedNumberOfRetrys) + async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, OperationType operationType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int expectedNumberOfRetrys) { int count = 0; Task sendFunc(HttpRequestMessage request, CancellationToken cancellationToken) @@ -517,7 +521,8 @@ Task sendFunc(HttpRequestMessage request, CancellationToken resourceType: resourceType, timeoutPolicy: timeoutPolicy, clientSideRequestStatistics: new 
ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: CreateDocumentServiceRequestByOperation(resourceType, operationType)); } } catch (Exception e) @@ -542,6 +547,7 @@ Task sendFunc(HttpRequestMessage request, CancellationToken await TestScenarioAsync( method: HttpMethod.Get, resourceType: ResourceType.Document, + operationType: OperationType.Read, timeoutPolicy: HttpTimeoutPolicy.GetTimeoutPolicy( documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Document, OperationType.Read), isPartitionLevelFailoverEnabled: false, @@ -553,6 +559,7 @@ await TestScenarioAsync( await TestScenarioAsync( method: HttpMethod.Get, resourceType: ResourceType.Document, + operationType: OperationType.Read, timeoutPolicy: HttpTimeoutPolicy.GetTimeoutPolicy( documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Document, OperationType.Query), isPartitionLevelFailoverEnabled: false, @@ -564,6 +571,7 @@ await TestScenarioAsync( await TestScenarioAsync( method: HttpMethod.Post, resourceType: ResourceType.Document, + operationType: OperationType.Upsert, timeoutPolicy: HttpTimeoutPolicy.GetTimeoutPolicy( documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Document, OperationType.Create), isPartitionLevelFailoverEnabled: false, @@ -575,6 +583,7 @@ await TestScenarioAsync( await TestScenarioAsync( method: HttpMethod.Get, resourceType: ResourceType.Database, + operationType: OperationType.Read, timeoutPolicy: HttpTimeoutPolicy.GetTimeoutPolicy( documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Database, OperationType.Read), isPartitionLevelFailoverEnabled: false, @@ -583,6 +592,78 @@ await TestScenarioAsync( expectedNumberOfRetrys: 3); } + [TestMethod] + public async Task 
HttpTimeouPolicytRetriesTestAsync() + { + + async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, OperationType operationType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int expectedNumberOfRetrys) + { + int count = 0; + Task sendFunc(HttpRequestMessage request, CancellationToken cancellationToken) + { + count++; + + throw new OperationCanceledException("API with exception"); + + } + + DocumentClientEventSource eventSource = DocumentClientEventSource.Instance; + HttpMessageHandler messageHandler = new MockMessageHandler(sendFunc); + using CosmosHttpClient cosmoshttpClient = MockCosmosUtil.CreateCosmosHttpClient(() => new HttpClient(messageHandler)); + + try + { + using (ITrace trace = Trace.GetRootTrace(nameof(NoRetryOnNoRetryPolicyTestAsync))) + { + HttpResponseMessage responseMessage1 = await cosmoshttpClient.SendHttpAsync(() => + new ValueTask( + result: new HttpRequestMessage(method, new Uri("http://localhost"))), + resourceType: resourceType, + timeoutPolicy: timeoutPolicy, + clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), + cancellationToken: default, + documentServiceRequest: CreateDocumentServiceRequestByOperation(resourceType, operationType)); + } + } + catch (Exception e) + { + Assert.AreEqual(expectedNumberOfRetrys, count, "Should retry 3 times for read methods, for writes should only be tried once"); + Assert.AreEqual(e.GetType(), expectedException); + + if (e.GetType() == typeof(CosmosException)) + { + CosmosException cosmosException = (CosmosException)e; + Assert.AreEqual(cosmosException.StatusCode, System.Net.HttpStatusCode.ServiceUnavailable); + Assert.AreEqual((int)cosmosException.SubStatusCode, (int)SubStatusCodes.TransportGenerated503); + + Assert.IsNotNull(cosmosException.Trace); + Assert.AreNotEqual(cosmosException.Trace, NoOpTrace.Singleton); + } + } + } + + await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.QueryPlan, 
HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + + await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 1); + + await TestScenarioAsync(HttpMethod.Get, ResourceType.PartitionKeyRange, OperationType.ReadFeed, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + + await TestScenarioAsync(HttpMethod.Post, ResourceType.PartitionKeyRange, OperationType.Upsert, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 1); + + await TestScenarioAsync(HttpMethod.Get, ResourceType.Address, OperationType.Read, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + + await TestScenarioAsync(HttpMethod.Post, ResourceType.Address, OperationType.Upsert, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + + await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.Read, HttpTimeoutPolicyForThinClient.InstanceShouldRetryAndThrow503OnTimeout, typeof(CosmosException), 3); + + await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, HttpTimeoutPolicyForThinClient.InstanceShouldRetryAndThrow503OnTimeout, typeof(CosmosException), 1); + + await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, HttpTimeoutPolicyForThinClient.InstanceShouldRetryAndThrow503OnTimeout, typeof(CosmosException), 1); + + await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.Read, HttpTimeoutPolicyForPartitionFailover.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + + } + private static DocumentServiceRequest CreateDocumentServiceRequestByOperation( ResourceType resourceType, OperationType 
operationType) From c92aa436f9f1e058642bb0a61a594ba95a2c590f Mon Sep 17 00:00:00 2001 From: Praveen Kolluri Date: Thu, 6 Nov 2025 13:00:40 -0800 Subject: [PATCH 2/6] Use DocumentServiceRequest's IsReadOnlyRequest to determine whether a retry is safe - part1 --- .../src/HttpClient/CosmosHttpClientCore.cs | 6 +- .../src/HttpClient/HttpTimeoutPolicy.cs | 57 --------------- .../HttpClient/HttpTimeoutPolicyNoRetry.cs | 6 -- .../CosmosHttpClientCoreTests.cs | 71 ------------------- 4 files changed, 3 insertions(+), 137 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs index 06b9d9d9f6..898f64a374 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs @@ -415,7 +415,7 @@ private async Task SendHttpHelperAsync( // Convert OperationCanceledException to 408 when the HTTP client throws it. This makes it clear that the // the request timed out and was not user canceled operation.
- if (isOutOfRetries || !timeoutPolicy.IsSafeToRetry(documentServiceRequest)) + if (isOutOfRetries || !documentServiceRequest.IsReadOnlyRequest) { // throw current exception (caught in transport handler) string message = @@ -440,14 +440,14 @@ private async Task SendHttpHelperAsync( break; case WebException webException: - if (isOutOfRetries || (!timeoutPolicy.IsSafeToRetry(documentServiceRequest) && !WebExceptionUtility.IsWebExceptionRetriable(webException))) + if (isOutOfRetries || (!documentServiceRequest.IsReadOnlyRequest && !WebExceptionUtility.IsWebExceptionRetriable(webException))) { throw; } break; case HttpRequestException httpRequestException: - if (isOutOfRetries || !timeoutPolicy.IsSafeToRetry(documentServiceRequest)) + if (isOutOfRetries || !documentServiceRequest.IsReadOnlyRequest) { throw; } diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs index 7753e9f443..1891a908d4 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs @@ -18,63 +18,6 @@ internal abstract class HttpTimeoutPolicy public virtual bool ShouldThrow503OnTimeout => false; - public bool IsSafeToRetry(DocumentServiceRequest documentServiceRequest) - { - if (this is HttpTimeoutPolicyNoRetry) - { - return (this as HttpTimeoutPolicyNoRetry).IsSafeToRetry(); - } - if (documentServiceRequest != null) - { - //Query Plan Requests - if (documentServiceRequest.ResourceType == ResourceType.Document - && documentServiceRequest.OperationType == OperationType.QueryPlan) - { - return true; - } - - //Get Partition Key Range Requests - if (documentServiceRequest.ResourceType == ResourceType.PartitionKeyRange - && documentServiceRequest.OperationType == OperationType.ReadFeed) - { - return true; - } - - //Get Addresses Requests - if (documentServiceRequest.ResourceType == ResourceType.Address) - { - return true; - } - - //Meta Data Read - if 
(HttpTimeoutPolicy.IsMetaData(documentServiceRequest) && documentServiceRequest.IsReadOnlyRequest) - { - return true; - } - - //Data Plane Operations - if (!HttpTimeoutPolicy.IsMetaData(documentServiceRequest)) - { - if (documentServiceRequest.IsReadOnlyRequest) - { - if (this is HttpTimeoutPolicyForThinClient) - { - return (this as HttpTimeoutPolicyForThinClient).shouldRetry; - } - else - { - return true; - } - } - else - { - return false; - } - } - } - return false; - } - public static HttpTimeoutPolicy GetTimeoutPolicy( DocumentServiceRequest documentServiceRequest, bool isPartitionLevelFailoverEnabled = false, diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs index 6553d929e1..cb4c48011b 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs @@ -31,12 +31,6 @@ private HttpTimeoutPolicyNoRetry() return this.TimeoutsAndDelays.GetEnumerator(); } - // Always Unsafe to retry - public bool IsSafeToRetry() - { - return false; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { return false; diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs index 685f816a6e..1507b93e0b 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs @@ -592,77 +592,6 @@ await TestScenarioAsync( expectedNumberOfRetrys: 3); } - [TestMethod] - public async Task HttpTimeouPolicytRetriesTestAsync() - { - - async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, OperationType operationType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int 
expectedNumberOfRetrys) - { - int count = 0; - Task sendFunc(HttpRequestMessage request, CancellationToken cancellationToken) - { - count++; - - throw new OperationCanceledException("API with exception"); - - } - - DocumentClientEventSource eventSource = DocumentClientEventSource.Instance; - HttpMessageHandler messageHandler = new MockMessageHandler(sendFunc); - using CosmosHttpClient cosmoshttpClient = MockCosmosUtil.CreateCosmosHttpClient(() => new HttpClient(messageHandler)); - - try - { - using (ITrace trace = Trace.GetRootTrace(nameof(NoRetryOnNoRetryPolicyTestAsync))) - { - HttpResponseMessage responseMessage1 = await cosmoshttpClient.SendHttpAsync(() => - new ValueTask( - result: new HttpRequestMessage(method, new Uri("http://localhost"))), - resourceType: resourceType, - timeoutPolicy: timeoutPolicy, - clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default, - documentServiceRequest: CreateDocumentServiceRequestByOperation(resourceType, operationType)); - } - } - catch (Exception e) - { - Assert.AreEqual(expectedNumberOfRetrys, count, "Should retry 3 times for read methods, for writes should only be tried once"); - Assert.AreEqual(e.GetType(), expectedException); - - if (e.GetType() == typeof(CosmosException)) - { - CosmosException cosmosException = (CosmosException)e; - Assert.AreEqual(cosmosException.StatusCode, System.Net.HttpStatusCode.ServiceUnavailable); - Assert.AreEqual((int)cosmosException.SubStatusCode, (int)SubStatusCodes.TransportGenerated503); - - Assert.IsNotNull(cosmosException.Trace); - Assert.AreNotEqual(cosmosException.Trace, NoOpTrace.Singleton); - } - } - } - - await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.QueryPlan, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); - - await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, 
HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 1); - - await TestScenarioAsync(HttpMethod.Get, ResourceType.PartitionKeyRange, OperationType.ReadFeed, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); - - await TestScenarioAsync(HttpMethod.Post, ResourceType.PartitionKeyRange, OperationType.Upsert, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 1); - - await TestScenarioAsync(HttpMethod.Get, ResourceType.Address, OperationType.Read, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); - - await TestScenarioAsync(HttpMethod.Post, ResourceType.Address, OperationType.Upsert, HttpTimeoutPolicyControlPlaneRetriableHotPath.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); - - await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.Read, HttpTimeoutPolicyForThinClient.InstanceShouldRetryAndThrow503OnTimeout, typeof(CosmosException), 3); - - await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, HttpTimeoutPolicyForThinClient.InstanceShouldRetryAndThrow503OnTimeout, typeof(CosmosException), 1); - - await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, HttpTimeoutPolicyForThinClient.InstanceShouldRetryAndThrow503OnTimeout, typeof(CosmosException), 1); - - await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.Read, HttpTimeoutPolicyForPartitionFailover.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); - - } private static DocumentServiceRequest CreateDocumentServiceRequestByOperation( ResourceType resourceType, From b8719670aa66b3430a6e23c5e5909b6c9323142a Mon Sep 17 00:00:00 2001 From: Praveen Kolluri Date: Thu, 6 Nov 2025 13:12:41 -0800 Subject: [PATCH 3/6] Remove sln changes. 
--- Microsoft.Azure.Cosmos.sln | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/Microsoft.Azure.Cosmos.sln b/Microsoft.Azure.Cosmos.sln index 5dd1a4d66a..cec2d9a5f4 100644 --- a/Microsoft.Azure.Cosmos.sln +++ b/Microsoft.Azure.Cosmos.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.14.36623.8 d17.14 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29123.88 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Azure.Cosmos", "Microsoft.Azure.Cosmos\src\Microsoft.Azure.Cosmos.csproj", "{36F6F6A8-CEC8-4261-9948-903495BC3C25}" EndProject @@ -181,18 +181,6 @@ Global {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|Any CPU.Build.0 = Release|Any CPU {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|x64.ActiveCfg = Release|Any CPU {021DDC27-02EF-42C4-9A9E-AA600833C2EE}.Release|x64.Build.0 = Release|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Cover|Any CPU.ActiveCfg = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Cover|Any CPU.Build.0 = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Cover|x64.ActiveCfg = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Cover|x64.Build.0 = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Debug|Any CPU.Build.0 = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Debug|x64.ActiveCfg = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Debug|x64.Build.0 = Debug|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Release|Any CPU.ActiveCfg = Release|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Release|Any CPU.Build.0 = Release|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Release|x64.ActiveCfg = Release|Any CPU - {D744906A-1091-403F-B0B6-794DE045169A}.Release|x64.Build.0 = Release|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|Any CPU.ActiveCfg 
= Debug|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|Any CPU.Build.0 = Debug|Any CPU {CE4D6DA8-148D-4A98-943B-D8C2D532E1DC}.Cover|x64.ActiveCfg = Debug|Any CPU From fe72a70b7dc277c6008ae3a3f1131438f253c3c7 Mon Sep 17 00:00:00 2001 From: Praveen Kolluri Date: Thu, 6 Nov 2025 15:32:21 -0800 Subject: [PATCH 4/6] Update the safe retry to use address resource type as well. --- .../src/HttpClient/CosmosHttpClientCore.cs | 15 ++++++++++++--- .../HttpTimeoutPolicyForPartitionFailover.cs | 2 +- .../GatewayStoreModelTest.cs | 13 ++++++++++--- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs index 898f64a374..65dd6898bf 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs @@ -415,7 +415,7 @@ private async Task SendHttpHelperAsync( // Convert OperationCanceledException to 408 when the HTTP client throws it. This makes it clear that the // the request timed out and was not user canceled operation. 
- if (isOutOfRetries || !documentServiceRequest.IsReadOnlyRequest) + if (isOutOfRetries || !IsSafeToRetry(documentServiceRequest)) { // throw current exception (caught in transport handler) string message = @@ -440,14 +440,14 @@ private async Task SendHttpHelperAsync( break; case WebException webException: - if (isOutOfRetries || (!documentServiceRequest.IsReadOnlyRequest && !WebExceptionUtility.IsWebExceptionRetriable(webException))) + if (isOutOfRetries || (!IsSafeToRetry(documentServiceRequest) && !WebExceptionUtility.IsWebExceptionRetriable(webException))) { throw; } break; case HttpRequestException httpRequestException: - if (isOutOfRetries || !documentServiceRequest.IsReadOnlyRequest) + if (isOutOfRetries || !IsSafeToRetry(documentServiceRequest)) { throw; } @@ -498,6 +498,15 @@ private static bool IsOutOfRetries( return !timeoutEnumerator.MoveNext(); // No more retries are configured } + private static bool IsSafeToRetry(DocumentServiceRequest documentServiceRequest) + { + if (documentServiceRequest == null) + { + return false; + } + return documentServiceRequest.IsReadOnlyRequest || documentServiceRequest.ResourceType == ResourceType.Address; + } + private async Task ExecuteHttpHelperAsync( HttpRequestMessage requestMessage, ResourceType resourceType, diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs index a949040698..5f847ac9ec 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs @@ -21,9 +21,9 @@ private HttpTimeoutPolicyForPartitionFailover(bool shouldThrow503OnTimeout) private readonly IReadOnlyList<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> TimeoutsAndDelays = new List<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)>() { + (TimeSpan.FromSeconds(.5), TimeSpan.Zero), 
(TimeSpan.FromSeconds(.5), TimeSpan.Zero), (TimeSpan.FromSeconds(1), TimeSpan.Zero), - (TimeSpan.FromSeconds(5), TimeSpan.Zero), }; public override string TimeoutPolicyName => HttpTimeoutPolicyForPartitionFailover.Name; diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GatewayStoreModelTest.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GatewayStoreModelTest.cs index a39603c20c..1a52543ee2 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GatewayStoreModelTest.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GatewayStoreModelTest.cs @@ -889,15 +889,22 @@ public async Task GatewayStatsDurationTest() DocumentClientEventSource.Instance); using (ITrace trace = Tracing.Trace.GetRootTrace(nameof(GatewayStatsDurationTest))) - { - + { + Tracing.TraceData.ClientSideRequestStatisticsTraceDatum clientSideRequestStatistics = new Tracing.TraceData.ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace); await cosmosHttpClient.SendHttpAsync(() => new ValueTask(new HttpRequestMessage(HttpMethod.Get, "http://someuri.com")), ResourceType.Document, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, clientSideRequestStatistics, - CancellationToken.None); + CancellationToken.None, + documentServiceRequest: new DocumentServiceRequest( + OperationType.Read, + ResourceType.Document, + $"dbs/dummy_db_id/colls/dummy_ct_id", + body: null, + AuthorizationTokenType.PrimaryMasterKey, + headers: null)); Assert.AreEqual(clientSideRequestStatistics.HttpResponseStatisticsList.Count, 2); // The duration is calculated using date times which can cause the duration to be slightly off. This allows for up to 15 Ms of variance. From c1cd5ccd273dc470074cdfb4754aa8645a75e4f7 Mon Sep 17 00:00:00 2001 From: Praveen Kolluri Date: Fri, 7 Nov 2025 10:42:01 -0800 Subject: [PATCH 5/6] when DRS is null, allow retry, use convention for static methods. 
--- .../src/HttpClient/CosmosHttpClientCore.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs index 65dd6898bf..c797e35d6e 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs @@ -415,7 +415,7 @@ private async Task SendHttpHelperAsync( // Convert OperationCanceledException to 408 when the HTTP client throws it. This makes it clear that the // the request timed out and was not user canceled operation. - if (isOutOfRetries || !IsSafeToRetry(documentServiceRequest)) + if (isOutOfRetries || !CosmosHttpClientCore.IsSafeToRetry(documentServiceRequest)) { // throw current exception (caught in transport handler) string message = @@ -440,14 +440,14 @@ private async Task SendHttpHelperAsync( break; case WebException webException: - if (isOutOfRetries || (!IsSafeToRetry(documentServiceRequest) && !WebExceptionUtility.IsWebExceptionRetriable(webException))) + if (isOutOfRetries || (!CosmosHttpClientCore.IsSafeToRetry(documentServiceRequest) && !WebExceptionUtility.IsWebExceptionRetriable(webException))) { throw; } break; case HttpRequestException httpRequestException: - if (isOutOfRetries || !IsSafeToRetry(documentServiceRequest)) + if (isOutOfRetries || !CosmosHttpClientCore.IsSafeToRetry(documentServiceRequest)) { throw; } @@ -502,7 +502,7 @@ private static bool IsSafeToRetry(DocumentServiceRequest documentServiceRequest) { if (documentServiceRequest == null) { - return false; + return true; } return documentServiceRequest.IsReadOnlyRequest || documentServiceRequest.ResourceType == ResourceType.Address; } From 07178e202d0fed3b2de1ceb82876bca2c9245348 Mon Sep 17 00:00:00 2001 From: Praveen Kolluri Date: Fri, 7 Nov 2025 15:37:24 -0800 Subject: [PATCH 6/6] Add decision in the comments for IsSafeToRetry mechanism --- 
Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs index c797e35d6e..26c1c92665 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs @@ -500,6 +500,10 @@ private static bool IsOutOfRetries( private static bool IsSafeToRetry(DocumentServiceRequest documentServiceRequest) { + // Three scenarios are safely retriable: + // 1) If request is null, since such requests originate from GetAsync calls + // 2) If request is read-only + // 3) If request is an address request. if (documentServiceRequest == null) { return true;