Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Added Request charge and Payload size options to generate request diagnostics
  • Loading branch information
sourabh1007 committed Apr 29, 2024
commit a1ab372dc4a62b77aec67d7c280051d60f532ce8
36 changes: 33 additions & 3 deletions Microsoft.Azure.Cosmos/src/CosmosThresholdOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,50 @@ namespace Microsoft.Azure.Cosmos
using System;

/// <summary>
/// Threshold values for Distributed Tracing.
/// This class describes the thresholds at which more detailed diagnostics are emitted for an operation due to
/// high latency, high RU consumption or high payload sizes.
/// </summary>
public class CosmosThresholdOptions
{
    /// <summary>
    /// Latency threshold for non-point operations, i.e. all operations except
    /// ReadItem, CreateItem, UpsertItem, ReplaceItem, PatchItem or DeleteItem (for example, Query).
    /// Can be used to define custom latency thresholds. When the latency threshold is exceeded, more detailed
    /// diagnostics will be emitted (including the request diagnostics). There is some overhead in emitting the
    /// more detailed diagnostics, so the recommendation is to choose latency thresholds that reduce the noise
    /// level and only emit detailed diagnostics when there is a real business impact.
    /// The default value for the non-point operation latency threshold is 3 seconds.
    /// </summary>
    /// <value>3 seconds</value>
    public TimeSpan NonPointOperationLatencyThreshold { get; set; } = TimeSpan.FromSeconds(3);

    /// <summary>
    /// Latency threshold for point operations, i.e. operations other than Query.
    /// Point operations are: ReadItem, CreateItem, UpsertItem, ReplaceItem, PatchItem or DeleteItem.
    /// Can be used to define custom latency thresholds. When the latency threshold is exceeded, more detailed
    /// diagnostics will be emitted (including the request diagnostics). There is some overhead in emitting the
    /// more detailed diagnostics, so the recommendation is to choose latency thresholds that reduce the noise
    /// level and only emit detailed diagnostics when there is a real business impact.
    /// The default value for the point operation latency threshold is 1 second.
    /// </summary>
    /// <value>1 second</value>
    public TimeSpan PointOperationLatencyThreshold { get; set; } = TimeSpan.FromSeconds(1);

    /// <summary>
    /// Can be used to define a custom RU (request charge) threshold. When the threshold is exceeded, more detailed
    /// diagnostics will be emitted (including the request diagnostics). There is some overhead in emitting the
    /// more detailed diagnostics, so the recommendation is to choose a request charge threshold that reduces the
    /// noise level and only emits detailed diagnostics when the request charge is significantly higher than
    /// expected.
    /// The default value for the request charge threshold is 1000.
    /// </summary>
    /// <value>1000</value>
    public double RequestChargeThreshold { get; set; } = 1000;

    /// <summary>
    /// Can be used to define a payload size threshold. When the threshold is exceeded for either request or
    /// response payloads, more detailed diagnostics will be emitted (including the request diagnostics).
    /// There is some overhead in emitting the more detailed diagnostics, so the recommendation is to choose a
    /// payload size threshold that reduces the noise level and only emits detailed diagnostics when the payload
    /// size is significantly higher than expected.
    /// The default value for the payload size threshold is <see cref="int.MaxValue"/>.
    /// </summary>
    /// <value><see cref="int.MaxValue"/></value>
    public int PayloadSizeThresholdInBytes { get; set; } = int.MaxValue;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace Microsoft.Azure.Cosmos.Telemetry
{
using System;
using System.Diagnostics.Tracing;
using global::Azure.Core.Diagnostics;
using Microsoft.Azure.Cosmos.Telemetry.Diagnostics;
Expand Down Expand Up @@ -35,17 +36,30 @@ public static void RecordDiagnosticsForRequests(
Documents.OperationType operationType,
OpenTelemetryAttributes response)
{
if (!DiagnosticsFilterHelper.IsSuccessfulResponse(
response.StatusCode, response.SubStatusCode) && CosmosDbEventSource.IsEnabled(EventLevel.Warning))
if (CosmosDbEventSource.IsEnabled(EventLevel.Warning))
{
CosmosDbEventSource.Singleton.FailedRequest(response.Diagnostics.ToString());
}
else if (DiagnosticsFilterHelper.IsLatencyThresholdCrossed(
config: config,
operationType: operationType,
response: response) && CosmosDbEventSource.IsEnabled(EventLevel.Warning))
{
CosmosDbEventSource.Singleton.LatencyOverThreshold(response.Diagnostics.ToString());
if (!DiagnosticsFilterHelper.IsSuccessfulResponse(
response.StatusCode, response.SubStatusCode))
{
CosmosDbEventSource.Singleton.FailedRequest(response.Diagnostics.ToString());
}
else if (DiagnosticsFilterHelper.IsLatencyThresholdCrossed(
config: config,
operationType: operationType,
response: response))
{
CosmosDbEventSource.Singleton.LatencyOverThreshold(response.Diagnostics.ToString());
}
else if (config.RequestChargeThreshold <= response.RequestCharge)
{
CosmosDbEventSource.Singleton.RequestChargeOverThreshold(response.Diagnostics.ToString());
}
else if (config.PayloadSizeThresholdInBytes <=
Math.Max(Convert.ToInt32(response.RequestContentLength),
Convert.ToInt32(response.ResponseContentLength)))
{
CosmosDbEventSource.Singleton.PayloadSizeOverThreshold(response.Diagnostics.ToString());
}
}
}

Expand Down Expand Up @@ -75,5 +89,17 @@ private void FailedRequest(string message)
{
this.WriteEvent(3, message);
}

/// <summary>
/// Writes event 4 at <see cref="EventLevel.Warning"/> level, carrying the supplied message as its payload.
/// </summary>
/// <param name="message">Text attached to the emitted event.</param>
[Event(4, Level = EventLevel.Warning)]
private void RequestChargeOverThreshold(string message) => this.WriteEvent(4, message);

/// <summary>
/// Writes event 5 at <see cref="EventLevel.Warning"/> level, carrying the supplied message as its payload.
/// </summary>
/// <param name="message">Text attached to the emitted event.</param>
[Event(5, Level = EventLevel.Warning)]
private void PayloadSizeOverThreshold(string message) => this.WriteEvent(5, message);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,28 @@ public static bool IsLatencyThresholdCrossed(
OperationType operationType,
OpenTelemetryAttributes response)
{
return response.Diagnostics.GetClientElapsedTime() > DiagnosticsFilterHelper.DefaultThreshold(operationType, config);
return response.Diagnostics.GetClientElapsedTime() > DiagnosticsFilterHelper.DefaultLatencyThreshold(operationType, config);
}

/// <summary>
/// Checks whether the response HTTP status code (together with its sub-status code) is treated as successful
/// </summary>
/// <returns>true or false</returns>
public static bool IsSuccessfulResponse(HttpStatusCode statusCode, int substatusCode)
public static bool IsSuccessfulResponse(HttpStatusCode statusCode, int subStatusCode)
{
return statusCode.IsSuccess()
|| (statusCode == System.Net.HttpStatusCode.NotFound && substatusCode == 0)
|| (statusCode == System.Net.HttpStatusCode.NotModified && substatusCode == 0)
|| (statusCode == System.Net.HttpStatusCode.Conflict && substatusCode == 0)
|| (statusCode == System.Net.HttpStatusCode.PreconditionFailed && substatusCode == 0);
|| (statusCode == System.Net.HttpStatusCode.NotFound && subStatusCode == 0)
|| (statusCode == System.Net.HttpStatusCode.NotModified && subStatusCode == 0)
|| (statusCode == System.Net.HttpStatusCode.Conflict && subStatusCode == 0)
|| (statusCode == System.Net.HttpStatusCode.PreconditionFailed && subStatusCode == 0);
}

/// <summary>
/// Get default threshold value based on operation type
/// Get default Latency threshold value based on operation type
/// </summary>
/// <param name="operationType"></param>
/// <param name="config"></param>
internal static TimeSpan DefaultThreshold(OperationType operationType, CosmosThresholdOptions config)
internal static TimeSpan DefaultLatencyThreshold(OperationType operationType, CosmosThresholdOptions config)
{
config ??= DiagnosticsFilterHelper.defaultThresholdOptions;
return DiagnosticsFilterHelper.IsPointOperation(operationType) ?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public void CheckedDefaultThresholdBasedOnOperationType()

foreach(OperationType operationType in values)
{
TimeSpan defaultThreshold = DiagnosticsFilterHelper.DefaultThreshold(operationType, config);
TimeSpan defaultThreshold = DiagnosticsFilterHelper.DefaultLatencyThreshold(operationType, config);

if(DiagnosticsFilterHelper.IsPointOperation(operationType))
Assert.AreEqual(defaultThreshold, config.PointOperationLatencyThreshold);
Expand Down