Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,12 @@ public static void RecordDiagnosticsForRequests(
Documents.OperationType operationType,
OpenTelemetryAttributes response)
{
if (DiagnosticsFilterHelper.IsTracingNeeded(
if (!DiagnosticsFilterHelper.IsSuccessfulResponse(
response: response) && CosmosDbEventSource.IsEnabled(EventLevel.Warning))
{
CosmosDbEventSource.Singleton.FailedRequest(response.Diagnostics.ToString());
}
else if (DiagnosticsFilterHelper.IsLatencyThresholdCrossed(
config: config,
operationType: operationType,
response: response) && CosmosDbEventSource.IsEnabled(EventLevel.Warning))
Expand Down Expand Up @@ -64,5 +69,11 @@ private void LatencyOverThreshold(string message)
{
this.WriteEvent(2, message);
}

/// <summary>
/// Writes event 3, declared at <see cref="EventLevel.Error"/>, carrying the supplied message.
/// </summary>
/// <param name="message">Text payload of the event; the visible call site passes the response diagnostics rendered as a string.</param>
// NOTE(review): the visible call site gates this event on IsEnabled(EventLevel.Warning)
// while the event itself is declared at Error level - confirm that gate is intended.
[Event(3, Level = EventLevel.Error)]
private void FailedRequest(string message)
{
// The id passed to WriteEvent is the same id declared in the Event attribute above.
this.WriteEvent(3, message);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@ namespace Microsoft.Azure.Cosmos.Telemetry.Diagnostics
internal static class DiagnosticsFilterHelper
{
/// <summary>
/// Allow only when either of below is <b>True</b><br></br>
/// 1) Latency is not more than 100/250 (query) ms<br></br>
/// 3) HTTP status code is not Success<br></br>
/// Returns true when the client elapsed time is more than the latency threshold (defaults: 100 ms for non-query, 250 ms for query operations)
/// </summary>
/// <returns>true or false</returns>
public static bool IsTracingNeeded(
public static bool IsLatencyThresholdCrossed(
DistributedTracingOptions config,
OperationType operationType,
OpenTelemetryAttributes response)
Expand All @@ -31,7 +29,20 @@ public static bool IsTracingNeeded(
latencyThreshold = operationType == OperationType.Query ? DistributedTracingOptions.DefaultQueryTimeoutThreshold : DistributedTracingOptions.DefaultCrudLatencyThreshold;
}

return response.Diagnostics.GetClientElapsedTime() > latencyThreshold || !response.StatusCode.IsSuccess();
return response.Diagnostics.GetClientElapsedTime() > latencyThreshold;
}

/// <summary>
/// Decides whether a response counts as successful for diagnostics purposes.
/// A response qualifies when its status code reports success, or when it is
/// 404 (NotFound), 304 (NotModified), 409 (Conflict) or 412 (PreconditionFailed)
/// with a sub status code of 0.
/// </summary>
/// <param name="response">Attributes carrying the status code and sub status code to inspect.</param>
/// <returns>true when the response is treated as successful; otherwise false</returns>
public static bool IsSuccessfulResponse(OpenTelemetryAttributes response)
{
    // Status codes that already report success need no further inspection.
    if (response.StatusCode.IsSuccess())
    {
        return true;
    }

    // Every remaining accepted status code additionally requires sub status 0.
    if (response.SubStatusCode != 0)
    {
        return false;
    }

    switch (response.StatusCode)
    {
        case System.Net.HttpStatusCode.NotFound:
        case System.Net.HttpStatusCode.NotModified:
        case System.Net.HttpStatusCode.Conflict:
        case System.Net.HttpStatusCode.PreconditionFailed:
            return true;
        default:
            return false;
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@
<ATTRIBUTE key="tcp.sub_status_code">1001</ATTRIBUTE>
<ATTRIBUTE key="tcp.status_code">207</ATTRIBUTE>
</ACTIVITY>
<EVENT name="LatencyOverThreshold" />
<EVENT name="FailedRequest" />
</OTelActivities></Output>
</Result>
</Results>
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void CheckReturnFalseOnSuccessAndLowerLatencyThanConfiguredConfig()

Assert.IsFalse(
DiagnosticsFilterHelper
.IsTracingNeeded(distributedTracingOptions, OperationType.Read, response),
.IsLatencyThresholdCrossed(distributedTracingOptions, OperationType.Read, response),
$" Response time is {response.Diagnostics.GetClientElapsedTime().Milliseconds}ms " +
$"and Configured threshold value is {distributedTracingOptions.LatencyThresholdForDiagnosticEvent.Value.Milliseconds}ms " +
$"and Is response Success : {response.StatusCode.IsSuccess()}" );
Expand All @@ -70,8 +70,8 @@ public void CheckReturnTrueOnFailedStatusCode()
};

Assert.IsTrue(
DiagnosticsFilterHelper
.IsTracingNeeded(distributedTracingOptions, OperationType.Read, response),
!DiagnosticsFilterHelper
.IsSuccessfulResponse(response),
$" Response time is {response.Diagnostics.GetClientElapsedTime().Milliseconds}ms " +
$"and Configured threshold value is {distributedTracingOptions.LatencyThresholdForDiagnosticEvent.Value.Milliseconds}ms " +
$"and Is response Success : {response.StatusCode.IsSuccess()}");
Expand Down
7 changes: 6 additions & 1 deletion docs/observability.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
**Source to capture operation level activities**: _Azure.Cosmos.Operation_\
**Source to capture event with request diagnostics** : _Azure-Cosmos-Operation-Request-Diagnostics_

There are 3 kinds of events generated:
1. LatencyOverThreshold: If a particular operation's latency is more than the threshold.
2. FailedRequest: If a particular request failed. Status codes not considered as failed are anything below 300, 404/0, 304/0, 409/0, and 412/0.
3. Exception: If any exception occurred.

For detail about usage of this feature, please see the [Azure Cosmos DB SDK observability](https://learn.microsoft.com/azure/cosmos-db/nosql/sdk-observability?tabs=dotnet)

```mermaid
Expand Down Expand Up @@ -104,4 +109,4 @@ flowchart TD
```

### Limitations
1. AAD Support is not available.
1. AAD Support is not available.