Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
2c217a3
First integration of Resolver sources
rzikm Oct 4, 2024
704e33c
Allow mocking
rzikm Oct 4, 2024
aff6d17
Fix dispose
rzikm Oct 4, 2024
15db8a1
Fix tests
rzikm Oct 4, 2024
6bda235
Actually run ResolvConf tests
rzikm Oct 4, 2024
6fc61e0
Add retry functionality
rzikm Oct 8, 2024
2b6d91e
Add logging
rzikm Oct 10, 2024
2137954
Fix Microsoft.Extensions.ServiceDiscovery.Yarp.Tests
rzikm Oct 10, 2024
c204b01
Merge remote-tracking branch 'upstream/main' into managed-resolver
rzikm Nov 26, 2024
2fc7de4
Activity WIP
rzikm Nov 26, 2024
9b4de29
Fix reading CNAME with pointers in domain name segments
rzikm Nov 28, 2024
3ad5a75
Reenable telemetry
rzikm Nov 28, 2024
40f8f9b
Last changes to telemetry
rzikm Nov 28, 2024
e385605
Use strong RNG for Transaction ID generation
rzikm Jan 15, 2025
3cf0a9d
Check against too long domain name
rzikm Feb 3, 2025
6016e41
Disallow pointer to pointer.
rzikm Feb 3, 2025
f8c189d
Code review feedback (easy fixes)
rzikm Feb 3, 2025
d929889
More code review feedback
rzikm Feb 6, 2025
00e41d7
Fix increment
rzikm Feb 6, 2025
7267fb7
Detect CNAME loops
rzikm Feb 7, 2025
c1b76d3
Handle empty Tcp fallback responses and lack of TCP failover support …
rzikm Feb 7, 2025
86490cc
Dispose result.Response when overwritten by another result.
rzikm Feb 7, 2025
92b2905
Move DnsMessageHeader parsing to DnsPrimitives
rzikm Feb 11, 2025
fdf1904
Rework and add tests to retries and failover
rzikm Feb 12, 2025
61814e7
Test failover after exhausting attempts on one server
rzikm Feb 13, 2025
82a8108
Streamline options, remove unsupported options from ResolverOptions
rzikm Feb 13, 2025
fa627fd
Better handling of malformed responses
rzikm Feb 13, 2025
4ec79ca
Code review feedback
rzikm Mar 25, 2025
123375d
More feedback
rzikm Mar 27, 2025
6311616
Guarantee linear parsing of CNAME chains
rzikm Mar 27, 2025
c57a8c5
Fix decoding compressed CNAME in TCP fallback
rzikm Mar 27, 2025
83ca958
More code coverage
rzikm May 12, 2025
03e6a99
Correctly handle IDN names
rzikm May 15, 2025
46418fe
Minor fixes
rzikm May 22, 2025
4075e4f
Add Fuzzing tests for DNS resolver
rzikm May 23, 2025
56cd169
Improve fuzzing of EncodedDomainName
rzikm May 23, 2025
a19e8a2
Merge remote-tracking branch 'upstream/main' into managed-resolver
rzikm Jun 10, 2025
5d5edae
Remove commented out code
rzikm Jun 10, 2025
3c387e9
Fix build
rzikm Jun 10, 2025
c6e3d63
Fix Yarp service discovery tests
rzikm Jun 10, 2025
e66229a
Lazy TCP socket allocation in LoopbackTests
rzikm Jun 12, 2025
da018f7
LoopbackTestBaseLogging
rzikm Jun 12, 2025
2697412
fixup! LoopbackTestBaseLogging
rzikm Jun 12, 2025
214cc9b
Downgrade SharpFuzz
rzikm Jun 17, 2025
0d891cb
Fix buffer use after returning to pool
rzikm Jun 18, 2025
46cf262
Merge branch 'main' into managed-resolver
rzikm Jun 18, 2025
35a5809
Fix build
rzikm Jun 18, 2025
fcf18cf
Merge branch 'main' into managed-resolver
rzikm Jun 25, 2025
df785d1
Remove DnsClient package ref
rzikm Jun 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Handle empty Tcp fallback responses and lack of TCP failover support …
…on server.
  • Loading branch information
rzikm committed Feb 7, 2025
commit c1b76d37d10f1091cadd6ea8535a34092e4a91da
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@ internal static partial class Log
[LoggerMessage(5, LogLevel.Warning, "Query {QueryType} {QueryName} on {Server} attempt {Attempt} returned no data", EventName = "NoData")]
public static partial void NoData(ILogger logger, QueryType queryType, string queryName, IPEndPoint server, int attempt);

[LoggerMessage(6, LogLevel.Error, "Query {QueryType} {QueryName} on {Server} attempt {Attempt} failed.", EventName = "QueryError")]
[LoggerMessage(6, LogLevel.Warning, "Query {QueryType} {QueryName} on {Server} attempt {Attempt} failed to return a valid DNS response.", EventName = "MalformedResponse")]
public static partial void MalformedResponse(ILogger logger, QueryType queryType, string queryName, IPEndPoint server, int attempt);

[LoggerMessage(7, LogLevel.Warning, "Query {QueryType} {QueryName} on {Server} attempt {Attempt} failed due to a network error.", EventName = "NetworkError")]
public static partial void NetworkError(ILogger logger, QueryType queryType, string queryName, IPEndPoint server, int attempt, Exception exception);

[LoggerMessage(8, LogLevel.Error, "Query {QueryType} {QueryName} on {Server} attempt {Attempt} failed.", EventName = "QueryError")]
public static partial void QueryError(ILogger logger, QueryType queryType, string queryName, IPEndPoint server, int attempt, Exception exception);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -265,46 +265,62 @@ internal struct SendQueryResult

async ValueTask<SendQueryResult> SendQueryWithRetriesAsync(string name, QueryType queryType, CancellationToken cancellationToken)
{
SendQueryResult result = default;
SendQueryResult? result = default;

for (int index = 0; index < _options.Servers.Length; index++)
{
IPEndPoint serverEndPoint = _options.Servers[index];

for (int attempt = 0; attempt < _options.Attempts; attempt++)
for (int attempt = 1; attempt <= _options.Attempts; attempt++)
{

try
{
result = await SendQueryToServerWithTimeoutAsync(serverEndPoint, name, queryType, index == _options.Servers.Length - 1, attempt, cancellationToken).ConfigureAwait(false);
}
catch (SocketException ex)
{
Log.NetworkError(_logger, queryType, name, serverEndPoint, attempt, ex);
result = new SendQueryResult { Error = SendQueryError.NetworkError };
continue; // retry or skip to the next server
}
catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
{
Log.QueryError(_logger, queryType, name, serverEndPoint, attempt, ex);
continue; // retry or skip to the next server
}

switch (result.Error)
Debug.Assert(result.HasValue);

switch (result.Value.Error)
{
case SendQueryError.NoError:
return result;
return result.Value;
case SendQueryError.Timeout:
// TODO: should we retry on timeout or skip to the next server?
Log.Timeout(_logger, queryType, name, serverEndPoint, attempt);
break;
case SendQueryError.ServerError:
Log.ErrorResponseCode(_logger, queryType, name, serverEndPoint, result.Response.Header.ResponseCode);
Log.ErrorResponseCode(_logger, queryType, name, serverEndPoint, result.Value.Response.Header.ResponseCode);
break;
case SendQueryError.NoData:
Log.NoData(_logger, queryType, name, serverEndPoint, attempt);
break;
case SendQueryError.MalformedResponse:
Log.MalformedResponse(_logger, queryType, name, serverEndPoint, attempt);
break;
}
}
}

// return the last error received
// TODO: will this always have nondefault value?
return result;
// we should have an error result by now, except when we threw an exception due to internal bug
// (handled here), or cancellation (handled by the caller).
// if (!result.HasValue)
// {
// result = new SendQueryResult { Error = SendQueryError.InternalError };
// }

return result!.Value;
}

internal async ValueTask<SendQueryResult> SendQueryToServerWithTimeoutAsync(IPEndPoint serverEndPoint, string name, QueryType queryType, bool isLastServer, int attempt, CancellationToken cancellationToken)
Expand Down Expand Up @@ -343,6 +359,7 @@ private async ValueTask<SendQueryResult> SendQueryToServerAsync(IPEndPoint serve
{
Log.Query(_logger, queryType, name, serverEndPoint, attempt);

SendQueryError sendError = SendQueryError.NoError;
DateTime queryStartedTime = _timeProvider.GetUtcNow().DateTime;
(DnsDataReader responseReader, DnsMessageHeader header) = await SendDnsQueryCoreUdpAsync(serverEndPoint, name, queryType, cancellationToken).ConfigureAwait(false);

Expand All @@ -353,7 +370,13 @@ private async ValueTask<SendQueryResult> SendQueryToServerAsync(IPEndPoint serve
Log.ResultTruncated(_logger, queryType, name, serverEndPoint, 0);
responseReader.Dispose();
// TCP fallback
(responseReader, header) = await SendDnsQueryCoreTcpAsync(serverEndPoint, name, queryType, cancellationToken).ConfigureAwait(false);
(responseReader, header, sendError) = await SendDnsQueryCoreTcpAsync(serverEndPoint, name, queryType, cancellationToken).ConfigureAwait(false);
}

if (sendError != SendQueryError.NoError)
{
// we failed to get back any response
return new SendQueryResult { Error = sendError };
}

if (header.QueryCount != 1 ||
Expand All @@ -364,11 +387,15 @@ private async ValueTask<SendQueryResult> SendQueryToServerAsync(IPEndPoint serve
return new SendQueryResult
{
Response = new DnsResponse(Array.Empty<byte>(), header, queryStartedTime, queryStartedTime, null!, null!, null!),
Error = SendQueryError.ServerError
Error = SendQueryError.MalformedResponse
};
}

if (header.ResponseCode != QueryResponseCode.NoError)
// we are interested in returned RRs only in case of NOERROR response code,
// if this is not a successful response and we have attempts remaining,
// we skip parsing the response and retry.
// TODO: test server failover behavior
if (header.ResponseCode != QueryResponseCode.NoError && (!isLastServer || attempt != _options.Attempts))
{
return new SendQueryResult
{
Expand All @@ -377,12 +404,6 @@ private async ValueTask<SendQueryResult> SendQueryToServerAsync(IPEndPoint serve
};
}

if (header.ResponseCode != QueryResponseCode.NoError && !isLastServer)
{
// we exhausted attempts on this server, try the next one
return default;
}

int ttl = int.MaxValue;
List<DnsResourceRecord> answers = ReadRecords(header.AnswerCount, ref ttl, ref responseReader);
List<DnsResourceRecord> authorities = ReadRecords(header.AuthorityCount, ref ttl, ref responseReader);
Expand Down Expand Up @@ -506,9 +527,7 @@ internal static SendQueryError ValidateResponse(in DnsResponse response)
(ushort transactionId, int length) = EncodeQuestion(memory, name, queryType);

using var socket = new Socket(serverEndPoint.AddressFamily, SocketType.Dgram, ProtocolType.Udp);
await socket.ConnectAsync(serverEndPoint, cancellationToken).ConfigureAwait(false);

await socket.SendAsync(memory.Slice(0, length), SocketFlags.None, cancellationToken).ConfigureAwait(false);
await socket.SendToAsync(memory.Slice(0, length), SocketFlags.None, serverEndPoint, cancellationToken).ConfigureAwait(false);

DnsDataReader responseReader;
DnsMessageHeader header;
Expand All @@ -527,8 +546,7 @@ internal static SendQueryError ValidateResponse(in DnsResponse response)
header.TransactionId != transactionId ||
!header.IsResponse)
{
// the message is not a response for our query.
// don't dispose reader, we will reuse the buffer
// header mismatch, this is not a response to our query
continue;
}

Expand All @@ -546,7 +564,7 @@ internal static SendQueryError ValidateResponse(in DnsResponse response)
}
}

internal static async ValueTask<(DnsDataReader reader, DnsMessageHeader header)> SendDnsQueryCoreTcpAsync(IPEndPoint serverEndPoint, string name, QueryType queryType, CancellationToken cancellationToken)
internal static async ValueTask<(DnsDataReader reader, DnsMessageHeader header, SendQueryError error)> SendDnsQueryCoreTcpAsync(IPEndPoint serverEndPoint, string name, QueryType queryType, CancellationToken cancellationToken)
{
var buffer = ArrayPool<byte>.Shared.Rent(8 * 1024);
try
Expand All @@ -566,12 +584,20 @@ internal static SendQueryError ValidateResponse(in DnsResponse response)
int read = await socket.ReceiveAsync(buffer.AsMemory(bytesRead), SocketFlags.None, cancellationToken).ConfigureAwait(false);
bytesRead += read;

if (read == 0)
{
// connection closed before receiving complete response message
return (default, default, SendQueryError.MalformedResponse);
}

if (responseLength < 0 && bytesRead >= 2)
{
responseLength = BinaryPrimitives.ReadUInt16BigEndian(buffer.AsSpan(0, 2));

if (responseLength > buffer.Length)
{
// even though this is user-controlled pre-allocation, it is limited to
// 64 kB, so it should be fine.
var largerBuffer = ArrayPool<byte>.Shared.Rent(responseLength);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is kind of dangerous, user controlled pre-allocation. Since we're limited to 65k here you could argue it's not too risky, but we should probably explicitly call it out in a comment and make sure we're okay with the risk.

Technically the array pool (ConfigurableArrayPool) could return a 65 * 2 * 2 (or more) buffer since the implementation is free to search multiple bucket sizes to find a pooled byte[].

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am curious, if we were to decide it's too risky, what would you suggest as a mitigation? We need the entire message in memory for parsing purposes (domain name compression via pointers with offsets), so I don't see us reading the contents of the message from a stream.

Array.Copy(buffer, largerBuffer, bytesRead);
ArrayPool<byte>.Shared.Return(buffer);
Expand All @@ -585,12 +611,13 @@ internal static SendQueryError ValidateResponse(in DnsResponse response)
header.TransactionId != transactionId ||
!header.IsResponse)
{
throw new InvalidOperationException("Invalid response: Header mismatch");
// header mismatch on TCP fallback
return (default, default, SendQueryError.MalformedResponse);
}

// transfer ownership of buffer to the caller
buffer = null!;
return (responseReader, header);
return (responseReader, header, SendQueryError.NoError);
}
finally
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,38 @@ namespace Microsoft.Extensions.ServiceDiscovery.Dns.Resolver;

internal enum SendQueryError
{
/// <summary>
/// DNS query was successful and returned response message with answers.
/// </summary>
NoError,

/// <summary>
/// Server failed to respond to the query withing specified timeout.
/// </summary>
Timeout,

/// <summary>
/// Server returned a response with an error code.
/// </summary>
ServerError,
ParseError,

/// <summary>
/// Server returned a malformed response.
/// </summary>
MalformedResponse,

/// <summary>
/// Server returned a response indicating no data are available.
/// </summary>
NoData,

/// <summary>
/// Network-level error occurred during the query.
/// </summary>
NetworkError,

/// <summary>
/// Internal error on part of the implementation.
/// </summary>
InternalError,
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ public void Dispose()
_tcpSocket.Dispose();
}

public void DisableTcpFallback()
{
_tcpSocket.Close();
}

private static async Task<int> ProcessRequestCore(ReadOnlyMemory<byte> message, Func<LoopbackDnsResponseBuilder, Task> action, Memory<byte> responseBuffer)
{
DnsDataReader reader = new DnsDataReader(message);
Expand Down Expand Up @@ -62,7 +67,6 @@ private static async Task<int> ProcessRequestCore(ReadOnlyMemory<byte> message,
{
throw new InvalidOperationException("Failed to write header");
}
;

foreach (var (questionName, questionType, questionClass) in responseBuilder.Questions)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,48 @@ public async Task TcpFailover_Simple_Success()
Assert.Equal(address, res.Address);
Assert.Equal(TimeProvider.GetUtcNow().DateTime.AddSeconds(3600), res.ExpiresAt);
}

[Fact]
public async Task TcpFailover_ServerClosesWithoutData_EmptyResult()
{
Options.Attempts = 1;
Options.Timeout = TimeSpan.FromSeconds(60);

IPAddress address = IPAddress.Parse("172.213.245.111");
_ = DnsServer.ProcessUdpRequest(builder =>
{
builder.Flags |= QueryFlags.ResultTruncated;
return Task.CompletedTask;
});

Task serverTask = DnsServer.ProcessTcpRequest(builder =>
{
throw new InvalidOperationException("This forces closing the socket without writing any data");
});

AddressResult[] results = await Resolver.ResolveIPAddressesAsync("www.example.com", AddressFamily.InterNetwork).AsTask().WaitAsync(TimeSpan.FromSeconds(10));
Assert.Empty(results);

await Assert.ThrowsAsync<InvalidOperationException>(() => serverTask);
}

[Fact]
public async Task TcpFailover_TcpNotAvailable_EmptyResult()
{
Options.Attempts = 1;
Options.Timeout = TimeSpan.FromMilliseconds(100000);

IPAddress address = IPAddress.Parse("172.213.245.111");
_ = DnsServer.ProcessUdpRequest(builder =>
{
builder.Flags |= QueryFlags.ResultTruncated;
return Task.CompletedTask;
});

// turn off TCP support the server
DnsServer.DisableTcpFallback();

AddressResult[] results = await Resolver.ResolveIPAddressesAsync("www.example.com", AddressFamily.InterNetwork);
Assert.Empty(results);
}
}
Loading