Skip to content

Commit c32711b

Browse files
authored
[stress] Sync namespace federated credentials periodically and on startup (Azure#9063)
* Watcher namespace fed cred management fixes
* Update stress watcher dockerfile to use net 8
* Add back delete code, remove terminating skip
* Bump cluster version to 1.29.8
1 parent a76f3d2 commit c32711b

File tree

5 files changed

+98
-12
lines changed

5 files changed

+98
-12
lines changed
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
dependencies:
22
- name: stress-test-addons
33
repository: https://stresstestcharts.blob.core.windows.net/helm/
4-
version: 0.3.2
5-
digest: sha256:6eee71a7e8a4c0dc06d5fbbce39ef63237a0db0b7fc2da66e98e96b68985b764
6-
generated: "2024-05-23T11:37:41.371010465-04:00"
4+
version: 0.3.3
5+
digest: sha256:1cffb5ed8ea74953ab7611f9e2de2163af2c3f0918afb9928f71210da9c19a4a
6+
generated: "2024-10-02T16:18:41.429777815-04:00"

tools/stress-cluster/cluster/azure/cluster/cluster.bicep

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ param updateNodes bool = false
1313
// monitoring parameters
1414
param workspaceId string
1515

16-
var kubernetesVersion = '1.29.4'
16+
var kubernetesVersion = '1.29.8'
1717
var nodeResourceGroup = 'rg-nodes-${dnsPrefix}-${clusterName}-${groupSuffix}'
1818

1919
var systemAgentPool = {

tools/stress-cluster/services/Stress.Watcher/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
FROM mcr.microsoft.com/dotnet/sdk:6.0-cbl-mariner2.0 AS build
1+
FROM mcr.microsoft.com/dotnet/sdk:8.0-cbl-mariner2.0 AS build
22

33
COPY ./src /src
44

5-
RUN cd /src && dotnet publish -c Release -o /stresswatcher -r linux-x64 -f net6.0 -p:PublishSingleFile=true --self-contained
5+
RUN cd /src && dotnet publish -c Release -o /stresswatcher -r linux-x64 -f net8.0 -p:PublishSingleFile=true --self-contained
66

77
FROM mcr.microsoft.com/azure-cli:cbl-mariner2.0
88

tools/stress-cluster/services/Stress.Watcher/src/NamespaceEventHandler.cs

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ public class NamespaceEventHandler
2828
// Concurrent Federated Identity Credentials writes under the same managed identity are not supported
2929
private static readonly SemaphoreSlim FederatedCredentialWriteSemaphore = new(1, 1);
3030

31+
private Dictionary<string, UserAssignedIdentityResource> WorkloadAppCache = [];
32+
3133
public List<string> WorkloadAppPool;
3234
public string WorkloadAppIssuer;
3335

@@ -62,6 +64,57 @@ public NamespaceEventHandler(
6264
.CreateLogger();
6365
}
6466

67+
/// <summary>
/// Runs a federated credential sync while holding the shared federated credential
/// write semaphore, so the sync does not overlap with other operations that take
/// the same semaphore.
/// </summary>
/// <returns>A task that completes after the sync has run and the semaphore has been released.</returns>
public async Task SyncCredentials()
{
    Logger.Information($"Waiting for federated credential write semaphore");
    // Acquire before entering the try block: the finally clause must only release
    // a semaphore that was actually acquired, otherwise a failed wait would be
    // followed by an unbalanced Release().
    await FederatedCredentialWriteSemaphore.WaitAsync();
    try
    {
        await _syncCredentials();
    }
    finally
    {
        Logger.Information("Releasing federated credential write semaphore");
        FederatedCredentialWriteSemaphore.Release();
    }
}
81+
82+
/// <summary>
/// Reconciles the federated identity credentials of every managed identity in
/// <c>WorkloadAppPool</c> against the namespaces currently returned by the cluster.
/// Credentials whose name matches an existing namespace are recorded in
/// <c>WorkloadAppCache</c>; credentials with no matching namespace are deleted.
/// </summary>
/// <remarks>
/// This method does not take the federated credential write semaphore itself;
/// <c>SyncCredentials</c> wraps it with the semaphore.
/// When <c>WatchNamespace</c> is set, only the credential belonging to that namespace
/// is eligible for deletion; every other unmatched credential is left untouched.
/// </remarks>
/// <returns>A task that completes when all identities in the pool have been processed.</returns>
public async Task _syncCredentials()
{
    Logger.Information("Syncing namespaced federated credentials, this may take a minute...");

    var namespaces = await Client.ListNamespaceAsync();

    // Compute the expected credential names once instead of re-scanning the whole
    // namespace list for every credential of every managed identity.
    var expectedCredentialNames = new HashSet<string>(
        namespaces.Items.Select(ns => CreateFederatedIdentityCredentialName(ns)));

    foreach (var app in WorkloadAppPool)
    {
        var resourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, app);
        var userAssignedIdentity = ArmClient.GetUserAssignedIdentityResource(resourceId);
        var identityResource = await userAssignedIdentity.GetAsync();
        var fedCreds = userAssignedIdentity.GetFederatedIdentityCredentials();

        await foreach (var item in fedCreds.GetAllAsync())
        {
            var credentialName = item.Data.Name;

            // The namespace still exists: remember which identity holds its credential.
            if (expectedCredentialNames.Contains(credentialName))
            {
                WorkloadAppCache[credentialName] = identityResource.Value;
                continue;
            }

            if (!string.IsNullOrEmpty(WatchNamespace) && credentialName != CreateFederatedIdentityCredentialName(WatchNamespace))
            {
                Logger.Information($"Skipping delete federated credential '{credentialName}' because it is not the watched namespace '{WatchNamespace}'");
                continue;
            }

            // Only perform delete operations for namespace state that may have changed if the watcher was not running.
            // Any create operations will be handled after initialization as the watch stream processes all active namespaces on startup
            Logger.Information($"Deleting federated identity credential '{credentialName}' for managed identity '{app}' as the corresponding namespace no longer exists.");
            WorkloadAppCache.Remove(credentialName);
            await item.DeleteAsync(Azure.WaitUntil.Completed);
        }
    }

    Logger.Information($"Federated credential sync complete. Cached {WorkloadAppCache.Count} federated credentials.");
}
117+
65118
public async Task Watch(CancellationToken cancellationToken)
66119
{
67120
string resourceVersion = null;
@@ -116,7 +169,7 @@ public async Task Watch(CancellationToken cancellationToken)
116169

117170
public void HandleNamespaceEvent(WatchEventType eventType, V1Namespace ns)
118171
{
119-
if (ExcludedNamespaces.Contains(ns.Name()))
172+
if (ExcludedNamespaces.Contains(ns.Name()) || string.IsNullOrEmpty(ns.Name()))
120173
{
121174
return;
122175
}
@@ -156,7 +209,12 @@ public void HandleNamespaceEvent(WatchEventType eventType, V1Namespace ns)
156209

157210
/// <summary>
/// Builds the federated identity credential name for a namespace object by
/// delegating to the string overload with the namespace's name.
/// </summary>
/// <param name="ns">The namespace whose name is used.</param>
/// <returns>The credential name for the namespace.</returns>
public string CreateFederatedIdentityCredentialName(V1Namespace ns)
    => CreateFederatedIdentityCredentialName(ns.Name());

/// <summary>
/// Builds the federated identity credential name for a namespace name by
/// prefixing it with <c>stress-</c>.
/// </summary>
/// <param name="ns">The namespace name.</param>
/// <returns>The namespace name prefixed with <c>stress-</c>.</returns>
public string CreateFederatedIdentityCredentialName(string ns)
    => $"stress-{ns}";
161219

162220
public async Task InitializeWorkloadIdForNamespace(V1Namespace ns)
@@ -175,14 +233,21 @@ public async Task InitializeWorkloadIdForNamespace(V1Namespace ns)
175233
var identityData = await selectedWorkloadIdentity.GetAsync();
176234
var selectedWorkloadAppId = identityData.Value.Data.ClientId.ToString();
177235

178-
var meta = new V1ObjectMeta(){
236+
var meta = new V1ObjectMeta()
237+
{
179238
Name = ns.Name(),
180239
NamespaceProperty = ns.Name(),
181240
Annotations = new Dictionary<string, string>(){
182241
{ "azure.workload.identity/client-id", selectedWorkloadAppId }
183242
}
184243
};
185244
var serviceAccount = new V1ServiceAccount(metadata: meta);
245+
var allAccounts = await Client.ListNamespacedServiceAccountAsync(ns.Name());
246+
if (allAccounts.Items.Any(sa => sa.Name() == ns.Name()))
247+
{
248+
Logger.Information($"Service account '{ns.Name()}/{ns.Name()}' already exists, skipping creation.");
249+
return;
250+
}
186251
await Client.CreateNamespacedServiceAccountAsync(serviceAccount, ns.Name());
187252
Logger.Information($"Created service account '{ns.Name()}/{ns.Name()}' with workload client id '{selectedWorkloadAppId}'");
188253
}
@@ -200,6 +265,12 @@ public async Task<UserAssignedIdentityResource> CreateFederatedIdentityCredentia
200265
Logger.Information($"Waiting for federated credential write semaphore");
201266
await FederatedCredentialWriteSemaphore.WaitAsync();
202267

268+
if (WorkloadAppCache.ContainsKey(credentialName))
269+
{
270+
Logger.Information($"Found cache entry for federated credential {credentialName}, returning identity {WorkloadAppCache[credentialName].Data.ClientId}");
271+
return await WorkloadAppCache[credentialName].GetAsync();
272+
}
273+
203274
foreach (var workloadApp in WorkloadAppPool)
204275
{
205276
var userAssignedIdentityResourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, workloadApp);
@@ -246,15 +317,22 @@ public async Task<UserAssignedIdentityResource> CreateFederatedIdentityCredentia
246317
Logger.Information($"Creating/updating federated identity credential '{credentialName}' " +
247318
$"with subject '{subject}' for managed identity '{selectedWorkloadApp}'");
248319
var lro = await federatedIdentityCredential.UpdateAsync(Azure.WaitUntil.Completed, fedCredData);
320+
WorkloadAppCache[credentialName] = selectedIdentity;
249321
Logger.Information($"Created federated identity credential '{lro.Value.Data.Name}'");
250322

251323
return selectedIdentity;
252324
}
253325

254326
public async Task DeleteFederatedIdentityCredential(V1Namespace ns)
255327
{
328+
Logger.Information($"Waiting for federated credential write semaphore");
329+
await FederatedCredentialWriteSemaphore.WaitAsync();
330+
256331
var credentialName = CreateFederatedIdentityCredentialName(ns);
257332
var workloadApp = "";
333+
334+
WorkloadAppCache.Remove(credentialName);
335+
258336
foreach (var app in WorkloadAppPool)
259337
{
260338
var resourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, app);
@@ -284,9 +362,6 @@ public async Task DeleteFederatedIdentityCredential(V1Namespace ns)
284362
SubscriptionId, ClusterGroup, workloadApp, credentialName);
285363
var federatedIdentityCredential = ArmClient.GetFederatedIdentityCredentialResource(federatedIdentityCredentialResourceId);
286364

287-
Logger.Information($"Waiting for federated credential write semaphore");
288-
await FederatedCredentialWriteSemaphore.WaitAsync();
289-
290365
Logger.Information($"Deleting federated identity credential '{credentialName}' for managed identity '{workloadApp}'");
291366
var lro = await federatedIdentityCredential.DeleteAsync(Azure.WaitUntil.Completed);
292367
Logger.Information($"Deleted federated identity credential '{credentialName}'");

tools/stress-cluster/services/Stress.Watcher/src/Program.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ static async Task Program(Options options)
7979
var namespaceEventHandler = new NamespaceEventHandler(
8080
client, armClient, workloadConfig.SubscriptionId, workloadConfig.ClusterGroup,
8181
workloadConfig.WorkloadAppPool, workloadConfig.WorkloadAppIssuer, options.Namespace);
82+
await namespaceEventHandler.SyncCredentials();
83+
_ = PollAndSyncCredentials(namespaceEventHandler, 288); // NOTE(review): the argument is in minutes, so this polls every 288 minutes (4.8 hours), not every 12 hours (720 minutes) - confirm the intended interval
8284

8385
var cts = new CancellationTokenSource();
8486
var taskList = new List<Task>
@@ -164,5 +166,14 @@ static WorkloadAuthConfig GetWorkloadConfigValues(Options options, Boolean isLoc
164166
ClusterGroup = clusterGroup
165167
};
166168
}
169+
170+
/// <summary>
/// Repeatedly re-runs the namespace federated credential sync, waiting
/// <paramref name="minutes"/> minutes before each run. The loop never exits on its own.
/// </summary>
/// <param name="namespaceHandler">Handler whose <c>SyncCredentials</c> is invoked on every cycle.</param>
/// <param name="minutes">Delay between sync runs, in minutes.</param>
/// <returns>A task that does not complete under normal operation.</returns>
static async Task PollAndSyncCredentials(NamespaceEventHandler namespaceHandler, int minutes)
{
    var interval = TimeSpan.FromMinutes(minutes);
    while (true)
    {
        await Task.Delay(interval);
        try
        {
            await namespaceHandler.SyncCredentials();
        }
        catch (Exception ex)
        {
            // When the returned task is not awaited (fire-and-forget), an exception
            // escaping this loop would go unobserved and silently stop all future
            // syncs. Report the failure and try again on the next cycle instead.
            Console.Error.WriteLine($"Periodic federated credential sync failed, retrying in {minutes} minutes: {ex}");
        }
    }
}
167178
}
168179
}

0 commit comments

Comments
 (0)