Skip to content

Commit 88fc3f6

Browse files
committed
Add kernel args for rdma mode to complement the modprobe file
Signed-off-by: Sebastian Sch <[email protected]>
1 parent 660e7f8 commit 88fc3f6

File tree

18 files changed

+574
-178
lines changed

18 files changed

+574
-178
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ test-e2e-k8s: export NAMESPACE=sriov-network-operator
226226
test-e2e-k8s: test-e2e
227227

228228
test-bindata-scripts: fakechroot
229-
fakechroot ./test/scripts/enable-kargs_test.sh
229+
fakechroot ./test/scripts/kargs_test.sh
230230

231231
test-%: generate manifests envtest
232232
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir=/tmp -p path)" HOME="$(shell pwd)" go test ./$*/... -coverprofile cover-$*.out -coverpkg ./... -v

bindata/scripts/enable-kargs.sh

Lines changed: 0 additions & 33 deletions
This file was deleted.

bindata/scripts/kargs.sh

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/bin/bash
# Add or remove kernel arguments on the host file system mounted at /host.
#
# Usage: kargs.sh add|remove KARG...
#
# For every KARG the running kernel command line (/proc/cmdline) is checked:
#   add:    if KARG is missing from the running command line it is appended
#           to the boot configuration (unless it is already pending there).
#   remove: if KARG is present on the running command line it is deleted
#           from the boot configuration (unless it is already gone there).
# In both cases the KARG is counted, because the running kernel does not yet
# reflect the requested state.
#
# Output: the number of counted KARGs is printed on stdout.
# Exit status: 127 when the host is not ostree based and grubby is missing.
#
# Kernel arguments are compared as whole whitespace-separated words, so that
# e.g. "iommu=pt" is not considered present because "amd_iommu=pt" is set and
# the "." in "ib_core.netns_mode=1" is never treated as a regex wildcard.
set -x

command=$1
shift
declare -a kargs=( "$@" )
ret=0
args=$(chroot /host/ cat /proc/cmdline)

# has_karg LIST KARG
# Succeeds when KARG is one of the whitespace-separated words of LIST.
has_karg() {
    local word
    local -a words
    # -d '' reads the whole text (not only its first line); read returns
    # non-zero at end of input but still fills the array.
    read -r -d '' -a words <<< "$1"
    for word in "${words[@]}"; do
        if [[ $word == "$2" ]]; then
            return 0
        fi
    done
    return 1
}

# Select the tool that manages the boot configuration of the host.
if chroot /host/ test -f /run/ostree-booted ; then
    backend=ostree
else
    # if grubby is not there, report it to the caller through the exit status
    if ! chroot /host/ which grubby > /dev/null 2>&1; then
        exit 127
    fi
    backend=grubby
fi

# pending_kargs
# Prints the kernel arguments currently stored in the boot configuration.
pending_kargs() {
    local line
    if [[ $backend == ostree ]]; then
        chroot /host/ rpm-ostree kargs
    else
        # grubby prints the arguments as: args="karg1 karg2 ..."
        line=$(chroot /host/ grubby --info=DEFAULT | grep '^args=')
        line=${line#args=}
        line=${line#\"}
        line=${line%\"}
        printf '%s\n' "$line"
    fi
}

# apply_karg add|remove KARG
# Writes the change to the boot configuration; tool output is discarded.
apply_karg() {
    if [[ $backend == ostree ]]; then
        case $1 in
            add)    chroot /host/ rpm-ostree kargs --append "$2" > /dev/null 2>&1 ;;
            remove) chroot /host/ rpm-ostree kargs --delete "$2" > /dev/null 2>&1 ;;
        esac
    else
        case $1 in
            add)    chroot /host/ grubby --update-kernel=DEFAULT --args="$2" > /dev/null 2>&1 ;;
            remove) chroot /host/ grubby --update-kernel=DEFAULT --remove-args="$2" > /dev/null 2>&1 ;;
        esac
    fi
}

for t in "${kargs[@]}"; do
    case $command in
        add)
            if ! has_karg "$args" "$t"; then
                if ! has_karg "$(pending_kargs)" "$t"; then
                    # Keep going on failure (best effort), but leave a trace.
                    apply_karg add "$t" || echo "kargs.sh: failed to add kernel argument '$t'" >&2
                fi
                ret=$((ret + 1))
            fi
            ;;
        remove)
            if has_karg "$args" "$t"; then
                if has_karg "$(pending_kargs)" "$t"; then
                    # Keep going on failure (best effort), but leave a trace.
                    apply_karg remove "$t" || echo "kargs.sh: failed to remove kernel argument '$t'" >&2
                fi
                ret=$((ret + 1))
            fi
            ;;
    esac
done

echo "$ret"

controllers/sriovnetworknodepolicy_controller.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,22 +155,22 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er
155155
delayedEventHandler := handler.Funcs{
156156
CreateFunc: func(ctx context.Context, e event.CreateEvent, q workqueue.RateLimitingInterface) {
157157
log.Log.WithName("SriovNetworkNodePolicy").
158-
Info("Enqueuing sync for create event", "resource", e.Object.GetName())
158+
Info("Enqueuing sync for create event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String())
159159
qHandler(q)
160160
},
161161
UpdateFunc: func(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) {
162162
log.Log.WithName("SriovNetworkNodePolicy").
163-
Info("Enqueuing sync for update event", "resource", e.ObjectNew.GetName())
163+
Info("Enqueuing sync for update event", "resource", e.ObjectNew.GetName(), "type", e.ObjectNew.GetObjectKind().GroupVersionKind().String())
164164
qHandler(q)
165165
},
166166
DeleteFunc: func(ctx context.Context, e event.DeleteEvent, q workqueue.RateLimitingInterface) {
167167
log.Log.WithName("SriovNetworkNodePolicy").
168-
Info("Enqueuing sync for delete event", "resource", e.Object.GetName())
168+
Info("Enqueuing sync for delete event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String())
169169
qHandler(q)
170170
},
171171
GenericFunc: func(ctx context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) {
172172
log.Log.WithName("SriovNetworkNodePolicy").
173-
Info("Enqueuing sync for generic event", "resource", e.Object.GetName())
173+
Info("Enqueuing sync for generic event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String())
174174
qHandler(q)
175175
},
176176
}
@@ -199,6 +199,7 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er
199199
For(&sriovnetworkv1.SriovNetworkNodePolicy{}).
200200
Watches(&corev1.Node{}, nodeEvenHandler).
201201
Watches(&sriovnetworkv1.SriovNetworkNodePolicy{}, delayedEventHandler).
202+
Watches(&sriovnetworkv1.SriovNetworkPoolConfig{}, delayedEventHandler).
202203
WatchesRawSource(&source.Channel{Source: eventChan}, delayedEventHandler).
203204
Complete(r)
204205
}
@@ -271,14 +272,14 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
271272
ns := &sriovnetworkv1.SriovNetworkNodeState{}
272273
ns.Name = node.Name
273274
ns.Namespace = vars.Namespace
274-
j, _ := json.Marshal(ns)
275275
netPoolConfig, _, err := findNodePoolConfig(ctx, &node, r.Client)
276276
if err != nil {
277-
log.Log.Error(err, "nodeStateSyncHandler(): failed to get SriovNetworkPoolConfig for the current node")
277+
logger.Error(err, "failed to get SriovNetworkPoolConfig for the current node")
278278
}
279279
if netPoolConfig != nil {
280280
ns.Spec.System.RdmaMode = netPoolConfig.Spec.RdmaMode
281281
}
282+
j, _ := json.Marshal(ns)
282283
logger.V(2).Info("SriovNetworkNodeState CR", "content", j)
283284
if err := r.syncSriovNetworkNodeState(ctx, dc, npl, ns, &node); err != nil {
284285
logger.Error(err, "Fail to sync", "SriovNetworkNodeState", ns.Name)

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ require (
3838
github.com/vishvananda/netlink v1.2.1-beta.2.0.20240221172127-ec7bcb248e94
3939
github.com/vishvananda/netns v0.0.4
4040
go.uber.org/zap v1.25.0
41+
golang.org/x/net v0.23.0
4142
golang.org/x/time v0.3.0
4243
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
4344
gopkg.in/yaml.v3 v3.0.1
@@ -144,7 +145,6 @@ require (
144145
golang.org/x/crypto v0.21.0 // indirect
145146
golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect
146147
golang.org/x/mod v0.13.0 // indirect
147-
golang.org/x/net v0.23.0 // indirect
148148
golang.org/x/oauth2 v0.13.0 // indirect
149149
golang.org/x/sync v0.4.0 // indirect
150150
golang.org/x/sys v0.20.0 // indirect

pkg/consts/constants.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,11 @@ const (
124124
`IMPORT{program}="/etc/udev/switchdev-vf-link-name.sh $attr{phys_port_name}", ` +
125125
`NAME="%s_$env{NUMBER}"`
126126

127-
KernelArgPciRealloc = "pci=realloc"
128-
KernelArgIntelIommu = "intel_iommu=on"
129-
KernelArgIommuPt = "iommu=pt"
127+
KernelArgPciRealloc = "pci=realloc"
128+
KernelArgIntelIommu = "intel_iommu=on"
129+
KernelArgIommuPt = "iommu=pt"
130+
KernelArgRdmaShared = "ib_core.netns_mode=1"
131+
KernelArgRdmaExclusive = "ib_core.netns_mode=0"
130132

131133
// Feature gates
132134
// ParallelNicConfigFeatureGate: allow to configure nics in parallel

pkg/daemon/daemon.go

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"fmt"
66
"math/rand"
7-
"os/exec"
87
"reflect"
98
"sync"
109
"time"
@@ -429,16 +428,6 @@ func (dn *Daemon) nodeStateSyncHandler() error {
429428
reqReboot = reqReboot || r
430429
}
431430

432-
if dn.currentNodeState.Status.System.RdmaMode != dn.desiredNodeState.Spec.System.RdmaMode {
433-
err = dn.HostHelpers.SetRDMASubsystem(dn.desiredNodeState.Spec.System.RdmaMode)
434-
if err != nil {
435-
log.Log.Error(err, "nodeStateSyncHandler(): failed to set RDMA subsystem")
436-
return err
437-
}
438-
reqReboot = true
439-
reqDrain = true
440-
}
441-
442431
// When running using systemd check if the applied configuration is the latest one
443432
// or there is a new config we need to apply
444433
// When using systemd configuration we write the file
@@ -761,11 +750,11 @@ func (dn *Daemon) rebootNode() {
761750
// However note we use `;` instead of `&&` so we keep rebooting even
762751
// if kubelet failed to shutdown - that way the machine will still eventually reboot
763752
// as systemd will time out the stop invocation.
764-
cmd := exec.Command("systemd-run", "--unit", "sriov-network-config-daemon-reboot",
753+
stdOut, StdErr, err := dn.HostHelpers.RunCommand("systemd-run", "--unit", "sriov-network-config-daemon-reboot",
765754
"--description", "sriov-network-config-daemon reboot node", "/bin/sh", "-c", "systemctl stop kubelet.service; reboot")
766755

767-
if err := cmd.Run(); err != nil {
768-
log.Log.Error(err, "failed to reboot node")
756+
if err != nil {
757+
log.Log.Error(err, "failed to reboot node", "stdOut", stdOut, "StdErr", StdErr)
769758
}
770759
}
771760

pkg/daemon/plugin_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ var _ = Describe("config daemon plugin loading tests", func() {
4141
vars.ClusterType = consts.ClusterTypeKubernetes
4242
gmockController = gomock.NewController(GinkgoT())
4343
helperMock = helperMocks.NewMockHostHelpersInterface(gmockController)
44+
helperMock.EXPECT().GetCurrentKernelArgs().Return("", nil).AnyTimes()
45+
helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false)
46+
helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false)
47+
helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgPciRealloc).Return(false)
48+
helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaExclusive).Return(false)
49+
helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaShared).Return(false)
50+
4451
// k8s plugin is ATM the only plugin which require mocking/faking, as its New method performs additional logic
4552
// other than simple plugin struct initialization
4653
K8sPlugin = func(_ helper.HostHelpersInterface) (plugin.VendorPlugin, error) {

pkg/daemon/writer.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ func (w *NodeStateStatusWriter) setNodeStateStatus(msg Message) (*sriovnetworkv1
189189
nodeState, err := w.updateNodeStateStatusRetry(func(nodeState *sriovnetworkv1.SriovNetworkNodeState) {
190190
nodeState.Status.Interfaces = w.status.Interfaces
191191
nodeState.Status.Bridges = w.status.Bridges
192+
nodeState.Status.System = w.status.System
192193
if msg.lastSyncError != "" || msg.syncStatus == consts.SyncStatusSucceeded {
193194
// clear lastSyncError when sync Succeeded
194195
nodeState.Status.LastSyncError = msg.lastSyncError

pkg/host/internal/lib/netlink/mock/mock_netlink.go

Lines changed: 15 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)