Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/csi-provisioner/csi-provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
"github.com/kubernetes-csi/csi-lib-utils/metrics"
ctrl "github.com/kubernetes-csi/external-provisioner/pkg/controller"
snapclientset "github.com/kubernetes-csi/external-snapshotter/pkg/client/clientset/versioned"
"sigs.k8s.io/sig-storage-lib-external-provisioner/controller"
"sigs.k8s.io/sig-storage-lib-external-provisioner/v5/controller"

"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
Expand Down Expand Up @@ -272,7 +272,7 @@ func main() {
if !*enableLeaderElection {
run(context.TODO())
} else {
// this lock name pattern is also copied from sigs.k8s.io/sig-storage-lib-external-provisioner/controller
// this lock name pattern is also copied from sigs.k8s.io/sig-storage-lib-external-provisioner/v5/controller
// to preserve backwards compatibility
lockName := strings.Replace(provisionerName, "/", "-", -1)

Expand Down
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ require (
github.com/kubernetes-csi/csi-lib-utils v0.7.0
github.com/kubernetes-csi/csi-test v2.0.0+incompatible
github.com/kubernetes-csi/external-snapshotter v1.2.1-0.20191220180133-bba358438aee
github.com/miekg/dns v1.1.8 // indirect
github.com/spf13/pflag v1.0.5
google.golang.org/grpc v1.26.0
k8s.io/api v0.17.0
k8s.io/apimachinery v0.17.1-beta.0
k8s.io/api v0.17.3
k8s.io/apimachinery v0.17.3
k8s.io/apiserver v0.17.0
k8s.io/client-go v0.17.0
k8s.io/component-base v0.17.0
k8s.io/csi-translation-lib v0.17.0
k8s.io/klog v1.0.0
k8s.io/kubernetes v1.14.0
sigs.k8s.io/sig-storage-lib-external-provisioner v4.1.0+incompatible
sigs.k8s.io/sig-storage-lib-external-provisioner v4.1.0+incompatible // indirect
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is kind of strange, we have dependencies on both versions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had noticed that, too, but because we never actually pull in any code from it (= it's not in vendor) ignored it.

This might be a bug in sig-storage-lib-external-provisioner:

$ go mod why -m sigs.k8s.io/sig-storage-lib-external-provisioner
# sigs.k8s.io/sig-storage-lib-external-provisioner
github.com/kubernetes-csi/external-provisioner/cmd/csi-provisioner
sigs.k8s.io/sig-storage-lib-external-provisioner/v5/controller
sigs.k8s.io/sig-storage-lib-external-provisioner/v5/controller.test
sigs.k8s.io/sig-storage-lib-external-provisioner/controller/metrics

=> controller.test (which we don't use) pulls in the unversioned sigs.k8s.io/sig-storage-lib-external-provisioner here: https://github.com/kubernetes-sigs/sig-storage-lib-external-provisioner/blob/7d9e8b4c678a803070309639110a4494a4010efe/controller/controller_test.go#L26

Not sure how that works; perhaps go test simply maps that to the checked out module source code. Will fix it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Since we're not actually using it here, we don't need to wait for.

sigs.k8s.io/sig-storage-lib-external-provisioner/v5 v5.0.0
)

replace k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.17.0
Expand Down
63 changes: 61 additions & 2 deletions go.sum

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/controller/clone_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog"
"sigs.k8s.io/sig-storage-lib-external-provisioner/controller"
"sigs.k8s.io/sig-storage-lib-external-provisioner/v5/controller"
)

//
Expand Down
60 changes: 46 additions & 14 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ import (
"github.com/kubernetes-csi/csi-lib-utils/rpc"
snapapi "github.com/kubernetes-csi/external-snapshotter/pkg/apis/volumesnapshot/v1beta1"
snapclientset "github.com/kubernetes-csi/external-snapshotter/pkg/client/clientset/versioned"
"sigs.k8s.io/sig-storage-lib-external-provisioner/controller"
"sigs.k8s.io/sig-storage-lib-external-provisioner/util"
"sigs.k8s.io/sig-storage-lib-external-provisioner/v5/controller"
"sigs.k8s.io/sig-storage-lib-external-provisioner/v5/util"

v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
Expand Down Expand Up @@ -614,10 +614,30 @@ func (p *csiProvisioner) ProvisionExt(options controller.ProvisionOptions) (*v1.
rep, err = p.csiClient.CreateVolume(ctx, &req)

if err != nil {
if isFinalError(err) {
return nil, controller.ProvisioningFinished, err
}
return nil, controller.ProvisioningInBackground, err
// Giving up after an error and telling the pod scheduler to retry with a different node
// only makes sense if:
// - The CSI driver supports topology: without that, the next CreateVolume call after
// rescheduling will be exactly the same.
// - We are working on a volume with late binding: only in that case will
// provisioning be retried if we give up for now.
// - The error is one where rescheduling is
// a) allowed (i.e. we don't have to keep calling CreateVolume because the operation might be running) and
// b) it makes sense (typically local resource exhausted).
// isFinalError is going to check this.
//
// We do this regardless whether the driver has asked for strict topology because
// even drivers which did not ask for it explicitly might still only look at the first
// topology entry and thus succeed after rescheduling.
mayReschedule := p.supportsTopology() &&
options.SelectedNode != nil
state := checkError(err, mayReschedule)
klog.V(5).Infof("CreateVolume failed, supports topology = %v, node selected %v => may reschedule = %v => state = %v: %v",
p.supportsTopology(),
options.SelectedNode != nil,
mayReschedule,
state,
err)
return nil, state, err
}

if rep.Volume != nil {
Expand Down Expand Up @@ -1247,7 +1267,7 @@ func deprecationWarning(deprecatedParam, newParam, removalVersion string) string
return fmt.Sprintf("\"%s\" is deprecated and will be removed in %s%s", deprecatedParam, removalVersion, newParamPhrase)
}

func isFinalError(err error) bool {
func checkError(err error, mayReschedule bool) controller.ProvisioningState {
// Sources:
// https://github.com/grpc/grpc/blob/master/doc/statuscodes.md
// https://github.com/container-storage-interface/spec/blob/master/spec.md
Expand All @@ -1256,19 +1276,31 @@ func isFinalError(err error) bool {
// This is not gRPC error. The operation must have failed before gRPC
// method was called, otherwise we would get gRPC error.
// We don't know if any previous CreateVolume is in progress, be on the safe side.
return false
return controller.ProvisioningInBackground
}
switch st.Code() {
case codes.ResourceExhausted:
// CSI: operation not pending, "Unable to provision in `accessible_topology`"
// However, it also could be from the transport layer for "message size exceeded".
// Cannot be decided properly here and needs to be resolved in the spec
// https://github.com/container-storage-interface/spec/issues/419.
// What we assume here for now is that message size limits are large enough that
// the error really comes from the CSI driver.
if mayReschedule {
// may succeed elsewhere -> give up for now
return controller.ProvisioningReschedule
}
// may still succeed at a later time -> continue
return controller.ProvisioningInBackground
case codes.Canceled, // gRPC: Client Application cancelled the request
codes.DeadlineExceeded, // gRPC: Timeout
codes.Unavailable, // gRPC: Server shutting down, TCP connection broken - previous CreateVolume() may be still in progress.
codes.ResourceExhausted, // gRPC: Server temporarily out of resources - previous CreateVolume() may be still in progress.
codes.Aborted: // CSI: Operation pending for volume
return false
codes.DeadlineExceeded, // gRPC: Timeout
codes.Unavailable, // gRPC: Server shutting down, TCP connection broken - previous CreateVolume() may be still in progress.
codes.Aborted: // CSI: Operation pending for volume
return controller.ProvisioningInBackground
}
// All other errors mean that provisioning either did not
// even start or failed. It is for sure not in progress.
return true
return controller.ProvisioningFinished
}

func cleanupVolume(p *csiProvisioner, delReq *csi.DeleteVolumeRequest, provisionerCredentials map[string]string) error {
Expand Down
122 changes: 120 additions & 2 deletions pkg/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,14 @@ import (
k8stesting "k8s.io/client-go/testing"
utilfeaturetesting "k8s.io/component-base/featuregate/testing"
csitrans "k8s.io/csi-translation-lib"
"sigs.k8s.io/sig-storage-lib-external-provisioner/controller"
"k8s.io/klog"
"sigs.k8s.io/sig-storage-lib-external-provisioner/v5/controller"
)

func init() {
klog.InitFlags(nil)
}

const (
timeout = 10 * time.Second
driverName = "test-driver"
Expand Down Expand Up @@ -109,7 +114,7 @@ func createMockServer(t *testing.T, tmpdir string) (*gomock.Controller,
}

func tempDir(t *testing.T) string {
dir, err := ioutil.TempDir("", "external-attacher-test-")
dir, err := ioutil.TempDir("", "external-provisioner-test-")
if err != nil {
t.Fatalf("Cannot create temporary directory: %s", err)
}
Expand Down Expand Up @@ -2606,6 +2611,119 @@ func TestProvisionWithTopologyEnabled(t *testing.T) {
}
}

// TestProvisionErrorHandling checks how different errors are handled by the provisioner.
func TestProvisionErrorHandling(t *testing.T) {
const requestBytes = 100

testcases := map[codes.Code]controller.ProvisioningState{
codes.ResourceExhausted: controller.ProvisioningInBackground,
codes.Canceled: controller.ProvisioningInBackground,
codes.DeadlineExceeded: controller.ProvisioningInBackground,
codes.Unavailable: controller.ProvisioningInBackground,
codes.Aborted: controller.ProvisioningInBackground,

codes.Unknown: controller.ProvisioningFinished,
codes.InvalidArgument: controller.ProvisioningFinished,
codes.NotFound: controller.ProvisioningFinished,
codes.AlreadyExists: controller.ProvisioningFinished,
codes.PermissionDenied: controller.ProvisioningFinished,
codes.FailedPrecondition: controller.ProvisioningFinished,
codes.OutOfRange: controller.ProvisioningFinished,
codes.Unimplemented: controller.ProvisioningFinished,
codes.Internal: controller.ProvisioningFinished,
codes.DataLoss: controller.ProvisioningFinished,
codes.Unauthenticated: controller.ProvisioningFinished,
}
nodeLabels := []map[string]string{
{"com.example.csi/zone": "zone1", "com.example.csi/rack": "rack1"},
{"com.example.csi/zone": "zone1", "com.example.csi/rack": "rack2"},
}
topologyKeys := []map[string][]string{
{driverName: []string{"com.example.csi/zone", "com.example.csi/rack"}},
{driverName: []string{"com.example.csi/zone", "com.example.csi/rack"}},
}

test := func(driverSupportsTopology, nodeSelected bool) {
t.Run(fmt.Sprintf("topology=%v node=%v", driverSupportsTopology, nodeSelected), func(t *testing.T) {
for code, expectedState := range testcases {
t.Run(code.String(), func(t *testing.T) {
var (
pluginCaps rpc.PluginCapabilitySet
controllerCaps rpc.ControllerCapabilitySet
)
if driverSupportsTopology {
defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.Topology, true)()
pluginCaps, controllerCaps = provisionWithTopologyCapabilities()
} else {
pluginCaps, controllerCaps = provisionCapabilities()
}

tmpdir := tempDir(t)
defer os.RemoveAll(tmpdir)
mockController, driver, _, controllerServer, csiConn, err := createMockServer(t, tmpdir)
if err != nil {
t.Fatal(err)
}
defer mockController.Finish()
defer driver.Stop()

// Always return some error.
errOut := status.Error(code, "fake error")
controllerServer.EXPECT().CreateVolume(gomock.Any(), gomock.Any()).Return(nil, errOut).Times(1)

nodes := buildNodes(nodeLabels, k8sTopologyBetaVersion.String())
csiNodes := buildCSINodes(topologyKeys)
clientSet := fakeclientset.NewSimpleClientset(nodes, csiNodes)
scLister, csiNodeLister, nodeLister, claimLister, stopChan := listers(clientSet)
defer close(stopChan)

csiProvisioner := NewCSIProvisioner(clientSet, 5*time.Second, "test-provisioner", "test", 5,
csiConn.conn, nil, driverName, pluginCaps, controllerCaps, "", false, csitrans.New(), scLister, csiNodeLister, nodeLister, claimLister, false)
csiProvisionerExt := csiProvisioner.(controller.ProvisionerExt)

options := controller.ProvisionOptions{
StorageClass: &storagev1.StorageClass{},
PVC: createFakePVC(requestBytes),
}
if nodeSelected {
options.SelectedNode = &nodes.Items[0]
}
pv, state, err := csiProvisionerExt.ProvisionExt(options)

if pv != nil {
t.Errorf("expected no PV, got %v", pv)
}
if err == nil {
t.Fatal("expected error, got nil")
}
st, ok := status.FromError(err)
if !ok {
t.Errorf("expected status %s, got error without status: %v", code, err)
} else if st.Code() != code {
t.Errorf("expected status %s, got %s", code, st.Code())
}

// This is the only situation where we request rescheduling.
if driverSupportsTopology && nodeSelected && code == codes.ResourceExhausted {
expectedState = controller.ProvisioningReschedule
}

if expectedState != state {
t.Errorf("expected provisioning state %s, got %s", expectedState, state)
}
})
}
})
}

// Try all four combinations. For most of them there's no
// difference, but better check...
test(false, false)
test(false, true)
test(true, false)
test(true, true)
}

// TestProvisionWithTopologyDisabled checks that missing Node and CSINode objects, selectedNode
// are ignored and topology is not set on the PV
func TestProvisionWithTopologyDisabled(t *testing.T) {
Expand Down
8 changes: 8 additions & 0 deletions vendor/github.com/cespare/xxhash/v2/.travis.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions vendor/github.com/cespare/xxhash/v2/LICENSE.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

67 changes: 67 additions & 0 deletions vendor/github.com/cespare/xxhash/v2/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions vendor/github.com/cespare/xxhash/v2/go.mod

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Empty file.
Loading