diff --git a/test/extended/dr/backup_restore.go b/test/extended/dr/backup_restore.go
new file mode 100644
index 000000000000..f5f318f4a8f0
--- /dev/null
+++ b/test/extended/dr/backup_restore.go
@@ -0,0 +1,289 @@
+package dr
+
+import (
+	"context"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"strings"
+	"time"
+
+	g "github.com/onsi/ginkgo"
+	o "github.com/onsi/gomega"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	exutil "github.com/openshift/origin/test/extended/util"
+	"github.com/openshift/origin/test/extended/util/disruption"
+)
+
+var _ = g.Describe("[sig-etcd][Feature:DisasterRecovery][Disruptive]", func() {
+	defer g.GinkgoRecover()
+
+	f := framework.NewDefaultFramework("backup-restore")
+	f.SkipNamespaceCreation = true
+	f.SkipPrivilegedPSPBinding = true
+
+	oc := exutil.NewCLIWithoutNamespace("backup-restore")
+
+	// Validate the documented backup and restore procedure as closely as possible:
+	//
+	// backup: https://docs.openshift.com/container-platform/4.6/backup_and_restore/backing-up-etcd.html
+	// restore: https://docs.openshift.com/container-platform/4.6/backup_and_restore/disaster_recovery/scenario-2-restoring-cluster-state.html
+	//
+	// Comments like 'Backup 2' and 'Restore 1a' indicate where a test step
+	// corresponds to a step in the documentation.
+	//
+	// Backing up and recovering on the same node is tested by quorum_restore.go.
+	g.It("[Feature:EtcdRecovery] Cluster should recover from a backup taken on one node and recovered on another", func() {
+		masters := masterNodes(oc)
+		// Need one node to back up from and another to restore to
+		o.Expect(len(masters)).To(o.BeNumerically(">=", 2))
+
+		// Pick one node to backup on
+		backupNode := masters[0]
+		framework.Logf("Selecting node %q as the backup host", backupNode.Name)
+
+		// Recovery 1
+		// Pick a different node to recover on
+		recoveryNode := masters[1]
+		framework.Logf("Selecting node %q as the recovery host", recoveryNode.Name)
+
+		// Recovery 2
+		g.By("Verifying that all masters are reachable via ssh")
+		for _, master := range masters {
+			checkSSH(master)
+		}
+
+		disruptionFunc := func() {
+			// Backup 4
+			//
+			// The backup has to be taken after the upgrade tests have done
+			// their pre-disruption setup to ensure that the api state that is
+			// restored includes those changes.
+			g.By(fmt.Sprintf("Running the backup script on node %q", backupNode.Name))
+			sudoExecOnNodeOrFail(backupNode, "rm -rf /home/core/backup && /usr/local/bin/cluster-backup.sh /home/core/backup && chown -R core /home/core/backup")
+
+			// Recovery 3
+			// Copy the backup data from the backup node to the test host and
+			// from there to the recovery node.
+			//
+			// Another solution could be enabling the recovery node to connect
+			// directly to the backup node. It seemed simpler to use the test host
+			// as an intermediary rather than enabling agent forwarding or copying
+			// the private ssh key to the recovery node.
+
+			g.By("Creating a local temporary directory")
+			tempDir, err := ioutil.TempDir("", "e2e-backup-restore")
+			o.Expect(err).NotTo(o.HaveOccurred())
+
+			// Define the ssh configuration necessary to invoke scp, which does
+			// not appear to be supported by the golang ssh client.
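+			//
+			// The assembled copy command ends up looking roughly like:
+			//   scp -v <commonOpts> -i $KUBE_SSH_KEY_PATH [-o ProxyCommand=...] -r core@<backup node>:backup <tempDir>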
+			commonOpts := "-o StrictHostKeyChecking=no -o LogLevel=error -o ServerAliveInterval=30 -o ConnectionAttempts=100 -o ConnectTimeout=30"
+			authOpt := fmt.Sprintf("-i %s", os.Getenv("KUBE_SSH_KEY_PATH"))
+			bastionHost := os.Getenv("KUBE_SSH_BASTION")
+			proxyOpt := ""
+			if len(bastionHost) > 0 {
+				framework.Logf("Bastion host %s will be used to proxy scp to cluster nodes", bastionHost)
+				// The bastion host is expected to be of the form address:port
+				hostParts := strings.Split(bastionHost, ":")
+				o.Expect(len(hostParts)).To(o.Equal(2))
+				address := hostParts[0]
+				port := hostParts[1]
+				// A proxy command is required for a bastion host
+				proxyOpt = fmt.Sprintf("-o ProxyCommand='ssh -A -W %%h:%%p %s %s -p %s core@%s'", commonOpts, authOpt, port, address)
+			}
+
+			g.By(fmt.Sprintf("Copying the backup directory from backup node %q to the test host", backupNode.Name))
+			backupNodeAddress := addressForNode(backupNode)
+			o.Expect(backupNodeAddress).NotTo(o.BeEmpty())
+			copyFromBackupNodeCmd := fmt.Sprintf(`scp -v %s %s %s -r core@%s:backup %s`, commonOpts, authOpt, proxyOpt, backupNodeAddress, tempDir)
+			runCommandAndRetry(copyFromBackupNodeCmd)
+
+			g.By(fmt.Sprintf("Cleaning the backup path on recovery node %q", recoveryNode.Name))
+			sudoExecOnNodeOrFail(recoveryNode, "rm -rf /home/core/backup")
+
+			g.By(fmt.Sprintf("Copying the backup directory from the test host to recovery node %q", recoveryNode.Name))
+			recoveryNodeAddress := addressForNode(recoveryNode)
+			o.Expect(recoveryNodeAddress).NotTo(o.BeEmpty())
+			copyToRecoveryNodeCmd := fmt.Sprintf(`scp %s %s %s -r %s/backup core@%s:`, commonOpts, authOpt, proxyOpt, tempDir, recoveryNodeAddress)
+			runCommandAndRetry(copyToRecoveryNodeCmd)
+
+			// Stop etcd static pods on non-recovery masters.
+			for _, master := range masters {
+				// The restore script will stop static pods on the recovery node
+				if master.Name == recoveryNode.Name {
+					continue
+				}
+				// Recovery 4b
+				g.By(fmt.Sprintf("Stopping etcd static pod on node %q", master.Name))
+				manifest := "/etc/kubernetes/manifests/etcd-pod.yaml"
+				// Move only if present to ensure idempotent behavior during debugging.
+				sudoExecOnNodeOrFail(master, fmt.Sprintf("test -f %s && mv -f %s /tmp || true", manifest, manifest))
+
+				// Recovery 4c
+				g.By(fmt.Sprintf("Waiting for etcd to exit on node %q", master.Name))
+				// Look for 'etcd ' (with trailing space) to be missing to
+				// differentiate from pods like etcd-operator.
+				sudoExecOnNodeOrFail(master, "crictl ps | grep 'etcd ' | wc -l | grep -q 0")
+
+				// Recovery 4f
+				g.By(fmt.Sprintf("Moving etcd data directory on node %q", master.Name))
+				// Move only if present to ensure idempotent behavior during debugging.
+				sudoExecOnNodeOrFail(master, "test -d /var/lib/etcd && (rm -rf /tmp/etcd && mv /var/lib/etcd/ /tmp) || true")
+			}
+
+			// Recovery 4d
+			// Trigger stop of kube-apiserver static pods on non-recovery
+			// masters, without waiting, to minimize the test time required for
+			// graceful termination to complete.
+			for _, master := range masters {
+				// The restore script will stop static pods on the recovery node
+				if master.Name == recoveryNode.Name {
+					continue
+				}
+				g.By(fmt.Sprintf("Stopping kube-apiserver static pod on node %q", master.Name))
+				manifest := "/etc/kubernetes/manifests/kube-apiserver-pod.yaml"
+				// Move only if present to ensure idempotent behavior during debugging.
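+				// The trailing '|| true' keeps the exit status zero when the
+				// manifest has already been moved, so re-runs don't fail.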
+				sudoExecOnNodeOrFail(master, fmt.Sprintf("test -f %s && mv -f %s /tmp || true", manifest, manifest))
+			}
+
+			// Recovery 4e
+			// Wait for kube-apiserver pods to exit
+			for _, master := range masters {
+				// The restore script will stop static pods on the recovery node
+				if master.Name == recoveryNode.Name {
+					continue
+				}
+				g.By(fmt.Sprintf("Waiting for kube-apiserver to exit on node %q", master.Name))
+				// Look for 'kube-apiserver ' (with trailing space) to be missing
+				// to differentiate from pods like kube-apiserver-operator.
+				sudoExecOnNodeOrFail(master, "crictl ps | grep 'kube-apiserver ' | wc -l | grep -q 0")
+			}
+
+			// Recovery 7
+			restoreFromBackup(recoveryNode)
+
+			// Recovery 8
+			for _, master := range masters {
+				restartKubelet(master)
+			}
+
+			// Recovery 9a, 9b
+			waitForAPIServer(oc.AdminKubeClient(), recoveryNode)
+
+			// Recovery 10,11,12
+			forceOperandRedeployment(oc.AdminOperatorClient().OperatorV1())
+
+			// Recovery 13
+			waitForReadyEtcdPods(oc.AdminKubeClient(), len(masters))
+
+			waitForOperatorsToSettle()
+		}
+
+		disruption.Run(f, "Backup from one node and recover on another", "restore_different_node",
+			disruption.TestData{},
+			disruptionTests,
+			disruptionFunc,
+		)
+	})
+})
+
+// addressForNode looks for an ssh-accessible ip address for a node in case the
+// node name doesn't resolve in the test environment. An empty string will be
+// returned if an address could not be determined.
+func addressForNode(node *corev1.Node) string {
+	for _, a := range node.Status.Addresses {
+		if a.Type == corev1.NodeExternalIP && a.Address != "" {
+			return a.Address
+		}
+	}
+	// No external IPs were found, let's try to use internal as plan B
+	for _, a := range node.Status.Addresses {
+		if a.Type == corev1.NodeInternalIP && a.Address != "" {
+			return a.Address
+		}
+	}
+	return ""
+}
+
+// What follows are helper functions corresponding to steps in the recovery
+// procedure. They are defined in a granular fashion to allow reuse by the
+// quorum restore test. The quorum restore test needs to interleave the
+// standard commands with commands related to master recreation.
+
+// Recovery 7
+func restoreFromBackup(node *corev1.Node) {
+	g.By(fmt.Sprintf("Running restore script on recovery node %q", node.Name))
+	sudoExecOnNodeOrFail(node, "/usr/local/bin/cluster-restore.sh /home/core/backup")
+}
+
+// Recovery 8
+func restartKubelet(node *corev1.Node) {
+	g.By(fmt.Sprintf("Restarting the kubelet service on node %q", node.Name))
+	sudoExecOnNodeOrFail(node, "systemctl restart kubelet.service")
+}
+
+// Recovery 9a
+func waitForEtcdContainer(node *corev1.Node) {
+	g.By(fmt.Sprintf("Verifying that the etcd container is running on recovery node %q", node.Name))
+	// Look for 'etcd ' (with trailing space) to differentiate from pods
+	// like etcd-operator.
+	sudoExecOnNodeOrFail(node, "crictl ps | grep -q 'etcd '")
+}
+
+// Recovery 9b
+func waitForEtcdPod(node *corev1.Node) {
+	// The success of this check also ensures that the kube apiserver on
+	// the recovery node is accepting connections.
+	g.By(fmt.Sprintf("Verifying that the etcd pod is running on recovery node %q", node.Name))
+	// Look for a single running etcd pod
+	runningEtcdPodCmd := "oc get pods -n openshift-etcd -l k8s-app=etcd --no-headers=true | grep Running | wc -l | grep -q 1"
+	// The kubeconfig on the node is only readable by root and usage requires sudo.
+	nodeKubeConfig := "/etc/kubernetes/static-pod-resources/kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig"
+	sudoExecOnNodeOrFail(node, fmt.Sprintf("KUBECONFIG=%s %s", nodeKubeConfig, runningEtcdPodCmd))
+}
+
+func waitForAPIServerAvailability(client kubernetes.Interface) {
+	g.By("Waiting for API server to become available")
+	err := wait.PollImmediate(10*time.Second, 30*time.Minute, func() (done bool, err error) {
+		_, err = client.CoreV1().Namespaces().Get(context.Background(), "default", metav1.GetOptions{})
+		if err != nil {
+			framework.Logf("Observed an error waiting for apiserver availability outside the cluster: %v", err)
+		}
+		return err == nil, nil
+	})
+	o.Expect(err).NotTo(o.HaveOccurred())
+}
+
+// waitForAPIServer waits for the etcd container and pod running on the
+// recovery node and then waits for the apiserver to be accessible outside
+// the cluster.
+func waitForAPIServer(client kubernetes.Interface, node *corev1.Node) {
+	// Recovery 9a
+	waitForEtcdContainer(node)
+
+	// Recovery 9b
+	waitForEtcdPod(node)
+
+	// Even with the apiserver available on the recovery node, it may
+	// take additional time for the api to become available externally
+	// to the cluster.
+	waitForAPIServerAvailability(client)
+}
+
+// Recovery 13
+func waitForReadyEtcdPods(client kubernetes.Interface, masterCount int) {
+	g.By("Waiting for all etcd pods to become ready")
+	waitForPodsTolerateClientTimeout(
+		client.CoreV1().Pods("openshift-etcd"),
+		exutil.ParseLabelsOrDie("k8s-app=etcd"),
+		exutil.CheckPodIsReady,
+		masterCount,
+		40*time.Minute,
+	)
+}
diff --git a/test/extended/dr/common.go b/test/extended/dr/common.go
index 4acef2741af9..68e32688c540 100644
--- a/test/extended/dr/common.go
+++ b/test/extended/dr/common.go
@@ -11,8 +11,10 @@ import (

 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/dynamic"
+	"k8s.io/kubernetes/test/e2e/framework"
 	e2e "k8s.io/kubernetes/test/e2e/framework"
 	e2elog "k8s.io/kubernetes/test/e2e/framework/log"
 	e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
@@ -20,6 +22,7 @@ import (
 	"github.com/openshift/origin/test/e2e/upgrade"
 	exutil "github.com/openshift/origin/test/extended/util"

+	g "github.com/onsi/ginkgo"
 	o "github.com/onsi/gomega"
 	"github.com/stretchr/objx"
 )
@@ -89,7 +92,17 @@ func waitForMastersToUpdate(oc *exutil.CLI, mcps dynamic.NamespaceableResourceIn
 	o.Expect(err).NotTo(o.HaveOccurred())
 }

-func waitForOperatorsToSettle(coc dynamic.NamespaceableResourceInterface) {
+func waitForOperatorsToSettle() {
+	g.By("Waiting for operators to settle before performing post-disruption testing")
+	config, err := framework.LoadConfig()
+	o.Expect(err).NotTo(o.HaveOccurred())
+	dynamicClient := dynamic.NewForConfigOrDie(config)
+	coc := dynamicClient.Resource(schema.GroupVersionResource{
+		Group:    "config.openshift.io",
+		Version:  "v1",
+		Resource: "clusteroperators",
+	})
+
 	var lastErr error
 	// gate on all clusteroperators being ready
 	available := make(map[string]struct{})
@@ -274,6 +287,12 @@ func execOnNodeOrFail(node *corev1.Node, cmd string) {
 	_ = execOnNodeWithOutputOrFail(node, cmd)
 }

+// sudoExecOnNodeOrFail executes a command under sudo with execOnNodeOrFail.
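+// For example, cmd 'systemctl restart kubelet.service' is run on the node as
+// sudo -i /bin/bash -cx "systemctl restart kubelet.service".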
+func sudoExecOnNodeOrFail(node *corev1.Node, cmd string) {
+	sudoCmd := fmt.Sprintf(`sudo -i /bin/bash -cx "%s"`, cmd)
+	execOnNodeOrFail(node, sudoCmd)
+}
+
 // checkSSH repeatedly attempts to establish an ssh connection to a
 // node and fails the calling test if unable to establish the
 // connection before the default timeout.
diff --git a/test/extended/dr/force_redeploy.go b/test/extended/dr/force_redeploy.go
new file mode 100644
index 000000000000..e5db7fa023c8
--- /dev/null
+++ b/test/extended/dr/force_redeploy.go
@@ -0,0 +1,182 @@
+package dr
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	g "github.com/onsi/ginkgo"
+	o "github.com/onsi/gomega"
+	"github.com/pborman/uuid"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	operatorv1 "github.com/openshift/api/operator/v1"
+	operatorv1client "github.com/openshift/client-go/operator/clientset/versioned/typed/operator/v1"
+)
+
+// Enables forcible redeployment of etcd, kube-apiserver,
+// kube-controller-manager, kube-scheduler operands. This is a
+// necessary part of restoring a cluster from backup.
+
+const (
+	redeployWaitInterval = 5 * time.Second
+	redeployWaitTimeout  = 2 * time.Minute
+)
+
+// operatorConfigClient supports patching and retrieving the status of
+// an operator's 'cluster' config resource to support triggering
+// redeployment and watching for a successful rollout.
+type operatorConfigClient struct {
+	name      string
+	patch     func(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions) error
+	getStatus func(ctx context.Context, name string, opts metav1.GetOptions) (*operatorv1.StaticPodOperatorStatus, error)
+}
+
+func (c *operatorConfigClient) String() string {
+	return c.name
+}
+
+// forceOperandRedeployment forces the redeployment of the etcd,
+// kube-apiserver, kube-controller-manager and kube-scheduler operands
+// (in that order). Only when an operand has been successfully rolled
+// out will redeployment of the subsequent operand be attempted.
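+//
+// Redeployment is triggered by applying a merge patch of the form
+// {"spec": {"forceRedeploymentReason": "<unique reason>"}} to each
+// operator's 'cluster' resource (see forceRedeployOperand below).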
+func forceOperandRedeployment(client operatorv1client.OperatorV1Interface) {
+	clients := []*operatorConfigClient{
+		{
+			name: "etcd",
+			patch: func(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions) error {
+				_, err := client.Etcds().Patch(ctx, name, pt, data, opts)
+				return err
+			},
+			getStatus: func(ctx context.Context, name string, opts metav1.GetOptions) (*operatorv1.StaticPodOperatorStatus, error) {
+				obj, err := client.Etcds().Get(ctx, name, opts)
+				if err != nil {
+					return nil, err
+				}
+				return &obj.Status.StaticPodOperatorStatus, nil
+			},
+		},
+		{
+			name: "kube-apiserver",
+			patch: func(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions) error {
+				_, err := client.KubeAPIServers().Patch(ctx, name, pt, data, opts)
+				return err
+			},
+			getStatus: func(ctx context.Context, name string, opts metav1.GetOptions) (*operatorv1.StaticPodOperatorStatus, error) {
+				obj, err := client.KubeAPIServers().Get(ctx, name, opts)
+				if err != nil {
+					return nil, err
+				}
+				return &obj.Status.StaticPodOperatorStatus, nil
+			},
+		},
+		{
+			name: "kube-controller-manager",
+			patch: func(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions) error {
+				_, err := client.KubeControllerManagers().Patch(ctx, name, pt, data, opts)
+				return err
+			},
+			getStatus: func(ctx context.Context, name string, opts metav1.GetOptions) (*operatorv1.StaticPodOperatorStatus, error) {
+				obj, err := client.KubeControllerManagers().Get(ctx, name, opts)
+				if err != nil {
+					return nil, err
+				}
+				return &obj.Status.StaticPodOperatorStatus, nil
+			},
+		},
+		{
+			name: "kube-scheduler",
+			patch: func(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions) error {
+				_, err := client.KubeSchedulers().Patch(ctx, name, pt, data, opts)
+				return err
+			},
+			getStatus: func(ctx context.Context, name string, opts metav1.GetOptions) (*operatorv1.StaticPodOperatorStatus, error) {
+				obj, err := client.KubeSchedulers().Get(ctx, name, opts)
+				if err != nil {
+					return nil, err
+				}
+				return &obj.Status.StaticPodOperatorStatus, nil
+			},
+		},
+	}
+	for _, client := range clients {
+		forceRedeployOperand(client)
+	}
+}
+
+// forceRedeployOperand initiates redeployment of an operand and waits for a
+// successful rollout.
+func forceRedeployOperand(client *operatorConfigClient) {
+	// Retrieve the LatestAvailableRevision before rolling out to know
+	// what revision not to look for in the subsequent check for
+	// rollout success.
+	g.By(fmt.Sprintf("Finding LatestAvailableRevision for %s", client))
+	var latestAvailableRevision int32
+	err := wait.PollImmediate(redeployWaitInterval, redeployWaitTimeout, func() (done bool, err error) {
+		status, err := client.getStatus(context.Background(), "cluster", metav1.GetOptions{})
+		if err != nil {
+			framework.Logf("Error retrieving %s operator status: %v", client, err)
+		} else {
+			latestAvailableRevision = status.LatestAvailableRevision
+		}
+		return err == nil, nil
+	})
+	o.Expect(err).NotTo(o.HaveOccurred())
+	framework.Logf("LatestAvailableRevision for %s is %d", client, latestAvailableRevision)
+
+	// Use a unique forceRedeploymentReason for each test run so that
+	// rollout is always triggered even if running repeatedly against
+	// the same cluster (as when debugging).
+	reason := fmt.Sprintf("e2e-cluster-restore-%s", uuid.NewUUID())
+
+	g.By(fmt.Sprintf("Forcing redeployment of %s", client))
+	data := fmt.Sprintf(`{"spec": {"forceRedeploymentReason": "%s"}}`, reason)
+	err = wait.PollImmediate(redeployWaitInterval, redeployWaitTimeout, func() (done bool, err error) {
+		err = client.patch(context.Background(), "cluster", types.MergePatchType, []byte(data), metav1.PatchOptions{})
+		if err != nil {
+			framework.Logf("Error patching %s operator config to set redeploy reason: %v", client, err)
+		}
+		return err == nil, nil
+	})
+	o.Expect(err).NotTo(o.HaveOccurred())
+
+	g.By(fmt.Sprintf("Waiting for %s to be updated on all nodes to a revision greater than %d", client, latestAvailableRevision))
+	waitForRollout(client, latestAvailableRevision)
+	framework.Logf("Rollout complete for %s", client)
+}
+
+// waitForRollout waits for an operator status to indicate that all nodes are
+// at a revision greater than that provided.
+func waitForRollout(client *operatorConfigClient, previousRevision int32) {
+	// Need to wait as long as 15 minutes for rollout of the kube apiserver
+	err := wait.PollImmediate(redeployWaitInterval, 15*time.Minute, func() (done bool, err error) {
+		status, err := client.getStatus(context.Background(), "cluster", metav1.GetOptions{})
+		if err != nil {
+			framework.Logf("Error retrieving %s operator status: %v", client, err)
+			return false, nil
+		}
+		rolloutComplete := false
+		for _, condition := range status.Conditions {
+			if condition.Type == "NodeInstallerProgressing" {
+				rolloutComplete = condition.Reason == "AllNodesAtLatestRevision" && condition.Status == operatorv1.ConditionFalse
+				break
+			}
+		}
+		if !rolloutComplete {
+			return false, nil
+		}
+		// Prevent timing issues by ensuring that the revision of all nodes is
+		// greater than the revision observed before rollout was initiated.
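+		// (Revisions only ever increase, so a node still reporting
+		// previousRevision has not yet picked up the newly triggered rollout.)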
+		for _, nodeStatus := range status.NodeStatuses {
+			if nodeStatus.CurrentRevision == previousRevision {
+				return false, nil
+			}
+		}
+		return true, nil
+	})
+	o.Expect(err).NotTo(o.HaveOccurred())
+}
diff --git a/test/extended/dr/quorum_restore.go b/test/extended/dr/quorum_restore.go
index 2bd29fe0ba61..37ff2f06847b 100644
--- a/test/extended/dr/quorum_restore.go
+++ b/test/extended/dr/quorum_restore.go
@@ -37,6 +37,15 @@ const (
 	machineAnnotationName = "machine.openshift.io/machine"
 )

+var disruptionTests []upgrades.Test = []upgrades.Test{
+	&upgrades.ServiceUpgradeTest{},
+	&upgrades.SecretUpgradeTest{},
+	&apps.ReplicaSetUpgradeTest{},
+	&apps.StatefulSetUpgradeTest{},
+	&apps.DeploymentUpgradeTest{},
+	&apps.DaemonSetUpgradeTest{},
+}
+
 var _ = g.Describe("[sig-etcd][Feature:DisasterRecovery][Disruptive]", func() {
 	defer g.GinkgoRecover()

@@ -47,6 +56,8 @@ var _ = g.Describe("[sig-etcd][Feature:DisasterRecovery][Disruptive]", func() {
 	oc := exutil.NewCLIWithoutNamespace("disaster-recovery")

 	g.It("[Feature:EtcdRecovery] Cluster should restore itself after quorum loss", func() {
+		e2eskipper.Skipf("Test is disabled pending a fix https://github.com/openshift/origin/pull/25774")
+
 		config, err := framework.LoadConfig()
 		o.Expect(err).NotTo(o.HaveOccurred())
 		dynamicClient := dynamic.NewForConfigOrDie(config)
@@ -60,11 +71,6 @@ var _ = g.Describe("[sig-etcd][Feature:DisasterRecovery][Disruptive]", func() {
 			Version:  "v1",
 			Resource: "machineconfigpools",
 		})
-		coc := dynamicClient.Resource(schema.GroupVersionResource{
-			Group:    "config.openshift.io",
-			Version:  "v1",
-			Resource: "clusteroperators",
-		})

 		// test for machines as a proxy for "can we recover a master"
 		machines, err := dynamicClient.Resource(schema.GroupVersionResource{
@@ -79,14 +85,7 @@ var _ = g.Describe("[sig-etcd][Feature:DisasterRecovery][Disruptive]", func() {

 		disruption.Run(f, "Quorum Loss and Restore", "quorum_restore",
 			disruption.TestData{},
-			[]upgrades.Test{
-				&upgrades.ServiceUpgradeTest{},
-				&upgrades.SecretUpgradeTest{},
-				&apps.ReplicaSetUpgradeTest{},
-				&apps.StatefulSetUpgradeTest{},
-				&apps.DeploymentUpgradeTest{},
-				&apps.DaemonSetUpgradeTest{},
-			},
+			disruptionTests,
 			func() {
 				framework.Logf("Verify SSH is available before restart")

@@ -248,15 +247,8 @@ var _ = g.Describe("[sig-etcd][Feature:DisasterRecovery][Disruptive]", func() {
 				_, err = oc.AdminOperatorClient().OperatorV1().KubeAPIServers().Patch(context.Background(), "cluster", types.MergePatchType, []byte(`{"spec": {"forceRedeploymentReason": "recover-kube-apiserver"}}`), metav1.PatchOptions{})
 				o.Expect(err).NotTo(o.HaveOccurred())

-				framework.Logf("Wait for etcd pods to become available")
-				_, err = waitForPodsTolerateClientTimeout(
-					oc.AdminKubeClient().CoreV1().Pods("openshift-etcd"),
-					exutil.ParseLabelsOrDie("k8s-app=etcd"),
-					exutil.CheckPodIsReady,
-					expectedNumberOfMasters,
-					40*time.Minute,
-				)
-				o.Expect(err).NotTo(o.HaveOccurred())
+				// Recovery 13
+				waitForReadyEtcdPods(oc.AdminKubeClient(), expectedNumberOfMasters)

 				scaleEtcdQuorum(pollClient, expectedNumberOfMasters)

@@ -264,14 +256,13 @@ var _ = g.Describe("[sig-etcd][Feature:DisasterRecovery][Disruptive]", func() {
 				// SDN won't switch to Degraded mode when service is down after disaster recovery
 				// restartSDNPods(oc)
 				waitForMastersToUpdate(oc, mcps)
-				waitForOperatorsToSettle(coc)
+				waitForOperatorsToSettle()
 			})
 	},
 	)
 })

-func waitForPodsTolerateClientTimeout(c corev1client.PodInterface, label labels.Selector, predicate func(corev1.Pod) bool, count int, timeout time.Duration) ([]string, error) {
-	var podNames []string
+func waitForPodsTolerateClientTimeout(c corev1client.PodInterface, label labels.Selector, predicate func(corev1.Pod) bool, count int, timeout time.Duration) {
 	err := wait.Poll(10*time.Second, timeout, func() (bool, error) {
 		p, e := exutil.GetPodNamesByFilter(c, label, predicate)
 		if e != nil {
@@ -282,10 +273,9 @@ func waitForPodsTolerateClientTimeout(c corev1client.PodInterface, label labels.
 		if len(p) != count {
 			return false, nil
 		}
-		podNames = p
 		return true, nil
 	})
-	return podNames, err
+	o.Expect(err).NotTo(o.HaveOccurred())
 }

 func scaleEtcdQuorum(client kubernetes.Interface, replicas int) error {
diff --git a/test/extended/util/annotate/generated/zz_generated.annotations.go b/test/extended/util/annotate/generated/zz_generated.annotations.go
index fdeb0a32d1be..87d3b72c1830 100644
--- a/test/extended/util/annotate/generated/zz_generated.annotations.go
+++ b/test/extended/util/annotate/generated/zz_generated.annotations.go
@@ -1733,6 +1733,8 @@ var annotations = map[string]string{

 	"[Top Level] [sig-etcd] etcd leader changes are not excessive [Late]": "leader changes are not excessive [Late] [Suite:openshift/conformance/parallel]",

+	"[Top Level] [sig-etcd][Feature:DisasterRecovery][Disruptive] [Feature:EtcdRecovery] Cluster should recover from a backup taken on one node and recovered on another": "[Feature:EtcdRecovery] Cluster should recover from a backup taken on one node and recovered on another [Serial]",
+
 	"[Top Level] [sig-etcd][Feature:DisasterRecovery][Disruptive] [Feature:EtcdRecovery] Cluster should restore itself after quorum loss": "[Feature:EtcdRecovery] Cluster should restore itself after quorum loss [Serial]",

 	"[Top Level] [sig-imageregistry][Feature:ImageAppend] Image append should create images by appending them": "should create images by appending them [Suite:openshift/conformance/parallel]",