Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 45 additions & 31 deletions pkg/controller/dnszone/awsactuator.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ type AWSActuator struct {
// awsClient is a utility for making it easy for controllers to interface with AWS
awsClient awsclient.Client

// zoneID is the ID of the hosted zone in route53
zoneID *string
// hostedZone is the AWS object representing the hosted zone in route53
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I do not object to this change, it is not clear to me why it is needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just when comparing to the other actuators, it seemed odd that we didn't have access to the full object after we went through the trouble of fetching it. But you're right, it has no immediate effect other than making the AWS actuator more similar to the other actuators.

hostedZone *route53.HostedZone

// currentTags are the list of tags associated with the currentHostedZone
currentHostedZoneTags []*route53.Tag
Expand Down Expand Up @@ -76,8 +76,8 @@ func NewAWSActuator(

// UpdateMetadata ensures that the Route53 hosted zone metadata is current with the DNSZone
func (a *AWSActuator) UpdateMetadata() error {
if a.zoneID == nil {
return errors.New("zoneID is unpopulated")
if a.hostedZone == nil {
return errors.New("hostedZone is unpopulated")
}

// For now, tags are the only things we can sync with existing zones.
Expand All @@ -95,7 +95,7 @@ func (a *AWSActuator) syncTags() error {
// the toDelete array
copy(toDelete, existingTags)

logger := a.logger.WithField("id", a.zoneID)
logger := a.logger.WithField("id", a.hostedZone.Id)
logger.WithField("current", tagsString(existingTags)).WithField("expected", tagsString(expected)).Debug("syncing tags")

for _, tag := range expected {
Expand Down Expand Up @@ -150,7 +150,7 @@ func (a *AWSActuator) syncTags() error {
_, err := a.awsClient.ChangeTagsForResource(&route53.ChangeTagsForResourceInput{
AddTags: toAddSegment,
RemoveTagKeys: keysToDeleteSegment,
ResourceId: a.zoneID,
ResourceId: a.hostedZone.Id,
ResourceType: aws.String("hostedzone"),
})
if err != nil {
Expand All @@ -165,12 +165,12 @@ func (a *AWSActuator) syncTags() error {

// ModifyStatus updates the DnsZone's status with AWS specific information.
func (a *AWSActuator) ModifyStatus() error {
if a.zoneID == nil {
if a.hostedZone == nil {
return errors.New("zoneID is unpopulated")
}

a.dnsZone.Status.AWS = &hivev1.AWSDNSZoneStatus{
ZoneID: a.zoneID,
ZoneID: a.hostedZone.Id,
}

return nil
Expand Down Expand Up @@ -206,7 +206,7 @@ func (a *AWSActuator) Refresh() error {
}

// Fetch the hosted zone
a.zoneID = nil
a.hostedZone = nil
for _, zoneID := range zoneIDs {
logger := a.logger.WithField("id", zoneID)
logger.Debug("Fetching hosted zone by ID")
Expand All @@ -226,17 +226,23 @@ func (a *AWSActuator) Refresh() error {
continue
}
logger.Debug("Found hosted zone")
a.zoneID = &zoneID
a.hostedZone = resp.HostedZone

// Update dnsZone status now that we have the zoneID
if err := a.ModifyStatus(); err != nil {
a.logger.WithError(err).Error("failed to update status after refresh")
return err
}
}

if a.zoneID == nil {
if a.hostedZone == nil {
a.logger.Debug("No existing zone found")
return nil
}

logger := a.logger.WithField("id", a.zoneID)
logger := a.logger.WithField("id", a.hostedZone.Id)
logger.Debug("Fetching hosted zone tags")
tags, err := a.existingTags(a.zoneID)
tags, err := a.existingTags(a.hostedZone.Id)
if err != nil {
logger.WithError(err).Error("Cannot get hosted zone tags")
return err
Expand Down Expand Up @@ -354,7 +360,11 @@ func (a *AWSActuator) Create() error {
return err
}

a.zoneID = hostedZone.Id
a.hostedZone = hostedZone
if err := a.ModifyStatus(); err != nil {
logger.WithError(err).Error("failed to populate DNSZone status")
return err
}
Comment on lines +364 to +367
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is snuck in the "just store entire aws dnszone" commit. Should it be in the "populate DNSZone status immediately after fetching zoneID (AWS)" commit instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it came up while testing things and I must have squashed the wrong commits together while cleaning them up for submission.

a.currentHostedZoneTags = existingTags

logger.Debug("Syncing zone tags")
Expand Down Expand Up @@ -407,19 +417,20 @@ func (a *AWSActuator) findZoneByCallerReference(domain, callerRef string) (*rout

// Delete removes an AWS Route53 hosted zone, typically because the DNSZone object is in a deleting state.
func (a *AWSActuator) Delete() error {
if a.zoneID == nil {
return errors.New("zoneID is unpopulated")
if a.hostedZone == nil {
return errors.New("hostedZone is unpopulated")
}

logger := a.logger.WithField("zone", a.dnsZone.Spec.Zone).WithField("id", aws.StringValue(a.zoneID))
logger := a.logger.WithField("zone", a.dnsZone.Spec.Zone).WithField("id", aws.StringValue(a.hostedZone.Id))

if err := a.deleteRecordSets(logger); err != nil {
logger.Info("Deleting route53 recordsets in hostedzone")
if err := DeleteAWSRecordSets(a.awsClient, a.dnsZone, logger); err != nil {
return err
}

logger.Info("Deleting route53 hostedzone")
_, err := a.awsClient.DeleteHostedZone(&route53.DeleteHostedZoneInput{
Id: a.zoneID,
Id: a.hostedZone.Id,
})
if err != nil {
logLevel := log.ErrorLevel
Expand All @@ -431,24 +442,26 @@ func (a *AWSActuator) Delete() error {
return err
}

func (a *AWSActuator) deleteRecordSets(logger log.FieldLogger) error {
logger.Info("Deleting route53 recordsets in hostedzone")
// DeleteAWSRecordSets will clean up a DNS zone down to the minimum required record entries
func DeleteAWSRecordSets(awsClient awsclient.Client, dnsZone *hivev1.DNSZone, logger log.FieldLogger) error {

maxItems := "100"
listInput := &route53.ListResourceRecordSetsInput{
HostedZoneId: a.zoneID,
HostedZoneId: dnsZone.Status.AWS.ZoneID,
MaxItems: &maxItems,
}
for {
listOutput, err := a.awsClient.ListResourceRecordSets(listInput)
listOutput, err := awsClient.ListResourceRecordSets(listInput)
if err != nil {
return err
}
var changes []*route53.Change
for _, recordSet := range listOutput.ResourceRecordSets {
// Ignore the 2 recordsets that are created with the hosted zone and that cannot be deleted
if n, t := aws.StringValue(recordSet.Name), aws.StringValue(recordSet.Type); n == controllerutils.Dotted(a.dnsZone.Spec.Zone) && (t == route53.RRTypeNs || t == route53.RRTypeSoa) {
if n, t := aws.StringValue(recordSet.Name), aws.StringValue(recordSet.Type); n == controllerutils.Dotted(dnsZone.Spec.Zone) && (t == route53.RRTypeNs || t == route53.RRTypeSoa) {
continue
}

logger.WithField("name", aws.StringValue(recordSet.Name)).WithField("type", aws.StringValue(recordSet.Type)).Info("recordset set for deletion")
changes = append(changes, &route53.Change{
Action: aws.String(route53.ChangeActionDelete),
Expand All @@ -457,9 +470,9 @@ func (a *AWSActuator) deleteRecordSets(logger log.FieldLogger) error {
}
if len(changes) > 0 {
logger.WithField("count", len(changes)).Info("deleting recordsets")
if _, err := a.awsClient.ChangeResourceRecordSets(&route53.ChangeResourceRecordSetsInput{
if _, err := awsClient.ChangeResourceRecordSets(&route53.ChangeResourceRecordSetsInput{
ChangeBatch: &route53.ChangeBatch{Changes: changes},
HostedZoneId: a.zoneID,
HostedZoneId: dnsZone.Status.AWS.ZoneID,
}); err != nil {
return err
}
Expand All @@ -472,18 +485,19 @@ func (a *AWSActuator) deleteRecordSets(logger log.FieldLogger) error {
listInput.StartRecordType = listOutput.NextRecordType
}
return nil

}

// GetNameServers returns the nameservers listed in the route53 hosted zone NS record.
func (a *AWSActuator) GetNameServers() ([]string, error) {
if a.zoneID == nil {
return nil, errors.New("zoneID is unpopulated")
if a.hostedZone == nil {
return nil, errors.New("hostedZone is unpopulated")
}

logger := a.logger.WithField("id", a.zoneID)
logger := a.logger.WithField("id", a.hostedZone.Id)
logger.Debug("Listing hosted zone NS records")
resp, err := a.awsClient.ListResourceRecordSets(&route53.ListResourceRecordSetsInput{
HostedZoneId: aws.String(*a.zoneID),
HostedZoneId: aws.String(*a.hostedZone.Id),
StartRecordType: aws.String("NS"),
StartRecordName: aws.String(a.dnsZone.Spec.Zone),
MaxItems: aws.String("1"),
Expand Down Expand Up @@ -517,7 +531,7 @@ func (a *AWSActuator) GetNameServers() ([]string, error) {

// Exists determines if the route53 hosted zone corresponding to the DNSZone exists
func (a *AWSActuator) Exists() (bool, error) {
return a.zoneID != nil, nil
return a.hostedZone != nil, nil
}

func (a *AWSActuator) setInsufficientCredentialsConditionToFalse() bool {
Expand Down
13 changes: 13 additions & 0 deletions pkg/controller/dnszone/awsactuator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,16 @@ func mockDeleteAWSZone(expect *mock.MockClientMockRecorder) {
expect.ListResourceRecordSets(gomock.Any()).Return(&route53.ListResourceRecordSetsOutput{}, nil).Times(1)
expect.DeleteHostedZone(gomock.Any()).Return(nil, nil).Times(1)
}

// mockGetResourcePages registers an expectation on the mock AWS client for a
// GetResourcesPages call with any arguments. The mocked call returns nil and,
// as a side effect, invokes the caller-supplied page callback once with a page
// containing a single hosted zone ARN, flagged as the last page.
func mockGetResourcePages(expect *mock.MockClientMockRecorder) {
	// deliverPage stands in for the paginated API: it builds one page of
	// results and hands it to the page callback.
	deliverPage := func(_ *resourcegroupstaggingapi.GetResourcesInput, pageFn func(*resourcegroupstaggingapi.GetResourcesOutput, bool) bool) {
		mapping := &resourcegroupstaggingapi.ResourceTagMapping{
			ResourceARN: aws.String("arn:aws:route53:::hostedzone/Z055920326CHQAW0WSG5N"),
		}
		page := &resourcegroupstaggingapi.GetResourcesOutput{
			ResourceTagMappingList: []*resourcegroupstaggingapi.ResourceTagMapping{mapping},
		}
		pageFn(page, true)
	}

	call := expect.GetResourcesPages(gomock.Any(), gomock.Any())
	call.Return(nil).Do(deliverPage)
}
17 changes: 17 additions & 0 deletions pkg/controller/dnszone/dnszone_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,23 @@ func TestReconcileDNSProviderForAWS(t *testing.T) {
assert.False(t, controllerutils.HasFinalizer(zone, hivev1.FinalizerDNSZone))
},
},
{
name: "Delete DNSZone without status",
dnsZone: func() *hivev1.DNSZone {
dz := validDNSZoneBeingDeleted()
dz.Status.AWS = nil
return dz
}(),
setupAWSMock: func(expect *mock.MockClientMockRecorder) {
mockGetResourcePages(expect)
mockAWSZoneExists(expect, validDNSZoneWithAdditionalTags())
mockExistingAWSTags(expect)
mockDeleteAWSZone(expect)
},
validateZone: func(t *testing.T, zone *hivev1.DNSZone) {
assert.False(t, controllerutils.HasFinalizer(zone, hivev1.FinalizerDNSZone))
},
},
{
name: "Existing zone, link to parent, reachable SOA",
dnsZone: validDNSZoneWithLinkToParent(),
Expand Down
83 changes: 46 additions & 37 deletions pkg/installmanager/dnscleanup.go
Original file line number Diff line number Diff line change
@@ -1,56 +1,65 @@
package installmanager

import (
awsclient "github.com/openshift/hive/pkg/awsclient"
"context"
"fmt"

log "github.com/sirupsen/logrus"

awssdk "github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/route53"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"

hivev1 "github.com/openshift/hive/pkg/apis/hive/v1"
awsclient "github.com/openshift/hive/pkg/awsclient"
dns "github.com/openshift/hive/pkg/controller/dnszone"
controllerutils "github.com/openshift/hive/pkg/controller/utils"
)

// cleanupDNSZone queries the Route53 zone and deletes any A records found. Other record
// types may be added in the future, but right now this is the only one we're seeing
// leak and conflict.
// cleanupDNSZone will handle any needed DNS cleanup for ClusterDeployments with
// ManageDNS enabled (this helps to clean up any stray DNS records on install failures).
//
// It is a no-op (returns nil) when ManageDNS is disabled. Otherwise it looks up
// the managed DNSZone for the ClusterDeployment through dynClient and dispatches
// to the platform-specific cleanup; only AWS is handled, other platforms return
// nil. An error from the DNSZone lookup or from the platform cleanup is returned
// to the caller.
func cleanupDNSZone(dynClient client.Client, cd *hivev1.ClusterDeployment, logger log.FieldLogger) error {
	if !cd.Spec.ManageDNS {
		return nil
	}

	dnsZone := &hivev1.DNSZone{}
	dnsZoneNamespacedName := types.NamespacedName{Namespace: cd.Namespace, Name: controllerutils.DNSZoneName(cd.Name)}
	if err := dynClient.Get(context.TODO(), dnsZoneNamespacedName, dnsZone); err != nil {
		logger.WithError(err).Error("error looking up managed dnszone")
		// Stop here: continuing with an empty DNSZone would only fail later
		// with a misleading "non-AWS DNSZone" error that hides the real cause.
		return err
	}

	switch {
	case cd.Spec.Platform.AWS != nil:
		return cleanupAWSDNSZone(dnsZone, cd.Spec.Platform.AWS.Region, logger)
	default:
		// Use the supplied logger (not the package-level one) so the caller's
		// log fields are preserved.
		logger.Debug("No DNS cleanup for platform type")
		return nil
	}
}

// cleanupAWSDNSZone will return a DNS zone to the minimum set of DNS records
// May no longer be necessary once https://jira.coreos.com/browse/CORS-1195 is fixed.
func cleanupDNSZone(dnsZoneID, region string, logger log.FieldLogger) error {
zoneLogger := logger.WithField("dnsZoneID", dnsZoneID)
func cleanupAWSDNSZone(dnsZone *hivev1.DNSZone, region string, logger log.FieldLogger) error {
if dnsZone.Status.AWS == nil {
return fmt.Errorf("found non-AWS DNSZone for AWS ClusterDeployment")
}
if dnsZone.Status.AWS.ZoneID == nil {
// Shouldn't really be possible as we block install until DNS is ready:
return fmt.Errorf("DNSZone %s has no ZoneID set", dnsZone.Name)
}

zoneLogger := logger.WithField("dnsZoneID", *dnsZone.Status.AWS.ZoneID)
zoneLogger.Info("cleaning up DNSZone")

awsClient, err := awsclient.NewClient(nil, "", "", region)
if err != nil {
return err
}
recordsOutput, err := awsClient.ListResourceRecordSets(
&route53.ListResourceRecordSetsInput{
HostedZoneId: awssdk.String(dnsZoneID),
},
)
if err != nil {

if err := dns.DeleteAWSRecordSets(awsClient, dnsZone, zoneLogger); err != nil {
logger.WithError(err).Error("failed to clean up DNS Zone")
return err
}
for _, r := range recordsOutput.ResourceRecordSets {
// We're only experiencing problems with A records, so these are all we cleanup for now:
if *r.Type == "A" {
zoneLogger.WithFields(log.Fields{"name": *r.Name, "type": *r.Type}).Info("deleting A record")
request := &route53.ChangeResourceRecordSetsInput{
ChangeBatch: &route53.ChangeBatch{
Changes: []*route53.Change{
{
Action: awssdk.String("DELETE"),
ResourceRecordSet: r,
},
},
},
HostedZoneId: awssdk.String(dnsZoneID),
}
_, err := awsClient.ChangeResourceRecordSets(request)
if err != nil {
logger.WithError(err).WithField("recordset", r.Name).Warn("error deleting recordset")
return err
}
}
}
zoneLogger.Info("DNSZone A records deleted")
zoneLogger.Info("DNSZone cleaned")
return nil
}
20 changes: 1 addition & 19 deletions pkg/installmanager/installmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
contributils "github.com/openshift/hive/contrib/pkg/utils"
hivev1 "github.com/openshift/hive/pkg/apis/hive/v1"
"github.com/openshift/hive/pkg/constants"
controllerutils "github.com/openshift/hive/pkg/controller/utils"
"github.com/openshift/hive/pkg/resource"
k8slabels "github.com/openshift/hive/pkg/util/labels"

Expand Down Expand Up @@ -549,24 +548,7 @@ func cleanupFailedProvision(dynClient client.Client, cd *hivev1.ClusterDeploymen
// If we're managing DNS for this cluster, lookup the DNSZone and cleanup
// any leftover A records that may have leaked due to
// https://jira.coreos.com/browse/CORS-1195.
if cd.Spec.ManageDNS {
dnsZone := &hivev1.DNSZone{}
dnsZoneNamespacedName := types.NamespacedName{Namespace: cd.Namespace, Name: controllerutils.DNSZoneName(cd.Name)}
err := dynClient.Get(context.TODO(), dnsZoneNamespacedName, dnsZone)
if err != nil {
logger.WithError(err).Error("error looking up managed dnszone")
return err
}
if dnsZone.Status.AWS == nil {
return fmt.Errorf("found non-AWS DNSZone for AWS ClusterDeployment")
}
if dnsZone.Status.AWS.ZoneID == nil {
// Shouldn't really be possible as we block install until DNS is ready:
return fmt.Errorf("DNSZone %s has no ZoneID set", dnsZone.Name)
}
return cleanupDNSZone(*dnsZone.Status.AWS.ZoneID, cd.Spec.Platform.AWS.Region, logger)
}
return nil
return cleanupDNSZone(dynClient, cd, logger)
case cd.Spec.Platform.Azure != nil:
uninstaller := &azure.ClusterUninstaller{}
uninstaller.Logger = logger
Expand Down