Skip to content

Commit 328dbeb

Browse files
author
John Burwell
committed
Merge pull request #8 from shapeblue/cancel-maint
CLOUDSTACK-9323: Cancelling maintenance when prepare for maintenance…
2 parents 067022e + e378566 commit 328dbeb

3 files changed

Lines changed: 334 additions & 7 deletions

File tree

server/src/com/cloud/resource/ResourceManagerImpl.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2113,11 +2113,13 @@ private boolean doCancelMaintenance(long hostId) {
21132113

21142114
/* TODO: move to listener */
21152115
_haMgr.cancelScheduledMigrations(host);
2116+
2117+
boolean vms_migrating=false;
21162118
List<VMInstanceVO> vms = _haMgr.findTakenMigrationWork();
21172119
for (VMInstanceVO vm : vms) {
2118-
if (vm != null && vm.getHostId() != null && vm.getHostId() == hostId) {
2119-
s_logger.info("Unable to cancel migration because the vm is being migrated: " + vm);
2120-
return false;
2120+
if (vm.getHostId() != null && vm.getHostId() == hostId) {
2121+
s_logger.warn("Cancel host maintenance: Migrations scheduled for " + vm + ", hostId = " + hostId);
2122+
vms_migrating=true;
21212123
}
21222124
}
21232125

@@ -2126,7 +2128,7 @@ private boolean doCancelMaintenance(long hostId) {
21262128
_agentMgr.pullAgentOutMaintenance(hostId);
21272129

21282130
// for kvm, need to log into kvm host, restart cloudstack-agent
2129-
if (host.getHypervisorType() == HypervisorType.KVM || host.getHypervisorType() == HypervisorType.LXC) {
2131+
if ((host.getHypervisorType() == HypervisorType.KVM && ! vms_migrating) || host.getHypervisorType() == HypervisorType.LXC) {
21302132

21312133
boolean sshToAgent = Boolean.parseBoolean(_configDao.getValue(Config.KvmSshToAgentEnabled.key()));
21322134
if (!sshToAgent) {
Lines changed: 325 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,325 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
""" BVT tests for Hosts Maintenance
18+
"""
19+
20+
# Import Local Modules
21+
from marvin.codes import FAILED
22+
from marvin.cloudstackTestCase import *
23+
from marvin.cloudstackAPI import *
24+
from marvin.lib.utils import *
25+
from marvin.lib.base import *
26+
from marvin.lib.common import *
27+
from nose.plugins.attrib import attr
28+
29+
from time import sleep
30+
31+
_multiprocess_shared_ = False
32+
33+
34+
class TestHostMaintenance(cloudstackTestCase):
    """Exercise cancelHostMaintenance right after prepareHostForMaintenance.

    Each test asks for one host to be prepared for maintenance, polls the
    other host for VMs in the "Migrating" state, cancels the maintenance,
    and then repeats the same cycle with the two hosts swapped.
    """

    # test_02 creates VMs on the first host until the two hosts together
    # hold at least this many, so that preparing a host for maintenance has
    # something to migrate.
    MIN_VMS_FOR_MIGRATION = 5

    def setUp(self):
        """Collect the API/DB handles, the zone and pod under test, and test data."""
        self.logger = logging.getLogger('TestHM')
        self.stream_handler = logging.StreamHandler()
        self.logger.setLevel(logging.DEBUG)
        # Detached again in tearDown: getLogger() hands back the same logger
        # object for every test, so a handler left attached would make each
        # later test print every message one more time.
        self.logger.addHandler(self.stream_handler)
        self.apiclient = self.testClient.getApiClient()
        self.hypervisor = self.testClient.getHypervisorInfo()
        self.dbclient = self.testClient.getDbConnection()
        self.zone = get_zone(self.apiclient, self.testClient.getZoneForTests())
        self.pod = get_pod(self.apiclient, self.zone.id)
        self.cleanup = []
        # Self-contained test data; the parsed test-data config is not used
        # by these tests.
        self.services = {
            "service_offering": {
                "name": "Ultra Tiny Instance",
                "displaytext": "Ultra Tiny Instance",
                "cpunumber": 1,
                "cpuspeed": 100,
                "memory": 128,
            },
            "vm": {
                "username": "root",
                "password": "password",
                "ssh_port": 22,
                # Hypervisor type should be same as
                # hypervisor type of cluster
                "privateport": 22,
                "publicport": 22,
                "protocol": 'TCP',
            },
            "natrule": {
                "privateport": 22,
                "publicport": 22,
                "startport": 22,
                "endport": 22,
                "protocol": "TCP",
                "cidrlist": '0.0.0.0/0',
            },
            "ostype": 'CentOS 5.3 (64-bit)',
            "sleep": 60,
            "timeout": 10,
        }

    def tearDown(self):
        """Delete everything registered in self.cleanup and detach the log handler.

        Raises:
            Exception: if cleanup_resources fails; the original error text is
                embedded in the message.
        """
        try:
            cleanup_resources(self.apiclient, self.cleanup)
        except Exception as e:
            raise Exception("Warning: Exception during cleanup : %s" % e)
        finally:
            self.logger.removeHandler(self.stream_handler)

    def createVMs(self, hostId, number):
        """Deploy `number` VMs on host `hostId` and register them for cleanup.

        Also looks up the template named by self.services["ostype"] and
        creates the service offering the VMs are deployed with.

        Returns:
            The list of created VirtualMachine objects.
        """
        self.template = get_template(
            self.apiclient,
            self.zone.id,
            self.services["ostype"]
        )
        if self.template == FAILED:
            # self.fail() instead of `assert False`, which is stripped by -O.
            self.fail(
                "get_template() failed to return template with description %s"
                % self.services["ostype"]
            )
        self.logger.debug("Using template %s ", self.template.id)

        self.service_offering = ServiceOffering.create(
            self.apiclient,
            self.services["service_offering"]
        )
        self.logger.debug("Using service offering %s ", self.service_offering.id)

        # These settings are the same for every VM; only the display name
        # changes per iteration.
        vm_config = self.services["vm"]
        vm_config["zoneid"] = self.zone.id
        vm_config["template"] = self.template.id
        vm_config["hypervisor"] = self.hypervisor

        vms = []
        try:
            for i in range(number):
                vm_config["displayname"] = 'vm' + str(i)
                vm = VirtualMachine.create(
                    self.apiclient,
                    vm_config,
                    serviceofferingid=self.service_offering.id,
                    hostid=hostId
                )
                vms.append(vm)
                self.cleanup.append(vm)
                self.logger.debug("VM create = %s", vm.id)
        finally:
            # Registered after the VMs so the offering is not left behind,
            # even when a deployment above fails part-way through.
            # NOTE(review): assumes cleanup_resources can delete a
            # ServiceOffering the same way it deletes the VMs - confirm.
            self.cleanup.append(self.service_offering)
        return vms

    def checkVmMigratingOnHost(self, hostId):
        """Polling callback for wait_until: is any VM on `hostId` migrating?

        Returns:
            A (flag, None) pair. wait_until unpacks exactly two values from
            its callback, the first being the boolean it waits on.
        """
        vms_on_host = VirtualMachine.list(
            self.apiclient,
            hostid=hostId
        )
        if vms_on_host is None:
            return (False, None)

        self.logger.debug('Vms found = %s ', len(vms_on_host))
        for vm in vms_on_host:
            if vm.state == "Migrating":
                self.logger.debug(
                    'VirtualMachine on Hyp id = %s is in %s', vm.id, vm.state)
                return (True, None)
        return (False, None)

    def NoOfVMsOnHost(self, hostId):
        """Return how many VMs VirtualMachine.list reports for host `hostId`."""
        vms_on_host = VirtualMachine.list(
            self.apiclient,
            hostid=hostId
        )
        if vms_on_host is None:
            return 0
        for vm in vms_on_host:
            self.logger.debug('VirtualMachine on Hyp 1 = %s', vm.id)
        return len(vms_on_host)

    def _list_routing_hosts(self, label):
        """Return the Routing hosts of the zone/pod under test, logging each id."""
        hosts = Host.list(
            self.apiclient,
            type='Routing',
            zoneid=self.zone.id,
            podid=self.pod.id,
        )
        # Treat a None listing as "no hosts" so the callers' size checks
        # skip the test instead of crashing on len(None).
        hosts = hosts or []
        for host in hosts:
            self.logger.debug('%s Hypervisor = %s', label, host.id)
        return hosts

    @staticmethod
    def _migration_observed(wait_outcome):
        """Reduce whatever wait_until returned to a plain boolean.

        wait_until's return statement is not part of this file; its loop
        unpacks a (flag, value) pair from the callback, so accept either a
        bare flag or a sequence whose first item is the flag.
        """
        if isinstance(wait_outcome, (tuple, list)):
            return bool(wait_outcome) and bool(wait_outcome[0])
        return bool(wait_outcome)

    def _prepare_then_cancel_maintenance(self, target_host_id, other_host_id):
        """Prepare `target_host_id` for maintenance, poll, then cancel it.

        Returns:
            True if a migrating VM was seen on `other_host_id` while polling.
        """
        cmd = prepareHostForMaintenance.prepareHostForMaintenanceCmd()
        cmd.id = target_host_id
        self.apiclient.prepareHostForMaintenance(cmd)
        self.logger.debug(
            'Host with id %s is in prepareHostForMaintenance', target_host_id)

        # as soon as VM is picked for migration its last hostid is updated to
        # the new host, that is why VM shows up as migrating on the other host
        wait_outcome = wait_until(
            1, 10, self.checkVmMigratingOnHost, other_host_id)

        cmd = cancelHostMaintenance.cancelHostMaintenanceCmd()
        cmd.id = target_host_id
        self.apiclient.cancelHostMaintenance(cmd)
        self.logger.debug(
            'Host with id %s is in cancelHostMaintenance', target_host_id)

        return self._migration_observed(wait_outcome)

    def _cycle_maintenance_on_both_hosts(self, hosts):
        """Run the prepare/cancel cycle on hosts[0], then on hosts[1].

        Any exception raised by a cycle fails the test.

        Returns:
            The migration flag of the second cycle only; the flag of the
            first cycle is discarded.
        """
        first_id = hosts[0].id
        second_id = hosts[1].id
        try:
            self._prepare_then_cancel_maintenance(first_id, second_id)
            return self._prepare_then_cancel_maintenance(second_id, first_id)
        except Exception as e:
            self.logger.debug("Exception %s", e)
            # Format the exception itself: indexing it (e[0]) breaks for
            # exceptions without arguments and hides the real failure.
            self.fail("Cancel host maintenance failed {}".format(e))

    @attr(
        tags=[
            "advanced",
            "advancedns",
            "smoke",
            "basic",
            "eip",
            "sg"],
        required_hardware="true")
    def test_01_cancel_host_maintenace_with_no_migration_jobs(self):
        """Cancel maintenance on each of two hosts while no VM is migrating.

        Skipped when fewer than two hosts exist, or when a migration was
        seen during the last cycle (the no-migration condition was not met).
        """
        hosts = self._list_routing_hosts(1)
        if len(hosts) < 2:
            raise unittest.SkipTest("Cancel host maintenance when VMs are migrating should be tested for 2 or more hosts")

        vm_migrating = self._cycle_maintenance_on_both_hosts(hosts)

        if vm_migrating:
            raise unittest.SkipTest("VMs are migrating and the test will not be able to check the conditions the test is intended for")

    @attr(
        tags=[
            "advanced",
            "advancedns",
            "smoke",
            "basic",
            "eip",
            "sg"],
        required_hardware="true")
    def test_02_cancel_host_maintenace_with_migration_jobs(self):
        """Cancel maintenance on each of two hosts while VMs are migrating.

        Skipped unless exactly two hosts exist, or when no migration was
        seen during the last cycle (the migration condition was not met).
        """
        hosts = self._list_routing_hosts(2)
        if len(hosts) != 2:
            raise unittest.SkipTest("Cancel host maintenance when VMs are migrating can only be tested with 2 hosts")

        # Count the VMs on BOTH hosts (the first host used to be counted twice).
        no_of_vms = (self.NoOfVMsOnHost(hosts[0].id)
                     + self.NoOfVMsOnHost(hosts[1].id))

        if no_of_vms < self.MIN_VMS_FOR_MIGRATION:
            self.logger.debug("Create VMs as there are not enough vms to check host maintenance")
            no_vm_req = self.MIN_VMS_FOR_MIGRATION - no_of_vms
            self.logger.debug("Creating vms = %s", no_vm_req)
            self.vmlist = self.createVMs(hosts[0].id, no_vm_req)

        vm_migrating = self._cycle_maintenance_on_both_hosts(hosts)

        if not vm_migrating:
            raise unittest.SkipTest("No VM is migrating and the test will not be able to check the conditions the test is intended for")
324+
325+

tools/marvin/marvin/lib/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -506,15 +506,15 @@ def verifyRouterState(apiclient, routerid, allowedstates):
506506
(allowedstates, routers[0].redundantstate)]
507507
return [PASS, None]
508508

509-
509+
510510
def wait_until(retry_interval=2, no_of_times=2, callback=None, *callback_args):
511511
""" Utility method to try out the callback method at most no_of_times with a interval of retry_interval,
512512
Will return immediately if callback returns True. The callback method should be written to return a list of values, the first being a boolean """
513513

514514
if callback is None:
515515
raise ("Bad value for callback method !")
516-
517-
wait_result = False
516+
517+
wait_result = False
518518
for i in range(0,no_of_times):
519519
time.sleep(retry_interval)
520520
wait_result, return_val = callback(*callback_args)

0 commit comments

Comments
 (0)