Skip to content

Commit 9e61125

Browse files
committed
CA-220275: increase host evacuation timeout during host shutdown
The previous setting was 240s, which isn't enough to migrate VMs in the worst case, given our current support limit on host memory. This change raises the upper limit and also makes it parametric rather than fixed. Signed-off-by: Zheng Li <[email protected]>
1 parent c62e5ac commit 9e61125

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

scripts/examples/python/shutdown.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
import sys, time
2626
import signal
27+
import syslog
2728

2829
import XenAPI
2930

@@ -90,6 +91,14 @@ def get_running_domains(session, host):
9091
vms.append((vm,record))
9192
return vms
9293

94+
def estimate_evacuate_timeout(session, host, link_mbps=1000.):
    """Roughly estimate an upper bound, in seconds, for evacuating a host.

    The estimate is the time needed to transfer the host's used memory
    (memory_total - memory_free, read from its host_metrics record) over a
    link of the given speed.

    Arguments:
      session   -- session object whose xenapi proxy is used for the queries
      host      -- reference of the host that is about to be evacuated
      link_mbps -- assumed link speed in Mbps, where one megabit is counted
                   as 1024 * 1024 bits; defaults to a conservative 1000

    Returns the estimate as a float number of seconds, never negative.
    Raises ValueError if link_mbps is not positive; any error raised by the
    metrics queries is propagated to the caller.
    """
    if link_mbps <= 0:
        raise ValueError("link_mbps must be positive, got %r" % (link_mbps,))

    mref = session.xenapi.host.get_metrics(host)
    metrics = session.xenapi.host_metrics.get_record(mref)
    # The record fields are converted with int() before doing arithmetic.
    memory_used = int(metrics['memory_total']) - int(metrics['memory_free'])

    # Conservative estimation: the memory usage of Dom0 (which is not going
    # to be transferred) is included as an intentional surplus.
    bits_to_transfer = memory_used * 8.
    bits_per_second = float(link_mbps) * 1024 * 1024

    # Inconsistent metrics (free > total) must not yield a negative timeout.
    return max(bits_to_transfer / bits_per_second, 0.)
93102

94103
def host_evacuate(session, host):
95104
"""Attempts a host evacuate. If the timeout expires then it attempts to cancel
@@ -98,8 +107,13 @@ def host_evacuate(session, host):
98107
print "\n Requesting evacuation of host",
99108
sys.stdout.flush()
100109
task = session.xenapi.Async.host.evacuate(host)
110+
timeout = 240
101111
try:
102-
if not(wait_for_tasks(session, [ task ], 240)):
112+
timeout = max(estimate_evacuate_timeout(session, host), timeout)
113+
except Exception, e:
114+
syslog.syslog(syslog.LOG_WARNING, "Evacuate timeout estimation error: %s, use default." % e)
115+
try:
116+
if not(wait_for_tasks(session, [ task ], timeout)):
103117
print "\n Cancelling evacuation of host",
104118
sys.stdout.flush()
105119
session.xenapi.task.cancel(task)

scripts/xapi-domains.service

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,10 @@ ExecStart=@BINDIR@/xapi-autostart-vms
1111
ExecStop=@LIBEXECDIR@/shutdown $INSTALLATION_UUID
1212
ExecStop=@LIBEXECDIR@/shutdown --force $INSTALLATION_UUID
1313

14+
# Generous 24hr timeout that corresponds to the max evacuation time of a host
15+
# with memory close to our support limit. Finer grained timeout control depends
16+
# on the logic in the shutdown script itself.
17+
TimeoutStopSec=86400
18+
1419
[Install]
1520
WantedBy=multi-user.target

0 commit comments

Comments
 (0)