Skip to content

Commit 124e1f0

Browse files
committed
CA-217533: Make mark_host_as_dead idempotent
Signed-off-by: Thomas Sanders <[email protected]>
1 parent 8e3d097 commit 124e1f0

File tree

1 file changed

+23
-15
lines changed

1 file changed

+23
-15
lines changed

ocaml/xapi/xapi_host_helpers.ml

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -161,21 +161,29 @@ let reboot ~__context ~host = ()
161161
shutting down asynchronously. We immediately set the Host_metrics.live to false
162162
and add the host to the global list of known-dying hosts. *)
163163
let mark_host_as_dead ~__context ~host ~reason =
164-
Mutex.execute Xapi_globs.hosts_which_are_shutting_down_m
165-
(fun () -> Xapi_globs.hosts_which_are_shutting_down := host :: !Xapi_globs.hosts_which_are_shutting_down);
166-
(* The heartbeat handling code (HA and non-HA) will hopefully ignore the heartbeats
167-
and leave the host as dead from now until it comes back with a Pool.hello *)
168-
Xapi_hooks.host_pre_declare_dead ~__context ~host ~reason;
169-
begin
170-
try
171-
let metrics = Db.Host.get_metrics ~__context ~self:host in
172-
Db.Host_metrics.set_live ~__context ~self:metrics ~value:false;
173-
update_allowed_operations ~__context ~self:host
174-
with e ->
175-
info "Caught and ignoring exception setting host %s to dead: %s" (Ref.string_of host) (ExnHelper.string_of_exn e)
176-
end;
177-
Xapi_hooks.host_post_declare_dead ~__context ~host ~reason
178-
164+
let done_already = Mutex.execute Xapi_globs.hosts_which_are_shutting_down_m
165+
(fun () ->
166+
if List.mem host !Xapi_globs.hosts_which_are_shutting_down then
167+
true
168+
else (
169+
Xapi_globs.hosts_which_are_shutting_down := host :: !Xapi_globs.hosts_which_are_shutting_down;
170+
false
171+
)
172+
) in
173+
if not done_already then (
174+
(* The heartbeat handling code (HA and non-HA) will hopefully ignore the heartbeats
175+
and leave the host as dead from now until it comes back with a Pool.hello *)
176+
Xapi_hooks.host_pre_declare_dead ~__context ~host ~reason;
177+
begin
178+
try
179+
let metrics = Db.Host.get_metrics ~__context ~self:host in
180+
Db.Host_metrics.set_live ~__context ~self:metrics ~value:false;
181+
update_allowed_operations ~__context ~self:host
182+
with e ->
183+
info "Caught and ignoring exception setting host %s to dead: %s" (Ref.string_of host) (ExnHelper.string_of_exn e)
184+
end;
185+
Xapi_hooks.host_post_declare_dead ~__context ~host ~reason
186+
)
179187

180188
(* Toggled by an explicit Host.disable call to prevent a master restart making us bounce back *)
181189
let user_requested_host_disable = ref false

0 commit comments

Comments
 (0)