Commit 4ba168e

Merge pull request xapi-project#1518 from simonjbeaumont/hfx-954
[HFX-954] Fix data races in HA when slaves return in the pool.
2 parents 3aeb73d + 9f95f83 · commit 4ba168e

File tree

7 files changed: +99 -83 lines

ocaml/xapi/create_storage.ml

Lines changed: 6 additions & 6 deletions
@@ -19,7 +19,7 @@ open Client
 module D=Debug.Debugger(struct let name="xapi" end)
 open D

-let plug_all_pbds __context rpc session_id =
+let plug_all_pbds __context =
 (* Explicitly resynchronise local PBD state *)
 let my_pbds = Helpers.get_my_pbds __context in
 Storage_access.resynchronise_pbds ~__context ~pbds:(List.map fst my_pbds);
@@ -31,22 +31,22 @@ let plug_all_pbds __context rpc session_id =
 try
 if pbd_record.API.pBD_currently_attached
 then debug "Not replugging PBD %s: already plugged in" (Ref.string_of self)
-else Client.PBD.plug ~rpc ~session_id ~self
+else Xapi_pbd.plug ~__context ~self
 with e ->
 result := false;
 error "Could not plug in pbd '%s': %s" (Db.PBD.get_uuid ~__context ~self) (Printexc.to_string e))
 my_pbds;
 !result

-let plug_unplugged_pbds __context rpc session_id =
+let plug_unplugged_pbds __context =
 let my_pbds = Helpers.get_my_pbds __context in
 List.iter
 (fun (self, pbd_record) ->
 try
 if pbd_record.API.pBD_currently_attached
 then debug "Not replugging PBD %s: already plugged in" (Ref.string_of self)
-else Client.PBD.plug ~rpc ~session_id ~self
-with e -> debug "Could not plug in pbd '%s': %s" (Client.PBD.get_uuid rpc session_id self) (Printexc.to_string e))
+else Xapi_pbd.plug ~__context ~self
+with e -> debug "Could not plug in pbd '%s': %s" (Db.PBD.get_uuid ~__context ~self) (Printexc.to_string e))
 my_pbds

 (* Create a PBD which connects this host to the SR, if one doesn't already exist *)
@@ -107,7 +107,7 @@ let create_storage (me: API.ref_host) rpc session_id __context : unit =
 else
 debug "Skipping creation of PBDs for shared SRs";

-let all_pbds_ok = plug_all_pbds __context rpc session_id in
+let all_pbds_ok = plug_all_pbds __context in
 if not(all_pbds_ok) then begin
 let obj_uuid = Helpers.get_localhost_uuid () in
 Xapi_alert.add ~name:Api_messages.pbd_plug_failed_on_server_start ~priority:1L ~cls:`Host ~obj_uuid ~body:"";

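The net effect of the create_storage.ml change is that PBD plugging no longer goes through the XenAPI client (Client.PBD.plug over rpc/session_id) but calls the implementation, Xapi_pbd.plug, against the database context directly. A minimal sketch of the new calling convention; replug_storage is a hypothetical wrapper, not part of this commit:

    (* Hypothetical caller, for illustration only. *)
    let replug_storage ~__context =
      (* No rpc/session_id is threaded through any more: plugging goes via
         Xapi_pbd.plug ~__context ~self rather than Client.PBD.plug. *)
      let all_ok = Create_storage.plug_all_pbds __context in
      (* Anything that failed to plug can be retried later with the same
         context-only signature, e.g. once the management IP is up. *)
      if not all_ok then Create_storage.plug_unplugged_pbds __context
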
ocaml/xapi/xapi.ml

Lines changed: 2 additions & 2 deletions
@@ -987,8 +987,8 @@ let server_init() =
 debug "Waiting forever for the management interface to gain an IP address";
 let ip = wait_for_management_ip_address () in
 debug "Management interface got IP address: %s; attempting to re-plug any unplugged PBDs" ip;
-Helpers.call_api_functions ~__context (fun rpc session_id ->
-Create_storage.plug_unplugged_pbds __context rpc session_id)
+Helpers.call_api_functions ~__context (fun rpc session_id ->
+Create_storage.plug_unplugged_pbds __context)
 )
 in

ocaml/xapi/xapi_ha.ml

Lines changed: 2 additions & 2 deletions
@@ -521,7 +521,7 @@ module Monitor = struct
 let now = Unix.gettimeofday () in
 let plan_too_old = now -. !last_plan_time > Xapi_globs.ha_monitor_plan_timer in
 if plan_too_old || !plan_out_of_date then begin
-let changed = Xapi_ha_vm_failover.update_pool_status ~__context in
+let changed = Xapi_ha_vm_failover.update_pool_status ~__context ~live_set:liveset_refs () in

 (* Extremely bad: something managed to break our careful plan *)
 if changed && not !plan_out_of_date then error "Overcommit protection failed to prevent a change which invalidated our failover plan";
@@ -1461,7 +1461,7 @@ let enable __context heartbeat_srs configuration =
 (* Update the Pool's planning configuration (ha_overcommitted, ha_plan_exists_for) *)
 (* Start by assuming there is no ha_plan_for: this can be revised upwards later *)
 Db.Pool.set_ha_plan_exists_for ~__context ~self:pool ~value:0L;
-let (_: bool) = Xapi_ha_vm_failover.update_pool_status ~__context in
+let (_: bool) = Xapi_ha_vm_failover.update_pool_status ~__context () in

 let generation = Uuid.string_of_uuid (Uuid.make_uuid ()) in

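The two xapi_ha.ml call sites show the intended split: the Monitor thread already holds the xHA heartbeat liveset, so it passes ~live_set:liveset_refs and planning never re-reads host liveness from the database mid-computation, while HA.enable has no liveset yet and omits the argument so update_pool_status computes one itself. A rough sketch of the two patterns; the wrapper function names here are illustrative:

    (* Monitor thread: plan against the heartbeat liveset it already holds. *)
    let recompute_from_liveset ~__context liveset_refs =
      Xapi_ha_vm_failover.update_pool_status ~__context ~live_set:liveset_refs ()

    (* HA.enable: no liveset to hand, fall back to the database view. *)
    let recompute_from_db ~__context =
      Xapi_ha_vm_failover.update_pool_status ~__context ()
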
ocaml/xapi/xapi_ha_vm_failover.ml

Lines changed: 78 additions & 65 deletions
@@ -93,13 +93,20 @@ let host_of_non_agile_vm ~__context all_hosts_and_snapshots_sorted (vm, snapshot
 warn "No host could support protected xHA VM: %s (%s)" (Helpers.short_string_of_ref vm) (snapshot.API.vM_name_label);
 []

+let get_live_set ~__context =
+let all_hosts = Db.Host.get_all_records ~__context in
+let live_hosts = List.filter (fun (rf,r) -> r.API.host_enabled
+&& (try Db.Host_metrics.get_live ~__context ~self:r.API.host_metrics with _ -> false))
+all_hosts in
+List.map (fun (rf,_) -> rf) live_hosts
+
 (** Given the current number of host failures to consider (only useful for passing to the binpacker to influence its
 choice of heuristic), return an instantaneous VM restart plan which includes all protected offline VMs, and a
 planning configuration corresponding to the state of the world after the starts are complete, for use in further
 planning.
 Returns: (VM restart plan, new planning configuration, true if some protected non-agile VMs exist)
 *)
-let compute_restart_plan ~__context ~all_protected_vms ?(change=no_configuration_change) num_failures =
+let compute_restart_plan ~__context ~all_protected_vms ~live_set ?(change=no_configuration_change) num_failures =
 (* This function must be deterministic: for the same set of hosts and set of VMs it must produce the same output.
 We rely partially on the binpacker enforcing its own ordering over hosts and vms, so it's not critical for us
 to sort the result of Db.*.get_all calls generally. However the handling of non-agile VMs needs special care. *)
@@ -155,7 +162,8 @@ let compute_restart_plan ~__context ~all_protected_vms ?(change=no_configuration
 true
 && r.API.host_enabled
 && not (List.mem rf change.hosts_to_disable)
-&& (try Db.Host_metrics.get_live ~__context ~self:r.API.host_metrics with _ -> false) in
+&& (try Db.Host_metrics.get_live ~__context ~self:r.API.host_metrics with _ -> false)
+&& (List.mem rf live_set) in
 let live_hosts_and_snapshots, dead_hosts_and_snapshots = List.partition is_alive all_hosts_and_snapshots in

 let live_hosts = List.map fst live_hosts_and_snapshots (* and dead_hosts = List.map fst dead_hosts_and_snapshots *) in
@@ -273,10 +281,11 @@ type result =
 breaking the plan.
 *)

-let plan_for_n_failures ~__context ~all_protected_vms ?(change = no_configuration_change) n =
+let plan_for_n_failures ~__context ~all_protected_vms ?live_set ?(change = no_configuration_change) n =
+let live_set = match live_set with None -> get_live_set ~__context | Some s -> s in
 try
 (* 'changes' are applied by the compute_restart_plan function *)
-let plan, config, vms_not_restarted, non_agile_protected_vms_exist = compute_restart_plan ~__context ~all_protected_vms ~change n in
+let plan, config, vms_not_restarted, non_agile_protected_vms_exist = compute_restart_plan ~__context ~all_protected_vms ~live_set ~change n in

 (* Could some VMs not be started? If so we're overcommitted before we started. *)
 if vms_not_restarted <> [] then begin
@@ -299,26 +308,26 @@ let plan_for_n_failures ~__context ~all_protected_vms ?(change = no_configuratio
 error "Unexpected error in HA VM failover planning function: %s" (ExnHelper.string_of_exn e);
 No_plan_exists

-let compute_max_host_failures_to_tolerate ~__context ?protected_vms () =
+let compute_max_host_failures_to_tolerate ~__context ?live_set ?protected_vms () =
 let protected_vms = match protected_vms with
 | None -> all_protected_vms ~__context
 | Some vms -> vms in
 let nhosts = List.length (Db.Host.get_all ~__context) in
 (* We assume that if not(plan_exists(n)) then \forall.x>n not(plan_exists(n))
 although even if we screw this up it's not a disaster because all we need is a
 safe approximation (so ultimately "0" will do but we'd prefer higher) *)
-Helpers.bisect (fun n -> plan_for_n_failures ~__context ~all_protected_vms:protected_vms (Int64.to_int n) = Plan_exists_for_all_VMs) 0L (Int64.of_int nhosts)
+Helpers.bisect (fun n -> plan_for_n_failures ~__context ~all_protected_vms:protected_vms ?live_set (Int64.to_int n) = Plan_exists_for_all_VMs) 0L (Int64.of_int nhosts)

 (* Make sure the pool is marked as overcommitted and the appropriate alert is generated. Return
 true if something changed, false otherwise *)
-let mark_pool_as_overcommitted ~__context =
+let mark_pool_as_overcommitted ~__context ~live_set =
 let pool = Helpers.get_pool ~__context in

 let overcommitted = Db.Pool.get_ha_overcommitted ~__context ~self:pool in
 let planned_for = Db.Pool.get_ha_plan_exists_for ~__context ~self:pool in
 let to_tolerate = Db.Pool.get_ha_host_failures_to_tolerate ~__context ~self:pool in

-let max_failures = compute_max_host_failures_to_tolerate ~__context () in
+let max_failures = compute_max_host_failures_to_tolerate ~__context ~live_set () in
 if planned_for <> max_failures then begin
 Db.Pool.set_ha_plan_exists_for ~__context ~self:pool ~value:(min to_tolerate max_failures);
 if max_failures < planned_for
@@ -345,7 +354,10 @@ let mark_pool_as_overcommitted ~__context =
 planned_for <> max_failures || (not overcommitted)

 (* Update the pool's HA fields *)
-let update_pool_status ~__context =
+let update_pool_status ~__context ?live_set () =
+let live_set = match live_set with
+| None -> get_live_set ~__context
+| Some s -> s in
 let pool = Helpers.get_pool ~__context in
 let overcommitted = Db.Pool.get_ha_overcommitted ~__context ~self:pool in
 let to_tolerate = Db.Pool.get_ha_host_failures_to_tolerate ~__context ~self:pool in
@@ -357,7 +369,7 @@ let update_pool_status ~__context =
 i.e. have we become overcommitted (in the sense of having too few resources to satisfy
 demand) or not?
 *)
-match plan_for_n_failures ~__context ~all_protected_vms (Int64.to_int to_tolerate) with
+match plan_for_n_failures ~__context ~all_protected_vms ~live_set (Int64.to_int to_tolerate) with
 | Plan_exists_for_all_VMs ->
 debug "HA failover plan exists for all protected VMs";
 Db.Pool.set_ha_overcommitted ~__context ~self:pool ~value:false;
@@ -367,29 +379,30 @@ let update_pool_status ~__context =
 overcommitted || (planned_for <> to_tolerate)
 | Plan_exists_excluding_non_agile_VMs ->
 debug "HA failover plan exists for all protected VMs, excluding some non-agile VMs";
-mark_pool_as_overcommitted ~__context; (* might define this as false later *)
+mark_pool_as_overcommitted ~__context ~live_set; (* might define this as false later *)
 | No_plan_exists ->
 debug "No HA failover plan exists";
-mark_pool_as_overcommitted ~__context
+mark_pool_as_overcommitted ~__context ~live_set

 let assert_configuration_change_preserves_ha_plan ~__context c =
 debug "assert_configuration_change_preserves_ha_plan c = %s" (string_of_configuration_change ~__context c);

 (* Only block the operation if a plan exists now but would evaporate with the proposed changes.
 This prevents us blocking all operations should be suddenly become overcommitted eg through
 multiple host failures *)
+let live_set = get_live_set ~__context in
 let pool = Helpers.get_pool ~__context in
 if Db.Pool.get_ha_enabled ~__context ~self:pool && not(Db.Pool.get_ha_allow_overcommit ~__context ~self:pool) then begin
 let to_tolerate = Int64.to_int (Db.Pool.get_ha_plan_exists_for ~__context ~self:pool) in
 let all_protected_vms = all_protected_vms ~__context in

-match plan_for_n_failures ~__context ~all_protected_vms to_tolerate with
+match plan_for_n_failures ~__context ~all_protected_vms ~live_set to_tolerate with
 | Plan_exists_excluding_non_agile_VMs
 | No_plan_exists ->
 debug "assert_configuration_change_preserves_ha_plan: no plan currently exists; cannot get worse"
 | Plan_exists_for_all_VMs -> begin
 (* Does the plan break? *)
-match plan_for_n_failures ~__context ~all_protected_vms ~change:c to_tolerate with
+match plan_for_n_failures ~__context ~all_protected_vms ~live_set ~change:c to_tolerate with
 | Plan_exists_for_all_VMs ->
 debug "assert_configuration_change_preserves_ha_plan: plan exists after change"
 | Plan_exists_excluding_non_agile_VMs
@@ -430,56 +443,56 @@ let restart_auto_run_vms ~__context live_set n =
 an accurate way to determine 'failed' VMs but it will suffice for our 'best-effort'
 category. *)
 let reset_vms = ref [] in
-List.iter
-(fun h ->
-if not (List.mem h live_set) then
-begin
-let hostname = Db.Host.get_hostname ~__context ~self:h in
-debug "Setting host %s to dead" hostname;
-(* Sample this before calling any hook scripts *)
-let resident_on_vms = Db.Host.get_resident_VMs ~__context ~self:h in
-reset_vms := resident_on_vms @ !reset_vms;
-
-(* ensure live=false *)
-begin
-try
-let h_metrics = Db.Host.get_metrics ~__context ~self:h in
-let current = Db.Host_metrics.get_live ~__context ~self:h_metrics in
-if current then begin
-(* Fire off a ha_host_failed message if the host hasn't just shut itself down *)
-let shutting_down = Threadext.Mutex.execute Xapi_globs.hosts_which_are_shutting_down_m (fun () -> !Xapi_globs.hosts_which_are_shutting_down) in
-if not (List.exists (fun x -> x=h) shutting_down) then begin
-let obj_uuid = Db.Host.get_uuid ~__context ~self:h in
-let host_name = Db.Host.get_name_label ~__context ~self:h in
-Xapi_alert.add ~name:Api_messages.ha_host_failed ~priority:Api_messages.ha_host_failed_priority ~cls:`Host ~obj_uuid
-~body:(Printf.sprintf "Server '%s' has failed" host_name);
-end;
-(* Call external host failed hook (allows a third-party to use power-fencing if desired) *)
-Xapi_hooks.host_pre_declare_dead ~__context ~host:h ~reason:Xapi_hooks.reason__fenced;
-Db.Host_metrics.set_live ~__context ~self:h_metrics ~value:false; (* since slave is fenced, it will not set this to true again itself *)
-Xapi_host_helpers.update_allowed_operations ~__context ~self:h;
-Xapi_hooks.host_post_declare_dead ~__context ~host:h ~reason:Xapi_hooks.reason__fenced;
-end
-with _ ->
-(* if exn assume h_metrics doesn't exist, then "live" is defined to be false implicitly, so do nothing *)
-()
-end;
-debug "Setting all VMs running or paused on %s to Halted" hostname;
-(* ensure all vms resident_on this host running or paused have their powerstates reset *)
-
-List.iter
-(fun vm ->
-let vm_powerstate = Db.VM.get_power_state ~__context ~self:vm in
-if (vm_powerstate=`Running || vm_powerstate=`Paused) then
-Xapi_vm_lifecycle.force_state_reset ~__context ~self:vm ~value:`Halted
-)
-resident_on_vms
-end
-)
-hosts;
+let dead_hosts = ref [] in
+List.iter (fun h ->
+if not (List.mem h live_set) then begin
+let hostname = Db.Host.get_hostname ~__context ~self:h in
+debug "Setting host %s to dead" hostname;
+(* Sample this before calling any hook scripts *)
+let resident_on_vms = List.filter
+(fun vm -> not (Db.VM.get_is_control_domain ~__context ~self:vm))
+(Db.Host.get_resident_VMs ~__context ~self:h) in
+reset_vms := resident_on_vms @ !reset_vms;
+
+(* ensure live=false *)
+begin
+try
+let h_metrics = Db.Host.get_metrics ~__context ~self:h in
+let current = Db.Host_metrics.get_live ~__context ~self:h_metrics in
+if current then begin
+(* Fire off a ha_host_failed message if the host hasn't just shut itself down *)
+let shutting_down = Threadext.Mutex.execute Xapi_globs.hosts_which_are_shutting_down_m (fun () -> !Xapi_globs.hosts_which_are_shutting_down) in
+if not (List.exists (fun x -> x=h) shutting_down) then begin
+let obj_uuid = Db.Host.get_uuid ~__context ~self:h in
+let host_name = Db.Host.get_name_label ~__context ~self:h in
+Xapi_alert.add ~msg:Api_messages.ha_host_failed ~cls:`Host ~obj_uuid
+~body:(Printf.sprintf "Server '%s' has failed" host_name);
+end;
+(* Call external host failed hook (allows a third-party to use power-fencing if desired) *)
+Xapi_hooks.host_pre_declare_dead ~__context ~host:h ~reason:Xapi_hooks.reason__fenced;
+Db.Host_metrics.set_live ~__context ~self:h_metrics ~value:false; (* since slave is fenced, it will not set this to true again itself *)
+Xapi_host_helpers.update_allowed_operations ~__context ~self:h;
+dead_hosts := h :: !dead_hosts;
+end
+with _ ->
+() (* if exn assume h_metrics doesn't exist, then "live" is defined to be false implicitly, so do nothing *)
+end
+end) hosts;
+
+debug "Setting all VMs running or paused to Halted";
+(* ensure all vms resident_on this host running or paused have their powerstates reset *)
+List.iter (fun vm ->
+let vm_powerstate = Db.VM.get_power_state ~__context ~self:vm in
+if (vm_powerstate=`Running || vm_powerstate=`Paused) then
+Xapi_vm_lifecycle.force_state_reset ~__context ~self:vm ~value:`Halted)
+!reset_vms;
+(* host_post_declare_dead may take a long time if the SR is locked *)
+dead_hosts := List.rev !dead_hosts;
+List.iter (fun h -> Xapi_hooks.host_post_declare_dead ~__context ~host:h ~reason:Xapi_hooks.reason__fenced)
+!dead_hosts;

 (* If something has changed then we'd better refresh the pool status *)
-if !reset_vms <> [] then ignore(update_pool_status ~__context);
+if !reset_vms <> [] then ignore(update_pool_status ~__context ~live_set ());

 (* At this point failed protected agile VMs are Halted, not resident_on anywhere *)

@@ -497,7 +510,7 @@ let restart_auto_run_vms ~__context live_set n =
 protection.
 For the best-effort VMs we call the script
 when we have reset some VMs to halted (no guarantee there is enough resource but better safe than sorry) *)
-let plan, config, vms_not_restarted, non_agile_protected_vms_exist = compute_restart_plan ~__context ~all_protected_vms n in
+let plan, config, vms_not_restarted, non_agile_protected_vms_exist = compute_restart_plan ~__context ~all_protected_vms ~live_set n in
 let plan, config, vms_not_restarted, non_agile_protected_vms_exist =
 if true
 && Xapi_hooks.pool_pre_ha_vm_restart_hook_exists ()
@@ -511,7 +524,7 @@ let restart_auto_run_vms ~__context live_set n =
 error "pool-pre-ha-vm-restart-hook failed: %s: continuing anyway" (ExnHelper.string_of_exn e)
 end;
 debug "Recomputing restart plan to take into account new state of the world after running the script";
-compute_restart_plan ~__context ~all_protected_vms n
+compute_restart_plan ~__context ~all_protected_vms ~live_set n
 end else plan, config, vms_not_restarted, non_agile_protected_vms_exist (* nothing needs recomputing *)
 in

@@ -566,7 +579,7 @@ let restart_auto_run_vms ~__context live_set n =
 | Api_errors.Server_error(code, params) when code = Api_errors.ha_operation_would_break_failover_plan ->
 (* This should never happen since the planning code would always allow the restart of a protected VM... *)
 error "Caught exception HA_OPERATION_WOULD_BREAK_FAILOVER_PLAN: setting pool as overcommitted and retrying";
-ignore_bool(mark_pool_as_overcommitted ~__context);
+ignore_bool(mark_pool_as_overcommitted ~__context ~live_set);
 begin
 try
 go ();

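The recurring pattern in xapi_ha_vm_failover.ml is an optional ?live_set argument that defaults to get_live_set when the caller cannot supply the heartbeat liveset, so one consistent host list is threaded through an entire planning pass instead of each helper re-querying Host_metrics. A self-contained sketch of that defaulting pattern; the host type and get_live_set below are stubs, not the xapi definitions:

    (* Standalone illustration of the ?live_set defaulting pattern. *)
    type host = string

    let get_live_set () : host list =
      (* In xapi this filters Db.Host.get_all_records on host_enabled and
         Host_metrics.get_live; here it is just a fixed list. *)
      [ "host1"; "host2" ]

    let update_pool_status ?live_set () =
      (* Compute the live set once if the caller did not supply one, then use
         the same value for every planning step that follows. *)
      let live_set = match live_set with
        | None -> get_live_set ()
        | Some s -> s in
      live_set <> []

    let () =
      ignore (update_pool_status ());                       (* default: stub "DB" view *)
      ignore (update_pool_status ~live_set:[ "host1" ] ())  (* explicit liveset *)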