Skip to content

Commit ac8df23

Browse files
committed
CA-83264: fix a small race in xenopsd which can cause some slowdown in VM lifecycles
To check whether the race has occurred, grep for EEXIST in xensource.log or xenstore-access.log; the error should appear right after a watch on /vm/<uuid>/rtc/timeoffset.
1 parent bc832cf commit ac8df23

File tree

1 file changed

+23
-11
lines changed

1 file changed

+23
-11
lines changed

ocaml/xenops/xenops_server_xen.ml

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,6 +2003,7 @@ let watch_xenstore () =
20032003
(fun xc xs ->
20042004
let domains = ref IntMap.empty in
20052005
let watches = ref IntMap.empty in
2006+
let uuids = ref IntMap.empty in
20062007

20072008
let watch path =
20082009
debug "xenstore watch %s" path;
@@ -2018,17 +2019,22 @@ let watch_xenstore () =
20182019
let add_domU_watches xs domid uuid =
20192020
debug "Adding watches for: domid %d" domid;
20202021
List.iter watch (all_domU_watches domid uuid);
2022+
uuids := IntMap.add domid uuid !uuids;
20212023
watches := IntMap.add domid [] !watches in
20222024

2023-
let remove_domU_watches xs domid uuid =
2025+
let remove_domU_watches xs domid =
20242026
debug "Removing watches for: domid %d" domid;
2025-
List.iter unwatch (all_domU_watches domid uuid);
2026-
List.iter (fun d ->
2027-
List.iter unwatch (watches_of_device d)
2028-
) (try IntMap.find domid !watches with Not_found -> []);
2029-
watches := IntMap.remove domid !watches in
2027+
if IntMap.mem domid !uuids then begin
2028+
let uuid = IntMap.find domid !uuids in
2029+
List.iter unwatch (all_domU_watches domid uuid);
2030+
List.iter (fun d ->
2031+
List.iter unwatch (watches_of_device d)
2032+
) (try IntMap.find domid !watches with Not_found -> []);
2033+
watches := IntMap.remove domid !watches;
2034+
uuids := IntMap.remove domid !uuids;
2035+
end in
20302036

2031-
let cancel_domU_operations xs domid uuid =
2037+
let cancel_domU_operations xs domid =
20322038
(* Anyone blocked on a domain/device operation which won't happen because the domain
20332039
just shutdown should be cancelled here. *)
20342040
debug "Cancelling watches for: domid %d" domid;
@@ -2059,8 +2065,14 @@ let watch_xenstore () =
20592065
let di = IntMap.find domid (if IntMap.mem domid domains' then domains' else !domains) in
20602066
let id = Uuid.uuid_of_int_array di.Xenctrl.handle |> Uuid.string_of_uuid in
20612067
if domid > 0 && not (DB.exists id)
2062-
then debug "However domain %d is not managed by us: ignoring" domid
2063-
else begin
2068+
then begin
2069+
debug "However domain %d is not managed by us: ignoring" domid;
2070+
if IntMap.mem domid !uuids then begin
2071+
debug "Cleaning-up the remaining watches for: domid %d" domid;
2072+
cancel_domU_operations xs domid;
2073+
remove_domU_watches xs domid;
2074+
end;
2075+
end else begin
20642076
Updates.add (Dynamic.Vm id) updates;
20652077
(* A domain is 'running' if we know it has not shutdown *)
20662078
let running = IntMap.mem domid domains' && (not (IntMap.find domid domains').Xenctrl.shutdown) in
@@ -2070,8 +2082,8 @@ let watch_xenstore () =
20702082
| false, true ->
20712083
add_domU_watches xs domid id
20722084
| true, false ->
2073-
cancel_domU_operations xs domid id;
2074-
remove_domU_watches xs domid id
2085+
cancel_domU_operations xs domid;
2086+
remove_domU_watches xs domid
20752087
end
20762088
) different;
20772089
domains := domains' in

0 commit comments

Comments
 (0)