Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions ocaml/xapi/cert_distrib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,25 @@ let collect_pool_certs ~__context ~rpc ~session_id ~map ~from_hosts =
map cert
)

let _distrib_m = Mutex.create ()

(* where possible, the master host should control the certificate
* distributions. this allows us to coordinate multiple parties that are
* trying to modify /etc/stunnel at the same time with [lock]!
*
* we apply this lock to all top level distribution calls, with the exception of
* the pool join functions that execute on the joiner.
*)
let lock (f : unit -> 'a) : 'a =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about with_lock or locked as a name?

D.debug "cert_distrib.ml: locking..." ;
Mutex.lock _distrib_m ;
Fun.protect
~finally:(fun () ->
D.debug "cert_distrib.ml: unlocking..." ;
Mutex.unlock _distrib_m
)
f

let exchange_certificates_among_all_members ~__context =
(* here we coordinate the certificate distribution. from a high level
we do the following:
Expand All @@ -363,6 +382,7 @@ let exchange_certificates_among_all_members ~__context =
we do not guarantee 'atomicity', so if regenerating the bundle on one host
fails, then state across the pool will most likely become inconsistent, and
manual intervention may be required *)
lock @@ fun () ->
let all_hosts = Xapi_pool_helpers.get_master_slaves_list ~__context in
Helpers.call_api_functions ~__context @@ fun rpc session_id ->
let certs =
Expand Down Expand Up @@ -396,6 +416,7 @@ let import_joiner ~__context ~uuid ~certificate ~to_hosts =

(* This function is called on the pool that is incorporating a new host *)
let exchange_certificates_with_joiner ~__context ~uuid ~certificate =
lock @@ fun () ->
let all_hosts = Db.Host.get_all ~__context in
import_joiner ~__context ~uuid ~certificate ~to_hosts:all_hosts ;
Helpers.call_api_functions ~__context @@ fun rpc session_id ->
Expand All @@ -414,6 +435,7 @@ let collect_ca_certs ~__context ~names =

(* This function is called on the pool that is incorporating a new host *)
let exchange_ca_certificates_with_joiner ~__context ~import ~export =
lock @@ fun () ->
let all_hosts = Db.Host.get_all ~__context in
let appliance_certs = List.map WireProtocol.certificate_file_of_pair import in

Expand Down
16 changes: 12 additions & 4 deletions ocaml/xapi/message_forwarding.ml
Original file line number Diff line number Diff line change
Expand Up @@ -782,12 +782,20 @@ functor
do_op_on ~local_fn ~__context ~host (fun session_id rpc ->
Client.Pool.eject rpc session_id host
) ;
(* call eject on all other slaves first *)
(* perform cleanup on remaining pool members
* this must be best effort - once an eject has begun we cannot rollback *)
other
|> List.iter (fun h ->
do_op_on ~local_fn ~__context ~host:h (fun session_id rpc ->
Client.Pool.eject rpc session_id host
)
try
do_op_on ~local_fn ~__context ~host:h (fun session_id rpc ->
Client.Pool.eject rpc session_id host
)
with e ->
D.warn
"Pool.eject: while ejecting host=%s, we failed to clean up \
on host=%s. ignoring error: %s"
(Ref.short_string_of host) (Ref.short_string_of h)
(Printexc.to_string e)
) ;
(* finally clean up on master *)
do_op_on ~local_fn ~__context ~host:master (fun session_id rpc ->
Expand Down