Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
REQ-403 cert_distrib lock
This prevents concurrent cert distribs interfering with each other.

Pool.join caveat: during a join, the joiner modifies /etc/stunnel
itself. This cannot be locked by the master, so some race conditions
still remain.  Additionally, the joiner goes offline for a portion of
the join, so could lose out on certificate updates during this time,
especially if multiple pool joins are happening at the same time.

Signed-off-by: Ben Anson <[email protected]>
  • Loading branch information
Ben Anson committed Jun 21, 2021
commit 88af3b2b031783842503cc79b7d6d363608b9f23
22 changes: 22 additions & 0 deletions ocaml/xapi/cert_distrib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,25 @@ let collect_pool_certs ~__context ~rpc ~session_id ~map ~from_hosts =
map cert
)

let _distrib_m = Mutex.create ()

(* where possible, the master host should control the certificate
* distributions. this allows us to coordinate multiple parties that are
* trying to modify /etc/stunnel at the same time with [lock]!
*
* we apply this lock to all top level distribution calls, with the exception of
* the pool join functions that execute on the joiner.
*)
let lock (f : unit -> 'a) : 'a =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about with_lock or locked as a name?

D.debug "cert_distrib.ml: locking..." ;
Mutex.lock _distrib_m ;
Fun.protect
~finally:(fun () ->
D.debug "cert_distrib.ml: unlocking..." ;
Mutex.unlock _distrib_m
)
f

let exchange_certificates_among_all_members ~__context =
(* here we coordinate the certificate distribution. from a high level
we do the following:
Expand All @@ -363,6 +382,7 @@ let exchange_certificates_among_all_members ~__context =
we do not guarantee 'atomicity', so if regenerating the bundle on one host
fails, then state across the pool will most likely become inconsistent, and
manual intervention may be required *)
lock @@ fun () ->
let all_hosts = Xapi_pool_helpers.get_master_slaves_list ~__context in
Helpers.call_api_functions ~__context @@ fun rpc session_id ->
let certs =
Expand Down Expand Up @@ -396,6 +416,7 @@ let import_joiner ~__context ~uuid ~certificate ~to_hosts =

(* This function is called on the pool that is incorporating a new host *)
let exchange_certificates_with_joiner ~__context ~uuid ~certificate =
lock @@ fun () ->
let all_hosts = Db.Host.get_all ~__context in
import_joiner ~__context ~uuid ~certificate ~to_hosts:all_hosts ;
Helpers.call_api_functions ~__context @@ fun rpc session_id ->
Expand All @@ -414,6 +435,7 @@ let collect_ca_certs ~__context ~names =

(* This function is called on the pool that is incorporating a new host *)
let exchange_ca_certificates_with_joiner ~__context ~import ~export =
lock @@ fun () ->
let all_hosts = Db.Host.get_all ~__context in
let appliance_certs = List.map WireProtocol.certificate_file_of_pair import in

Expand Down