Skip to content

Commit acdfa4a

Browse files
committed
Merge pull request xapi-project#1355 from jeromemaloberti/CA-106403
CA-106403: A timed out connection on writedb must be closed too.
2 parents 1f9d19b + 0338d36 commit acdfa4a

File tree

5 files changed

+44
-39
lines changed

5 files changed

+44
-39
lines changed

ocaml/database/block_device_io.ml

Lines changed: 37 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -306,38 +306,43 @@ let accept_conn s latest_response_time =
306306
(* Listen on a given socket. Accept a single connection and transfer all the data from it to dest_fd, or raise Timeout if target_response_time happens first. *)
307307
(* Raises NotEnoughSpace if the next write would exceed the available_space. *)
308308
let transfer_data_from_sock_to_fd sock dest_fd available_space target_response_time =
309-
(* Open the data channel *)
310-
let s = listen_on sock in
311-
let data_client = accept_conn s target_response_time in
312-
R.info "Accepted connection on data socket";
313-
ignore_exn (fun () -> Unix.close s);
314-
315-
(* Read all the data from the data channel, writing it straight into the block device, keeping track of accumulated length *)
316-
let total_length = ref 0 in
317-
R.debug "Reading from data socket, writing to the block device...";
318-
let bytes_read = finally
319-
(fun () ->
320-
(* Read data from the client until EOF. Returns the length read. *)
321-
Unixext.read_data_in_chunks (fun chunk len ->
322-
(* Check that there's enough space *)
323-
if available_space - !total_length < len then raise NotEnoughSpace;
324-
(* Otherwise write it *)
325-
Unixext.time_limited_write dest_fd len chunk target_response_time;
326-
total_length := !total_length + len
327-
) ~block_size:16384 data_client
328-
)
329-
(fun () ->
330-
(* Close the connection *)
331-
(* CA-42914: If there was an exception, note that we are forcibly closing the connection when possibly the client (xapi) is still trying to write data. This will cause it to see a 'connection reset by peer' error. *)
332-
R.info "Closing connection on data socket";
333-
try
334-
Unix.shutdown data_client Unix.SHUTDOWN_ALL;
335-
Unix.close data_client
336-
with e ->
337-
R.warn "Exception %s while closing socket" (Printexc.to_string e);
338-
) in
339-
R.debug "Finished reading from data socket";
340-
bytes_read
309+
(* Open the data channel *)
310+
let s = listen_on sock in
311+
try
312+
(* May raise a Timeout exception: CA-106403 *)
313+
let data_client = accept_conn s target_response_time in
314+
R.info "Accepted connection on data socket";
315+
ignore_exn (fun () -> Unix.close s);
316+
317+
(* Read all the data from the data channel, writing it straight into the block device, keeping track of accumulated length *)
318+
let total_length = ref 0 in
319+
R.debug "Reading from data socket, writing to the block device...";
320+
let bytes_read = finally
321+
(fun () ->
322+
(* Read data from the client until EOF. Returns the length read. *)
323+
Unixext.read_data_in_chunks (fun chunk len ->
324+
(* Check that there's enough space *)
325+
if available_space - !total_length < len then raise NotEnoughSpace;
326+
(* Otherwise write it *)
327+
Unixext.time_limited_write dest_fd len chunk target_response_time;
328+
total_length := !total_length + len
329+
) ~block_size:16384 data_client
330+
)
331+
(fun () ->
332+
(* Close the connection *)
333+
(* CA-42914: If there was an exception, note that we are forcibly closing the connection when possibly the client (xapi) is still trying to write data. This will cause it to see a 'connection reset by peer' error. *)
334+
R.info "Closing connection on data socket";
335+
try
336+
Unix.shutdown data_client Unix.SHUTDOWN_ALL;
337+
Unix.close data_client
338+
with e ->
339+
R.warn "Exception %s while closing socket" (Printexc.to_string e);
340+
) in
341+
R.debug "Finished reading from data socket";
342+
bytes_read
343+
with Unixext.Timeout -> (* Raised by accept_conn *)
344+
ignore_exn (fun () -> Unix.close s);
345+
raise Unixext.Timeout
341346

342347
let transfer_database_to_sock sock db_fn target_response_time =
343348
(* Open the data channel *)

ocaml/database/redo_log.ml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -747,13 +747,14 @@ let empty log =
747747
let flush_db_to_redo_log db log =
748748
R.info "Flushing database to redo_log [%s]" log.name;
749749
let write_db_to_fd = (fun out_fd -> Db_xml.To.fd out_fd db) in
750-
write_db (Db_cache_types.Manifest.generation (Db_cache_types.Database.manifest db)) write_db_to_fd log
750+
write_db (Db_cache_types.Manifest.generation (Db_cache_types.Database.manifest db)) write_db_to_fd log;
751+
!(log.currently_accessible)
751752

752753
(* Write the given database to all active redo_logs *)
753754
let flush_db_to_all_active_redo_logs db =
754755
R.info "Flushing database to all active redo-logs";
755756
with_active_redo_logs (fun log ->
756-
flush_db_to_redo_log db log)
757+
ignore(flush_db_to_redo_log db log))
757758

758759
(* Write a delta to all active redo_logs *)
759760
let database_callback event db =
@@ -783,7 +784,7 @@ let database_callback event db =
783784
write_delta (Db_cache_types.Manifest.generation (Db_cache_types.Database.manifest db)) entry
784785
(fun () ->
785786
(* the function which will be invoked if a database write is required instead of a delta *)
786-
flush_db_to_redo_log db log)
787+
ignore(flush_db_to_redo_log db log))
787788
log
788789
)
789790
) to_write

ocaml/database/redo_log.mli

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ val empty : redo_log -> unit
112112
(** Invalidate the block device. This means that subsequent attempts to read from the block device will not find anything.
113113
This function is best-effort only and does not raise any exceptions in the case of error. *)
114114

115-
val flush_db_to_redo_log: Db_cache_types.Database.t -> redo_log -> unit
115+
val flush_db_to_redo_log: Db_cache_types.Database.t -> redo_log -> bool
116116
(** Immediately write the given database to the given redo_log instance *)
117117

118118
val flush_db_to_all_active_redo_logs: Db_cache_types.Database.t -> unit

ocaml/xapi/xapi_pool.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -940,7 +940,7 @@ let sync_database ~__context =
940940
(fun () ->
941941
(* If HA is enabled I'll first try to flush to the LUN *)
942942
let pool = Helpers.get_pool ~__context in
943-
let flushed_to_vdi = Db.Pool.get_ha_enabled ~__context ~self:pool && (Xha_metadata_vdi.flush_database ~__context Xapi_ha.ha_redo_log) in
943+
let flushed_to_vdi = Db.Pool.get_ha_enabled ~__context ~self:pool && (Db_lock.with_lock (fun () -> Xha_metadata_vdi.flush_database ~__context Xapi_ha.ha_redo_log)) in
944944
if flushed_to_vdi
945945
then debug "flushed database to metadata VDI: assuming this is sufficient."
946946
else begin

ocaml/xapi/xha_metadata_vdi.ml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,5 @@ open Pervasiveext
6868
(** Attempt to flush the database to the metadata VDI *)
6969
let flush_database ~__context log =
7070
try
71-
Redo_log.flush_db_to_redo_log (Db_ref.get_database (Db_backend.make ())) log;
72-
true
71+
Redo_log.flush_db_to_redo_log (Db_ref.get_database (Db_backend.make ())) log
7372
with _ -> false

0 commit comments

Comments
 (0)