Skip to content

Commit 703073c

Browse files
committed
EA-1110/CI-18: make some global variables configurable via config file
Their default values are not changed. We'll do the value tweaking in the coming checkins. Signed-off-by: Zheng Li <[email protected]>
1 parent e220713 commit 703073c

21 files changed

+328
-200
lines changed

ocaml/database/block_device_io.ml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -691,7 +691,7 @@ let _ =
691691

692692
if !dump then begin
693693
(* Open the block device *)
694-
let block_dev_fd = open_block_device !block_dev (Unix.gettimeofday() +. Xapi_globs.redo_log_max_startup_time) in
694+
let block_dev_fd = open_block_device !block_dev (Unix.gettimeofday() +. !Xapi_globs.redo_log_max_startup_time) in
695695
R.info "Opened block device.";
696696

697697
let target_response_time = Unix.gettimeofday() +. 3600. in
@@ -745,7 +745,7 @@ let _ =
745745

746746
if !empty then begin
747747
(* Open the block device *)
748-
let block_dev_fd = open_block_device !block_dev (Unix.gettimeofday() +. Xapi_globs.redo_log_max_startup_time) in
748+
let block_dev_fd = open_block_device !block_dev (Unix.gettimeofday() +. !Xapi_globs.redo_log_max_startup_time) in
749749
R.info "Opened block device.";
750750

751751
let target_response_time = Unix.gettimeofday() +. 3600. in
@@ -762,7 +762,7 @@ let _ =
762762
(* Main loop: accept a new client, communicate with it until it stops sending commands, repeat. *)
763763
while true do
764764
let start_of_startup = Unix.gettimeofday() in
765-
let target_startup_response_time = start_of_startup +. Xapi_globs.redo_log_max_startup_time in
765+
let target_startup_response_time = start_of_startup +. !Xapi_globs.redo_log_max_startup_time in
766766

767767
R.debug "Awaiting incoming connections on %s..." !ctrlsock;
768768
let client = accept_conn s target_startup_response_time in
@@ -788,10 +788,10 @@ let _ =
788788

789789
(* Note: none of the action functions throw any exceptions; they report errors directly to the client. *)
790790
let (action_fn, block_time) = match str with
791-
| "writedelta" -> action_writedelta, Xapi_globs.redo_log_max_block_time_writedelta
792-
| "writedb___" -> action_writedb, Xapi_globs.redo_log_max_block_time_writedb
793-
| "read______" -> action_read, Xapi_globs.redo_log_max_block_time_read
794-
| "empty_____" -> action_empty, Xapi_globs.redo_log_max_block_time_empty
791+
| "writedelta" -> action_writedelta, !Xapi_globs.redo_log_max_block_time_writedelta
792+
| "writedb___" -> action_writedb, !Xapi_globs.redo_log_max_block_time_writedb
793+
| "read______" -> action_read, !Xapi_globs.redo_log_max_block_time_read
794+
| "empty_____" -> action_empty, !Xapi_globs.redo_log_max_block_time_empty
795795
| _ -> (fun _ _ _ _ -> send_failure client (str^"|nack") ("Unknown command "^str)), 0.
796796
in
797797
(* "Start the clock!" -- set the latest time by which we need to have responded to the client. *)

ocaml/database/master_connection.ml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ let with_timestamp f =
6161
One common way this can happen is if we end up blocked waiting for a TCP timeout when the
6262
master goes away unexpectedly... *)
6363
let start_master_connection_watchdog() =
64-
let connection_reset_timeout = 2. *. 60. in
6564
Thread.create
6665
(fun () ->
6766
while (true)
@@ -73,7 +72,7 @@ let start_master_connection_watchdog() =
7372
| Some t ->
7473
let now = Unix.gettimeofday() in
7574
let since_last_call = now -. t in
76-
if since_last_call > connection_reset_timeout then
75+
if since_last_call > !Xapi_globs.master_connection_reset_timeout then
7776
begin
7877
debug "Master connection timeout: forcibly resetting master connection";
7978
force_connection_reset()
@@ -104,7 +103,7 @@ let open_secure_connection () =
104103
(* Do a db xml_rpc request, catching exception and trying to reopen the connection if it
105104
fails *)
106105
exception Goto_handler
107-
let connection_timeout = ref 10. (* -ve means retry forever *)
106+
(* Timeout applied to a db xml_rpc call while the connection to the master is being retried;
   a negative value means never time out (retry forever).
   NOTE(review): the default is read from Xapi_globs once, when this module is initialised;
   confirm the config file has been parsed by then, otherwise an override is not picked up here. *)
let connection_timeout = ref !Xapi_globs.master_connection_default_timeout
108107

109108
(* if this is true then xapi will restart if retries exceeded [and enter emergency mode if still
110109
can't reconnect after reboot]. if this is false then xapi will just throw exception if retries
@@ -154,8 +153,8 @@ let do_db_xml_rpc_persistent_with_reopen ~host ~path (req: string) : string =
154153
with
155154
(* TODO: This http exception handler caused CA-36936 and can probably be removed now that there's backoff delay in the generic handler _ below *)
156155
| Http_client.Http_error (http_code,err_msg) ->
157-
error "Received HTTP error %s (%s) from master. This suggests our master address is wrong. Sleeping for %.0fs and then restarting." http_code err_msg Xapi_globs.permanent_master_failure_retry_timeout;
158-
Thread.delay Xapi_globs.permanent_master_failure_retry_timeout;
156+
error "Received HTTP error %s (%s) from master. This suggests our master address is wrong. Sleeping for %.0fs and then restarting." http_code err_msg !Xapi_globs.permanent_master_failure_retry_interval;
157+
Thread.delay !Xapi_globs.permanent_master_failure_retry_interval;
159158
exit Xapi_globs.restart_return_code
160159
| e ->
161160
begin
@@ -180,7 +179,7 @@ let do_db_xml_rpc_persistent_with_reopen ~host ~path (req: string) : string =
180179
end
181180
else
182181
debug "Connection to master died: time taken so far in this call '%f'; will %s"
183-
time_sofar (if !connection_timeout < 0.
182+
time_sofar (if !connection_timeout < 0.
184183
then "never timeout"
185184
else Printf.sprintf "timeout after '%f'" !connection_timeout);
186185
if time_sofar > !connection_timeout && !connection_timeout >= 0. then

ocaml/database/redo_log.ml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ let connect sockpath latest_response_time =
230230
(* It's probably the case that the process hasn't started yet. *)
231231
(* See if we can afford to wait and try again *)
232232
Unix.close s;
233-
let attempt_delay = Xapi_globs.redo_log_connect_delay in
233+
let attempt_delay = !Xapi_globs.redo_log_connect_delay in
234234
let now = Unix.gettimeofday() in
235235
let remaining = latest_response_time -. now in
236236
if attempt_delay < remaining then begin
@@ -326,7 +326,7 @@ let rec read_read_response sock fn_db fn_delta expected_gen_count latest_respons
326326
let action_empty sock datasockpath =
327327
R.debug "Performing empty";
328328
(* Compute desired response time *)
329-
let latest_response_time = get_latest_response_time Xapi_globs.redo_log_max_block_time_empty in
329+
let latest_response_time = get_latest_response_time !Xapi_globs.redo_log_max_block_time_empty in
330330
(* Empty *)
331331
let str = "empty_____" in
332332
Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -346,7 +346,7 @@ let action_empty sock datasockpath =
346346
let action_read fn_db fn_delta sock datasockpath =
347347
R.debug "Performing read";
348348
(* Compute desired response time *)
349-
let latest_response_time = get_latest_response_time Xapi_globs.redo_log_max_block_time_read in
349+
let latest_response_time = get_latest_response_time !Xapi_globs.redo_log_max_block_time_read in
350350
(* Write *)
351351
let str = "read______" in
352352
Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -356,7 +356,7 @@ let action_read fn_db fn_delta sock datasockpath =
356356
let action_write_db marker generation_count write_fn sock datasockpath =
357357
R.debug "Performing writedb (generation %Ld)" generation_count;
358358
(* Compute desired response time *)
359-
let latest_response_time = get_latest_response_time Xapi_globs.redo_log_max_block_time_writedb in
359+
let latest_response_time = get_latest_response_time !Xapi_globs.redo_log_max_block_time_writedb in
360360
(* Send write command down control channel *)
361361
let str = Printf.sprintf "writedb___|%s|%016Ld" marker generation_count in
362362
Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -413,7 +413,7 @@ let action_write_db marker generation_count write_fn sock datasockpath =
413413
let action_write_delta marker generation_count data flush_db_fn sock datasockpath =
414414
R.debug "Performing writedelta (generation %Ld)" generation_count;
415415
(* Compute desired response time *)
416-
let latest_response_time = get_latest_response_time Xapi_globs.redo_log_max_block_time_writedelta in
416+
let latest_response_time = get_latest_response_time !Xapi_globs.redo_log_max_block_time_writedelta in
417417
(* Write *)
418418
let str = Printf.sprintf "writedelta|%s|%016Ld|%016d|%s" marker generation_count (String.length data) data in
419419
Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -569,7 +569,7 @@ let startup log =
569569
match !(log.sock) with
570570
| Some _ -> () (* We're already connected *)
571571
| None ->
572-
let latest_connect_time = get_latest_response_time Xapi_globs.redo_log_max_startup_time in
572+
let latest_connect_time = get_latest_response_time !Xapi_globs.redo_log_max_startup_time in
573573

574574
(* Now connect to the process via the socket *)
575575
let s = connect ctrlsockpath latest_connect_time in

ocaml/xapi/cli_operations.ml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ let pool_join printer rpc session_id params =
899899
~master_address:(List.assoc "master-address" params)
900900
~master_username:(List.assoc "master-username" params)
901901
~master_password:(List.assoc "master-password" params);
902-
printer (Cli_printer.PList ["Host agent will restart and attempt to join pool in "^(string_of_int Xapi_globs.fuse_time)^" seconds..."])
902+
(* fuse_time is now a float: format with %g so the message reads 10 rather than 10. (string_of_float keeps the trailing dot). *)
printer (Cli_printer.PList ["Host agent will restart and attempt to join pool in "^(Printf.sprintf "%g" !Xapi_globs.fuse_time)^" seconds..."])
903903
with
904904
| Api_errors.Server_error(code, params) when code=Api_errors.pool_joining_host_connection_failed ->
905905
printer (Cli_printer.PList ["Host cannot contact destination host: connection refused.";
@@ -913,7 +913,7 @@ let pool_eject fd printer rpc session_id params =
913913

914914
let go () =
915915
Client.Pool.eject ~rpc ~session_id ~host;
916-
printer (Cli_printer.PList ["Specified host will attempt to restart as a master of a new pool in "^(string_of_int Xapi_globs.fuse_time)^" seconds..."]) in
916+
(* fuse_time is now a float: format with %g so the message reads 10 rather than 10. (string_of_float keeps the trailing dot). *)
printer (Cli_printer.PList ["Specified host will attempt to restart as a master of a new pool in "^(Printf.sprintf "%g" !Xapi_globs.fuse_time)^" seconds..."]) in
917917

918918
if force
919919
then go ()
@@ -969,11 +969,11 @@ let pool_eject fd printer rpc session_id params =
969969
let pool_emergency_reset_master printer rpc session_id params =
970970
let master_address = List.assoc "master-address" params in
971971
Client.Pool.emergency_reset_master ~rpc ~session_id ~master_address;
972-
printer (Cli_printer.PList ["Host agent will restart and become slave of "^master_address^" in "^(string_of_int Xapi_globs.fuse_time)^" seconds..."])
972+
(* fuse_time is now a float: format with %g so the message reads 10 rather than 10. (string_of_float keeps the trailing dot). *)
printer (Cli_printer.PList ["Host agent will restart and become slave of "^master_address^" in "^(Printf.sprintf "%g" !Xapi_globs.fuse_time)^" seconds..."])
973973

974974
let pool_emergency_transition_to_master printer rpc session_id params =
975975
Client.Pool.emergency_transition_to_master ~rpc ~session_id;
976-
printer (Cli_printer.PList ["Host agent will restart and transition to master in "^(string_of_int Xapi_globs.fuse_time)^" seconds..."])
976+
(* fuse_time is now a float: format with %g so the message reads 10 rather than 10. (string_of_float keeps the trailing dot). *)
printer (Cli_printer.PList ["Host agent will restart and transition to master in "^(Printf.sprintf "%g" !Xapi_globs.fuse_time)^" seconds..."])
977977

978978
let pool_recover_slaves printer rpc session_id params =
979979
let hosts = Client.Pool.recover_slaves ~rpc ~session_id in
@@ -3590,7 +3590,7 @@ let pool_restore_db fd printer rpc session_id params =
35903590
ignore(track_http_operation fd rpc session_id make_command "restore database");
35913591
if dry_run
35923592
then printer (Cli_printer.PList [ "Dry-run backup restore successful" ])
3593-
else printer (Cli_printer.PList ["Host will reboot with restored database in "^(string_of_int Xapi_globs.db_restore_fuse_time)^" seconds..."])
3593+
(* db_restore_fuse_time is now a float: format with %g so the message reads 10 rather than 10. (string_of_float keeps the trailing dot). *)
else printer (Cli_printer.PList ["Host will reboot with restored database in "^(Printf.sprintf "%g" !Xapi_globs.db_restore_fuse_time)^" seconds..."])
35943594

35953595

35963596
let pool_enable_external_auth printer rpc session_id params =

ocaml/xapi/db_gc.ml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ let check_host_liveness ~__context =
194194
let now = Unix.gettimeofday () in
195195
(* we can now compare 'host_time' with 'now' *)
196196

197-
if now -. host_time < Xapi_globs.host_assumed_dead_interval then begin
197+
if now -. host_time < !Xapi_globs.host_assumed_dead_interval then begin
198198
(* From the heartbeat PoV the host looks alive. We try to (i) minimise database sets; and (ii)
199199
avoid toggling the host back to live if it has been marked as shutting_down. *)
200200
Mutex.execute Xapi_globs.hosts_which_are_shutting_down_m
@@ -252,7 +252,7 @@ let timeout_sessions_common ~__context sessions =
252252
(* Only keep a list of (ref, last_active, uuid) *)
253253
let disposable_sessions = List.map (fun (x, y) -> x, Date.to_float y.Db_actions.session_last_active, y.Db_actions.session_uuid) disposable_sessions in
254254
(* Definitely invalidate sessions last used long ago *)
255-
let threshold_time = Unix.time () -. Xapi_globs.inactive_session_timeout in
255+
let threshold_time = Unix.time () -. !Xapi_globs.inactive_session_timeout in
256256
let young, old = List.partition (fun (_, y, _) -> y > threshold_time) disposable_sessions in
257257
(* If there are too many young sessions then we need to delete the oldest *)
258258
let lucky, unlucky =
@@ -286,8 +286,8 @@ let timeout_sessions ~__context =
286286

287287
let timeout_tasks ~__context =
288288
let all_tasks = Db.Task.get_internal_records_where ~__context ~expr:Db_filter_types.True in
289-
let oldest_completed_time = Unix.time() -. Xapi_globs.completed_task_timeout (* time out completed tasks after 65 minutes *) in
290-
let oldest_pending_time = Unix.time() -. Xapi_globs.pending_task_timeout (* time out pending tasks after 24 hours *) in
289+
let oldest_completed_time = Unix.time() -. !Xapi_globs.completed_task_timeout (* time out completed tasks after 65 minutes *) in
290+
let oldest_pending_time = Unix.time() -. !Xapi_globs.pending_task_timeout (* time out pending tasks after 24 hours *) in
291291

292292
let should_delete_task (_, t) =
293293
if task_status_is_completed t.Db_actions.task_status
@@ -533,7 +533,7 @@ let start_heartbeat_thread() =
533533

534534
while(true) do
535535
try
536-
Thread.delay Xapi_globs.host_heartbeat_interval;
536+
Thread.delay !Xapi_globs.host_heartbeat_interval;
537537
send_one_heartbeat ~__context rpc session_id
538538
with
539539
| (Api_errors.Server_error (x,y)) as e ->
@@ -549,6 +549,6 @@ let start_heartbeat_thread() =
549549
exit Xapi_globs.restart_return_code
550550
| e ->
551551
debug "Caught %s - logging in again" (ExnHelper.string_of_exn e);
552-
Thread.delay Xapi_globs.host_heartbeat_interval;
552+
Thread.delay !Xapi_globs.host_heartbeat_interval;
553553
done
554554
end)

ocaml/xapi/events.ml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ module Domain_shutdown = struct
125125
if Xapi_fist.disable_reboot_delay () then begin
126126
debug "FIST: disable_reboot_delay";
127127
0, 0
128-
end else if time_vm_ran_for ~__context ~vm < Xapi_globs.minimum_time_between_reboot_with_no_added_delay then begin
128+
end else if time_vm_ran_for ~__context ~vm < !Xapi_globs.minimum_time_between_reboot_with_no_added_delay then begin
129129
calculate_reboot_delay ~__context ~vm domid
130130
end else 0, 0 in
131131
if delay <> 0 then begin
@@ -189,9 +189,9 @@ module Domain_shutdown = struct
189189
(* Perform bounce-suppression to prevent fast crash loops *)
190190
let action =
191191
let t = time_vm_ran_for ~__context ~vm in
192-
if t < Xapi_globs.minimum_time_between_bounces then begin
192+
if t < !Xapi_globs.minimum_time_between_bounces then begin
193193
let msg = Printf.sprintf "VM (%s) domid %d crashed too soon after start (ran for %f; minimum time %f)"
194-
(Db.VM.get_name_label ~__context ~self:vm) domid t Xapi_globs.minimum_time_between_bounces in
194+
(Db.VM.get_name_label ~__context ~self:vm) domid t !Xapi_globs.minimum_time_between_bounces in
195195
match action with
196196
| `coredump_and_restart ->
197197
debug "%s: converting coredump_and_restart -> coredump_and_destroy" msg;

ocaml/xapi/message_forwarding.ml

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,17 +2682,15 @@ module Forward = functor(Local: Custom_actions.CUSTOM_ACTIONS) -> struct
26822682
let (_: Thread.t) = Thread.create (fun () ->
26832683
Client.PIF.reconfigure_ip rpc session_id self mode iP netmask gateway dNS) () in
26842684
let task_id = Context.get_task_id __context in
2685-
let rec poll i =
2686-
if i>300 then failwith "Failed to see host on network after timeout expired";
2685+
let start_time = Unix.gettimeofday () in
2686+
let progress = ref 0.0 in
2687+
while !progress = 0.0 do
2688+
(* Give up only once the elapsed time EXCEEDS the configured timeout. The comparison was inverted (<),
   which raised on the very first iteration instead of polling until the timeout expired. *)
if Unix.gettimeofday () -. start_time > !Xapi_globs.pif_reconfigure_ip_timeout then
2689+
failwith "Failed to see host on network after timeout expired";
26872690
Thread.delay 1.0;
2688-
debug "Polling task %s progress" (Ref.string_of task_id);
2689-
let progress = Db.Task.get_progress ~__context ~self:task_id in
2690-
debug "progress=%f" progress;
2691-
if progress=0.0
2692-
then poll (i+1)
2693-
else ()
2694-
in
2695-
poll 0)
2691+
progress := Db.Task.get_progress ~__context ~self:task_id;
2692+
debug "Polling task %s progress" (Ref.string_of task_id)
2693+
done)
26962694

26972695
let scan ~__context ~host =
26982696
info "PIF.scan: host = '%s'" (host_uuid ~__context host);

ocaml/xapi/pool_db_backup.ml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ open Db_cache_types
2323
module D = Debug.Debugger(struct let name="pool_db_sync" end)
2424
open D
2525

26-
let pool_db_sync_timer = 60.0 *. 5. (* CA-16878: 5 minutes, same as the local database flush *)
27-
2826
let octet_stream = Http.Hdr.content_type ^": application/octet-stream"
2927

3028
(* CA-18377: The smallest database that is compatible with the Miami database schema. *)
@@ -203,8 +201,8 @@ let push_database_restore_handler (req: Http.Request.t) s _ =
203201

204202
(* now restart *)
205203
debug "xapi has received new database via xml; will reboot and use that db...";
206-
info "Rebooting to use restored database after delay of: %d" Xapi_globs.db_restore_fuse_time;
207-
Xapi_fuse.light_fuse_and_reboot ~fuse_length:Xapi_globs.db_restore_fuse_time ();
204+
info "Rebooting to use restored database after delay of: %f" !Xapi_globs.db_restore_fuse_time;
205+
Xapi_fuse.light_fuse_and_reboot ~fuse_length:!Xapi_globs.db_restore_fuse_time ();
208206
end
209207
)
210208

@@ -263,7 +261,7 @@ let pool_db_backup_thread () =
263261
let generation = Db_lock.with_lock (fun () -> Manifest.generation (Database.manifest (Db_ref.get_database (Context.database_of __context)))) in
264262
let dohost host =
265263
try
266-
Thread.delay pool_db_sync_timer;
264+
Thread.delay !Xapi_globs.pool_db_sync_interval;
267265
debug "Starting DB synchronise with host %s" (Ref.string_of host);
268266
Helpers.call_api_functions ~__context
269267
(fun rpc session_id -> Client.Host.request_backup rpc session_id host generation false);
@@ -274,7 +272,7 @@ let pool_db_backup_thread () =
274272
log_backtrace () in
275273

276274
(* since thread.delay is inside dohost fn make sure we don't spin if hosts=[]: *)
277-
if hosts=[] then Thread.delay pool_db_sync_timer
275+
if hosts=[] then Thread.delay !Xapi_globs.pool_db_sync_interval
278276
else List.iter dohost hosts;
279277
end
280278
with e -> debug "Exception in DB synchronise thread: %s" (ExnHelper.string_of_exn e)

0 commit comments

Comments
 (0)