WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-api

[Xen-API] [PATCH 3 of 5] CA-42914, SCTX-434: Increase the timeout for wr

To: xen-api@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-API] [PATCH 3 of 5] CA-42914, SCTX-434: Increase the timeout for writing the database to the redo-log
From: Jonathan Davies <jonathan.davies@xxxxxxxxxx>
Date: Tue, 26 Oct 2010 14:45:00 +0100
Delivery-date: Tue, 26 Oct 2010 06:46:47 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <patchbomb.1288100697@dusk>
List-help: <mailto:xen-api-request@lists.xensource.com?subject=help>
List-id: Discussion of API issues surrounding Xen <xen-api.lists.xensource.com>
List-post: <mailto:xen-api@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-api>, <mailto:xen-api-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-api>, <mailto:xen-api-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-api-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Jonathan Davies <jonathan.davies@xxxxxxxxxx>
# Date 1288100551 -3600
# Node ID 48abfd03ac2c87bdaec712e1921b4234273a8247
# Parent  42c273981f8bd474df729e8dfd9e73ad6a66f6a0
CA-42914, SCTX-434: Increase the timeout for writing the database to the 
redo-log

Previously, we had a flat timeout of 2 seconds for all redo-log operations. 
However, this has been shown to be too impatient for writing large databases 
over slow connections. (This resulted in timeouts firing prematurely, resulting 
in lots of METADATA_LUN_BROKEN alerts, and the redo-log being entirely useless 
for large pools as the database would never be successfully written!)

The timeout for each operation can now be specified independently. The new 
default for database writes is 30 seconds.

Signed-off-by: Jonathan Davies <jonathan.davies@xxxxxxxxxx>

diff -r 42c273981f8b -r 48abfd03ac2c ocaml/database/block_device_io.ml
--- a/ocaml/database/block_device_io.ml Tue Oct 26 14:42:30 2010 +0100
+++ b/ocaml/database/block_device_io.ml Tue Oct 26 14:42:31 2010 +0100
@@ -781,16 +781,16 @@
                 let str = String.make command_size '\000' in
                 Unixext.really_read client str 0 command_size;
                 
+                (* Note: none of the action functions throw any exceptions; 
they report errors directly to the client. *)
+                let (action_fn, block_time) = match str with
+                  | "writedelta" -> action_writedelta, 
Xapi_globs.redo_log_max_block_time_writedelta
+                  | "writedb___" -> action_writedb,    
Xapi_globs.redo_log_max_block_time_writedb
+                  | "read______" -> action_read,       
Xapi_globs.redo_log_max_block_time_read
+                  | "empty_____" -> action_empty,      
Xapi_globs.redo_log_max_block_time_empty
+                  | _ -> (fun _ _ _ _ -> send_failure client (str^"|nack") 
("Unknown command "^str)), 0.
+                in
                 (* "Start the clock!" -- set the latest time by which we need 
to have responded to the client. *)
-                let target_response_time = Unix.gettimeofday() +. 
Xapi_globs.redo_log_max_block_time in
-                (* Note: none of the action functions throw any exceptions; 
they report errors directly to the client. *)
-                let action_fn = match str with
-                  | "writedelta" -> action_writedelta
-                  | "writedb___" -> action_writedb
-                  | "read______" -> action_read
-                  | "empty_____" -> action_empty
-                  | _ -> (fun _ _ _ _ -> send_failure client (str^"|nack") 
("Unknown command "^str))
-                in
+                let target_response_time = Unix.gettimeofday() +. block_time in
                 action_fn block_dev_fd client !datasock target_response_time
               with (* this must be an exception in Unixext.really_read because 
action_fn doesn't throw exceptions *)
               | End_of_file ->
diff -r 42c273981f8b -r 48abfd03ac2c ocaml/database/redo_log.ml
--- a/ocaml/database/redo_log.ml        Tue Oct 26 14:42:30 2010 +0100
+++ b/ocaml/database/redo_log.ml        Tue Oct 26 14:42:31 2010 +0100
@@ -176,9 +176,9 @@
    In practise, this should never happen since whenever a writedelta fails, we 
attempt a writedb. *)
 let stop_at_missing_record = true
 
-let get_latest_response_time () =
+let get_latest_response_time block_time =
   let now = Unix.gettimeofday() in
-  now +. Xapi_globs.redo_log_max_block_time
+  now +. block_time
 
 (* Returns the PID of the process *)
 let start_io_process block_dev ctrlsockpath datasockpath =
@@ -293,7 +293,7 @@
 let action_empty sock datasockpath =
   R.debug "Performing empty";
   (* Compute desired response time *)
-  let latest_response_time = get_latest_response_time() in
+  let latest_response_time = get_latest_response_time 
Xapi_globs.redo_log_max_block_time_empty in
   (* Empty *)
   let str = "empty_____" in
   Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -313,7 +313,7 @@
 let action_read fn_db fn_delta sock datasockpath =
   R.debug "Performing read";
   (* Compute desired response time *)
-  let latest_response_time = get_latest_response_time() in
+  let latest_response_time = get_latest_response_time 
Xapi_globs.redo_log_max_block_time_read in
   (* Write *)
   let str = "read______" in
   Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -323,7 +323,7 @@
 let action_write_db marker generation_count write_fn sock datasockpath =
   R.debug "Performing writedb (generation %Ld)" generation_count;
   (* Compute desired response time *)
-  let latest_response_time = get_latest_response_time() in
+  let latest_response_time = get_latest_response_time 
Xapi_globs.redo_log_max_block_time_writedb in
   (* Send write command down control channel *)
   let str = Printf.sprintf "writedb___|%s|%016Ld" marker generation_count in
   Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -363,7 +363,7 @@
 let action_write_delta marker generation_count data flush_db_fn sock 
datasockpath =
   R.debug "Performing writedelta (generation %Ld)" generation_count;
   (* Compute desired response time *)
-  let latest_response_time = get_latest_response_time() in
+  let latest_response_time = get_latest_response_time 
Xapi_globs.redo_log_max_block_time_writedelta in
   (* Write *)
   let str = Printf.sprintf "writedelta|%s|%016Ld|%016d|%s" marker 
generation_count (String.length data) data in
   Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -526,7 +526,7 @@
           match !sock with
           | Some _ -> () (* We're already connected *)
           | None ->
-            let latest_connect_time = get_latest_response_time() in
+            let latest_connect_time = get_latest_response_time 
Xapi_globs.redo_log_max_startup_time in
 
             (* Now connect to the process via the socket *)
             let s = connect ctrlsockpath latest_connect_time in
diff -r 42c273981f8b -r 48abfd03ac2c ocaml/xapi/xapi_globs.ml
--- a/ocaml/xapi/xapi_globs.ml  Tue Oct 26 14:42:30 2010 +0100
+++ b/ocaml/xapi/xapi_globs.ml  Tue Oct 26 14:42:31 2010 +0100
@@ -495,10 +495,19 @@
 
 (** {3 Settings related to the connection to the block device I/O process} *)
 
-(** The maximum time, in seconds, for which we are prepared to wait for a 
response from the block device I/O process before assuming that it has died *)
-let redo_log_max_block_time = 2.
+(** The maximum time, in seconds, for which we are prepared to wait for a 
response from the block device I/O process before assuming that it has died 
while emptying *)
+let redo_log_max_block_time_empty = 2.
 
-(** The maximum time, in seconds, for which we are prepared to wait for a 
response from the block device I/O process while initially connecting to it 
before assuming that it has died *)
+(** The maximum time, in seconds, for which we are prepared to wait for a 
response from the block device I/O process before assuming that it has died 
while reading *)
+let redo_log_max_block_time_read = 30.
+
+(** The maximum time, in seconds, for which we are prepared to wait for a 
response from the block device I/O process before assuming that it has died 
while writing a delta *)
+let redo_log_max_block_time_writedelta = 2.
+
+(** The maximum time, in seconds, for which we are prepared to wait for a 
response from the block device I/O process before assuming that it has died 
while writing a database *)
+let redo_log_max_block_time_writedb = 30.
+
+(** The maximum time, in seconds, for which we are prepared to wait for a 
response from the block device I/O process before assuming that it has died 
while initially connecting to it *)
 let redo_log_max_startup_time = 5.
 
 (** The delay between each attempt to connect to the block device I/O process 
*)

Attachment: xen-api.hg-5.patch
Description: Text Data

_______________________________________________
xen-api mailing list
xen-api@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/mailman/listinfo/xen-api