|
|
|
|
|
|
|
|
|
|
xen-api
[Xen-API] [PATCH 4 of 5] CA-42914: Deal with unexpected closure of data
# HG changeset patch
# User Jonathan Davies <jonathan.davies@xxxxxxxxxx>
# Date 1288100551 -3600
# Node ID 7e16add444a9b9387aa908f71570baee8c755981
# Parent 48abfd03ac2c87bdaec712e1921b4234273a8247
CA-42914: Deal with unexpected closure of data socket caused by exception in
block_device_io
Previously, the closing of the data socket caused xapi's code that writes the
database to an fd to raise Sys_error("Connection reset by peer").
Instead, we can safely ignore the unexpected closing of the data socket and
wait until we hear what happened over the control socket. Any exception that
may be raised during transfer_data_from_sock_to_fd in block_device_io (that
causes the data socket to be prematurely closed) gets caught in the exception
handlers in action_writedb that call send_failure. So suppress all
Sys_error("Connection reset by peer") exceptions that xapi may raise during the
writing of the database to the fd because full details should be forthcoming on
the control socket.
Signed-off-by: Jonathan Davies <jonathan.davies@xxxxxxxxxx>
diff -r 48abfd03ac2c -r 7e16add444a9 ocaml/database/block_device_io.ml
--- a/ocaml/database/block_device_io.ml Tue Oct 26 14:42:31 2010 +0100
+++ b/ocaml/database/block_device_io.ml Tue Oct 26 14:42:31 2010 +0100
@@ -328,6 +328,7 @@
)
(fun () ->
(* Close the connection *)
+ (* CA-42914: If there was an exception, note that we are forcibly
closing the connection when possibly the client (xapi) is still trying to write
data. This will cause it to see a 'connection reset by peer' error. *)
R.info "Closing connection on data socket";
ignore_exn (fun () -> Unix.close data_client)
) in
diff -r 48abfd03ac2c -r 7e16add444a9 ocaml/database/redo_log.ml
--- a/ocaml/database/redo_log.ml Tue Oct 26 14:42:31 2010 +0100
+++ b/ocaml/database/redo_log.ml Tue Oct 26 14:42:31 2010 +0100
@@ -339,17 +339,34 @@
(fun () ->
(* Send data straight down the data channel, then close it to send an
EOF. *)
(* Ideally, we would check whether this completes before the
latest_response_time. Could implement this by performing the write in a
separate thread. *)
- write_fn datasock
+
+ try
+ write_fn datasock;
+ R.debug "Finished writing database to data socket";
+ with
+ | Sys_error("Connection reset by peer") ->
+ (* CA-42914: Note that if the block_device_io process internally
+ * throws Timeout (or indeed any other exception), it will forcibly
+ * close this connection and we'll see a Sys_error("Connection reset by
+ * peer"). This can be safely suppressed because we'll hear all the
+ * gory details in the response we read over the control socket. *)
+ R.warn "I/O process forcibly closed the data socket while trying to
write database to it. Await the response to see why it did that.";
+ | e ->
+ (* We'll re-raise other exceptions, though. *)
+ R.error "Got an unexpected exception while trying to write database
to the data socket: %s. Re-raising." (Printexc.to_string e);
+ raise e
)
(fun () ->
- R.debug "Finished writing database to data socket";
(* Ensure the data socket is closed even if exception is thrown from
write_fn *)
- Unix.close datasock
+ R.info "Closing data socket";
+ Unix.close datasock;
);
(* Read response *)
let response_length = 12 in
+ R.debug "Reading response...";
let response = Unixext.time_limited_read sock response_length
latest_response_time in
+ R.debug "Got response [%s]" response;
match response with
| "writedb|ack_" -> ()
| "writedb|nack" ->
xen-api.hg-5.patch
Description: Text Data
_______________________________________________
xen-api mailing list
xen-api@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/mailman/listinfo/xen-api
|
|
|
|
|