# HG changeset patch
# User David Scott <dave.scott@xxxxxxxxxxxxx>
# Date 1257373330 0
# Node ID ec2f51bc7d3bb040954b96b45cd29117ed007567
# Parent 02fe2ca1d1f4e02b0921164299ff9e996c90413b
Turn off the alerts METADATA_LUN_{HEALTHY,BROKEN} by default. They can be
re-enabled by setting the key Pool.other_config:metadata_lun_alerts=true.
The alerts would only be generated in two circumstances:
1. a bug in the code generating them;
2. when the storage underlying the metadata LUN is broken.
I think they were being triggered by brief storage datapath glitches, which
then alarmed people unnecessarily. Until we can investigate this a bit further,
it seems better to turn them off by default.
Signed-off-by: David Scott <dave.scott@xxxxxxxxxxxxx>
diff -r 02fe2ca1d1f4 -r ec2f51bc7d3b ocaml/xapi/redo_log_alert.ml
--- a/ocaml/xapi/redo_log_alert.ml Wed Nov 04 22:22:09 2009 +0000
+++ b/ocaml/xapi/redo_log_alert.ml Wed Nov 04 22:22:10 2009 +0000
@@ -22,13 +22,16 @@
let raise_system_alert news =
(* This code may block indefinitely while attempting to look up the pool
UUID and send the alert, so do it in a separate thread *)
ignore (Thread.create (fun () ->
- debug "Raising system alert...";
+ debug "Processing redo log event: %s" news;
let __context = Context.make "context" in
let pool = Helpers.get_pool ~__context in
let obj_uuid = Db.Pool.get_uuid ~__context ~self:pool in
- debug "Pool UUID is %s" obj_uuid;
- (try ignore (Xapi_message.create ~__context ~name:news ~priority:1L
~cls:`Pool ~obj_uuid ~body:"") with _ -> ());
- debug "System alert raised"
+ let other_config = Db.Pool.get_other_config ~__context ~self:pool in
+ if List.mem_assoc Xapi_globs.redo_log_alert_key other_config &&
(List.assoc Xapi_globs.redo_log_alert_key other_config = "true") then begin
+ debug "Raising alert for pool UUID %s" obj_uuid;
+ (try ignore (Xapi_message.create ~__context ~name:news ~priority:1L
~cls:`Pool ~obj_uuid ~body:"") with _ -> ());
+ debug "Alert raised"
+ end else debug "Not raising alert because Pool.other_config:%s <> true"
Xapi_globs.redo_log_alert_key;
) ())
let loop () =
diff -r 02fe2ca1d1f4 -r ec2f51bc7d3b ocaml/xapi/redo_log_alert.mli
--- a/ocaml/xapi/redo_log_alert.mli Wed Nov 04 22:22:09 2009 +0000
+++ b/ocaml/xapi/redo_log_alert.mli Wed Nov 04 22:22:10 2009 +0000
@@ -13,5 +13,6 @@
*)
(** Runs forever waiting for the redo log's status to change i.e. for
- it to fail or to recover, generating alerts on transitions.*)
+ it to fail or to recover, generating alerts on transitions if
+ Pool.other_config:metadata_lun_alerts is set to "true" *)
val loop: unit -> unit
diff -r 02fe2ca1d1f4 -r ec2f51bc7d3b ocaml/xapi/xapi_globs.ml
--- a/ocaml/xapi/xapi_globs.ml Wed Nov 04 22:22:09 2009 +0000
+++ b/ocaml/xapi/xapi_globs.ml Wed Nov 04 22:22:10 2009 +0000
@@ -530,6 +530,9 @@
(** The maximum permitted backoff delay, in seconds *)
let redo_log_maximum_backoff_delay = 120
+(** Pool.other_config key which, when set to the value "true", enables
generation of METADATA_LUN_{HEALTHY,BROKEN} alerts *)
+let redo_log_alert_key = "metadata_lun_alerts"
+
(** Called from the SR.lvhd_stop_using_these_vdis_and_call_script *)
let lvhd_script_hook = "lvhd-script-hook"
@@ -600,3 +603,4 @@
let permanent_master_failure_retry_timeout = 5. *. 60. (* 5 minutes *)
+
3 files changed, 13 insertions(+), 5 deletions(-)
ocaml/xapi/redo_log_alert.ml | 11 +++++++----
ocaml/xapi/redo_log_alert.mli | 3 ++-
ocaml/xapi/xapi_globs.ml | 4 ++++
xen-api.hg-2.patch
Description: Text Data
_______________________________________________
xen-api mailing list
xen-api@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/mailman/listinfo/xen-api
|