If xenstored is part-way through writing a header+payload into the
buffer shared with a guest domain when the guest domain decides to
suspend, the buffer is corrupted, as xenstored doesn't know that it
has a partial write to complete when the domain revives. The domain
is expecting proper completion of the partial header+payload and is
disappointed.
The attached patch modifies xenstored such that it checks for
sufficient space for header+payload before making any changes to the
shared buffer.
It is against 3.0.4-1, but the code in unstable looks the same.
Require that writes to a domain complete in a single chunk (i.e.,
both header and payload are written at the same time) to avoid leaving
the ring in an inconsistent state across suspend/resume.
Signed-off-by: David Edmondson <dme@xxxxxxx>
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -264,10 +264,17 @@ static bool write_messages(struct connec
{
int ret;
struct buffered_data *out;
+ unsigned int space_required = 0;
+ bool r = false;
out = list_top(&conn->out_list, struct buffered_data, list);
if (out == NULL)
return true;
+
+ if (out->inhdr)
+ space_required += sizeof (out->hdr);
+ space_required += out->hdr.msg.len;
+ space_required -= out->used;
if (out->inhdr) {
if (verbose)
@@ -277,36 +284,59 @@ static bool write_messages(struct connec
out->buffer, conn);
ret = conn->write(conn, out->hdr.raw + out->used,
sizeof(out->hdr) - out->used);
- if (ret < 0)
- return false;
-
+ if (ret < 0) {
+ r = false;
+ goto done;
+ }
+
+ space_required -= ret;
out->used += ret;
- if (out->used < sizeof(out->hdr))
- return true;
+ if (out->used < sizeof(out->hdr)) {
+ r = true;
+ goto done;
+ }
out->inhdr = false;
out->used = 0;
/* Second write might block if non-zero. */
- if (out->hdr.msg.len && !conn->domain)
- return true;
+ if ((out->hdr.msg.len != 0) &&
+ (conn->domain == NULL)) {
+ r = true;
+ goto done;
+ }
}
ret = conn->write(conn, out->buffer + out->used,
out->hdr.msg.len - out->used);
- if (ret < 0)
- return false;
-
+ if (ret < 0) {
+ r = false;
+ goto done;
+ }
+
+ space_required -= ret;
out->used += ret;
- if (out->used != out->hdr.msg.len)
- return true;
-
+ if (out->used != out->hdr.msg.len) {
+ r = true;
+ goto done;
+ }
+
+ r = true;
trace_io(conn, "OUT", out);
list_del(&out->list);
talloc_free(out);
- return true;
+done:
+ if ((conn->domain != NULL) && (space_required != 0))
+ /*
+ * Not all of the data was consumed. This can leave
+ * the shared ring pointers in an inconsistent state
+ * across suspend/resume.
+ */
+ eprintf("partial write to shared page of domain");
+
+ return r;
}
static int destroy_conn(void *_conn)
@@ -1997,7 +2027,7 @@ int main(int argc, char *argv[])
goto more;
}
- if (domain_can_write(i) && !list_empty(&i->out_list)) {
+ if (domain_can_write(i)) {
handle_output(i);
goto more;
}
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -109,23 +109,29 @@ static int writechn(struct connection *c
void *dest;
struct xenstore_domain_interface *intf = conn->domain->interface;
XENSTORE_RING_IDX cons, prod;
-
- /* Must read indexes once, and before anything else, and verified. */
- cons = intf->rsp_cons;
- prod = intf->rsp_prod;
- mb();
- if (!check_indexes(cons, prod)) {
- errno = EIO;
- return -1;
- }
-
- dest = get_output_chunk(cons, prod, intf->rsp, &avail);
- if (avail < len)
- len = avail;
-
- memcpy(dest, data, len);
- mb();
- intf->rsp_prod += len;
+ unsigned int remain = len;
+
+ while (remain > 0) {
+ /* Must read indexes once, and before anything else,
+ * and verified. */
+ cons = intf->rsp_cons;
+ prod = intf->rsp_prod;
+ mb();
+ if (!check_indexes(cons, prod)) {
+ errno = EIO;
+ return -1;
+ }
+
+ dest = get_output_chunk(cons, prod, intf->rsp, &avail);
+ if (avail > remain)
+ avail = remain;
+ memcpy(dest, data, avail);
+ data += avail;
+ remain -= avail;
+
+ mb();
+ intf->rsp_prod += avail;
+ }
xc_evtchn_notify(xce_handle, conn->domain->port);
@@ -236,7 +242,30 @@ bool domain_can_write(struct connection
bool domain_can_write(struct connection *conn)
{
struct xenstore_domain_interface *intf = conn->domain->interface;
- return ((intf->rsp_prod - intf->rsp_cons) != XENSTORE_RING_SIZE);
+ struct buffered_data *out;
+ unsigned int space_required = 0, space_available;
+
+ out = list_top(&conn->out_list, struct buffered_data, list);
+ if (out == NULL)
+ return false;
+
+ if (out->inhdr)
+ space_required += sizeof (out->hdr);
+ space_required += out->hdr.msg.len;
+ space_required -= out->used;
+
+ if (space_required > XENSTORE_RING_SIZE) {
+ eprintf("attempt to write %d bytes "
+ "to domain %d will never succeed",
+ space_required, conn->domain->domid);
+ talloc_free(conn);
+ return false;
+ }
+
+ space_available = XENSTORE_RING_SIZE -
+ (intf->rsp_prod - intf->rsp_cons);
+
+ return (space_available >= space_required);
}
static char *talloc_domain_path(void *context, unsigned int domid)
dme.
--
David Edmondson, Sun Microsystems, http://www.dme.org
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|