WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] better error handling in fs-backend

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] better error handling in fs-backend
From: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
Date: Tue, 7 Jul 2009 17:17:40 +0100
Delivery-date: Tue, 07 Jul 2009 09:18:04 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Alpine 2.00 (DEB 1167 2008-08-23)
Hi all,
currently most of the error checking in fs-backend is done by the use of
asserts that would terminate the daemon in case of a single error on a
single request.
This patch replaces the asserts with debugging messages and terminates
the connection on which the error occurred.
With this patch applied I was able to complete successfully over 1000
live migrations with stubdoms.


Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>

---


diff -r 92ee18aac1b6 tools/fs-back/fs-backend.c
--- a/tools/fs-back/fs-backend.c        Mon Jul 06 13:28:43 2009 +0100
+++ b/tools/fs-back/fs-backend.c        Tue Jul 07 16:55:36 2009 +0100
@@ -144,7 +144,8 @@
         xc_evtchn_notify(mount->evth, mount->local_evtchn);
 }
 
-static void terminate_mount_request(struct fs_mount *mount) {
+void terminate_mount_request(struct fs_mount *mount)
+{
     int count = 0, i;
 
     FS_DEBUG("terminate_mount_request %s\n", mount->frontend);
@@ -158,7 +159,13 @@
         }
     mount->nr_entries = count;
 
-    while (!xenbus_frontend_state_changed(mount, STATE_CLOSING));
+    /* wait for the frontend to shut down but don't wait more than 3
+     * seconds */
+    i = 0;
+    while (!xenbus_frontend_state_changed(mount, STATE_CLOSING) && i < 3) {
+        sleep(1);
+        i++;
+    }
     xenbus_write_backend_state(mount, STATE_CLOSED);
 
     xc_gnttab_munmap(mount->gnth, mount->ring.sring, mount->shared_ring_size);
@@ -183,7 +190,7 @@
 {
     struct fs_mount *mount;
     struct fs_export *export;
-    struct fsif_sring *sring;
+    struct fsif_sring *sring = NULL;
     uint32_t dom_ids[MAX_RING_SIZE];
     int i;
 
@@ -204,24 +211,38 @@
     }
 
     mount = (struct fs_mount*)malloc(sizeof(struct fs_mount));
+    memset(mount, 0x00, sizeof(struct fs_mount));
     mount->dom_id = frontend_dom_id;
     mount->export = export;
     mount->mount_id = mount_id++;
-    xenbus_read_mount_request(mount, frontend);
+    if (xenbus_read_mount_request(mount, frontend) < 0)
+        goto error;
     FS_DEBUG("Frontend found at: %s (gref=%d, evtchn=%d)\n", 
             mount->frontend, mount->grefs[0], mount->remote_evtchn);
-    xenbus_write_backend_node(mount);
+    if (!xenbus_write_backend_node(mount)) {
+        FS_DEBUG("ERROR: failed to write backend node on xenbus\n");
+        goto error;
+    }
     mount->evth = -1;
     mount->evth = xc_evtchn_open(); 
-    assert(mount->evth != -1);
+    if (mount->evth < 0) {
+        FS_DEBUG("ERROR: Couldn't open evtchn!\n");
+        goto error;
+    }
     mount->local_evtchn = -1;
     mount->local_evtchn = xc_evtchn_bind_interdomain(mount->evth, 
                                                      mount->dom_id, 
                                                      mount->remote_evtchn);
-    assert(mount->local_evtchn != -1);
+    if (mount->local_evtchn < 0) {
+        FS_DEBUG("ERROR: Couldn't bind evtchn!\n");
+        goto error;
+    }
     mount->gnth = -1;
     mount->gnth = xc_gnttab_open(); 
-    assert(mount->gnth != -1);
+    if (mount->gnth < 0) {
+        FS_DEBUG("ERROR: Couldn't open gnttab!\n");
+        goto error;
+    }
     for(i=0; i<mount->shared_ring_size; i++)
         dom_ids[i] = mount->dom_id;
     sring = xc_gnttab_map_grant_refs(mount->gnth,
@@ -230,16 +251,41 @@
                                      mount->grefs,
                                      PROT_READ | PROT_WRITE);
 
+    if (!sring) {
+        FS_DEBUG("ERROR: Couldn't amp grant refs!\n");
+        goto error;
+    }
+
     BACK_RING_INIT(&mount->ring, sring, mount->shared_ring_size * 
XC_PAGE_SIZE);
     mount->nr_entries = mount->ring.nr_ents; 
     for (i = 0; i < MAX_FDS; i++)
         mount->fds[i] = -1;
 
     LIST_INSERT_HEAD(&mount_requests_head, mount, entries);
-    xenbus_watch_frontend_state(mount);
-    xenbus_write_backend_state(mount, STATE_READY);
+    if (!xenbus_watch_frontend_state(mount)) {
+        FS_DEBUG("ERROR: failed to watch frontend state on xenbus\n");
+        goto error;
+    }
+    if (!xenbus_write_backend_state(mount, STATE_READY)) {
+        FS_DEBUG("ERROR: failed to write backend state to xenbus\n");
+        goto error;
+    }
     
     allocate_request_array(mount);
+
+    return;
+
+error:
+    xenbus_write_backend_state(mount, STATE_CLOSED);
+    if (sring)
+        xc_gnttab_munmap(mount->gnth, mount->ring.sring, 
mount->shared_ring_size);
+    if (mount->gnth > 0)
+        xc_gnttab_close(mount->gnth);
+    if (mount->local_evtchn > 0)
+        xc_evtchn_unbind(mount->evth, mount->local_evtchn);
+    if (mount->evth > 0)
+        xc_evtchn_close(mount->evth);
+    return;
 }
 
 static void await_connections(void)
diff -r 92ee18aac1b6 tools/fs-back/fs-backend.h
--- a/tools/fs-back/fs-backend.h        Mon Jul 06 13:28:43 2009 +0100
+++ b/tools/fs-back/fs-backend.h        Tue Jul 07 16:55:36 2009 +0100
@@ -56,6 +56,7 @@
     LIST_ENTRY(fs_mount) entries;
 };
 
+void terminate_mount_request(struct fs_mount *mount);
 
 /* Handle to XenStore driver */
 extern struct xs_handle *xsh;
@@ -63,12 +64,12 @@
 bool xenbus_create_request_node(void);
 int xenbus_register_export(struct fs_export *export);
 int xenbus_get_watch_fd(void);
-void xenbus_read_mount_request(struct fs_mount *mount, char *frontend);
-void xenbus_write_backend_node(struct fs_mount *mount);
-void xenbus_write_backend_state(struct fs_mount *mount, const char *state);
+int xenbus_read_mount_request(struct fs_mount *mount, char *frontend);
+bool xenbus_write_backend_node(struct fs_mount *mount);
+bool xenbus_write_backend_state(struct fs_mount *mount, const char *state);
 int xenbus_frontend_state_changed(struct fs_mount *mount, const char 
*oldstate);
-void xenbus_watch_frontend_state(struct fs_mount *mount);
-void xenbus_unwatch_frontend_state(struct fs_mount *mount);
+bool xenbus_watch_frontend_state(struct fs_mount *mount);
+bool xenbus_unwatch_frontend_state(struct fs_mount *mount);
 char* xenbus_read_frontend_state(struct fs_mount *mount);
 
 /* File operations, implemented in fs-ops.c */
diff -r 92ee18aac1b6 tools/fs-back/fs-ops.c
--- a/tools/fs-back/fs-ops.c    Mon Jul 06 13:28:43 2009 +0100
+++ b/tools/fs-back/fs-ops.c    Tue Jul 07 16:55:36 2009 +0100
@@ -99,7 +99,10 @@
         }
     }
 out:
-    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
+    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+        terminate_mount_request(mount);
+    }
     /* We can advance the request consumer index, from here on, the request
      * should not be used (it may be overrinden by a response) */
     mount->ring.req_cons++;
@@ -187,7 +190,11 @@
     priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
     priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
     priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
-    assert(aio_read(&priv_req->aiocb) >= 0);
+    if (aio_read(&priv_req->aiocb) < 0) {
+        FS_DEBUG("ERROR: aio_read failed errno=%d\n", errno);
+        xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count);
+        terminate_mount_request(mount);
+    }
 
     /* We can advance the request consumer index, from here on, the request
      * should not be used (it may be overrinden by a response) */
@@ -201,9 +208,10 @@
     uint16_t req_id;
 
     /* Release the grant */
-    assert(xc_gnttab_munmap(mount->gnth, 
-                            priv_req->page, 
-                            priv_req->count) == 0);
+    if (xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count) != 0) {
+        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+        terminate_mount_request(mount);
+    }
 
     /* Get a response from the ring */
     rsp_idx = mount->ring.rsp_prod_pvt++;
@@ -257,7 +265,11 @@
     priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
     priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
     priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
-    assert(aio_write(&priv_req->aiocb) >= 0);
+    if (aio_write(&priv_req->aiocb) < 0) {
+        FS_DEBUG("ERROR: aio_write failed errno=%d\n", errno);
+        xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count);
+        terminate_mount_request(mount);
+    }
 
      
     /* We can advance the request consumer index, from here on, the request
@@ -272,9 +284,10 @@
     uint16_t req_id;
 
     /* Release the grant */
-    assert(xc_gnttab_munmap(mount->gnth, 
-                            priv_req->page, 
-                            priv_req->count) == 0);
+    if (xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count) != 0) {
+        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+        terminate_mount_request(mount);
+    }
     
     /* Get a response from the ring */
     rsp_idx = mount->ring.rsp_prod_pvt++;
@@ -392,7 +405,10 @@
         ret = remove(file_name);
     }
     FS_DEBUG("Got ret: %d\n", ret);
-    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
+    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+        terminate_mount_request(mount);
+    }
     /* We can advance the request consumer index, from here on, the request
      * should not be used (it may be overrinden by a response) */
     mount->ring.req_cons++;
@@ -435,7 +451,10 @@
         ret = rename(old_file_name, new_file_name);
     }
     FS_DEBUG("Got ret: %d\n", ret);
-    assert(xc_gnttab_munmap(mount->gnth, buf, 1) == 0);
+    if (xc_gnttab_munmap(mount->gnth, buf, 1) != 0) {
+        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+        terminate_mount_request(mount);
+    }
     /* We can advance the request consumer index, from here on, the request
      * should not be used (it may be overrinden by a response) */
     mount->ring.req_cons++;
@@ -500,7 +519,10 @@
         }
     }
 out:
-    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
+    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+        terminate_mount_request(mount);
+    }
     FS_DEBUG("Got ret %d (errno=%d)\n", ret, errno);
 
     /* Get a response from the ring */
@@ -556,7 +578,8 @@
     /* If there was any error with reading the directory, errno will be set */
     error_code = errno;
     /* Copy file names of the remaining non-NULL dirents into buf */
-    assert(NAME_MAX < XC_PAGE_SIZE >> 1);
+    if (NAME_MAX >= XC_PAGE_SIZE >> 1)
+        goto error_out;
     while(dirent != NULL && 
             (XC_PAGE_SIZE - ((unsigned long)buf & XC_PAGE_MASK) > NAME_MAX))
     {
@@ -572,7 +595,10 @@
     ret_val = ((nr_files << NR_FILES_SHIFT) & NR_FILES_MASK) | 
               ((error_code << ERROR_SHIFT) & ERROR_MASK) | 
               (dirent != NULL ? HAS_MORE_FLAG : 0);
-    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
+    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+        terminate_mount_request(mount);
+    }
     
     /* Get a response from the ring */
     rsp_idx = mount->ring.rsp_prod_pvt++;
@@ -640,7 +666,10 @@
     if(ret >= 0)
         ret = stat.f_bsize * stat.f_bfree;
 
-    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
+    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+        terminate_mount_request(mount);
+    }
     /* We can advance the request consumer index, from here on, the request
      * should not be used (it may be overrinden by a response) */
     mount->ring.req_cons++;
@@ -680,8 +709,10 @@
     priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
     priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
     priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
-    assert(aio_fsync(O_SYNC, &priv_req->aiocb) >= 0);
-
+    if (aio_fsync(O_SYNC, &priv_req->aiocb) < 0) {
+        FS_DEBUG("ERROR: aio_fsync failed errno=%d\n", errno);
+        terminate_mount_request(mount);
+    }
      
     /* We can advance the request consumer index, from here on, the request
      * should not be used (it may be overrinden by a response) */
diff -r 92ee18aac1b6 tools/fs-back/fs-xenbus.c
--- a/tools/fs-back/fs-xenbus.c Mon Jul 06 13:28:43 2009 +0100
+++ b/tools/fs-back/fs-xenbus.c Tue Jul 07 16:55:36 2009 +0100
@@ -107,11 +107,14 @@
     int res;
     assert(xsh != NULL);
     res = xs_watch(xsh, WATCH_NODE, "conn-watch");
-    assert(res);
+    if (!res) {
+        FS_DEBUG("ERROR: xs_watch %s failed ret=%d errno=%d\n", WATCH_NODE, 
res, errno);
+        return -1;
+    }
     return xs_fileno(xsh); 
 }
 
-void xenbus_read_mount_request(struct fs_mount *mount, char *frontend)
+int xenbus_read_mount_request(struct fs_mount *mount, char *frontend)
 {
     char node[1024];
     char *s;
@@ -126,12 +129,18 @@
     mount->frontend = frontend;
     snprintf(node, sizeof(node), "%s/state", frontend);
     s = xs_read(xsh, XBT_NULL, node, NULL);
-    assert(strcmp(s, STATE_READY) == 0);
+    if (strcmp(s, STATE_READY) != 0) {
+        FS_DEBUG("ERROR: frontend not read\n");
+        goto error;
+    }
     free(s);
     snprintf(node, sizeof(node), "%s/ring-size", frontend);
     s = xs_read(xsh, XBT_NULL, node, NULL);
     mount->shared_ring_size = atoi(s);
-    assert(mount->shared_ring_size <= MAX_RING_SIZE);
+    if (mount->shared_ring_size > MAX_RING_SIZE) {
+        FS_DEBUG("ERROR: shared_ring_size (%d) > MAX_RING_SIZE\n", 
mount->shared_ring_size);
+        goto error;
+    }
     free(s);
     for(i=0; i<mount->shared_ring_size; i++)
     {
@@ -144,6 +153,11 @@
     s = xs_read(xsh, XBT_NULL, node, NULL);
     mount->remote_evtchn = atoi(s);
     free(s);
+    return 0;
+
+error:
+    free(s);
+    return -1;
 }
 
 /* Small utility function to figure out our domain id */
@@ -161,7 +175,7 @@
 } 
 
 
-void xenbus_write_backend_node(struct fs_mount *mount)
+bool xenbus_write_backend_node(struct fs_mount *mount)
 {
     char node[1024], backend_node[1024];
     int self_id;
@@ -175,10 +189,10 @@
     xs_write(xsh, XBT_NULL, node, backend_node, strlen(backend_node));
 
     snprintf(node, sizeof(node), ROOT_NODE"/%d/state", mount->mount_id);
-    xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, 
strlen(STATE_INITIALISED));
+    return xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, 
strlen(STATE_INITIALISED));
 }
 
-void xenbus_write_backend_state(struct fs_mount *mount, const char *state)
+bool xenbus_write_backend_state(struct fs_mount *mount, const char *state)
 {
     char node[1024];
     int self_id;
@@ -186,29 +200,25 @@
     assert(xsh != NULL);
     self_id = get_self_id();
     snprintf(node, sizeof(node), ROOT_NODE"/%d/state", mount->mount_id);
-    xs_write(xsh, XBT_NULL, node, state, strlen(state));
+    return xs_write(xsh, XBT_NULL, node, state, strlen(state));
 }
 
-void xenbus_watch_frontend_state(struct fs_mount *mount)
+bool xenbus_watch_frontend_state(struct fs_mount *mount)
 {
-    int res;
     char statepath[1024];
 
     assert(xsh != NULL);
     snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
-    res = xs_watch(xsh, statepath, "frontend-state");
-    assert(res);
+    return xs_watch(xsh, statepath, "frontend-state");
 }
 
-void xenbus_unwatch_frontend_state(struct fs_mount *mount)
+bool xenbus_unwatch_frontend_state(struct fs_mount *mount)
 {
-    int res;
     char statepath[1024];
 
     assert(xsh != NULL);
     snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
-    res = xs_unwatch(xsh, statepath, "frontend-state");
-    assert(res);
+    return xs_unwatch(xsh, statepath, "frontend-state");
 }
 
 int xenbus_frontend_state_changed(struct fs_mount *mount, const char *oldstate)

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH] better error handling in fs-backend, Stefano Stabellini <=