[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] xenconsoled CPU denial of service problem



Hi Keir,

Any feedback on this rate limiting patch for DomU consoles ?

Regards,
Dan.

On Mon, Sep 11, 2006 at 03:19:05PM +0100, Daniel P. Berrange wrote:
> On Wed, Aug 30, 2006 at 07:13:30PM +0100, Keir Fraser wrote:
> > On 29/8/06 4:59 pm, "Daniel P. Berrange" <berrange@xxxxxxxxxx> wrote:
> > 
> > > The patch sets
> > >   - data size 5 kb
> > >   - period 200 ms
> > >   - delay 200 ms
> > 
> > A few comments:
> >  * I think the 'delay' parameter is not really useful. Think of this simply
> > as a simple credit-based scheduler that replenishes credit every 'period'.
> > So in this case you get 5kB every 200ms. If the domU offers more data in a
> > period, it must wait until its credit is replenished at the end of the
> > current 200ms period.
> >  * I'm not sure bytes of data is the right thing to limit here. The main
> > thing that hoses domain0 is the repeated rescheduling of the console daemon,
> > and that is fundamentally caused by event-channel notifications. So it might
> > make sense to rate limit the number of times we read the event-channel port
> > from xc_evtchn_pending -- e.g., no more than 10 times a second (should be
> > plenty). This has a few advantages: 1. Looking just at data transferred
> > doesn't stop a malicious domain from hosing you with no-op event-channel
> > notifications; 2. This is a fundamental metric that can be measured and
> > rate-limited on all backend interfaces, so perhaps we can come up with some
> > common library of code that we apply to all backends/daemons.
> 
> I've re-worked the patch based on this principle of "n events allowed
> in each time-slice", setting n=30 & the time-slice = 200ms. The code
> was actually much simpler than my previous patch so it's definitely a
> winning strategy.  Testing by running
> 
>  'while /bin/true ; do echo t > /proc/sysrq-trigger; done'
> 
> ...in one of the guest VMs on a 2.2 GHz Opteron, shows no significant CPU
> utilization attributed to xenconsoled. I've not examined whether this code
> can be put into a common library - it was simple enough to integrate in
> the xenconsoled event loop
> 
> > It may turn out we need to rate limit on data *as well*, if it turns out
> > that sinking many kilobytes of data a second is prohibitively expensive, but
> > I doubt this will happen. For a start, the strict limiting of notifications
> > will encourage data to get queued up and improve batching of console data,
> > and batches of data should be processed quite efficiently. This same
> > argument extends to other backends (e.g., batching of requests in xenstored,
> > netback, blkback, etc).
> 
> Based on initial testing it doesn't look like the data rate itself was causing
> any significant overhead, once the event channel port reads were limited.
> 
> 
>  Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx>
> 
> Regards,
> Dan.
> -- 
> |=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
> |=-           Perl modules: http://search.cpan.org/~danberr/              -=|
> |=-               Projects: http://freshmeat.net/~danielpb/               -=|
> |=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

> diff -r bfd00b317815 tools/console/daemon/io.c
> --- a/tools/console/daemon/io.c       Mon Sep 11 01:55:03 2006 +0100
> +++ b/tools/console/daemon/io.c       Mon Sep 11 10:09:32 2006 -0400
> @@ -37,12 +37,18 @@
>  #include <termios.h>
>  #include <stdarg.h>
>  #include <sys/mman.h>
> +#include <sys/time.h>
>  
>  #define MAX(a, b) (((a) > (b)) ? (a) : (b))
>  #define MIN(a, b) (((a) < (b)) ? (a) : (b))
>  
>  /* Each 10 bits takes ~ 3 digits, plus one, plus one for nul terminator. */
>  #define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + CHAR_BIT-1) / 10 * 3 + 2)
> +
> +/* How many events are allowed in each time period */
> +#define RATE_LIMIT_ALLOWANCE 30
> +/* Duration of each time period in ms */
> +#define RATE_LIMIT_PERIOD 200
>  
>  struct buffer
>  {
> @@ -65,6 +71,8 @@ struct domain
>       evtchn_port_t local_port;
>       int xce_handle;
>       struct xencons_interface *interface;
> +     int event_count;
> +     long long next_period;
>  };
>  
>  static struct domain *dom_head;
> @@ -306,6 +314,13 @@ static struct domain *create_domain(int 
>  {
>       struct domain *dom;
>       char *s;
> +     struct timeval tv;
> +
> +     if (gettimeofday(&tv, NULL) < 0) {
> +             dolog(LOG_ERR, "Cannot get time of day %s:%s:L%d",
> +                   __FILE__, __FUNCTION__, __LINE__);
> +             return NULL;
> +     }
>  
>       dom = (struct domain *)malloc(sizeof(struct domain));
>       if (dom == NULL) {
> @@ -330,6 +345,8 @@ static struct domain *create_domain(int 
>       dom->buffer.size = 0;
>       dom->buffer.capacity = 0;
>       dom->buffer.max_capacity = 0;
> +     dom->event_count = 0;
> +     dom->next_period = (tv.tv_sec * 1000) + (tv.tv_usec / 1000) + RATE_LIMIT_PERIOD;
>       dom->next = NULL;
>  
>       dom->ring_ref = -1;
> @@ -512,9 +529,12 @@ static void handle_ring_read(struct doma
>       if ((port = xc_evtchn_pending(dom->xce_handle)) == -1)
>               return;
>  
> +     dom->event_count++;
> +
>       buffer_append(dom);
>  
> -     (void)xc_evtchn_unmask(dom->xce_handle, port);
> +     if (dom->event_count < RATE_LIMIT_ALLOWANCE)
> +             (void)xc_evtchn_unmask(dom->xce_handle, port);
>  }
>  
>  static void handle_xs(void)
> @@ -549,6 +569,9 @@ void handle_io(void)
>       do {
>               struct domain *d, *n;
>               int max_fd = -1;
> +             struct timeval timeout;
> +             struct timeval tv;
> +             long long now, next_timeout = 0;
>  
>               FD_ZERO(&readfds);
>               FD_ZERO(&writefds);
> @@ -556,8 +579,33 @@ void handle_io(void)
>               FD_SET(xs_fileno(xs), &readfds);
>               max_fd = MAX(xs_fileno(xs), max_fd);
>  
> +             if (gettimeofday(&tv, NULL) < 0)
> +                     return;
> +             now = (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
> +
> +             /* Re-calculate any event counter allowances & unblock
> +                domains with new allowance */
>               for (d = dom_head; d; d = d->next) {
> -                     if (d->xce_handle != -1) {
> +                     /* Add 5ms of fuzz since select() often returns
> +                        a couple of ms sooner than requested. Without
> +                        the fuzz we typically do an extra spin in select()
> +                        with a 1/2 ms timeout every other iteration */
> +                     if ((now+5) > d->next_period) {
> +                             d->next_period = now + RATE_LIMIT_PERIOD;
> +                             if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
> +                                     (void)xc_evtchn_unmask(d->xce_handle, d->local_port);
> +                             }
> +                             d->event_count = 0;
> +                     }
> +             }
> +
> +             for (d = dom_head; d; d = d->next) {
> +                     if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
> +                             /* Determine if we're going to be the next time slice to expire */
> +                             if (!next_timeout ||
> +                                 d->next_period < next_timeout)
> +                                     next_timeout = d->next_period;
> +                     } else if (d->xce_handle != -1) {
>                               int evtchn_fd = xc_evtchn_fd(d->xce_handle);
>                               FD_SET(evtchn_fd, &readfds);
>                               max_fd = MAX(evtchn_fd, max_fd);
> @@ -573,16 +621,29 @@ void handle_io(void)
>                       }
>               }
>  
> -             ret = select(max_fd + 1, &readfds, &writefds, 0, NULL);
> +             /* If any domain has been rate limited, we need to work
> +                out what timeout to supply to select */
> +             if (next_timeout) {
> +                     long long duration = (next_timeout - now);
> +                     /* Shouldn't happen, but sanity check force greater than 0 */
> +                     if (duration <= 0)
> +                             duration = 1;
> +                     timeout.tv_sec = duration / 1000;
> +                     timeout.tv_usec = (duration - (timeout.tv_sec * 1000)) * 1000;
> +             }
> +
> +             ret = select(max_fd + 1, &readfds, &writefds, 0, next_timeout ? &timeout : NULL);
>  
>               if (FD_ISSET(xs_fileno(xs), &readfds))
>                       handle_xs();
>  
>               for (d = dom_head; d; d = n) {
>                       n = d->next;
> -                     if (d->xce_handle != -1 &&
> -                         FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds))
> -                             handle_ring_read(d);
> +                     if (d->event_count < RATE_LIMIT_ALLOWANCE) {
> +                             if (d->xce_handle != -1 &&
> +                                 FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds))
> +                                     handle_ring_read(d);
> +                     }
>  
>                       if (d->tty_fd != -1 && FD_ISSET(d->tty_fd, &readfds))
>                               handle_tty_read(d);

> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel


-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-           Perl modules: http://search.cpan.org/~danberr/              -=|
|=-               Projects: http://freshmeat.net/~danielpb/               -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.