Hi Keir,
Any feedback on this rate-limiting patch for DomU consoles?
Regards,
Dan.
On Mon, Sep 11, 2006 at 03:19:05PM +0100, Daniel P. Berrange wrote:
> On Wed, Aug 30, 2006 at 07:13:30PM +0100, Keir Fraser wrote:
> > On 29/8/06 4:59 pm, "Daniel P. Berrange" <berrange@xxxxxxxxxx> wrote:
> >
> > > The patch sets
> > >   - data size 5 kB
> > > - period 200 ms
> > > - delay 200 ms
> >
> > A few comments:
> > * I think the 'delay' parameter is not really useful. Think of this as a
> > simple credit-based scheduler that replenishes credit every 'period'. So in
> > this case you get 5kB every 200ms. If the domU offers more data in a period,
> > it must wait until its credit is replenished at the end of the current 200ms
> > period.
> > * I'm not sure bytes of data is the right thing to limit here. The main
> > thing that hoses domain0 is the repeated rescheduling of the console daemon,
> > and that is fundamentally caused by event-channel notifications. So it might
> > make sense to rate limit the number of times we read the event-channel port
> > from xc_evtchn_pending -- e.g., no more than 10 times a second (should be
> > plenty). This has a few advantages: 1. Looking just at data transferred
> > doesn't stop a malicious domain from hosing you with no-op event-channel
> > notifications; 2. This is a fundamental metric that can be measured and
> > rate-limited on all backend interfaces, so perhaps we can come up with some
> > common library of code that we apply to all backends/daemons.
>
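> The way I read the credit idea is roughly the sketch below, where a 'credit'
> can be a byte of data or an event notification depending on which we limit.
> All of the names are made up purely for illustration - they aren't from the
> patch or any existing header:
> 
>   /* Hypothetical per-domain credit state */
>   struct credit_limiter {
>       unsigned  credit;       /* credits left in the current period */
>       unsigned  allowance;    /* credits granted at the start of each period */
>       long long period_end;   /* end of the current period, in ms */
>       int       period_ms;    /* length of each period, e.g. 200 */
>   };
> 
>   /* Try to consume 'n' credits (bytes sunk, or just 1 per event
>      notification). Returns how many were granted; anything beyond that
>      has to wait until the period rolls over and credit is replenished. */
>   static unsigned credit_consume(struct credit_limiter *cl,
>                                  long long now_ms, unsigned n)
>   {
>       if (now_ms >= cl->period_end) {
>           cl->credit = cl->allowance;            /* replenish in full */
>           cl->period_end = now_ms + cl->period_ms;
>       }
>       if (n > cl->credit)
>           n = cl->credit;
>       cl->credit -= n;
>       return n;
>   }
> 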
> I've re-worked the patch based on this principle of "n events allowed
> in each time-slice", setting n=30 & the time-slice = 200ms. The code
> was actually much simpler than my previous patch, so it's definitely a
> winning strategy. Testing by running
>
> 'while /bin/true ; do echo t > /proc/sysrq-trigger; done'
>
> ...in one of the guest VMs on a 2.2 GHz Opteron shows no significant CPU
> utilization attributed to xenconsoled. I've not examined whether this code
> can be put into a common library - it was simple enough to integrate into
> the xenconsoled event loop.
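> 
> If we did want a shared version later, I'd picture the library interface
> looking something like the sketch below. This is purely illustrative - none
> of these names exist in the tree:
> 
>   /* One of these would live in each backend's per-frontend state. */
>   struct evt_limiter {
>       int       count;        /* events seen in the current period */
>       int       allowance;    /* max events allowed per period, e.g. 30 */
>       int       period_ms;    /* length of each period, e.g. 200 */
>       long long period_end;   /* end of the current period, in ms */
>   };
> 
>   /* Record one event notification. Returns non-zero if the event channel
>      may be unmasked again, zero if it should stay masked until the
>      period rolls over. */
>   static int evt_limiter_event(struct evt_limiter *el, long long now_ms)
>   {
>       if (now_ms >= el->period_end) {
>           el->period_end = now_ms + el->period_ms;
>           el->count = 0;
>       }
>       return ++el->count < el->allowance;
>   }
> 
>   /* Milliseconds until a throttled connection gets fresh allowance, or
>      zero if it isn't currently throttled; the main loop would take the
>      minimum across connections as its select() timeout. */
>   static long long evt_limiter_timeout(const struct evt_limiter *el,
>                                        long long now_ms)
>   {
>       if (el->count < el->allowance)
>           return 0;
>       return el->period_end > now_ms ? el->period_end - now_ms : 1;
>   }
> 
> In xenconsoled terms that's just what handle_ring_read() and the select()
> timeout calculation in the patch below already do, so extracting it later
> should be fairly mechanical.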
>
> > We may need to rate limit on data *as well* if it turns out that sinking
> > many kilobytes of data a second is prohibitively expensive, but I doubt
> > this will happen. For a start, the strict limiting of notifications
> > will encourage data to get queued up and improve batching of console data,
> > and batches of data should be processed quite efficiently. This same
> > argument extends to other backends (e.g., batching of requests in xenstored,
> > netback, blkback, etc).
>
> Based on initial testing, it doesn't look like the data rate itself causes
> any significant overhead once the event-channel port reads are limited.
>
>
> Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx>
>
> Regards,
> Dan.
> --
> |=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=|
> |=- Perl modules: http://search.cpan.org/~danberr/ -=|
> |=- Projects: http://freshmeat.net/~danielpb/ -=|
> |=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=|
> diff -r bfd00b317815 tools/console/daemon/io.c
> --- a/tools/console/daemon/io.c Mon Sep 11 01:55:03 2006 +0100
> +++ b/tools/console/daemon/io.c Mon Sep 11 10:09:32 2006 -0400
> @@ -37,12 +37,18 @@
> #include <termios.h>
> #include <stdarg.h>
> #include <sys/mman.h>
> +#include <sys/time.h>
>
> #define MAX(a, b) (((a) > (b)) ? (a) : (b))
> #define MIN(a, b) (((a) < (b)) ? (a) : (b))
>
> /* Each 10 bits takes ~ 3 digits, plus one, plus one for nul terminator. */
> #define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + CHAR_BIT-1) / 10 * 3 + 2)
> +
> +/* How many events are allowed in each time period */
> +#define RATE_LIMIT_ALLOWANCE 30
> +/* Duration of each time period in ms */
> +#define RATE_LIMIT_PERIOD 200
>
> struct buffer
> {
> @@ -65,6 +71,8 @@ struct domain
> evtchn_port_t local_port;
> int xce_handle;
> struct xencons_interface *interface;
> + int event_count;
> + long long next_period;
> };
>
> static struct domain *dom_head;
> @@ -306,6 +314,13 @@ static struct domain *create_domain(int
> {
> struct domain *dom;
> char *s;
> + struct timeval tv;
> +
> + if (gettimeofday(&tv, NULL) < 0) {
> + dolog(LOG_ERR, "Cannot get time of day %s:%s:L%d",
> + __FILE__, __FUNCTION__, __LINE__);
> + return NULL;
> + }
>
> dom = (struct domain *)malloc(sizeof(struct domain));
> if (dom == NULL) {
> @@ -330,6 +345,8 @@ static struct domain *create_domain(int
> dom->buffer.size = 0;
> dom->buffer.capacity = 0;
> dom->buffer.max_capacity = 0;
> + dom->event_count = 0;
> + dom->next_period = (tv.tv_sec * 1000) + (tv.tv_usec / 1000) + RATE_LIMIT_PERIOD;
> dom->next = NULL;
>
> dom->ring_ref = -1;
> @@ -512,9 +529,12 @@ static void handle_ring_read(struct doma
> if ((port = xc_evtchn_pending(dom->xce_handle)) == -1)
> return;
>
> + dom->event_count++;
> +
> buffer_append(dom);
>
> - (void)xc_evtchn_unmask(dom->xce_handle, port);
> + if (dom->event_count < RATE_LIMIT_ALLOWANCE)
> + (void)xc_evtchn_unmask(dom->xce_handle, port);
> }
>
> static void handle_xs(void)
> @@ -549,6 +569,9 @@ void handle_io(void)
> do {
> struct domain *d, *n;
> int max_fd = -1;
> + struct timeval timeout;
> + struct timeval tv;
> + long long now, next_timeout = 0;
>
> FD_ZERO(&readfds);
> FD_ZERO(&writefds);
> @@ -556,8 +579,33 @@ void handle_io(void)
> FD_SET(xs_fileno(xs), &readfds);
> max_fd = MAX(xs_fileno(xs), max_fd);
>
> + if (gettimeofday(&tv, NULL) < 0)
> + return;
> + now = (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
> +
> + /* Re-calculate any event counter allowances & unblock
> + domains with new allowance */
> for (d = dom_head; d; d = d->next) {
> - if (d->xce_handle != -1) {
> + /* Add 5ms of fuzz since select() often returns
> + a couple of ms sooner than requested. Without
> + the fuzz we typically do an extra spin in select()
> + with a 1/2 ms timeout every other iteration */
> + if ((now+5) > d->next_period) {
> + d->next_period = now + RATE_LIMIT_PERIOD;
> + if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
> + (void)xc_evtchn_unmask(d->xce_handle, d->local_port);
> + }
> + d->event_count = 0;
> + }
> + }
> +
> + for (d = dom_head; d; d = d->next) {
> + if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
> + /* Determine if we're going to be the next time slice to expire */
> + if (!next_timeout ||
> + d->next_period < next_timeout)
> + next_timeout = d->next_period;
> + } else if (d->xce_handle != -1) {
> int evtchn_fd = xc_evtchn_fd(d->xce_handle);
> FD_SET(evtchn_fd, &readfds);
> max_fd = MAX(evtchn_fd, max_fd);
> @@ -573,16 +621,29 @@ void handle_io(void)
> }
> }
>
> - ret = select(max_fd + 1, &readfds, &writefds, 0, NULL);
> + /* If any domain has been rate limited, we need to work
> + out what timeout to supply to select */
> + if (next_timeout) {
> + long long duration = (next_timeout - now);
> + /* Shouldn't happen, but as a sanity check force it to be greater than 0 */
> + if (duration <= 0)
> + duration = 1;
> + timeout.tv_sec = duration / 1000;
> + timeout.tv_usec = (duration - (timeout.tv_sec * 1000)) * 1000;
> + }
> +
> + ret = select(max_fd + 1, &readfds, &writefds, 0, next_timeout ? &timeout : NULL);
>
> if (FD_ISSET(xs_fileno(xs), &readfds))
> handle_xs();
>
> for (d = dom_head; d; d = n) {
> n = d->next;
> - if (d->xce_handle != -1 &&
> - FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds))
> - handle_ring_read(d);
> + if (d->event_count < RATE_LIMIT_ALLOWANCE) {
> + if (d->xce_handle != -1 &&
> + FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds))
> + handle_ring_read(d);
> + }
>
> if (d->tty_fd != -1 && FD_ISSET(d->tty_fd, &readfds))
> handle_tty_read(d);
--
|=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=|
|=- Perl modules: http://search.cpan.org/~danberr/ -=|
|=- Projects: http://freshmeat.net/~danielpb/ -=|
|=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=|
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel