WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH] trace security fixes

To: Jan Beulich <JBeulich@xxxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH] trace security fixes
From: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
Date: Mon, 28 Jun 2010 12:12:55 +0100
Cc: George Dunlap <George.Dunlap@xxxxxxxxxxxxx>
Delivery-date: Mon, 28 Jun 2010 04:13:59 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <4C289810020000780000860E@xxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AcsWrlZy1oStahetR0Ki7AZo6KfOpwABITjO
Thread-topic: [Xen-devel] [PATCH] trace security fixes
User-agent: Microsoft-Entourage/12.24.0.100205
Both of these xentrace patches need an Ack from George Dunlap. Also I will
need to know if they belong in any tree other than xen-unstable.

I would never tolerate use of volatiles rather than proper barriers in
general. But for xentrace I'll leave it up to George.

 -- Keir

On 28/06/2010 11:39, "Jan Beulich" <JBeulich@xxxxxxxxxx> wrote:

> After getting a report of 3.2.3's xenmon crashing Xen (as it turned out
> this was because c/s 17000 was backported to that tree without also
> applying c/s 17515), this is the result of reviewing the code for
> security issues:
> - the hypervisor should not rely on any specific state of the actual
>   trace buffer (as it is shared writable with Dom0)
> - there's no need to share writably the t_info pages (Dom0 only wants
>   [and needs] to read it)
> - T_INFO_FIRST_OFFSET wasn't defined correctly, thus allowing in the
>   num_online_cpus() == NR_CPUS case to pass a corrupted MFN to Dom0
> - check_tbuf_size() didn't consider the case of the incoming size not
>   allowing for the 2*data_size range for t_buf->{prod,cons}
> - printk()-s should be lower level or rate limited
> 
> For the first of those items, I chose to use 'volatile' qualifiers
> rather than sprinkling around barriers - this can certainly be changed
> if considered unacceptable.
> 
> To make clear what purpose specific variables have and/or where they
> got loaded from, the patch also changes the type of some of them to
> be explicitly u32/s32, and removes pointless assertions (like checking
> an unsigned variable to be >= 0).
> 
> I also took the prototype adjustment of __trace_var() as an opportunity
> to simplify the TRACE_xD() macros. Similar simplification could be done
> on the (quite numerous) direct callers of the function.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
> 
> --- 2010-06-15.orig/tools/xenmon/xenbaked.c     2010-06-01 13:39:57.000000000
> +0200
> +++ 2010-06-15/tools/xenmon/xenbaked.c  2010-06-28 10:23:01.000000000 +0200
> @@ -84,7 +84,7 @@ typedef struct settings_st {
>  } settings_t;
> 
>  struct t_struct {
> -    struct t_info *t_info;  /* Structure with information about individual
> buffers */
> +    const struct t_info *t_info; /* Structure with information about
> individual buffers */
>      struct t_buf **meta;    /* Pointers to trace buffer metadata */
>      unsigned char **data;   /* Pointers to trace buffer data areas */
>  };
> @@ -376,9 +376,8 @@ static struct t_struct *map_tbufs(unsign
>      }
> 
>      /* Map t_info metadata structure */
> -    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN,
> -                                        tinfo_size, PROT_READ | PROT_WRITE,
> -                                        tbufs_mfn);
> +    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
> +                                        PROT_READ, tbufs_mfn);
> 
>      if ( tbufs.t_info == 0 )
>      {
> @@ -404,7 +403,8 @@ static struct t_struct *map_tbufs(unsign
>      for(i=0; i<num; i++)
>      {
> 
> -        uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) +
> tbufs.t_info->mfn_offset[i];
> +        const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
> +                                   + tbufs.t_info->mfn_offset[i];
>          int j;
>          xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
> 
> --- 2010-06-15.orig/tools/xentrace/xentrace.c   2010-06-01 13:39:57.000000000
> +0200
> +++ 2010-06-15/tools/xentrace/xentrace.c        2010-06-28 10:23:26.000000000
> +0200
> @@ -63,7 +63,7 @@ typedef struct settings_st {
>  } settings_t;
> 
>  struct t_struct {
> -    struct t_info *t_info;  /* Structure with information about individual
> buffers */
> +    const struct t_info *t_info; /* Structure with information about
> individual buffers */
>      struct t_buf **meta;    /* Pointers to trace buffer metadata */
>      unsigned char **data;   /* Pointers to trace buffer data areas */
>  };
> @@ -475,9 +475,8 @@ static struct t_struct *map_tbufs(unsign
>      int i;
> 
>      /* Map t_info metadata structure */
> -    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN,
> -                                        tinfo_size, PROT_READ | PROT_WRITE,
> -                                        tbufs_mfn);
> +    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
> +                                        PROT_READ, tbufs_mfn);
> 
>      if ( tbufs.t_info == 0 )
>      {
> @@ -503,7 +502,8 @@ static struct t_struct *map_tbufs(unsign
>      for(i=0; i<num; i++)
>      {
> 
> -        uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) +
> tbufs.t_info->mfn_offset[i];
> +        const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
> +                                   + tbufs.t_info->mfn_offset[i];
>          int j;
>          xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
> 
> --- 2010-06-15.orig/xen/common/trace.c  2010-06-28 09:43:58.000000000 +0200
> +++ 2010-06-15/xen/common/trace.c       2010-06-28 11:58:37.000000000 +0200
> @@ -51,15 +51,15 @@ static struct t_info *t_info;
>  #define T_INFO_PAGES 2  /* Size fixed at 2 pages for now. */
>  #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
>  /* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t
> */
> -#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) +
> 1) / sizeof(uint32_t))
> -static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
> +#define T_INFO_FIRST_OFFSET (((2 + NR_CPUS) * sizeof(uint16_t)) /
> sizeof(uint32_t))
> +static DEFINE_PER_CPU_READ_MOSTLY(volatile struct t_buf *, t_bufs);
>  static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
>  static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
> -static int data_size;
> +static u32 data_size;
> 
>  /* High water mark for trace buffers; */
>  /* Send virtual interrupt when buffer level reaches this point */
> -static int t_buf_highwater;
> +static u32 t_buf_highwater;
> 
>  /* Number of records lost due to per-CPU trace buffer being full. */
>  static DEFINE_PER_CPU(unsigned long, lost_records);
> @@ -77,11 +77,16 @@ static u32 tb_event_mask = TRC_ALL;
> 
>  /**
>   * check_tbuf_size - check to make sure that the proposed size will fit
> - * in the currently sized struct t_info.
> + * in the currently sized struct t_info and allows prod and cons to
> + * reach double the value without overflow.
>   */
> -static inline int check_tbuf_size(int size)
> +static int check_tbuf_size(u32 pages)
>  {
> -    return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE /
> sizeof(uint32_t));
> +    u32 size = pages * PAGE_SIZE;
> +
> +    return (size / PAGE_SIZE != pages) || (size + size < size) ||
> +           (num_online_cpus() * pages + T_INFO_FIRST_OFFSET >
> +            T_INFO_SIZE / sizeof(uint32_t));
>  }
> 
>  /**
> @@ -115,7 +120,7 @@ static int alloc_trace_bufs(void)
>      }
> 
>      t_info->tbuf_size = opt_tbuf_size;
> -    printk("tbuf_size %d\n", t_info->tbuf_size);
> +    printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size);
> 
>      nr_pages = opt_tbuf_size;
>      order = get_order_from_pages(nr_pages);
> @@ -139,7 +144,7 @@ static int alloc_trace_bufs(void)
> 
>          spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
> 
> -        buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
> +        per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
>          buf->cons = buf->prod = 0;
>          per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
> 
> @@ -171,7 +176,7 @@ static int alloc_trace_bufs(void)
>              /* Write list first, then write per-cpu offset. */
>              wmb();
>              t_info->mfn_offset[cpu]=offset;
> -            printk("p%d mfn %"PRIx32" offset %d\n",
> +            printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
>                     cpu, mfn, offset);
>              offset+=i;
>          }
> @@ -190,6 +195,7 @@ out_dealloc:
>          spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
>          if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
>          {
> +            per_cpu(t_bufs, cpu) = NULL;
>              ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
>              free_xenheap_pages(rawbuf, order);
>          }
> @@ -308,7 +314,7 @@ void __init init_trace_bufs(void)
> 
>      for(i=0; i<T_INFO_PAGES; i++)
>          share_xen_page_with_privileged_guests(
> -            virt_to_page(t_info) + i, XENSHARE_writable);
> +            virt_to_page(t_info) + i, XENSHARE_readonly);
> 
>      if ( opt_tbuf_size == 0 )
>      {
> @@ -404,19 +410,37 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
>      return rc;
>  }
> 
> -static inline int calc_rec_size(int cycles, int extra)
> +static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra)
>  {
> -    int rec_size;
> -    rec_size = 4;
> +    unsigned int rec_size = 4;
> +
>      if ( cycles )
>          rec_size += 8;
>      rec_size += extra;
>      return rec_size;
>  }
> 
> -static inline int calc_unconsumed_bytes(struct t_buf *buf)
> +static inline bool_t bogus(u32 prod, u32 cons)
> +{
> +    if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
> +         unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
> +    {
> +        tb_init_done = 0;
> +        printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons
> (%08x)\n",
> +               smp_processor_id(), prod, cons);
> +        return 1;
> +    }
> +    return 0;
> +}
> +
> +static inline u32 calc_unconsumed_bytes(const volatile struct t_buf *buf)
>  {
> -    int x = buf->prod - buf->cons;
> +    u32 prod = buf->prod, cons = buf->cons;
> +    s32 x = prod - cons;
> +
> +    if ( bogus(prod, cons) )
> +        return data_size;
> +
>      if ( x < 0 )
>          x += 2*data_size;
> 
> @@ -426,9 +450,14 @@ static inline int calc_unconsumed_bytes(
>      return x;
>  }
> 
> -static inline int calc_bytes_to_wrap(struct t_buf *buf)
> +static inline u32 calc_bytes_to_wrap(const volatile struct t_buf *buf)
>  {
> -    int x = data_size - buf->prod;
> +    u32 prod = buf->prod;
> +    s32 x = data_size - prod;
> +
> +    if ( bogus(prod, buf->cons) )
> +        return 0;
> +
>      if ( x <= 0 )
>          x += data_size;
> 
> @@ -438,35 +467,37 @@ static inline int calc_bytes_to_wrap(str
>      return x;
>  }
> 
> -static inline int calc_bytes_avail(struct t_buf *buf)
> +static inline u32 calc_bytes_avail(const volatile struct t_buf *buf)
>  {
>      return data_size - calc_unconsumed_bytes(buf);
>  }
> 
> -static inline struct t_rec *
> -next_record(struct t_buf *buf)
> +static inline struct t_rec *next_record(const volatile struct t_buf *buf)
>  {
> -    int x = buf->prod;
> +    u32 x = buf->prod;
> +
> +    if ( !tb_init_done || bogus(x, buf->cons) )
> +        return NULL;
> +
>      if ( x >= data_size )
>          x -= data_size;
> 
> -    ASSERT(x >= 0);
>      ASSERT(x < data_size);
> 
>      return (struct t_rec *)&this_cpu(t_data)[x];
>  }
> 
> -static inline int __insert_record(struct t_buf *buf,
> -                                  unsigned long event,
> -                                  int extra,
> -                                  int cycles,
> -                                  int rec_size,
> -                                  unsigned char *extra_data)
> +static inline void __insert_record(volatile struct t_buf *buf,
> +                                   unsigned long event,
> +                                   unsigned int extra,
> +                                   bool_t cycles,
> +                                   unsigned int rec_size,
> +                                   const void *extra_data)
>  {
>      struct t_rec *rec;
>      unsigned char *dst;
> -    unsigned long extra_word = extra/sizeof(u32);
> -    int local_rec_size = calc_rec_size(cycles, extra);
> +    unsigned int extra_word = extra / sizeof(u32);
> +    unsigned int local_rec_size = calc_rec_size(cycles, extra);
>      uint32_t next;
> 
>      BUG_ON(local_rec_size != rec_size);
> @@ -475,17 +506,20 @@ static inline int __insert_record(struct
>      /* Double-check once more that we have enough space.
>       * Don't bugcheck here, in case the userland tool is doing
>       * something stupid. */
> -    if ( calc_bytes_avail(buf) < rec_size )
> +    next = calc_bytes_avail(buf);
> +    if ( next < rec_size )
>      {
> -        printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
> -               __func__,
> -               calc_bytes_avail(buf),
> -               data_size, buf->prod, buf->cons, data_size, rec_size);
> -        return 0;
> +        if ( printk_ratelimit() )
> +            printk(XENLOG_WARNING
> +                   "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n",
> +                   __func__, next, data_size, buf->prod, buf->cons,
> rec_size);
> +        return;
>      }
>      rmb();
> 
>      rec = next_record(buf);
> +    if ( !rec )
> +        return;
>      rec->event = event;
>      rec->extra_u32 = extra_word;
>      dst = (unsigned char *)rec->u.nocycles.extra_u32;
> @@ -502,21 +536,22 @@ static inline int __insert_record(struct
> 
>      wmb();
> 
> -    next = buf->prod + rec_size;
> +    next = buf->prod;
> +    if ( bogus(next, buf->cons) )
> +        return;
> +    next += rec_size;
>      if ( next >= 2*data_size )
>          next -= 2*data_size;
> -    ASSERT(next >= 0);
>      ASSERT(next < 2*data_size);
>      buf->prod = next;
> -
> -    return rec_size;
>  }
> 
> -static inline int insert_wrap_record(struct t_buf *buf, int size)
> +static inline void insert_wrap_record(volatile struct t_buf *buf,
> +                                      unsigned int size)
>  {
> -    int space_left = calc_bytes_to_wrap(buf);
> -    unsigned long extra_space = space_left - sizeof(u32);
> -    int cycles = 0;
> +    u32 space_left = calc_bytes_to_wrap(buf);
> +    unsigned int extra_space = space_left - sizeof(u32);
> +    bool_t cycles = 0;
> 
>      BUG_ON(space_left > size);
> 
> @@ -528,17 +563,13 @@ static inline int insert_wrap_record(str
>          ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
>      }
> 
> -    return __insert_record(buf,
> -                    TRC_TRACE_WRAP_BUFFER,
> -                    extra_space,
> -                    cycles,
> -                    space_left,
> -                    NULL);
> +    __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
> +                    space_left, NULL);
>  }
> 
>  #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
> 
> -static inline int insert_lost_records(struct t_buf *buf)
> +static inline void insert_lost_records(volatile struct t_buf *buf)
>  {
>      struct {
>          u32 lost_records;
> @@ -553,12 +584,8 @@ static inline int insert_lost_records(st
> 
>      this_cpu(lost_records) = 0;
> 
> -    return __insert_record(buf,
> -                           TRC_LOST_RECORDS,
> -                           sizeof(ed),
> -                           1 /* cycles */,
> -                           LOST_REC_SIZE,
> -                           (unsigned char *)&ed);
> +    __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
> +                    LOST_REC_SIZE, &ed);
>  }
> 
>  /*
> @@ -580,13 +607,15 @@ static DECLARE_TASKLET(trace_notify_dom0
>   * failure, otherwise 0.  Failure occurs only if the trace buffers are not
> yet
>   * initialised.
>   */
> -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
> +void __trace_var(u32 event, bool_t cycles, unsigned int extra,
> +                 const void *extra_data)
>  {
> -    struct t_buf *buf;
> -    unsigned long flags, bytes_to_tail, bytes_to_wrap;
> -    int rec_size, total_size;
> -    int extra_word;
> -    int started_below_highwater = 0;
> +    volatile struct t_buf *buf;
> +    unsigned long flags;
> +    u32 bytes_to_tail, bytes_to_wrap;
> +    unsigned int rec_size, total_size;
> +    unsigned int extra_word;
> +    bool_t started_below_highwater;
> 
>      if( !tb_init_done )
>          return;
> @@ -619,13 +648,11 @@ void __trace_var(u32 event, int cycles,
> 
>      /* Read tb_init_done /before/ t_bufs. */
>      rmb();
> -
> -    spin_lock_irqsave(&this_cpu(t_lock), flags);
> -
>      buf = this_cpu(t_bufs);
> -
>      if ( unlikely(!buf) )
> -        goto unlock;
> +        return;
> +
> +    spin_lock_irqsave(&this_cpu(t_lock), flags);
> 
>      started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
> 
> --- 2010-06-15.orig/xen/include/xen/trace.h     2010-06-28 09:43:58.000000000
> +0200
> +++ 2010-06-15/xen/include/xen/trace.h  2010-06-25 12:08:36.000000000 +0200
> @@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op
> 
>  int trace_will_trace_event(u32 event);
> 
> -void __trace_var(u32 event, int cycles, int extra, unsigned char
> *extra_data);
> +void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
> 
>  static inline void trace_var(u32 event, int cycles, int extra,
>                                 unsigned char *extra_data)
> @@ -57,7 +57,7 @@ static inline void trace_var(u32 event,
>          {                                                       \
>              u32 _d[1];                                          \
>              _d[0] = d1;                                         \
> -            __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>          }                                                       \
>      } while ( 0 )
> 
> @@ -68,7 +68,7 @@ static inline void trace_var(u32 event,
>              u32 _d[2];                                          \
>              _d[0] = d1;                                         \
>              _d[1] = d2;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>          }                                                       \
>      } while ( 0 )
> 
> @@ -80,7 +80,7 @@ static inline void trace_var(u32 event,
>              _d[0] = d1;                                         \
>              _d[1] = d2;                                         \
>              _d[2] = d3;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>          }                                                       \
>      } while ( 0 )
> 
> @@ -93,7 +93,7 @@ static inline void trace_var(u32 event,
>              _d[1] = d2;                                         \
>              _d[2] = d3;                                         \
>              _d[3] = d4;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>          }                                                       \
>      } while ( 0 )
> 
> @@ -107,7 +107,7 @@ static inline void trace_var(u32 event,
>              _d[2] = d3;                                         \
>              _d[3] = d4;                                         \
>              _d[4] = d5;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>          }                                                       \
>      } while ( 0 )
> 
> @@ -122,7 +122,7 @@ static inline void trace_var(u32 event,
>              _d[3] = d4;                                         \
>              _d[4] = d5;                                         \
>              _d[5] = d6;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>          }                                                       \
>      } while ( 0 )
> 
> 
> 



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel