The memevent code currently has a mechanism for reserving space in the ring
before putting an event, but each caller must individually ensure that the
vCPUs are correctly paused if no space is available.

This fixes that issue by reversing the semantics, we ensure that ensure space
is always left in the ring for one event per vCPU per ring.  If this constraint
is above to violated by a vCPU putting a response, we pause the vCPU on the way
out.

Additionally, we ensure that no events on lost by Xen as a consumer if multiple
responses land on the ring for a single domctl. (This is currently impossible,
but safe anyways... this patch is required by the next in the series).

Signed-off-by: Adin Scannell <adin@scannell.ca>

diff -r a422e2a4451e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4017,7 +4017,6 @@ static int hvm_memory_event_traps(long p
     struct vcpu* v = current;
     struct domain *d = v->domain;
     mem_event_request_t req;
-    int rc;
 
     if ( !(p & HVMPME_MODE_MASK) ) 
         return 0;
@@ -4025,10 +4024,6 @@ static int hvm_memory_event_traps(long p
     if ( (p & HVMPME_onchangeonly) && (value == old) )
         return 1;
     
-    rc = mem_event_check_ring(d, &d->mem_access);
-    if ( rc )
-        return rc;
-    
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_ACCESS;
     req.reason = reason;
diff -r a422e2a4451e xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -78,6 +78,9 @@ static int mem_event_enable(struct domai
     med->ring_page = map_domain_page(mfn_x(ring_mfn));
     med->shared_page = map_domain_page(mfn_x(shared_mfn));
 
+    /* Set the number of currently blocked vCPUs to 0. */
+    med->blocked = 0;
+
     /* Allocate event channel */
     rc = alloc_unbound_xen_event_channel(d->vcpu[0],
                                          current->domain->domain_id);
@@ -95,7 +98,7 @@ static int mem_event_enable(struct domai
     mem_event_ring_lock_init(med);
 
     /* Wake any VCPUs paused for memory events */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, med);
 
     return 0;
 
@@ -120,13 +123,41 @@ static int mem_event_disable(struct mem_
     return 0;
 }
 
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static inline int mem_event_ring_free(struct domain *d, struct mem_event_domain *med)
+{
+    int free_requests;
+
+    free_requests = RING_FREE_REQUESTS(&med->front_ring);
+    if ( unlikely(free_requests < d->max_vcpus) )
+    {
+        /* This may happen. */
+        gdprintk(XENLOG_INFO, "mem_event request slots for domain %d: %d\n",
+                               d->domain_id, free_requests);
+        WARN_ON(1);
+    }
+
+    return free_requests;
+}
+
+int mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
 {
     mem_event_front_ring_t *front_ring;
     RING_IDX req_prod;
 
+    if( mem_event_check_ring(d, med) < 0 )
+        return 0;
+
     mem_event_ring_lock(med);
 
+    if( mem_event_ring_free(d, med) == 0 )
+    {
+        /* This shouldn't happen. */
+        gdprintk(XENLOG_WARNING, "possible lost mem_event for domain %d\n",
+                                 d->domain_id);
+        mem_event_mark_and_pause(current, med);
+        return 0;
+    }
+
     front_ring = &med->front_ring;
     req_prod = front_ring->req_prod_pvt;
 
@@ -135,16 +166,28 @@ void mem_event_put_request(struct domain
     req_prod++;
 
     /* Update ring */
-    med->req_producers--;
     front_ring->req_prod_pvt = req_prod;
     RING_PUSH_REQUESTS(front_ring);
 
+    /*
+     * We ensure that each vcpu can put at least *one* event -- because some
+     * events are not repeatable, such as dropping a page.  This will ensure no
+     * vCPU is left with an event that they must place on the ring, but cannot.
+     * They will be paused after the event is placed.
+     * See large comment below in mem_event_unpause_vcpus().
+     */
+    if( current->domain->domain_id == d->domain_id &&
+        mem_event_ring_free(d, med) < d->max_vcpus )
+        mem_event_mark_and_pause(current, med);
+
     mem_event_ring_unlock(med);
 
     notify_via_xen_event_channel(d, med->xen_port);
+
+    return 1;
 }
 
-void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med, mem_event_response_t *rsp)
 {
     mem_event_front_ring_t *front_ring;
     RING_IDX rsp_cons;
@@ -154,6 +197,12 @@ void mem_event_get_response(struct mem_e
     front_ring = &med->front_ring;
     rsp_cons = front_ring->rsp_cons;
 
+    if( !RING_HAS_UNCONSUMED_RESPONSES(front_ring) )
+    {
+        mem_event_ring_unlock(med);
+        return 0;
+    }
+
     /* Copy response */
     memcpy(rsp, RING_GET_RESPONSE(front_ring, rsp_cons), sizeof(*rsp));
     rsp_cons++;
@@ -163,54 +212,65 @@ void mem_event_get_response(struct mem_e
     front_ring->sring->rsp_event = rsp_cons + 1;
 
     mem_event_ring_unlock(med);
+
+    return 1;
 }
 
-void mem_event_unpause_vcpus(struct domain *d)
+void mem_event_unpause_vcpus(struct domain *d, struct mem_event_domain *med)
 {
     struct vcpu *v;
+    int free;
+    int online = d->max_vcpus;
 
+    if( !med->blocked )
+        return;
+
+    mem_event_ring_lock(med);
+    free = mem_event_ring_free(d, med);
+
+    /*
+     * We ensure that we only have vCPUs online if there are enough free slots
+     * for their memory events to be processed.  This will ensure that no
+     * memory events are lost (due to the fact that certain types of events
+     * cannot be replayed, we need to ensure that there is space in the ring
+     * for when they are hit). 
+     * See large comment above in mem_event_put_request().
+     */
     for_each_vcpu ( d, v )
+        if ( test_bit(_VPF_mem_event, &v->pause_flags) )
+            online--;
+ 
+    for_each_vcpu ( d, v )
+    {
+        if ( !(med->blocked) || online >= mem_event_ring_free(d, med) )
+            break;
         if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+        {
             vcpu_wake(v);
+            online++;
+            med->blocked--;
+        }
+    }
+
+    mem_event_ring_unlock(med);
 }
 
-void mem_event_mark_and_pause(struct vcpu *v)
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med)
 {
-    set_bit(_VPF_mem_event, &v->pause_flags);
-    vcpu_sleep_nosync(v);
-}
-
-void mem_event_put_req_producers(struct mem_event_domain *med)
-{
-    mem_event_ring_lock(med);
-    med->req_producers--;
-    mem_event_ring_unlock(med);
+    if ( !test_bit(_VPF_mem_event, &v->pause_flags) )
+    {
+        set_bit(_VPF_mem_event, &v->pause_flags);
+        vcpu_sleep_nosync(v);
+        med->blocked++;
+    }
 }
 
 int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
 {
-    struct vcpu *curr = current;
-    int free_requests;
-    int ring_full = 1;
-
     if ( !med->ring_page )
         return -1;
 
-    mem_event_ring_lock(med);
-
-    free_requests = RING_FREE_REQUESTS(&med->front_ring);
-    if ( med->req_producers < free_requests )
-    {
-        med->req_producers++;
-        ring_full = 0;
-    }
-
-    if ( ring_full && (curr->domain == d) )
-        mem_event_mark_and_pause(curr);
-
-    mem_event_ring_unlock(med);
-
-    return ring_full;
+    return 0;
 }
 
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
diff -r a422e2a4451e xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -300,12 +300,16 @@ int mem_sharing_sharing_resume(struct do
 {
     mem_event_response_t rsp;
 
-    /* Get request off the ring */
-    mem_event_get_response(&d->mem_share, &rsp);
+    /* Get all requests off the ring */
+    while( mem_event_get_response(d, &d->mem_share, &rsp) )
+    {
+        /* Unpause domain/vcpu */
+        if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    }
 
-    /* Unpause domain/vcpu */
-    if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    /* Unpause any domains that were paused because the ring was full */
+    mem_event_unpause_vcpus(d, &d->mem_paging);
 
     return 0;
 }
diff -r a422e2a4451e xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -803,7 +803,6 @@ void p2m_mem_paging_populate(struct doma
     else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
     {
         /* gfn is already on its way back and vcpu is not paused */
-        mem_event_put_req_producers(&d->mem_paging);
         return;
     }
 
@@ -841,26 +840,27 @@ void p2m_mem_paging_resume(struct domain
     p2m_type_t p2mt;
     mfn_t mfn;
 
-    /* Pull the response off the ring */
-    mem_event_get_response(&d->mem_paging, &rsp);
+    /* Pull all responses off the ring */
+    while( mem_event_get_response(d, &d->mem_paging, &rsp) )
+    {
+        /* Fix p2m entry if the page was not dropped */
+        if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
+        {
+            mfn = gfn_to_mfn(d, rsp.gfn, &p2mt);
+            p2m_lock(p2m);
+            set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, p2m->default_access);
+            set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
+            audit_p2m(p2m, 1);
+            p2m_unlock(p2m);
+        }
 
-    /* Fix p2m entry if the page was not dropped */
-    if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
-    {
-        mfn = gfn_to_mfn(d, rsp.gfn, &p2mt);
-        p2m_lock(p2m);
-        set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, p2m->default_access);
-        set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
-        audit_p2m(p2m, 1);
-        p2m_unlock(p2m);
+        /* Unpause domain */
+        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
     }
 
-    /* Unpause domain */
-    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
-
     /* Unpause any domains that were paused because the ring was full */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, &d->mem_paging);
 }
 
 void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, 
@@ -899,7 +899,7 @@ void p2m_mem_access_check(unsigned long 
                    "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
                    v->vcpu_id, d->domain_id);
 
-            mem_event_mark_and_pause(v);
+            mem_event_mark_and_pause(v, &d->mem_access);
         }
         else
         {
@@ -911,8 +911,6 @@ void p2m_mem_access_check(unsigned long 
 
         return;
     }
-    else if ( res > 0 )
-        return;  /* No space in buffer; VCPU paused */
 
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_ACCESS;
@@ -943,15 +941,17 @@ void p2m_mem_access_resume(struct p2m_do
     struct domain *d = p2m->domain;
     mem_event_response_t rsp;
 
-    mem_event_get_response(&d->mem_access, &rsp);
-
-    /* Unpause domain */
-    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    /* Pull all responses off the ring */
+    while( mem_event_get_response(d, &d->mem_access, &rsp) )
+    {
+        /* Unpause domain */
+        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    }
 
     /* Unpause any domains that were paused because the ring was full or no listener 
      * was available */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, &d->mem_access);
 }
 
 
diff -r a422e2a4451e xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -25,12 +25,18 @@
 #define __MEM_EVENT_H__
 
 /* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med);
 int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
-void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_unpause_vcpus(struct domain *d, struct mem_event_domain *med);
+
+/* Returns 0 if ring is full/empty and 1 upon success.
+ * In either case, the vCPU may be blocked afterwards to ensure that the
+ * ring does not lose events.  In general, put_request should not fail, as
+ * it attempts to ensure that each vCPU can put at least one request. */
+int mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+                            mem_event_request_t *req);
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med,
+                            mem_event_response_t *rsp);
 
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                      XEN_GUEST_HANDLE(void) u_domctl);
diff -r a422e2a4451e xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -183,13 +183,14 @@ struct mem_event_domain
 {
     /* ring lock */
     spinlock_t ring_lock;
-    unsigned int req_producers;
     /* shared page */
     mem_event_shared_page_t *shared_page;
     /* shared ring page */
     void *ring_page;
     /* front-end ring */
     mem_event_front_ring_t front_ring;
+    /* the number of vCPUs blocked */
+    unsigned int blocked;
     /* event channel port (vcpu0 only) */
     int xen_port;
 };