[Xen-devel] RE: VM hung after running sometime

Hi Keir:
 
Regarding the HVM hang: according to our recent tests, this issue still exists.
While going through the code I observed something abnormal and need your help.
 
We've noticed that when a VM hangs, its VCPU pause flags value is always 4, which indicates
_VPF_blocked_in_xen; that flag is set in prepare_wait_on_xen_event_channel. I've also noticed
that domain U sets up an event channel with domain 0 for each VCPU, and qemu-dm select()s on
the event channel fd.
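For reference, this is roughly how I understand the qemu-dm side sets this up. The sketch
below is my own simplified paraphrase, not the actual helper2.c code: error handling and the
shared-page mapping are omitted, and ioreq_remote_port[] is a hypothetical array holding the
ports the hypervisor allocated for the guest.

-------------------------binding sketch (illustrative only)-------------------------
#include <sys/select.h>
#include <xenctrl.h>

/* Bind one interdomain event channel per guest VCPU; the local ports
 * are what xc_evtchn_pending() reports later in cpu_get_ioreq(). */
static int bind_and_wait(int xce_handle, domid_t domid,
                         evtchn_port_t *ioreq_remote_port,
                         evtchn_port_t *ioreq_local_port, int vcpus)
{
    int i, fd;
    fd_set rfds;

    for ( i = 0; i < vcpus; i++ )
        ioreq_local_port[i] =
            xc_evtchn_bind_interdomain(xce_handle, domid,
                                       ioreq_remote_port[i]);

    /* qemu-dm's main loop select()s on this fd; when it becomes
     * readable, cpu_handle_ioreq() runs. */
    fd = xc_evtchn_fd(xce_handle);
    FD_ZERO(&rfds);
    FD_SET(fd, &rfds);
    return select(fd + 1, &rfds, NULL, NULL, NULL);
}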
 
notify_via_xen_event_channel is called when domain U issues a request. qemu-dm then receives
the event and invokes cpu_handle_ioreq (xen-4.0.0/tools/ioemu-qemu-xen/i386-dm/helper2.c)
-> cpu_get_ioreq() -> xc_evtchn_unmask(). evtchn_unmask in turn operates on evtchn_pending,
evtchn_mask, and evtchn_pending_sel.
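As far as I can reconstruct it from memory, the dispatch on the qemu-dm side looks roughly
like this (a heavily simplified paraphrase, not a verbatim quote; handle_one_ioreq is my
own shorthand for the body of cpu_handle_ioreq):

-------------------------dispatch sketch (paraphrased)-------------------------
static void handle_one_ioreq(CPUState *env)
{
    /* cpu_get_ioreq() (quoted below) also unmasks the port. */
    ioreq_t *req = cpu_get_ioreq();

    if ( req == NULL )
        return;

    __handle_ioreq(env, req);          /* emulate the access */

    /* Signal completion back to the guest VCPU; this is what should
     * eventually clear _VPF_blocked_in_xen on the other side. */
    req->state = STATE_IORESP_READY;
    xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]);
}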
 
My confusion is with notify_via_xen_event_channel() -> evtchn_set_pending: the
**evtchn_set_pending here is not locked** (it runs under the local domain's event_lock, not
the remote domain's), yet it also operates on evtchn_pending, evtchn_mask, and
evtchn_pending_sel of the remote domain.
 
I'm afraid this race might cause an event from dom U to qemu-dm to go undelivered, but I am
not sure, since I still do not fully understand where evtchn_mask is set and where
evtchn_pending is cleared. (See also the small user-space analogue I put after the
evtchn_unmask listing below.)
 
-------------------------notify_via_xen_event_channel-------------------------------------
void notify_via_xen_event_channel(int lport)
{
    struct evtchn *lchn, *rchn;
    struct domain *ld = current->domain, *rd;
    int            rport;

    spin_lock(&ld->event_lock);

    ASSERT(port_is_valid(ld, lport));
    lchn = evtchn_from_port(ld, lport);
    ASSERT(lchn->consumer_is_xen);

    if ( likely(lchn->state == ECS_INTERDOMAIN) )
    {
        rd    = lchn->u.interdomain.remote_dom;
        rport = lchn->u.interdomain.remote_port;
        rchn  = evtchn_from_port(rd, rport);
        evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
    }

    spin_unlock(&ld->event_lock);
}
     
----------------------------evtchn_set_pending----------------------
static int evtchn_set_pending(struct vcpu *v, int port)
{
    struct domain *d = v->domain;
    int vcpuid;

    /*
     * The following bit operations must happen in strict order.
     * NB. On x86, the atomic bit operations also act as memory barriers.
     * There is therefore sufficiently strict ordering for this architecture --
     * others may require explicit memory barriers.
     */

    if ( test_and_set_bit(port, &shared_info(d, evtchn_pending)) )
        return 1;

    if ( !test_bit        (port, &shared_info(d, evtchn_mask)) &&
         !test_and_set_bit(port / BITS_PER_EVTCHN_WORD(d),
                           &vcpu_info(v, evtchn_pending_sel)) )
    {
        vcpu_mark_events_pending(v);
    }

    /* Check if some VCPU might be polling for this event. */
    if ( likely(bitmap_empty(d->poll_mask, d->max_vcpus)) )
        return 0;

    /* Wake any interested (or potentially interested) pollers. */
    for ( vcpuid = find_first_bit(d->poll_mask, d->max_vcpus);
          vcpuid < d->max_vcpus;
          vcpuid = find_next_bit(d->poll_mask, d->max_vcpus, vcpuid+1) )
    {
        v = d->vcpu[vcpuid];
        if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
             test_and_clear_bit(vcpuid, d->poll_mask) )
        {
            v->poll_evtchn = 0;
            vcpu_unblock(v);
        }
    }

    return 0;
}
--------------------------------------evtchn_unmask------------------------------
int evtchn_unmask(unsigned int port)
{
    struct domain *d = current->domain;
    struct vcpu   *v;

    spin_lock(&d->event_lock);

    if ( unlikely(!port_is_valid(d, port)) )
    {
        spin_unlock(&d->event_lock);
        return -EINVAL;
    }

    v = d->vcpu[evtchn_from_port(d, port)->notify_vcpu_id];

    /*
     * These operations must happen in strict order. Based on
     * include/xen/event.h:evtchn_set_pending().
     */
    if ( test_and_clear_bit(port, &shared_info(d, evtchn_mask)) &&
         test_bit          (port, &shared_info(d, evtchn_pending)) &&
         !test_and_set_bit (port / BITS_PER_EVTCHN_WORD(d),
                            &vcpu_info(v, evtchn_pending_sel)) )
    {
        vcpu_mark_events_pending(v);
    }

    spin_unlock(&d->event_lock);

    return 0;
}
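Looking at the two functions side by side, the ordering comments suggest they are meant to
form a lock-free pending/mask handshake: the notifier sets pending before reading mask,
while the unmask path clears mask before reading pending. Below is a small user-space
analogue I wrote to convince myself; it is illustrative only, using C11 atomics on single
flags instead of Xen's per-port bitmaps and atomic bit operations.

--------------------pending/mask handshake analogue (illustrative)--------------------
#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool pending, mask, pending_sel;

/* Notifier side (cf. evtchn_set_pending): set pending FIRST, then
 * read mask.  Returns true if the target vcpu should be kicked. */
static bool set_pending(void)
{
    if ( atomic_exchange(&pending, true) )
        return false;                  /* already pending, nothing new */
    if ( !atomic_load(&mask) &&
         !atomic_exchange(&pending_sel, true) )
        return true;
    return false;
}

/* Unmask side (cf. evtchn_unmask): clear mask FIRST, then read
 * pending, so an event that raced against the mask is not lost. */
static bool unmask(void)
{
    if ( atomic_exchange(&mask, false) &&
         atomic_load(&pending) &&
         !atomic_exchange(&pending_sel, true) )
        return true;                   /* deliver the nearly-missed event */
    return false;
}

If set_pending() reads mask before unmask() clears it, then unmask() is guaranteed to see
pending already set (and vice versa), so at least one of the two sides raises pending_sel.
If that reasoning holds, the missing remote lock would be by design, and the lost event
would have to come from somewhere else; could you confirm?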
 ----------------------------cpu_get_ioreq-------------------------
static ioreq_t *cpu_get_ioreq(void)
{
    int i;
    evtchn_port_t port;

    port = xc_evtchn_pending(xce_handle);
    if (port != -1) {
        for ( i = 0; i < vcpus; i++ )
            if ( ioreq_local_port[i] == port )
                break;

        if ( i == vcpus ) {
            fprintf(logfile, "Fatal error while trying to get io event!\n");
            exit(1);
        }

        // unmask the wanted port again
        xc_evtchn_unmask(xce_handle, port);

        //get the io packet from shared memory
        send_vcpu = i;
        return __cpu_get_ioreq(i);
    }

    //read error or read nothing
    return NULL;
}
      
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel