Hi Tim and Hao:
The patch failed to fix the bug.
I applied patch, and add some more log info.
in mem_sharing_unshare_page
664 shr_lock();
665 mfn = gfn_to_mfn(d, gfn, &p2mt);
666 /* Has someone already unshared it? */
667 if (!p2m_is_shared(p2mt)) {
668 printk("===someone unshare mfn %lx\n", mfn);
669 shr_unlock();
670 return 0;
671 }
In mem_sharing_nominate_page
508 shr_lock();
509 mfn = gfn_to_mfn(d, gfn, &p2mt);
510
511 /* Check if mfn is valid */
512 ret = -EINVAL;
513 if (!mfn_valid(mfn))
514 goto out;
515
516 if (p2m_is_shared(p2mt)) {
517 page = mfn_to_page(mfn);
518 printk("===page h %lu, mfx %lx is already shared\n", page->shr_handle, mfn);
519 }
520 /* Check p2m type */
521 if (!p2m_is_sharable(p2mt))
522 goto out;
523
Also in mem_sharing_share_pages, I print some free info
584 shr_lock();
585
586 ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
587 se = mem_sharing_hash_lookup(sh);
588 if(se == NULL) goto err_out;
589 ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
590 ce = mem_sharing_hash_lookup(ch);
591 if(ce == NULL) goto err_out;
592 spage = mfn_to_page(se->mfn);
593 cpage = mfn_to_page(ce->mfn);
594 printk("===will free cpage_mfn %lx spage_mfn %lx \n", ce->mfn, se->mfn);
634 ASSERT(list_empty(&ce->gfns));
635 mem_sharing_hash_delete(ch);
636 atomic_inc(&nr_saved_mfns);
637 /* Free the client page */
638 if(test_and_clear_bit(_PGC_allocated, &cpage->count_info)){
639 put_page(cpage);
640 printk("===free cpage_mfn %lx spage_mfn %lx \n", ce->mfn, se->mfn);
641 }
642 ret = 0;
643
644 err_out:
645 shr_unlock();
646
647 return ret;
Below is the serial output.
We can see neither line 668 and nor line 518 is print out.
blktap_sysfs_create: adding attributes for dev ffff88012148ee00
(XEN) ===will free cpage_mfn 1406fd spage_mfn 2df6a6
(XEN) ===free cpage_mfn 0 spage_mfn 2df6a6
(XEN) ===will free cpage_mfn 14083c spage_mfn 2df6a8
(XEN) ===free cpage_mfn 0 spage_mfn 2df6a8
(XEN) ===will free cpage_mfn 1409e5 spage_mfn 2df6a9
(XEN) ===free cpage_mfn 0 spage_mfn 2df6a9
(XEN) ===will free cpage_mfn 142eb4 spage_mfn 141c4b
(XEN) ===free cpage_mfn 0 spage_mfn 141c4b
(XEN) printk: 32 messages suppressed.
(XEN) mm.c:859:d0 Error getting mfn 2df6a8 (pfn fffffffffffffffe) from L1 entry 80000002df6a8627 for l1e_owner=0, pg_owner=2
(XEN) mm.c:859:d0 Error getting mfn 2df6a9 (pfn fffffffffffffffe) from L1 entry 80000002df6a9627 for l1e_owner=0, pg_owner=2
(XEN) ===will free cpage_mfn 219a1c spage_mfn 1421b1
(XEN) ===free cpage_mfn 0 spage_mfn 1421b1
(XEN) ===will free cpage_mfn 2dea05 spage_mfn 142124
(XEN) ===free cpage_mfn 0 spage_mfn 142124
(XEN) ===will free cpage_mfn 147127 spage_mfn 146cbb
(XEN) ===free cpage_mfn 0 spage_mfn 146cbb
(XEN) ===will free cpage_mfn 146ecf spage_mfn 14127f
(XEN) ===free cpage_mfn 0 spage_mfn 14127f
From: Jui-Hao Chiang [mailto:juihaochiang@xxxxxxxxx]
Date: 2011.1.7. 0:12
TO: Tim Deegan
CC: tinnycloud; xen-devel@xxxxxxxxxxxxxxxxxxx
Sub: [PATCH] mem_sharing: fix race condition of nominate and unshare
Hi, this patch does the following
(1) When updating/checking p2m type for mem_sharing, we must hold shr_lock
(2) For nominate operation, if the page is already nominated, return the handle from page_info->shr_handle
(3) For unshare operation, it is possible that multiple users unshare a page via hvm_hap_nested_page_fault() at the same time. If the page is already un-shared by someone else, simply return success.
NOTE: we assume that nobody holds page_alloc_lock/p2m_lock before calling nominate/share/unshare.
Signed-off-by: Jui-Hao Chiang <juihaochiang@xxxxxxxxx>
Signed-off-by: Han-Lin Li <Han-Lin.Li@xxxxxxxxxxx>
Bests,
Jui-Hao