On Thu, 2006-06-15 at 11:58 +0900, Isaku Yamahata wrote:
> 2 / 7
>
> # HG changeset patch
> # User yamahata@xxxxxxxxxxxxx
> # Node ID 03b29286001dea87284cc401492a799e7e654a0f
> # Parent ad418fdb1981be2108d84bafbd294a9db9899396
> add volatile pte entry of the p2m table.
> Since the p2m table is shared by cpus, volatile is needed.
> fix the races in pte access of __assign_domain_page() and
> destroy_grant_host_mapping()
> PATCHNAME: volatile_pte_t
>
> Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
>
> diff -r ad418fdb1981 -r 03b29286001d xen/arch/ia64/xen/mm.c
> --- a/xen/arch/ia64/xen/mm.c Thu Jun 15 11:33:14 2006 +0900
> +++ b/xen/arch/ia64/xen/mm.c Thu Jun 15 11:33:20 2006 +0900
> @@ -338,10 +338,42 @@ unsigned long translate_domain_mpaddr(un
> }
>
> //XXX !xxx_present() should be used instead of !xxx_none()?
> +// __assign_new_domain_page(), assign_new_domain_page() and
> +// assign_new_domain0_page() are used only during domain creation.
> +// their accesses aren't racy, so the returned pte_t doesn't need
> +// a volatile qualifier
> +static pte_t*
> +__lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
> +{
> + struct mm_struct *mm = &d->arch.mm;
> + pgd_t *pgd;
> + pud_t *pud;
> + pmd_t *pmd;
> +
> + BUG_ON(mm->pgd == NULL);
> + pgd = pgd_offset(mm, mpaddr);
> + if (pgd_none(*pgd)) {
> + pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
> + }
> +
> + pud = pud_offset(pgd, mpaddr);
> + if (pud_none(*pud)) {
> + pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
> + }
> +
> + pmd = pmd_offset(pud, mpaddr);
> + if (pmd_none(*pmd)) {
> + pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
> + }
> +
> + return pte_offset_map(pmd, mpaddr);
> +}
> +
Very nice rewrite.
> +//XXX !xxx_present() should be used instead of !xxx_none()?
> // pud, pmd, pte page is zero cleared when they are allocated.
> // Their area must be visible before population so that
> // cmpxchg must have release semantics.
> -static pte_t*
> +static volatile pte_t*
> lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
> {
> struct mm_struct *mm = &d->arch.mm;
> @@ -384,11 +416,11 @@ lookup_alloc_domain_pte(struct domain* d
> }
> }
>
> - return pte_offset_map(pmd, mpaddr);
> + return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
> }
>
> //XXX xxx_none() should be used instead of !xxx_present()?
> -static pte_t*
> +static volatile pte_t*
> lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
> {
> struct mm_struct *mm = &d->arch.mm;
> @@ -409,11 +441,11 @@ lookup_noalloc_domain_pte(struct domain*
> if (unlikely(!pmd_present(*pmd)))
> return NULL;
>
> - return pte_offset_map(pmd, mpaddr);
> + return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
> }
>
> #ifdef CONFIG_XEN_IA64_DOM0_VP
> -static pte_t*
> +static volatile pte_t*
> lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
> {
> struct mm_struct *mm = &d->arch.mm;
> @@ -434,13 +466,13 @@ lookup_noalloc_domain_pte_none(struct do
> if (unlikely(pmd_none(*pmd)))
> return NULL;
>
> - return pte_offset_map(pmd, mpaddr);
> + return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
> }
In all of the functions above, it appears that the return value of
a function (pte_offset_map()) is being returned as a volatile result
from each of the functions. Is that really needed? I'm not sure
it helps in this case, but I could be wrong.
> unsigned long
> ____lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
> {
> - pte_t *pte;
> + volatile pte_t *pte;
>
> pte = lookup_noalloc_domain_pte(d, mpaddr);
> if (pte == NULL)
pte seems to be used only locally, so I think the volatile
makes no difference here.
> @@ -470,7 +502,7 @@ __lookup_domain_mpa(struct domain *d, un
>
> unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
> {
> - pte_t *pte;
> + volatile pte_t *pte;
>
> #ifdef CONFIG_DOMAIN0_CONTIGUOUS
> if (d == dom0) {
> @@ -486,9 +518,10 @@ unsigned long lookup_domain_mpa(struct d
> #endif
> pte = lookup_noalloc_domain_pte(d, mpaddr);
> if (pte != NULL) {
> - if (pte_present(*pte)) {
> + pte_t tmp_pte = *pte;// pte is volatile. copy the value.
> + if (pte_present(tmp_pte)) {
> //printk("lookup_domain_page: found mapping for %lx,
> pte=%lx\n",mpaddr,pte_val(*pte));
> - return pte_val(*pte);
> + return pte_val(tmp_pte);
> } else if (VMX_DOMAIN(d->vcpu[0]))
> return GPFN_INV_MASK;
> }
Similarly here: pte seems to be used only locally, so I think the
volatile makes no difference here. There are several other functions
where 'pte_t *pte;' becomes 'volatile pte_t *pte;' that may not be
needed at all.
[snip...]
> @@ -975,10 +1021,12 @@ destroy_grant_host_mapping(unsigned long
> unsigned long mfn, unsigned int flags)
> {
> struct domain* d = current->domain;
> - pte_t* pte;
> + volatile pte_t* pte;
> + unsigned long cur_arflags;
> + pte_t cur_pte;
> + pte_t new_pte;
> pte_t old_pte;
> - unsigned long old_mfn = INVALID_MFN;
> - struct page_info* old_page;
> + struct page_info* page;
>
> if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
> DPRINTK("%s: flags 0x%x\n", __func__, flags);
> @@ -986,21 +1034,42 @@ destroy_grant_host_mapping(unsigned long
> }
>
> pte = lookup_noalloc_domain_pte(d, gpaddr);
> - if (pte == NULL || !pte_present(*pte) || pte_pfn(*pte) != mfn)
> + if (pte == NULL) {
> + DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx\n", __func__, gpaddr, mfn);
> return GNTST_general_error;
> -
> - // update pte
> - old_pte = ptep_get_and_clear(&d->arch.mm, gpaddr, pte);
> - if (pte_present(old_pte)) {
> - old_mfn = pte_pfn(old_pte);
> - } else {
> + }
> +
> + again:
> + cur_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
> + cur_pte = pfn_pte(mfn, __pgprot(cur_arflags));
> + if (!pte_present(cur_pte)) {
> + DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx\n",
> + __func__, gpaddr, mfn, pte_val(cur_pte));
> return GNTST_general_error;
> }
> - domain_page_flush(d, gpaddr, old_mfn, INVALID_MFN);
> -
> - old_page = mfn_to_page(old_mfn);
> - BUG_ON(page_get_owner(old_page) == d);//try_to_clear_PGC_allocate(d,
> page) is not needed.
> - put_page(old_page);
> + new_pte = __pte(0);
> +
> + old_pte = ptep_cmpxchg_rel(&d->arch.mm, gpaddr, pte, cur_pte, new_pte);
> + if (unlikely(!pte_present(old_pte))) {
> + DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx old_pte 0x%lx\n",
> + __func__, gpaddr, mfn, pte_val(cur_pte), pte_val(old_pte));
> + return GNTST_general_error;
> + }
> + if (unlikely(pte_val(cur_pte) != pte_val(old_pte))) {
> + if (pte_pfn(old_pte) == mfn) {
> + goto again;
Maybe I'm just being paranoid, but is there *any* chance this goto loop
will not terminate?
> + }
> + DPRINTK("%s gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx old_pte 0x%lx\n",
> + __func__, gpaddr, mfn, pte_val(cur_pte), pte_val(old_pte));
> + return GNTST_general_error;
> + }
> + BUG_ON(pte_pfn(old_pte) != mfn);
> +
> + domain_page_flush(d, gpaddr, mfn, INVALID_MFN);
> +
> + page = mfn_to_page(mfn);
> + BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page)
> is not needed.
> + put_page(page);
>
> return GNTST_okay;
> }
[snip...]
> diff -r ad418fdb1981 -r 03b29286001d
> xen/include/asm-ia64/linux-xen/asm/pgtable.h
> --- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h Thu Jun 15 11:33:14
> 2006 +0900
> +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Thu Jun 15 11:33:20
> 2006 +0900
> @@ -210,7 +210,7 @@ ia64_phys_addr_valid (unsigned long addr
> #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
> #ifdef XEN
> static inline void
> -set_pte_rel(pte_t* ptep, pte_t pteval)
> +set_pte_rel(volatile pte_t* ptep, pte_t pteval)
> {
> #if CONFIG_SMP
> asm volatile ("st8.rel [%0]=%1" ::
Doesn't pass-by-value defeat the intention of marking the ptep argument
as volatile?
> @@ -402,8 +402,14 @@ ptep_test_and_clear_dirty (struct vm_are
> }
> #endif
> +#ifdef XEN
> +static inline pte_t
> +ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
> + volatile pte_t *ptep)
> +#else
> static inline pte_t
> ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
> +#endif
> {
> #ifdef CONFIG_SMP
> return __pte(xchg((long *) ptep, 0));
Similarly here in passing ptep....
> @@ -416,7 +422,8 @@ ptep_get_and_clear(struct mm_struct *mm,
>
> #ifdef XEN
> static inline pte_t
> -ptep_xchg(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t npte)
> +ptep_xchg(struct mm_struct *mm, unsigned long addr,
> + volatile pte_t *ptep, pte_t npte)
> {
> #ifdef CONFIG_SMP
> return __pte(xchg((long *) ptep, pte_val(npte)));
> @@ -428,8 +435,8 @@ ptep_xchg(struct mm_struct *mm, unsigned
> }
>
> static inline pte_t
> -ptep_cmpxchg_rel(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
> - pte_t old_pte, pte_t new_pte)
> +ptep_cmpxchg_rel(struct mm_struct *mm, unsigned long addr,
> + volatile pte_t *ptep, pte_t old_pte, pte_t new_pte)
> {
> #ifdef CONFIG_SMP
> return __pte(cmpxchg_rel(&pte_val(*ptep),
>
And in the above two cases, also. Since we're passing an argument,
we're going to copy the value anyway, so volatile probably doesn't
matter -- unless I've missed something (which is always possible)...
--
Ciao,
al
----------------------------------------------------------------------
Al Stone Alter Ego:
Open Source and Linux R&D Debian Developer
Hewlett-Packard Company http://www.debian.org
E-mail: ahs3@xxxxxxxxx ahs3@xxxxxxxxxx
----------------------------------------------------------------------
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
|