Attached is a cleaned-up version of your patch. One change is that
getdomaininfo does not return 'dying' until domain_kill() is fully
completed. This will prevent tools from not retrying domain_kill() because
it looks like it's already been executed!
One thing is still missing though -- where do you intend to put the retry
loop in the toolstack?
-- Keir
On 9/8/07 09:18, "Isaku Yamahata" <yamahata@xxxxxxxxxxxxx> wrote:
> Hi.
>
> Soft lockup warning pops up when destroying a domain with large memory
> on IA64. It is because the domain destroy hypercall which frees all
> pages of the domain takes too long.
> This patch is the common part of the patch set which eliminates it.
> The remaining arch (IA64) specific part is thet make the hypercall
> return -EAGAIN if necessary and make kernel repeat issuing it until
> success. I will send the remaining patches to xen-ia64-devel if this
> patch is acceptable.
>
> Another approach would be that the hypercall returns immidiately
> sucessfully and the actual work is done backgroundly in xen.
> Since the thread in xen isn't supported currently, it would be
> somewhat complicated one using timer or soft interrupt handler.
> So I chose repeating the hypercall.
>
>
> # HG changeset patch
> # User yamahata@xxxxxxxxxxxxx
> # Date 1186646122 -32400
> # Node ID 68e56e0339b6bf42a694c2c435106b2a7bf286c3
> # Parent 484848f240e80cd403b0425fa35d7257bbc18cc2
> Make XEN_DOMCTL_destroydomain hypercall return error.
> This is preparetion to prevent soft lockup warning when domain destroy and
> doesn't change existing behaviour.
>
> When domain memory size is very large, the domain destroy hypercall takes
> too long to trigger soft lockup warning because the hypercall frees all
> pages of the domain.
> To prevent soft lockup message, the hypercall returns -EAGAIN if necessary
> and kernel repeats issuing it until success.
> The remaining part is arch specific.
> PATCHNAME: preparetion_destroy_domain_soft_lockup
>
> Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
>
> diff -r 484848f240e8 -r 68e56e0339b6 xen/arch/ia64/xen/domain.c
> --- a/xen/arch/ia64/xen/domain.c Wed Aug 08 17:50:24 2007 +0100
> +++ b/xen/arch/ia64/xen/domain.c Thu Aug 09 16:55:22 2007 +0900
> @@ -932,7 +932,7 @@ static void relinquish_memory(struct dom
> spin_unlock_recursive(&d->page_alloc_lock);
> }
>
> -void domain_relinquish_resources(struct domain *d)
> +int domain_relinquish_resources(struct domain *d)
> {
> /* Relinquish guest resources for VT-i domain. */
> if (d->vcpu[0] && VMX_DOMAIN(d->vcpu[0]))
> @@ -950,6 +950,8 @@ void domain_relinquish_resources(struct
>
> /* Free page used by xen oprofile buffer */
> free_xenoprof_pages(d);
> +
> + return 0;
> }
>
> unsigned long
> diff -r 484848f240e8 -r 68e56e0339b6 xen/arch/powerpc/domain.c
> --- a/xen/arch/powerpc/domain.c Wed Aug 08 17:50:24 2007 +0100
> +++ b/xen/arch/powerpc/domain.c Thu Aug 09 16:55:22 2007 +0900
> @@ -313,13 +313,13 @@ static void relinquish_memory(struct dom
> spin_unlock_recursive(&d->page_alloc_lock);
> }
>
> -void domain_relinquish_resources(struct domain *d)
> +int domain_relinquish_resources(struct domain *d)
> {
> relinquish_memory(d, &d->xenpage_list);
> relinquish_memory(d, &d->page_list);
> xfree(d->arch.foreign_mfns);
> xfree(d->arch.p2m);
> - return;
> + return 0;
> }
>
> void arch_dump_domain_info(struct domain *d)
> diff -r 484848f240e8 -r 68e56e0339b6 xen/arch/x86/domain.c
> --- a/xen/arch/x86/domain.c Wed Aug 08 17:50:24 2007 +0100
> +++ b/xen/arch/x86/domain.c Thu Aug 09 16:55:22 2007 +0900
> @@ -1717,7 +1717,7 @@ static void vcpu_destroy_pagetables(stru
> v->arch.cr3 = 0;
> }
>
> -void domain_relinquish_resources(struct domain *d)
> +int domain_relinquish_resources(struct domain *d)
> {
> struct vcpu *v;
>
> @@ -1754,6 +1754,8 @@ void domain_relinquish_resources(struct
>
> if ( is_hvm_domain(d) )
> hvm_domain_relinquish_resources(d);
> +
> + return 0;
> }
>
> void arch_dump_domain_info(struct domain *d)
> diff -r 484848f240e8 -r 68e56e0339b6 xen/common/domain.c
> --- a/xen/common/domain.c Wed Aug 08 17:50:24 2007 +0100
> +++ b/xen/common/domain.c Thu Aug 09 16:55:22 2007 +0900
> @@ -298,26 +298,35 @@ struct domain *rcu_lock_domain_by_id(dom
> }
>
>
> -void domain_kill(struct domain *d)
> -{
> +int domain_kill(struct domain *d)
> +{
> + int rc = 0;
> domain_pause(d);
>
> /* Already dying? Then bail. */
> - if ( test_and_set_bool(d->is_dying) )
> + if ( test_and_set_bool(d->is_dying) &&
> + test_and_set_bool(d->is_relinquishing_resources))
> {
> domain_unpause(d);
> - return;
> - }
> -
> + return 0;
> + }
> +
> + gnttab_release_mappings(d);
> + rc = domain_relinquish_resources(d);
> + if (rc != 0) {
> + BUG_ON(rc != -EAGAIN);
> + /* clear_bool() isn't defined */
> + (void)test_and_clear_bool(d->is_relinquishing_resources);
> + return rc;
> + }
> evtchn_destroy(d);
> - gnttab_release_mappings(d);
> - domain_relinquish_resources(d);
> put_domain(d);
>
> /* Kick page scrubbing after domain_relinquish_resources(). */
> page_scrub_kick();
>
> send_guest_global_virq(dom0, VIRQ_DOM_EXC);
> + return 0;
> }
>
>
> diff -r 484848f240e8 -r 68e56e0339b6 xen/common/domctl.c
> --- a/xen/common/domctl.c Wed Aug 08 17:50:24 2007 +0100
> +++ b/xen/common/domctl.c Thu Aug 09 16:55:22 2007 +0900
> @@ -397,10 +397,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
> {
> ret = -EINVAL;
> if ( d != current->domain )
> - {
> - domain_kill(d);
> - ret = 0;
> - }
> + ret = domain_kill(d);
> rcu_unlock_domain(d);
> }
> }
> diff -r 484848f240e8 -r 68e56e0339b6 xen/include/asm-ia64/domain.h
> --- a/xen/include/asm-ia64/domain.h Wed Aug 08 17:50:24 2007 +0100
> +++ b/xen/include/asm-ia64/domain.h Thu Aug 09 16:55:22 2007 +0900
> @@ -18,7 +18,6 @@ struct tlb_track;
> struct tlb_track;
> #endif
>
> -extern void domain_relinquish_resources(struct domain *);
> struct vcpu;
> extern void relinquish_vcpu_resources(struct vcpu *v);
> extern void vcpu_share_privregs_with_guest(struct vcpu *v);
> diff -r 484848f240e8 -r 68e56e0339b6 xen/include/xen/domain.h
> --- a/xen/include/xen/domain.h Wed Aug 08 17:50:24 2007 +0100
> +++ b/xen/include/xen/domain.h Thu Aug 09 16:55:22 2007 +0900
> @@ -45,7 +45,7 @@ int arch_set_info_guest(struct vcpu *, v
> int arch_set_info_guest(struct vcpu *, vcpu_guest_context_u);
> void arch_get_info_guest(struct vcpu *, vcpu_guest_context_u);
>
> -void domain_relinquish_resources(struct domain *d);
> +int domain_relinquish_resources(struct domain *d);
>
> void dump_pageframe_info(struct domain *d);
>
> diff -r 484848f240e8 -r 68e56e0339b6 xen/include/xen/sched.h
> --- a/xen/include/xen/sched.h Wed Aug 08 17:50:24 2007 +0100
> +++ b/xen/include/xen/sched.h Thu Aug 09 16:55:22 2007 +0900
> @@ -192,7 +192,9 @@ struct domain
> bool_t is_polling;
> /* Is this guest dying (i.e., a zombie)? */
> bool_t is_dying;
> - /* Domain is paused by controller software? */
> + /* Is thin guest relinquishing resources? */
> + bool_t is_relinquishing_resources;
> + /* Domain is paused by controller softwares? */
> bool_t is_paused_by_controller;
>
> /* Guest has shut down (inc. reason code)? */
> @@ -335,7 +337,7 @@ static inline struct domain *rcu_lock_cu
>
> struct domain *get_domain_by_id(domid_t dom);
> void domain_destroy(struct domain *d);
> -void domain_kill(struct domain *d);
> +int domain_kill(struct domain *d);
> void domain_shutdown(struct domain *d, u8 reason);
> void domain_resume(struct domain *d);
> void domain_pause_for_debugger(void);
>
domkill.patch
Description: Binary data
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|