xend: wait for page scrubbing on NUMA hosts before ballooning

balloon.free() gains an optional second argument naming the domain the
memory is being freed for; the callers in XendCheckpoint.py and
XendDomainInfo.py now pass it.  On a host with more than one NUMA node,
free() uses the domain's cpu list to decide whether to wait for the
scrub list to drain before it starts ballooning dom0.

The new argument defaults to None so that callers of balloon.free()
not touched by this patch keep working; with None, or with an empty cpu
list, the NUMA wait is skipped.

NOTE(review): this patch was recovered from a copy whose line breaks
and indentation had been collapsed.  Leading whitespace and blank
context lines were reconstructed from the hunk line counts -- verify
against the target tree, or apply with "patch -l".

diff -r cc82d54bedfd tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py	Fri Dec 05 15:54:22 2008 +0000
+++ b/tools/python/xen/xend/XendCheckpoint.py	Tue Dec 09 16:34:35 2008 +0800
@@ -253,7 +253,7 @@ def restore(xd, fd, dominfo = None, paus
         # set memory limit
         xc.domain_setmaxmem(dominfo.getDomid(), maxmem)
 
-        balloon.free(memory + shadow)
+        balloon.free(memory + shadow, dominfo)
 
         shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024)
         dominfo.info['shadow_memory'] = shadow_cur
diff -r cc82d54bedfd tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py	Fri Dec 05 15:54:22 2008 +0000
+++ b/tools/python/xen/xend/XendDomainInfo.py	Tue Dec 09 16:34:35 2008 +0800
@@ -2105,7 +2105,7 @@ class XendDomainInfo:
         # overhead is greater for some types of domain than others. For
         # example, an x86 HVM domain will have a default shadow-pagetable
         # allocation of 1MB. We free up 2MB here to be on the safe side.
-        balloon.free(2*1024) # 2MB should be plenty
+        balloon.free(2*1024, self) # 2MB should be plenty
 
         ssidref = 0
         if security.on() == xsconstants.XS_POLICY_USE:
@@ -2299,7 +2299,7 @@ class XendDomainInfo:
                 vtd_mem = ((vtd_mem + 1023) / 1024) * 1024
 
             # Make sure there's enough RAM available for the domain
-            balloon.free(memory + shadow + vtd_mem)
+            balloon.free(memory + shadow + vtd_mem, self)
 
             # Set up the shadow memory
             shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
@@ -2716,7 +2716,7 @@ class XendDomainInfo:
         # The domain might already have some shadow memory
         overhead_kb -= xc.shadow_mem_control(self.domid) * 1024
         if overhead_kb > 0:
-            balloon.free(overhead_kb)
+            balloon.free(overhead_kb, self)
 
     def _unwatchVm(self):
         """Remove the watch on the VM path, if any. Idempotent. Nothrow
diff -r cc82d54bedfd tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py	Fri Dec 05 15:54:22 2008 +0000
+++ b/tools/python/xen/xend/balloon.py	Tue Dec 09 16:35:38 2008 +0800
@@ -67,7 +67,12 @@ def get_dom0_target_alloc():
         raise VmError('Failed to query target memory allocation of dom0.')
     return kb
 
-def free(need_mem):
+def free(need_mem, dominfo=None):
     """Balloon out memory from the privileged domain so that there is the
     specified required amount (in KiB) free.
+
+    dominfo is the domain object the memory is being freed for (the
+    callers pass their XendDomainInfo), or None.  On a host with more
+    than one NUMA node its info['cpus'] list decides whether to wait for
+    scrubbing to finish first; with None or no cpu list the wait is skipped.
     """
@@ -121,6 +126,44 @@ def free(need_mem):
         max_free_mem = total_mem - dom0_alloc
         if need_mem >= max_free_mem:
             retries = rlimit
+
+        # On a NUMA machine, when the new domain's cpus (the first entry
+        # of dominfo.info['cpus']) all belong to one node, wait until
+        # the scrub list has drained before ballooning.  The wait is
+        # bounded by rlimit retries.  It is skipped when the caller
+        # supplied no domain or the domain has no cpu list.
+        if dominfo is not None and physinfo['nr_nodes'] > 1 and retries == 0:
+            oldnode = -1
+            waitscrub = 1
+            cpus = dominfo.info['cpus']
+            if cpus:
+                vcpus = cpus[0]
+            else:
+                vcpus = []
+                waitscrub = 0
+            for vcpu in vcpus:
+                nodenum = 0
+                for node in physinfo['node_to_cpu']:
+                    if vcpu in node:
+                        if oldnode == -1:
+                            oldnode = nodenum
+                        elif oldnode != nodenum:
+                            waitscrub = 0
+                    nodenum = nodenum + 1
+
+            if waitscrub == 1 and scrub_mem > 0:
+                log.debug("wait for scrub %s", scrub_mem)
+                while scrub_mem > 0 and retries < rlimit:
+                    time.sleep(sleep_time)
+                    physinfo = xc.physinfo()
+                    free_mem = physinfo['free_memory']
+                    scrub_mem = physinfo['scrub_memory']
+                    retries += 1
+                    sleep_time += SLEEP_TIME_GROWTH
+                log.debug("scrub for %d times", retries)
+
+            retries = 0
+            sleep_time = SLEEP_TIME_GROWTH
 
         while retries < rlimit:
             physinfo = xc.physinfo()