# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1258186190 0
# Node ID 01f4bb96bf8536a91043c8f3bb9e55705c5191fa
# Parent b6b2e97f8db91d66d60c0a389ee33bea9cfad9dd
xend: Balloon down memory to achieve enough DMA32 memory for PV guests
with PCI pass-through to successfully launch.
If the user hasn't used the dom0_mem= boot parameter, the privileged
domain usurps all of the memory. During launch of PV guests with PCI
pass-through we ratchet the privileged domain's memory down to what
the PV guest requires. However, for PV guests with PCI pass-through we
do not take into account that the PV guest is going to swap its
SWIOTLB memory for DMA32 memory - in fact, swap 64MB of it. This patch
balloons down the privileged domain so that 64MB of DMA32 memory is
available.
From: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
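
For reference, the heuristic the patch implements can be sketched in
isolation roughly as follows - a minimal sketch, not the code applied
below. It assumes xc.physinfo() exposes the node_to_dma32_mem list in
KiB (which this patch adds) and reuses xend's existing balloon.free()
helper:

    # Sketch: balloon dom0 down until the chosen node has 64MB (plus
    # 2MB of slack) free below 4GB. All amounts are in KiB.
    DMA32_TARGET = 64 * 1024 + 2048

    def free_dma32(node, dominfo):
        ask_for_mem = 0
        for _ in range(2000):
            dma32_free = xc.physinfo()['node_to_dma32_mem'][node]
            need_mem = DMA32_TARGET - dma32_free
            if need_mem <= 0:
                return True           # enough DMA32 memory is free
            # Ask for at least the shortfall, and grow the request by
            # 2MB per retry: ballooning may free pages above 4GB first.
            ask_for_mem = max(need_mem, ask_for_mem)
            balloon.free(ask_for_mem, dominfo)
            ask_for_mem += 2048
        return False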
---
tools/python/xen/lowlevel/xc/xc.c | 18 ++++++++++
tools/python/xen/xend/XendConfig.py | 7 ++++
tools/python/xen/xend/XendDomainInfo.py | 56 ++++++++++++++++++++++++++++++--
tools/python/xen/xend/XendNode.py | 12 ++++--
4 files changed, 87 insertions(+), 6 deletions(-)
diff -r b6b2e97f8db9 -r 01f4bb96bf85 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Nov 13 22:13:59 2009 +0000
+++ b/tools/python/xen/lowlevel/xc/xc.c Sat Nov 14 08:09:50 2009 +0000
@@ -1059,6 +1059,7 @@ static PyObject *pyxc_physinfo(XcObject
int i, j, max_cpu_id;
uint64_t free_heap;
PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj;
+ PyObject *node_to_dma32_mem_obj;
xc_cpu_to_node_t map[MAX_CPU_ID + 1];
const char *virtcap_names[] = { "hvm", "hvm_directio" };
@@ -1128,10 +1129,27 @@ static PyObject *pyxc_physinfo(XcObject
Py_DECREF(pyint);
}
+ xc_dom_loginit();
+ /* Free DMA32 (below 4GB) memory per node, in KiB. */
+ node_to_dma32_mem_obj = PyList_New(0);
+
+ for ( i = 0; i < info.nr_nodes; i++ )
+ {
+ PyObject *pyint;
+
+ xc_availheap(self->xc_handle, 0, 32, i, &free_heap);
+ xc_dom_printf("Node:%d: DMA32:%llu\n", i, (unsigned long long)free_heap);
+ pyint = PyInt_FromLong(free_heap / 1024);
+ PyList_Append(node_to_dma32_mem_obj, pyint);
+ Py_DECREF(pyint);
+ }
+
PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
Py_DECREF(node_to_cpu_obj);
PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
Py_DECREF(node_to_memory_obj);
+ PyDict_SetItemString(ret_obj, "node_to_dma32_mem", node_to_dma32_mem_obj);
+ Py_DECREF(node_to_dma32_mem_obj);
return ret_obj;
#undef MAX_CPU_ID
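
From Python, the new per-node list sits alongside node_to_cpu and
node_to_memory. A hedged usage example (entries are in KiB, per the
/1024 above):

    # Query the free DMA32 heap per NUMA node via the low-level binding.
    import xen.lowlevel.xc

    xc_handle = xen.lowlevel.xc.xc()
    for node, kib in enumerate(xc_handle.physinfo()['node_to_dma32_mem']):
        print 'node%d: %dKiB free below 4GB' % (node, kib)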
diff -r b6b2e97f8db9 -r 01f4bb96bf85 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Fri Nov 13 22:13:59 2009 +0000
+++ b/tools/python/xen/xend/XendConfig.py Sat Nov 14 08:09:50 2009 +0000
@@ -2111,6 +2111,13 @@ class XendConfig(dict):
def is_hap(self):
return self['platform'].get('hap', 0)
+ def is_pv_and_has_pci(self):
+ # True only for PV guests with at least one PCI device passed through.
+ for dev_type, dev_info in self.all_devices_sxpr():
+ if dev_type == 'pci':
+ return not self.is_hvm()
+ return False
+
def update_platform_pci(self):
pci = []
for dev_type, dev_info in self.all_devices_sxpr():
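
In other words: a PV guest whose configuration carries a
(device (pci ...)) entry returns True here, while the same device in
an HVM guest, or a PV guest with no PCI devices, returns False -
exactly the case in which the guest kernel enables the SWIOTLB.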
diff -r b6b2e97f8db9 -r 01f4bb96bf85 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Fri Nov 13 22:13:59 2009 +0000
+++ b/tools/python/xen/xend/XendDomainInfo.py Sat Nov 14 08:09:50 2009 +0000
@@ -2580,7 +2580,8 @@ class XendDomainInfo:
def _setCPUAffinity(self):
- """ Repin domain vcpus if a restricted cpus list is provided
+ """ Repin domain vcpus if a restricted cpus list is provided.
+ Returns the chosen NUMA node number.
"""
def has_cpus():
@@ -2597,6 +2598,7 @@ class XendDomainInfo:
return True
return False
+ index = 0
if has_cpumap():
for v in range(0, self.info['VCPUs_max']):
if self.info['vcpus_params'].has_key('cpumap%i' % v):
@@ -2647,6 +2649,54 @@ class XendDomainInfo:
cpumask = info['node_to_cpu'][index]
for v in range(0, self.info['VCPUs_max']):
xc.vcpu_setaffinity(self.domid, v, cpumask)
+ return index
+
+ def _freeDMAmemory(self, node):
+
+ # If we are PV and have PCI devices, the guest will enable the
+ # SWIOTLB. The SWIOTLB _MUST_ be located in the DMA32 zone
+ # (below 4GB). To make that possible we balloon down dom0 until
+ # there is enough (64MB) memory below the 4GB mark. This
+ # ballooning might take out more memory than just 64MB, though :-(
+ if not self.info.is_pv_and_has_pci():
+ return
+
+ retries = 2000
+ ask_for_mem = 0
+ need_mem = 0
+ try:
+ while (retries > 0):
+ physinfo = xc.physinfo()
+ free_mem = physinfo['free_memory']
+ nr_nodes = physinfo['nr_nodes']
+ node_to_dma32_mem = physinfo['node_to_dma32_mem']
+ if (node >= nr_nodes):
+ return
+ # An extra 2MB on top of the 64MB seems to do the trick.
+ need_mem = 64 * 1024 + 2048 - node_to_dma32_mem[node]
+ # Our starting point: ask just for the difference needed to
+ # have an extra 64MB under 4GB.
+ ask_for_mem = max(need_mem, ask_for_mem)
+ if (need_mem > 0):
+ log.debug('_freeDMAmemory (%d) Need %dKiB DMA memory. '
+ 'Asking for %dKiB', retries, need_mem,
+ ask_for_mem)
+
+ balloon.free(ask_for_mem, self)
+ ask_for_mem = ask_for_mem + 2048
+ else:
+ # OK. We got enough DMA memory.
+ break
+ retries = retries - 1
+ except:
+ # This is best-effort, after all.
+ need_mem = max(1, need_mem)
+ pass
+
+ if (need_mem > 0):
+ log.warn('We tried our best to balloon down DMA memory to '
+ 'accommodate your PV guest. We need %dKiB extra memory.',
+ need_mem)
def _setSchedParams(self):
if XendNode.instance().xenschedinfo() == 'credit':
@@ -2668,7 +2718,7 @@ class XendDomainInfo:
# repin domain vcpus if a restricted cpus list is provided
# this is done prior to memory allocation to aide in memory
# distribution for NUMA systems.
- self._setCPUAffinity()
+ node = self._setCPUAffinity()
# Set scheduling parameters.
self._setSchedParams()
@@ -2729,6 +2779,8 @@ class XendDomainInfo:
self._introduceDomain()
if self.info.target():
self._setTarget(self.info.target())
+
+ self._freeDMAmemory(node)
self._createDevices()
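
As a worked example of the arithmetic in _freeDMAmemory() above
(numbers purely illustrative): if the chosen node reports 10240KiB
free below 4GB, the first pass computes

    need_mem    = 64*1024 + 2048 - 10240 = 57344KiB
    ask_for_mem = max(57344, 0)          = 57344KiB

so balloon.free() is asked for 56MB. If the pages dom0 releases happen
to lie above 4GB, the next retry raises the request to 59392KiB, and
so on until the 66MB target is met or the 2000-retry budget runs out.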
diff -r b6b2e97f8db9 -r 01f4bb96bf85 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Fri Nov 13 22:13:59 2009 +0000
+++ b/tools/python/xen/xend/XendNode.py Sat Nov 14 08:09:50 2009 +0000
@@ -872,11 +872,11 @@ class XendNode:
except:
str='none\n'
return str[:-1];
- def format_node_to_memory(self, pinfo):
+ def format_node_to_memory(self, pinfo, key):
str=''
whitespace=''
try:
- node_to_memory=pinfo['node_to_memory']
+ node_to_memory=pinfo[key]
for i in range(0, pinfo['nr_nodes']):
str+='%snode%d:%d\n' % (whitespace,
i,
@@ -896,7 +896,10 @@ class XendNode:
info['total_memory'] = info['total_memory'] / 1024
info['free_memory'] = info['free_memory'] / 1024
info['node_to_cpu'] = self.format_node_to_cpu(info)
- info['node_to_memory'] = self.format_node_to_memory(info)
+ info['node_to_memory'] = self.format_node_to_memory(info,
+ 'node_to_memory')
+ info['node_to_dma32_mem'] = self.format_node_to_memory(info,
+ 'node_to_dma32_mem')
ITEM_ORDER = ['nr_cpus',
'nr_nodes',
@@ -908,7 +911,8 @@ class XendNode:
'total_memory',
'free_memory',
'node_to_cpu',
- 'node_to_memory'
+ 'node_to_memory',
+ 'node_to_dma32_mem'
]
return [[k, info[k]] for k in ITEM_ORDER]
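
With this change 'xm info' gains a node_to_dma32_mem line alongside
node_to_memory. Illustrative output on a hypothetical two-node box
(values in KiB, made up for the example):

    node_to_memory         : node0:2048000
                             node1:2048000
    node_to_dma32_mem      : node0:616192
                             node1:0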