# HG changeset patch
# User konrad.wilk@xxxxxxxxxx
# Date 1258150318 18000
# Node ID 82762bc10aa5a193173d8a83a5dbada1003bdcd2
# Parent 88adf22e0fe3a77d0be95530b74c3781ffc918f1
Balloon down memory to achieve enough DMA32 memory for PV guests with PCI
pass-through to successfully launch.
If the user hasn't used the dom0_mem= boot parameter, the privileged domain
usurps all of the memory. During launch of PV guests with PCI pass-through
we ratchet down the memory for the privileged domain to the memory required
by the PV guest. However, for PV guests with PCI pass-through we do not
take into account that the PV guest is going to swap its SWIOTLB memory
for DMA32 memory - in fact, swap 64MB of it. This patch balloons down
the privileged domain so that 64MB of DMA32 memory are available.

Note: if 'dom0_mem' is used, the user will probably never encounter this
failure.
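
In rough outline, the new _freeDMAmemory() loop added below behaves like
this (a minimal sketch only; all amounts are in KiB, and 'node_to_dma32_mem'
is the per-node DMA32 heap that this patch exports through physinfo):

    # Sketch of the balloon-down loop; the real code is in the patch below.
    target = 64 * 1024 + 2048          # 64MB for the SWIOTLB plus 2MB slack
    while retries > 0:
        dma32_free = xc.physinfo()['node_to_dma32_mem'][node]
        need = target - dma32_free
        if need <= 0:
            break                      # enough DMA32 memory is now free
        balloon.free(need, self)       # balloon dom0 down by the shortfall
        retries -= 1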

P.S.
If you see:

    about to get started...

and nothing after that, and xenctx shows:
Call Trace:
[<ffffffff8132cfe3>] __const_udelay+0x1e <--
[<ffffffff816b9043>] panic+0x1c0
[<ffffffff81013335>] xen_swiotlb_fixup+0x123
[<ffffffff81a05e17>] xen_swiotlb_init_with_default_size+0x9c
[<ffffffff81a05f91>] xen_swiotlb_init+0x4b
[<ffffffff81a0ab72>] pci_iommu_alloc+0x86
[<ffffffff81a22972>] mem_init+0x28
[<ffffffff813201a9>] sort_extable+0x39
[<ffffffff819feb90>] start_kernel+0x301
[<ffffffff819fdf76>] x86_64_start_reservations+0x101
[<ffffffff81a03cdf>] xen_start_kernel+0x715
then this patch is the fix.
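
With the patch applied you can inspect the per-node DMA32 heap yourself;
a quick sketch (Python 2, using the toolstack's xen.lowlevel.xc binding and
the 'node_to_dma32_mem' key this patch adds):

    from xen.lowlevel import xc
    # physinfo() now also carries a per-node list of free DMA32 memory (KiB).
    info = xc.xc().physinfo()
    print info['node_to_dma32_mem']
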
diff -r 88adf22e0fe3 -r 82762bc10aa5 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Nov 13 17:10:09 2009 -0500
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Nov 13 17:11:58 2009 -0500
@@ -1059,6 +1059,7 @@
     int i, j, max_cpu_id;
     uint64_t free_heap;
     PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj;
+    PyObject *node_to_dma32_mem_obj;
     xc_cpu_to_node_t map[MAX_CPU_ID + 1];
     const char *virtcap_names[] = { "hvm", "hvm_directio" };
 
@@ -1128,10 +1129,27 @@
         Py_DECREF(pyint);
     }
 
+    xc_dom_loginit();
+    /* DMA memory. */
+    node_to_dma32_mem_obj = PyList_New(0);
+
+    for ( i = 0; i < info.nr_nodes; i++ )
+    {
+        PyObject *pyint;
+
+        xc_availheap(self->xc_handle, 0, 32, i, &free_heap);
+        xc_dom_printf("Node:%d: DMA32:%ld\n", i, free_heap);
+        pyint = PyInt_FromLong(free_heap / 1024);
+        PyList_Append(node_to_dma32_mem_obj, pyint);
+        Py_DECREF(pyint);
+    }
+
     PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
     Py_DECREF(node_to_cpu_obj);
     PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
     Py_DECREF(node_to_memory_obj);
+    PyDict_SetItemString(ret_obj, "node_to_dma32_mem", node_to_dma32_mem_obj);
+    Py_DECREF(node_to_dma32_mem_obj);
 
     return ret_obj;
 #undef MAX_CPU_ID
diff -r 88adf22e0fe3 -r 82762bc10aa5 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Fri Nov 13 17:10:09 2009 -0500
+++ b/tools/python/xen/xend/XendConfig.py Fri Nov 13 17:11:58 2009 -0500
@@ -2111,6 +2111,13 @@
     def is_hap(self):
         return self['platform'].get('hap', 0)
 
+    def is_pv_and_has_pci(self):
+        for dev_type, dev_info in self.all_devices_sxpr():
+            if dev_type != 'pci':
+                continue
+            return not self.is_hvm()
+        return False
+
     def update_platform_pci(self):
         pci = []
         for dev_type, dev_info in self.all_devices_sxpr():
diff -r 88adf22e0fe3 -r 82762bc10aa5 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Fri Nov 13 17:10:09 2009 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py Fri Nov 13 17:11:58 2009 -0500
@@ -2580,7 +2580,8 @@
 
     def _setCPUAffinity(self):
-        """ Repin domain vcpus if a restricted cpus list is provided
+        """ Repin domain vcpus if a restricted cpus list is provided.
+            Returns the chosen node number.
         """
 
         def has_cpus():
@@ -2597,6 +2598,7 @@
                         return True
             return False
 
+        index = 0
         if has_cpumap():
             for v in range(0, self.info['VCPUs_max']):
                 if self.info['vcpus_params'].has_key('cpumap%i' % v):
@@ -2647,6 +2649,54 @@
                 cpumask = info['node_to_cpu'][index]
                 for v in range(0, self.info['VCPUs_max']):
                     xc.vcpu_setaffinity(self.domid, v, cpumask)
+        return index
+
+    def _freeDMAmemory(self, node):
+
+        # If we are PV and have PCI devices the guest will
+        # turn on a SWIOTLB. The SWIOTLB _MUST_ be located in the DMA32
+        # zone (under 4GB). To do so, we need to balloon down Dom0 to where
+        # there is enough (64MB) memory under the 4GB mark. This ballooning
+        # might take more memory out than just 64MB though :-(
+        if not self.info.is_pv_and_has_pci():
+            return
+
+        retries = 2000
+        ask_for_mem = 0
+        need_mem = 0
+        try:
+            while retries > 0:
+                physinfo = xc.physinfo()
+                free_mem = physinfo['free_memory']
+                nr_nodes = physinfo['nr_nodes']
+                node_to_dma32_mem = physinfo['node_to_dma32_mem']
+                if node >= nr_nodes:
+                    return
+                # An extra 2MB above 64MB seems to do the trick.
+                need_mem = 64 * 1024 + 2048 - node_to_dma32_mem[node]
+                # Our starting point. We ask just for the difference so
+                # that we have an extra 64MB under 4GB.
+                ask_for_mem = max(need_mem, ask_for_mem)
+                if need_mem > 0:
+                    log.debug('_freeDMAmemory (%d) Need %dKiB DMA memory. '
+                              'Asking for %dKiB', retries, need_mem,
+                              ask_for_mem)
+
+                    balloon.free(ask_for_mem, self)
+                    ask_for_mem = ask_for_mem + 2048
+                else:
+                    # OK. We got enough DMA memory.
+                    break
+                retries = retries - 1
+        except:
+            # This is best-effort after all.
+            need_mem = max(1, need_mem)
+            pass
+
+        if need_mem > 0:
+            log.warn('We tried our best to balloon down DMA memory to '
+                     'accommodate your PV guest. We need %dKiB extra memory.',
+                     need_mem)
 
     def _setSchedParams(self):
         if XendNode.instance().xenschedinfo() == 'credit':
@@ -2668,7 +2718,7 @@
         # repin domain vcpus if a restricted cpus list is provided
         # this is done prior to memory allocation to aide in memory
         # distribution for NUMA systems.
-        self._setCPUAffinity()
+        node = self._setCPUAffinity()
 
         # Set scheduling parameters.
         self._setSchedParams()
@@ -2730,6 +2780,8 @@
         if self.info.target():
             self._setTarget(self.info.target())
 
+        self._freeDMAmemory(node)
+
         self._createDevices()
 
         self.image.cleanupTmpImages()
diff -r 88adf22e0fe3 -r 82762bc10aa5 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Fri Nov 13 17:10:09 2009 -0500
+++ b/tools/python/xen/xend/XendNode.py Fri Nov 13 17:11:58 2009 -0500
@@ -872,11 +872,11 @@
         except:
             str='none\n'
         return str[:-1];
-    def format_node_to_memory(self, pinfo):
+    def format_node_to_memory(self, pinfo, key):
         str=''
         whitespace=''
         try:
-            node_to_memory=pinfo['node_to_memory']
+            node_to_memory=pinfo[key]
             for i in range(0, pinfo['nr_nodes']):
                 str+='%snode%d:%d\n' % (whitespace,
                                         i,
@@ -896,7 +896,10 @@
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory'] = info['free_memory'] / 1024
         info['node_to_cpu'] = self.format_node_to_cpu(info)
-        info['node_to_memory'] = self.format_node_to_memory(info)
+        info['node_to_memory'] = self.format_node_to_memory(info,
+                                                            'node_to_memory')
+        info['node_to_dma32_mem'] = self.format_node_to_memory(info,
+                                                            'node_to_dma32_mem')
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
@@ -908,7 +911,8 @@
                       'total_memory',
                       'free_memory',
                       'node_to_cpu',
-                      'node_to_memory'
+                      'node_to_memory',
+                      'node_to_dma32_mem'
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]