To: keir.fraser@xxxxxxxxxxxxx, xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH Xen-unstable] Balloon down memory to achieve enough DMA32 memory for PV guests with PCI pass-through to successfully launch.
From: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Date: Fri, 13 Nov 2009 17:16:02 -0500
Cc:
Delivery-date: Fri, 13 Nov 2009 14:18:04 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.19 (2009-01-05)
# HG changeset patch
# User konrad.wilk@xxxxxxxxxx
# Date 1258150318 18000
# Node ID 82762bc10aa5a193173d8a83a5dbada1003bdcd2
# Parent  88adf22e0fe3a77d0be95530b74c3781ffc918f1
Balloon down memory to achieve enough DMA32 memory for PV guests with PCI
pass-through to successfully launch.

If the user hasn't used the dom0_mem= boot parameter, the privileged domain
usurps all of the memory. During launch of PV guests with PCI pass-through
we ratchet down the memory of the privileged domain to the memory required
by the PV guest. However, for PV guests with PCI pass-through we do not
take into account that the PV guest is going to swap its SWIOTLB memory
for DMA32 memory - 64MB of it, in fact. This patch balloons down
the privileged domain so that 64MB of DMA32 memory is available.

Note: If 'dom0_mem' is used, the user will probably never encounter this
failure.
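
For illustration only (not part of the patch), here is a minimal sketch of how a
caller could read the new 'node_to_dma32_mem' field that this change exports
through xc.physinfo(), and of the 64MB-plus-2MB-slack calculation the xend side
uses below. It assumes the usual xen.lowlevel.xc bindings and Python 2 (as used
by the xend tools); all values are in KiB, as physinfo() reports them.

import xen.lowlevel.xc

xc = xen.lowlevel.xc.xc()
physinfo = xc.physinfo()

for node in range(physinfo['nr_nodes']):
    dma32_kib = physinfo['node_to_dma32_mem'][node]
    # 64MB for the guest's SWIOTLB plus 2MB of slack, minus what is
    # already free below 4GB on this node.
    shortfall = 64 * 1024 + 2048 - dma32_kib
    if shortfall > 0:
        print 'node%d is %dKiB short of DMA32 memory' % (node, shortfall)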

P.S.
If the guest prints

about to get started...

and nothing after that, and xenctx shows:

Call Trace:
  [<ffffffff8132cfe3>] __const_udelay+0x1e  <--
  [<ffffffff816b9043>] panic+0x1c0
  [<ffffffff81013335>] xen_swiotlb_fixup+0x123
  [<ffffffff81a05e17>] xen_swiotlb_init_with_default_size+0x9c
  [<ffffffff81a05f91>] xen_swiotlb_init+0x4b
  [<ffffffff81a0ab72>] pci_iommu_alloc+0x86
  [<ffffffff81a22972>] mem_init+0x28
  [<ffffffff813201a9>] sort_extable+0x39
  [<ffffffff819feb90>] start_kernel+0x301
  [<ffffffff819fdf76>] x86_64_start_reservations+0x101
  [<ffffffff81a03cdf>] xen_start_kernel+0x715

Then this is the patch that fixes it.

diff -r 88adf22e0fe3 -r 82762bc10aa5 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Nov 13 17:10:09 2009 -0500
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Nov 13 17:11:58 2009 -0500
@@ -1059,6 +1059,7 @@
     int i, j, max_cpu_id;
     uint64_t free_heap;
     PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj;
+    PyObject *node_to_dma32_mem_obj;
     xc_cpu_to_node_t map[MAX_CPU_ID + 1];
     const char *virtcap_names[] = { "hvm", "hvm_directio" };
 
@@ -1128,10 +1129,27 @@
         Py_DECREF(pyint);
     }
 
+    xc_dom_loginit();
+    /* DMA32 memory per node. */
+    node_to_dma32_mem_obj = PyList_New(0);
+
+    for ( i = 0; i < info.nr_nodes; i++ )
+    {
+        PyObject *pyint;
+
+        xc_availheap(self->xc_handle, 0, 32, i, &free_heap);
+        xc_dom_printf("Node:%d: DMA32:%ld\n", i, free_heap);
+        pyint = PyInt_FromLong(free_heap / 1024);
+        PyList_Append(node_to_dma32_mem_obj, pyint);
+        Py_DECREF(pyint);
+    }
+
     PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
     Py_DECREF(node_to_cpu_obj);
     PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
     Py_DECREF(node_to_memory_obj);
+    PyDict_SetItemString(ret_obj, "node_to_dma32_mem", node_to_dma32_mem_obj);
+    Py_DECREF(node_to_dma32_mem_obj);
  
     return ret_obj;
 #undef MAX_CPU_ID
diff -r 88adf22e0fe3 -r 82762bc10aa5 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Fri Nov 13 17:10:09 2009 -0500
+++ b/tools/python/xen/xend/XendConfig.py       Fri Nov 13 17:11:58 2009 -0500
@@ -2111,6 +2111,13 @@
     def is_hap(self):
         return self['platform'].get('hap', 0)
 
+    def is_pv_and_has_pci(self):
+        for dev_type, dev_info in self.all_devices_sxpr():
+            if dev_type != 'pci':
+                continue
+            return not self.is_hvm()
+        return False
+
     def update_platform_pci(self):
         pci = []
         for dev_type, dev_info in self.all_devices_sxpr():
diff -r 88adf22e0fe3 -r 82762bc10aa5 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Nov 13 17:10:09 2009 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Nov 13 17:11:58 2009 -0500
@@ -2580,7 +2580,8 @@
 
 
     def _setCPUAffinity(self):
-        """ Repin domain vcpus if a restricted cpus list is provided
+        """ Repin domain vcpus if a restricted cpus list is provided.
+            Returns the chosen node number.
         """
 
         def has_cpus():
@@ -2597,6 +2598,7 @@
                         return True
             return False
 
+        index = 0
         if has_cpumap():
             for v in range(0, self.info['VCPUs_max']):
                 if self.info['vcpus_params'].has_key('cpumap%i' % v):
@@ -2647,6 +2649,54 @@
                 cpumask = info['node_to_cpu'][index]
                 for v in range(0, self.info['VCPUs_max']):
                     xc.vcpu_setaffinity(self.domid, v, cpumask)
+        return index
+
+    def _freeDMAmemory(self, node):
+
+       # If the guest is PV and has PCI devices, it will turn on the
+       # SWIOTLB. The SWIOTLB _MUST_ be located in the DMA32 zone
+       # (under 4GB). To make that possible we balloon down Dom0 until
+       # there is enough (64MB) memory under the 4GB mark. This ballooning
+       # might take more than just 64MB out of Dom0 though :-(
+       if not self.info.is_pv_and_has_pci():
+           return
+
+       retries = 2000
+       ask_for_mem = 0
+       need_mem = 0
+       try:
+           while (retries > 0):
+               physinfo = xc.physinfo()
+               free_mem = physinfo['free_memory']
+               nr_nodes = physinfo['nr_nodes']
+               node_to_dma32_mem = physinfo['node_to_dma32_mem']
+               if (node >= nr_nodes):
+                    return
+               # An extra 2MB on top of the 64MB seems to do the trick.
+               need_mem = 64 * 1024 + 2048 - node_to_dma32_mem[node]
+               # Our starting point: we ask just for the difference needed
+               # to have an extra 64MB under 4GB.
+               ask_for_mem = max(need_mem, ask_for_mem)
+               if (need_mem > 0):
+                    log.debug('_freeDMAmemory (%d) Need %dKiB DMA memory. '
+                              'Asking for %dKiB', retries, need_mem,
+                              ask_for_mem)
+
+                    balloon.free(ask_for_mem, self)
+                    ask_for_mem = ask_for_mem + 2048
+               else:
+                    # OK. We got enough DMA memory.
+                    break
+               retries = retries - 1
+       except:
+           # This is best-effort after all.
+           need_mem = max(1, need_mem)
+           pass
+
+       if (need_mem > 0):
+           log.warn('We tried our best to balloon down DMA memory to '
+                    'accommodate your PV guest. We need %dKiB extra memory.',
+                    need_mem)
 
     def _setSchedParams(self):
         if XendNode.instance().xenschedinfo() == 'credit':
@@ -2668,7 +2718,7 @@
             # repin domain vcpus if a restricted cpus list is provided
             # this is done prior to memory allocation to aide in memory
             # distribution for NUMA systems.
-            self._setCPUAffinity()
+            node = self._setCPUAffinity()
 
             # Set scheduling parameters.
             self._setSchedParams()
@@ -2730,6 +2780,8 @@
             if self.info.target():
                 self._setTarget(self.info.target())
 
+            self._freeDMAmemory(node)
+
             self._createDevices()
 
             self.image.cleanupTmpImages()
diff -r 88adf22e0fe3 -r 82762bc10aa5 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Fri Nov 13 17:10:09 2009 -0500
+++ b/tools/python/xen/xend/XendNode.py Fri Nov 13 17:11:58 2009 -0500
@@ -872,11 +872,11 @@
         except:
             str='none\n'
         return str[:-1];
-    def format_node_to_memory(self, pinfo):
+    def format_node_to_memory(self, pinfo, key):
         str=''
         whitespace=''
         try:
-            node_to_memory=pinfo['node_to_memory']
+            node_to_memory=pinfo[key]
             for i in range(0, pinfo['nr_nodes']):
                 str+='%snode%d:%d\n' % (whitespace,
                                         i,
@@ -896,7 +896,10 @@
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory']  = info['free_memory'] / 1024
         info['node_to_cpu']  = self.format_node_to_cpu(info)
-        info['node_to_memory'] = self.format_node_to_memory(info)
+        info['node_to_memory'] = self.format_node_to_memory(info,
+                                       'node_to_memory')
+        info['node_to_dma32_mem'] = self.format_node_to_memory(info,
+                                       'node_to_dma32_mem')
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
@@ -908,7 +911,8 @@
                       'total_memory',
                       'free_memory',
                       'node_to_cpu',
-                      'node_to_memory'
+                      'node_to_memory',
+                      'node_to_dma32_mem'
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel