Jeremy Fitzhardinge wrote:
> On 06/17/2010 09:16 AM, Xu, Dongxiao wrote:
>> Ian,
>>
>> Sorry for the late response, I was on vacation days before.
>>
>> Ian Campbell wrote:
>>
>>> On Thu, 2010-06-10 at 12:48 +0100, Xu, Dongxiao wrote:
>>>
>>>> Hi Jeremy,
>>>>
>>>> The attached patch should fix the PV network issue after applying
>>>> the netback multiple threads patchset.
>>>>
>>> Thanks for this Donxiao. Do you think this crash was a potential
>>> symptom of this issue? It does seem to go away if I apply your
>>> patch.
>>>
>> Actually, the phenomenon is the same on my side without the fixing
>> patch.
>>
>>
>>> BUG: unable to handle kernel paging request at 70000027
>>> IP: [<c0294867>] make_tx_response+0x17/0xd0
>>> *pdpt = 0000000000000000
>>> Oops: 0000 [#2] SMP
>>> last sysfs file:
>>> Modules linked in:
>>> Supported: Yes
>>>
>>> Pid: 1083, comm: netback/0 Tainted: G D
>>> (2.6.27.45-0.1.1-x86_32p-xen #222) EIP: 0061:[<c0294867>]
>>> EFLAGS: 00010296 CPU: 0 EIP is at make_tx_response+0x17/0xd0
>>> EAX: 6fffffff EBX: 00000000 ECX: 00000000 EDX: f00610a4
>>> ESI: 6fffffff EDI: f00620a4 EBP: ed0c3f18 ESP: ed0c3f0c
>>> DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: e021
>>> Process netback/0 (pid: 1083, ti=ed0c2000 task=ee9de070
>>> task.ti=ed0c2000) Stack: 00000000 00000000 f00620a4 ed0c3fa8
>>> c029676a c0456000 ee9de070 ed0c3fd0 ed0c3f94 00000002
>>> ed0c3fb8 f0062ca4 f0061000 6fffffff 011d9000 f00620a4
>>> f006108c ed0c3f5c c04ffb00 c04ffb00 ed0c3fc0 ed0c3fbc
>>> ed0c3fb8 ed0c2000 Call Trace: [<c029676a>] ?
>>> net_tx_action+0x32a/0xa50 [<c0296f62>] ?
>>> netbk_action_thread+0x62/0x190 [<c0296f00>] ?
>>> netbk_action_thread+0x0/0x190 [<c013f84c>] ?
>>> kthread+0x3c/0x70 [<c013f810>] ? kthread+0x0/0x70
>>> [<c0105633>] ? kernel_thread_helper+0x7/0x10
>>> =======================
>>> Code: ec 8d 41 01 89 47 2c c7 45 e4 ea ff ff ff eb dd 8d 74
>>> 26 00 55 66 0f be c9 89 e5 83 ec 0c 89 74 24 04 89 c6 89 1c
>>> 24 89 7c 24 08 <8b> 78 28 8b 40 30 0f b7 5a 08 83 e8 01 21
>>> f8 8d 04 40 c1 e0 02 EIP: [<c0294867>] make_tx_response+0x17/0xd0
>>> SS:ESP e021:ed0c3f0c ---[ end trace f7e370bf10f6f981 ]---
>>>
>>> The crash is in one of the calls to list_move_tail and I think it is
>>> because netbk->pending_inuse_head not being initialised until after
>>> the threads and/or tasklets are created (I was running in threaded
>>> mode). Perhaps even though we are now zeroing the netbk struct
>>> those fields should still be initialised before kicking off any
>>> potentially asynchronous tasks?
>>>
>> You are right, I will commit another patch to fix it.
>>
>
> Does this patch address it, or does it need something else?
Hi Jeremy,
The patch is good except that we should move mmap_pages related
code back after it is initialized. So I modified your patch a little.
Thanks,
Dongxiao
Subject: [PATCH] xen/netback: make sure all the group structures are
initialized before starting async code
Split the netbk group array initialization into two phases: one to do
simple "can't fail" initialization of lists, timers, locks, etc; and a
second pass to allocate memory and start the async code.
This makes sure that all the basic stuff is in place by the time the async code
is running.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
---
drivers/xen/netback/netback.c | 46 +++++++++++++++++++++++-----------------
1 files changed, 26 insertions(+), 20 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 9a7ada2..f5d8952 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1750,8 +1750,10 @@ static int __init netback_init(void)
/* We can increase reservation by this much in net_rx_action(). */
// balloon_update_driver_allowance(NET_RX_RING_SIZE);
+ /* First pass - do simple "can't fail" setup */
for (group = 0; group < xen_netbk_group_nr; group++) {
struct xen_netbk *netbk = &xen_netbk[group];
+
skb_queue_head_init(&netbk->rx_queue);
skb_queue_head_init(&netbk->tx_queue);
@@ -1764,12 +1766,27 @@ static int __init netback_init(void)
netbk->netbk_tx_pending_timer.function =
netbk_tx_pending_timeout;
+ netbk->pending_cons = 0;
+ netbk->pending_prod = MAX_PENDING_REQS;
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ netbk->pending_ring[i] = i;
+
+ INIT_LIST_HEAD(&netbk->pending_inuse_head);
+ INIT_LIST_HEAD(&netbk->net_schedule_list);
+
+ spin_lock_init(&netbk->net_schedule_list_lock);
+
+ atomic_set(&netbk->netfront_count, 0);
+ }
+
+ /* Second pass - do memory allocation and initialize threads/tasklets */
+ for (group = 0; group < xen_netbk_group_nr; group++) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+
netbk->mmap_pages =
alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
if (!netbk->mmap_pages) {
printk(KERN_ALERT "%s: out of memory\n", __func__);
- del_timer(&netbk->netbk_tx_pending_timer);
- del_timer(&netbk->net_timer);
rc = -ENOMEM;
goto failed_init;
}
@@ -1781,11 +1798,6 @@ static int __init netback_init(void)
INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
}
- netbk->pending_cons = 0;
- netbk->pending_prod = MAX_PENDING_REQS;
- for (i = 0; i < MAX_PENDING_REQS; i++)
- netbk->pending_ring[i] = i;
-
if (MODPARM_netback_kthread) {
init_waitqueue_head(&netbk->kthread.netbk_action_wq);
netbk->kthread.task =
@@ -1801,8 +1813,6 @@ static int __init netback_init(void)
"kthread_run() fails at netback\n");
free_empty_pages_and_pagevec(netbk->mmap_pages,
MAX_PENDING_REQS);
- del_timer(&netbk->netbk_tx_pending_timer);
- del_timer(&netbk->net_timer);
rc = PTR_ERR(netbk->kthread.task);
goto failed_init;
}
@@ -1814,13 +1824,6 @@ static int __init netback_init(void)
net_rx_action,
(unsigned long)netbk);
}
-
- INIT_LIST_HEAD(&netbk->pending_inuse_head);
- INIT_LIST_HEAD(&netbk->net_schedule_list);
-
- spin_lock_init(&netbk->net_schedule_list_lock);
-
- atomic_set(&netbk->netfront_count, 0);
}
netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
@@ -1850,13 +1853,16 @@ static int __init netback_init(void)
return 0;
failed_init:
- for (i = 0; i < group; i++) {
+ for (i = 0; i < xen_netbk_group_nr; i++) {
struct xen_netbk *netbk = &xen_netbk[i];
- free_empty_pages_and_pagevec(netbk->mmap_pages,
- MAX_PENDING_REQS);
+
del_timer(&netbk->netbk_tx_pending_timer);
del_timer(&netbk->net_timer);
- if (MODPARM_netback_kthread)
+
+ if (netbk->mmap_pages)
+ free_empty_pages_and_pagevec(netbk->mmap_pages,
+ MAX_PENDING_REQS);
+ if (MODPARM_netback_kthread && netbk->kthread.task)
kthread_stop(netbk->kthread.task);
}
vfree(xen_netbk);
--
1.6.3
>
> Subject: [PATCH] xen/netback: make sure all the group structures are
> initialized before starting async code
>
> Split the netbk group array initialization into two phases: one to do
> simple "can't fail" initialization of lists, timers, locks, etc; and a
> second pass to allocate memory and start the async code.
>
> This makes sure that all the basic stuff is in place by the time the
> async code
> is running.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
>
> diff --git a/drivers/xen/netback/netback.c
> b/drivers/xen/netback/netback.c
> index 9a7ada2..95de476 100644
> --- a/drivers/xen/netback/netback.c
> +++ b/drivers/xen/netback/netback.c
> @@ -1750,8 +1750,10 @@ static int __init netback_init(void)
> /* We can increase reservation by this much in net_rx_action(). */
> // balloon_update_driver_allowance(NET_RX_RING_SIZE);
>
> + /* First pass - do simple "can't fail" setup */
> for (group = 0; group < xen_netbk_group_nr; group++) {
> struct xen_netbk *netbk = &xen_netbk[group];
> +
> skb_queue_head_init(&netbk->rx_queue);
> skb_queue_head_init(&netbk->tx_queue);
>
> @@ -1764,16 +1766,6 @@ static int __init netback_init(void)
> netbk->netbk_tx_pending_timer.function =
> netbk_tx_pending_timeout;
>
> - netbk->mmap_pages =
> - alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
> - if (!netbk->mmap_pages) {
> - printk(KERN_ALERT "%s: out of memory\n", __func__);
> - del_timer(&netbk->netbk_tx_pending_timer);
> - del_timer(&netbk->net_timer);
> - rc = -ENOMEM;
> - goto failed_init;
> - }
> -
> for (i = 0; i < MAX_PENDING_REQS; i++) {
> page = netbk->mmap_pages[i];
> SetPageForeign(page, netif_page_release);
> @@ -1786,6 +1778,26 @@ static int __init netback_init(void)
> for (i = 0; i < MAX_PENDING_REQS; i++)
> netbk->pending_ring[i] = i;
>
> + INIT_LIST_HEAD(&netbk->pending_inuse_head);
> + INIT_LIST_HEAD(&netbk->net_schedule_list);
> +
> + spin_lock_init(&netbk->net_schedule_list_lock);
> +
> + atomic_set(&netbk->netfront_count, 0);
> + }
> +
> + /* Second pass - do memory allocation and initialize
> threads/tasklets */ + for (group = 0; group < xen_netbk_group_nr;
> group++) { + struct xen_netbk *netbk = &xen_netbk[group];
> +
> + netbk->mmap_pages =
> + alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
> + if (!netbk->mmap_pages) {
> + printk(KERN_ALERT "%s: out of memory\n", __func__);
> + rc = -ENOMEM;
> + goto failed_init;
> + }
> +
> if (MODPARM_netback_kthread) {
> init_waitqueue_head(&netbk->kthread.netbk_action_wq);
> netbk->kthread.task =
> @@ -1801,8 +1813,6 @@ static int __init netback_init(void)
> "kthread_run() fails at netback\n");
> free_empty_pages_and_pagevec(netbk->mmap_pages,
> MAX_PENDING_REQS);
> - del_timer(&netbk->netbk_tx_pending_timer);
> - del_timer(&netbk->net_timer);
> rc = PTR_ERR(netbk->kthread.task);
> goto failed_init;
> }
> @@ -1814,13 +1824,6 @@ static int __init netback_init(void)
> net_rx_action,
> (unsigned long)netbk);
> }
> -
> - INIT_LIST_HEAD(&netbk->pending_inuse_head);
> - INIT_LIST_HEAD(&netbk->net_schedule_list);
> -
> - spin_lock_init(&netbk->net_schedule_list_lock);
> -
> - atomic_set(&netbk->netfront_count, 0);
> }
>
> netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
> @@ -1850,13 +1853,16 @@ static int __init netback_init(void)
> return 0;
>
> failed_init:
> - for (i = 0; i < group; i++) {
> + for (i = 0; i < xen_netbk_group_nr; i++) {
> struct xen_netbk *netbk = &xen_netbk[i];
> - free_empty_pages_and_pagevec(netbk->mmap_pages,
> - MAX_PENDING_REQS);
> +
> del_timer(&netbk->netbk_tx_pending_timer);
> del_timer(&netbk->net_timer);
> - if (MODPARM_netback_kthread)
> +
> + if (netbk->mmap_pages)
> + free_empty_pages_and_pagevec(netbk->mmap_pages,
> + MAX_PENDING_REQS);
> + if (MODPARM_netback_kthread && netbk->kthread.task)
> kthread_stop(netbk->kthread.task);
> }
> vfree(xen_netbk);
>
> J
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|