It is possible for netpoll to deadlock by attempting to take the network
device's xmit_lock twice on different processors. This patch resolves this by
deferring the transmission of the second and subsequent skbs.

Signed-off-by: Mark Broadbent

--- linux-2.6.10-org/net/core/netpoll.c	2005-01-05 22:30:53.000000000 +0000
+++ linux-2.6.10/net/core/netpoll.c	2005-01-07 20:10:20.000000000 +0000
@@ -1,6 +1,9 @@
 /*
  * Common framework for low-level network console, dump, and debugger code
  *
+ * Jan 5 2005 Mark Broadbent
+ *	avoid deadlock on transmission
+ *
  * Sep 8 2003 Matt Mackall
  *
  * based on the netconsole code from:
@@ -19,6 +22,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -31,6 +36,22 @@
 #define MAX_SKBS 32
 #define MAX_UDP_CHUNK 1460
 
+static void tx_retry_wq(void *p);
+
+struct tx_queue_list {
+	struct list_head link;
+	struct sk_buff *skb;
+	struct netpoll *np;
+};
+
+static spinlock_t tx_list_lock = SPIN_LOCK_UNLOCKED;
+static struct tx_queue_list tx_queue = {
+	.link = LIST_HEAD_INIT(tx_queue.link)
+};
+
+static DECLARE_WORK(tx_wq_obj, tx_retry_wq, NULL);
+
+
 static spinlock_t skb_list_lock = SPIN_LOCK_UNLOCKED;
 static int nr_skbs;
 static struct sk_buff *skbs;
@@ -178,17 +199,21 @@ repeat:
 	return skb;
 }
 
-void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+/* The xmit_lock may already be held elsewhere, so it is only taken with
+ * spin_trylock and is released again before returning.  A non-zero return
+ * value means the transmit could not be done now and must be retried later.
+ */
+static int transmit_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	int status;
 
-repeat:
-	if(!np || !np->dev || !netif_running(np->dev)) {
-		__kfree_skb(skb);
-		return;
-	}
-
-	spin_lock(&np->dev->xmit_lock);
+	if (!np || !np->dev || !netif_running(np->dev))
+		return -1;
+
+	/* Attempt the xmit lock as we may already hold it */
+	if (!spin_trylock(&np->dev->xmit_lock))
+		return -1;
+
 	np->dev->xmit_lock_owner = smp_processor_id();
 
 	/*
@@ -200,7 +225,7 @@ repeat:
 		spin_unlock(&np->dev->xmit_lock);
 
 		netpoll_poll(np);
-		goto repeat;
+		return -1;
 	}
 
 	status = np->dev->hard_start_xmit(skb, np->dev);
@@ -210,8 +235,73 @@ repeat:
 	/* transmit busy */
 	if(status) {
 		netpoll_poll(np);
-		goto repeat;
+		return -1;
 	}
+
+	return 0;
+}
+
+static void tx_retry_wq(void *p)
+{
+	struct list_head *tx_ctr, *tx_tmp;
+	struct tx_queue_list *tx_el;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tx_list_lock, flags);
+
+	list_for_each_safe(tx_ctr, tx_tmp, &tx_queue.link) {
+		tx_el = list_entry(tx_ctr, struct tx_queue_list, link);
+
+		if (transmit_skb(tx_el->np, tx_el->skb)) {
+			/* Transmit failed - reschedule */
+			schedule_delayed_work(&tx_wq_obj, HZ/100);
+			goto out_unlock;
+		}
+
+		list_del(tx_ctr);
+		__kfree_skb(tx_el->skb);
+		kfree(tx_el);
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&tx_list_lock, flags);
+}
+
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+{
+	/* If the list is non-empty then packets are already pending, or the
+	 * immediate transmit failed; append to the tail of the list so that
+	 * packets are not reordered.
+	 */
+	if (!list_empty(&tx_queue.link) || transmit_skb(np, skb)) {
+		struct tx_queue_list *txel;
+
+		/* If we failed to take the xmit_lock then netpoll already
+		 * holds it and relocking here would deadlock, so queue the
+		 * skb and transmit it later from the workqueue.
+		 * This function is always called with local irqs disabled,
+		 * hence the plain spin_lock rather than spin_lock_irqsave.
+		 */
+		spin_lock(&tx_list_lock);
+
+		txel = kmalloc(sizeof(struct tx_queue_list), GFP_ATOMIC);
+		if (!txel)
+			goto out_free;
+
+		txel->skb = skb_get(skb);
+		txel->np = np;
+
+		list_add_tail(&txel->link, &tx_queue.link);
+
+		schedule_delayed_work(&tx_wq_obj, HZ/100);
+
+		spin_unlock(&tx_list_lock);
+	}
+
+	return;
+
+out_free:
+	spin_unlock(&tx_list_lock);	/* only reached with the lock held */
+	__kfree_skb(skb);
 }
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
@@ -644,6 +734,23 @@ int netpoll_setup(struct netpoll *np)
 
 void netpoll_cleanup(struct netpoll *np)
 {
+	struct list_head *tx_ctr, *tx_tmp;
+	struct tx_queue_list *tx_el;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tx_list_lock, flags);
+	list_for_each_safe(tx_ctr, tx_tmp, &tx_queue.link) {
+		tx_el = list_entry(tx_ctr, struct tx_queue_list, link);
+
+		/* This netpoll is going away - remove just its entries */
+		if (tx_el->np == np) {
+			list_del(tx_ctr);
+			__kfree_skb(tx_el->skb);
+			kfree(tx_el);
+		}
+	}
+	spin_unlock_irqrestore(&tx_list_lock, flags);
+
 	if (np->rx_hook) {
 		unsigned long flags;