* [Patch} to fix oops in olympic token ring driver on media disconnect
@ 2004-08-13 19:11 Neil Horman
2004-08-13 22:07 ` Alan Cox
0 siblings, 1 reply; 9+ messages in thread
From: Neil Horman @ 2004-08-13 19:11 UTC (permalink / raw)
To: linux-kernel; +Cc: Pete Zaitcev, mike_phillips
[-- Attachment #1: Type: text/plain, Size: 1264 bytes --]
Hey all-
I've put this patch together to fix up an oops in the olympic token
ring driver. The problem was orgionally reported as an oops which
occurs on media disconnect. The problem was that free_irq was being
called from several places in the olympic interrupt handler, which
triggered the subsequent oops. This patch corrects that by removing the
free_irq call from all the interrupt handler code paths, instead
allowing the olympic_close to handle the free_irq on an interface down
event. The patch also cleans up some double free conditions on the
receive ring buffer that arose due to this change. Lastly the patch
corrects a situation in which calling ifdown on an interface owned by
the driver would hang the process in cases where the adaper needed to be
reset to complete any operations, but never woke the wait queue which
the olympic_close routine was waiting on. This patch cleans that up.
Tested by me, on 2.4 and 2.6 with good, working results, and no more oopses.
Thanks!
Neil
--
--
/***************************************************
*Neil Horman
*Software Engineer
*Red Hat, Inc.
*nhorman@redhat.com
*gpg keyid: 1024D / 0x92A74FA1
*http://pgp.mit.edu
***************************************************/
[-- Attachment #2: linux-2.6.7-olympic-oops.patch --]
[-- Type: text/plain, Size: 6792 bytes --]
--- linux-2.6.6-1.435.2.3/drivers/net/tokenring/olympic.c 2004-07-01 08:24:45.000000000 -0400
+++ linux-2.6.6-1.435.2.3-olympic/drivers/net/tokenring/olympic.c 2004-08-13 13:14:44.000000000 -0400
@@ -220,8 +220,10 @@ static int __devinit olympic_probe(struc
}
olympic_priv = dev->priv ;
+ spin_lock_init(&olympic_priv->olympic_lock) ;
+
init_waitqueue_head(&olympic_priv->srb_wait);
init_waitqueue_head(&olympic_priv->trb_wait);
#if OLYMPIC_DEBUG
printk(KERN_INFO "pci_device: %p, dev:%p, dev->priv: %p\n", pdev, dev, dev->priv);
@@ -310,9 +312,8 @@ static int __devinit olympic_init(struct
return -ENODEV;
}
}
- spin_lock_init(&olympic_priv->olympic_lock) ;
/* Needed for cardbus */
if(!(readl(olympic_mmio+BCTL) & BCTL_MODE_INDICATOR)) {
writel(readl(olympic_priv->olympic_mmio+FERMASK)|FERMASK_INT_BIT, olympic_mmio+FERMASK);
@@ -441,8 +442,10 @@ static int olympic_open(struct net_devic
int i, open_finished = 1 ;
DECLARE_WAITQUEUE(wait,current) ;
+ olympic_init(dev);
+
if(request_irq(dev->irq, &olympic_interrupt, SA_SHIRQ , "olympic", dev)) {
return -EAGAIN;
}
@@ -897,9 +900,12 @@ static void olympic_freemem(struct net_d
struct olympic_private *olympic_priv=(struct olympic_private *)dev->priv;
int i;
for(i=0;i<OLYMPIC_RX_RING_SIZE;i++) {
- dev_kfree_skb_irq(olympic_priv->rx_ring_skb[olympic_priv->rx_status_last_received]);
+ if (olympic_priv->rx_ring_skb[olympic_priv->rx_status_last_received] != NULL) {
+ dev_kfree_skb_irq(olympic_priv->rx_ring_skb[olympic_priv->rx_status_last_received]);
+ olympic_priv->rx_ring_skb[olympic_priv->rx_status_last_received] = NULL;
+ }
if (olympic_priv->olympic_rx_ring[olympic_priv->rx_status_last_received].buffer != 0xdeadbeef) {
pci_unmap_single(olympic_priv->pdev,
le32_to_cpu(olympic_priv->olympic_rx_ring[olympic_priv->rx_status_last_received].buffer),
olympic_priv->pkt_buf_sz, PCI_DMA_FROMDEVICE);
@@ -943,11 +949,8 @@ static irqreturn_t olympic_interrupt(int
/* Hotswap gives us this on removal */
if (sisr == 0xffffffff) {
printk(KERN_WARNING "%s: Hotswap adapter removal.\n",dev->name) ;
- olympic_freemem(dev) ;
- free_irq(dev->irq, dev) ;
- dev->stop = NULL ;
spin_unlock(&olympic_priv->olympic_lock) ;
return IRQ_NONE;
}
@@ -960,11 +963,9 @@ static irqreturn_t olympic_interrupt(int
printk(KERN_ERR "Olympic: EISR Error, EISR=%08x\n",readl(olympic_mmio+EISR)) ;
printk(KERN_ERR "The adapter must be reset to clear this condition.\n") ;
printk(KERN_ERR "Please report this error to the driver maintainer and/\n") ;
printk(KERN_ERR "or the linux-tr mailing list.\n") ;
- olympic_freemem(dev) ;
- free_irq(dev->irq, dev) ;
- dev->stop = NULL ;
+ wake_up_interruptible(&olympic_priv->srb_wait);
spin_unlock(&olympic_priv->olympic_lock) ;
return IRQ_HANDLED;
} /* SISR_ERR */
@@ -1005,11 +1006,8 @@ static irqreturn_t olympic_interrupt(int
printk(KERN_WARNING "%s: Adapter Check Interrupt Raised, 8 bytes of information follow:\n", dev->name);
writel(readl(olympic_mmio+LAPWWC),olympic_mmio+LAPA);
adapter_check_area = olympic_priv->olympic_lap + ((readl(olympic_mmio+LAPWWC)) & (~0xf800)) ;
printk(KERN_WARNING "%s: Bytes %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",dev->name, readb(adapter_check_area+0), readb(adapter_check_area+1), readb(adapter_check_area+2), readb(adapter_check_area+3), readb(adapter_check_area+4), readb(adapter_check_area+5), readb(adapter_check_area+6), readb(adapter_check_area+7)) ;
- olympic_freemem(dev) ;
- free_irq(dev->irq, dev) ;
- dev->stop = NULL ;
spin_unlock(&olympic_priv->olympic_lock) ;
return IRQ_HANDLED;
} /* SISR_ADAPTER_CHECK */
@@ -1093,36 +1091,34 @@ static int olympic_close(struct net_devi
writeb(SRB_CLOSE_ADAPTER,srb+0);
writeb(0,srb+1);
writeb(OLYMPIC_CLEAR_RET_CODE,srb+2);
+ add_wait_queue(&olympic_priv->srb_wait,&wait) ;
+ set_current_state(TASK_INTERRUPTIBLE) ;
+
spin_lock_irqsave(&olympic_priv->olympic_lock,flags);
olympic_priv->srb_queued=1;
writel(LISR_SRB_CMD,olympic_mmio+LISR_SUM);
spin_unlock_irqrestore(&olympic_priv->olympic_lock,flags);
-
- t = jiffies ;
-
- add_wait_queue(&olympic_priv->srb_wait,&wait) ;
- set_current_state(TASK_INTERRUPTIBLE) ;
while(olympic_priv->srb_queued) {
- schedule() ;
+
+ t = schedule_timeout(60*HZ);
+
if(signal_pending(current)) {
printk(KERN_WARNING "%s: SRB timed out.\n",dev->name);
printk(KERN_WARNING "SISR=%x MISR=%x\n",readl(olympic_mmio+SISR),readl(olympic_mmio+LISR));
olympic_priv->srb_queued=0;
break;
}
- if ((jiffies-t) > 60*HZ) {
+
+ if (t == 0) {
printk(KERN_WARNING "%s: SRB timed out. May not be fatal. \n",dev->name) ;
- olympic_priv->srb_queued=0;
- break ;
}
- set_current_state(TASK_INTERRUPTIBLE) ;
+ olympic_priv->srb_queued=0;
}
remove_wait_queue(&olympic_priv->srb_wait,&wait) ;
- set_current_state(TASK_RUNNING) ;
olympic_priv->rx_status_last_received++;
olympic_priv->rx_status_last_received&=OLYMPIC_RX_RING_SIZE-1;
@@ -1512,31 +1508,8 @@ drop_frame:
udelay(1);
writel(readl(olympic_mmio+BCTL)&~(3<<13),olympic_mmio+BCTL);
netif_stop_queue(dev);
olympic_priv->srb = readw(olympic_priv->olympic_lap + LAPWWO) ;
- for(i=0;i<OLYMPIC_RX_RING_SIZE;i++) {
- dev_kfree_skb_irq(olympic_priv->rx_ring_skb[olympic_priv->rx_status_last_received]);
- if (olympic_priv->olympic_rx_ring[olympic_priv->rx_status_last_received].buffer != 0xdeadbeef) {
- pci_unmap_single(olympic_priv->pdev,
- le32_to_cpu(olympic_priv->olympic_rx_ring[olympic_priv->rx_status_last_received].buffer),
- olympic_priv->pkt_buf_sz, PCI_DMA_FROMDEVICE);
- }
- olympic_priv->rx_status_last_received++;
- olympic_priv->rx_status_last_received&=OLYMPIC_RX_RING_SIZE-1;
- }
- /* unmap rings */
- pci_unmap_single(olympic_priv->pdev, olympic_priv->rx_status_ring_dma_addr,
- sizeof(struct olympic_rx_status) * OLYMPIC_RX_RING_SIZE, PCI_DMA_FROMDEVICE);
- pci_unmap_single(olympic_priv->pdev, olympic_priv->rx_ring_dma_addr,
- sizeof(struct olympic_rx_desc) * OLYMPIC_RX_RING_SIZE, PCI_DMA_TODEVICE);
-
- pci_unmap_single(olympic_priv->pdev, olympic_priv->tx_status_ring_dma_addr,
- sizeof(struct olympic_tx_status) * OLYMPIC_TX_RING_SIZE, PCI_DMA_FROMDEVICE);
- pci_unmap_single(olympic_priv->pdev, olympic_priv->tx_ring_dma_addr,
- sizeof(struct olympic_tx_desc) * OLYMPIC_TX_RING_SIZE, PCI_DMA_TODEVICE);
-
- free_irq(dev->irq,dev);
- dev->stop=NULL;
printk(KERN_WARNING "%s: Adapter has been closed \n", dev->name) ;
} /* If serious error */
if (olympic_priv->olympic_message_level) {
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch} to fix oops in olympic token ring driver on media disconnect
2004-08-13 19:11 [Patch} to fix oops in olympic token ring driver on media disconnect Neil Horman
@ 2004-08-13 22:07 ` Alan Cox
2004-08-13 23:35 ` Pete Zaitcev
0 siblings, 1 reply; 9+ messages in thread
From: Alan Cox @ 2004-08-13 22:07 UTC (permalink / raw)
To: Neil Horman; +Cc: Linux Kernel Mailing List, Pete Zaitcev, mike_phillips
On Gwe, 2004-08-13 at 20:11, Neil Horman wrote:
> the olympic_close routine was waiting on. This patch cleans that up.
>
> Tested by me, on 2.4 and 2.6 with good, working results, and no more oopses.
Should it not be blocking the IRQs on the chip as well ?
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch} to fix oops in olympic token ring driver on media disconnect
2004-08-13 22:07 ` Alan Cox
@ 2004-08-13 23:35 ` Pete Zaitcev
2004-08-13 23:48 ` Jeff Garzik
0 siblings, 1 reply; 9+ messages in thread
From: Pete Zaitcev @ 2004-08-13 23:35 UTC (permalink / raw)
To: Alan Cox; +Cc: Neil Horman, Linux Kernel Mailing List, mike_phillips, zaitcev
On Fri, 13 Aug 2004 23:07:16 +0100
Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> On Gwe, 2004-08-13 at 20:11, Neil Horman wrote:
> > the olympic_close routine was waiting on. This patch cleans that up.
> >
> > Tested by me, on 2.4 and 2.6 with good, working results, and no more oopses.
>
> Should it not be blocking the IRQs on the chip as well ?
I assumed that old olympic_close() did, but perhaps it didn't after all.
There is nothing like the following in it:
+#define DISABLE_IRQS(base_addr) do { \
+ writel(LISR_LIE,(base_addr)+LISR_RWM);\
+ writel(SISR_MI,(base_addr)+SISR_RWM);\
+} while(0)
This is curious. If something was never used, how do we know if it works?
Maybe it's safer just to leave things as Neil did.
-- Pete
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch} to fix oops in olympic token ring driver on media disconnect
2004-08-13 23:35 ` Pete Zaitcev
@ 2004-08-13 23:48 ` Jeff Garzik
2004-08-14 0:06 ` Neil Horman
2004-08-16 15:10 ` Mike_Phillips
0 siblings, 2 replies; 9+ messages in thread
From: Jeff Garzik @ 2004-08-13 23:48 UTC (permalink / raw)
To: Pete Zaitcev
Cc: Alan Cox, Neil Horman, Linux Kernel Mailing List, mike_phillips
Pete Zaitcev wrote:
> On Fri, 13 Aug 2004 23:07:16 +0100
> Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>
>
>>On Gwe, 2004-08-13 at 20:11, Neil Horman wrote:
>>
>>>the olympic_close routine was waiting on. This patch cleans that up.
>>>
>>>Tested by me, on 2.4 and 2.6 with good, working results, and no more oopses.
>>
>>Should it not be blocking the IRQs on the chip as well ?
>
>
> I assumed that old olympic_close() did, but perhaps it didn't after all.
> There is nothing like the following in it:
>
> +#define DISABLE_IRQS(base_addr) do { \
> + writel(LISR_LIE,(base_addr)+LISR_RWM);\
> + writel(SISR_MI,(base_addr)+SISR_RWM);\
> +} while(0)
>
> This is curious. If something was never used, how do we know if it works?
> Maybe it's safer just to leave things as Neil did.
Well, regardless, Neil's patch is IMO a good first step.
There is plenty of work in olympic for any motivated person :)
Jeff
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch} to fix oops in olympic token ring driver on media disconnect
2004-08-13 23:48 ` Jeff Garzik
@ 2004-08-14 0:06 ` Neil Horman
2004-08-16 15:10 ` Mike_Phillips
1 sibling, 0 replies; 9+ messages in thread
From: Neil Horman @ 2004-08-14 0:06 UTC (permalink / raw)
To: Jeff Garzik
Cc: Pete Zaitcev, Alan Cox, Linux Kernel Mailing List, mike_phillips
Jeff Garzik wrote:
> Pete Zaitcev wrote:
>
>> On Fri, 13 Aug 2004 23:07:16 +0100
>> Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>>
>>
>>> On Gwe, 2004-08-13 at 20:11, Neil Horman wrote:
>>>
>>>> the olympic_close routine was waiting on. This patch cleans that up.
>>>>
>>>> Tested by me, on 2.4 and 2.6 with good, working results, and no
>>>> more oopses.
>>>
>>>
>>> Should it not be blocking the IRQs on the chip as well ?
>>
>>
>>
>> I assumed that old olympic_close() did, but perhaps it didn't after all.
>> There is nothing like the following in it:
>>
>> +#define DISABLE_IRQS(base_addr) do { \
>> + writel(LISR_LIE,(base_addr)+LISR_RWM);\
>> + writel(SISR_MI,(base_addr)+SISR_RWM);\
>> +} while(0)
>>
>> This is curious. If something was never used, how do we know if it
>> works?
>> Maybe it's safer just to leave things as Neil did.
>
>
>
> Well, regardless, Neil's patch is IMO a good first step.
>
> There is plenty of work in olympic for any motivated person :)
>
> Jeff
>
>
Thanks :) I figure if someone reports system hangs or lock-ups with this
card in place, I just re-add teh irq disable stuff.
Neil
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch} to fix oops in olympic token ring driver on media disconnect
2004-08-13 23:48 ` Jeff Garzik
2004-08-14 0:06 ` Neil Horman
@ 2004-08-16 15:10 ` Mike_Phillips
2004-08-16 15:18 ` Jeff Garzik
` (2 more replies)
1 sibling, 3 replies; 9+ messages in thread
From: Mike_Phillips @ 2004-08-16 15:10 UTC (permalink / raw)
To: Jeff Garzik
Cc: Alan Cox, Linux Kernel Mailing List, Neil Horman, Pete Zaitcev
> Well, regardless, Neil's patch is IMO a good first step.
Neil's patch is to make the annoying regression test failure go away. To
be honest I have had *one* user email me that this is a problem and once I
gave them the "don't remove the cable on token ring, its not ethernet"
talk, they were fine.
> There is plenty of work in olympic for any motivated person :)
It works, its used by an ever decreasing number of users - let it have a
peaceful and graceful old age.
Mike
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch} to fix oops in olympic token ring driver on media disconnect
2004-08-16 15:10 ` Mike_Phillips
@ 2004-08-16 15:18 ` Jeff Garzik
2004-08-17 0:08 ` Pete Zaitcev
2004-08-17 0:42 ` Neil Horman
2 siblings, 0 replies; 9+ messages in thread
From: Jeff Garzik @ 2004-08-16 15:18 UTC (permalink / raw)
To: Mike_Phillips
Cc: Alan Cox, Linux Kernel Mailing List, Neil Horman, Pete Zaitcev
Mike_Phillips@URSCorp.com wrote:
>>Well, regardless, Neil's patch is IMO a good first step.
>
>
> Neil's patch is to make the annoying regression test failure go away. To
> be honest I have had *one* user email me that this is a problem and once I
> gave them the "don't remove the cable on token ring, its not ethernet"
> talk, they were fine.
Well I
* disagree the kernel should oops,
* I disagree that olympic should call free_irq in the interrupt handler, and
* I disagree that olympic should do 'dev->stop = NULL' at all, much less
in the interrupt handler
sorry,
Jeff
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch} to fix oops in olympic token ring driver on media disconnect
2004-08-16 15:10 ` Mike_Phillips
2004-08-16 15:18 ` Jeff Garzik
@ 2004-08-17 0:08 ` Pete Zaitcev
2004-08-17 0:42 ` Neil Horman
2 siblings, 0 replies; 9+ messages in thread
From: Pete Zaitcev @ 2004-08-17 0:08 UTC (permalink / raw)
To: Mike_Phillips
Cc: Jeff Garzik, Alan Cox, Linux Kernel Mailing List, Neil Horman
On Mon, 16 Aug 2004 11:10:19 -0400
Mike_Phillips@URSCorp.com wrote:
> > Well, regardless, Neil's patch is IMO a good first step.
>
> Neil's patch is to make the annoying regression test failure go away. To
> be honest I have had *one* user email me that this is a problem and once I
> gave them the "don't remove the cable on token ring, its not ethernet"
> talk, they were fine.
Maybe they are afraid of your bofh-ness, man.
> It works, its used by an ever decreasing number of users - let it have a
> peaceful and graceful old age.
I don't think this works with a bug so clear-cut as this one.
On the USB front it's so hard to find any users with meaningful
bug reports that I apply fixes for singular users, if they make
broader sense. How do you know that that one user you mentioned is
not a representative of countless IBM minions who clang their chains,
pull Token Ring cables and cry in their dungeons...
-- Pete
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Patch} to fix oops in olympic token ring driver on media disconnect
2004-08-16 15:10 ` Mike_Phillips
2004-08-16 15:18 ` Jeff Garzik
2004-08-17 0:08 ` Pete Zaitcev
@ 2004-08-17 0:42 ` Neil Horman
2 siblings, 0 replies; 9+ messages in thread
From: Neil Horman @ 2004-08-17 0:42 UTC (permalink / raw)
To: Mike_Phillips
Cc: Jeff Garzik, Alan Cox, Linux Kernel Mailing List, Pete Zaitcev
Mike_Phillips@URSCorp.com wrote:
>>Well, regardless, Neil's patch is IMO a good first step.
>>
>>
>
>Neil's patch is to make the annoying regression test failure go away. To
>be honest I have had *one* user email me that this is a problem and once I
>gave them the "don't remove the cable on token ring, its not ethernet"
>talk, they were fine.
>
>
>
I'm confused mike, it makes an annoying regression test go away, and it
fixes the problem of one, now two users, not to mention anyone else who
didn't report this. Is there something specific you don't like about
this patch? If so, please let me know.
>>There is plenty of work in olympic for any motivated person :)
>>
>>
>
>It works, its used by an ever decreasing number of users - let it have a
>peaceful and graceful old age.
>
>Mike
>
>
I don't think oopsing on a lobe fault is graceful, or peaceful to anyone
experiencing it. :)
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2004-08-17 0:42 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-08-13 19:11 [Patch} to fix oops in olympic token ring driver on media disconnect Neil Horman
2004-08-13 22:07 ` Alan Cox
2004-08-13 23:35 ` Pete Zaitcev
2004-08-13 23:48 ` Jeff Garzik
2004-08-14 0:06 ` Neil Horman
2004-08-16 15:10 ` Mike_Phillips
2004-08-16 15:18 ` Jeff Garzik
2004-08-17 0:08 ` Pete Zaitcev
2004-08-17 0:42 ` Neil Horman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).