public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Kyle McMartin <kyle@mcmartin.ca>
To: David Miller <davem@davemloft.net>
Cc: airlied@gmail.com, jkosina@suse.cz, jeffrey.t.kirsher@intel.com,
	david.vrabel@csr.com, rjw@sisk.pl, linux-kernel@vger.kernel.org,
	kernel-testers@vger.kernel.org, chrisl@vmware.com
Subject: Re: [Bug #11382] e1000e: 2.6.27-rc1 corrupts EEPROM/NVM
Date: Wed, 24 Sep 2008 15:10:22 -0400	[thread overview]
Message-ID: <20080924191022.GC2591@phobos.i.cabal.ca> (raw)
In-Reply-To: <20080924.003638.71148740.davem@davemloft.net>

On Wed, Sep 24, 2008 at 12:36:38AM -0700, David Miller wrote:
> The e1000e side here is reproducable way too easily for it to be the
> same case, as far as I see it.
> 

I've been working on a patch to detect (using a timer and checking at
up/down) whether or not the flash has been corrupted and, if it has,
rewrite it with the saved good copy (which obviously only helps if
it's the same boot).

Unfortunately, I don't have enough time to finish it before I go away
for the weekend, so I'll toss it over the wall and see if it sticks to
anything.

At a glance, one would still need to add support for refreshing the
saved copy in adapter->hw.flash when someone reprograms the good
firmware back via ethtool, and for writing the good flash back on
down/remove if a change is detected.

Bear in mind, super quick hack, and I haven't even run-tested it yet.

If nobody decides to run with it, I'll probably give it another poke
late tonight.

Definitely-not-signed-off-by-or-tested-by: Kyle

At the very least, if someone pokes in a hexdump of the firmware, we
might be able to see some of the method to the madness of the
corruption pattern.

diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index ac4e506..08cce8c 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -168,6 +168,7 @@ struct e1000_adapter {
 	struct timer_list watchdog_timer;
 	struct timer_list phy_info_timer;
 	struct timer_list blink_timer;
+	struct timer_list flash_timer;
 
 	struct work_struct reset_task;
 	struct work_struct watchdog_task;
diff --git a/drivers/net/e1000e/hw.h b/drivers/net/e1000e/hw.h
index 74f263a..ca3f645 100644
--- a/drivers/net/e1000e/hw.h
+++ b/drivers/net/e1000e/hw.h
@@ -863,6 +863,11 @@ struct e1000_hw {
 
 	u8 __iomem *hw_addr;
 	u8 __iomem *flash_address;
+	int flash_len;
+
+	u8 *flash;
+	u8 *flash_backup;
+	spinlock_t flashlock;
 
 	struct e1000_mac_info  mac;
 	struct e1000_fc_info   fc;
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index d266510..13f05f8 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -2535,6 +2535,7 @@ void e1000e_down(struct e1000_adapter *adapter)
 
 	del_timer_sync(&adapter->watchdog_timer);
 	del_timer_sync(&adapter->phy_info_timer);
+	del_timer_sync(&adapter->flash_timer);
 
 	netdev->tx_queue_len = adapter->tx_queue_len;
 	netif_carrier_off(netdev);
@@ -2922,6 +2923,33 @@ static void e1000_update_phy_info(unsigned long data)
 	e1000_get_phy_info(&adapter->hw);
 }
 
+/* Compare live flash with the probe-time copy; returns nonzero if changed. */
+static inline int e1000_test_flash(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int ret;
+
+	if (!hw->flash || !hw->flash_backup)
+		return 0;
+
+	spin_lock(&hw->flashlock);
+	/* flash_address is __iomem (ioremap): read it with memcpy_fromio() */
+	memcpy_fromio(hw->flash_backup, hw->flash_address, hw->flash_len);
+	ret = memcmp(hw->flash, hw->flash_backup, hw->flash_len);
+	spin_unlock(&hw->flashlock);
+
+	if (ret)
+		printk(KERN_ERR "AWOOOGA AWOOOGA flash changed\n");
+	return ret;
+}
+
+static void e1000_flash_test(unsigned long data)
+{
+	struct e1000_adapter *nic = (struct e1000_adapter *)data;
+	/* timer callback: the check's return value is not used here */
+	e1000_test_flash(nic);
+	mod_timer(&nic->flash_timer, jiffies + 20 * HZ); /* re-arm in 20s */
+}
+
 /**
  * e1000e_update_stats - Update the board statistics counters
  * @adapter: board private structure
@@ -4439,6 +4467,22 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 		adapter->hw.flash_address = ioremap(flash_start, flash_len);
 		if (!adapter->hw.flash_address)
 			goto err_flashmap;
+
+		adapter->hw.flash_len = (int)flash_len;
+		/* stash away a copy of the flash, and allocate
+		   space for a second copy... */
+		if (!adapter->hw.flash) {
+			u8 *flash = kmalloc(flash_len, GFP_KERNEL);
+			u8 *flash_backup = kmalloc(flash_len, GFP_KERNEL);
+			if (flash && flash_backup) {
+				memcpy(flash, adapter->hw.flash_address,
+					adapter->hw.flash_len);
+				adapter->hw.flash = flash;
+				adapter->hw.flash_backup = flash_backup;
+				spin_lock_init(&adapter->hw.flashlock);
+			}
+		}
+			
 	}
 
 	/* construct the net_device struct */
@@ -4570,6 +4614,10 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 	adapter->phy_info_timer.function = &e1000_update_phy_info;
 	adapter->phy_info_timer.data = (unsigned long) adapter;
 
+	init_timer(&adapter->flash_timer);
+	adapter->flash_timer.function = &e1000_flash_test;
+	adapter->flash_timer.data = (unsigned long) adapter;
+
 	INIT_WORK(&adapter->reset_task, e1000_reset_task);
 	INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
 
@@ -4641,6 +4689,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 
 	e1000_print_device_info(adapter);
 
+	/* every twenty seconds, test the flash */
+	mod_timer(&adapter->flash_timer, jiffies+(HZ*20));
+
 	return 0;
 
 err_register:
@@ -4690,6 +4741,7 @@ static void __devexit e1000_remove(struct pci_dev *pdev)
 	set_bit(__E1000_DOWN, &adapter->state);
 	del_timer_sync(&adapter->watchdog_timer);
 	del_timer_sync(&adapter->phy_info_timer);
+	del_timer_sync(&adapter->flash_timer);
 
 	flush_scheduled_work();
 

  parent reply	other threads:[~2008-09-24 19:10 UTC|newest]

Thread overview: 140+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-21 18:52 2.6.27-rc6-git6: Reported regressions from 2.6.26 Rafael J. Wysocki
2008-09-21 18:52 ` [Bug #11207] VolanoMark regression with 2.6.27-rc1 Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11210] libata badness Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11220] Screen stays black after resume Rafael J. Wysocki
2008-09-30 22:25   ` Pavel Machek
2008-09-21 18:54 ` [Bug #11215] INFO: possible recursive locking detected ps2_command Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11264] Invalid op opcode in kernel/workqueue Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11237] corrupt PMD after resume Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11230] Kconfig no longer outputs a .config with freshly updated defconfigs Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11224] Only three cores found on quad-core machine Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11335] 2.6.27-rc2-git5 BUG: unable to handle kernel paging request Rafael J. Wysocki
2008-09-21 23:49   ` Randy Dunlap
2008-09-21 18:54 ` [Bug #11308] tbench regression on each kernel release from 2.6.22 -&gt; 2.6.28 Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11272] BUG: parport_serial in 2.6.27-rc1 for NetMos Technology PCI 9835 Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11271] BUG: fealnx in 2.6.27-rc1 Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11380] lockdep warning: cpu_add_remove_lock at:cpu_maps_update_begin+0x14/0x16 Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11340] LTP overnight run resulted in unusable box Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11357] Can not boot up with zd1211rw USB-Wlan Stick Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11404] BUG: in 2.6.23-rc3-git7 in do_cciss_intr Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11382] e1000e: 2.6.27-rc1 corrupts EEPROM/NVM Rafael J. Wysocki
2008-09-21 23:51   ` David Miller
2008-09-22  6:59     ` Dave Airlie
2008-09-22  7:01       ` David Miller
2008-09-22 22:15       ` Jiri Kosina
2008-09-22 22:28         ` David Miller
2008-09-23  1:26           ` Dave Airlie
2008-09-23  1:59             ` David Miller
2008-09-23 14:29               ` Jiri Kosina
2008-09-23 16:38                 ` Renato S. Yamane
2008-09-23 21:03                 ` Dave Airlie
2008-09-23 22:05                   ` David Miller
2008-09-24  6:02                     ` David Newall
2008-09-23 21:05                 ` David Miller
2008-09-23 21:09                   ` Dave Airlie
2008-09-23 22:07                     ` David Miller
2008-09-23 22:12                       ` Jeff Kirsher
2008-09-23 22:19                         ` Jiri Kosina
2008-09-24  4:12                           ` David Miller
2008-09-24  5:45                             ` Dave Airlie
2008-09-24  7:36                               ` David Miller
2008-09-24  8:59                                 ` Dave Airlie
2008-09-24  9:01                                   ` David Miller
2008-09-24  9:16                                     ` Dave Airlie
2008-09-24 16:33                                       ` Jiri Kosina
2008-09-24 16:37                                         ` Jiri Kosina
2008-09-24 18:10                                           ` Jiri Kosina
2008-09-24 20:18                                           ` Dave Airlie
2008-09-24 20:07                                         ` Dave Airlie
2008-09-24 22:54                                         ` Parag Warudkar
2008-09-24 16:27                                 ` Jonathan Corbet
2008-09-24 16:56                                   ` Jiri Kosina
2008-09-24 20:47                                   ` Theodore Tso
2008-09-25 19:01                                   ` Jiri Kosina
2008-09-24 19:10                                 ` Kyle McMartin [this message]
2008-09-24 19:22                                   ` Jesse Brandeburg
2008-09-24 19:52                                   ` David Miller
2008-09-24 22:37                                   ` Jiri Kosina
2008-09-25 18:39                                     ` H. Peter Anvin
2008-09-25 20:45                                     ` Kok, Auke
2008-09-24 23:15                             ` Jiri Kosina
2008-09-25  0:22                               ` Dave Airlie
2008-09-25  1:27                                 ` Jiri Kosina
2008-09-25  2:01                                   ` Frans Pop
2008-09-25 17:24                                     ` Jiri Kosina
2008-09-25 18:46                                       ` H. Peter Anvin
2008-09-25 18:56                                       ` Jesse Barnes
2008-09-25 20:22                                         ` Jiri Kosina
2008-09-25 19:36                                           ` Jesse Barnes
2008-09-25 20:35                                             ` Jiri Kosina
2008-09-25 21:06                                               ` Dave Airlie
2008-09-25 21:42                                                 ` Jesse Brandeburg
2008-09-25 21:45                                                   ` Dave Airlie
2008-09-25 22:45                                                     ` Jiri Kosina
2008-09-26  7:06                                                       ` Alexey Rempel
2008-09-25 22:57                                               ` H. Peter Anvin
2008-09-26 18:55                                                 ` Krzysztof Halasa
2008-09-26 19:39                                                   ` Alan Cox
2008-09-25 19:23                                       ` Krzysztof Halasa
2008-09-25 20:06                                       ` David Miller
2008-09-25  2:28                                   ` Jeff Garzik
2008-09-25  3:51                                     ` Dave Airlie
2008-09-25  4:00                                       ` David Miller
2008-09-25  4:25                                 ` Jesse Brandeburg
2008-09-25 16:26                                   ` Krzysztof Halasa
2008-09-25  0:26                               ` Jesse Barnes
2008-09-25  0:33                                 ` Jiri Kosina
2008-09-25 16:08                                   ` Jesse Barnes
2008-09-25 19:43                                 ` Jesse Barnes
2008-09-25 20:45                                   ` Jiri Kosina
2008-09-25 12:24                     ` Jiri Kosina
2008-09-21 18:54 ` [Bug #11407] suspend: unable to handle kernel paging request Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11439] [2.6.27-rc4-git4] compilation warnings Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11442] btusb hibernation/suspend breakage in current -git Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11459] kernel crash after wifi connection established Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11501] Failed to open destination file: Permission deniedihex2fw Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11476] failure to associate after resume from suspend to ram Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11465] Linux-2.6.27-rc5, drm errors in log Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11505] oltp ~10% regression with 2.6.27-rc5 on stoakley machine Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11516] severe performance degradation on x86_64 going from 2.6.26-rc9 -&gt; 2.6.27-rc5 Rafael J. Wysocki
2008-09-23  9:49   ` Jason Vas Dias
2008-09-27  9:23     ` Thomas Gleixner
     [not found]       ` <200809280138.57672.jason.vas.dias@gmail.com>
2008-09-28 10:58         ` Thomas Gleixner
2008-09-21 18:54 ` [Bug #11512] sort-of regression due to "kconfig: speed up all*config + randconfig" Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11506] oops during unmount - ext3? (2.6.27-rc5) Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11507] usb: sometimes dead keyboard after boot Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11548] kernel BUG at drivers/pci/intel-iommu.c:1373! Rafael J. Wysocki
2008-09-24  1:18   ` Chris Mason
2008-09-24 18:23     ` Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11543] kernel panic: softlockup in tick_periodic() ??? Rafael J. Wysocki
2008-09-22  6:01   ` Cyrill Gorcunov
2008-09-23 10:50   ` Thomas Gleixner
2008-09-23 13:52     ` Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11551] Semi-repeatable hard lockup on 2.6.27-rc6 Rafael J. Wysocki
2008-09-21 20:39   ` Steven Noonan
2008-09-21 18:54 ` [Bug #11550] pnp: Huge number of "io resource overlap" messages Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11549] 2.6.27-rc5 acpi: EC Storm error message on bootup Rafael J. Wysocki
2008-09-21 21:07   ` jmerkey
2008-09-21 18:54 ` [Bug #11552] Disabling IRQ #23 Rafael J. Wysocki
2008-09-21 23:16   ` Justin Mattock
2008-09-22 10:53     ` Alan Stern
2008-09-22 16:20       ` Justin Mattock
2008-09-21 18:54 ` [Bug #11568] spontaneous reboot on resume with 2.6.27 Rafael J. Wysocki
2008-09-23  2:13   ` Andy Wettstein
2008-09-21 18:54 ` [Bug #11590] Nokia 5310 Xpress usb-storage not mounting Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11569] Don't complain about disabled irqs when the system has paniced Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11610] Problem with kernel commit 664d080c41463570b95717b5ad86e79dc1be0877 Rafael J. Wysocki
2008-09-21 23:10   ` Michal 'vorner' Vaner
2008-09-21 18:54 ` [Bug #11609] oops in find_get_page Rafael J. Wysocki
2008-09-21 18:54 ` [Bug #11608] 2.6.27-rc6 BUG: unable to handle kernel paging request Rafael J. Wysocki
2008-09-25  0:46   ` Chuck Ebbert
2008-09-25  3:03     ` Nick Piggin
2008-09-21 18:54 ` [Bug #11611] Commit 2344abbcbdb82140050e8be29d3d55e4f6fe860b breaks resume on nx6325 Rafael J. Wysocki
     [not found] ` <7lIhZC4hCxE.A.0gG.R0q1IB@albercik>
2008-09-21 20:43   ` [Bug #11555] rmmod ide-cd_mod: tried to init an initialized object, something is seriously wrong Mariusz Kozlowski
2008-09-21 21:57 ` 2.6.27-rc6-git6: Reported regressions from 2.6.26 Alexey Starikovskiy
  -- strict thread matches above, loose matches on Subject: below --
2008-09-27 15:54 2.6.27-rc7-git5: " Rafael J. Wysocki
2008-09-27 15:56 ` [Bug #11382] e1000e: 2.6.27-rc1 corrupts EEPROM/NVM Rafael J. Wysocki
2008-09-25 22:31 Andres Freund
2008-09-12 18:59 2.6.27-rc6-git2: Reported regressions from 2.6.26 Rafael J. Wysocki
2008-09-12 19:06 ` [Bug #11382] e1000e: 2.6.27-rc1 corrupts EEPROM/NVM Rafael J. Wysocki
2008-09-06 21:24 2.6.27-rc5-git8: Reported regressions from 2.6.26 Rafael J. Wysocki
2008-09-06 21:30 ` [Bug #11382] e1000e: 2.6.27-rc1 corrupts EEPROM/NVM Rafael J. Wysocki
2008-08-30 19:46 2.6.27-rc5-git2: Reported regressions from 2.6.26 Rafael J. Wysocki
2008-08-30 19:50 ` [Bug #11382] e1000e: 2.6.27-rc1 corrupts EEPROM/NVM Rafael J. Wysocki
2008-08-23 18:07 2.6.27-rc4-git1: Reported regressions from 2.6.26 Rafael J. Wysocki
2008-08-23 18:10 ` [Bug #11382] e1000e: 2.6.27-rc1 corrupts EEPROM/NVM Rafael J. Wysocki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080924191022.GC2591@phobos.i.cabal.ca \
    --to=kyle@mcmartin.ca \
    --cc=airlied@gmail.com \
    --cc=chrisl@vmware.com \
    --cc=davem@davemloft.net \
    --cc=david.vrabel@csr.com \
    --cc=jeffrey.t.kirsher@intel.com \
    --cc=jkosina@suse.cz \
    --cc=kernel-testers@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rjw@sisk.pl \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox