netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ben Hutchings <bhutchings@solarflare.com>
To: David Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org, linux-net-drivers@solarflare.com
Subject: [PATCH net-next-2.6 03/17] sfc: Distinguish critical and non-critical over-temperature conditions
Date: Thu, 02 Dec 2010 23:46:24 +0000	[thread overview]
Message-ID: <1291333584.3259.26.camel@bwh-desktop> (raw)
In-Reply-To: <1291333490.3259.23.camel@bwh-desktop>

Set both the 'maximum' and critical temperature limits for LM87
hardware monitors on Falcon boards.  Do not shut down a port until the
critical temperature is reached, but warn as soon as the 'maximum'
temperature is reached.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 drivers/net/sfc/falcon_boards.c |  109 ++++++++++++++++++++++++++++----------
 1 files changed, 80 insertions(+), 29 deletions(-)

diff --git a/drivers/net/sfc/falcon_boards.c b/drivers/net/sfc/falcon_boards.c
index cfc29d7..86180ee 100644
--- a/drivers/net/sfc/falcon_boards.c
+++ b/drivers/net/sfc/falcon_boards.c
@@ -30,17 +30,28 @@
 #define FALCON_BOARD_SFN4112F 0x52
 
 /* Board temperature is about 15°C above ambient when air flow is
- * limited. */
+ * limited.  The maximum acceptable ambient temperature varies
+ * depending on the PHY specifications but the critical temperature
+ * above which we should shut down to avoid damage is 80°C. */
 #define FALCON_BOARD_TEMP_BIAS	15
+#define FALCON_BOARD_TEMP_CRIT	(80 + FALCON_BOARD_TEMP_BIAS)
 
 /* SFC4000 datasheet says: 'The maximum permitted junction temperature
  * is 125°C; the thermal design of the environment for the SFC4000
  * should aim to keep this well below 100°C.' */
+#define FALCON_JUNC_TEMP_MIN	0
 #define FALCON_JUNC_TEMP_MAX	90
+#define FALCON_JUNC_TEMP_CRIT	125
 
 /*****************************************************************************
  * Support for LM87 sensor chip used on several boards
  */
+#define LM87_REG_TEMP_HW_INT_LOCK	0x13
+#define LM87_REG_TEMP_HW_EXT_LOCK	0x14
+#define LM87_REG_TEMP_HW_INT		0x17
+#define LM87_REG_TEMP_HW_EXT		0x18
+#define LM87_REG_TEMP_EXT1		0x26
+#define LM87_REG_TEMP_INT		0x27
 #define LM87_REG_ALARMS1		0x41
 #define LM87_REG_ALARMS2		0x42
 #define LM87_IN_LIMITS(nr, _min, _max)			\
@@ -57,6 +68,27 @@
 
 #if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE)
 
+static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values)
+{
+	while (*reg_values) {
+		u8 reg = *reg_values++;
+		u8 value = *reg_values++;
+		int rc = i2c_smbus_write_byte_data(client, reg, value);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+static const u8 falcon_lm87_common_regs[] = {
+	LM87_REG_TEMP_HW_INT_LOCK, FALCON_BOARD_TEMP_CRIT,
+	LM87_REG_TEMP_HW_INT, FALCON_BOARD_TEMP_CRIT,
+	LM87_TEMP_EXT1_LIMITS(FALCON_JUNC_TEMP_MIN, FALCON_JUNC_TEMP_MAX),
+	LM87_REG_TEMP_HW_EXT_LOCK, FALCON_JUNC_TEMP_CRIT,
+	LM87_REG_TEMP_HW_EXT, FALCON_JUNC_TEMP_CRIT,
+	0
+};
+
 static int efx_init_lm87(struct efx_nic *efx, struct i2c_board_info *info,
 			 const u8 *reg_values)
 {
@@ -67,13 +99,12 @@ static int efx_init_lm87(struct efx_nic *efx, struct i2c_board_info *info,
 	if (!client)
 		return -EIO;
 
-	while (*reg_values) {
-		u8 reg = *reg_values++;
-		u8 value = *reg_values++;
-		rc = i2c_smbus_write_byte_data(client, reg, value);
-		if (rc)
-			goto err;
-	}
+	rc = efx_poke_lm87(client, reg_values);
+	if (rc)
+		goto err;
+	rc = efx_poke_lm87(client, falcon_lm87_common_regs);
+	if (rc)
+		goto err;
 
 	board->hwmon_client = client;
 	return 0;
@@ -91,36 +122,56 @@ static void efx_fini_lm87(struct efx_nic *efx)
 static int efx_check_lm87(struct efx_nic *efx, unsigned mask)
 {
 	struct i2c_client *client = falcon_board(efx)->hwmon_client;
-	s32 alarms1, alarms2;
+	bool temp_crit, elec_fault, is_failure;
+	u16 alarms;
+	s32 reg;
 
 	/* If link is up then do not monitor temperature */
 	if (EFX_WORKAROUND_7884(efx) && efx->link_state.up)
 		return 0;
 
-	alarms1 = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
-	alarms2 = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
-	if (alarms1 < 0)
-		return alarms1;
-	if (alarms2 < 0)
-		return alarms2;
-	alarms1 &= mask;
-	alarms2 &= mask >> 8;
-	if (alarms1 || alarms2) {
+	reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
+	if (reg < 0)
+		return reg;
+	alarms = reg;
+	reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
+	if (reg < 0)
+		return reg;
+	alarms |= reg << 8;
+	alarms &= mask;
+
+	temp_crit = false;
+	if (alarms & LM87_ALARM_TEMP_INT) {
+		reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_INT);
+		if (reg < 0)
+			return reg;
+		if (reg > FALCON_BOARD_TEMP_CRIT)
+			temp_crit = true;
+	}
+	if (alarms & LM87_ALARM_TEMP_EXT1) {
+		reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_EXT1);
+		if (reg < 0)
+			return reg;
+		if (reg > FALCON_JUNC_TEMP_CRIT)
+			temp_crit = true;
+	}
+	elec_fault = alarms & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1);
+	is_failure = temp_crit || elec_fault;
+
+	if (alarms)
 		netif_err(efx, hw, efx->net_dev,
-			  "LM87 detected a hardware failure (status %02x:%02x)"
-			  "%s%s%s\n",
-			  alarms1, alarms2,
-			  (alarms1 & LM87_ALARM_TEMP_INT) ?
+			  "LM87 detected a hardware %s (status %02x:%02x)"
+			  "%s%s%s%s\n",
+			  is_failure ? "failure" : "problem",
+			  alarms & 0xff, alarms >> 8,
+			  (alarms & LM87_ALARM_TEMP_INT) ?
 			  "; board is overheating" : "",
-			  (alarms1 & LM87_ALARM_TEMP_EXT1) ?
+			  (alarms & LM87_ALARM_TEMP_EXT1) ?
 			  "; controller is overheating" : "",
-			  (alarms1 & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1)
-			   || alarms2) ?
-			  "; electrical fault" : "");
-		return -ERANGE;
-	}
+			  temp_crit ? "; reached critical temperature" : "",
+			  elec_fault ? "; electrical fault" : "");
 
-	return 0;
+	return is_failure ? -ERANGE : 0;
 }
 
 #else /* !CONFIG_SENSORS_LM87 */
-- 
1.7.3.2



-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


  parent reply	other threads:[~2010-12-02 23:46 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-02 23:44 sfc: Bug fixes and cleanup Ben Hutchings
2010-12-02 23:46 ` [PATCH net-next-2.6 01/17] sfc: Reduce log level for MCDI error response in efx_mcdi_rpc() Ben Hutchings
2010-12-03 17:08   ` David Miller
2010-12-02 23:46 ` [PATCH net-next-2.6 02/17] sfc: Fix condition for no-op in set_phy_flash_cfg() Ben Hutchings
2010-12-03 17:08   ` David Miller
2010-12-02 23:46 ` Ben Hutchings [this message]
2010-12-03 17:08   ` [PATCH net-next-2.6 03/17] sfc: Distinguish critical and non-critical over-temperature conditions David Miller
2010-12-02 23:46 ` [PATCH net-next-2.6 04/17] sfc: Read-to-clear LM87 alarm/interrupt status at start of day Ben Hutchings
2010-12-03 17:08   ` David Miller
2010-12-02 23:46 ` [PATCH net-next-2.6 05/17] sfc: Clear RXIN_SEL when soft-resetting QT2025C Ben Hutchings
2010-12-03 17:08   ` David Miller
2010-12-02 23:46 ` [PATCH net-next-2.6 06/17] sfc: Fix event based MCDI completion and MC REBOOT/CMDDONE ordering issue Ben Hutchings
2010-12-03 17:09   ` David Miller
2010-12-02 23:47 ` [PATCH net-next-2.6 07/17] sfc: Remove broken automatic fallback for invalid Falcon chip/board config Ben Hutchings
2010-12-03 17:09   ` David Miller
2010-12-02 23:47 ` [PATCH net-next-2.6 08/17] sfc: Expose Falcon BootROM config through MTD, not ethtool Ben Hutchings
2010-12-03 17:09   ` David Miller
2010-12-02 23:47 ` [PATCH net-next-2.6 09/17] sfc: Remove unnecessary inclusion of various private header files Ben Hutchings
2010-12-03 17:09   ` David Miller
2010-12-02 23:47 ` [PATCH net-next-2.6 10/17] sfc: Move SPI state to struct falcon_nic_data Ben Hutchings
2010-12-03 17:09   ` David Miller
2010-12-02 23:47 ` [PATCH net-next-2.6 11/17] sfc: Move mdio_lock " Ben Hutchings
2010-12-03 17:09   ` David Miller
2010-12-02 23:47 ` [PATCH net-next-2.6 12/17] sfc: Move Falcon global event handling to falcon.c Ben Hutchings
2010-12-03 17:09   ` David Miller
2010-12-02 23:47 ` [PATCH net-next-2.6 13/17] sfc: Move xmac_poll_required into struct falcon_nic_data Ben Hutchings
2010-12-03 17:09   ` David Miller
2010-12-02 23:47 ` [PATCH net-next-2.6 14/17] sfc: Update kernel-doc to match earlier move of Toeplitz hash key Ben Hutchings
2010-12-03 17:09   ` David Miller
2010-12-02 23:48 ` [PATCH net-next-2.6 15/17] sfc: When waking a stopped tx_queue, only lock that tx_queue Ben Hutchings
2010-12-03 17:10   ` David Miller
2010-12-02 23:48 ` [PATCH net-next-2.6 16/17] sfc: Use current MAC address, not NVRAM MAC address, for WoL filter Ben Hutchings
2010-12-03 17:10   ` David Miller
2010-12-02 23:48 ` [PATCH net-next-2.6 17/17] sfc: Store MAC address from NVRAM in net_device::perm_addr Ben Hutchings
2010-12-03 17:10   ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1291333584.3259.26.camel@bwh-desktop \
    --to=bhutchings@solarflare.com \
    --cc=davem@davemloft.net \
    --cc=linux-net-drivers@solarflare.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).