Netdev List
 help / color / mirror / Atom feed
* [RFC v2 PATCH] m68knommu: added dm9000 support
From: Angelo Dureghello @ 2011-01-05 18:03 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: Randy Dunlap

This patch allows the dm9000 network chip to be used with an m68knommu
big-endian cpu. From the HW point of view, the cpu data bus connected to
the dm9000 chip should be hardware-byte-swapped by crossing the byte
lanes (D0:7 to D24:31, etc.). In any case, an option has also been added
to swap the bytes in the driver, for boards where the cpu has been wired
straight D0:D31 to the dm9000.

Signed-off-by: Angelo Dureghello <angelo70@gmail.com>
---

--- linux/drivers/net/Kconfig.orig	2011-01-05 17:11:37.992376124 +0100
+++ linux/drivers/net/Kconfig	2011-01-04 22:33:14.132301872 +0100
@@ -960,7 +960,7 @@ config TI_DAVINCI_EMAC
 
 config DM9000
 	tristate "DM9000 support"
-	depends on ARM || BLACKFIN || MIPS
+	depends on COLDFIRE || ARM || BLACKFIN || MIPS
 	select CRC32
 	select MII
 	---help---
@@ -986,6 +986,14 @@ config DM9000_FORCE_SIMPLE_PHY_POLL
 	  costly MII PHY reads. Note, this will not work if the chip is
 	  operating with an external PHY.
 
+config DM9000_32BIT_SW_SWAP
+	bool "Software byte swap for 32 bit data bus"
+	depends on DM9000 && COLDFIRE
+	---help---
+	  This option makes the dm9000 driver swap the data bytes in
+	  software, for boards where the big endian cpu is wired straight
+	  to the dm9000 32 bit data bus.
+
 config ENC28J60
 	tristate "ENC28J60 support"
 	depends on EXPERIMENTAL && SPI && NET_ETHERNET
@@ -3347,4 +3355,3 @@ config VMXNET3
          module will be called vmxnet3.
 
 endif # NETDEVICES
-

--- linux/drivers/net/dm9000.c.orig	2010-12-30 23:19:39.747836070 +0100
+++ linux/drivers/net/dm9000.c	2011-01-05 16:30:48.636116500 +0100
@@ -158,9 +158,17 @@ dm9000_reset(board_info_t * db)
 	dev_dbg(db->dev, "resetting device\n");
 
 	/* RESET device */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writel(DM9000_NCR, db->io_addr);
+#else
 	writeb(DM9000_NCR, db->io_addr);
+#endif
 	udelay(200);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writel(NCR_RST, db->io_data);
+#else
 	writeb(NCR_RST, db->io_data);
+#endif
 	udelay(200);
 }
 
@@ -170,8 +178,13 @@ dm9000_reset(board_info_t * db)
 static u8
 ior(board_info_t * db, int reg)
 {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writel(reg, db->io_addr);
+	return (u8)readl(db->io_data);
+#else
 	writeb(reg, db->io_addr);
 	return readb(db->io_data);
+#endif
 }
 
 /*
@@ -181,43 +194,72 @@ ior(board_info_t * db, int reg)
 static void
 iow(board_info_t * db, int reg, int value)
 {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writel(reg, db->io_addr);
+	writel(value, db->io_data);
+#else
 	writeb(reg, db->io_addr);
 	writeb(value, db->io_data);
+#endif
 }
 
 /* routines for sending block to chip */
 
 static void dm9000_outblk_8bit(void __iomem *reg, void *data, int count)
 {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writesbsw(reg, data, count);
+#else
 	writesb(reg, data, count);
+#endif
 }
 
 static void dm9000_outblk_16bit(void __iomem *reg, void *data, int count)
 {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writeswsw(reg, data, (count+1) >> 1);
+#else
 	writesw(reg, data, (count+1) >> 1);
+#endif
 }
 
 static void dm9000_outblk_32bit(void __iomem *reg, void *data, int count)
 {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writeslsw(reg, data, (count+3) >> 2);
+#else
 	writesl(reg, data, (count+3) >> 2);
+#endif
 }
 
 /* input block from chip to memory */
 
 static void dm9000_inblk_8bit(void __iomem *reg, void *data, int count)
 {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	readsbsw(reg, data, count);
+#else
 	readsb(reg, data, count);
+#endif
 }
 
 
 static void dm9000_inblk_16bit(void __iomem *reg, void *data, int count)
 {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	readswsw(reg, data, (count+1) >> 1);
+#else
 	readsw(reg, data, (count+1) >> 1);
+#endif
 }
 
 static void dm9000_inblk_32bit(void __iomem *reg, void *data, int count)
 {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	readslsw(reg, data, (count+3) >> 2);
+#else
 	readsl(reg, data, (count+3) >> 2);
+#endif
 }
 
 /* dump block from chip to null */
@@ -863,8 +905,13 @@ static void dm9000_timeout(struct net_de
 	netif_wake_queue(dev);
 
 	/* Restore previous register address */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writel(reg_save, db->io_addr);
+#else
 	writeb(reg_save, db->io_addr);
-	spin_unlock_irqrestore(&db->lock, flags);
+#endif
+
+	spin_unlock_irqrestore(&db->lock,flags);
 }
 
 static void dm9000_send_packet(struct net_device *dev,
@@ -908,7 +955,11 @@ dm9000_start_xmit(struct sk_buff *skb, s
 	spin_lock_irqsave(&db->lock, flags);
 
 	/* Move data to DM9000 TX RAM */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   writel(DM9000_MWCMD, db->io_addr);
+#else
 	writeb(DM9000_MWCMD, db->io_addr);
+#endif	
 
 	(db->outblk)(db->io_data, skb->data, skb->len);
 	dev->stats.tx_bytes += skb->len;
@@ -981,7 +1032,11 @@ dm9000_rx(struct net_device *dev)
 		ior(db, DM9000_MRCMDX);	/* Dummy read */
 
 		/* Get most updated data */
-		rxbyte = readb(db->io_data);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+      rxbyte = (u8)readl(db->io_data);
+#else
+      rxbyte = readb(db->io_data);
+#endif
 
 		/* Status check: this byte must be 0 or 1 */
 		if (rxbyte & DM9000_PKT_ERR) {
@@ -996,8 +1051,13 @@ dm9000_rx(struct net_device *dev)
 
 		/* A packet ready now  & Get status/length */
 		GoodPacket = true;
-		writeb(DM9000_MRCMD, db->io_addr);
 
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+		writel(DM9000_MRCMD, db->io_addr);
+#else
+      writeb(DM9000_MRCMD, db->io_addr);
+#endif		
+		
 		(db->inblk)(db->io_data, &rxhdr, sizeof(rxhdr));
 
 		RxLen = le16_to_cpu(rxhdr.RxLen);
@@ -1077,7 +1137,7 @@ static irqreturn_t dm9000_interrupt(int
 	unsigned long flags;
 	u8 reg_save;
 
-	dm9000_dbg(db, 3, "entering %s\n", __func__);
+	//dm9000_dbg(db, 3, "entering %s\n", __func__);
 
 	/* A real interrupt coming */
 
@@ -1085,7 +1145,11 @@ static irqreturn_t dm9000_interrupt(int
 	spin_lock_irqsave(&db->lock, flags);
 
 	/* Save previous register address */
-	reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif
 
 	/* Disable all interrupts */
 	iow(db, DM9000_IMR, IMR_PAR);
@@ -1100,7 +1164,7 @@ static irqreturn_t dm9000_interrupt(int
 	/* Received the coming packet */
 	if (int_status & ISR_PRS)
 		dm9000_rx(dev);
-
+		
 	/* Trnasmit Interrupt check */
 	if (int_status & ISR_PTS)
 		dm9000_tx_done(dev, db);
@@ -1116,8 +1180,12 @@ static irqreturn_t dm9000_interrupt(int
 	iow(db, DM9000_IMR, db->imr_all);
 
 	/* Restore previous register address */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writel(reg_save, db->io_addr);
+#else
 	writeb(reg_save, db->io_addr);
-
+#endif
+	
 	spin_unlock_irqrestore(&db->lock, flags);
 
 	return IRQ_HANDLED;
@@ -1233,11 +1301,15 @@ dm9000_phy_read(struct net_device *dev,
 	int ret;
 
 	mutex_lock(&db->addr_lock);
-
+	
 	spin_lock_irqsave(&db->lock,flags);
 
 	/* Save previous register address */
-	reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif
 
 	/* Fill the phyxcer register into REG_0C */
 	iow(db, DM9000_EPAR, DM9000_PHY | reg);
@@ -1250,7 +1322,11 @@ dm9000_phy_read(struct net_device *dev,
 	dm9000_msleep(db, 1);		/* Wait read complete */
 
 	spin_lock_irqsave(&db->lock,flags);
-	reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif
 
 	iow(db, DM9000_EPCR, 0x0);	/* Clear phyxcer read command */
 
@@ -1258,9 +1334,14 @@ dm9000_phy_read(struct net_device *dev,
 	ret = (ior(db, DM9000_EPDRH) << 8) | ior(db, DM9000_EPDRL);
 
 	/* restore the previous address */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writel(reg_save, db->io_addr);
+#else
 	writeb(reg_save, db->io_addr);
-	spin_unlock_irqrestore(&db->lock,flags);
+#endif
 
+	spin_unlock_irqrestore(&db->lock,flags);
+	
 	mutex_unlock(&db->addr_lock);
 
 	dm9000_dbg(db, 5, "phy_read[%02x] -> %04x\n", reg, ret);
@@ -1284,7 +1365,11 @@ dm9000_phy_write(struct net_device *dev,
 	spin_lock_irqsave(&db->lock,flags);
 
 	/* Save previous register address */
-	reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif
 
 	/* Fill the phyxcer register into REG_0C */
 	iow(db, DM9000_EPAR, DM9000_PHY | reg);
@@ -1295,18 +1380,31 @@ dm9000_phy_write(struct net_device *dev,
 
 	iow(db, DM9000_EPCR, EPCR_EPOS | EPCR_ERPRW);	/* Issue phyxcer write command */
 
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writel(reg_save, db->io_addr);
+#else
 	writeb(reg_save, db->io_addr);
+#endif
+
 	spin_unlock_irqrestore(&db->lock, flags);
 
 	dm9000_msleep(db, 1);		/* Wait write complete */
 
 	spin_lock_irqsave(&db->lock,flags);
-	reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif
 
 	iow(db, DM9000_EPCR, 0x0);	/* Clear phyxcer write command */
 
 	/* restore the previous address */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+	writel(reg_save, db->io_addr);
+#else
 	writeb(reg_save, db->io_addr);
+#endif
 
 	spin_unlock_irqrestore(&db->lock, flags);
 	mutex_unlock(&db->addr_lock);
@@ -1713,4 +1811,3 @@ MODULE_AUTHOR("Sascha Hauer, Ben Dooks")
 MODULE_DESCRIPTION("Davicom DM9000 network driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:dm9000");
-


--- linux/arch/m68k/include/asm/io_no.h.orig	2011-01-05 16:53:55.904905038 +0100
+++ linux/arch/m68k/include/asm/io_no.h	2011-01-04 23:45:08.893049554 +0100
@@ -47,6 +47,91 @@ static inline unsigned int _swapl(volati
 #define writew(b,addr) (void)((*(volatile unsigned short *) (addr)) = (b))
 #define writel(b,addr) (void)((*(volatile unsigned int *) (addr)) = (b))
 
+static inline void writesb (void __iomem *reg, void *data, int count)
+{
+	unsigned char *p = (unsigned char*) data;
+
+	while (count--) writeb(*p++, reg);
+}
+
+static inline void writesbsw (void __iomem *reg, void *data, int count)
+{
+	unsigned char *p = (unsigned char *) data;
+
+	while (count--) writel((int)(*p++), reg);
+}
+
+static inline void writesw (void __iomem *reg, void *data, int count)
+{
+   unsigned short *p = (unsigned short*) data;
+
+   while (count--) writew(*p++, reg);
+}
+
+static inline void writeswsw (void __iomem *reg, void *data, int count)
+{
+   unsigned short *p = (unsigned short *) data;
+
+   while (count--) writel((int)(_swapw(*p++)), reg);
+}
+
+static inline void writesl (void __iomem *reg, void *data, int count)
+{
+   unsigned long *p = (unsigned long*) data;
+
+   while (count--) writel(*p++, reg);
+}
+
+static inline void writeslsw (void __iomem *reg, void *data, int count)
+{
+   unsigned long *p = (unsigned long *) data;
+
+   while (count--) writel((int)(_swapl(*p++)), reg);
+}
+
+static inline void readsb (void __iomem *reg, void *data, int count)
+{
+   unsigned char *p = (unsigned char *) data;
+
+   while (count--) *p++ = readb(reg);
+}
+
+static inline void readsbsw (void __iomem *reg, void *data, int count)
+{
+   unsigned char *p = (unsigned char *) data;
+
+   while (count--) *p++ = (unsigned char)readl(reg);
+}
+
+static inline void readsw (void __iomem *reg, void *data, int count)
+{
+   unsigned short *p = (unsigned short *) data;
+   /* Store count 16-bit reads of reg into data; readw fetches the full word. */
+   while (count--) *p++ = readw(reg);
+}
+
+static inline void readswsw (void __iomem *reg, void *data, int count)
+{
+   unsigned short *p = (unsigned short *) data;
+
+   while (count--) *p++ = _swapw((unsigned short)readw(reg));
+}
+
+static inline void readsl (void __iomem *reg, void *data, int count)
+{
+   unsigned long *p = (unsigned long *) data;
+   /* Store count 32-bit reads of reg into data; readl fetches the full long. */
+   while (count--) *p++ = readl(reg);
+}
+
+static inline void readslsw (void __iomem *reg, void *data, int count)
+{
+   unsigned long *p = (unsigned long *) data;
+
+   while (count--) *p++ = _swapl(readl(reg));
+}
+
+
 #define __raw_readb readb
 #define __raw_readw readw
 #define __raw_readl readl
@@ -180,4 +265,3 @@ extern void iounmap(void *addr);
 #endif /* __KERNEL__ */
 
 #endif /* _M68KNOMMU_IO_H */
-

^ permalink raw reply

* Re: [PATCH v2] netfilter: fix the race when initializing nf_ct_expect_hash_rnd
From: Eric Dumazet @ 2011-01-05 18:03 UTC (permalink / raw)
  To: Changli Gao; +Cc: Patrick McHardy, netfilter-devel, David S. Miller, netdev
In-Reply-To: <1294237403-15616-1-git-send-email-xiaosuo@gmail.com>

Le mercredi 05 janvier 2011 à 22:23 +0800, Changli Gao a écrit :
> Since nf_ct_expect_dst_hash() may be called without nf_conntrack_lock
> locked, nf_ct_expect_hash_rnd should be initialized in the atomic way.
> 
> In this patch, we use nf_conntrack_hash_rnd instead of
> nf_ct_expect_hash_rnd.
> 
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Problem is Patrick seems not responsive these days ;)



--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v3 08/10] ARM: mxs: add ocotp read function
From: Jamie Lokier @ 2011-01-05 17:56 UTC (permalink / raw)
  To: Jamie Iles
  Cc: gerg, B32542, netdev, s.hauer, bryan.wu, baruch, w.sang, r64343,
	Shawn Guo, eric, Uwe Kleine-König, davem, linux-arm-kernel,
	lw
In-Reply-To: <20110105172501.GB2112@gallagher>

Jamie Iles wrote:
> On Wed, Jan 05, 2011 at 05:44:09PM +0100, Uwe Kleine-König wrote:
> > Hello Jamie,
> > On Wed, Jan 05, 2011 at 04:16:46PM +0000, Jamie Iles wrote:
> > > On Wed, Jan 05, 2011 at 10:07:35PM +0800, Shawn Guo wrote:
> > > > +	/* check both BUSY and ERROR cleared */
> > > > +	while ((__raw_readl(ocotp_base) &
> > > > +		(BM_OCOTP_CTRL_BUSY | BM_OCOTP_CTRL_ERROR)) && --timeout)
> > > > +		/* nothing */;
> > > 
> > > Is it worth using cpu_relax() in these polling loops?
> > I don't know what cpu_relax does for other platforms, but on ARM it's
> > just a memory barrier which AFAICT doesn't help here at all (which
> > doesn't need to be correct).  Why do you think it would be better?
> 
> Well I don't see that there's anything broken without cpu_relax() but 
> using cpu_relax() seems to be the most common way of doing busy polling 
> loops that I've seen. It's also a bit easier to read than a comment and 
> semi-colon. Perhaps it's just personal preference.

cpu_relax() is a hint to the CPU to, for example, save power or be
less aggressive on the memory bus (to save power or be fairer).

Currently these architectures do more than just a barrier in cpu_relax():
x86, IA64, PowerPC, Tile and S390.

Although it's just a hint on ARM at the moment, it might change in
future - especially with power mattering on so many ARM systems.
(Even now, just changing it to a very short udelay might save power
on existing ARMs without breaking drivers.)


By the way, I see ARM defines cpu_relax as smp_mb() on arch >= 6.  Is
that correct and useful?  On other architectures*, barrier() is enough
of a barrier, but it's conceivable that smp_mb() would have some
ARM-specific fairness or bus activity benefit - in which case it
should probably be mb().

* - except Blackfin, which historically derived a lot from ARM headers.

-- Jamie

^ permalink raw reply

* Re: [PATCH] ipv4: IP defragmentation must be ECN aware
From: Eric Dumazet @ 2011-01-05 17:52 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev
In-Reply-To: <20110105094830.63a68230@nehalam>

Le mercredi 05 janvier 2011 à 09:48 -0800, Stephen Hemminger a écrit :

> At least make it unsigned int?
> 

Let's keep it simple and use u8 as you suggested ;)

[PATCH v2] ipv4: IP defragmentation must be ECN aware

RFC3168 (The Addition of Explicit Congestion Notification to IP)
states :

5.3.  Fragmentation

   ECN-capable packets MAY have the DF (Don't Fragment) bit set.
   Reassembly of a fragmented packet MUST NOT lose indications of
   congestion.  In other words, if any fragment of an IP packet to be
   reassembled has the CE codepoint set, then one of two actions MUST be
   taken:

      * Set the CE codepoint on the reassembled packet.  However, this
        MUST NOT occur if any of the other fragments contributing to
        this reassembly carries the Not-ECT codepoint.

      * The packet is dropped, instead of being reassembled, for any
        other reason.

This patch implements this requirement for IPv4, choosing the first
action : 

If one fragment had NO-ECT codepoint
        reassembled frame has NO-ECT
ElIf one fragment had CE codepoint
        reassembled frame has CE

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
Use u8 instead of int in ip4_frag_ecn()
 net/ipv4/ip_fragment.c |   34 ++++++++++++++++++++++++++++++++++
 1 files changed, 34 insertions(+)


diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e6215bd..e6b53a7 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -45,6 +45,7 @@
 #include <linux/udp.h>
 #include <linux/inet.h>
 #include <linux/netfilter_ipv4.h>
+#include <net/inet_ecn.h>
 
 /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
  * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -70,11 +71,28 @@ struct ipq {
 	__be32		daddr;
 	__be16		id;
 	u8		protocol;
+	u8		ecn; /* RFC3168 support */
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
 };
 
+#define IPFRAG_ECN_CLEAR  0x01 /* one frag had INET_ECN_NOT_ECT */
+#define IPFRAG_ECN_SET_CE 0x04 /* one frag had INET_ECN_CE */
+
+static inline u8 ip4_frag_ecn(u8 tos)
+{
+	tos = (tos & INET_ECN_MASK) + 1;
+	/*
+	 * After the last operation we have (in binary):
+	 * INET_ECN_NOT_ECT => 001
+	 * INET_ECN_ECT_1   => 010
+	 * INET_ECN_ECT_0   => 011
+	 * INET_ECN_CE      => 100
+	 */
+	return (tos & 2) ? 0 : tos;
+}
+
 static struct inet_frags ip4_frags;
 
 int ip_frag_nqueues(struct net *net)
@@ -137,6 +155,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 
 	qp->protocol = arg->iph->protocol;
 	qp->id = arg->iph->id;
+	qp->ecn = ip4_frag_ecn(arg->iph->tos);
 	qp->saddr = arg->iph->saddr;
 	qp->daddr = arg->iph->daddr;
 	qp->user = arg->user;
@@ -316,6 +335,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	qp->q.fragments = NULL;
 	qp->q.fragments_tail = NULL;
 	qp->iif = 0;
+	qp->ecn = 0;
 
 	return 0;
 }
@@ -328,6 +348,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	int flags, offset;
 	int ihl, end;
 	int err = -ENOENT;
+	u8 ecn;
 
 	if (qp->q.last_in & INET_FRAG_COMPLETE)
 		goto err;
@@ -339,6 +360,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		goto err;
 	}
 
+	ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
 	offset = ntohs(ip_hdr(skb)->frag_off);
 	flags = offset & ~IP_OFFSET;
 	offset &= IP_OFFSET;
@@ -472,6 +494,7 @@ found:
 	}
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
+	qp->ecn |= ecn;
 	atomic_add(skb->truesize, &qp->q.net->mem);
 	if (offset == 0)
 		qp->q.last_in |= INET_FRAG_FIRST_IN;
@@ -583,6 +606,17 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
+	/* RFC3168 5.3 Fragmentation support
+	 * If one fragment had INET_ECN_NOT_ECT,
+	 *	reassembled frame also has INET_ECN_NOT_ECT
+	 * Elif one fragment had INET_ECN_CE
+	 *	reassembled frame also has INET_ECN_CE
+	 */
+	if (qp->ecn & IPFRAG_ECN_CLEAR)
+		iph->tos &= ~INET_ECN_MASK;
+	else if (qp->ecn & IPFRAG_ECN_SET_CE)
+		iph->tos |= INET_ECN_CE;
+
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
 	qp->q.fragments_tail = NULL;



^ permalink raw reply related

* Re: [PATCH] ipv4: IP defragmentation must be ECN aware
From: Stephen Hemminger @ 2011-01-05 17:48 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1294249261.10633.54.camel@edumazet-laptop>

On Wed, 05 Jan 2011 18:41:01 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Le mercredi 05 janvier 2011 à 09:13 -0800, Stephen Hemminger a écrit :
> > On Wed, 05 Jan 2011 14:59:02 +0100
> > Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > 
> > > +
> > > +static inline int ip4_frag_ecn(int tos)
> > 
> > Since tos is only a byte, this should be:
> > 
> > static inline u8 ip4_frag_ecn(u8 tos)
> > 
> > 
> 
> In fact, generated code is the same on x86, but some arches have faster
> arithmetic on WORD units.
> 
> And I added the 'inline' because on x86_64 gcc, compiler chose _not_ to
> inline this 6 instruction sequence ! Code was much larger.
> 
> 31 d2                   xor    %edx,%edx
> 0f b6 40 01             movzbl 0x1(%rax),%eax
> 83 e0 03                and    $0x3,%eax
> ff c0                   inc    %eax
> a8 02                   test   $0x2,%al
> 0f 44 d0                cmove  %eax,%edx
> 
> 
> We do roughly the same (working on WORD arith) in 
>
At least make it unsigned int?

-- 

^ permalink raw reply

* Re: [net-next-2.6 PATCH v5 2/2] net_sched: implement a root container qdisc sch_mqprio
From: Stephen Hemminger @ 2011-01-05 17:47 UTC (permalink / raw)
  To: John Fastabend
  Cc: Jarek Poplawski, davem@davemloft.net, hadi@cyberus.ca,
	tgraf@infradead.org, eric.dumazet@gmail.com,
	bhutchings@solarflare.com, nhorman@tuxdriver.com,
	netdev@vger.kernel.org
In-Reply-To: <4D24ACA4.3000301@intel.com>

On Wed, 05 Jan 2011 09:38:44 -0800
John Fastabend <john.r.fastabend@intel.com> wrote:

> On 1/4/2011 2:59 PM, Jarek Poplawski wrote:
> > On Tue, Jan 04, 2011 at 10:56:46AM -0800, John Fastabend wrote:
> >> This implements a mqprio queueing discipline that by default creates
> >> a pfifo_fast qdisc per tx queue and provides the needed configuration
> >> interface.
> >>
> >> Using the mqprio qdisc the number of tcs currently in use along
> >> with the range of queues alloted to each class can be configured. By
> >> default skbs are mapped to traffic classes using the skb priority.
> >> This mapping is configurable.
> >>
> >> Configurable parameters,
> >>
> >> struct tc_mqprio_qopt {
> >>         __u8    num_tc;
> >>         __u8    prio_tc_map[TC_BITMASK + 1];
> >>         __u8    hw;
> >>         __u16   count[TC_MAX_QUEUE];
> >>         __u16   offset[TC_MAX_QUEUE];
> >> };
> >>
> >> Here the count/offset pairing give the queue alignment and the
> >> prio_tc_map gives the mapping from skb->priority to tc.
> >>
> >> The hw bit determines if the hardware should configure the count
> >> and offset values. If the hardware bit is set then the operation
> >> will fail if the hardware does not implement the ndo_setup_tc
> >> operation. This is to avoid undetermined states where the hardware
> >> may or may not control the queue mapping. Also minimal bounds
> >> checking is done on the count/offset to verify a queue does not
> >> exceed num_tx_queues and that queue ranges do not overlap. Otherwise
> >> it is left to user policy or hardware configuration to create
> >> useful mappings.
> >>
> >> It is expected that hardware QOS schemes can be implemented by
> >> creating appropriate mappings of queues in ndo_tc_setup().
> >>
> >> One expected use case is drivers will use the ndo_setup_tc to map
> >> queue ranges onto 802.1Q traffic classes. This provides a generic
> >> mechanism to map network traffic onto these traffic classes and
> >> removes the need for lower layer drivers to know specifics about
> >> traffic types.
> >>
> >> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> >> ---
> >>
> >>  include/linux/netdevice.h |    3
> >>  include/linux/pkt_sched.h |   10 +
> >>  net/sched/Kconfig         |   12 +
> >>  net/sched/Makefile        |    1
> >>  net/sched/sch_generic.c   |    4
> >>  net/sched/sch_mqprio.c    |  413 +++++++++++++++++++++++++++++++++++++++++++++
> >>  6 files changed, 443 insertions(+), 0 deletions(-)
> >>  create mode 100644 net/sched/sch_mqprio.c
> >>
> >> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> >> index ae51323..19a855b 100644
> >> --- a/include/linux/netdevice.h
> >> +++ b/include/linux/netdevice.h
> >> @@ -764,6 +764,8 @@ struct netdev_tc_txq {
> >>   * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
> >>   *                     struct nlattr *port[]);
> >>   * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
> >> + *
> >> + * int (*ndo_setup_tc)(struct net_device *dev, int tc);
> >
> >  * int (*ndo_setup_tc)(struct net_device *dev, u8 tc);
> >
> >>   */
> >>  #define HAVE_NET_DEVICE_OPS
> >>  struct net_device_ops {
> >> @@ -822,6 +824,7 @@ struct net_device_ops {
> >>                                                  struct nlattr *port[]);
> >>       int                     (*ndo_get_vf_port)(struct net_device *dev,
> >>                                                  int vf, struct sk_buff *skb);
> >> +     int                     (*ndo_setup_tc)(struct net_device *dev, u8 tc);
> >>  #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
> >>       int                     (*ndo_fcoe_enable)(struct net_device *dev);
> >>       int                     (*ndo_fcoe_disable)(struct net_device *dev);
> >> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
> >> index 2cfa4bc..1c5310a 100644
> >> --- a/include/linux/pkt_sched.h
> >> +++ b/include/linux/pkt_sched.h
> >> @@ -2,6 +2,7 @@
> >>  #define __LINUX_PKT_SCHED_H
> >>
> >>  #include <linux/types.h>
> >> +#include <linux/netdevice.h>
> >

This won't be acceptable.

All the TC api needs to be in linux/pkt_sched.h.

I regularly take the sanitized headers from the kernel version and put
them in the iproute2 source.



-- 

^ permalink raw reply

* Re: [PATCH] net: ixp4xx_eth: Return proper error for eth_init_one
From: Krzysztof Halasa @ 2011-01-05 17:43 UTC (permalink / raw)
  To: Axel Lin; +Cc: linux-kernel, David S. Miller, netdev
In-Reply-To: <1294205046.8294.2.camel@mola>

Axel Lin <axel.lin@gmail.com> writes:

> Return PTR_ERR(port->phydev) instead of 1 if phy_connect failed.
>
> Signed-off-by: Axel Lin <axel.lin@gmail.com>

> --- a/drivers/net/arm/ixp4xx_eth.c
> +++ b/drivers/net/arm/ixp4xx_eth.c
> @@ -1229,8 +1229,10 @@ static int __devinit eth_init_one(struct platform_device *pdev)
>  	snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, "0", plat->phy);
>  	port->phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link, 0,
>  				   PHY_INTERFACE_MODE_MII);
> -	if ((err = IS_ERR(port->phydev)))
> +	if (IS_ERR(port->phydev)) {
> +		err = PTR_ERR(port->phydev);
>  		goto err_free_mem;
> +	}
>  
>  	port->phydev->irq = PHY_POLL;

Right. Thanks.

Acked-by: Krzysztof Halasa <khc@pm.waw.pl>
-- 
Krzysztof Halasa

^ permalink raw reply

* Re: [PATCH] ipv4: IP defragmentation must be ECN aware
From: Eric Dumazet @ 2011-01-05 17:41 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev
In-Reply-To: <20110105091340.3f8833ef@nehalam>

Le mercredi 05 janvier 2011 à 09:13 -0800, Stephen Hemminger a écrit :
> On Wed, 05 Jan 2011 14:59:02 +0100
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> > +
> > +static inline int ip4_frag_ecn(int tos)
> 
> Since tos is only a byte, this should be:
> 
> static inline u8 ip4_frag_ecn(u8 tos)
> 
> 

In fact, generated code is the same on x86, but some arches have faster
arithmetic on WORD units.

And I added the 'inline' because on x86_64 gcc, compiler chose _not_ to
inline this 6 instruction sequence ! Code was much larger.

31 d2                   xor    %edx,%edx
0f b6 40 01             movzbl 0x1(%rax),%eax
83 e0 03                and    $0x3,%eax
ff c0                   inc    %eax
a8 02                   test   $0x2,%al
0f 44 d0                cmove  %eax,%edx


We do roughly the same (working on WORD arith) in 

static inline int IP_ECN_set_ce(struct iphdr *iph)
{
u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
...
}

What do others think? I have no real strong opinion.



^ permalink raw reply

* Re: [net-next-2.6 PATCH v5 2/2] net_sched: implement a root container qdisc sch_mqprio
From: John Fastabend @ 2011-01-05 17:38 UTC (permalink / raw)
  To: Jarek Poplawski
  Cc: davem@davemloft.net, hadi@cyberus.ca, shemminger@vyatta.com,
	tgraf@infradead.org, eric.dumazet@gmail.com,
	bhutchings@solarflare.com, nhorman@tuxdriver.com,
	netdev@vger.kernel.org
In-Reply-To: <20110104225936.GA2030@del.dom.local>

On 1/4/2011 2:59 PM, Jarek Poplawski wrote:
> On Tue, Jan 04, 2011 at 10:56:46AM -0800, John Fastabend wrote:
>> This implements a mqprio queueing discipline that by default creates
>> a pfifo_fast qdisc per tx queue and provides the needed configuration
>> interface.
>>
>> Using the mqprio qdisc the number of tcs currently in use along
>> with the range of queues alloted to each class can be configured. By
>> default skbs are mapped to traffic classes using the skb priority.
>> This mapping is configurable.
>>
>> Configurable parameters,
>>
>> struct tc_mqprio_qopt {
>>         __u8    num_tc;
>>         __u8    prio_tc_map[TC_BITMASK + 1];
>>         __u8    hw;
>>         __u16   count[TC_MAX_QUEUE];
>>         __u16   offset[TC_MAX_QUEUE];
>> };
>>
>> Here the count/offset pairing give the queue alignment and the
>> prio_tc_map gives the mapping from skb->priority to tc.
>>
>> The hw bit determines if the hardware should configure the count
>> and offset values. If the hardware bit is set then the operation
>> will fail if the hardware does not implement the ndo_setup_tc
>> operation. This is to avoid undetermined states where the hardware
>> may or may not control the queue mapping. Also minimal bounds
>> checking is done on the count/offset to verify a queue does not
>> exceed num_tx_queues and that queue ranges do not overlap. Otherwise
>> it is left to user policy or hardware configuration to create
>> useful mappings.
>>
>> It is expected that hardware QOS schemes can be implemented by
>> creating appropriate mappings of queues in ndo_tc_setup().
>>
>> One expected use case is drivers will use the ndo_setup_tc to map
>> queue ranges onto 802.1Q traffic classes. This provides a generic
>> mechanism to map network traffic onto these traffic classes and
>> removes the need for lower layer drivers to know specifics about
>> traffic types.
>>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>> ---
>>
>>  include/linux/netdevice.h |    3
>>  include/linux/pkt_sched.h |   10 +
>>  net/sched/Kconfig         |   12 +
>>  net/sched/Makefile        |    1
>>  net/sched/sch_generic.c   |    4
>>  net/sched/sch_mqprio.c    |  413 +++++++++++++++++++++++++++++++++++++++++++++
>>  6 files changed, 443 insertions(+), 0 deletions(-)
>>  create mode 100644 net/sched/sch_mqprio.c
>>
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index ae51323..19a855b 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -764,6 +764,8 @@ struct netdev_tc_txq {
>>   * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
>>   *                     struct nlattr *port[]);
>>   * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
>> + *
>> + * int (*ndo_setup_tc)(struct net_device *dev, int tc);
>
>  * int (*ndo_setup_tc)(struct net_device *dev, u8 tc);
>
>>   */
>>  #define HAVE_NET_DEVICE_OPS
>>  struct net_device_ops {
>> @@ -822,6 +824,7 @@ struct net_device_ops {
>>                                                  struct nlattr *port[]);
>>       int                     (*ndo_get_vf_port)(struct net_device *dev,
>>                                                  int vf, struct sk_buff *skb);
>> +     int                     (*ndo_setup_tc)(struct net_device *dev, u8 tc);
>>  #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
>>       int                     (*ndo_fcoe_enable)(struct net_device *dev);
>>       int                     (*ndo_fcoe_disable)(struct net_device *dev);
>> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
>> index 2cfa4bc..1c5310a 100644
>> --- a/include/linux/pkt_sched.h
>> +++ b/include/linux/pkt_sched.h
>> @@ -2,6 +2,7 @@
>>  #define __LINUX_PKT_SCHED_H
>>
>>  #include <linux/types.h>
>> +#include <linux/netdevice.h>
>
> This should better be consulted with Stephen wrt. iproute patch.

OK. Stephen is there a better way to do this? Possibly push the TC_xxx defines into a linux/if_* header? But that doesn't seem right either. I'll poke around some to see if this can be avoided.

>
>>
>>  /* Logical priority bands not depending on specific packet scheduler.
>>     Every scheduler will map them to real traffic classes, if it has
>> @@ -481,4 +482,13 @@ struct tc_drr_stats {
>>       __u32   deficit;
>>  };
>>
>> +/* MQPRIO */
>> +struct tc_mqprio_qopt {
>> +     __u8    num_tc;
>> +     __u8    prio_tc_map[TC_BITMASK + 1];
>> +     __u8    hw;
>> +     __u16   count[TC_MAX_QUEUE];
>> +     __u16   offset[TC_MAX_QUEUE];
>> +};
>> +
>>  #endif
>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>> index a36270a..f52f5eb 100644
>> --- a/net/sched/Kconfig
>> +++ b/net/sched/Kconfig
>> @@ -205,6 +205,18 @@ config NET_SCH_DRR
>>
>>         If unsure, say N.
>>
>> +config NET_SCH_MQPRIO
>> +     tristate "Multi-queue priority scheduler (MQPRIO)"
>> +     help
>> +       Say Y here if you want to use the Multi-queue Priority scheduler.
>> +       This scheduler allows QOS to be offloaded on NICs that have support
>> +       for offloading QOS schedulers.
>> +
>> +       To compile this driver as a module, choose M here: the module will
>> +       be called sch_mqprio.
>> +
>> +       If unsure, say N.
>> +
>>  config NET_SCH_INGRESS
>>       tristate "Ingress Qdisc"
>>       depends on NET_CLS_ACT
>> diff --git a/net/sched/Makefile b/net/sched/Makefile
>> index 960f5db..26ce681 100644
>> --- a/net/sched/Makefile
>> +++ b/net/sched/Makefile
>> @@ -32,6 +32,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ)        += sch_multiq.o
>>  obj-$(CONFIG_NET_SCH_ATM)    += sch_atm.o
>>  obj-$(CONFIG_NET_SCH_NETEM)  += sch_netem.o
>>  obj-$(CONFIG_NET_SCH_DRR)    += sch_drr.o
>> +obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
>>  obj-$(CONFIG_NET_CLS_U32)    += cls_u32.o
>>  obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
>>  obj-$(CONFIG_NET_CLS_FW)     += cls_fw.o
>> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
>> index 34dc598..723b278 100644
>> --- a/net/sched/sch_generic.c
>> +++ b/net/sched/sch_generic.c
>> @@ -540,6 +540,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
>>       .dump           =       pfifo_fast_dump,
>>       .owner          =       THIS_MODULE,
>>  };
>> +EXPORT_SYMBOL(pfifo_fast_ops);
>>
>>  struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
>>                         struct Qdisc_ops *ops)
>> @@ -674,6 +675,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
>>
>>       return oqdisc;
>>  }
>> +EXPORT_SYMBOL(dev_graft_qdisc);
>>
>>  static void attach_one_default_qdisc(struct net_device *dev,
>>                                    struct netdev_queue *dev_queue,
>> @@ -761,6 +763,7 @@ void dev_activate(struct net_device *dev)
>>               dev_watchdog_up(dev);
>>       }
>>  }
>> +EXPORT_SYMBOL(dev_activate);
>>
>>  static void dev_deactivate_queue(struct net_device *dev,
>>                                struct netdev_queue *dev_queue,
>> @@ -840,6 +843,7 @@ void dev_deactivate(struct net_device *dev)
>>       list_add(&dev->unreg_list, &single);
>>       dev_deactivate_many(&single);
>>  }
>> +EXPORT_SYMBOL(dev_deactivate);
>>
>>  static void dev_init_scheduler_queue(struct net_device *dev,
>>                                    struct netdev_queue *dev_queue,
>> diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
>> new file mode 100644
>> index 0000000..b16dc2c
>> --- /dev/null
>> +++ b/net/sched/sch_mqprio.c
>> @@ -0,0 +1,413 @@
>> +/*
>> + * net/sched/sch_mqprio.c
>> + *
>> + * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License
>> + * version 2 as published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/types.h>
>> +#include <linux/slab.h>
>> +#include <linux/kernel.h>
>> +#include <linux/string.h>
>> +#include <linux/errno.h>
>> +#include <linux/skbuff.h>
>> +#include <net/netlink.h>
>> +#include <net/pkt_sched.h>
>> +#include <net/sch_generic.h>
>> +
>> +struct mqprio_sched {
>> +     struct Qdisc            **qdiscs;
>> +     int hw_owned;
>> +};
>> +
>> +static void mqprio_destroy(struct Qdisc *sch)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct mqprio_sched *priv = qdisc_priv(sch);
>> +     unsigned int ntx;
>> +
>> +     if (!priv->qdiscs)
>> +             return;
>> +
>> +     for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
>> +             qdisc_destroy(priv->qdiscs[ntx]);
>> +
>> +     if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
>> +             dev->netdev_ops->ndo_setup_tc(dev, 0);
>> +     else
>> +             netdev_set_num_tc(dev, 0);
>> +
>> +     kfree(priv->qdiscs);
>> +}
>> +
>> +static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
>> +{
>> +     int i, j;
>> +
>> +     /* Verify num_tc is not out of max range */
>> +     if (qopt->num_tc > TC_MAX_QUEUE)
>> +             return -EINVAL;
>> +
>> +     for (i = 0; i < qopt->num_tc; i++) {
>> +             unsigned int last = qopt->offset[i] + qopt->count[i];
>
> (empty line after declarations)
>

fixed

>> +             /* Verify the queue offset is in the num tx range */
>> +             if (qopt->offset[i] >= dev->num_tx_queues)
>> +                     return -EINVAL;
>> +             /* Verify the queue count is in tx range being equal to the
>> +              * num_tx_queues indicates the last queue is in use.
>> +              */
>> +             else if (last > dev->num_tx_queues)
>> +                     return -EINVAL;
>> +
>> +             /* Verify that the offset and counts do not overlap */
>> +             for (j = i + 1; j < qopt->num_tc; j++) {
>> +                     if (last > qopt->offset[j])
>> +                             return -EINVAL;
>> +             }
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct mqprio_sched *priv = qdisc_priv(sch);
>> +     struct netdev_queue *dev_queue;
>> +     struct Qdisc *qdisc;
>> +     int i, err = -EOPNOTSUPP;
>> +     struct tc_mqprio_qopt *qopt = NULL;
>> +
>> +     /* Unwind attributes on failure */
>> +     u8 unwnd_tc = dev->num_tc;
>> +     u8 unwnd_map[TC_BITMASK + 1];
>> +     struct netdev_tc_txq unwnd_txq[TC_MAX_QUEUE];
>> +
>> +     if (sch->parent != TC_H_ROOT)
>> +             return -EOPNOTSUPP;
>> +
>> +     if (!netif_is_multiqueue(dev))
>> +             return -EOPNOTSUPP;
>> +
>> +     if (nla_len(opt) < sizeof(*qopt))
>> +             return -EINVAL;
>> +     qopt = nla_data(opt);
>> +
>> +     memcpy(unwnd_map, dev->prio_tc_map, sizeof(unwnd_map));
>> +     memcpy(unwnd_txq, dev->tc_to_txq, sizeof(unwnd_txq));
>> +
>> +     /* If the mqprio options indicate that hardware should own
>> +      * the queue mapping then run ndo_setup_tc if this can not
>> +      * be done fail immediately.
>> +      */
>> +     if (qopt->hw && dev->netdev_ops->ndo_setup_tc) {
>> +             priv->hw_owned = 1;
>> +             err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
>> +             if (err)
>> +                     return err;
>> +     } else if (!qopt->hw) {
>> +             if (mqprio_parse_opt(dev, qopt))
>> +                     return -EINVAL;
>> +
>> +             if (netdev_set_num_tc(dev, qopt->num_tc))
>> +                     return -EINVAL;
>> +
>> +             for (i = 0; i < qopt->num_tc; i++)
>> +                     netdev_set_tc_queue(dev, i,
>> +                                         qopt->count[i], qopt->offset[i]);
>> +     } else {
>> +             return -EINVAL;
>> +     }
>> +
>> +     /* Always use supplied priority mappings */
>> +     for (i = 0; i < TC_BITMASK + 1; i++) {
>> +             if (netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i])) {
>> +                     err = -EINVAL;
>
> This would probably trigger if we try qopt->num_tc == 0. Is it expected?

netdev_set_prio_tc_map() returns 0 on success. This if(..) is a bit strange though.

        err = netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i])
        if (err)
                ...

Is cleaner IMHO.

>
>> +                     goto tc_err;
>> +             }
>> +     }
>> +
>> +     /* pre-allocate qdisc, attachment can't fail */
>> +     priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
>> +                            GFP_KERNEL);
>> +     if (priv->qdiscs == NULL) {
>> +             err = -ENOMEM;
>> +             goto tc_err;
>> +     }
>> +
>> +     for (i = 0; i < dev->num_tx_queues; i++) {
>> +             dev_queue = netdev_get_tx_queue(dev, i);
>> +             qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
>> +                                       TC_H_MAKE(TC_H_MAJ(sch->handle),
>> +                                                 TC_H_MIN(i + 1)));
>> +             if (qdisc == NULL) {
>> +                     err = -ENOMEM;
>> +                     goto err;
>> +             }
>> +             qdisc->flags |= TCQ_F_CAN_BYPASS;
>> +             priv->qdiscs[i] = qdisc;
>> +     }
>> +
>> +     sch->flags |= TCQ_F_MQROOT;
>> +     return 0;
>> +
>> +err:
>> +     mqprio_destroy(sch);
>> +tc_err:
>> +     if (priv->hw_owned)
>> +             dev->netdev_ops->ndo_setup_tc(dev, unwnd_tc);
>
> Setting here (again) to unwind a bit later looks strange.
> Why not this 'else' only?

The entire unwind stuff is a bit awkward. With a bit more work up front parsing the parameters, the unwinding can be avoided altogether.

>
>> +     else
>> +             netdev_set_num_tc(dev, unwnd_tc);
>> +
>> +     memcpy(dev->prio_tc_map, unwnd_map, sizeof(unwnd_map));
>> +     memcpy(dev->tc_to_txq, unwnd_txq, sizeof(unwnd_txq));
>> +
>> +     return err;
>> +}
>> +
>> +static void mqprio_attach(struct Qdisc *sch)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct mqprio_sched *priv = qdisc_priv(sch);
>> +     struct Qdisc *qdisc;
>> +     unsigned int ntx;
>> +
>> +     /* Attach underlying qdisc */
>> +     for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
>> +             qdisc = priv->qdiscs[ntx];
>> +             qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
>> +             if (qdisc)
>> +                     qdisc_destroy(qdisc);
>> +     }
>> +     kfree(priv->qdiscs);
>> +     priv->qdiscs = NULL;
>> +}
>> +
>> +static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
>> +                                          unsigned long cl)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
>> +
>> +     if (ntx >= dev->num_tx_queues)
>> +             return NULL;
>> +     return netdev_get_tx_queue(dev, ntx);
>> +}
>> +
>> +static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
>> +                 struct Qdisc **old)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
>> +
>> +     if (dev->flags & IFF_UP)
>> +             dev_deactivate(dev);
>> +
>> +     *old = dev_graft_qdisc(dev_queue, new);
>> +
>> +     if (dev->flags & IFF_UP)
>> +             dev_activate(dev);
>> +
>> +     return 0;
>> +}
>> +
>> +static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct mqprio_sched *priv = qdisc_priv(sch);
>> +     unsigned char *b = skb_tail_pointer(skb);
>> +     struct tc_mqprio_qopt opt;
>> +     struct Qdisc *qdisc;
>> +     unsigned int i;
>> +
>> +     sch->q.qlen = 0;
>> +     memset(&sch->bstats, 0, sizeof(sch->bstats));
>> +     memset(&sch->qstats, 0, sizeof(sch->qstats));
>> +
>> +     for (i = 0; i < dev->num_tx_queues; i++) {
>> +             qdisc = netdev_get_tx_queue(dev, i)->qdisc;
>> +             spin_lock_bh(qdisc_lock(qdisc));
>> +             sch->q.qlen             += qdisc->q.qlen;
>> +             sch->bstats.bytes       += qdisc->bstats.bytes;
>> +             sch->bstats.packets     += qdisc->bstats.packets;
>> +             sch->qstats.qlen        += qdisc->qstats.qlen;
>> +             sch->qstats.backlog     += qdisc->qstats.backlog;
>> +             sch->qstats.drops       += qdisc->qstats.drops;
>> +             sch->qstats.requeues    += qdisc->qstats.requeues;
>> +             sch->qstats.overlimits  += qdisc->qstats.overlimits;
>> +             spin_unlock_bh(qdisc_lock(qdisc));
>> +     }
>> +
>> +     opt.num_tc = dev->num_tc;
>> +     memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
>> +     opt.hw = priv->hw_owned;
>> +
>> +     for (i = 0; i < dev->num_tc; i++) {
>> +             opt.count[i] = dev->tc_to_txq[i].count;
>> +             opt.offset[i] = dev->tc_to_txq[i].offset;
>> +     }
>> +
>> +     NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
>> +
>> +     return skb->len;
>> +nla_put_failure:
>> +     nlmsg_trim(skb, b);
>> +     return -1;
>> +}
>> +
>> +static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
>> +{
>> +     struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
>> +
>> +     return dev_queue->qdisc_sleeping;
>> +}
>> +
>> +static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
>> +{
>> +     unsigned int ntx = TC_H_MIN(classid);
>
> We need to 'get' tc classes too, eg for individual dumps. Then we
> should omit them in .leaf, .graft etc.
>

OK missed this. Looks like iproute2 always sets NLM_F_DUMP which works because it uses cl_ops->walk

# tc -s class show dev eth3 classid 800b:1
class mqprio 800b:1 root
 Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0


>> +
>> +     if (!mqprio_queue_get(sch, ntx))
>> +             return 0;
>> +     return ntx;
>> +}
>> +
>> +static void mqprio_put(struct Qdisc *sch, unsigned long cl)
>> +{
>> +}
>> +
>> +static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
>> +                      struct sk_buff *skb, struct tcmsg *tcm)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +
>> +     if (cl <= dev->num_tc) {
>> +             tcm->tcm_parent = TC_H_ROOT;
>> +             tcm->tcm_info = 0;
>> +     } else {
>> +             int i;
>> +             struct netdev_queue *dev_queue;
>> +             dev_queue = mqprio_queue_get(sch, cl);
>> +
>> +             tcm->tcm_parent = 0;
>> +             for (i = 0; i < netdev_get_num_tc(dev); i++) {
>
>
> Why dev->num_tc above, netdev_get_num_tc(dev) here, and dev->num_tc
> below?

No reason, just inconsistent. I will use dev->num_tc.

>
>> +                     struct netdev_tc_txq tc = dev->tc_to_txq[i];
>> +                     int q_idx = cl - dev->num_tc;
>
> (empty line after declarations)
>

fixed

>> +                     if (q_idx >= tc.offset &&
>> +                         q_idx < tc.offset + tc.count) {
>
> cl == 17, tc.offset == 0, tc.count == 1, num_tc = 16, q_idx = 1,
> !(1 < 0 + 1), doesn't belong to the parent #1?
>

Should be

	if (q_idx > tc.offset &&
	    q_idx <= tc.offset + tc.count) 

Now for cl == 17, tc.offset == 0, tc.count == 1, num_tc = 16, q_idx = 1,
(1 <= 0 + 1) belongs to the parent #1.

>> +                             tcm->tcm_parent =
>> +                                     TC_H_MAKE(TC_H_MAJ(sch->handle),
>> +                                               TC_H_MIN(i + 1));
>> +                             break;
>> +                     }
>> +             }
>> +             tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
>> +     }
>> +     tcm->tcm_handle |= TC_H_MIN(cl);
>> +     return 0;
>> +}
>> +
>> +static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
>> +                            struct gnet_dump *d)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +
>> +     if (cl <= netdev_get_num_tc(dev)) {
>> +             int i;
>> +             struct Qdisc *qdisc;
>> +             struct gnet_stats_queue qstats = {0};
>> +             struct gnet_stats_basic_packed bstats = {0};
>> +             struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
>> +
>> +             /* Drop lock here it will be reclaimed before touching
>> +              * statistics this is required because the d->lock we
>> +              * hold here is the look on dev_queue->qdisc_sleeping
>> +              * also acquired below.
>> +              */
>> +             spin_unlock_bh(d->lock);
>> +
>> +             for (i = tc.offset; i < tc.offset + tc.count; i++) {
>> +                     qdisc = netdev_get_tx_queue(dev, i)->qdisc;
>> +                     spin_lock_bh(qdisc_lock(qdisc));
>> +                     bstats.bytes      += qdisc->bstats.bytes;
>> +                     bstats.packets    += qdisc->bstats.packets;
>> +                     qstats.qlen       += qdisc->qstats.qlen;
>> +                     qstats.backlog    += qdisc->qstats.backlog;
>> +                     qstats.drops      += qdisc->qstats.drops;
>> +                     qstats.requeues   += qdisc->qstats.requeues;
>> +                     qstats.overlimits += qdisc->qstats.overlimits;
>> +                     spin_unlock_bh(qdisc_lock(qdisc));
>> +             }
>> +             /* Reclaim root sleeping lock before completing stats */
>> +             spin_lock_bh(d->lock);
>> +             if (gnet_stats_copy_basic(d, &bstats) < 0 ||
>> +                 gnet_stats_copy_queue(d, &qstats) < 0)
>> +                     return -1;
>> +     } else {
>> +             struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
>
> (empty line after declarations)
>

fixed.

>> +             sch = dev_queue->qdisc_sleeping;
>> +             sch->qstats.qlen = sch->q.qlen;
>> +             if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
>> +                 gnet_stats_copy_queue(d, &sch->qstats) < 0)
>> +                     return -1;
>> +     }
>> +     return 0;
>> +}
>> +
>> +static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     unsigned long ntx;
>> +     u8 num_tc = netdev_get_num_tc(dev);
>> +
>> +     if (arg->stop)
>> +             return;
>> +
>> +     /* Walk hierarchy with a virtual class per tc */
>> +     arg->count = arg->skip;
>> +     for (ntx = arg->skip; ntx < dev->num_tx_queues + num_tc; ntx++) {
>
> Should we report possibly unused/unconfigured tx_queues?

I think it may be OK: select_queue() could push skbs onto these queues, and we may still want to see the statistics in this case. Although (real_num_tx_queues + num_tc) may make sense; I see no reason to show queues above real_num_tx_queues.

Thanks,
John.

^ permalink raw reply

* Re: [net-next-2.6 PATCH] ethtool: update get_rx_ntuple to correctly interpret string count
From: Ben Hutchings @ 2011-01-05 17:28 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: davem@davemloft.net, netdev@vger.kernel.org
In-Reply-To: <4D24A2DD.2040603@intel.com>

On Wed, 2011-01-05 at 08:57 -0800, Alexander Duyck wrote:
[...]
> I'm fine with us replacing the ETHTOOL_GRXNTUPLE interface, but I would 
> prefer to do it after the merge windows for 2.6.39 has opened.  For now 
> I would like to get this patch accepted as my main concern is getting a 
> minor fix in versus rewriting the entire interface.

So long as there are no in-tree implementations of
ethtool_ops::get_rx_ntuple then it's a valid candidate for removal.
Since you now want to implement it, I think you should submit the
implementation along with the fix for the calling code.

> While we're at it how would you feel about us inverting the masks for 
> setting up an ntuple by making them an inclusion mask instead of an 
> exclusion one?  The reason why I ask is because I have to perform an and 
> operation over all the input anyway before I can use it to compute the 
> hashes and as such I am having to invert almost all of the mask bits, 
> and it appears you are having to do this as well for many of the masks 
> in sfc.

We can't change the userland interface but we could potentially invert
the masks in the ethtool core.  I'm really not convinced that this is
worth the trouble though.  (And it would be a massive pain for the OOT
versions of our drivers.)

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [RFC] sched: CHOKe packet scheduler
From: Eric Dumazet @ 2011-01-05 17:25 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev
In-Reply-To: <20110105091718.02f8a00f@nehalam>

Le mercredi 05 janvier 2011 à 09:17 -0800, Stephen Hemminger a écrit :
> On Wed, 05 Jan 2011 07:19:35 +0100
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> > Le mardi 04 janvier 2011 à 16:29 -0800, Stephen Hemminger a écrit :
> > > +static struct sk_buff *skb_peek_random(struct sk_buff_head *list)
> > > +{
> > > +	struct sk_buff *skb = list->next;
> > > +	unsigned int idx = net_random() % list->qlen;
> > > +
> > > +	while (skb && idx-- > 0)
> > > +		skb = skb->next;
> > > +
> > > +	return skb;
> > > +}
> > 
> > You could avoid the divide op :
> > 
> > unsigned int idx = reciprocal_divide(random32(), list->qlen);
> 
> How would this work, it is a mod not a divide??
> 

It works because random32() provides a 32-bit 'random' number
between 0 and 0xFFFFFFFF.

We multiply it by X, get a 64bit number between 0 and 0xFFFFFFFF * X
then we right shift it by 32, get a number between 0 and X - 1 

We don't need to get the modulus, just a random number between 0 and
X - 1.

Dont worry, we should add a helper function to do that, since it might
be used in many places.

/*
 * Deliver a pseudo-random number between 0 and N - 1.
 *
 * As described in this mail: random32() provides a 32-bit value, which is
 * multiplied by N to form a 64-bit product and then right-shifted by 32,
 * giving a value in [0, N - 1] without a divide/modulo operation.
 *
 * The original sketch discarded the computed value (no return statement),
 * so callers would have read an indeterminate result.
 */
u32 random_N(unsigned int N)
{
	return reciprocal_divide(random32(), N);
}





^ permalink raw reply

* Re: [PATCH v3 08/10] ARM: mxs: add ocotp read function
From: Jamie Iles @ 2011-01-05 17:25 UTC (permalink / raw)
  To: Uwe Kleine-König
  Cc: Jamie Iles, Shawn Guo, gerg, B32542, netdev, s.hauer, baruch,
	w.sang, r64343, eric, bryan.wu, davem, linux-arm-kernel, lw
In-Reply-To: <20110105164409.GV25121@pengutronix.de>

On Wed, Jan 05, 2011 at 05:44:09PM +0100, Uwe Kleine-König wrote:
> Hello Jamie,
> On Wed, Jan 05, 2011 at 04:16:46PM +0000, Jamie Iles wrote:
> > On Wed, Jan 05, 2011 at 10:07:35PM +0800, Shawn Guo wrote:
> > > +	/* check both BUSY and ERROR cleared */
> > > +	while ((__raw_readl(ocotp_base) &
> > > +		(BM_OCOTP_CTRL_BUSY | BM_OCOTP_CTRL_ERROR)) && --timeout)
> > > +		/* nothing */;
> > 
> > Is it worth using cpu_relax() in these polling loops?
> I don't know what cpu_relax does for other platforms, but on ARM it's
> just a memory barrier which AFAICT doesn't help here at all (which
> doesn't need to be correct).  Why do you think it would be better?

Well I don't see that there's anything broken without cpu_relax() but 
using cpu_relax() seems to be the most common way of doing busy polling 
loops that I've seen. It's also a bit easier to read than a comment and 
semi-colon. Perhaps it's just personal preference.

Jamie

^ permalink raw reply

* Re: [RFC] sched: CHOKe packet scheduler
From: Stephen Hemminger @ 2011-01-05 17:17 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1294208375.3420.46.camel@edumazet-laptop>

On Wed, 05 Jan 2011 07:19:35 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Le mardi 04 janvier 2011 à 16:29 -0800, Stephen Hemminger a écrit :
> > +static struct sk_buff *skb_peek_random(struct sk_buff_head *list)
> > +{
> > +	struct sk_buff *skb = list->next;
> > +	unsigned int idx = net_random() % list->qlen;
> > +
> > +	while (skb && idx-- > 0)
> > +		skb = skb->next;
> > +
> > +	return skb;
> > +}
> 
> You could avoid the divide op :
> 
> unsigned int idx = reciprocal_divide(random32(), list->qlen);

How would this work, it is a mod not a divide??

-- 

^ permalink raw reply

* Re: [BUG] net_sched: pfifo_head_drop problem
From: Stephen Hemminger @ 2011-01-05 17:15 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, netdev, Florian Westphal, Patrick McHardy,
	Hagen Paul Pfeifer, Jarek Poplawski
In-Reply-To: <1294246850.2775.244.camel@edumazet-laptop>

On Wed, 05 Jan 2011 18:00:50 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> While reviewing CHOKe stuff, I found following problem :
> 
> commit 57dbb2d83d100ea (sched: add head drop fifo queue)
> introduced pfifo_head_drop, and broke the invariant that
> sch->bstats.bytes and sch->bstats.packets are COUNTER (increasing
> counters only)
> 
> This can break estimators because est_timer() handle unsigned deltas
> only. A decreasing counter can then give a huge unsigned delta.
> 
> My suggestion would be to change things so that sch->bstats.bytes and
> sch->bstats.packets are incremented in dequeue() only, not at enqueue()
> time.
> 
> It would be more sensible anyway for very low speeds, and big bursts.
> Right now, if we drop packets, they still are accounted in estimators.
> 
> Or maybe my understanding of estimators is wrong, and only apply to
> enqueue rate, not dequeue rate ?
> 
> If so, we should remove the 
> 
> sch->bstats.bytes -= qdisc_pkt_len(skb_head);
> sch->bstats.packets--;
> 
> done in pfifo_tail_enqueue() in case we drop the head skb.
> 
> 
> My preference would be to add dropped pack/byte rates to estimators...
> It might be good for tuning.

Agreed counters should reflect dequeued packets not enqueued packets.


-- 

^ permalink raw reply

* [BUG] net_sched: pfifo_head_drop problem
From: Eric Dumazet @ 2011-01-05 17:00 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Florian Westphal, Patrick McHardy, Hagen Paul Pfeifer,
	Stephen Hemminger, Jarek Poplawski

While reviewing CHOKe stuff, I found the following problem:

commit 57dbb2d83d100ea (sched: add head drop fifo queue)
introduced pfifo_head_drop, and broke the invariant that
sch->bstats.bytes and sch->bstats.packets are COUNTER (increasing
counters only)

This can break estimators because est_timer() handles unsigned deltas
only. A decreasing counter can then give a huge unsigned delta.

My suggestion would be to change things so that sch->bstats.bytes and
sch->bstats.packets are incremented in dequeue() only, not at enqueue()
time.

It would be more sensible anyway for very low speeds, and big bursts.
Right now, if we drop packets, they still are accounted in estimators.

Or maybe my understanding of estimators is wrong, and only apply to
enqueue rate, not dequeue rate ?

If so, we should remove the 

sch->bstats.bytes -= qdisc_pkt_len(skb_head);
sch->bstats.packets--;

done in pfifo_tail_enqueue() in case we drop the head skb.


My preference would be to add dropped pack/byte rates to estimators...
It might be good for tuning.




^ permalink raw reply

* Re: [PATCH] ipv4: IP defragmentation must be ECN aware
From: Stephen Hemminger @ 2011-01-05 17:13 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1294235942.2775.191.camel@edumazet-laptop>

On Wed, 05 Jan 2011 14:59:02 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> +
> +static inline int ip4_frag_ecn(int tos)

Since tos is only a byte, this should be:

static inline u8 ip4_frag_ecn(u8 tos)


-- 

^ permalink raw reply

* Re: [RFC PATCH] m68knommu: added dm9000 support
From: Randy Dunlap @ 2011-01-05 16:59 UTC (permalink / raw)
  To: Angelo Dureghello; +Cc: linux-kernel, netdev
In-Reply-To: <4D24A0E4.1070805@gmail.com>

On Wed, 05 Jan 2011 17:48:36 +0100 Angelo Dureghello wrote:

> This patch allows to use the dm9000 network chip with a m68knommu 
> big-endian cpu. From the HW point of view, the cpu data bus connected to 
> the dm9000 chip should be hardware-byte-swapped, crossing the bytes 
> wires (D0:7 to D24:31, etc.). In anyway, has been also added an option 
> to swap the bytes in the driver, if some cpu has been wired straight 
> D0:D31 to dm9000.
> 
> Signed-off-by: Angelo Dureghello <angelo70@gmail.com>
> ---
> 
> --- drivers/net/Kconfig.orig  2011-01-05 17:11:37.992376124 +0100
> +++ drivers/net/Kconfig 2011-01-04 22:33:14.132301872 +0100

File names should begin at the top level of the Linux kernel source tree, e.g.:

> --- linux/drivers/net/Kconfig.orig  2011-01-05 17:11:37.992376124 +0100
> +++ linux/drivers/net/Kconfig 2011-01-04 22:33:14.132301872 +0100


> @@ -960,7 +960,7 @@ config TI_DAVINCI_EMAC
> 
>   config DM9000
>     tristate "DM9000 support"
> -  depends on ARM || BLACKFIN || MIPS
> +  depends on COLDFIRE || ARM || BLACKFIN || MIPS
>     select CRC32
>     select MII
>     ---help---

Something has modified tab(s) to spaces in this patch, so the patch won't
apply cleanly.  See if Documentation/email-clients.txt can help you.

Oh, are you using the gmail web-based email client?  That won't work
for kernel patches.  You could use gmail via SMTP.


> @@ -986,6 +986,14 @@ config DM9000_FORCE_SIMPLE_PHY_POLL
>       costly MII PHY reads. Note, this will not work if the chip is
>       operating with an external PHY.
> 
> +config DM9000_32BIT_SW_SWAP
> +  bool "Software byte swap for 32 bit data bus"
> +  depends on DM9000 && COLDFIRE
> +  ---help---
> +    This configuration allows to swap data bytes from the dm9000
> +    driver itself, when the big endian cpu is wired straight to
> +    the dm9000 32 bit data bus.
> +
>   config ENC28J60
>     tristate "ENC28J60 support"
>     depends on EXPERIMENTAL && SPI && NET_ETHERNET


---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
desserts:  http://www.xenotime.net/linux/recipes/

^ permalink raw reply

* Re: [net-next-2.6 PATCH] ethtool: update get_rx_ntuple to correctly interpret string count
From: Alexander Duyck @ 2011-01-05 16:57 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: davem@davemloft.net, netdev@vger.kernel.org
In-Reply-To: <1294241216.15866.14.camel@bwh-desktop>

On 1/5/2011 7:26 AM, Ben Hutchings wrote:
> On Tue, 2011-01-04 at 17:06 -0800, Alexander Duyck wrote:
>> On 1/4/2011 4:01 PM, Ben Hutchings wrote:
>>> On Tue, 2011-01-04 at 15:29 -0800, Alexander Duyck wrote:
>>>> Currently any strings returned via the get_rx_ntuple call will just be
>>>> dropped because the num_strings will be zero.  In order to correct this I
>>>> am updating things so that the return value of get_rx_ntuple is the number
>>>> of strings that were written, or a negative value if there was an error.
>>> [...]
>>>
>>> Nothing implements ethtool_ops::get_rx_ntuple, anyway.
>>>
>>> The fallback implementation is totally bogus, too.  Maximum of 1024
>>> filters?  Erm, sfc can handle more than that.  And doing complex string
>>> formatting in the kernel, even though all the parsing is in ethtool?
>>>
>>> Please, let's write off ETHTOOL_GRXNTUPLE as a failed experiment and
>>> replace it with a command that behaves more like ETHTOOL_GRXCLSRLALL.
>>>
>>> Ben.
>>
>> In order to address several different issues in the perfect filters
>> provided by 82599 I found it necessary to implement get_rx_ntuple so
>> that the driver could maintain the filter list inside of the driver
>> instead of having it maintained by the stack.  In doing so though I
>> found the bug.
>>
>> I agree the fallback implementation has a limitation on the number and
>> format of filters it supports.  However declaring the function a "failed
>> experiment" and just dropping it isn't exactly constructive since we
>> have customers that are making use of the feature.
> [...]
>
> We can at least drop that fallback implementation since it apparently
> doesn't work properly for either of the drivers that currently use it.
>
> In the medium term, I do want to replace it with a binary interface and
> move that formatting to ethtool.  ETHTOOL_GRXNTUPLE could be kept around
> for a while for ixgbe only, while your customers have a chance to get
> the updated ethtool.
>
> Ben.
>

I'm fine with us replacing the ETHTOOL_GRXNTUPLE interface, but I would 
prefer to do it after the merge window for 2.6.39 has opened.  For now 
I would like to get this patch accepted, as my main concern is getting a 
minor fix in versus rewriting the entire interface.

While we're at it, how would you feel about us inverting the masks for 
setting up an ntuple by making them an inclusion mask instead of an 
exclusion one?  The reason I ask is that I have to perform an AND 
operation over all the input anyway before I can use it to compute the 
hashes, and as such I am having to invert almost all of the mask bits; 
it appears you are having to do this as well for many of the masks 
in sfc.

Thanks,

Alex

^ permalink raw reply

* [RFC PATCH] m68knommu: added dm9000 support
From: Angelo Dureghello @ 2011-01-05 16:48 UTC (permalink / raw)
  To: linux-kernel, netdev

This patch allows the dm9000 network chip to be used with an m68knommu 
big-endian CPU. From the hardware point of view, the CPU data bus 
connected to the dm9000 chip should be byte-swapped in hardware by 
crossing the byte lanes (D0:7 to D24:31, etc.). An option has also been 
added to swap the bytes in the driver, for boards where the CPU is wired 
straight (D0:D31) to the dm9000.

Signed-off-by: Angelo Dureghello <angelo70@gmail.com>
---

--- drivers/net/Kconfig.orig  2011-01-05 17:11:37.992376124 +0100
+++ drivers/net/Kconfig 2011-01-04 22:33:14.132301872 +0100
@@ -960,7 +960,7 @@ config TI_DAVINCI_EMAC

  config DM9000
    tristate "DM9000 support"
-  depends on ARM || BLACKFIN || MIPS
+  depends on COLDFIRE || ARM || BLACKFIN || MIPS
    select CRC32
    select MII
    ---help---
@@ -986,6 +986,14 @@ config DM9000_FORCE_SIMPLE_PHY_POLL
      costly MII PHY reads. Note, this will not work if the chip is
      operating with an external PHY.

+config DM9000_32BIT_SW_SWAP
+  bool "Software byte swap for 32 bit data bus"
+  depends on DM9000 && COLDFIRE
+  ---help---
+    This configuration allows to swap data bytes from the dm9000
+    driver itself, when the big endian cpu is wired straight to
+    the dm9000 32 bit data bus.
+
  config ENC28J60
    tristate "ENC28J60 support"
    depends on EXPERIMENTAL && SPI && NET_ETHERNET
@@ -3347,4 +3355,3 @@ config VMXNET3
           module will be called vmxnet3.

  endif # NETDEVICES
-


--- drivers/net/dm9000.c.orig    2010-12-30 23:19:39.747836070 +0100
+++ drivers/net/dm9000.c    2011-01-05 16:30:48.636116500 +0100
@@ -158,9 +158,17 @@ dm9000_reset(board_info_t * db)
      dev_dbg(db->dev, "resetting device\n");

      /* RESET device */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writel(DM9000_NCR, db->io_addr);
+#else
      writeb(DM9000_NCR, db->io_addr);
+#endif
      udelay(200);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writel(NCR_RST, db->io_data);
+#else
      writeb(NCR_RST, db->io_data);
+#endif
      udelay(200);
  }

@@ -170,8 +178,13 @@ dm9000_reset(board_info_t * db)
  static u8
  ior(board_info_t * db, int reg)
  {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writel(reg, db->io_addr);
+    return (u8)readl(db->io_data);
+#else
      writeb(reg, db->io_addr);
      return readb(db->io_data);
+#endif
  }

  /*
@@ -181,43 +194,72 @@ ior(board_info_t * db, int reg)
  static void
  iow(board_info_t * db, int reg, int value)
  {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writel(reg, db->io_addr);
+    writel(value, db->io_data);
+#else
      writeb(reg, db->io_addr);
      writeb(value, db->io_data);
+#endif
  }

  /* routines for sending block to chip */

  static void dm9000_outblk_8bit(void __iomem *reg, void *data, int count)
  {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writesbsw(reg, data, count);
+#else
      writesb(reg, data, count);
+#endif
  }

  static void dm9000_outblk_16bit(void __iomem *reg, void *data, int count)
  {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writeswsw(reg, data, (count+1) >> 1);
+#else
      writesw(reg, data, (count+1) >> 1);
+#endif
  }

  static void dm9000_outblk_32bit(void __iomem *reg, void *data, int count)
  {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writeslsw(reg, data, (count+3) >> 2);
+#else
      writesl(reg, data, (count+3) >> 2);
+#endif
  }

  /* input block from chip to memory */

  static void dm9000_inblk_8bit(void __iomem *reg, void *data, int count)
  {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    readsbsw(reg, data, count);
+#else
      readsb(reg, data, count);
+#endif
  }


  static void dm9000_inblk_16bit(void __iomem *reg, void *data, int count)
  {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    readswsw(reg, data, (count+1) >> 1);
+#else
      readsw(reg, data, (count+1) >> 1);
+#endif
  }

  static void dm9000_inblk_32bit(void __iomem *reg, void *data, int count)
  {
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    readslsw(reg, data, (count+3) >> 2);
+#else
      readsl(reg, data, (count+3) >> 2);
+#endif
  }

  /* dump block from chip to null */
@@ -863,8 +905,13 @@ static void dm9000_timeout(struct net_de
      netif_wake_queue(dev);

      /* Restore previous register address */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writel(reg_save, db->io_addr);
+#else
      writeb(reg_save, db->io_addr);
-    spin_unlock_irqrestore(&db->lock, flags);
+#endif
+
+    spin_unlock_irqrestore(&db->lock,flags);
  }

  static void dm9000_send_packet(struct net_device *dev,
@@ -908,7 +955,11 @@ dm9000_start_xmit(struct sk_buff *skb, s
      spin_lock_irqsave(&db->lock, flags);

      /* Move data to DM9000 TX RAM */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   writel(DM9000_MWCMD, db->io_addr);
+#else
      writeb(DM9000_MWCMD, db->io_addr);
+#endif

      (db->outblk)(db->io_data, skb->data, skb->len);
      dev->stats.tx_bytes += skb->len;
@@ -981,7 +1032,11 @@ dm9000_rx(struct net_device *dev)
          ior(db, DM9000_MRCMDX);    /* Dummy read */

          /* Get most updated data */
-        rxbyte = readb(db->io_data);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+      rxbyte = (u8)readl(db->io_data);
+#else
+      rxbyte = readb(db->io_data);
+#endif

          /* Status check: this byte must be 0 or 1 */
          if (rxbyte & DM9000_PKT_ERR) {
@@ -996,8 +1051,13 @@ dm9000_rx(struct net_device *dev)

          /* A packet ready now & Get status/length */
          GoodPacket = true;
-        writeb(DM9000_MRCMD, db->io_addr);

+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+        writel(DM9000_MRCMD, db->io_addr);
+#else
+      writeb(DM9000_MRCMD, db->io_addr);
+#endif
+
          (db->inblk)(db->io_data, &rxhdr, sizeof(rxhdr));

          RxLen = le16_to_cpu(rxhdr.RxLen);
@@ -1077,7 +1137,7 @@ static irqreturn_t dm9000_interrupt(int
      unsigned long flags;
      u8 reg_save;

-    dm9000_dbg(db, 3, "entering %s\n", __func__);
+    //dm9000_dbg(db, 3, "entering %s\n", __func__);

      /* A real interrupt coming */

@@ -1085,7 +1145,11 @@ static irqreturn_t dm9000_interrupt(int
      spin_lock_irqsave(&db->lock, flags);

      /* Save previous register address */
-    reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif

      /* Disable all interrupts */
      iow(db, DM9000_IMR, IMR_PAR);
@@ -1100,7 +1164,7 @@ static irqreturn_t dm9000_interrupt(int
      /* Received the coming packet */
      if (int_status & ISR_PRS)
          dm9000_rx(dev);
-
+
      /* Trnasmit Interrupt check */
      if (int_status & ISR_PTS)
          dm9000_tx_done(dev, db);
@@ -1116,8 +1180,12 @@ static irqreturn_t dm9000_interrupt(int
      iow(db, DM9000_IMR, db->imr_all);

      /* Restore previous register address */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writel(reg_save, db->io_addr);
+#else
      writeb(reg_save, db->io_addr);
-
+#endif
+
      spin_unlock_irqrestore(&db->lock, flags);

      return IRQ_HANDLED;
@@ -1233,11 +1301,15 @@ dm9000_phy_read(struct net_device *dev,
      int ret;

      mutex_lock(&db->addr_lock);
-
+
      spin_lock_irqsave(&db->lock,flags);

      /* Save previous register address */
-    reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif

      /* Fill the phyxcer register into REG_0C */
      iow(db, DM9000_EPAR, DM9000_PHY | reg);
@@ -1250,7 +1322,11 @@ dm9000_phy_read(struct net_device *dev,
      dm9000_msleep(db, 1);        /* Wait read complete */

      spin_lock_irqsave(&db->lock,flags);
-    reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif

      iow(db, DM9000_EPCR, 0x0);    /* Clear phyxcer read command */

@@ -1258,9 +1334,14 @@ dm9000_phy_read(struct net_device *dev,
      ret = (ior(db, DM9000_EPDRH) << 8) | ior(db, DM9000_EPDRL);

      /* restore the previous address */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writel(reg_save, db->io_addr);
+#else
      writeb(reg_save, db->io_addr);
-    spin_unlock_irqrestore(&db->lock,flags);
+#endif

+    spin_unlock_irqrestore(&db->lock,flags);
+
      mutex_unlock(&db->addr_lock);

      dm9000_dbg(db, 5, "phy_read[%02x] -> %04x\n", reg, ret);
@@ -1284,7 +1365,11 @@ dm9000_phy_write(struct net_device *dev,
      spin_lock_irqsave(&db->lock,flags);

      /* Save previous register address */
-    reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif

      /* Fill the phyxcer register into REG_0C */
      iow(db, DM9000_EPAR, DM9000_PHY | reg);
@@ -1295,18 +1380,31 @@ dm9000_phy_write(struct net_device *dev,

      iow(db, DM9000_EPCR, EPCR_EPOS | EPCR_ERPRW);    /* Issue phyxcer 
write command */

+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writel(reg_save, db->io_addr);
+#else
      writeb(reg_save, db->io_addr);
+#endif
+
      spin_unlock_irqrestore(&db->lock, flags);

      dm9000_msleep(db, 1);        /* Wait write complete */

      spin_lock_irqsave(&db->lock,flags);
-    reg_save = readb(db->io_addr);
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+   reg_save = (u8)readl(db->io_addr);
+#else
+   reg_save = readb(db->io_addr);
+#endif

      iow(db, DM9000_EPCR, 0x0);    /* Clear phyxcer write command */

      /* restore the previous address */
+#ifdef CONFIG_DM9000_32BIT_SW_SWAP
+    writel(reg_save, db->io_addr);
+#else
      writeb(reg_save, db->io_addr);
+#endif

      spin_unlock_irqrestore(&db->lock, flags);
      mutex_unlock(&db->addr_lock);
@@ -1713,4 +1811,3 @@ MODULE_AUTHOR("Sascha Hauer, Ben Dooks")
  MODULE_DESCRIPTION("Davicom DM9000 network driver");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS("platform:dm9000");
-

--- arch/m68k/include/asm/io_no.h.orig    2011-01-05 16:53:55.904905038 
+0100
+++ arch/m68k/include/asm/io_no.h    2011-01-04 23:45:08.893049554 +0100
@@ -47,6 +47,91 @@ static inline unsigned int _swapl(volati
  #define writew(b,addr) (void)((*(volatile unsigned short *) (addr)) = (b))
  #define writel(b,addr) (void)((*(volatile unsigned int *) (addr)) = (b))

+static inline void writesb (void __iomem *reg, void *data, int count)
+{
+    unsigned char *p = (unsigned char*) data;
+
+    while (count--) writeb(*p++, reg);
+}
+
+static inline void writesbsw (void __iomem *reg, void *data, int count)
+{
+    unsigned char *p = (unsigned char *) data;
+
+    while (count--) writel((int)(*p++), reg);
+}
+
+static inline void writesw (void __iomem *reg, void *data, int count)
+{
+   unsigned short *p = (unsigned short*) data;
+
+   while (count--) writew(*p++, reg);
+}
+
+static inline void writeswsw (void __iomem *reg, void *data, int count)
+{
+   unsigned short *p = (unsigned short *) data;
+
+   while (count--) writel((int)(_swapw(*p++)), reg);
+}
+
+static inline void writesl (void __iomem *reg, void *data, int count)
+{
+   unsigned long *p = (unsigned long*) data;
+
+   while (count--) writel(*p++, reg);
+}
+
+static inline void writeslsw (void __iomem *reg, void *data, int count)
+{
+   unsigned long *p = (unsigned long *) data;
+
+   while (count--) writel((int)(_swapl(*p++)), reg);
+}
+
+static inline void readsb (void __iomem *reg, void *data, int count)
+{
+   unsigned char *p = (unsigned char *) data;
+
+   while (count--) *p++ = readb(reg);
+}
+
+static inline void readsbsw (void __iomem *reg, void *data, int count)
+{
+   unsigned char *p = (unsigned char *) data;
+
+   while (count--) *p++ = (unsigned char)readl(reg);
+}
+
+static inline void readsw (void __iomem *reg, void *data, int count)
+{
+   unsigned short *p = (unsigned short *) data;
+
+   while (count--) *p++ = readb(reg);
+}
+
+static inline void readswsw (void __iomem *reg, void *data, int count)
+{
+   unsigned short *p = (unsigned short *) data;
+
+   while (count--) *p++ = _swapw((unsigned short)readw(reg));
+}
+
+static inline void readsl (void __iomem *reg, void *data, int count)
+{
+   unsigned long *p = (unsigned long *) data;
+
+   while (count--) *p++ = readb(reg);
+}
+
+static inline void readslsw (void __iomem *reg, void *data, int count)
+{
+   unsigned long *p = (unsigned long *) data;
+
+   while (count--) *p++ = _swapl(readl(reg));
+}
+
+
  #define __raw_readb readb
  #define __raw_readw readw
  #define __raw_readl readl
@@ -180,4 +265,3 @@ extern void iounmap(void *addr);
  #endif /* __KERNEL__ */

  #endif /* _M68KNOMMU_IO_H */
-

^ permalink raw reply

* Re: [PATCH v3 08/10] ARM: mxs: add ocotp read function
From: Uwe Kleine-König @ 2011-01-05 16:44 UTC (permalink / raw)
  To: Jamie Iles
  Cc: Shawn Guo, gerg, B32542, netdev, s.hauer, baruch, w.sang, r64343,
	eric, bryan.wu, davem, linux-arm-kernel, lw
In-Reply-To: <20110105161235.GA2112@gallagher>

Hello Jamie,

On Wed, Jan 05, 2011 at 04:16:46PM +0000, Jamie Iles wrote:
> On Wed, Jan 05, 2011 at 10:07:35PM +0800, Shawn Guo wrote:
> > Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
> > ---
> > Changes for v2:
> >  - Add mutex locking for mxs_read_ocotp()
> >  - Use type size_t for count and i
> >  - Add comment for clk_enable/disable skipping
> >  - Add ERROR bit clearing and polling step
> > 
> >  arch/arm/mach-mxs/Makefile              |    2 +-
> >  arch/arm/mach-mxs/include/mach/common.h |    1 +
> >  arch/arm/mach-mxs/ocotp.c               |   79 +++++++++++++++++++++++++++++++
> >  3 files changed, 81 insertions(+), 1 deletions(-)
> >  create mode 100644 arch/arm/mach-mxs/ocotp.c
> > 
> [...]
> > diff --git a/arch/arm/mach-mxs/ocotp.c b/arch/arm/mach-mxs/ocotp.c
> > new file mode 100644
> > index 0000000..902ef59
> > --- /dev/null
> > +++ b/arch/arm/mach-mxs/ocotp.c
> > @@ -0,0 +1,79 @@
> > +/*
> > + * Copyright 2010 Freescale Semiconductor, Inc. All Rights Reserved.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + */
> > +
> > +#include <linux/delay.h>
> > +#include <linux/err.h>
> > +#include <linux/mutex.h>
> > +
> > +#include <mach/mxs.h>
> > +
> > +#define BM_OCOTP_CTRL_BUSY		(1 << 8)
> > +#define BM_OCOTP_CTRL_ERROR		(1 << 9)
> > +#define BM_OCOTP_CTRL_RD_BANK_OPEN	(1 << 12)
> > +
> > +static DEFINE_MUTEX(ocotp_mutex);
> > +
> > +int mxs_read_ocotp(unsigned offset, size_t count, u32 *values)
> > +{
> > +	void __iomem *ocotp_base = MXS_IO_ADDRESS(MXS_OCOTP_BASE_ADDR);
> > +	int timeout = 0x400;
> > +	size_t i;
> > +
> > +	mutex_lock(&ocotp_mutex);
> > +
> > +	/*
> > +	 * clk_enable(hbus_clk) for ocotp can be skipped
> > +	 * as it must be on when system is running.
> > +	 */
> > +
> > +	/* try to clear ERROR bit */
> > +	__mxs_clrl(BM_OCOTP_CTRL_ERROR, ocotp_base);
> > +
> > +	/* check both BUSY and ERROR cleared */
> > +	while ((__raw_readl(ocotp_base) &
> > +		(BM_OCOTP_CTRL_BUSY | BM_OCOTP_CTRL_ERROR)) && --timeout)
> > +		/* nothing */;
> 
> Is it worth using cpu_relax() in these polling loops?
I don't know what cpu_relax does for other platforms, but on ARM it's
just a memory barrier, which AFAICT doesn't help here at all (though I
may be wrong about that).  Why do you think it would be better?

Best regards
Uwe

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |

^ permalink raw reply

* Re: [PATCH v3 05/10] net/fec: add dual fec support for mx28
From: Uwe Kleine-König @ 2011-01-05 16:34 UTC (permalink / raw)
  To: Shawn Guo
  Cc: davem, gerg, baruch, eric, bryan.wu, r64343, B32542, lw, w.sang,
	s.hauer, netdev, linux-arm-kernel
In-Reply-To: <1294236457-17476-6-git-send-email-shawn.guo@freescale.com>

Hello,

On Wed, Jan 05, 2011 at 10:07:32PM +0800, Shawn Guo wrote:
> This patch is to add mx28 dual fec support. Here are some key notes
> for mx28 fec controller.
> 
>  - The mx28 fec controller naming ENET-MAC is a different IP from FEC
>    used on other i.mx variants.  But they are basically compatible
>    on software interface, so it's possible to share the same driver.
>  - ENET-MAC design made an improper assumption that it runs on a
>    big-endian system. As the result, driver has to swap every frame
>    going to and coming from the controller.
>  - The external phys can only be configured by fec0, which means fec1
>    can not work independently and both phys need to be configured by
>    mii_bus attached on fec0.
>  - ENET-MAC reset will get mac address registers reset too.
>  - ENET-MAC MII/RMII mode and 10M/100M speed are configured
>    differently FEC.
>  - ETHER_EN bit must be set to get ENET-MAC interrupt work.
> 
> Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
> ---
> Changes for v3:
>  - Move v2 changes into patch #3
>  - Use device name to check if it's running on ENET-MAC
> 
>  drivers/net/Kconfig |    7 ++-
>  drivers/net/fec.c   |  140 +++++++++++++++++++++++++++++++++++++++++++++------
>  drivers/net/fec.h   |    5 +-
>  3 files changed, 131 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> index 4f1755b..f34629b 100644
> --- a/drivers/net/Kconfig
> +++ b/drivers/net/Kconfig
> @@ -1944,18 +1944,19 @@ config 68360_ENET
>  config FEC
>  	bool "FEC ethernet controller (of ColdFire and some i.MX CPUs)"
>  	depends on M523x || M527x || M5272 || M528x || M520x || M532x || \
> -		MACH_MX27 || ARCH_MX35 || ARCH_MX25 || ARCH_MX5
> +		MACH_MX27 || ARCH_MX35 || ARCH_MX25 || ARCH_MX5 || SOC_IMX28
>  	select PHYLIB
>  	help
>  	  Say Y here if you want to use the built-in 10/100 Fast ethernet
>  	  controller on some Motorola ColdFire and Freescale i.MX processors.
>  
>  config FEC2
> -	bool "Second FEC ethernet controller (on some ColdFire CPUs)"
> +	bool "Second FEC ethernet controller"
>  	depends on FEC
>  	help
>  	  Say Y here if you want to use the second built-in 10/100 Fast
> -	  ethernet controller on some Motorola ColdFire processors.
> +	  ethernet controller on some Motorola ColdFire and Freescale
> +	  i.MX processors.
>  
>  config FEC_MPC52xx
>  	tristate "MPC52xx FEC driver"
> diff --git a/drivers/net/fec.c b/drivers/net/fec.c
> index 8a1c51f..67ba263 100644
> --- a/drivers/net/fec.c
> +++ b/drivers/net/fec.c
> @@ -17,6 +17,8 @@
>   *
>   * Bug fixes and cleanup by Philippe De Muyter (phdm@macqel.be)
>   * Copyright (c) 2004-2006 Macq Electronique SA.
> + *
> + * Copyright (C) 2010 Freescale Semiconductor, Inc.
>   */
>  
>  #include <linux/module.h>
> @@ -45,20 +47,33 @@
>  
>  #include <asm/cacheflush.h>
>  
> -#ifndef CONFIG_ARCH_MXC
> +#if !defined(CONFIG_ARCH_MXC) && !defined(CONFIG_SOC_IMX28)
maybe !defined(CONFIG_ARM)?

>  #include <asm/coldfire.h>
>  #include <asm/mcfsim.h>
>  #endif
>  
>  #include "fec.h"
>  
> -#ifdef CONFIG_ARCH_MXC
> -#include <mach/hardware.h>
> +#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
>  #define FEC_ALIGNMENT	0xf
>  #else
>  #define FEC_ALIGNMENT	0x3
>  #endif
>  
> +#define DRIVER_NAME	"fec"
> +#define ENET_MAC_NAME	"enet-mac"
> +
> +static struct platform_device_id fec_devtype[] = {
> +	{
> +		.name = DRIVER_NAME,
> +	}, {
> +		.name = ENET_MAC_NAME,
> +	}
I'd have done it differently:

	{
		.name = "fec",
		.driver_data = 0,
	}, {
		.name = "imx28-fec",
		.driver_data = HAS_ENET_MAC | ...,
	}

and then test the bits in driver_data (which you get using
platform_get_device_id()) when you need to distinguish.
Comparing names doesn't scale: assume there are three further features
to distinguish; then you need to use strtok or index and get device
names like enet-mac-with-feature1-but-without-feature2-and-feature3.

> +};
> +
> +static unsigned fec_is_enetmac;
> +static struct mii_bus *fec_mii_bus;
In practice this might work, but actually these are per-device
properties, not driver-global.  So it should go into the private data
struct.

> +
>  static unsigned char macaddr[ETH_ALEN];
>  module_param_array(macaddr, byte, NULL, 0);
>  MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
> @@ -129,7 +144,8 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
>   * account when setting it.
>   */
>  #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
> -    defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARCH_MXC)
> +    defined(CONFIG_M520x) || defined(CONFIG_M532x) || \
> +    defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
>  #define	OPT_FRAME_SIZE	(PKT_MAXBUF_SIZE << 16)
>  #else
>  #define	OPT_FRAME_SIZE	0
> @@ -208,6 +224,17 @@ static void fec_stop(struct net_device *dev);
>  /* Transmitter timeout */
>  #define TX_TIMEOUT (2 * HZ)
>  
> +static void *swap_buffer(void *bufaddr, int len)
> +{
> +	int i;
> +	unsigned int *buf = bufaddr;
> +
> +	for (i = 0; i < (len + 3) / 4; i++, buf++)
> +		*buf = __swab32(*buf);
Would it be better to use cpu_to_be32 here?  Then the compiler might
be smart enough to optimize it away on BE.  (Currently the code
generated for a BE build would be wrong with your patch, wouldn't it?)
> +
> +	return bufaddr;
> +}
> +
>  static netdev_tx_t
>  fec_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
> @@ -256,6 +283,14 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  		bufaddr = fep->tx_bounce[index];
>  	}
>  
> +	/*
> +	 * enet-mac design made an improper assumption that it's running
> +	 * on a big endian system. As the result, driver has to swap
if he was really aware that he limits the performant use of the fec to
big endian systems, can you please make him stop designing hardware!?

> +	 * every frame going to and coming from the controller.
> +	 */
> +	if (fec_is_enetmac)
> +		swap_buffer(bufaddr, skb->len);
> +
>  	/* Save skb pointer */
>  	fep->tx_skbuff[fep->skb_cur] = skb;
>  
> @@ -487,6 +522,9 @@ fec_enet_rx(struct net_device *dev)
>  	        dma_unmap_single(NULL, bdp->cbd_bufaddr, bdp->cbd_datlen,
>          			DMA_FROM_DEVICE);
>  
> +		if (fec_is_enetmac)
> +			swap_buffer(data, pkt_len);
> +
>  		/* This does 16 byte alignment, exactly what we need.
>  		 * The packet length includes FCS, but we don't want to
>  		 * include that when passing upstream as it messes up
> @@ -689,6 +727,7 @@ static int fec_enet_mii_probe(struct net_device *dev)
>  	char mdio_bus_id[MII_BUS_ID_SIZE];
>  	char phy_name[MII_BUS_ID_SIZE + 3];
>  	int phy_id;
> +	int dev_id = fep->pdev->id;
>  
>  	fep->phy_dev = NULL;
>  
> @@ -700,6 +739,8 @@ static int fec_enet_mii_probe(struct net_device *dev)
>  			continue;
>  		if (fep->mii_bus->phy_map[phy_id]->phy_id == 0)
>  			continue;
> +		if (fec_is_enetmac && dev_id--)
> +			continue;
>  		strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE);
>  		break;
>  	}
> @@ -741,6 +782,28 @@ static int fec_enet_mii_init(struct platform_device *pdev)
>  	struct fec_enet_private *fep = netdev_priv(dev);
>  	int err = -ENXIO, i;
>  
> +	/*
> +	 * The dual fec interfaces are not equivalent with enet-mac.
> +	 * Here are the differences:
> +	 *
> +	 *  - fec0 supports MII & RMII modes while fec1 only supports RMII
> +	 *  - fec0 acts as the 1588 time master while fec1 is slave
> +	 *  - external phys can only be configured by fec0
> +	 *
> +	 * That is to say fec1 can not work independently. It only works
> +	 * when fec0 is working. The reason behind this design is that the
> +	 * second interface is added primarily for Switch mode.
> +	 *
> +	 * Because of the last point above, both phys are attached on fec0
> +	 * mdio interface in board design, and need to be configured by
> +	 * fec0 mii_bus.
> +	 */
> +	if (fec_is_enetmac && pdev->id) {
> +		/* fec1 uses fec0 mii_bus */
> +		fep->mii_bus = fec_mii_bus;
> +		return 0;
> +	}
> +
>  	fep->mii_timeout = 0;
>  
>  	/*
> @@ -777,6 +840,10 @@ static int fec_enet_mii_init(struct platform_device *pdev)
>  	if (mdiobus_register(fep->mii_bus))
>  		goto err_out_free_mdio_irq;
>  
> +	/* save fec0 mii_bus */
> +	if (fec_is_enetmac)
> +		fec_mii_bus = fep->mii_bus;
> +
>  	return 0;
>  
>  err_out_free_mdio_irq:
> @@ -1149,11 +1216,22 @@ fec_restart(struct net_device *dev, int duplex)
>  {
>  	struct fec_enet_private *fep = netdev_priv(dev);
>  	int i;
> +	u32 val, temp_mac[2];
>  
>  	/* Whack a reset.  We should wait for this. */
>  	writel(1, fep->hwp + FEC_ECNTRL);
>  	udelay(10);
>  
> +	/*
> +	 * enet-mac reset will reset mac address registers too,
> +	 * so need to reconfigure it.
> +	 */
> +	if (fec_is_enetmac) {
> +		memcpy(&temp_mac, dev->dev_addr, ETH_ALEN);
> +		writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW);
> +		writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH);
Where is the value saved to temp_mac[]?  To me it looks like you write
uninitialized data into the MAC registers.
> +	}
> +
>  	/* Clear any outstanding interrupt. */
>  	writel(0xffc00000, fep->hwp + FEC_IEVENT);
>  
> @@ -1200,20 +1278,45 @@ fec_restart(struct net_device *dev, int duplex)
>  	/* Set MII speed */
>  	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
>  
> -#ifdef FEC_MIIGSK_ENR
> -	if (fep->phy_interface == PHY_INTERFACE_MODE_RMII) {
> -		/* disable the gasket and wait */
> -		writel(0, fep->hwp + FEC_MIIGSK_ENR);
> -		while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4)
> -			udelay(1);
> +	/*
> +	 * The phy interface and speed need to get configured
> +	 * differently on enet-mac.
> +	 */
> +	if (fec_is_enetmac) {
> +		val = readl(fep->hwp + FEC_R_CNTRL);
>  
> -		/* configure the gasket: RMII, 50 MHz, no loopback, no echo */
> -		writel(1, fep->hwp + FEC_MIIGSK_CFGR);
> +		/* MII or RMII */
> +		if (fep->phy_interface == PHY_INTERFACE_MODE_RMII)
> +			val |= (1 << 8);
> +		else
> +			val &= ~(1 << 8);
>  
> -		/* re-enable the gasket */
> -		writel(2, fep->hwp + FEC_MIIGSK_ENR);
> -	}
> +		/* 10M or 100M */
> +		if (fep->phy_dev && fep->phy_dev->speed == SPEED_100)
> +			val &= ~(1 << 9);
> +		else
> +			val |= (1 << 9);
> +
> +		writel(val, fep->hwp + FEC_R_CNTRL);
> +	} else {
> +#ifdef FEC_MIIGSK_ENR
> +		if (fep->phy_interface == PHY_INTERFACE_MODE_RMII) {
> +			/* disable the gasket and wait */
> +			writel(0, fep->hwp + FEC_MIIGSK_ENR);
> +			while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4)
> +				udelay(1);
> +
> +			/*
> +			 * configure the gasket:
> +			 *   RMII, 50 MHz, no loopback, no echo
> +			 */
> +			writel(1, fep->hwp + FEC_MIIGSK_CFGR);
> +
> +			/* re-enable the gasket */
> +			writel(2, fep->hwp + FEC_MIIGSK_ENR);
> +		}
>  #endif
> +	}
>  
>  	/* And last, enable the transmit and receive processing */
>  	writel(2, fep->hwp + FEC_ECNTRL);
> @@ -1301,6 +1404,10 @@ fec_probe(struct platform_device *pdev)
>  		}
>  	}
>  
> +	/* check if it's ENET-MAC controller via device name */
> +	if (!strcmp(pdev->name, ENET_MAC_NAME))
> +		fec_is_enetmac = 1;
> +
>  	fep->clk = clk_get(&pdev->dev, "fec_clk");
>  	if (IS_ERR(fep->clk)) {
>  		ret = PTR_ERR(fep->clk);
> @@ -1410,12 +1517,13 @@ static const struct dev_pm_ops fec_pm_ops = {
>  
>  static struct platform_driver fec_driver = {
>  	.driver	= {
> -		.name	= "fec",
> +		.name	= DRIVER_NAME,
>  		.owner	= THIS_MODULE,
>  #ifdef CONFIG_PM
>  		.pm	= &fec_pm_ops,
>  #endif
>  	},
> +	.id_table = fec_devtype,
>  	.probe	= fec_probe,
>  	.remove	= __devexit_p(fec_drv_remove),
>  };
> diff --git a/drivers/net/fec.h b/drivers/net/fec.h
> index 2c48b25..ace318d 100644
> --- a/drivers/net/fec.h
> +++ b/drivers/net/fec.h
> @@ -14,7 +14,8 @@
>  /****************************************************************************/
>  
>  #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
> -    defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARCH_MXC)
> +    defined(CONFIG_M520x) || defined(CONFIG_M532x) || \
> +    defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
>  /*
>   *	Just figures, Motorola would have to change the offsets for
>   *	registers in the same peripheral device on different models
> @@ -78,7 +79,7 @@
>  /*
>   *	Define the buffer descriptor structure.
>   */
> -#ifdef CONFIG_ARCH_MXC
> +#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
>  struct bufdesc {
>  	unsigned short cbd_datlen;	/* Data length */
>  	unsigned short cbd_sc;	/* Control and status info */
> -- 
> 1.7.1
> 
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |

^ permalink raw reply

* Gaah: selinux_socket_unix_stream_connect oops
From: Linus Torvalds @ 2011-01-05 16:27 UTC (permalink / raw)
  To: David Miller, Network Development, Jeremy Fitzhardinge,
	James Morris

[-- Attachment #1: Type: text/plain, Size: 2441 bytes --]

This was actually a regression entry, but only ever reported once by
Jeremy, I think. So it was basically ignored as not being very common
and there not being any hints about what causes it.

But after doing the 2.6.37 release, and intending to put it on all the
machines I have access to, guess what I find on the kids' computer?
Right.

It must be a reasonably rare race condition, because that computer had
been up for three weeks or so (since middle of December), but
yesterday evening it crashed due to that thing.

The code disassembly is

  13:	55                   	push   %ebp
  14:	89 e5                	mov    %esp,%ebp
  16:	57                   	push   %edi
  17:	8d 7d 90             	lea    -0x70(%ebp),%edi
  1a:	56                   	push   %esi
  1b:	53                   	push   %ebx
  1c:	83 ec 6c             	sub    $0x6c,%esp
  1f:	8b 40 14             	mov    0x14(%eax),%eax
  22:	8b 52 14             	mov    0x14(%edx),%edx
  25:	8b 98 58 01 00 00    	mov    0x158(%eax),%ebx
  2b:*	8b 82 58 01 00 00    	mov    0x158(%edx),%eax     <-- trapping instruction
  31:	89 45 8c             	mov    %eax,-0x74(%ebp)
  34:	31 c0                	xor    %eax,%eax
  36:	8b b1 58 01 00 00    	mov    0x158(%ecx),%esi
  3c:	89 7d 88             	mov    %edi,-0x78(%ebp)

which means that it's "other->sk" that is NULL, which I think matches
Jeremy's case exactly.

The logs have a hint: this seems to have coincided with the
console-kit-daemon giving a warning like:

  WARNING: Couldn't read /proc/13585/environ: Failed to open file '/proc/13585/environ': No such file or directory

and then NetworkManager having a bunch of authentication warnings that
end up being about

  Could not get UID of name ':1.3871': no such name

(full text in the attachment).

So I wonder if there is some subtle race that happens when one end of
a unix domain socket attaches just as another end disconnects?
Especially as "security_unix_stream_connect()" is called before the
whole connect sequence is really final. It's generally
"unix_release()" that sets 'sock->sk' to NULL.

Btw, why do we pass in "sock" and "other->sk_socket" ("struct
socket"), when it appears that what the security code really wants to
get is "struct sock" (which would be "sk" and "other" in the caller)? The
calling convention seems to result in (a) this NULL pointer thing and
(b) all these extra dereferences.

Comments? Ideas?

                              Linus

[-- Attachment #2: kids.txt --]
[-- Type: text/plain, Size: 5215 bytes --]

Happened with

 Linux version 2.6.37-rc5-00333-gdaefc3d

after perhaps three weeks of uptime.  That kernel isn't in git, it has
an extra patch (to force-enable AHCI on the mac mini), so it's really
v2.6.37-rc5-332-g0fcdcfbbc98f in baseline.

---
Jan  4 17:02:50 kids console-kit-daemon[2884]: WARNING: Couldn't read /proc/13585/environ: Failed to open file '/proc/13585/environ': No such file or directory
Jan  4 17:02:50 kids NetworkManager[2438]: <warn> error requesting auth for org.freedesktop.NetworkManager.network-control: (6) Remote Exception invoking org.freedesktop.PolicyKit1.Authority.CheckAuthorization() on /org/freede...
				...NameHasNoOwner: Could not get UID of name ':1.3871': no such name
Jan  4 17:02:50 kids NetworkManager[2438]: <warn> User connections unavailable: (6) Remote Exception invoking org.freedesktop.PolicyKit1.Authority.CheckAuthorization() ...
Jan  4 17:02:50 kids NetworkManager[2438]: <warn> error requesting auth for org.freedesktop.NetworkManager.enable-disable-network: (6) Remote Exception invoking org.fre...
Jan  4 17:02:50 kids NetworkManager[2438]: <warn> error requesting auth for org.freedesktop.NetworkManager.sleep-wake: (6) Remote Exception invoking org.freedesktop.Pol...
Jan  4 17:02:50 kids NetworkManager[2438]: <warn> error requesting auth for org.freedesktop.NetworkManager.enable-disable-wifi: (6) Remote Exception invoking org.freede...
Jan  4 17:02:50 kids NetworkManager[2438]: <warn> error requesting auth for org.freedesktop.NetworkManager.enable-disable-wwan: (6) Remote Exception invoking org.freede...
Jan  4 17:02:50 kids NetworkManager[2438]: <warn> error requesting auth for org.freedesktop.NetworkManager.use-user-connections: (6) Remote Exception invoking org.freed...
Jan  4 17:02:50 kids NetworkManager[2438]: <warn> error requesting auth for org.freedesktop.NetworkManager.network-control: (6) Remote Exception invoking org.freedeskto...
Jan  4 17:02:50 kids bonobo-activation-server (celeste-15075): could not associate with desktop session: Failed to connect to socket /tmp/dbus-8AXjzuuw2I: Connection re...
				...NameHasNoOwner: Could not get UID of name ':1.3871': no such name
Jan  4 17:02:50 kids kernel: [1755465.955480] BUG: unable to handle kernel NULL pointer dereference at 00000158
Jan  4 17:02:50 kids kernel: [1755465.956226] IP: [<c111297e>] selinux_socket_unix_stream_connect+0x18/0x84
Jan  4 17:02:50 kids kernel: [1755465.956226] *pde = 00000000
Jan  4 17:02:50 kids kernel: [1755465.956226] Oops: 0000 [#1] SMP
Jan  4 17:02:50 kids kernel: [1755465.956226] last sysfs file: /sys/devices/virtual/sound/timer/uevent
Jan  4 17:02:50 kids kernel: [1755465.956226] Modules linked in: [last unloaded: scsi_wait_scan]
Jan  4 17:02:50 kids kernel: [1755465.956226]
Jan  4 17:02:50 kids kernel: [1755465.956226] Pid: 15075, comm: bonobo-activati Not tainted 2.6.37-rc5-00333-gdaefc3d #13 Mac-F4208EC8/Macmini1,1
Jan  4 17:02:50 kids kernel: [1755465.956226] EIP: 0060:[<c111297e>] EFLAGS: 00210296 CPU: 1
Jan  4 17:02:50 kids kernel: [1755465.956226] EIP is at selinux_socket_unix_stream_connect+0x18/0x84
Jan  4 17:02:50 kids kernel: [1755465.956226] EAX: ee48b200 EBX: f1f983c0 ECX: ee48be00 EDX: 00000000
Jan  4 17:02:50 kids kernel: [1755465.956226] ESI: ee48b200 EDI: f59c5e1c EBP: f59c5e8c ESP: f59c5e14
Jan  4 17:02:50 kids kernel: [1755465.956226]  DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Jan  4 17:02:50 kids kernel: [1755465.956226] Process bonobo-activati (pid: 15075, ti=f59c4000 task=ee45eb50 task.ti=f59c4000)
Jan  4 17:02:50 kids kernel: [1755465.956226] Stack:
Jan  4 17:02:50 kids kernel: [1755465.956226]  00000000 00000000 00000000 00000000 00000000 00200046 00200046 00000059
Jan  4 17:02:50 kids kernel: [1755465.956226]  00000013 00000000 f59c5e44 c103070e f59c5e5c c1003cc5 f59c5e64 ee432a00
Jan  4 17:02:50 kids kernel: [1755465.956226]  ee48b200 ee48be00 f59c5ed4 c1002ce9 ee432a00 f59c5ec0 e1b02900 ee48b200
Jan  4 17:02:50 kids kernel: [1755465.956226] Call Trace:
Jan  4 17:02:50 kids kernel: [1755465.956226]  [<c103070e>] ? irq_exit+0x39/0x5d
Jan  4 17:02:50 kids kernel: [1755465.956226]  [<c1003cc5>] ? do_IRQ+0x83/0x97
Jan  4 17:02:50 kids kernel: [1755465.956226]  [<c1002ce9>] ? common_interrupt+0x29/0x30
Jan  4 17:02:50 kids kernel: [1755465.956226]  [<c111072c>] ? security_unix_stream_connect+0x10/0x13
Jan  4 17:02:50 kids kernel: [1755465.956226]  [<c1390029>] ? unix_stream_connect+0x1e3/0x35e
Jan  4 17:02:50 kids kernel: [1755465.956226]  [<c1316047>] ? sys_connect+0x60/0x7d
Jan  4 17:02:50 kids kernel: [1755465.956226]  [<c1316b29>] ? sys_socketcall+0x8f/0x1a5
Jan  4 17:02:50 kids kernel: [1755465.956226]  [<c100278c>] ? sysenter_do_call+0x12/0x22
Jan  4 17:02:50 kids kernel: [1755465.956226] Code: 56 68 00 00 04 00 e8 ba f2 ff ff 8d 65 f4 5b 5e 5f c9 c3 55 89 e5 57 8d 7d 90 56 53 83 ec 6c 8b 40 14 8b 52 14 8b 98 58 01 00 00 <8b> 82 58 01 00 00 89 45 8c 31 c0 8b b1 58 01 00 00 89 7d 88 b9
Jan  4 17:02:50 kids kernel: [1755465.956226] EIP: [<c111297e>] selinux_socket_unix_stream_connect+0x18/0x84 SS:ESP 0068:f59c5e14
Jan  4 17:02:50 kids kernel: [1755465.956226] CR2: 0000000000000158
Jan  4 17:02:50 kids kernel: [1755466.037178] ---[ end trace 9fd0d9b8feb78e69 ]---

^ permalink raw reply

* Re: [PATCH v3 08/10] ARM: mxs: add ocotp read function
From: Jamie Iles @ 2011-01-05 16:16 UTC (permalink / raw)
  To: Shawn Guo
  Cc: davem, gerg, baruch, eric, bryan.wu, r64343, B32542,
	u.kleine-koenig, lw, w.sang, s.hauer, netdev, linux-arm-kernel
In-Reply-To: <1294236457-17476-9-git-send-email-shawn.guo@freescale.com>

Hi Shawn,

On Wed, Jan 05, 2011 at 10:07:35PM +0800, Shawn Guo wrote:
> Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
> ---
> Changes for v2:
>  - Add mutex locking for mxs_read_ocotp()
>  - Use type size_t for count and i
>  - Add comment for clk_enable/disable skipping
>  - Add ERROR bit clearing and polling step
> 
>  arch/arm/mach-mxs/Makefile              |    2 +-
>  arch/arm/mach-mxs/include/mach/common.h |    1 +
>  arch/arm/mach-mxs/ocotp.c               |   79 +++++++++++++++++++++++++++++++
>  3 files changed, 81 insertions(+), 1 deletions(-)
>  create mode 100644 arch/arm/mach-mxs/ocotp.c
> 
[...]
> diff --git a/arch/arm/mach-mxs/ocotp.c b/arch/arm/mach-mxs/ocotp.c
> new file mode 100644
> index 0000000..902ef59
> --- /dev/null
> +++ b/arch/arm/mach-mxs/ocotp.c
> @@ -0,0 +1,79 @@
> +/*
> + * Copyright 2010 Freescale Semiconductor, Inc. All Rights Reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/err.h>
> +#include <linux/mutex.h>
> +
> +#include <mach/mxs.h>
> +
> +#define BM_OCOTP_CTRL_BUSY		(1 << 8)
> +#define BM_OCOTP_CTRL_ERROR		(1 << 9)
> +#define BM_OCOTP_CTRL_RD_BANK_OPEN	(1 << 12)
> +
> +static DEFINE_MUTEX(ocotp_mutex);
> +
> +int mxs_read_ocotp(unsigned offset, size_t count, u32 *values)
> +{
> +	void __iomem *ocotp_base = MXS_IO_ADDRESS(MXS_OCOTP_BASE_ADDR);
> +	int timeout = 0x400;
> +	size_t i;
> +
> +	mutex_lock(&ocotp_mutex);
> +
> +	/*
> +	 * clk_enable(hbus_clk) for ocotp can be skipped
> +	 * as it must be on when system is running.
> +	 */
> +
> +	/* try to clear ERROR bit */
> +	__mxs_clrl(BM_OCOTP_CTRL_ERROR, ocotp_base);
> +
> +	/* check both BUSY and ERROR cleared */
> +	while ((__raw_readl(ocotp_base) &
> +		(BM_OCOTP_CTRL_BUSY | BM_OCOTP_CTRL_ERROR)) && --timeout)
> +		/* nothing */;

Is it worth using cpu_relax() in these polling loops?

Jamie

^ permalink raw reply

* Re: [PATCH v2] net: Allow ethtool to set interface in loopback mode.
From: Jeff Garzik @ 2011-01-05 16:22 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: Stephen Hemminger, Mahesh Bandewar, David Miller, Laurent Chavey,
	Tom Herbert, netdev
In-Reply-To: <1294190504.2992.3.camel@localhost>

On 01/04/2011 08:21 PM, Ben Hutchings wrote:
> On Tue, 2011-01-04 at 16:36 -0800, Stephen Hemminger wrote:
>> On Tue,  4 Jan 2011 16:30:01 -0800
>> Mahesh Bandewar<maheshb@google.com>  wrote:
>>
>>> This patch enables ethtool to set the loopback mode on a given interface.
>>> By configuring the interface in loopback mode in conjunction with a policy
>>> route / rule, a userland application can stress the egress / ingress path
>>> exposing the flows of the change in progress and potentially help developer(s)
>>> understand the impact of those changes without even sending a packet out
>>> on the network.
>>>
>>> Following set of commands illustrates one such example -
>>> 	a) ip -4 addr add 192.168.1.1/24 dev eth1
>>> 	b) ip -4 rule add from all iif eth1 lookup 250
>>> 	c) ip -4 route add local 0/0 dev lo proto kernel scope host table 250
>>> 	d) arp -Ds 192.168.1.100 eth1
>>> 	e) arp -Ds 192.168.1.200 eth1
>>> 	f) sysctl -w net.ipv4.ip_nonlocal_bind=1
>>> 	g) sysctl -w net.ipv4.conf.all.accept_local=1
>>> 	# Assuming that the machine has 8 cores
>>> 	h) taskset 000f netserver -L 192.168.1.200
>>> 	i) taskset 00f0 netperf -t TCP_CRR -L 192.168.1.100 -H 192.168.1.200 -l 30
>>>
>>> Signed-off-by: Mahesh Bandewar<maheshb@google.com>
>>> Reviewed-by: Ben Hutchings<bhutchings@solarflare.com>
>>
>> Since this is a boolean it SHOULD go into ethtool_flags rather than
>> being a high level operation.
>
> It could do, but I thought ETHTOOL_{G,S}FLAGS were intended for
> controlling offload features.

It doesn't have to be.  As Stephen guessed, [GS]FLAGS are basically 
common flags -- as differentiated from private, 
driver-specific/hardware-specific flags.

	Jeff




^ permalink raw reply

* Re: [PATCH] atl1: fix oops when changing tx/rx ring params
From: Luca Tettamanti @ 2011-01-05 15:45 UTC (permalink / raw)
  To: J. K. Cliburn
  Cc: David Miller, netdev, stable, jussuf, chris.snook, Xiong.Huang
In-Reply-To: <AANLkTinizyGFhzTgTMQ=ojL4v+htqvRM9c62dgHUss3f@mail.gmail.com>

On Wed, Jan 5, 2011 at 4:42 PM, Luca Tettamanti <kronos.it@gmail.com> wrote:
> So here you're using pointers to freed memory.
> In order to preserve the stats you'd have to copy the structure.

Doh. I still haven't recovered from all the partying ;-)
Sorry for the noise...

L

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox