Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH 1/1] ARC VMAC ethernet driver.
From: Eric Dumazet @ 2011-02-17 10:13 UTC (permalink / raw)
  To: Andreas Fenkart; +Cc: netdev
In-Reply-To: <1297935091-15504-1-git-send-email-afenkart@gmail.com>

Le jeudi 17 février 2011 à 10:31 +0100, Andreas Fenkart a écrit :
> Signed-off-by: Andreas Fenkart <afenkart@gmail.com>
> ---
>  drivers/net/Kconfig   |   10 +
>  drivers/net/Makefile  |    1 +
>  drivers/net/arcvmac.c | 1494 +++++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/net/arcvmac.h |  265 +++++++++
>  4 files changed, 1770 insertions(+), 0 deletions(-)
> 



> +/* merge buffer chaining  */
> +struct sk_buff *vmac_merge_rx_buffers_unlocked(struct net_device *dev,
> +		struct vmac_buffer_desc *after,
> +		int pkt_len) /* data */
> +{
> +	struct vmac_priv *ap = netdev_priv(dev);
> +	struct sk_buff *merge_skb, *cur_skb;
> +	struct dma_fifo *rx_ring;
> +	struct vmac_buffer_desc *desc;
> +
> +	/* locking: same as vmac_rx_receive */
> +
> +	rx_ring = &ap->rx_ring;
> +	desc = &ap->rxbd[rx_ring->tail];
> +
> +	WARN_ON(desc == after);
> +
> +	/* strip FCS */
> +	pkt_len -= 4;
> +
> +	merge_skb = netdev_alloc_skb_ip_align(dev, pkt_len + NET_IP_ALIGN);


You can remove the "+ NET_IP_ALIGN"; it's already done in
netdev_alloc_skb_ip_align()

Also, it seems strange you want to build one big SKB (no frag), while
this NIC is able to feed multiple frags.

(The chance of getting an SKB to hold a 9000-byte frame is very, very low if
your memory gets fragmented)

> +	if (!merge_skb) {
> +		dev_err(&ap->pdev->dev, "failed to allocate merged rx_skb, rx skb's left %d\n",
> +				fifo_used(rx_ring));
> +
> +		return NULL;
> +	}
> +
> +	while (desc != after && pkt_len) {
> +		struct vmac_buffer_desc *desc;
> +		int buf_len, valid;
> +
> +		/* desc needs wrapping */
> +		desc = &ap->rxbd[rx_ring->tail];
> +		cur_skb = ap->rx_skbuff[rx_ring->tail];
> +		WARN_ON(!cur_skb);
> +
> +		dma_unmap_single(&ap->pdev->dev, desc->data, ap->rx_skb_size,
> +				DMA_FROM_DEVICE);
> +
> +		/* do not copy FCS */
> +		buf_len = le32_to_cpu(desc->info) & BD_LEN;
> +		valid = min(pkt_len, buf_len);
> +		pkt_len -= valid;
> +
> +		memcpy(skb_put(merge_skb, valid), cur_skb->data, valid);
> +
> +		fifo_inc_tail(rx_ring);
> +	}
> +
> +	/* merging_pressure++ */
> +
> +	if (unlikely(pkt_len != 0))
> +		dev_err(&ap->pdev->dev, "buffer chaining bytes missing %d\n",
> +				pkt_len);
> +
> +	WARN_ON(desc != after);
> +
> +	return merge_skb;
> +}
> +

> +
> +int vmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
> +{
> +	struct vmac_priv *ap = netdev_priv(dev);
> +	struct vmac_buffer_desc *desc;
> +	unsigned long flags;
> +	unsigned int tmp;
> +
> +	/* running under xmit lock */
> +	/* locking: modifies tx_ring head, tx_reclaim only tail */
> +
> +	/* no scatter/gatter see features below */
> +	WARN_ON(skb_shinfo(skb)->nr_frags != 0);
> +	WARN_ON(skb->len > MAX_TX_BUFFER_LEN);
> +
> +	if (unlikely(fifo_full(&ap->tx_ring))) {
> +		netif_stop_queue(dev);
> +		vmac_toggle_txint(dev, 1);
> +		dev_err(&ap->pdev->dev, "xmit called with no tx desc available\n");
> +		return NETDEV_TX_BUSY;
> +	}
> +
> +	if (unlikely(skb->len < ETH_ZLEN)) {
> +		struct sk_buff *short_skb;
> +		short_skb = netdev_alloc_skb_ip_align(dev, ETH_ZLEN);

I guess you don't really need the _ip_align() version here

> +		if (!short_skb)
> +			return NETDEV_TX_LOCKED;
> +
> +		memset(short_skb->data, 0, ETH_ZLEN);
> +		memcpy(skb_put(short_skb, ETH_ZLEN), skb->data, skb->len);
> +		dev_kfree_skb(skb);
> +		skb = short_skb;
> +	}
> +
> +	/* fill descriptor */
> +	ap->tx_skbuff[ap->tx_ring.head] = skb;
> +	desc = &ap->txbd[ap->tx_ring.head];
> +	WARN_ON(desc->info & cpu_to_le32(BD_DMA_OWN));
> +
> +	desc->data = dma_map_single(&ap->pdev->dev, skb->data, skb->len,
> +			DMA_TO_DEVICE);
> +
> +	/* dma might already be polling */
> +	wmb();
> +	desc->info = cpu_to_le32(BD_DMA_OWN | BD_FRST | BD_LAST | skb->len);
> +	wmb();

Not sure you need this wmb();

> +
> +	/* lock device data */
> +	spin_lock_irqsave(&ap->lock, flags);
> +
> +	/* kick tx dma */
> +	tmp = vmac_readl(ap, STAT);
> +	vmac_writel(ap, tmp | TXPL_MASK, STAT);
> +
> +	dev->stats.tx_packets++;
> +	dev->stats.tx_bytes += skb->len;
> +	dev->trans_start = jiffies;

trans_start doesn't need to be set anymore in drivers.

> +	fifo_inc_head(&ap->tx_ring);
> +
> +	/* vmac_tx_reclaim outside of vmac_tx_timeout */
> +	if (fifo_used(&ap->tx_ring) > 8)
> +		vmac_tx_reclaim_unlocked(dev, 0);
> +
> +	/* unlock device data */
> +	spin_unlock_irqrestore(&ap->lock, flags);
> +
> +	/* stop queue if no more desc available */
> +	if (fifo_full(&ap->tx_ring)) {
> +		netif_stop_queue(dev);
> +		vmac_toggle_txint(dev, 1);
> +	}
> +
> +	return NETDEV_TX_OK;
> +}
> +


> +static void create_multicast_filter(struct net_device *dev,
> +	unsigned long *bitmask)
> +{
> +	unsigned long crc;
> +	char *addrs;
> +
> +	/* locking: done by net_device */
> +
> +	WARN_ON(netdev_mc_count(dev) == 0);
> +	WARN_ON(dev->flags & IFF_ALLMULTI);
> +
> +	bitmask[0] = bitmask[1] = 0;
> +
> +	{
> +		struct netdev_hw_addr *ha;
> +		netdev_for_each_mc_addr(ha, dev) {
> +			addrs = ha->addr;
> +
> +			/* skip non-multicast addresses */
> +			if (!(*addrs & 1))
> +				continue;
> +
> +			crc = ether_crc_le(ETH_ALEN, addrs);
> +			set_bit(crc >> 26, bitmask);

I am wondering if it works on 64bit arches ;)

> +		}
> +	}
> +}
> +

> +static struct ethtool_ops vmac_ethtool_ops = {

please add const qualifier

static const struct ethtool_ops vmac_ethtool_ops = {


> +	.get_settings		= vmacether_get_settings,
> +	.set_settings		= vmacether_set_settings,
> +	.get_drvinfo		= vmacether_get_drvinfo,
> +	.get_link		= ethtool_op_get_link,
> +};
> +

> +static int __devinit vmac_probe(struct platform_device *pdev)
> +{
> +	struct net_device *dev;
> +	struct vmac_priv *ap;
> +	struct resource *mem;
> +	int err;
> +
> +	/* locking: no concurrency */
> +
> +	if (dma_get_mask(&pdev->dev) > DMA_BIT_MASK(32) ||
> +			pdev->dev.coherent_dma_mask > DMA_BIT_MASK(32)) {
> +		dev_err(&pdev->dev, "arcvmac supports only 32-bit DMA addresses\n");
> +		return -ENODEV;
> +	}
> +
> +	dev = alloc_etherdev(sizeof(*ap));
> +	if (!dev) {
> +		dev_err(&pdev->dev, "etherdev alloc failed, aborting.\n");
> +		return -ENOMEM;
> +	}
> +
> +	ap = netdev_priv(dev);
> +
> +	err = -ENODEV;
> +	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!mem) {
> +		dev_err(&pdev->dev, "no mmio resource defined\n");
> +		goto err_out;
> +	}
> +	ap->mem = mem;
> +
> +	err = platform_get_irq(pdev, 0);
> +	if (err < 0) {
> +		dev_err(&pdev->dev, "no irq found\n");
> +		goto err_out;
> +	}
> +	dev->irq = err;
> +
> +	spin_lock_init(&ap->lock);
> +
> +	SET_NETDEV_DEV(dev, &pdev->dev);
> +	ap->dev = dev;
> +	ap->pdev = pdev;
> +
> +	/* init rx timeout (used for oom) */
> +	init_timer(&ap->refill_timer);
> +	ap->refill_timer.function = vmac_refill_rx_timer;
> +	ap->refill_timer.data = (unsigned long)dev;
> +	spin_lock_init(&ap->refill_lock);
> +
> +	netif_napi_add(dev, &ap->napi, vmac_poll, 2);

2 ?

You have 16 skb in RX ring, please use 16 (or 64)

> +	dev->netdev_ops = &vmac_netdev_ops;
> +	dev->ethtool_ops = &vmac_ethtool_ops;
> +
> +	dev->flags |= IFF_MULTICAST;
> +
> +	dev->base_addr = (unsigned long)ap->regs; /* TODO */
> +
> +	/* prevent buffer chaining, favor speed over space */
> +	ap->rx_skb_size = ETH_FRAME_LEN + VMAC_BUFFER_PAD;
> +
> +	/* private struct functional */
> +
> +	/* temporarily map registers to fetch mac addr */
> +	err = get_register_map(ap);
> +	if (err)
> +		goto err_out;
> +
> +	/* mac address intialize, set vmac_open  */
> +	read_mac_reg(dev, dev->dev_addr); /* TODO */
> +
> +	if (!is_valid_ether_addr(dev->dev_addr))
> +		random_ether_addr(dev->dev_addr);
> +
> +	err = register_netdev(dev);
> +	if (err) {
> +		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
> +		goto err_out;
> +	}
> +
> +	/* release the memory region, till open is called */
> +	put_register_map(ap);
> +
> +	dev_info(&pdev->dev, "ARC VMAC at 0x%pP irq %d %pM\n", &mem->start,
> +	    dev->irq, dev->dev_addr);
> +	platform_set_drvdata(pdev, ap);
> +
> +	return 0;
> +
> +err_out:
> +	free_netdev(dev);
> +	return err;
> +}
> +

> diff --git a/drivers/net/arcvmac.h b/drivers/net/arcvmac.h
> new file mode 100644
> index 0000000..ee570a5
> --- /dev/null
> +++ b/drivers/net/arcvmac.h
> @@ -0,0 +1,265 @@
> +/*
> + * ARC VMAC Driver
> + *
> + * Copyright (C) 2003-2006 Codito Technologies, for linux-2.4 port
> + * Copyright (C) 2006-2007 Celunite Inc, for linux-2.6 port
> + * Copyright (C) 2007-2008 Sagem Communications, Fehmi HAFSI
> + * Copyright (C) 2009-2011 Sagem Communications, Andreas Fenkart
> + * All Rights Reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + *
> + */
> +
> +#ifndef _ARCVMAC_H
> +#define _ARCVMAC_H
> +
> +#define DRV_NAME		"arcvmac"
> +#define DRV_VERSION		"1.0"
> +
> +/* Buffer descriptors */
> +#define TX_BDT_LEN		16    /* Number of receive BD's */

16 is a bit small. Is it a hardware limitation or user choice ?

> +#define RX_BDT_LEN		256   /* Number of transmit BD's */

If hardware permits it, I suggest using 128 RX and 128 TX descs

> +
> +/* BD poll rate, in 1024 cycles. @100Mhz: x * 1024 cy * 10ns = 1ms */
> +#define POLLRATE_TIME		200
> +
> +/* next power of two, bigger than ETH_FRAME_LEN + VLAN  */
> +#define MAX_RX_BUFFER_LEN	0x800	/* 2^11 = 2048 = 0x800 */
> +#define MAX_TX_BUFFER_LEN	0x800	/* 2^11 = 2048 = 0x800 */
> +
> +/* 14 bytes of ethernet header, 4 bytes VLAN, FCS,
> + * plus extra pad to prevent buffer chaining of
> + * maximum sized ethernet packets (1514 bytes) */
> +#define	VMAC_BUFFER_PAD		(ETH_HLEN + 4 + ETH_FCS_LEN + 4)
> +
> +/* VMAC register definitions, offsets in bytes */
> +#define VMAC_ID			0x00
> +
> +/* stat/enable use same bit mask */
> +#define VMAC_STAT		0x04
> +#define VMAC_ENABLE		0x08
> +#  define TXINT_MASK		0x00000001 /* Transmit interrupt */
> +#  define RXINT_MASK		0x00000002 /* Receive interrupt */
> +#  define ERR_MASK		0x00000004 /* Error interrupt */
> +#  define TXCH_MASK		0x00000008 /* Transmit chaining error */
> +#  define MSER_MASK		0x00000010 /* Missed packet counter error */
> +#  define RXCR_MASK		0x00000100 /* RXCRCERR counter rolled over	 */
> +#  define RXFR_MASK		0x00000200 /* RXFRAMEERR counter rolled over */
> +#  define RXFL_MASK		0x00000400 /* RXOFLOWERR counter rolled over */
> +#  define MDIO_MASK		0x00001000 /* MDIO complete */
> +#  define TXPL_MASK		0x80000000 /* TXPOLL */
> +
> +#define VMAC_CONTROL		0x0c
> +#  define EN_MASK		0x00000001 /* VMAC enable */
> +#  define TXRN_MASK		0x00000008 /* TX enable */
> +#  define RXRN_MASK		0x00000010 /* RX enable */
> +#  define DSBC_MASK		0x00000100 /* Disable receive broadcast */
> +#  define ENFL_MASK		0x00000400 /* Enable Full Duplex */
> +#  define PROM_MASK		0x00000800 /* Promiscuous mode */
> +
> +#define VMAC_POLLRATE		0x10
> +
> +#define VMAC_RXERR		0x14
> +#  define RXERR_CRC		0x000000ff
> +#  define RXERR_FRM		0x0000ff00
> +#  define RXERR_OFLO		0x00ff0000 /* fifo overflow */
> +
> +#define VMAC_MISS		0x18
> +#define VMAC_TXRINGPTR		0x1c
> +#define VMAC_RXRINGPTR		0x20
> +#define VMAC_ADDRL		0x24
> +#define VMAC_ADDRH		0x28
> +#define VMAC_LAFL		0x2c
> +#define VMAC_LAFH		0x30
> +#define VMAC_MAC_TXRING_HEAD	0x38
> +#define VMAC_MAC_RXRING_HEAD	0x3C
> +
> +#define VMAC_MDIO_DATA		0x34
> +#  define MDIO_SFD		0xC0000000
> +#  define MDIO_OP		0x30000000
> +#  define MDIO_ID_MASK		0x0F800000
> +#  define MDIO_REG_MASK		0x007C0000
> +#  define MDIO_TA		0x00030000
> +#  define MDIO_DATA_MASK	0x0000FFFF
> +/* common combinations */
> +#  define MDIO_BASE		0x40020000
> +#  define MDIO_OP_READ		0x20000000
> +#  define MDIO_OP_WRITE		0x10000000
> +
> +/* Buffer descriptor INFO bit masks */
> +#define BD_DMA_OWN		0x80000000 /* buffer ownership, 0 CPU, 1 DMA */
> +#define BD_BUFF			0x40000000 /* buffer invalid, rx */
> +#define BD_UFLO			0x20000000 /* underflow, tx */
> +#define BD_LTCL			0x10000000 /* late collision, tx  */
> +#define BD_RETRY_CT		0x0f000000 /* tx */
> +#define BD_DROP			0x00800000 /* drop, more than 16 retries, tx */
> +#define BD_DEFER		0x00400000 /* traffic on the wire, tx */
> +#define BD_CARLOSS		0x00200000 /* carrier loss while transmission, tx, rx? */
> +/* 20:19 reserved */
> +#define BD_ADCR			0x00040000 /* add crc, ignored if not disaddcrc */
> +#define BD_LAST			0x00020000 /* Last buffer in chain */
> +#define BD_FRST			0x00010000 /* First buffer in chain */
> +/* 15:11 reserved */
> +#define BD_LEN			0x000007FF
> +
> +/* common combinations */
> +#define BD_TX_ERR		(BD_UFLO | BD_LTCL | BD_RETRY_CT | BD_DROP | \
> +		BD_DEFER | BD_CARLOSS)
> +
> +
> +/* arcvmac private data structures */
> +struct vmac_buffer_desc {
> +	__le32 info;
> +	__le32 data;
> +};
> +
> +struct dma_fifo {
> +	int head; /* head */
> +	int tail; /* tail */
> +	int size;
> +};
> +
> +struct	vmac_priv {
> +	struct net_device *dev;
> +	struct platform_device *pdev;
> +
> +	struct completion mdio_complete;
> +	spinlock_t lock; /* protects structure plus hw regs of device */
> +
> +	/* base address of register set */
> +	char *regs;
> +	struct resource *mem;
> +
> +	/* DMA ring buffers */
> +	struct vmac_buffer_desc *rxbd;
> +	dma_addr_t rxbd_dma;
> +
> +	struct vmac_buffer_desc *txbd;
> +	dma_addr_t txbd_dma;
> +
> +	/* socket buffers */
> +	struct sk_buff *rx_skbuff[RX_BDT_LEN];
> +	struct sk_buff *tx_skbuff[TX_BDT_LEN];
> +	int rx_skb_size;
> +
> +	/* skb / dma desc managing */
> +	struct dma_fifo rx_ring; /* valid rx buffers */
> +	struct dma_fifo tx_ring;
> +
> +	/* descriptor last polled/processed by the VMAC */
> +	unsigned long dma_rx_head;
> +
> +	/* timer to retry rx skb allocation, if failed during receive */
> +	struct timer_list refill_timer;
> +	spinlock_t refill_lock;
> +
> +	struct napi_struct napi;
> +
> +	/* rx buffer chaining */
> +	int rx_merge_error;
> +	int tx_timeout_error;
> +
> +	/* PHY stuff */
> +	struct mii_bus *mii_bus;
> +	struct phy_device *phy_dev;
> +
> +	int link;
> +	int speed;
> +	int duplex;
> +
> +	/* debug */
> +	int shutdown;
> +};
> +
> +/* DMA ring management */
> +
> +/* for a fifo with size n,
> + * - [0..n] fill levels are n + 1 states
> + * - there are only n different deltas (head - tail) values
> + * => not all fill levels can be represented with head, tail
> + *    pointers only
> + * we give up the n fill level, aka fifo full */
> +
> +/* sacrifice one elt as a sentinel */
> +static inline int fifo_used(struct dma_fifo *f);
> +static inline int fifo_inc_ct(int ct, int size);
> +static inline void fifo_dump(struct dma_fifo *fifo);
> +
> +static inline int fifo_empty(struct dma_fifo *f)
> +{
> +	return (f->head == f->tail);

return f->head == f->tail;

> +}
> +
> +static inline int fifo_free(struct dma_fifo *f)
> +{
> +	int free;
> +
> +	free = f->tail - f->head;
> +	if (free <= 0)
> +		free += f->size;
> +
> +	return free;
> +}
> +
> +static inline int fifo_used(struct dma_fifo *f)
> +{
> +	int used;
> +
> +	used = f->head - f->tail;
> +	if (used < 0)
> +		used += f->size;
> +
> +	return used;
> +}
> +
> +static inline int fifo_full(struct dma_fifo *f)
> +{
> +	return (fifo_used(f) + 1) == f->size;
> +}
> +
> +/* manipulate */
> +static inline void fifo_init(struct dma_fifo *fifo, int size)
> +{
> +	fifo->size = size;
> +	fifo->head = fifo->tail = 0; /* empty */
> +}
> +
> +static inline void fifo_inc_head(struct dma_fifo *fifo)
> +{
> +	BUG_ON(fifo_full(fifo));
> +	fifo->head = fifo_inc_ct(fifo->head, fifo->size);
> +}
> +
> +static inline void fifo_inc_tail(struct dma_fifo *fifo)
> +{
> +	BUG_ON(fifo_empty(fifo));
> +	fifo->tail = fifo_inc_ct(fifo->tail, fifo->size);
> +}
> +
> +/* internal funcs */
> +static inline void fifo_dump(struct dma_fifo *fifo)
> +{
> +	printk(KERN_INFO "fifo: head %d, tail %d, size %d\n", fifo->head,
> +			fifo->tail,
> +			fifo->size);

	pr_info() is preferred in new code
> +}
> +
> +static inline int fifo_inc_ct(int ct, int size)
> +{
> +	return (++ct == size) ? 0 : ct;
> +}
> +
> +#endif	  /* _ARCVMAC_H */



^ permalink raw reply

* Re: [PATCH] drivers/net: Call netif_carrier_off at the end of the probe
From: Francois Romieu @ 2011-02-17 10:19 UTC (permalink / raw)
  To: Ivan Vecera; +Cc: davem, netdev, aabdulla, Ben Hutchings
In-Reply-To: <1297931060.9613.2.camel@ceranb.intra.cera.cz>

Ivan Vecera <ivecera@redhat.com> :
[...]
> Dave, Francois, is it acceptable?

Apparently both are acceptable. :o/

As a mildly convinced coward, I can not tell if it will help more applications
than it will break.

-- 
Ueimor

^ permalink raw reply

* Re: [PATCH] drivers/net: Call netif_carrier_off at the end of the probe
From: Ivan Vecera @ 2011-02-17 10:32 UTC (permalink / raw)
  To: Francois Romieu; +Cc: davem, netdev, aabdulla, Ben Hutchings
In-Reply-To: <20110217101901.GA11897@electric-eye.fr.zoreil.com>

On Thu, 2011-02-17 at 11:19 +0100, Francois Romieu wrote:
> Ivan Vecera <ivecera@redhat.com> :
> [...]
> > Dave, Francois, is it acceptable?
> 
> Apparently both are acceptable. :o/
> 
> As a mildly convinced coward, I can not tell if it will help more applications
> than it will break.
> 
It should not break any application, as the patch only prevents the
operstate from being reported as 'unknown' instead of 'up'.



^ permalink raw reply

* Re: [PATCH 1/1] tproxy: do not assign timewait sockets to skb->sk
From: Patrick McHardy @ 2011-02-17 10:40 UTC (permalink / raw)
  To: Florian Westphal
  Cc: KOVACS Krisztian, netfilter-devel, netdev, Balazs Scheidler
In-Reply-To: <20110216113040.GE8821@Chamillionaire.breakpoint.cc>

Am 16.02.2011 12:30, schrieb Florian Westphal:
> KOVACS Krisztian <hidden@balabit.hu> wrote:
>> On 02/14/2011 04:51 PM, Patrick McHardy wrote:
>>> Looks fine to me. Balazs. Krisztian, any objections?
>>
>> Seems to be OK, as far as I can see.
>>
>> Florian, did you make sure the tests still run after applying this patch?
>>
>> http://git.balabit.hu/?p=bazsi/tproxy-test.git;a=summary
> 
> Thanks for the hint, I cloned this and ran it on my test setup:
> ./tproxy-test.py
> [..]
> PASS: ('192.168.10.8', 50080), we got a connection as we deserved
> PASS: everything is fine

Applied, thanks everyone.


^ permalink raw reply

* Read sent/received bytes -- without opening a file in /sys or /proc
From: Jan-Philip gehrcke @ 2011-02-17 10:57 UTC (permalink / raw)
  To: netdev

Dear list,

I'm about to write a small C program for measurements of the bandwidth
on some network interface of my home router (uname -a: Linux
fritz.fonwlan.box 2.6.19.2 #2 Thu Nov 18 16:35:17 CET 2010 mips
GNU/Linux). I would like to accomplish very *precise* results, while
keeping the absolute measurement time low.

In general, this problem requires the knowledge of time and byte
differences. Therefore, two time measurements and two byte counter
measurements for each, received and sent bytes, are required. Currently,
I am measuring time via

    clock_gettime(CLOCK_MONOTONIC, ...)

and reading the byte counters by reading files in sysfs, e.g.

     /sys/class/net/${interface}/statistics/rx_bytes.

This is very easy and works well, but reading these files four times
takes some (varying) time* on my device, which introduces an error to my
calculation.

Hence, I am wondering, if there is a way to receive this kind of data
directly and much faster, via "kernel API". My question is related to
this unanswered one from 2005:
http://linux.derkeiler.com/Mailing-Lists/RedHat/2005-02/0557.html

*On my device, reading such a file and interpreting the result as
integer takes 1-10 ms, measured via:

    clock_gettime(CLOCK_MONOTONIC, &t_start);
    rx_bytes = read_int_from_file("/sys/class/net/wan/statistics/rx_bytes");
    clock_gettime(CLOCK_MONOTONIC, &t_end);

read_int_from_file() basically consists of fopen(), fgets(), and sscanf().

Please note that I don't have kernel/driver programming experience so far.

Thank you for help and any suggestions!

    Jan-Philip Gehrcke

--
http://gehrcke.de

^ permalink raw reply

* Re: [PATCH 1/2] igb: Allow extra 4 bytes on RX for vlan tags.
From: Jeff Kirsher @ 2011-02-17 11:04 UTC (permalink / raw)
  To: greearb; +Cc: netdev
In-Reply-To: <1297375149-18458-1-git-send-email-greearb@candelatech.com>

On Thu, Feb 10, 2011 at 13:59,  <greearb@candelatech.com> wrote:
> From: Ben Greear <greearb@candelatech.com>
>
> This allows the NIC to receive 1518 byte (not counting
> FCS) packets when MTU is 1500, thus allowing 1500 MTU
> VLAN frames to be received.  Please note that no VLANs
> were actually configured on the NIC...it was just acting
> as pass-through device.
>
> Signed-off-by: Ben Greear <greearb@candelatech.com>
> ---
> :100644 100644 58c665b... 30c9cc6... M  drivers/net/igb/igb_main.c
>  drivers/net/igb/igb_main.c |    5 +++--
>  1 files changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
> index 58c665b..30c9cc6 100644
> --- a/drivers/net/igb/igb_main.c
> +++ b/drivers/net/igb/igb_main.c
> @@ -2281,7 +2281,8 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
>        adapter->rx_itr_setting = IGB_DEFAULT_ITR;
>        adapter->tx_itr_setting = IGB_DEFAULT_ITR;
>
> -       adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
> +       adapter->max_frame_size = (netdev->mtu + ETH_HLEN + ETH_FCS_LEN
> +                                  + VLAN_HLEN);
>        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
>
>        spin_lock_init(&adapter->stats64_lock);
> @@ -4303,7 +4304,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
>  {
>        struct igb_adapter *adapter = netdev_priv(netdev);
>        struct pci_dev *pdev = adapter->pdev;
> -       int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
> +       int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
>        u32 rx_buffer_len, i;
>
>        if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {

While testing this patch, validation found that it reduces the maximum
MTU size by 4 bytes (from 9216 to 9212).  This is not a desired side
effect of this patch.

Thoughts?

-- 
Cheers,
Jeff

^ permalink raw reply

* Read sent/received bytes -- without opening a file in /sys or /proc
From: Jan-Philip Gehrcke @ 2011-02-17 10:47 UTC (permalink / raw)
  To: netdev

Dear list,

I'm about to write a small C program for measurements of the bandwidth
on some network interface of my home router (uname -a: Linux
fritz.fonwlan.box 2.6.19.2 #2 Thu Nov 18 16:35:17 CET 2010 mips
GNU/Linux). I would like to accomplish very *precise* results, while
keeping the absolute measurement time low.

In general, this problem requires the knowledge of time and byte
differences. Therefore, two time measurements and two byte counter
measurements for each, received and sent bytes, are required. Currently,
I am measuring time via

    clock_gettime(CLOCK_MONOTONIC, ...)

and reading the byte counters by reading files in sysfs, e.g.

     /sys/class/net/${interface}/statistics/rx_bytes.

This is very easy and works well, but reading these files four times
takes some (varying) time* on my device, which introduces an error to my
calculation.

Hence, I am wondering, if there is a way to receive this kind of data
directly and much faster, via "kernel API". My question is related to
this unanswered one from 2005:
http://linux.derkeiler.com/Mailing-Lists/RedHat/2005-02/0557.html

*On my device, reading such a file and interpreting the result as
integer takes 1-10 ms, measured via:

    clock_gettime(CLOCK_MONOTONIC, &t_start);
    rx_bytes = read_int_from_file("/sys/class/net/wan/statistics/rx_bytes");
    clock_gettime(CLOCK_MONOTONIC, &t_end);

read_int_from_file() basically consists of fopen(), fgets(), and sscanf().

Please note that I don't have kernel/driver programming experience so far.

Thank you for help and any suggestions!

    Jan-Philip Gehrcke

--
http://gehrcke.de

^ permalink raw reply

* [PATCH 1/2] r8169: Correct settings of rtl8102e
From: Hayes Wang @ 2011-02-17 11:37 UTC (permalink / raw)
  To: romieu; +Cc: netdev, linux-kernel, Hayes Wang

Adjust and remove certain settings of RTL8102E which are for previous chips.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
---
 drivers/net/r8169.c |   20 ++++++--------------
 1 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 59ccf0c..9eaf78f 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -3042,7 +3042,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_out_mwi_2;
 	}
 
-	tp->cp_cmd = PCIMulRW | RxChkSum;
+	tp->cp_cmd = RxChkSum;
 
 	if ((sizeof(dma_addr_t) > 4) &&
 	    !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && use_dac) {
@@ -3845,8 +3845,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
 	Cxpl_dbg_sel | \
 	ASF | \
 	PktCntrDisable | \
-	PCIDAC | \
-	PCIMulRW)
+	Mac_dbgo_sel)
 
 static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev)
 {
@@ -3876,8 +3875,6 @@ static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev)
 	if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
 		RTL_W8(Config1, cfg1 & ~LEDS0);
 
-	RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK);
-
 	rtl_ephy_init(ioaddr, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
 }
 
@@ -3889,8 +3886,6 @@ static void rtl_hw_start_8102e_2(void __iomem *ioaddr, struct pci_dev *pdev)
 
 	RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
 	RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
-
-	RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8102e_3(void __iomem *ioaddr, struct pci_dev *pdev)
@@ -3916,6 +3911,8 @@ static void rtl_hw_start_8101(struct net_device *dev)
 		}
 	}
 
+	RTL_W8(Cfg9346, Cfg9346_Unlock);
+
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_07:
 		rtl_hw_start_8102e_1(ioaddr, pdev);
@@ -3930,14 +3927,13 @@ static void rtl_hw_start_8101(struct net_device *dev)
 		break;
 	}
 
-	RTL_W8(Cfg9346, Cfg9346_Unlock);
+	RTL_W8(Cfg9346, Cfg9346_Lock);
 
 	RTL_W8(MaxTxPacketSize, TxPacketMax);
 
 	rtl_set_rx_max_size(ioaddr, rx_buf_sz);
 
-	tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
-
+	tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
 	RTL_W16(CPlusCmd, tp->cp_cmd);
 
 	RTL_W16(IntrMitigate, 0x0000);
@@ -3947,14 +3943,10 @@ static void rtl_hw_start_8101(struct net_device *dev)
 	RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
 	rtl_set_rx_tx_config_registers(tp);
 
-	RTL_W8(Cfg9346, Cfg9346_Lock);
-
 	RTL_R8(IntrMask);
 
 	rtl_set_rx_mode(dev);
 
-	RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
-
 	RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
 
 	RTL_W16(IntrMask, tp->intr_event);
-- 
1.7.3.2


^ permalink raw reply related

* [PATCH 2/2] r8169: Support RTL8105E
From: Hayes Wang @ 2011-02-17 11:37 UTC (permalink / raw)
  To: romieu; +Cc: netdev, linux-kernel, Hayes Wang
In-Reply-To: <1297942668-23274-1-git-send-email-hayeswang@realtek.com>

Support the new chips for RTL8105E

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
---
 drivers/net/r8169.c |  113 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 9eaf78f..b80c583 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -36,6 +36,7 @@
 
 #define FIRMWARE_8168D_1	"rtl_nic/rtl8168d-1.fw"
 #define FIRMWARE_8168D_2	"rtl_nic/rtl8168d-2.fw"
+#define FIRMWARE_8105E_1	"rtl_nic/rtl8105e-1.fw"
 
 #ifdef RTL8169_DEBUG
 #define assert(expr) \
@@ -123,6 +124,8 @@ enum mac_version {
 	RTL_GIGA_MAC_VER_26 = 0x1a, // 8168D
 	RTL_GIGA_MAC_VER_27 = 0x1b, // 8168DP
 	RTL_GIGA_MAC_VER_28 = 0x1c, // 8168DP
+	RTL_GIGA_MAC_VER_29 = 0x1d, // 8105E
+	RTL_GIGA_MAC_VER_30 = 0x1e, // 8105E
 };
 
 #define _R(NAME,MAC,MASK) \
@@ -160,7 +163,9 @@ static const struct {
 	_R("RTL8168d/8111d",	RTL_GIGA_MAC_VER_25, 0xff7e1880), // PCI-E
 	_R("RTL8168d/8111d",	RTL_GIGA_MAC_VER_26, 0xff7e1880), // PCI-E
 	_R("RTL8168dp/8111dp",	RTL_GIGA_MAC_VER_27, 0xff7e1880), // PCI-E
-	_R("RTL8168dp/8111dp",	RTL_GIGA_MAC_VER_28, 0xff7e1880)  // PCI-E
+	_R("RTL8168dp/8111dp",	RTL_GIGA_MAC_VER_28, 0xff7e1880), // PCI-E
+	_R("RTL8105e",		RTL_GIGA_MAC_VER_29, 0xff7e1880), // PCI-E
+	_R("RTL8105e",		RTL_GIGA_MAC_VER_30, 0xff7e1880)  // PCI-E
 };
 #undef _R
 
@@ -227,6 +232,10 @@ enum rtl_registers {
 	MultiIntr	= 0x5c,
 	PHYAR		= 0x60,
 	PHYstatus	= 0x6c,
+	DLLPR		= 0xd0,
+	DbgReg		= 0xd1,
+	TWSI		= 0xd2,
+	MCU		= 0xd3,
 	RxMaxSize	= 0xda,
 	CPlusCmd	= 0xe0,
 	IntrMitigate	= 0xe2,
@@ -427,6 +436,13 @@ enum rtl_register_content {
 
 	/* DumpCounterCommand */
 	CounterDump	= 0x8,
+
+	/* MCU */
+	EnNDP		= (1 << 3),
+	EnOOBReset		= (1 << 2),
+
+	/* DLLPR */
+	PmSwitch	= (1 << 6),
 };
 
 enum desc_status_bit {
@@ -568,6 +584,7 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(RTL8169_VERSION);
 MODULE_FIRMWARE(FIRMWARE_8168D_1);
 MODULE_FIRMWARE(FIRMWARE_8168D_2);
+MODULE_FIRMWARE(FIRMWARE_8105E_1);
 
 static int rtl8169_open(struct net_device *dev);
 static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
@@ -1143,7 +1160,9 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 		    (tp->mac_version != RTL_GIGA_MAC_VER_13) &&
 		    (tp->mac_version != RTL_GIGA_MAC_VER_14) &&
 		    (tp->mac_version != RTL_GIGA_MAC_VER_15) &&
-		    (tp->mac_version != RTL_GIGA_MAC_VER_16)) {
+		    (tp->mac_version != RTL_GIGA_MAC_VER_16) &&
+		    (tp->mac_version != RTL_GIGA_MAC_VER_29) &&
+		    (tp->mac_version != RTL_GIGA_MAC_VER_30)) {
 			giga_ctrl |= ADVERTISE_1000FULL | ADVERTISE_1000HALF;
 		} else {
 			netif_info(tp, link, dev,
@@ -1559,6 +1578,9 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
 		{ 0x7c800000, 0x30000000,	RTL_GIGA_MAC_VER_11 },
 
 		/* 8101 family. */
+		{ 0x7cf00000, 0x40a00000,	RTL_GIGA_MAC_VER_30 },
+		{ 0x7cf00000, 0x40900000,	RTL_GIGA_MAC_VER_29 },
+		{ 0x7c800000, 0x40800000,	RTL_GIGA_MAC_VER_30 },
 		{ 0x7cf00000, 0x34a00000,	RTL_GIGA_MAC_VER_09 },
 		{ 0x7cf00000, 0x24a00000,	RTL_GIGA_MAC_VER_09 },
 		{ 0x7cf00000, 0x34900000,	RTL_GIGA_MAC_VER_08 },
@@ -2435,6 +2457,57 @@ static void rtl8102e_hw_phy_config(struct rtl8169_private *tp)
 	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
 }
 
+static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+	static const struct phy_reg phy_reg_init[] = {
+		{ 0x1f, 0x0001},
+		{ 0x15, 0x7701},
+		{ 0x1f, 0x0000}
+	};
+
+	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl_writephy(tp, 0x18, 0x0310);
+	msleep(100);
+
+	if (rtl_apply_firmware(tp, FIRMWARE_8105E_1) < 0)
+		netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
+
+	if(RTL_R8(0xef) & 0x08) {
+		static const struct phy_reg phy_reg_init1[] = {
+			{ 0x1f, 0x0005},
+			{ 0x1a, 0x0004},
+			{ 0x1f, 0x0000}
+		};
+		rtl_writephy_batch(tp, phy_reg_init1, ARRAY_SIZE(phy_reg_init1));
+	} else {
+		static const struct phy_reg phy_reg_init1[] = {
+			{ 0x1f, 0x0005},
+			{ 0x1a, 0x0000},
+			{ 0x1f, 0x0000}
+		};
+		rtl_writephy_batch(tp, phy_reg_init1, ARRAY_SIZE(phy_reg_init1));
+	}
+
+	if(RTL_R8(0xef) & 0x010) {
+		static const struct phy_reg phy_reg_init1[] = {
+			{ 0x1f, 0x0004},
+			{ 0x1c, 0x0000},
+			{ 0x1f, 0x0000}
+		};
+		rtl_writephy_batch(tp, phy_reg_init1, ARRAY_SIZE(phy_reg_init1));
+	} else {
+		static const struct phy_reg phy_reg_init1[] = {
+			{ 0x1f, 0x0004},
+			{ 0x1c, 0x0200},
+			{ 0x1f, 0x0000}
+		};
+		rtl_writephy_batch(tp, phy_reg_init1, ARRAY_SIZE(phy_reg_init1));
+	}
+
+	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
 static void rtl_hw_phy_config(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
@@ -2502,6 +2575,10 @@ static void rtl_hw_phy_config(struct net_device *dev)
 	case RTL_GIGA_MAC_VER_28:
 		rtl8168d_4_hw_phy_config(tp);
 		break;
+	case RTL_GIGA_MAC_VER_29:
+	case RTL_GIGA_MAC_VER_30:
+		rtl8105e_hw_phy_config(tp);
+		break;
 
 	default:
 		break;
@@ -2940,6 +3017,8 @@ static void __devinit rtl_init_pll_power_ops(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_09:
 	case RTL_GIGA_MAC_VER_10:
 	case RTL_GIGA_MAC_VER_16:
+	case RTL_GIGA_MAC_VER_29:
+	case RTL_GIGA_MAC_VER_30:
 		ops->down	= r810x_pll_power_down;
 		ops->up		= r810x_pll_power_up;
 		break;
@@ -3895,6 +3974,31 @@ static void rtl_hw_start_8102e_3(void __iomem *ioaddr, struct pci_dev *pdev)
 	rtl_ephy_write(ioaddr, 0x03, 0xc2f9);
 }
 
+static void rtl_hw_start_8105e_1(void __iomem *ioaddr, struct pci_dev *pdev)
+{
+	static const struct ephy_info e_info_8105e_1[] = {
+		{ 0x07,	0, 0x4000 },
+		{ 0x19,	0, 0x0200 },
+		{ 0x19,	0, 0x0020 },
+		{ 0x1e,	0, 0x2000 },
+		{ 0x03,	0, 0x0001 },
+		{ 0x19,	0, 0x0100 },
+		{ 0x19,	0, 0x0004 },
+		{ 0x0a,	0, 0x0020 }
+	};
+
+	/* Force LAN exit from ASPM if Rx/Tx are not idle */
+	RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+
+	/* disable Early Tally Counter */
+	RTL_W32(FuncEvent, RTL_R32(FuncEvent) & ~0x010000);
+
+	RTL_W8(MCU, RTL_R8(MCU) | EnNDP | EnOOBReset);
+	RTL_W8(DLLPR, RTL_R8(DLLPR) | PmSwitch);
+
+	rtl_ephy_init(ioaddr, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
+}
+
 static void rtl_hw_start_8101(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
@@ -3925,6 +4029,11 @@ static void rtl_hw_start_8101(struct net_device *dev)
 	case RTL_GIGA_MAC_VER_09:
 		rtl_hw_start_8102e_2(ioaddr, pdev);
 		break;
+
+	case RTL_GIGA_MAC_VER_29:
+	case RTL_GIGA_MAC_VER_30:
+		rtl_hw_start_8105e_1(ioaddr, pdev);
+		break;
 	}
 
 	RTL_W8(Cfg9346, Cfg9346_Lock);
-- 
1.7.3.2


^ permalink raw reply related

* Re: NFS on little-endian platform - Microblaze
From: Michal Simek @ 2011-02-17 12:01 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, David Miller,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA,
	ltp-list-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
In-Reply-To: <1297865074.6596.10.camel-rJ7iovZKK19ZJLDQqaL3InhyD016LWXt@public.gmane.org>

Trond Myklebust wrote:
> On Wed, 2011-02-16 at 14:53 +0100, Michal Simek wrote: 
>> Trond Myklebust wrote:
>>> On Wed, 2011-02-16 at 14:16 +0100, Michal Simek wrote: 
>>>> Hi again,
>>>>
>>>> I forget to cc linux-nfs mailing list.
>>>>
>>>> Michal
>>>>
>>>> P.S.: Tested on kernels 2.6.38-rc4, 2.6.37 and 2.6.36
>>>>
>>>> Michal Simek wrote:
>>>>> Hi All,
>>>>>
>>>>> I am trying to understand one problem which we have found.
>>>>> The problem is that I can't on Microblaze little-endian platform
>>>>> mount nfs without -o nolock options. (Log below)
>>>>> Selecting tcp or udp has no effect.
>>>>> I am using emaclite driver and there is no problem on big endian 
>>>>> microblaze.
>>>>>
>>>>> ping, telnet, http, ftp, iperf, netperf work well.
>>>>>
>>>>> That's why I have a question if there is any endian specific option for 
>>>>> NFS?
>>>>>
>>>>> Thanks,
>>>>> Michal
>>>>>
>>>>> ~ # mount -t nfs 192.168.0.101:/tftpboot/nfs /mnt
>>>>> svc: failed to register lockdv1 RPC service (errno 13).
>>>>> lockd_up: makesock failed, error=-13
>>>>> svc: failed to register lockdv1 RPC service (errno 13).
>>>>> ~ # mount -t nfs -o nolock 192.168.0.101:/tftpboot/nfs /mnt
>>>>> ~ # mount
>>>>> rootfs on / type rootfs (rw)
>>>>> proc on /proc type proc (rw,relatime)
>>>>> none on /var type ramfs (rw,relatime)
>>>>> none on /sys type sysfs (rw,relatime)
>>>>> none on /etc/config type ramfs (rw,relatime)
>>>>> none on /dev/pts type devpts (rw,relatime,mode=600)
>>>>> 192.168.0.101:/tftpboot/nfs on /mnt type nfs 
>>>>> (rw,relatime,vers=3,rsize=32768,wsize=32768,namlen=255,hard,nolock,proto=udp,port=65535,timeo=7,retrans=3,sec=sys,local_lock=all,addr=192.168.0.101) 
>>>>>
>>>>> ~ #
>>>>> ~ # ps
>>>>> PID   USER     TIME   COMMAND
>>>>>     1 root       0:02 init
>>>>>     2 root       0:00 [kthreadd]
>>>>>     3 root       0:00 [ksoftirqd/0]
>>>>>     4 root       0:00 [kworker/0:0]
>>>>>     5 root       0:00 [kworker/u:0]
>>>>>     6 root       0:00 [khelper]
>>>>>     7 root       0:00 [sync_supers]
>>>>>     8 root       0:00 [bdi-default]
>>>>>     9 root       0:00 [kblockd]
>>>>>    10 root       0:00 [rpciod]
>>>>>    11 root       0:00 [kworker/0:1]
>>>>>    12 root       0:00 [kswapd0]
>>>>>    13 root       0:00 [fsnotify_mark]
>>>>>    14 root       0:00 [aio]
>>>>>    15 root       0:00 [nfsiod]
>>>>>    16 root       0:00 [kworker/u:1]
>>>>>    58 root       0:00 udhcpc -R -n -p /var/run/udhcpc.eth0.pid -i eth0
>>>>>    62 1          0:00 /bin/portmap
>>>>>    64 root       0:00 /bin/inetd /etc/inetd.conf
>>>>>    65 root       0:01 -sh
>>>>>    66 root       0:00 /bin/syslogd -n
>>>>>    67 root       0:00 /bin/flatfsd
>>>>>    68 root       0:00 [kworker/0:2]
>>>>>    91 root       0:00 ps
>>> Where is rpc.statd? Without it, the above behaviour is 100% expected.
>> I see on BE that lockd is used but it is enabled on little endian too but hasn't started.
>>
>> Enabled options:
>> CONFIG_NETWORK_FILESYSTEMS=y
>> CONFIG_NFS_FS=y
>> CONFIG_NFS_V3=y
>> CONFIG_LOCKD=y
>> CONFIG_LOCKD_V4=y
>> CONFIG_NFS_COMMON=y
>> CONFIG_SUNRPC=y
>>
>> On Be lockd is up.
>>     69 root       0:00 /bin/flatfsd
>>     71 root       0:00 [lockd]
>>     73 root       0:00 ps
>>
>> I have to look why.
>> How is it started?
> 
> Either rpc.bind or rpc.portmap and then rpc.statd need to be started
> manually (in that order) before you may mount the NFS partition without
> '-onolock'. The lockd daemon itself will be started by the kernel
> whenever there is a need for it.
> 
> Please check your 'init' boot scripts to find out why they are not being
> started as expected.
> 

It seems to me that the problem is in sunrpc and is related to endianness.
I am looking for any package which can test sunrpc on embedded systems.
Can you recommend me any package?

I have found RPC tests in LTP but it is designed for desktops not embedded.

Thanks,
Michal

-- 
Michal Simek, Ing. (M.Eng)
w: www.monstr.eu p: +42-0-721842854
Maintainer of Linux kernel 2.6 Microblaze Linux - http://www.monstr.eu/fdt/
Microblaze U-BOOT custodian
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [RFC patch net-next-2.6] net: convert bonding to use rx_handler
From: Jiri Pirko @ 2011-02-17 12:52 UTC (permalink / raw)
  To: netdev
  Cc: davem, shemminger, kaber, fubar, eric.dumazet, nicolas.2p.debian,
	andy

Hello.

This is an attempt to convert bonding to use rx_handler. The result should be
a cleaner __netif_receive_skb() with far fewer exceptions needed. I think I
covered all aspects, but I am not sure. I gave this a quick smoke test in my
testing environment. Please comment and test.

Thanks!

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
 drivers/net/bonding/bond_main.c |   75 ++++++++++++++++++++-
 include/linux/skbuff.h          |    1 +
 net/core/dev.c                  |  144 +++++++++++---------------------------
 3 files changed, 117 insertions(+), 103 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 77e3c6a..7bfb74b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1423,6 +1423,68 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
 	bond->setup_by_slave = 1;
 }
 
+/* On bonding slaves other than the currently active slave, suppress
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
+ */
+static bool bond_should_deliver_exact_match(struct sk_buff *skb,
+					    struct net_device *slave_dev,
+					    struct net_device *bond_dev)
+{
+	if (slave_dev->priv_flags & IFF_SLAVE_INACTIVE) {
+		if (slave_dev->priv_flags & IFF_SLAVE_NEEDARP &&
+		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
+			return false;
+
+		if (bond_dev->priv_flags & IFF_MASTER_ALB &&
+		    skb->pkt_type != PACKET_BROADCAST &&
+		    skb->pkt_type != PACKET_MULTICAST)
+				return false;
+
+		if (bond_dev->priv_flags & IFF_MASTER_8023AD &&
+		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
+			return false;
+
+		return true;
+	}
+	return false;
+}
+
+static struct sk_buff *bond_handle_frame(struct sk_buff *skb)
+{
+	struct net_device *slave_dev;
+	struct net_device *bond_dev;
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return NULL;
+	slave_dev = skb->dev;
+	bond_dev = slave_dev->master;
+	if (unlikely(!bond_dev))
+		return skb;
+
+	if (bond_dev->priv_flags & IFF_MASTER_ARPMON)
+		slave_dev->last_rx = jiffies;
+
+	if (bond_should_deliver_exact_match(skb, slave_dev, bond_dev)) {
+		skb->deliver_no_wcard = 1;
+		return skb;
+	}
+
+	skb->dev = bond_dev;
+
+	if (bond_dev->priv_flags & IFF_MASTER_ALB &&
+	    bond_dev->priv_flags & IFF_BRIDGE_PORT &&
+	    skb->pkt_type == PACKET_HOST) {
+		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
+
+		memcpy(dest, bond_dev->dev_addr, ETH_ALEN);
+	}
+
+	netif_rx(skb);
+	return NULL;
+}
+
 /* enslave device <slave> to bond device <master> */
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 {
@@ -1599,11 +1661,17 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		pr_debug("Error %d calling netdev_set_bond_master\n", res);
 		goto err_restore_mac;
 	}
+	res = netdev_rx_handler_register(slave_dev, bond_handle_frame, NULL);
+	if (res) {
+		pr_debug("Error %d calling netdev_rx_handler_register\n", res);
+		goto err_unset_master;
+	}
+
 	/* open the slave since the application closed it */
 	res = dev_open(slave_dev);
 	if (res) {
 		pr_debug("Opening slave %s failed\n", slave_dev->name);
-		goto err_unset_master;
+		goto err_unreg_rxhandler;
 	}
 
 	new_slave->dev = slave_dev;
@@ -1811,6 +1879,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 err_close:
 	dev_close(slave_dev);
 
+err_unreg_rxhandler:
+	netdev_rx_handler_unregister(slave_dev);
+
 err_unset_master:
 	netdev_set_bond_master(slave_dev, NULL);
 
@@ -1992,6 +2063,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 		netif_addr_unlock_bh(bond_dev);
 	}
 
+	netdev_rx_handler_unregister(slave_dev);
 	netdev_set_bond_master(slave_dev, NULL);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2114,6 +2186,7 @@ static int bond_release_all(struct net_device *bond_dev)
 			netif_addr_unlock_bh(bond_dev);
 		}
 
+		netdev_rx_handler_unregister(slave_dev);
 		netdev_set_bond_master(slave_dev, NULL);
 
 		/* close slave before restoring its mac address */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 31f02d0..15b54ea 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -325,6 +325,7 @@ struct sk_buff {
 
 	struct sock		*sk;
 	struct net_device	*dev;
+	struct net_device	*orig_dev;
 
 	/*
 	 * This is the control buffer. It is free to use for every
diff --git a/net/core/dev.c b/net/core/dev.c
index a413276..ae4381a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1530,12 +1530,17 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
+static inline int __deliver_skb(struct sk_buff *skb,
+				struct packet_type *pt_prev)
+{
+	return pt_prev->func(skb, skb->dev, pt_prev, skb->orig_dev);
+}
+
 static inline int deliver_skb(struct sk_buff *skb,
-			      struct packet_type *pt_prev,
-			      struct net_device *orig_dev)
+			      struct packet_type *pt_prev)
 {
 	atomic_inc(&skb->users);
-	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+	return __deliver_skb(skb, pt_prev);
 }
 
 /*
@@ -1558,7 +1563,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		    (ptype->af_packet_priv == NULL ||
 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
 			if (pt_prev) {
-				deliver_skb(skb2, pt_prev, skb->dev);
+				deliver_skb(skb2, pt_prev);
 				pt_prev = ptype;
 				continue;
 			}
@@ -1591,7 +1596,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 	if (pt_prev)
-		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+		__deliver_skb(skb2, pt_prev);
 	rcu_read_unlock();
 }
 
@@ -3020,8 +3025,7 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 }
 
 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
-					 struct packet_type **pt_prev,
-					 int *ret, struct net_device *orig_dev)
+					 struct packet_type **pt_prev, int *ret)
 {
 	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
 
@@ -3029,7 +3033,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 		goto out;
 
 	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
+		*ret = deliver_skb(skb, *pt_prev);
 		*pt_prev = NULL;
 	}
 
@@ -3091,63 +3095,30 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
-static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
-					      struct net_device *master)
-{
-	if (skb->pkt_type == PACKET_HOST) {
-		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
-
-		memcpy(dest, master->dev_addr, ETH_ALEN);
-	}
-}
-
-/* On bonding slaves other than the currently active slave, suppress
- * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
- * ARP on active-backup slaves with arp_validate enabled.
- */
-static int __skb_bond_should_drop(struct sk_buff *skb,
-				  struct net_device *master)
+static void vlan_on_bond_hook(struct sk_buff *skb)
 {
-	struct net_device *dev = skb->dev;
-
-	if (master->priv_flags & IFF_MASTER_ARPMON)
-		dev->last_rx = jiffies;
-
-	if ((master->priv_flags & IFF_MASTER_ALB) &&
-	    (master->priv_flags & IFF_BRIDGE_PORT)) {
-		/* Do address unmangle. The local destination address
-		 * will be always the one master has. Provides the right
-		 * functionality in a bridge.
-		 */
-		skb_bond_set_mac_by_master(skb, master);
-	}
-
-	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
-		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
-			return 0;
-
-		if (master->priv_flags & IFF_MASTER_ALB) {
-			if (skb->pkt_type != PACKET_BROADCAST &&
-			    skb->pkt_type != PACKET_MULTICAST)
-				return 0;
-		}
-		if (master->priv_flags & IFF_MASTER_8023AD &&
-		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
-			return 0;
+	/*
+	 * Make sure ARP frames received on VLAN interfaces stacked on
+	 * bonding interfaces still make their way to any base bonding
+	 * device that may have registered for a specific ptype.
+	 */
+	if (skb->dev->priv_flags & IFF_802_1Q_VLAN &&
+	    vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING &&
+	    skb->protocol == htons(ETH_P_ARP)) {
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
-		return 1;
+		if (!skb2)
+			return;
+		skb2->dev = vlan_dev_real_dev(skb->dev);
+		netif_rx(skb2);
 	}
-	return 0;
 }
 
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
-	struct net_device *orig_dev;
-	struct net_device *null_or_orig;
-	struct net_device *orig_or_bond;
+	struct net_device *null_or_dev;
 	int ret = NET_RX_DROP;
 	__be16 type;
 
@@ -3163,29 +3134,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	if (!skb->skb_iif)
 		skb->skb_iif = skb->dev->ifindex;
 
-	/*
-	 * bonding note: skbs received on inactive slaves should only
-	 * be delivered to pkt handlers that are exact matches.  Also
-	 * the deliver_no_wcard flag will be set.  If packet handlers
-	 * are sensitive to duplicate packets these skbs will need to
-	 * be dropped at the handler.
-	 */
-	null_or_orig = NULL;
-	orig_dev = skb->dev;
-	if (skb->deliver_no_wcard)
-		null_or_orig = orig_dev;
-	else if (netif_is_bond_slave(orig_dev)) {
-		struct net_device *bond_master = ACCESS_ONCE(orig_dev->master);
-
-		if (likely(bond_master)) {
-			if (__skb_bond_should_drop(skb, bond_master)) {
-				skb->deliver_no_wcard = 1;
-				/* deliver only exact match */
-				null_or_orig = orig_dev;
-			} else
-				skb->dev = bond_master;
-		}
-	}
+	if (!skb->orig_dev)
+		skb->orig_dev = skb->dev;
 
 	__this_cpu_inc(softnet_data.processed);
 	skb_reset_network_header(skb);
@@ -3204,26 +3154,24 @@ static int __netif_receive_skb(struct sk_buff *skb)
 #endif
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
-		    ptype->dev == orig_dev) {
+		if (!ptype->dev || ptype->dev == skb->dev) {
 			if (pt_prev)
-				ret = deliver_skb(skb, pt_prev, orig_dev);
+				ret = deliver_skb(skb, pt_prev);
 			pt_prev = ptype;
 		}
 	}
 
 #ifdef CONFIG_NET_CLS_ACT
-	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+	skb = handle_ing(skb, &pt_prev, &ret);
 	if (!skb)
 		goto out;
 ncls:
 #endif
 
-	/* Handle special case of bridge or macvlan */
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
 	if (rx_handler) {
 		if (pt_prev) {
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(skb, pt_prev);
 			pt_prev = NULL;
 		}
 		skb = rx_handler(skb);
@@ -3233,7 +3181,7 @@ ncls:
 
 	if (vlan_tx_tag_present(skb)) {
 		if (pt_prev) {
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(skb, pt_prev);
 			pt_prev = NULL;
 		}
 		if (vlan_hwaccel_do_receive(&skb)) {
@@ -3243,32 +3191,24 @@ ncls:
 			goto out;
 	}
 
-	/*
-	 * Make sure frames received on VLAN interfaces stacked on
-	 * bonding interfaces still make their way to any base bonding
-	 * device that may have registered for a specific ptype.  The
-	 * handler may have to adjust skb->dev and orig_dev.
-	 */
-	orig_or_bond = orig_dev;
-	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
-	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
-		orig_or_bond = vlan_dev_real_dev(skb->dev);
-	}
+	vlan_on_bond_hook(skb);
+
+	/* deliver only exact match when indicated */
+	null_or_dev = skb->deliver_no_wcard ? skb->dev : NULL;
 
 	type = skb->protocol;
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-		if (ptype->type == type && (ptype->dev == null_or_orig ||
-		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
-		     ptype->dev == orig_or_bond)) {
+		if (ptype->type == type &&
+		    (ptype->dev == null_or_dev || ptype->dev == skb->dev)) {
 			if (pt_prev)
-				ret = deliver_skb(skb, pt_prev, orig_dev);
+				ret = deliver_skb(skb, pt_prev);
 			pt_prev = ptype;
 		}
 	}
 
 	if (pt_prev) {
-		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		ret = __deliver_skb(skb, pt_prev);
 	} else {
 		atomic_long_inc(&skb->dev->rx_dropped);
 		kfree_skb(skb);
-- 
1.7.3.4


^ permalink raw reply related

* Re: [PATCH v2] bnx2x: Support for managing RX indirection table
From: Eilon Greenstein @ 2011-02-17 12:58 UTC (permalink / raw)
  To: Eric Dumazet, Tom Herbert; +Cc: davem@davemloft.net, netdev@vger.kernel.org
In-Reply-To: <1297933764.2670.20.camel@edumazet-laptop>

On Thu, 2011-02-17 at 01:09 -0800, Eric Dumazet wrote:
> Le mercredi 16 février 2011 à 12:27 -0800, Tom Herbert a écrit :
> > Support fetching and retrieving RX indirection table via ethtool.
> > 
> > Signed-off-by: Tom Herbert <therbert@google.com>
> > ---
> >  drivers/net/bnx2x/bnx2x.h         |    2 +
> >  drivers/net/bnx2x/bnx2x_ethtool.c |   56 +++++++++++++++++++++++++++++++++++++
> >  drivers/net/bnx2x/bnx2x_main.c    |   22 +++++++++++---
> >  3 files changed, 75 insertions(+), 5 deletions(-)
> 
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Acked-by: Eilon Greenstein <eilong@broadcom.com>

Thanks everyone! 



^ permalink raw reply

* Re: [PATCH] drivers/net: Call netif_carrier_off at the end of the probe
From: Francois Romieu @ 2011-02-17 13:15 UTC (permalink / raw)
  To: Ivan Vecera; +Cc: davem, netdev, aabdulla, Ben Hutchings
In-Reply-To: <1297938739.9613.4.camel@ceranb.intra.cera.cz>

Ivan Vecera <ivecera@redhat.com> :
[...]
> It should not break any application as the patch only avoids the
> operstate to be 'unknown' instead of 'up'.

Apologies, I missed this point. It does not turn 'down' at boot into
'unknown'.

Acked-by: Francois Romieu <romieu@fr.zoreil.com>

-- 
Ueimor

^ permalink raw reply

* Re: [RFC patch net-next-2.6] net: convert bonding to use rx_handler
From: Eric Dumazet @ 2011-02-17 13:20 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: netdev, davem, shemminger, kaber, fubar, nicolas.2p.debian, andy
In-Reply-To: <20110217125221.GA10436@psychotron.redhat.com>

Le jeudi 17 février 2011 à 13:52 +0100, Jiri Pirko a écrit :
> Hello.
> 
> This is an attempt to convert bonding to use rx_handler. Result should be
> cleaner __netif_receive_skb() with much less exceptions needed. I think I
> covered all aspects, not sure though. I gave this quick smoke test on my
> testing env. Please comment, test.
> 
> Thanks!
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
> ---
>  drivers/net/bonding/bond_main.c |   75 ++++++++++++++++++++-
>  include/linux/skbuff.h          |    1 +
>  net/core/dev.c                  |  144 +++++++++++---------------------------
>  3 files changed, 117 insertions(+), 103 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c


> +
> +static struct sk_buff *bond_handle_frame(struct sk_buff *skb)
> +{
> +	struct net_device *slave_dev;
> +	struct net_device *bond_dev;
> +
> +	skb = skb_share_check(skb, GFP_ATOMIC);
> +	if (unlikely(!skb))
> +		return NULL;
> +	slave_dev = skb->dev;
> +	bond_dev = slave_dev->master;

I suggest being 100% safe here:

	bond_dev = ACCESS_ONCE(slave_dev->master);

> +	if (unlikely(!bond_dev))
> +		return skb;
> +
> +	if (bond_dev->priv_flags & IFF_MASTER_ARPMON)
> +		slave_dev->last_rx = jiffies;
> +
> +	if (bond_should_deliver_exact_match(skb, slave_dev, bond_dev)) {
> +		skb->deliver_no_wcard = 1;
> +		return skb;
> +	}
> +
> +	skb->dev = bond_dev;
> +
> +	if (bond_dev->priv_flags & IFF_MASTER_ALB &&
> +	    bond_dev->priv_flags & IFF_BRIDGE_PORT &&
> +	    skb->pkt_type == PACKET_HOST) {
> +		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
> +
> +		memcpy(dest, bond_dev->dev_addr, ETH_ALEN);
> +	}
> +
> +	netif_rx(skb);
> +	return NULL;
> +}
> +


> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 31f02d0..15b54ea 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -325,6 +325,7 @@ struct sk_buff {
>  
>  	struct sock		*sk;
>  	struct net_device	*dev;
> +	struct net_device	*orig_dev;
>  
>  	/*
>  	 * This is the control buffer. It is free to use for every

That's a problem. The lifetime of this field is so short, I wonder if you
can't find a solution to handle this differently. Maybe a percpu
variable, or in cb[]?




^ permalink raw reply

* [PATCH] cxgb{3,4}: streamline Kconfig options
From: Jan Beulich @ 2011-02-17 13:29 UTC (permalink / raw)
  To: divy, dm; +Cc: linux-kbuild, netdev

The CHELSIO_T{3,4}_DEPENDS options are really awkward, and can be
easily dropped if the reverse dependencies of SCSI_CXGB{3,4}_ISCSI on
the former get converted to normal (forward) ones referring to
CHELSIO_T{3,4}.

Signed-off-by: Jan Beulich <jbeulich@novell.com>

---
 drivers/net/Kconfig               |   14 ++------------
 drivers/scsi/cxgbi/cxgb3i/Kconfig |    3 +--
 drivers/scsi/cxgbi/cxgb4i/Kconfig |    3 +--
 3 files changed, 4 insertions(+), 16 deletions(-)

--- 2.6.38-rc5/drivers/net/Kconfig
+++ 2.6.38-rc5-kconfig-chelsio/drivers/net/Kconfig
@@ -2594,14 +2594,9 @@ config CHELSIO_T1_1G
 	  Enables support for Chelsio's gigabit Ethernet PCI cards.  If you
 	  are using only 10G cards say 'N' here.
 
-config CHELSIO_T3_DEPENDS
-	tristate
-	depends on PCI && INET
-	default y
-
 config CHELSIO_T3
 	tristate "Chelsio Communications T3 10Gb Ethernet support"
-	depends on CHELSIO_T3_DEPENDS
+	depends on PCI && INET
 	select FW_LOADER
 	select MDIO
 	help
@@ -2619,14 +2614,9 @@ config CHELSIO_T3
 	  To compile this driver as a module, choose M here: the module
 	  will be called cxgb3.
 
-config CHELSIO_T4_DEPENDS
-	tristate
-	depends on PCI && INET
-	default y
-
 config CHELSIO_T4
 	tristate "Chelsio Communications T4 Ethernet support"
-	depends on CHELSIO_T4_DEPENDS
+	depends on PCI && INET
 	select FW_LOADER
 	select MDIO
 	help
--- 2.6.38-rc5/drivers/scsi/cxgbi/cxgb3i/Kconfig
+++ 2.6.38-rc5-kconfig-chelsio/drivers/scsi/cxgbi/cxgb3i/Kconfig
@@ -1,7 +1,6 @@
 config SCSI_CXGB3_ISCSI
 	tristate "Chelsio T3 iSCSI support"
-	depends on CHELSIO_T3_DEPENDS
-	select CHELSIO_T3
+	depends on CHELSIO_T3
 	select SCSI_ISCSI_ATTRS
 	---help---
 	  This driver supports iSCSI offload for the Chelsio T3 devices.
--- 2.6.38-rc5/drivers/scsi/cxgbi/cxgb4i/Kconfig
+++ 2.6.38-rc5-kconfig-chelsio/drivers/scsi/cxgbi/cxgb4i/Kconfig
@@ -1,7 +1,6 @@
 config SCSI_CXGB4_ISCSI
 	tristate "Chelsio T4 iSCSI support"
-	depends on CHELSIO_T4_DEPENDS
-	select CHELSIO_T4
+	depends on CHELSIO_T4
 	select SCSI_ISCSI_ATTRS
 	---help---
 	  This driver supports iSCSI offload for the Chelsio T4 devices.




^ permalink raw reply

* Re: [PATCH 2/2] r8169: Support RTL8105E
From: Francois Romieu @ 2011-02-17 13:39 UTC (permalink / raw)
  To: Hayes Wang; +Cc: netdev, linux-kernel
In-Reply-To: <1297942668-23274-2-git-send-email-hayeswang@realtek.com>

Hayes Wang <hayeswang@realtek.com> :
[...]
> @@ -227,6 +232,10 @@ enum rtl_registers {
>  	MultiIntr	= 0x5c,
>  	PHYAR		= 0x60,
>  	PHYstatus	= 0x6c,
> +	DLLPR		= 0xd0,
> +	DbgReg		= 0xd1,
> +	TWSI		= 0xd2,
> +	MCU		= 0xd3,

You can probably fill some of those in 'enum rtl8168_8101_registers'
(DbgReg is already there).

>  	RxMaxSize	= 0xda,
>  	CPlusCmd	= 0xe0,
>  	IntrMitigate	= 0xe2,
> @@ -427,6 +436,13 @@ enum rtl_register_content {
>  
>  	/* DumpCounterCommand */
>  	CounterDump	= 0x8,
> +
> +	/* MCU */
> +	EnNDP		= (1 << 3),
> +	EnOOBReset		= (1 << 2),
                  ^^^^ -> extraneous tab
> +
> +	/* DLLPR */
> +	PmSwitch	= (1 << 6),

They are a bit old-fashioned.

See rtl8168_8101_registers / DBG_REG / FIX_NAK_{1, 2}.

[...]
> @@ -2435,6 +2457,57 @@ static void rtl8102e_hw_phy_config(struct rtl8169_private *tp)
>  	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
>  }
>  
> +static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
> +{
> +	void __iomem *ioaddr = tp->mmio_addr;
> +	static const struct phy_reg phy_reg_init[] = {
> +		{ 0x1f, 0x0001},
> +		{ 0x15, 0x7701},
> +		{ 0x1f, 0x0000}
> +	};
> +
> +	rtl_writephy(tp, 0x1f, 0x0000);
> +	rtl_writephy(tp, 0x18, 0x0310);
> +	msleep(100);
> +
> +	if (rtl_apply_firmware(tp, FIRMWARE_8105E_1) < 0)
> +		netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
> +
> +	if(RTL_R8(0xef) & 0x08) {
         ^^ -> missing space.

Add a symbol for the 0xef register ?

> +		static const struct phy_reg phy_reg_init1[] = {
> +			{ 0x1f, 0x0005},
> +			{ 0x1a, 0x0004},
> +			{ 0x1f, 0x0000}
> +		};
> +		rtl_writephy_batch(tp, phy_reg_init1, ARRAY_SIZE(phy_reg_init1));
> +	} else {
> +		static const struct phy_reg phy_reg_init1[] = {
> +			{ 0x1f, 0x0005},
> +			{ 0x1a, 0x0000},
> +			{ 0x1f, 0x0000}
> +		};
> +		rtl_writephy_batch(tp, phy_reg_init1, ARRAY_SIZE(phy_reg_init1));
> +	}
> +
> +	if(RTL_R8(0xef) & 0x010) {
         ^^ -> missing space.

> +		static const struct phy_reg phy_reg_init1[] = {
> +			{ 0x1f, 0x0004},
> +			{ 0x1c, 0x0000},
> +			{ 0x1f, 0x0000}
> +		};
> +		rtl_writephy_batch(tp, phy_reg_init1, ARRAY_SIZE(phy_reg_init1));
> +	} else {
> +		static const struct phy_reg phy_reg_init1[] = {
> +			{ 0x1f, 0x0004},
> +			{ 0x1c, 0x0200},
> +			{ 0x1f, 0x0000}
> +		};
> +		rtl_writephy_batch(tp, phy_reg_init1, ARRAY_SIZE(phy_reg_init1));
> +	}

Nit: I would rather use an array of arrays and remove some code duplication.

Otherwise ok.

-- 
Ueimor

^ permalink raw reply

* Re: [RFC patch net-next-2.6] net: convert bonding to use rx_handler
From: Jiri Pirko @ 2011-02-17 13:49 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, davem, shemminger, kaber, fubar, nicolas.2p.debian, andy
In-Reply-To: <1297948835.2604.50.camel@edumazet-laptop>

Thu, Feb 17, 2011 at 02:20:35PM CET, eric.dumazet@gmail.com wrote:
>Le jeudi 17 février 2011 à 13:52 +0100, Jiri Pirko a écrit :
>> Hello.
>> 
>> This is an attempt to convert bonding to use rx_handler. Result should be
>> cleaner __netif_receive_skb() with much less exceptions needed. I think I
>> covered all aspects, not sure though. I gave this quick smoke test on my
>> testing env. Please comment, test.
>> 
>> Thanks!
>> 
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>> ---
>>  drivers/net/bonding/bond_main.c |   75 ++++++++++++++++++++-
>>  include/linux/skbuff.h          |    1 +
>>  net/core/dev.c                  |  144 +++++++++++---------------------------
>>  3 files changed, 117 insertions(+), 103 deletions(-)
>> 
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>
>
>> +
>> +static struct sk_buff *bond_handle_frame(struct sk_buff *skb)
>> +{
>> +	struct net_device *slave_dev;
>> +	struct net_device *bond_dev;
>> +
>> +	skb = skb_share_check(skb, GFP_ATOMIC);
>> +	if (unlikely(!skb))
>> +		return NULL;
>> +	slave_dev = skb->dev;
>> +	bond_dev = slave_dev->master;
>
>I suggest being 10%% safe here :
>
>	bond_dev = ACCESS_ONCE(slave_dev->master);

Right, will change this.

>
>> +	if (unlikely(!bond_dev))
>> +		return skb;
>> +
>> +	if (bond_dev->priv_flags & IFF_MASTER_ARPMON)
>> +		slave_dev->last_rx = jiffies;
>> +
>> +	if (bond_should_deliver_exact_match(skb, slave_dev, bond_dev)) {
>> +		skb->deliver_no_wcard = 1;
>> +		return skb;
>> +	}
>> +
>> +	skb->dev = bond_dev;
>> +
>> +	if (bond_dev->priv_flags & IFF_MASTER_ALB &&
>> +	    bond_dev->priv_flags & IFF_BRIDGE_PORT &&
>> +	    skb->pkt_type == PACKET_HOST) {
>> +		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
>> +
>> +		memcpy(dest, bond_dev->dev_addr, ETH_ALEN);
>> +	}
>> +
>> +	netif_rx(skb);
>> +	return NULL;
>> +}
>> +
>
>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 31f02d0..15b54ea 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -325,6 +325,7 @@ struct sk_buff {
>>  
>>  	struct sock		*sk;
>>  	struct net_device	*dev;
>> +	struct net_device	*orig_dev;
>>  
>>  	/*
>>  	 * This is the control buffer. It is free to use for every
>
>Thats a problem. lifetime of this field is so small, I wonder if you
>cant find a solution to handle this differently. Maybe a percpu
>variable, or in cb[] ?

Yes, I was not feeling absolutely comfortable putting this here.
You mean a global percpu variable living in net/core/dev.c? I must say I
would probably like skb->orig_dev more than that.

As for cb - I do not like that much. Also, I think there might be
a collision, e.g. with the bridge code.

>
>
>

^ permalink raw reply

* Question about tg3 and bnx2 driver suppliers
From: Michael Durket @ 2011-02-17 13:24 UTC (permalink / raw)
  To: netdev

I'm trying to fix a problem with packet drops and frame errors (really rxbds_empty and rx_discards (or rx_fw_discards depending on whether or not it's a tg3-supported chip or a bnx2 chip)). We have this happening on pretty much all the RedHat 5.x systems we're running these drivers on. In investigating what remedies to employ to fix this, I'm trying to determine if new versions of this driver are supplied by Broadcom, or by people within the Linux community, so
I can find the right place to ask questions about it. I'm pretty sure this isn't the right list for these types of questions, but if someone could tell me whether I should be contacting Broadcom directly, or the name of some other Linux mailing list or forum that might help, I'd appreciate it.

^ permalink raw reply

* Re: Question about tg3 and bnx2 driver suppliers
From: Ben Hutchings @ 2011-02-17 14:16 UTC (permalink / raw)
  To: Michael Durket; +Cc: netdev
In-Reply-To: <3BA51C2C-B28F-4FED-BF5D-B9BF46EAF3F7@highwire.stanford.edu>

On Thu, 2011-02-17 at 05:24 -0800, Michael Durket wrote:
> I'm trying to fix a problem with packet drops and frame errors (really
> rxbds_empty and rx_discards (or rx_fw_discards depending on whether or
> not its a tg3 supported chip or a bnx2 chip)). We have this happening
> on pretty much all the RedHat 5.x systems we're running these drivers
> on. In investigating what remedies to employ to fix this, I'm trying
> to determine if new versions of this driver are supplied by Broadcom,
> or by people within the Linux community so 
> I can find the right place to ask questions about it. I'm pretty sure
> this isn't the right list for these types of questions, but if someone
> could tell me whether I should be contacting Broadcom directly, or the
> name of some other Linux mailing list or forum that might help, I'd
> appreciate it.

If you received the drivers as part of RHEL, you should ask Red Hat.
However, in general, RH takes drivers from Linus's tree - either as part
of a complete release, or as a backport.  Most development on network
drivers for current production seems to be done by developers employed
or contracted by the corresponding manufacturers, but it is subject to
review by the Linux community as represented here on netdev.

The MAINTAINERS file in the Linux source tree lists the primary
maintainer(s) for each driver:

BROADCOM BNX2 GIGABIT ETHERNET DRIVER
M:	Michael Chan <mchan@broadcom.com>
L:	netdev@vger.kernel.org
S:	Supported
F:	drivers/net/bnx2.*
F:	drivers/net/bnx2_*

BROADCOM TG3 GIGABIT ETHERNET DRIVER
M:	Matt Carlson <mcarlson@broadcom.com>
M:	Michael Chan <mchan@broadcom.com>
L:	netdev@vger.kernel.org
S:	Supported
F:	drivers/net/tg3.*

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: Question about tg3 and bnx2 driver suppliers
From: Eric Dumazet @ 2011-02-17 14:17 UTC (permalink / raw)
  To: Michael Durket; +Cc: netdev
In-Reply-To: <3BA51C2C-B28F-4FED-BF5D-B9BF46EAF3F7@highwire.stanford.edu>

Le jeudi 17 février 2011 à 05:24 -0800, Michael Durket a écrit :
> I'm trying to fix a problem with packet drops and frame errors (really
> rxbds_empty and rx_discards (or rx_fw_discards depending on whether or
> not its a tg3 supported chip or a bnx2 chip)). We have this happening
> on pretty much all the RedHat 5.x systems we're running these drivers
> on. In investigating what remedies to employ to fix this, I'm trying
> to determine if new versions of this driver are supplied by Broadcom,
> or by people within the Linux community so 
> I can find the right place to ask questions about it. I'm pretty sure
> this isn't the right list for these types of questions, but if someone
> could tell me whether I should be contacting Broadcom directly, or the
> name of some other Linux mailing list or forum that might help, I'd
> appreciate it.
> 

It _is_ the right place, don't worry ;)

One possible cause of packet drops is softirqs being disabled for too
long, even if the NIC has a big RX ring (check ethtool -g eth0)

We fixed one big offender some weeks ago : iptables -L

Fix is in commit 83723d60717f8da0f53f

http://git2.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=83723d60717f8da0f53f91cf42a845ed56c09662




^ permalink raw reply

* biosdevname v0.3.7
From: Matt Domsch @ 2011-02-17 16:15 UTC (permalink / raw)
  To: linux-hotplug, netdev, K, Narendra, Hargrave, Jordan,
	Rose, Charles, Co

biosdevname, now version 0.3.7.

Major visible changes include no longer using '#' in device names (by
popular demand), no longer suggesting new names if running inside a VM
guest (tested with KVM, SLES 10 Xen, XenServer, VMware ESX, but it
uses the generic cpuid test, so should work on most virt platforms),
and a new kernel command line option 'biosdevname={0|1}' which udev
honors, to force enabling or disabling of invoking biosdevname.

Still to come: NPAR partition->port mapping support (initially
Dell-specific, but if NIC vendors have other ways to expose this,
please let me know).

Grab it here:
http://linux.dell.com/files/biosdevname/permalink/biosdevname-0.3.7.tar.gz
http://linux.dell.com/files/biosdevname/permalink/biosdevname-0.3.7.tar.gz.sign
git://linux.dell.com/biosdevname.git

I built this today for Fedora rawhide and F15, and I encourage
other distributions to pick it up as well.

shortlog:

Andrew Cooper (3):
      Fix segfault when BIOS advertises zero sized PIRQ Routing Table
      Add 'bonding' and 'openvswitch' to the virtual devices list
      Typo fixes

Harald Hoyer (1):
      Add kernel command line parameter "biosdevname={0|1}" to turn off/on

Matt Domsch (7):
      don't build or package dump_pirq, use biosdecode from dmidecode instead
      don't use '#' in names, use 'p' instead, by popular demand
      properly look for SMBIOS, then $PIR, then recurse
      update changelog
      Fix test for PIRQ table version
      fail PIRQ lookups if device domain is not 0
      exit(4) if running a virtual machine

Thanks,
Matt

-- 
Matt Domsch
Technology Strategist
Dell | Office of the CTO

^ permalink raw reply

* Re: [PATCH 1/2] igb: Allow extra 4 bytes on RX for vlan tags.
From: Ben Greear @ 2011-02-17 17:28 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: netdev
In-Reply-To: <AANLkTinsWRAJ2bepa1NjQ4f0Wz46DaxWO6f_nEbmBAB0@mail.gmail.com>

On 02/17/2011 03:04 AM, Jeff Kirsher wrote:
> On Thu, Feb 10, 2011 at 13:59,<greearb@candelatech.com>  wrote:
>> From: Ben Greear<greearb@candelatech.com>
>>
>> This allows the NIC to receive 1518 byte (not counting
>> FCS) packets when MTU is 1500, thus allowing 1500 MTU
>> VLAN frames to be received.  Please note that no VLANs
>> were actually configured on the NIC...it was just acting
>> as pass-through device.
>>
>> Signed-off-by: Ben Greear<greearb@candelatech.com>
>> ---
>> :100644 100644 58c665b... 30c9cc6... M  drivers/net/igb/igb_main.c
>>   drivers/net/igb/igb_main.c |    5 +++--
>>   1 files changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
>> index 58c665b..30c9cc6 100644
>> --- a/drivers/net/igb/igb_main.c
>> +++ b/drivers/net/igb/igb_main.c
>> @@ -2281,7 +2281,8 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
>>         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
>>         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
>>
>> -       adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
>> +       adapter->max_frame_size = (netdev->mtu + ETH_HLEN + ETH_FCS_LEN
>> +                                  + VLAN_HLEN);
>>         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
>>
>>         spin_lock_init(&adapter->stats64_lock);
>> @@ -4303,7 +4304,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
>>   {
>>         struct igb_adapter *adapter = netdev_priv(netdev);
>>         struct pci_dev *pdev = adapter->pdev;
>> -       int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
>> +       int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
>>         u32 rx_buffer_len, i;
>>
>>         if ((new_mtu<  68) || (max_frame>  MAX_JUMBO_FRAME_SIZE)) {
>
> While testing this patch, validation found that the patch reduces the
> maximum mtu size
> by 4 bytes (reduces it from 9216 to 9212).  This is not a desired side
> effect of this patch.

You could add handling for that case and have it act as it used to when
new_mtu is greater than 9212?

I tested e1000e and it worked w/out hacking at 1500 MTU, so maybe
check how it does it?

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply

* Re: Read sent/received bytes -- without opening a file in /sys or /proc
From: Ben Greear @ 2011-02-17 17:32 UTC (permalink / raw)
  To: Jan-Philip gehrcke; +Cc: netdev
In-Reply-To: <4D5CFF19.6050400@googlemail.com>

On 02/17/2011 02:57 AM, Jan-Philip gehrcke wrote:
> Dear list,
>
> I'm about to write a small C program for measurements of the bandwidth
> on some network interface of my home router (uname -a: Linux
> fritz.fonwlan.box 2.6.19.2 #2 Thu Nov 18 16:35:17 CET 2010 mips
> GNU/Linux). I would like to accomplish very *precise* results, while
> keeping the absolute measurement time low.
>
> In general, this problem requires the knowledge of time and byte
> differences. Therefore, two time measurements and two byte counter
> measurements for each, received and sent bytes, are required. Currently,
> I am measuring time via
>
>      clock_gettime(CLOCK_MONOTONIC, ...)
>
> and reading the byte counters by reading files in sysfs, e.g.
>
>       /sys/class/net/${interface}/statistics/rx_bytes.
>
> This is very easy and works well, but reading these files four times
> takes some (varying) time* on my device, which introduces an error to my
> calculation.
>
> Hence, I am wondering, if there is a way to receive this kind of data
> directly and much faster, via "kernel API". My question is related to
> this unanswered one from 2005:
> http://linux.derkeiler.com/Mailing-Lists/RedHat/2005-02/0557.html
>
> *On my device, reading such a file and interpreting the result as
> integer takes 1-10 ms, measured via:
>
>      clock_gettime(CLOCK_MONOTONIC,&t_start);
>      rx_bytes = read_int_from_file("/sys/class/net/wan/statistics/rx_bytes");
>      clock_gettime(CLOCK_MONOTONIC,&t_end);
>
> read_int_from_file() basically consists of fopen(), fgets(), and sscanf().
>
>
> Please note that I don't have kernel/driver programming experience so far.
>
> Thank you for help and any suggestions!
>
>      Jan-Philip Gehrcke

The netlink API can get stats for you, but I don't know if it would
be any more accurate time-wise.  It's not exactly trivial to
implement this API, but you can look at the 'ip' program to get
an idea:

git clone git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git


There is also /proc/net/dev, but that may be worse than using sysfs
as far as performance and time accuracy goes.

Thanks,
Ben


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply

* Re: Multicast snooping fixes and suggestions
From: Linus Lüssing @ 2011-02-17 18:17 UTC (permalink / raw)
  To: Stephen Hemminger, David S. Miller, bridge
  Cc: netdev, linux-kernel, Herbert Xu, YOSHIFUJI Hideaki
In-Reply-To: <20110215154128.2a28632c@nehalam>

> These look correct. Did you test them with real traffic?

Yes, I did. With these patches the hashlist and linked lists per port
are being filled correctly for IPv6 - initially. Verified that with
both some printk()s for the per port mglists as well as with vlc. With
patch 5/5 this also worked fine with transient link local addresses,
verified that with 'vlc -vvv "udp://@[ff12::123%eth1]"' on a device
connected to the other one with the bridge and could stream
a video as expected with no multicast traffic on any other bridge port.

However, the MLD queries are/were still broken, the queries initiated
by the bridge device do not get a response from the multicast listeners.
The following additional, attached patches fix this issue.

Last but not least, there are still a couple of bugs I could observe:
- I have attached a laptop with two interfaces with a multicast listener
  each to another PC playing with the bridge device. With the fixes
  below, the laptop sends a multicast listener report to the other PC
  on each interface, however these reports' IPv6 header's source addresses
  seem to be a random one from any of the laptop's two interfaces'
  link local addresses (which has to be a bug in the IPv6 code, as
  this one is generating the reports and not the bridge code) as long
  as it matches the selected multicast address (which was ff12::123 in
  this case).
- If there is no multicast listener present, then the multicast packets
  get flooded on all bridge ports.

And two issues with a little lower priority, I suppose:
- Packets do not get delivered to the bridge interface itself when
  a multicast listener has been started on this bridge interface
  (might be related to http://www.spinics.net/lists/linux-net/msg17556.html,
  so possibly a bug in the IPv6 code again).
~ Quitting of a multicast listener with a MLDv2 message is interpreted as
  a join, resulting in relatively long timeouts - but this MLDv1
  interpretation of MLDv2 messages seems to be intended so far due to its
  simplicity according to the comment in the code.

Cheers, Linus

^ permalink raw reply

* [PATCH 1/2] bridge: Fix MLD queries' ethernet source address
From: Linus Lüssing @ 2011-02-17 18:17 UTC (permalink / raw)
  To: Stephen Hemminger, David S. Miller, bridge
  Cc: netdev, linux-kernel, Herbert Xu, YOSHIFUJI Hideaki,
	Linus Lüssing
In-Reply-To: <20110215154128.2a28632c@nehalam>

Map the IPv6 header's destination multicast address to an ethernet
destination address instead of the MLD query's multicast address.

For instance, for a general MLD query (multicast address in the MLD query
set to ::), this would wrongly be mapped to 33:33:00:00:00:00, although
an MLD query's destination MAC should always be 33:33:00:00:00:01, which
matches the IPv6 header's multicast destination ff02::1.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
---
 net/bridge/br_multicast.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b5eb28a..f904a2e 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -435,7 +435,6 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	eth = eth_hdr(skb);
 
 	memcpy(eth->h_source, br->dev->dev_addr, 6);
-	ipv6_eth_mc_map(group, eth->h_dest);
 	eth->h_proto = htons(ETH_P_IPV6);
 	skb_put(skb, sizeof(*eth));
 
@@ -449,6 +448,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	ip6h->hop_limit = 1;
 	ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
 	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
+	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
 	hopopt = (u8 *)(ip6h + 1);
 	hopopt[0] = IPPROTO_ICMPV6;		/* next hdr */
-- 
1.7.2.3


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox