* [PATCH 1/2] net: Push protocol type directly down to header_ops->cache()
From: David Miller @ 2011-07-13 8:52 UTC (permalink / raw)
To: netdev
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/firewire/net.c | 6 +++---
drivers/isdn/i4l/isdn_net.c | 5 +++--
drivers/net/plip.c | 6 +++---
include/linux/etherdevice.h | 2 +-
include/linux/netdevice.h | 2 +-
net/core/neighbour.c | 2 +-
net/ethernet/eth.c | 3 +--
7 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index b9762d0..eced1c2 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -261,16 +261,16 @@ static int fwnet_header_rebuild(struct sk_buff *skb)
}
static int fwnet_header_cache(const struct neighbour *neigh,
- struct hh_cache *hh)
+ struct hh_cache *hh, __be16 type)
{
struct net_device *net;
struct fwnet_header *h;
- if (hh->hh_type == cpu_to_be16(ETH_P_802_3))
+ if (type == cpu_to_be16(ETH_P_802_3))
return -1;
net = neigh->dev;
h = (struct fwnet_header *)((u8 *)hh->hh_data + 16 - sizeof(*h));
- h->h_proto = hh->hh_type;
+ h->h_proto = type;
memcpy(h->h_dest, neigh->ha, net->addr_len);
hh->hh_len = FWNET_HLEN;
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 9798811..48e9cc0 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -1983,13 +1983,14 @@ isdn_net_rebuild_header(struct sk_buff *skb)
return ret;
}
-static int isdn_header_cache(const struct neighbour *neigh, struct hh_cache *hh)
+static int isdn_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
+ __be16 type)
{
const struct net_device *dev = neigh->dev;
isdn_net_local *lp = netdev_priv(dev);
if (lp->p_encap == ISDN_NET_ENCAP_ETHER)
- return eth_header_cache(neigh, hh);
+ return eth_header_cache(neigh, hh, type);
return -1;
}
diff --git a/drivers/net/plip.c b/drivers/net/plip.c
index ca4df7f..a9e9ca8 100644
--- a/drivers/net/plip.c
+++ b/drivers/net/plip.c
@@ -152,7 +152,7 @@ static int plip_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type, const void *daddr,
const void *saddr, unsigned len);
static int plip_hard_header_cache(const struct neighbour *neigh,
- struct hh_cache *hh);
+ struct hh_cache *hh, __be16 type);
static int plip_open(struct net_device *dev);
static int plip_close(struct net_device *dev);
static int plip_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
@@ -1026,11 +1026,11 @@ plip_hard_header(struct sk_buff *skb, struct net_device *dev,
}
static int plip_hard_header_cache(const struct neighbour *neigh,
- struct hh_cache *hh)
+ struct hh_cache *hh, __be16 type)
{
int ret;
- ret = eth_header_cache(neigh, hh);
+ ret = eth_header_cache(neigh, hh, type);
if (ret == 0) {
struct ethhdr *eth;
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index ab68f78..05955cf 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -38,7 +38,7 @@ extern int eth_header(struct sk_buff *skb, struct net_device *dev,
const void *daddr, const void *saddr, unsigned len);
extern int eth_rebuild_header(struct sk_buff *skb);
extern int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
-extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh);
+extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
extern void eth_header_cache_update(struct hh_cache *hh,
const struct net_device *dev,
const unsigned char *haddr);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 30f17e4..564d89f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -308,7 +308,7 @@ struct header_ops {
const void *saddr, unsigned len);
int (*parse)(const struct sk_buff *skb, unsigned char *haddr);
int (*rebuild)(struct sk_buff *skb);
- int (*cache)(const struct neighbour *neigh, struct hh_cache *hh);
+ int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
void (*cache_update)(struct hh_cache *hh,
const struct net_device *dev,
const unsigned char *haddr);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 50bd960..8f7e1d8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1247,7 +1247,7 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
hh->hh_type = protocol;
atomic_set(&hh->hh_refcnt, 2);
- if (dev->header_ops->cache(n, hh)) {
+ if (dev->header_ops->cache(n, hh, protocol)) {
kfree(hh);
return;
}
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 44d2b42..5cffb63 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -233,9 +233,8 @@ EXPORT_SYMBOL(eth_header_parse);
* @hh: destination cache entry
* Create an Ethernet header template from the neighbour.
*/
-int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh)
+int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type)
{
- __be16 type = hh->hh_type;
struct ethhdr *eth;
const struct net_device *dev = neigh->dev;
--
1.7.6
^ permalink raw reply related
* [PATCH 2/2] net: Kill support for multiple hh_cache entries per neighbour
From: David Miller @ 2011-07-13 8:52 UTC (permalink / raw)
To: netdev
This never, ever, happens.
Neighbour entries are always tied to one address family, and therefore
one set of dst_ops, and therefore one dst_ops->protocol "hh_type"
value.
This capability was blindly imported by Alexey Kuznetsov when he wrote
the neighbour layer.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/linux/netdevice.h | 9 ++-------
net/core/neighbour.c | 37 ++++++++++++++++++-------------------
2 files changed, 20 insertions(+), 26 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 564d89f..7538237 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -252,7 +252,6 @@ struct netdev_hw_addr_list {
netdev_hw_addr_list_for_each(ha, &(dev)->mc)
struct hh_cache {
- struct hh_cache *hh_next; /* Next entry */
atomic_t hh_refcnt; /* number of users */
/*
* We want hh_output, hh_len, hh_lock and hh_data be a in a separate
@@ -260,12 +259,8 @@ struct hh_cache {
* They are mostly read, but hh_refcnt may be changed quite frequently,
* incurring cache line ping pongs.
*/
- __be16 hh_type ____cacheline_aligned_in_smp;
- /* protocol identifier, f.e ETH_P_IP
- * NOTE: For VLANs, this will be the
- * encapuslated type. --BLG
- */
- u16 hh_len; /* length of header */
+ u16 hh_len ____cacheline_aligned_in_smp;
+ u16 __pad;
int (*hh_output)(struct sk_buff *skb);
seqlock_t hh_lock;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8f7e1d8..f879bb5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -702,9 +702,9 @@ void neigh_destroy(struct neighbour *neigh)
if (neigh_del_timer(neigh))
printk(KERN_WARNING "Impossible event.\n");
- while ((hh = neigh->hh) != NULL) {
- neigh->hh = hh->hh_next;
- hh->hh_next = NULL;
+ hh = neigh->hh;
+ if (hh) {
+ neigh->hh = NULL;
write_seqlock_bh(&hh->hh_lock);
hh->hh_output = neigh_blackhole;
@@ -737,7 +737,8 @@ static void neigh_suspect(struct neighbour *neigh)
neigh->output = neigh->ops->output;
- for (hh = neigh->hh; hh; hh = hh->hh_next)
+ hh = neigh->hh;
+ if (hh)
hh->hh_output = neigh->ops->output;
}
@@ -754,7 +755,8 @@ static void neigh_connect(struct neighbour *neigh)
neigh->output = neigh->ops->connected_output;
- for (hh = neigh->hh; hh; hh = hh->hh_next)
+ hh = neigh->hh;
+ if (hh)
hh->hh_output = neigh->ops->hh_output;
}
@@ -1025,7 +1027,8 @@ static void neigh_update_hhs(const struct neighbour *neigh)
update = neigh->dev->header_ops->cache_update;
if (update) {
- for (hh = neigh->hh; hh; hh = hh->hh_next) {
+ hh = neigh->hh;
+ if (hh) {
write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
write_sequnlock_bh(&hh->hh_lock);
@@ -1211,19 +1214,17 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
}
EXPORT_SYMBOL(neigh_event_ns);
-static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
- __be16 protocol)
+static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst)
{
struct hh_cache *hh;
smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
- for (hh = n->hh; hh; hh = hh->hh_next) {
- if (hh->hh_type == protocol) {
- atomic_inc(&hh->hh_refcnt);
- if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
- hh_cache_put(hh);
- return true;
- }
+ hh = n->hh;
+ if (hh) {
+ atomic_inc(&hh->hh_refcnt);
+ if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
+ hh_cache_put(hh);
+ return true;
}
return false;
}
@@ -1235,7 +1236,7 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
struct hh_cache *hh;
struct net_device *dev = dst->dev;
- if (likely(neigh_hh_lookup(n, dst, protocol)))
+ if (likely(neigh_hh_lookup(n, dst)))
return;
/* slow path */
@@ -1244,7 +1245,6 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
return;
seqlock_init(&hh->hh_lock);
- hh->hh_type = protocol;
atomic_set(&hh->hh_refcnt, 2);
if (dev->header_ops->cache(n, hh, protocol)) {
@@ -1255,7 +1255,7 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
write_lock_bh(&n->lock);
/* must check if another thread already did the insert */
- if (neigh_hh_lookup(n, dst, protocol)) {
+ if (neigh_hh_lookup(n, dst)) {
kfree(hh);
goto end;
}
@@ -1265,7 +1265,6 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
else
hh->hh_output = n->ops->output;
- hh->hh_next = n->hh;
smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
n->hh = hh;
--
1.7.6
^ permalink raw reply related
* Re: [PATCH 2/3] iwlegacy: Add missing comma between constant string array
From: Stanislaw Gruszka @ 2011-07-13 9:25 UTC (permalink / raw)
To: John W. Linville; +Cc: Joe Perches, linux-wireless, netdev, linux-kernel
In-Reply-To: <20110711183542.GB2650@tuxdriver.com>
On Mon, Jul 11, 2011 at 02:35:42PM -0400, John W. Linville wrote:
> How is this array indexed? Aren't you changing the index of the
> later strings?
We read the index directly from device memory (and check it against
ARRAY_SIZE). The patch makes the indexing match the code author's
intention, ACK. iwlagn should be similarly fine.
Stanislaw
> On Fri, Jul 08, 2011 at 11:20:24PM -0700, Joe Perches wrote:
> > Multiple quoted strings are concatenated without comma separators.
> >
> > Signed-off-by: Joe Perches <joe@perches.com>
> > ---
> > drivers/net/wireless/iwlegacy/iwl4965-base.c | 2 +-
> > 1 files changed, 1 insertions(+), 1 deletions(-)
> >
> > diff --git a/drivers/net/wireless/iwlegacy/iwl4965-base.c b/drivers/net/wireless/iwlegacy/iwl4965-base.c
> > index 46242d2..1433466 100644
> > --- a/drivers/net/wireless/iwlegacy/iwl4965-base.c
> > +++ b/drivers/net/wireless/iwlegacy/iwl4965-base.c
> > @@ -1484,7 +1484,7 @@ static const char * const desc_lookup_text[] = {
> > "NMI_INTERRUPT_DATA_ACTION_PT",
> > "NMI_TRM_HW_ER",
> > "NMI_INTERRUPT_TRM",
> > - "NMI_INTERRUPT_BREAK_POINT"
> > + "NMI_INTERRUPT_BREAK_POINT",
> > "DEBUG_0",
> > "DEBUG_1",
> > "DEBUG_2",
> > --
> > 1.7.6.131.g99019
> >
> >
>
> --
> John W. Linville Someday the world will need a hero, and you
> linville@tuxdriver.com might be all we have. Be ready.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH] net: change return values in mac_pton() function
From: Arend van Spriel @ 2011-07-13 9:30 UTC (permalink / raw)
To: davem; +Cc: Arend van Spriel, netdev, linux-kernel, Alexey Dobriyan
The original commit adding this function noted a divergence from the usual
0=success/-E=fail convention, but gave no motivation for it. To stay
consistent, this commit adheres to the usual approach. The callers' check of
the result is changed from 'if(!mac_pton(x, y))' to 'if(mac_pton(x,y) < 0)'.
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
---
drivers/net/netconsole.c | 2 +-
net/core/netpoll.c | 2 +-
net/core/pktgen.c | 4 ++--
net/core/utils.c | 10 +++++-----
4 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index dfc8272..2e8adba 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -437,7 +437,7 @@ static ssize_t store_remote_mac(struct netconsole_target *nt,
return -EINVAL;
}
- if (!mac_pton(buf, remote_mac))
+ if (mac_pton(buf, remote_mac) < 0)
return -EINVAL;
if (buf[3 * ETH_ALEN - 1] && buf[3 * ETH_ALEN - 1] != '\n')
return -EINVAL;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index adf84dd..6b70699 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -691,7 +691,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
if (*cur != 0) {
/* MAC address */
- if (!mac_pton(cur, np->remote_mac))
+ if (mac_pton(cur, np->remote_mac) < 0)
goto parse_failed;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f76079c..f17ab6a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1429,7 +1429,7 @@ static ssize_t pktgen_if_write(struct file *file,
if (copy_from_user(valstr, &user_buffer[i], len))
return -EFAULT;
- if (!mac_pton(valstr, pkt_dev->dst_mac))
+ if (mac_pton(valstr, pkt_dev->dst_mac) < 0)
return -EINVAL;
/* Set up Dest MAC */
memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN);
@@ -1446,7 +1446,7 @@ static ssize_t pktgen_if_write(struct file *file,
if (copy_from_user(valstr, &user_buffer[i], len))
return -EFAULT;
- if (!mac_pton(valstr, pkt_dev->src_mac))
+ if (mac_pton(valstr, pkt_dev->src_mac) < 0)
return -EINVAL;
/* Set up Src MAC */
memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN);
diff --git a/net/core/utils.c b/net/core/utils.c
index 386e263f..73299f1 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -304,20 +304,20 @@ int mac_pton(const char *s, u8 *mac)
/* XX:XX:XX:XX:XX:XX */
if (strlen(s) < 3 * ETH_ALEN - 1)
- return 0;
+ return -EINVAL;
/* Don't dirty result unless string is valid MAC. */
for (i = 0; i < ETH_ALEN; i++) {
if (!strchr("0123456789abcdefABCDEF", s[i * 3]))
- return 0;
+ return -EINVAL;
if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1]))
- return 0;
+ return -EINVAL;
if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
- return 0;
+ return -EINVAL;
}
for (i = 0; i < ETH_ALEN; i++) {
mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]);
}
- return 1;
+ return 0;
}
EXPORT_SYMBOL(mac_pton);
--
1.7.4.1
^ permalink raw reply related
* Re: [PATCH 1/4] slip: remove redundant NULL-pointer check before calling slhc_free
From: David Miller @ 2011-07-13 9:32 UTC (permalink / raw)
To: matvejchikov; +Cc: netdev
In-Reply-To: <CAKh5naafU9=0dZr+QiXjbn57hzCM9BN3YA8h0FpJD6dMH+g4hA@mail.gmail.com>
From: Matvejchikov Ilya <matvejchikov@gmail.com>
Date: Wed, 13 Jul 2011 11:45:37 +0400
> Signed-off-by: Matvejchikov Ilya <matvejchikov@gmail.com>
Applied.
^ permalink raw reply
* Re: [PATCH 2/4] slip: fix MTU comparation operation when reallocating buffers
From: David Miller @ 2011-07-13 9:32 UTC (permalink / raw)
To: matvejchikov; +Cc: netdev
In-Reply-To: <CAKh5nabAU=nYJdcbM55J=FDhcrZgW4G+vOBuOjT8TwniT457Jg@mail.gmail.com>
From: Matvejchikov Ilya <matvejchikov@gmail.com>
Date: Wed, 13 Jul 2011 11:46:17 +0400
> In sl_realloc_bufs() there is no reason to check if the requested MTU greater
> than or equal to the current MTU value as this function called only
> when requested
> MTU not equals to the current value. So, the ">=" operation can be
> safely replaced
> with the ">".
>
> Signed-off-by: Matvejchikov Ilya <matvejchikov@gmail.com>
Patch corrupted by email client:
> printk(KERN_WARNING "%s: unable to grow slip buffers, MTU change
> cancelled.\n",
That should be one line.
I applied this by hand.
^ permalink raw reply
* Re: [PATCH 3/4] slip: remove redundant check slip_devs for NULL
From: David Miller @ 2011-07-13 9:32 UTC (permalink / raw)
To: matvejchikov; +Cc: netdev
In-Reply-To: <CAKh5naa_ARaJvZFOMva9V22YCTkVyPMw1+-x=GTtx-niwpOFSg@mail.gmail.com>
From: Matvejchikov Ilya <matvejchikov@gmail.com>
Date: Wed, 13 Jul 2011 11:46:57 +0400
> As slip_devs is initialized on module load stage there is no reason to
> check it for NULL anywhere instead of the deinitialization routine because
> if we can't get enough memory on startup we don't run at all.
>
> Signed-off-by: Matvejchikov Ilya <matvejchikov@gmail.com>
Applied.
^ permalink raw reply
* Re: [PATCH 4/4] slip: remove dead code within the slip initialization
From: David Miller @ 2011-07-13 9:32 UTC (permalink / raw)
To: matvejchikov; +Cc: netdev
In-Reply-To: <CAKh5naaqpjwaUUwJEkpN4g9iyQeUtZFn+XnCED4R=Ovo_hJ59A@mail.gmail.com>
From: Matvejchikov Ilya <matvejchikov@gmail.com>
Date: Wed, 13 Jul 2011 11:47:38 +0400
> This following code contains a dead "if (dev).." block:
...
> The reason is that the code starting with "if (dev).." is never called as
> when we found an empty slot (dev == NULL) we break the loop and "if (dev).."
> not works eiter the loop ends and we get out with "i >= slip_maxdev".
>
> Signed-off-by: Matvejchikov Ilya <matvejchikov@gmail.com>
Applied.
^ permalink raw reply
* Re: IEEE 802.1ag / 802.1x / Y1731
From: Satendra... @ 2011-07-13 9:39 UTC (permalink / raw)
To: Balaji G; +Cc: equinox, netdev
In-Reply-To: <BANLkTi=qe9f_zJRqC9nuzMGViy0YLAzJJg@mail.gmail.com>
Hi Balaji,
Thanks for sharing this information.
We have decided to use dot1ag utils for 802.1ag. It's a user space
implementation of 802.1ag.
I have a question: is there any development going on for Y.1731 for
Linux or FreeBSD
(either as a user space application like dot1ag OR network stack
enhancements OR anything else)?
Thanks,
Satendra
On 2 July 2011 13:22, Balaji G <balajig.foss@gmail.com> wrote:
> Hi
> Currently ECFM is supported @ the user space level or rather lot of
> companies buy L2 stacks and integrate it with the OS so these stacks have
> ECFM support. When we talk about ECFM we need to really find out whether we
> are going to talk about suppporting ECFM over Provider Bridging Ports or are
> we gonna support ECFM over PBB which is 802.1AH because the place where we
> configure ECFM entities play an important role and this could be defined
> only if we get to know on what we support ECFM. But as on date Linux kernel
> does not have support for ECFM or for Y.1731
> Cheers,
> - Balaji
>
> On Fri, Jul 1, 2011 at 5:42 PM, Satendra... <satendra.pratap@gmail.com>
> wrote:
>>
>> Hi Balaji,
>> You mean CFM OAM 802.1ag can not be supported under linux yet.
>> I mean there are many routers which are running linux and if they want
>> to support CFM OAM on
>> their routers they will have to make changes in Linux networking stack?
>> Could you please tell about the support of Y.1731 / 802.1ah / 802.1x
>> in the linux networking
>> stack ?
>>
>> Thanks,
>> Satendra
>>
>> On 1 July 2011 15:51, Balaji G <balajig.foss@gmail.com> wrote:
>> >>Could anyone please tell me whether there is any support in Linux
>> >>networking stack for 802.1ag ? If yes please mention the linux version.
>> > Linux Kernel does not support 802.1ag
>> > Thanks,
>> > Cheers,
>> > - Balaji
>> >
>> >
>> >
>> > On Fri, Jul 1, 2011 at 1:46 PM, Satendra... <satendra.pratap@gmail.com>
>> > wrote:
>> >>
>> >> Hello All,
>> >> Could anyone please tell me whether there is any support in Linux
>> >> networking stack
>> >> for 802.1ag ? If yes please mention the linux version.
>> >> What I need to do is to use CFM OAM (802.1ag) open source client on
>> >> our routers running linux
>> >> on them. Open source client to be used is dot1ag utils.
>> >>
>> >> Thanks,
>> >> Satendra
>> >>
>> >> On 24 June 2011 17:36, Satendra... <satendra.pratap@gmail.com> wrote:
>> >> > Hi,
>> >> > Could someone please tell me which version of linux has got support
>> >> > for below (all three) protocols:
>> >> > - IEEE 802.1ag
>> >> > - 802.1x
>> >> > - Y1731
>> >> >
>> >> > Thanks,
>> >> > Satendra
>> >> >
>> >> --
>> >> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> >> the body of a message to majordomo@vger.kernel.org
>> >> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> >
>> >
>
>
^ permalink raw reply
* Re: [PATCH] net: change return values in mac_pton() function
From: Alexey Dobriyan @ 2011-07-13 10:09 UTC (permalink / raw)
To: Arend van Spriel; +Cc: davem, netdev, linux-kernel
In-Reply-To: <1310549424-5484-1-git-send-email-arend@broadcom.com>
On Wed, Jul 13, 2011 at 12:30 PM, Arend van Spriel <arend@broadcom.com> wrote:
> The original commit adding this function noted a diverge from usual
> 0=success/-E=fail, but no motivation for it.
I thought it was obvious, but indeed it wasn't explicitly
mentioned in the changelog. But see inet_pton(3).
> To stay consistent this
> commit adheres to the usual approach. The callers check for result
> is changed from 'if(!mac_pton(x, y))' to 'if(mac_pton(x,y) < 0)'.
> @@ -304,20 +304,20 @@ int mac_pton(const char *s, u8 *mac)
>
> /* XX:XX:XX:XX:XX:XX */
> if (strlen(s) < 3 * ETH_ALEN - 1)
> - return 0;
> + return -EINVAL;
^ permalink raw reply
* Re: [PATCH] net: change return values in mac_pton() function
From: David Miller @ 2011-07-13 11:12 UTC (permalink / raw)
To: adobriyan; +Cc: arend, netdev, linux-kernel
In-Reply-To: <CACVxJT-TXBd5S+HNfzNR2hnGUk2hMfTSQrkodUunUXOQ6+X8eg@mail.gmail.com>
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 13 Jul 2011 13:09:03 +0300
> On Wed, Jul 13, 2011 at 12:30 PM, Arend van Spriel <arend@broadcom.com> wrote:
>> The original commit adding this function noted a diverge from usual
>> 0=success/-E=fail, but no motivation for it.
>
> I thought it was obvious, but indeed wasn't explicitely
> mentioned in changelog. But see inet_pton(3).
Agreed it's better for mac_pton() to be consistent with existing,
similar, interfaces like inet_pton.
^ permalink raw reply
* [PATCH] net: Embed hh_cache inside of struct neighbour.
From: David Miller @ 2011-07-13 11:17 UTC (permalink / raw)
To: netdev; +Cc: eric.dumazet
Now that there is a one-to-one correspondence between neighbour
and hh_cache entries, we no longer need:
1) dynamic allocation
2) attachment to dst->hh
3) refcounting
Initialization of the hh_cache entry is indicated by hh_len
being non-zero, and such initialization is always done with
the neighbour's lock held as a writer.
Signed-off-by: David S. Miller <davem@davemloft.net>
--
This passes a bunch of basic testing I've done, and shaves
a few seconds off of the udpflood test.
Eric, I tried to do something reasonable with the resulting
dst_entry layout wrt. the 64-byte alignment thing. I'm open
to suggestions.
And also notice how we don't need that DST_NOCACHE test any more,
since we now don't do atomics or modify dst_entry state just to setup
the hh_cache entry.
I hadn't really thought about this aspect of DST_NOCACHE before, but
it means that with the routing cache removal patches applied, HH
caching was completely disabled :-/ Well, at least now that will be
fixed :))
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7538237..5ccc0cb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -252,14 +252,7 @@ struct netdev_hw_addr_list {
netdev_hw_addr_list_for_each(ha, &(dev)->mc)
struct hh_cache {
- atomic_t hh_refcnt; /* number of users */
-/*
- * We want hh_output, hh_len, hh_lock and hh_data be a in a separate
- * cache line on SMP.
- * They are mostly read, but hh_refcnt may be changed quite frequently,
- * incurring cache line ping pongs.
- */
- u16 hh_len ____cacheline_aligned_in_smp;
+ u16 hh_len;
u16 __pad;
int (*hh_output)(struct sk_buff *skb);
seqlock_t hh_lock;
@@ -273,12 +266,6 @@ struct hh_cache {
unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
};
-static inline void hh_cache_put(struct hh_cache *hh)
-{
- if (atomic_dec_and_test(&hh->hh_refcnt))
- kfree(hh);
-}
-
/* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much.
* Alternative is:
* dev->hard_header_len ? (dev->hard_header_len +
diff --git a/include/net/dst.h b/include/net/dst.h
index e12ddfb..0dd7ccb 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -38,7 +38,6 @@ struct dst_entry {
unsigned long expires;
struct dst_entry *path;
struct neighbour *neighbour;
- struct hh_cache *hh;
#ifdef CONFIG_XFRM
struct xfrm_state *xfrm;
#else
@@ -47,6 +46,14 @@ struct dst_entry {
int (*input)(struct sk_buff*);
int (*output)(struct sk_buff*);
+ int flags;
+#define DST_HOST 0x0001
+#define DST_NOXFRM 0x0002
+#define DST_NOPOLICY 0x0004
+#define DST_NOHASH 0x0008
+#define DST_NOCACHE 0x0010
+#define DST_NOCOUNT 0x0020
+
short error;
short obsolete;
unsigned short header_len; /* more space at head required */
@@ -62,7 +69,7 @@ struct dst_entry {
* (L1_CACHE_SIZE would be too much)
*/
#ifdef CONFIG_64BIT
- long __pad_to_align_refcnt[1];
+ long __pad_to_align_refcnt[2];
#endif
/*
* __refcnt wants to be on a different cache line from
@@ -71,13 +78,6 @@ struct dst_entry {
atomic_t __refcnt; /* client references */
int __use;
unsigned long lastuse;
- int flags;
-#define DST_HOST 0x0001
-#define DST_NOXFRM 0x0002
-#define DST_NOPOLICY 0x0004
-#define DST_NOHASH 0x0008
-#define DST_NOCACHE 0x0010
-#define DST_NOCOUNT 0x0020
union {
struct dst_entry *next;
struct rtable __rcu *rt_next;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 6fe8c2c..bd8f9f0 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -108,7 +108,7 @@ struct neighbour {
__u8 dead;
seqlock_t ha_lock;
unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
- struct hh_cache *hh;
+ struct hh_cache hh;
int (*output)(struct sk_buff *skb);
const struct neigh_ops *ops;
struct rcu_head rcu;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 56149ec..75ee421 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -343,14 +343,16 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct neighbour *neigh;
struct dst_entry *dst;
skb->dev = bridge_parent(skb->dev);
if (!skb->dev)
goto free_skb;
dst = skb_dst(skb);
- if (dst->hh) {
- neigh_hh_bridge(dst->hh, skb);
+ neigh = dst->neighbour;
+ if (neigh->hh.hh_len) {
+ neigh_hh_bridge(&neigh->hh, skb);
skb->dev = nf_bridge->physindev;
return br_handle_frame_finish(skb);
} else if (dst->neighbour) {
diff --git a/net/core/dst.c b/net/core/dst.c
index 6135f36..4aacc14 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -172,7 +172,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst->expires = 0UL;
dst->path = dst;
dst->neighbour = NULL;
- dst->hh = NULL;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
@@ -226,19 +225,13 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child;
struct neighbour *neigh;
- struct hh_cache *hh;
smp_rmb();
again:
neigh = dst->neighbour;
- hh = dst->hh;
child = dst->child;
- dst->hh = NULL;
- if (hh)
- hh_cache_put(hh);
-
if (neigh) {
dst->neighbour = NULL;
neigh_release(neigh);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f879bb5..77a399f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -297,6 +297,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
+ seqlock_init(&n->hh.hh_lock);
n->parms = neigh_parms_clone(&tbl->parms);
setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
@@ -702,14 +703,11 @@ void neigh_destroy(struct neighbour *neigh)
if (neigh_del_timer(neigh))
printk(KERN_WARNING "Impossible event.\n");
- hh = neigh->hh;
- if (hh) {
- neigh->hh = NULL;
-
+ hh = &neigh->hh;
+ if (hh->hh_len) {
write_seqlock_bh(&hh->hh_lock);
hh->hh_output = neigh_blackhole;
write_sequnlock_bh(&hh->hh_lock);
- hh_cache_put(hh);
}
skb_queue_purge(&neigh->arp_queue);
@@ -737,8 +735,8 @@ static void neigh_suspect(struct neighbour *neigh)
neigh->output = neigh->ops->output;
- hh = neigh->hh;
- if (hh)
+ hh = &neigh->hh;
+ if (hh->hh_len)
hh->hh_output = neigh->ops->output;
}
@@ -755,8 +753,8 @@ static void neigh_connect(struct neighbour *neigh)
neigh->output = neigh->ops->connected_output;
- hh = neigh->hh;
- if (hh)
+ hh = &neigh->hh;
+ if (hh->hh_len)
hh->hh_output = neigh->ops->hh_output;
}
@@ -1017,7 +1015,7 @@ out_unlock_bh:
}
EXPORT_SYMBOL(__neigh_event_send);
-static void neigh_update_hhs(const struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1027,8 +1025,8 @@ static void neigh_update_hhs(const struct neighbour *neigh)
update = neigh->dev->header_ops->cache_update;
if (update) {
- hh = neigh->hh;
- if (hh) {
+ hh = &neigh->hh;
+ if (hh->hh_len) {
write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
write_sequnlock_bh(&hh->hh_lock);
@@ -1214,62 +1212,29 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
}
EXPORT_SYMBOL(neigh_event_ns);
-static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst)
-{
- struct hh_cache *hh;
-
- smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
- hh = n->hh;
- if (hh) {
- atomic_inc(&hh->hh_refcnt);
- if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
- hh_cache_put(hh);
- return true;
- }
- return false;
-}
-
/* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
- __be16 protocol)
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
- struct hh_cache *hh;
struct net_device *dev = dst->dev;
-
- if (likely(neigh_hh_lookup(n, dst)))
- return;
-
- /* slow path */
- hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
- if (!hh)
- return;
-
- seqlock_init(&hh->hh_lock);
- atomic_set(&hh->hh_refcnt, 2);
-
- if (dev->header_ops->cache(n, hh, protocol)) {
- kfree(hh);
- return;
- }
+ __be16 prot = dst->ops->protocol;
+ struct hh_cache *hh = &n->hh;
write_lock_bh(&n->lock);
- /* must check if another thread already did the insert */
- if (neigh_hh_lookup(n, dst)) {
- kfree(hh);
+ /* Only one thread can come in here and initialize the
+ * hh_cache entry.
+ */
+ if (hh->hh_len)
+ goto end;
+
+ if (dev->header_ops->cache(n, hh, prot))
goto end;
- }
if (n->nud_state & NUD_CONNECTED)
hh->hh_output = n->ops->hh_output;
else
hh->hh_output = n->ops->output;
- smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
- n->hh = hh;
-
- if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
- hh_cache_put(hh);
end:
write_unlock_bh(&n->lock);
}
@@ -1312,10 +1277,8 @@ int neigh_resolve_output(struct sk_buff *skb)
struct net_device *dev = neigh->dev;
unsigned int seq;
- if (dev->header_ops->cache &&
- !dst->hh &&
- !(dst->flags & DST_NOCACHE))
- neigh_hh_init(neigh, dst, dst->ops->protocol);
+ if (dev->header_ops->cache && !neigh->hh.hh_len)
+ neigh_hh_init(neigh, dst);
do {
seq = read_seqbegin(&neigh->ha_lock);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 54119d5..a621b96 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -182,6 +182,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
struct rtable *rt = (struct rtable *)dst;
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
+ struct neighbour *neigh;
if (rt->rt_type == RTN_MULTICAST) {
IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
@@ -203,11 +204,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
skb = skb2;
}
- if (dst->hh)
- return neigh_hh_output(dst->hh, skb);
- else if (dst->neighbour)
- return dst->neighbour->output(skb);
-
+ neigh = dst->neighbour;
+ if (neigh) {
+ struct hh_cache *hh = &neigh->hh;
+ if (hh->hh_len)
+ return neigh_hh_output(hh, skb);
+ else
+ return dst->neighbour->output(skb);
+ }
if (net_ratelimit())
printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
kfree_skb(skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c6388e8..a52bb74 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -426,9 +426,10 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
dst_metric(&r->dst, RTAX_RTTVAR)),
r->rt_key_tos,
- r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
- r->dst.hh ? (r->dst.hh->hh_output ==
- dev_queue_xmit) : 0,
+ -1,
+ (r->dst.neighbour ?
+ (r->dst.neighbour->hh.hh_output ==
+ dev_queue_xmit) : 0),
r->rt_spec_dst, &len);
seq_printf(seq, "%*s\n", 127 - len, "");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 9d4b165..f0f144c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -100,6 +100,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ struct neighbour *neigh;
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -134,11 +135,14 @@ static int ip6_finish_output2(struct sk_buff *skb)
skb->len);
}
- if (dst->hh)
- return neigh_hh_output(dst->hh, skb);
- else if (dst->neighbour)
- return dst->neighbour->output(skb);
-
+ neigh = dst->neighbour;
+ if (neigh) {
+ struct hh_cache *hh = &neigh->hh;
+ if (hh->hh_len)
+ return neigh_hh_output(hh, skb);
+ else
+ return dst->neighbour->output(skb);
+ }
IP6_INC_STATS_BH(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
^ permalink raw reply related
* Respected Friend
From: David Jacob @ 2011-07-13 10:20 UTC (permalink / raw)
Respected friend
How are you? I hope everything is alright with you ? My
name is David Jacob.my purpose of contacting you is to
solicit for your help in other to execute a lagitimate
business which requires a foreign patner.The total fund is
valued at Four million Two hundred thousand us-dollars.i
wish you can assist and I beleive you will have the desire
to communicate with me for further discussions.thanks and
God bless you.Thanks from David
---
Professional hosting for everyone - http://www.host.ru
^ permalink raw reply
* Re: [PATCH] net: change return values in mac_pton() function
From: Arend van Spriel @ 2011-07-13 11:21 UTC (permalink / raw)
To: David Miller
Cc: adobriyan@gmail.com, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <20110713.041253.1879038886370324446.davem@davemloft.net>
On 07/13/2011 01:12 PM, David Miller wrote:
> From: Alexey Dobriyan<adobriyan@gmail.com>
> Date: Wed, 13 Jul 2011 13:09:03 +0300
>
>> On Wed, Jul 13, 2011 at 12:30 PM, Arend van Spriel<arend@broadcom.com> wrote:
>>> The original commit adding this function noted a diverge from usual
>>> 0=success/-E=fail, but no motivation for it.
>> I thought it was obvious, but indeed wasn't explicitely
>> mentioned in changelog. But see inet_pton(3).
> Agreed it's better for mac_pton() to be consistent with existing,
> similar, interfaces like inet_pton.
I just liked the general approach of zero indicating success. But even
in the realm called Linux not everything can be black and white, I guess :-D
Please drop the patch.
Gr. AvS
--
Almost nobody dances sober, unless they happen to be insane.
-- H.P. Lovecraft --
^ permalink raw reply
* RE: [PATCH net-next v2 3/7] r8169: adjust the settings about RxConfig
From: hayeswang @ 2011-07-13 12:16 UTC (permalink / raw)
To: 'Francois Romieu'; +Cc: netdev, linux-kernel
In-Reply-To: <20110712161258.GA6157@electric-eye.fr.zoreil.com>
[-- Attachment #1: Type: text/plain, Size: 975 bytes --]
Francois Romieu [mailto:romieu@fr.zoreil.com]
> Sent: Wednesday, July 13, 2011 12:13 AM
> To: Hayeswang
> Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next v2 3/7] r8169: adjust the
> settings about RxConfig
>
> Hayes Wang <hayeswang@realtek.com> :
> > Set the init value before reset in probe function. And then just
> > modify the relative bits and keep the init settings.
>
> It breaks my old PCI Netgear 8110s (RTL_GIGA_MAC_VER_03/XID
> 04000000). Once the device is up, RxConfig is changed from
> 0x0000e70e to 0x0000000e (missed write ?).
>
I find that the RxConfig has to be set after the tx/rx is enabled for 8110S(x)
series.
> Is there any side effect / objection if this patch is removed
> from the series and scheduled for a later time ?
>
The original values are different from the suggested values, so I hope to apply
this patch.
Please try the attached file. It should fix this issue.
Best Regards,
Hayes
[-- Attachment #2: 0001-r8169-fix-the-RxConfig-couldn-t-be-written.patch --]
[-- Type: application/octet-stream, Size: 964 bytes --]
>From d291f25398cd6de2d9ef8545e70f003dd091f192 Mon Sep 17 00:00:00 2001
From: Hayes <hayeswang@realtek.com>
Date: Wed, 13 Jul 2011 11:44:06 +0800
Subject: [PATCH] r8169:fix the RxConfig couldn't be written
For 8110S, 8110SB, and 8110SC series, the initial value of RxConfig
needs to be set after the tx/rx is enabled.
Signed-off-by: Hayes Wang <hayeswang@realtek.com>
---
drivers/net/r8169.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 19d739a..f04cd8a 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -4065,6 +4065,9 @@ static void rtl_hw_start_8169(struct net_device *dev)
tp->mac_version == RTL_GIGA_MAC_VER_04)
RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+ /* RxConfig needs to be initialized after tx/rx enable */
+ rtl_init_rxcfg(tp);
+
RTL_W8(EarlyTxThres, NoEarlyTx);
rtl_set_rx_max_size(ioaddr, rx_buf_sz);
--
1.7.3.4
^ permalink raw reply related
* Re: [PATCH] connector: add an event for monitoring process tracers
From: Evgeniy Polyakov @ 2011-07-13 12:48 UTC (permalink / raw)
To: Vladimir Zapolskiy; +Cc: David S. Miller, Oleg Nesterov, Roland McGrath, netdev
In-Reply-To: <1310502757-32103-1-git-send-email-vzapolskiy@gmail.com>
Hi Vladimir
On Tue, Jul 12, 2011 at 11:32:37PM +0300, Vladimir Zapolskiy (vzapolskiy@gmail.com) wrote:
> This change adds a procfs connector event, which is emitted on every
> successful process tracer attach or detach.
>
> If some process connects to other one, kernelspace connector reports
> process id and thread group id of both these involved processes. On
> disconnection null process id is returned.
>
> Such an event allows to create a simple automated userspace mechanism
> to be aware about processes connecting to others, therefore predefined
> process policies can be applied to them if needed.
Looks good to me, thank you.
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
--
Evgeniy Polyakov
^ permalink raw reply
* Which one does less damage? "tcp_tw_recycle" or "tcp_max_tw_buckets"
From: Jenny Lee @ 2011-07-13 12:55 UTC (permalink / raw)
To: netdev
Hello,
<apology>
I apologize if this is not the right place to post this. But I could not find linux-net mailing list mentioned on kernel.org website.
From: http://www.kernel.org/pub/linux/docs/lkml/ :
The linux-net@vger.kernel.org mailing list is for networking user questions.
Majordomo: >>>> subscribe linux-net **** subscribe: unknown list 'linux-net'.
Couldn't also get an answer on #kernel IRC channel. So I am posting here.
</apology>
I have a situation where I am running out of ephemeral ports.
* RHEL6 x64 Machine (kernel-2.6.32-71).
* I have 64K available ports.
* I am using squid.
* Client is using CONNECT (HTTP inside) through squid, doing 500 reqs/second. Squid has many parents.
* Squid outgoing IP is SNAT'ted to 1000 IPs.
Persistent connections and all did not do any good for me. Squid developers were very helpful, implemented many improvements for me but still no use.
Apparently this 64K limit per tuple does not seem to work as intended. I have many IPs, yet all hell breaks loose when 64K ports are used up. The max amount of TIME_WAITs from a single IP I have seen is 15K, yet I run out of ports at 64K.
I have tried fiddling with all kinds of values (including tcp_tw_reuse with tcp timestamps), timeouts, etc. but nothing helped.
I have 2 solutions:
* tcp_tw_recycle: This solved all my issues. I have not experienced any visible problems. Client can do > 1000 reqs/sec.
* tcp_max_tw_buckets: Redhat default is 180K. Keeping this at 64K helps. Kernel emits "TIME_WAIT bucket overflow" occasionally. But everything seems to be working.
My question:
Which one would be wiser to do:
To keep "tcp_tw_recycle" on, or to keep "tcp_max_tw_buckets" at 64K where I will get bucket overflow errors once an hour for a couple of seconds?
Thank you in advance.
Jenny
^ permalink raw reply
* Re: Kernel crash after using new Intel NIC (igb)
From: Maximilian Engelhardt @ 2011-07-13 13:38 UTC (permalink / raw)
To: Eric Dumazet
Cc: Arun Sharma, David Miller, linux-kernel, netdev, Yann Dupont,
Denys Fedoryshchenko
In-Reply-To: <1306466831.2543.58.camel@edumazet-laptop>
[-- Attachment #1: Type: Text/Plain, Size: 1101 bytes --]
On Friday 27 May 2011 05:27:11 Eric Dumazet wrote:
> Le jeudi 26 mai 2011 à 17:09 -0700, Arun Sharma a écrit :
> > On 5/26/11 3:01 PM, Eric Dumazet wrote:
> > >> Yeah - using the refcnt seems better than list_empty(), but I'm not
> > >> sure that your patch addresses the race above.
> > >
> > > It does.
> >
> > True. I can't find any holes in this method and it resolves the "failure
> > to unlink from unused" case.
> >
> > Perhaps wrap the while(1) loop into its own primitive in atomic.h or use
> > an existing primitive?
>
> Sure, here is a formal submission I cooked.
>
> Thanks
Thank you for your patch. We have it running now for some time and didn't
observe any crashes.
I have one question left. We want to install the same card in another server
that is currently running Debian stable with a 2.6.32 kernel. Is this bug also
present in that version or has it been introduced afterwards? This server is
used by many people for Internet access, so it would be quite bad if it
crashes.
Thanks again for your work on the patch.
Greetings,
Maxi
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply
* [PATCH] net: Add support for handling queueing in hardware
From: Andy Fleming @ 2011-07-13 13:52 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev
The QDisc code does a bunch of locking which is unnecessary if
you have hardware which handles all of the queueing. Add
support for this, and skip over all of the queueing code if
the feature is enabled on a given device, which breaks QDisc
support on dpaa_eth, and also coopts the FCOE feature bit.
Signed-off-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: Ben Collins <ben.c@servergy.com>
Cc: netdev@vger.kernel.org
---
include/linux/netdev_features.h | 2 ++
net/core/dev.c | 6 ++++++
2 files changed, 8 insertions(+)
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 3dd3934..ffb4587 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -56,6 +56,7 @@ enum {
NETIF_F_LOOPBACK_BIT, /* Enable loopback */
NETIF_F_RXFCS_BIT, /* Append FCS to skb pkt data */
NETIF_F_RXALL_BIT, /* Receive errored frames too */
+ NETIF_F_HW_QDISC_BIT, /* Supports hardware Qdisc */
/*
* Add your fresh new feature above and remember to update
@@ -80,6 +81,7 @@ enum {
#define NETIF_F_GSO_ROBUST __NETIF_F(GSO_ROBUST)
#define NETIF_F_HIGHDMA __NETIF_F(HIGHDMA)
#define NETIF_F_HW_CSUM __NETIF_F(HW_CSUM)
+#define NETIF_F_HW_QDISC __NETIF_F(HW_QDISC)
#define NETIF_F_HW_VLAN_FILTER __NETIF_F(HW_VLAN_FILTER)
#define NETIF_F_HW_VLAN_RX __NETIF_F(HW_VLAN_RX)
#define NETIF_F_HW_VLAN_TX __NETIF_F(HW_VLAN_TX)
diff --git a/net/core/dev.c b/net/core/dev.c
index dffbef7..6818b18 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2743,6 +2743,12 @@ int dev_queue_xmit(struct sk_buff *skb)
skb_update_prio(skb);
+ if (dev->features & NETIF_F_HW_QDISC) {
+ txq = netdev_pick_tx(dev, skb);
+ rc = dev_hard_start_xmit(skb, dev, txq);
+ goto out;
+ }
+
txq = netdev_pick_tx(dev, skb);
q = rcu_dereference_bh(txq->qdisc);
--
1.8.1.2
^ permalink raw reply related
* Re: [PATCH] connector: add an event for monitoring process tracers
From: David Miller @ 2011-07-13 14:53 UTC (permalink / raw)
To: zbr; +Cc: vzapolskiy, oleg, roland, netdev
In-Reply-To: <20110713124832.GA28307@ioremap.net>
From: Evgeniy Polyakov <zbr@ioremap.net>
Date: Wed, 13 Jul 2011 16:48:32 +0400
> On Tue, Jul 12, 2011 at 11:32:37PM +0300, Vladimir Zapolskiy (vzapolskiy@gmail.com) wrote:
>> This change adds a procfs connector event, which is emitted on every
>> successful process tracer attach or detach.
>>
>> If some process connects to other one, kernelspace connector reports
>> process id and thread group id of both these involved processes. On
>> disconnection null process id is returned.
>>
>> Such an event allows to create a simple automated userspace mechanism
>> to be aware about processes connecting to others, therefore predefined
>> process policies can be applied to them if needed.
>
> Looks good to me, thank you.
> Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Since this isn't really "networking" it would be really nice if this
was taken in via some other tree, you can add my ack:
Acked-by: David S. Miller <davem@davemloft.net>
^ permalink raw reply
* Re: [PATCH] connector: add an event for monitoring process tracers
From: Oleg Nesterov @ 2011-07-13 15:09 UTC (permalink / raw)
To: Vladimir Zapolskiy
Cc: Evgeniy Polyakov, David S. Miller, Roland McGrath, netdev
In-Reply-To: <1310502757-32103-1-git-send-email-vzapolskiy@gmail.com>
On 07/12, Vladimir Zapolskiy wrote:
>
> Note, a detach signal is not emitted, if a tracer process terminates
> without explicit PTRACE_DETACH request. Such cases can be covered
> listening to PROC_EVENT_EXIT connector events.
Hmm. More and more reasons to make the implicit detach sleepable...
But. There is another case. The (dead) tracee can be detached via
do_wait(). Perhaps this falls into "covered listening to EXIT" too,
but imho makes sense to document in the changelog. Oh, and probably
we will add the ability to detach a zombie...
I don't really understand why you need this, but I won't argue.
As for the patch,
> +void proc_ptrace_connector(struct task_struct *task)
> +{
> + struct cn_msg *msg;
> + struct proc_event *ev;
> + struct timespec ts;
> + __u8 buffer[CN_PROC_MSG_SIZE];
> + struct task_struct *tracer;
> +
> + if (atomic_read(&proc_event_num_listeners) < 1)
> + return;
> +
> + msg = (struct cn_msg *)buffer;
> + ev = (struct proc_event *)msg->data;
> + get_seq(&msg->seq, &ev->cpu);
> + ktime_get_ts(&ts); /* get high res monotonic timestamp */
> + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
> + ev->what = PROC_EVENT_PTRACE;
> + ev->event_data.ptrace.process_pid = task->pid;
> + ev->event_data.ptrace.process_tgid = task->tgid;
> +
> + rcu_read_lock();
> + tracer = tracehook_tracer_task(task);
> + if (tracer) {
> + ev->event_data.ptrace.tracer_pid = tracer->pid;
> + ev->event_data.ptrace.tracer_tgid = tracer->tgid;
> + } else {
> + ev->event_data.ptrace.tracer_pid = 0;
> + ev->event_data.ptrace.tracer_tgid = 0;
> + }
This doesn't look right. The code uses tracehook_tracer_task() to
figure out whether this task is traced or not. But this is racy.
ptrace_attach:
...attach...
/* WINDOW */
proc_ptrace_connector(task);
The task can exit in between, and the caller's subthread can do
wait4() and release it. In this case proc_ptrace_connector() will
see tracehook_tracer_task() == NULL and report "detach".
The similar race in ptrace_detach() path. Another tracer can attach
to this task before we proc_ptrace_connector().
I think proc_ptrace_connector() needs the explicit "task_struct *tracer"
argument, NULL if ptrace_detach(). Or a simple boolean, the tracer is
current.
If you think this is fine - I won't argue.
But in any case, please rediff against
git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc.git ptrace
tracehook_tracer_task() was removed, and
> @@ -260,6 +261,9 @@ out:
> if (wait_trap)
> wait_event(current->signal->wait_chldexit,
> !(task->group_stop & GROUP_STOP_TRAPPING));
> + if (!retval)
> + proc_ptrace_connector(task);
> +
> return retval;
> }
this chunk probably should be updated.
Oleg.
^ permalink raw reply
* any way to let host act as TCP server OR client on same IP/port?
From: Chris Friesen @ 2011-07-13 16:30 UTC (permalink / raw)
To: netdev
I've been asked an interesting question about TCP. We have some people
that want to set up a TCP socket that can listen for connections on a
given IP/port, but also initiate connections from that same IP/port.
(Only one at a time, of course.)
The TCP state machine seems to allow this (moving from LISTEN to
SYN_SENT) but it's not a normal transition.
Is there any way to do this using the socket API?
I thought up a hack whereby we could use NFQUEUE to detect an incoming
SYN and delay it while we call listen() on the socket. Is there any
better way to do this?
Thanks,
Chris
--
Chris Friesen
Software Developer
GENBAND
chris.friesen@genband.com
www.genband.com
^ permalink raw reply
* Re: any way to let host act as TCP server OR client on same IP/port?
From: Eric Dumazet @ 2011-07-13 17:52 UTC (permalink / raw)
To: Chris Friesen; +Cc: netdev
In-Reply-To: <4E1DC83C.3020506@genband.com>
Le mercredi 13 juillet 2011 à 10:30 -0600, Chris Friesen a écrit :
> I've been asked an interesting question about TCP. We have some people
> that want to set up a TCP socket that can listen for connections on a
> given IP/port, but also initiate connections from that same IP/port.
> (Only one at a time, of course.)
>
> The TCP state machine seems to allow this (moving from LISTEN to
> SYN_SENT) but it's not a normal transition.
>
> Is there any way to do this using the socket API?
>
> I thought up a hack whereby we could use NFQUEUE to detect an incoming
> SYN and delay it while we call listen() on the socket. Is there any
> better way to do this?
Could you try SO_REUSEADDR, on both listener and connect attempt ?
^ permalink raw reply
* [PATCH] bnx2: do not start the interface if reset fails
From: Flavio Leitner @ 2011-07-13 17:54 UTC (permalink / raw)
To: netdev, Michael Chan; +Cc: Flavio Leitner
When bnx2_reset_task() is called, it will stop,
(re)initialize and start the interface to restore
the working condition.
The bnx2_init_nic() calls bnx2_reset_nic() which will
reset the chip and then call bnx2_free_skbs() to free
all the skbs.
The problem happens when bnx2_init_chip() fails because
bnx2_reset_nic() will just return skipping the ring
initializations at bnx2_init_all_rings(). Later, the
reset task starts the interface again and the system
crashes due to a NULL pointer access (no skb in the ring).
This patch just checks the return code and if an error is
reported, warn the user and abort. It's better to have a
non working interface than a crash.
Signed-off-by: Flavio Leitner <fbl@redhat.com>
---
drivers/net/bnx2.c | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 7915d14..7fb71fc 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -6296,6 +6296,7 @@ static void
bnx2_reset_task(struct work_struct *work)
{
struct bnx2 *bp = container_of(work, struct bnx2, reset_task);
+ int rc;
rtnl_lock();
if (!netif_running(bp->dev)) {
@@ -6305,10 +6306,15 @@ bnx2_reset_task(struct work_struct *work)
bnx2_netif_stop(bp, true);
- bnx2_init_nic(bp, 1);
+ rc = bnx2_init_nic(bp, 1);
+ if (rc) {
+ netdev_err(bp->dev, "failed to reset the NIC, aborting\n");
+ goto out;
+ }
atomic_set(&bp->intr_sem, 1);
bnx2_netif_start(bp, true);
+out:
rtnl_unlock();
}
--
1.7.6
^ permalink raw reply related
* Re: [PATCH 2/2] ipv4: Use universal hash for ARP.
From: Roland Dreier @ 2011-07-13 17:59 UTC (permalink / raw)
To: David Miller; +Cc: johnwheffner, mj, netdev
In-Reply-To: <20110711.014845.1009062692530385177.davem@davemloft.net>
On Mon, Jul 11, 2011 at 1:48 AM, David Miller <davem@davemloft.net> wrote:
> +static inline u32 arp_hashfn(u32 key, const struct net_device *dev, u32 hash_rnd)
> +{
> + u32 val = key ^ dev->ifindex;
> +
> + return val * hash_rnd;
> +}
OK, I suggested this, and there's nothing obviously wrong with it.
But I would hope that someone actually vetted that this provides
enough variation between different hash_rnd values to avoid hash
chain attacks.
- R.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox