* [RFC PATCH net-next v2 1/1] ipv6: add support of ECMP
From: Nicolas Dichtel @ 2012-09-14 7:59 UTC (permalink / raw)
To: yoshfuji; +Cc: bernat, netdev, davem, Nicolas Dichtel
In-Reply-To: <1347609548-14494-1-git-send-email-nicolas.dichtel@6wind.com>
This patch adds support for equal-cost multipath (ECMP) for IPv6.
The patch is based on previous work by
Luc Saillard <luc.saillard@6wind.com>.
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
include/net/ip6_fib.h | 13 ++++
net/ipv6/Kconfig | 33 ++++++++
net/ipv6/ip6_fib.c | 73 ++++++++++++++++++
net/ipv6/route.c | 209 +++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 325 insertions(+), 3 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cd64cf3..37e502a 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -47,6 +47,10 @@ struct fib6_config {
unsigned long fc_expires;
struct nlattr *fc_mx;
int fc_mx_len;
+#ifdef CONFIG_IPV6_MULTIPATH
+ struct nlattr *fc_mp;
+ int fc_mp_len;
+#endif
struct nl_info fc_nlinfo;
};
@@ -98,6 +102,15 @@ struct rt6_info {
struct fib6_node *rt6i_node;
struct in6_addr rt6i_gateway;
+#ifdef CONFIG_IPV6_MULTIPATH
+ /*
+ * siblings is a list of rt6_info that have the same metric/weight,
+ * destination, but not the same gateway. nsiblings is just a cache
+ * to speed up lookup.
+ */
+ unsigned int rt6i_nsiblings;
+ struct list_head rt6i_siblings;
+#endif
atomic_t rt6i_ref;
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 4f7fe72..e0c92dc 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -266,4 +266,37 @@ config IPV6_PIMSM_V2
Support for IPv6 PIM multicast routing protocol PIM-SMv2.
If unsure, say N.
+config IPV6_MULTIPATH
+ bool "IPv6: equal cost multipath for IPv6 routing"
+ depends on IPV6
+ default y
+ ---help---
+ Enable this option to support ECMP for IPv6.
+
+choice
+ prompt "IPv6: choose Multipath algorithm"
+ depends on IPV6_MULTIPATH
+ default IPV6_MULTIPATH_HASH
+ ---help---
+ Define the method to select route between each possible path.
+ The recommended algorithm (by RFC4311) is the HASH method.
+
+ config IPV6_MULTIPATH_HASH
+ bool "IPv6: MULTIPATH hash/flow algorithm"
+ ---help---
+ Multipath routes are chosen according to hash of packet header to
+ ensure a flow keeps the same route.
+ This algorithm is recommended by RFC4311.
+
+ config IPV6_MULTIPATH_RR
+ bool "IPv6: MULTIPATH round robin algorithm"
+ ---help---
+ Multipath routes are chosen according to Round Robin.
+
+ config IPV6_MULTIPATH_RANDOM
+ bool "IPv6: MULTIPATH random algorithm"
+ ---help---
+ Multipath routes are chosen in a random fashion.
+endchoice
+
endif # IPV6
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 13690d6..3541e44 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -672,6 +672,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
iter->rt6i_idev == rt->rt6i_idev &&
ipv6_addr_equal(&iter->rt6i_gateway,
&rt->rt6i_gateway)) {
+#ifdef CONFIG_IPV6_MULTIPATH
+ if (rt->rt6i_nsiblings)
+ rt->rt6i_nsiblings = 0;
+#endif
if (!(iter->rt6i_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->rt6i_flags & RTF_EXPIRES))
@@ -680,6 +684,23 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
rt6_set_expires(iter, rt->dst.expires);
return -EEXIST;
}
+#ifdef CONFIG_IPV6_MULTIPATH
+ /* If we have the same destination and the same metric,
+ * but not the same gateway, then the route we try to
+ * add is a sibling of this route: increment our counter
+ * of siblings, and later we will add our route to the
+ * list.
+ * Only static routes (which don't have flag
+ * RTF_EXPIRES) are used for ECMPv6.
+ *
+ * To avoid a long list, we only add siblings if the
+ * route has a gateway.
+ */
+ if (rt->rt6i_flags & RTF_GATEWAY &&
+ !(rt->rt6i_flags & RTF_EXPIRES) &&
+ !(iter->rt6i_flags & RTF_EXPIRES))
+ rt->rt6i_nsiblings++;
+#endif
}
if (iter->rt6i_metric > rt->rt6i_metric)
@@ -692,6 +713,43 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (ins == &fn->leaf)
fn->rr_ptr = NULL;
+#ifdef CONFIG_IPV6_MULTIPATH
+ /* Link this route to its sibling routes. */
+ if (rt->rt6i_nsiblings) {
+ unsigned int rt6i_nsiblings;
+ struct rt6_info *sibling, *temp_sibling;
+
+ /* Find the first route that has the same metric */
+ sibling = fn->leaf;
+ while (sibling) {
+ if (sibling->rt6i_metric == rt->rt6i_metric) {
+ list_add_tail(&rt->rt6i_siblings,
+ &sibling->rt6i_siblings);
+ break;
+ }
+ sibling = sibling->dst.rt6_next;
+ }
+ /* For each sibling in the list, increment the counter of
+ * siblings. We also check that all the counters are equal.
+ */
+ rt6i_nsiblings = 0;
+ list_for_each_entry_safe(sibling, temp_sibling,
+ &rt->rt6i_siblings,
+ rt6i_siblings) {
+ sibling->rt6i_nsiblings++;
+ if (unlikely(sibling->rt6i_nsiblings !=
+ rt->rt6i_nsiblings)) {
+ pr_err("Wrong number of siblings for route %p (%d)\n",
+ sibling, sibling->rt6i_nsiblings);
+ }
+ rt6i_nsiblings++;
+ }
+ if (unlikely(rt6i_nsiblings != rt->rt6i_nsiblings)) {
+ pr_err("Wrong number of siblings for route %p. I have %d routes, but count %d siblings\n",
+ rt, rt6i_nsiblings, rt->rt6i_nsiblings);
+ }
+ }
+#endif
/*
* insert node
*/
@@ -1197,6 +1255,21 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
if (fn->rr_ptr == rt)
fn->rr_ptr = NULL;
+#ifdef CONFIG_IPV6_MULTIPATH
+ /* Remove this entry from other siblings */
+ if (rt->rt6i_nsiblings) {
+ struct rt6_info *sibling, *next_sibling;
+
+ /* For each sibling, decrement the counter of siblings */
+ list_for_each_entry_safe(sibling, next_sibling,
+ &rt->rt6i_siblings, rt6i_siblings) {
+ sibling->rt6i_nsiblings--;
+ }
+ rt->rt6i_nsiblings = 0;
+ list_del_init(&rt->rt6i_siblings);
+ }
+#endif
+
/* Adjust walkers */
read_lock(&fib6_walker_lock);
FOR_WALKERS(w) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 399613b..431f7ad 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -57,6 +57,9 @@
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
+#ifdef CONFIG_IPV6_MULTIPATH
+#include <net/nexthop.h>
+#endif
#include <asm/uaccess.h>
@@ -288,6 +291,10 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+#ifdef CONFIG_IPV6_MULTIPATH
+ INIT_LIST_HEAD(&rt->rt6i_siblings);
+ rt->rt6i_nsiblings = 0;
+#endif
}
return rt;
}
@@ -388,6 +395,124 @@ static bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
+#ifdef CONFIG_IPV6_MULTIPATH
+/*
+ * Multipath route selection.
+ */
+
+#ifdef CONFIG_IPV6_MULTIPATH_RANDOM
+/*
+ * Pseudo random candidate function
+ */
+static int rt6_info_hash_randomfn(unsigned int candidate_count)
+{
+ return random32() % candidate_count;
+}
+#endif
+
+#ifdef CONFIG_IPV6_MULTIPATH_RR
+/*
+ * Fake Round Robin candidate function
+ * If we want real RR, we need to add a counter in each route
+ */
+static int rt6_info_hash_falserr(unsigned int candidate_count)
+{
+ static unsigned int seed;
+ seed++;
+ return seed % candidate_count;
+}
+#endif
+
+#ifdef CONFIG_IPV6_MULTIPATH_HASH
+/*
+ * Pseudo random candidate using the src port, and other information
+ * Adapted from fib_info_hashfn()
+ */
+static int rt6_info_hash_nhsfn(unsigned int candidate_count,
+ const struct flowi6 *fl6)
+{
+ unsigned int val = fl6->flowi6_proto;
+
+ val ^= fl6->daddr.s6_addr32[0];
+ val ^= fl6->daddr.s6_addr32[1];
+ val ^= fl6->daddr.s6_addr32[2];
+ val ^= fl6->daddr.s6_addr32[3];
+
+ val ^= fl6->saddr.s6_addr32[0];
+ val ^= fl6->saddr.s6_addr32[1];
+ val ^= fl6->saddr.s6_addr32[2];
+ val ^= fl6->saddr.s6_addr32[3];
+
+ /* Works only if this is not encapsulated */
+ switch (fl6->flowi6_proto) {
+ case IPPROTO_UDP:
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ val ^= fl6->fl6_sport;
+ val ^= fl6->fl6_dport;
+ break;
+
+ case IPPROTO_ICMPV6:
+ val ^= fl6->fl6_icmp_type;
+ val ^= fl6->fl6_icmp_code;
+ break;
+ }
+ /* RFC6438 recommends using the flow label */
+ val ^= fl6->flowlabel;
+
+ /* Perhaps we need to tune this function? */
+ val = val ^ (val >> 7) ^ (val >> 12);
+ return val % candidate_count;
+}
+#endif
+
+/*
+ * This function returns an index used to select (at random, round robin, ...)
+ * a route among the siblings.
+ *
+ * Note: fl6 can be NULL
+ */
+static unsigned int rt6_info_hashfn(const struct rt6_info *rt,
+ const struct flowi6 *fl6)
+{
+ int candidate_count = rt->rt6i_nsiblings + 1;
+
+#if defined(CONFIG_IPV6_MULTIPATH_RR)
+ return rt6_info_hash_falserr(candidate_count);
+#elif defined(CONFIG_IPV6_MULTIPATH_RANDOM)
+ return rt6_info_hash_randomfn(candidate_count);
+#elif defined(CONFIG_IPV6_MULTIPATH_HASH)
+ if (fl6 == NULL)
+ return 0;
+ return rt6_info_hash_nhsfn(candidate_count, fl6);
+#else
+ return 0;
+#endif
+}
+
+static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+ struct flowi6 *fl6)
+{
+ struct rt6_info *sibling, *next_sibling;
+ int route_choosen;
+
+ route_choosen = rt6_info_hashfn(match, fl6);
+ /* Don't change the route, if route_choosen == 0
+ * (siblings does not include ourself)
+ */
+ if (route_choosen)
+ list_for_each_entry_safe(sibling, next_sibling,
+ &match->rt6i_siblings, rt6i_siblings) {
+ route_choosen--;
+ if (route_choosen == 0) {
+ match = sibling;
+ break;
+ }
+ }
+ return match;
+}
+#endif /* CONFIG_IPV6_MULTIPATH */
+
/*
* Route lookup. Any table->tb6_lock is implied.
*/
@@ -705,6 +830,10 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
restart:
rt = fn->leaf;
rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+#ifdef CONFIG_IPV6_MULTIPATH
+ if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+ rt = rt6_multipath_select(rt, fl6);
+#endif
BACKTRACK(net, &fl6->saddr);
out:
dst_use(&rt->dst, jiffies);
@@ -866,7 +995,10 @@ restart_2:
restart:
rt = rt6_select(fn, oif, strict | reachable);
-
+#ifdef CONFIG_IPV6_MULTIPATH
+ if (rt->rt6i_nsiblings && oif == 0)
+ rt = rt6_multipath_select(rt, fl6);
+#endif
BACKTRACK(net, &fl6->saddr);
if (rt == net->ipv6.ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
@@ -2247,6 +2379,9 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_IIF] = { .type = NLA_U32 },
[RTA_PRIORITY] = { .type = NLA_U32 },
[RTA_METRICS] = { .type = NLA_NESTED },
+#ifdef CONFIG_IPV6_MULTIPATH
+ [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+#endif
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2324,11 +2459,69 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_TABLE])
cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
+#ifdef CONFIG_IPV6_MULTIPATH
+ if (tb[RTA_MULTIPATH]) {
+ cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
+ cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
+ }
+#endif
+
err = 0;
errout:
return err;
}
+#ifdef CONFIG_IPV6_MULTIPATH
+static int ip6_route_multipath(struct fib6_config *cfg, int add)
+{
+ struct fib6_config r_cfg;
+ struct rtnexthop *rtnh;
+ int remaining;
+ int attrlen;
+ int err = 0, last_err = 0;
+
+beginning:
+ rtnh = (struct rtnexthop *)cfg->fc_mp;
+ remaining = cfg->fc_mp_len;
+
+ /* Parse a Multipath Entry */
+ while (rtnh_ok(rtnh, remaining)) {
+ memcpy(&r_cfg, cfg, sizeof(*cfg));
+ if (rtnh->rtnh_ifindex)
+ r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla) {
+ nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+ r_cfg.fc_flags |= RTF_GATEWAY;
+ }
+ }
+ err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
+ if (err) {
+ last_err = err;
+ /* If we are trying to remove a route, do not stop the
+ * loop when ip6_route_del() fails (because next hop is
+ * already gone), we should try to remove all next hops.
+ */
+ if (add) {
+ /* If add fails, we should try to delete all
+ * next hops that have been already added.
+ */
+ add = 0;
+ goto beginning;
+ }
+ }
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ return last_err;
+}
+#endif /* CONFIG_IPV6_MULTIPATH */
+
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib6_config cfg;
@@ -2338,7 +2531,12 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
if (err < 0)
return err;
- return ip6_route_del(&cfg);
+#ifdef CONFIG_IPV6_MULTIPATH
+ if (cfg.fc_mp)
+ return ip6_route_multipath(&cfg, 0);
+ else
+#endif
+ return ip6_route_del(&cfg);
}
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -2350,7 +2548,12 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
if (err < 0)
return err;
- return ip6_route_add(&cfg);
+#ifdef CONFIG_IPV6_MULTIPATH
+ if (cfg.fc_mp)
+ return ip6_route_multipath(&cfg, 1);
+ else
+#endif
+ return ip6_route_add(&cfg);
}
static inline size_t rt6_nlmsg_size(void)
--
1.7.12
^ permalink raw reply related
* [RFC PATCH net-next v2 0/1] Add support of ECMPv6
From: Nicolas Dichtel @ 2012-09-14 7:59 UTC (permalink / raw)
To: yoshfuji; +Cc: bernat, netdev, davem
In-Reply-To: <505058F5.9020707@linux-ipv6.org>
Here is a proposal to add support for ECMPv6. The previous patch
from Vincent against iproute2 can be used, but another small patch is also
needed; see http://patchwork.ozlabs.org/patch/183277/
If the kernel patch is approved, I will formally submit the patch for
iproute2.
Here is an example of a command to add an ECMP route:
$ ip -6 route add 3ffe:304:124:2306::/64 \
nexthop via fe80::230:1bff:feb4:e05c dev eth0 weight 1 \
nexthop via fe80::230:1bff:feb4:dd4f dev eth0 weight 1
v2: rename CONFIG_IPV6_MULTIPATH_ROUTE to CONFIG_IPV6_MULTIPATH_HASH
use flowlabel in the hash function
add reference to RFC
fix a small indentation issue
remove "If unsure, say N." from the help of CONFIG_IPV6_MULTIPATH
Comments are welcome.
Regards,
Nicolas
^ permalink raw reply
* Re: [net-next.git 3/8 (V2)] stmmac: add the initial tx coalesce schema
From: Giuseppe CAVALLARO @ 2012-09-14 7:36 UTC (permalink / raw)
To: David Miller; +Cc: netdev, bhutchings
In-Reply-To: <20120913.162333.1518469374321928795.davem@davemloft.net>
On 9/13/2012 10:23 PM, David Miller wrote:
> From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
> Date: Tue, 11 Sep 2012 08:55:09 +0200
>
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&priv->tx_lock, flags);
>>
>> - spin_lock(&priv->tx_lock);
>> + priv->xstats.tx_clean++;
>
> You are changing the locking here for the sake of the new timer.
>
> But timers run in software interrupt context, so this change is
> completely unnecessary since NAPI runs in software interrupt context
> as well, and neither timers nor NAPI run in hardware interrupts
> context.
Indeed, it can be called by the ISR too in this new implementation.
I have added the spin_lock_irqsave/restore because otherwise, testing with
CONFIG_PROVE_LOCKING, I get the following warning on ARM SMP.
[ 8.030000]
[ 8.030000] =================================
[ 8.030000] [ INFO: inconsistent lock state ]
[ 8.030000] 3.4.7_stm24_0302-b2000+ #103 Not tainted
[ 8.030000] ---------------------------------
[ 8.030000] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
[ 8.030000] swapper/0/1 [HC1[1]:SC0[0]:HE0:SE1] takes:
[ 8.030000] (&(&priv->tx_lock)->rlock){?.-...}, at: [<802651d8>]
stmmac_tx+0x1c/0x388
[ 8.030000] {HARDIRQ-ON-W} state was registered at:
[ 8.030000] [<800562b4>] __lock_acquire+0x638/0x179c
[ 8.030000] [<80057884>] lock_acquire+0x60/0x74
[ 8.030000] [<80428a08>] _raw_spin_lock+0x40/0x50
[ 8.030000] [<802651d8>] stmmac_tx+0x1c/0x388
[ 8.030000] [<80026be0>] run_timer_softirq+0x180/0x23c
[ 8.030000] [<80020ccc>] __do_softirq+0xa0/0x114
[ 8.030000] [<80021204>] irq_exit+0x58/0x7c
[ 8.030000] [<8000dc80>] handle_IRQ+0x7c/0xb8
[ 8.030000] [<80008464>] gic_handle_irq+0x34/0x58
[ 8.030000] [<80429684>] __irq_svc+0x44/0x78
[ 8.030000] [<8001c3f4>] vprintk+0x41c/0x480
[ 8.030000] [<8042097c>] printk+0x18/0x24
[ 8.030000] [<805aef6c>] prepare_namespace+0x1c/0x1a4
[ 8.030000] [<805ae980>] kernel_init+0x1c8/0x20c
[ 8.030000] [<8000deb8>] kernel_thread_exit+0x0/0x8
[ 8.030000] irq event stamp: 254745
[ 8.030000] hardirqs last enabled at (254744): [<80429240>]
_raw_spin_unlock_irqrestore+0x3c/0x6c
[ 8.030000] hardirqs last disabled at (254745): [<80429674>]
__irq_svc+0x34/0x78
[ 8.030000] softirqs last enabled at (254741): [<8035d964>]
dev_queue_xmit+0x6a4/0x724
[ 8.030000] softirqs last disabled at (254737): [<8035d2d4>]
dev_queue_xmit+0x14/0x724
[ 8.030000]
[ 8.030000] other info that might help us debug this:
[ 8.030000] Possible unsafe locking scenario:
[ 8.030000]
[ 8.030000] CPU0
[ 8.030000] ----
[ 8.030000] lock(&(&priv->tx_lock)->rlock);
[ 8.030000] <Interrupt>
[ 8.030000] lock(&(&priv->tx_lock)->rlock);
[ 8.030000]
[ 8.030000] *** DEADLOCK ***
> Therefore, disabling hardware interrupts for this lock is unnecessary
> and will decrease performance.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply
* Re: [RFC PATCH 4/5] bnx2fc: Use new fcoe_sysfs control interface
From: Bhanu Prakash Gollapudi @ 2012-09-14 7:28 UTC (permalink / raw)
To: Robert Love; +Cc: netdev, gregkh, linux-scsi, devel
In-Reply-To: <20120910225930.13140.97949.stgit@fritz>
On 09/10/2012 03:59 PM, Robert Love wrote:
> Convert bnx2fc to use the new fcoe_sysfs create, delete,
> enable, disable, start and mode.
>
> bnx2fc doesn't support VN2VN. bnx2fc will not initialize
> the set_fcoe_ctlr_mode routine and therefore its instances
> will always be in FABRIC mode. There was previously an
> explicit check for the ctlr's mode, but this is no longer
> needed because not implementing set_fcoe_ctlr_mode implies
> that the ctlr cannot change from the FABRIC mode.
>
> Signed-off-by: Robert Love <robert.w.love@intel.com>
> ---
> drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 98 +++++++++++++++++++++++--------------
> 1 file changed, 60 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
> index f52f668f..560c8c8 100644
> --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
> +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
<snip>
> /**
> + * bnx2fc_alloc - Alocate a bnx2fc FCoE interface
> + *
> + * @cdev: The FCoE Controller Device to start
> + *
> + * Called from sysfs.
> + *
> + * Returns: 0 for success
> + */
> +static int bnx2fc_start(struct fcoe_ctlr_device *cdev)
> +{
> + struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(cdev);
> + struct fc_lport *lport = ctlr->lp;
> + struct fcoe_port *port = lport_priv(lport);
> + struct bnx2fc_interface *interface = port->priv;
> +
> + lport->boot_time = jiffies;
> +
> + /* Make this master N_port */
> + ctlr->lp = lport;
ctlr->lp should be set in bnx2fc_alloc() as we access it here in the
beginning of this function.
> +
> + if (!bnx2fc_link_ok(lport)) {
> + fcoe_ctlr_link_up(ctlr);
> + fc_host_port_type(lport->host) = FC_PORTTYPE_NPORT;
> + set_bit(ADAPTER_STATE_READY, &interface->hba->adapter_state);
> + }
> +
> + BNX2FC_HBA_DBG(lport, "create: START DISC\n");
> + bnx2fc_start_disc(interface);
I think more changes are required for bnx2fc as fc_lport_init() is
called just before calling fc_fabric_login() - which is called during
'start'. Because of this, if we just call 'create' followed by 'destroy'
without calling 'start', lport is not initialized and I expect to see
some panics when destroy is called.
Let me try testing your patches and send you any fixes that are required.
> + interface->enabled = true;
> +
> + return 0;
> +}
> +
> +/**
> * bnx2fc_find_hba_for_cnic - maps cnic instance to bnx2fc hba instance
> *
> * @cnic: Pointer to cnic device instance
> @@ -2271,10 +2292,8 @@ static struct fcoe_transport bnx2fc_transport = {
> .attached = false,
> .list = LIST_HEAD_INIT(bnx2fc_transport.list),
> .match = bnx2fc_match,
> - .create = bnx2fc_create,
> + .alloc = bnx2fc_alloc,
> .destroy = bnx2fc_destroy,
> - .enable = bnx2fc_enable,
> - .disable = bnx2fc_disable,
> };
>
> /**
> @@ -2514,6 +2533,9 @@ module_init(bnx2fc_mod_init);
> module_exit(bnx2fc_mod_exit);
>
> static struct fcoe_sysfs_function_template bnx2fc_fcoe_sysfs_templ = {
> + .set_fcoe_ctlr_start = bnx2fc_start,
> + .set_fcoe_ctlr_enable = bnx2fc_enable,
> + .set_fcoe_ctlr_disable = bnx2fc_disable,
> .get_fcoe_ctlr_mode = fcoe_ctlr_get_fip_mode,
> .get_fcoe_ctlr_link_fail = bnx2fc_ctlr_get_lesb,
> .get_fcoe_ctlr_vlink_fail = bnx2fc_ctlr_get_lesb,
>
>
^ permalink raw reply
* [PATCH net-next 2/2] gre: add GSO support
From: Eric Dumazet @ 2012-09-14 7:25 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Maciej Żenczykowski
From: Eric Dumazet <edumazet@google.com>
Add GSO support to GRE tunnels.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Maciej Żenczykowski <maze@google.com>
---
net/ipv4/ip_gre.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b062a98..f233c1d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -745,6 +745,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
__be32 dst;
int mtu;
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb))
+ goto tx_error;
+
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -1296,6 +1300,11 @@ static void ipgre_dev_free(struct net_device *dev)
free_netdev(dev);
}
+#define GRE_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_HW_CSUM)
+
static void ipgre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipgre_netdev_ops;
@@ -1309,6 +1318,9 @@ static void ipgre_tunnel_setup(struct net_device *dev)
dev->addr_len = 4;
dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+
+ dev->features |= GRE_FEATURES;
+ dev->hw_features |= GRE_FEATURES;
}
static int ipgre_tunnel_init(struct net_device *dev)
^ permalink raw reply related
* [PATCH net-next 1/2] net: provide a default dev->ethtool_ops
From: Eric Dumazet @ 2012-09-14 7:24 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Maciej Żenczykowski
From: Eric Dumazet <edumazet@google.com>
Instead of forcing device drivers to provide empty ethtool_ops or tweak
net/core/ethtool.c again, we could provide a generic ethtool_ops.
This occurred to me when I wanted to add GSO support to GRE tunnels.
ethtool -k support should be generic for all drivers.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Maciej Żenczykowski <maze@google.com>
---
net/core/dev.c | 5 +++
net/core/ethtool.c | 54 +++++++++++++++++--------------------------
2 files changed, 27 insertions(+), 32 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index b1e6d63..ff8dcfc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6045,6 +6045,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
strcpy(dev->name, name);
dev->group = INIT_NETDEV_GROUP;
+ if (!dev->ethtool_ops) {
+ static const struct ethtool_ops default_ethtool_ops;
+
+ dev->ethtool_ops = &default_ethtool_ops;
+ }
return dev;
free_all:
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index cbf033d..aef0162 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -175,7 +175,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
if (sset == ETH_SS_FEATURES)
return ARRAY_SIZE(netdev_features_strings);
- if (ops && ops->get_sset_count && ops->get_strings)
+ if (ops->get_sset_count && ops->get_strings)
return ops->get_sset_count(dev, sset);
else
return -EOPNOTSUPP;
@@ -311,7 +311,7 @@ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
ASSERT_RTNL();
- if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
+ if (!dev->ethtool_ops->get_settings)
return -EOPNOTSUPP;
memset(cmd, 0, sizeof(struct ethtool_cmd));
@@ -355,7 +355,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GDRVINFO;
- if (ops && ops->get_drvinfo) {
+ if (ops->get_drvinfo) {
ops->get_drvinfo(dev, &info);
} else if (dev->dev.parent && dev->dev.parent->driver) {
strlcpy(info.bus_info, dev_name(dev->dev.parent),
@@ -370,7 +370,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
* this method of obtaining string set info is deprecated;
* Use ETHTOOL_GSSET_INFO instead.
*/
- if (ops && ops->get_sset_count) {
+ if (ops->get_sset_count) {
int rc;
rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -383,9 +383,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
if (rc >= 0)
info.n_priv_flags = rc;
}
- if (ops && ops->get_regs_len)
+ if (ops->get_regs_len)
info.regdump_len = ops->get_regs_len(dev);
- if (ops && ops->get_eeprom_len)
+ if (ops->get_eeprom_len)
info.eedump_len = ops->get_eeprom_len(dev);
if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -1275,7 +1275,7 @@ static int ethtool_get_dump_flag(struct net_device *dev,
struct ethtool_dump dump;
const struct ethtool_ops *ops = dev->ethtool_ops;
- if (!dev->ethtool_ops->get_dump_flag)
+ if (!ops->get_dump_flag)
return -EOPNOTSUPP;
if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1299,8 +1299,7 @@ static int ethtool_get_dump_data(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
void *data = NULL;
- if (!dev->ethtool_ops->get_dump_data ||
- !dev->ethtool_ops->get_dump_flag)
+ if (!ops->get_dump_data || !ops->get_dump_flag)
return -EOPNOTSUPP;
if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1349,7 +1348,7 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
err = phydev->drv->ts_info(phydev, &info);
- } else if (dev->ethtool_ops && dev->ethtool_ops->get_ts_info) {
+ } else if (ops->get_ts_info) {
err = ops->get_ts_info(dev, &info);
@@ -1410,8 +1409,9 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
modinfo.eeprom_len);
}
-/* The main entry point in this file. Called from net/core/dev.c */
-
+/* The main entry point in this file. Called from net/core/dev.c
+ * with RTNL held.
+ */
int dev_ethtool(struct net *net, struct ifreq *ifr)
{
struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -1419,25 +1419,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
u32 ethcmd;
int rc;
u32 old_features;
+ const struct ethtool_ops *ops;
if (!dev || !netif_device_present(dev))
return -ENODEV;
+ ops = dev->ethtool_ops;
if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
return -EFAULT;
- if (!dev->ethtool_ops) {
- /* A few commands do not require any driver support,
- * are unprivileged, and do not change anything, so we
- * can take a shortcut to them. */
- if (ethcmd == ETHTOOL_GDRVINFO)
- return ethtool_get_drvinfo(dev, useraddr);
- else if (ethcmd == ETHTOOL_GET_TS_INFO)
- return ethtool_get_ts_info(dev, useraddr);
- else
- return -EOPNOTSUPP;
- }
-
/* Allow some commands to be done by anyone */
switch (ethcmd) {
case ETHTOOL_GSET:
@@ -1476,8 +1466,8 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
return -EPERM;
}
- if (dev->ethtool_ops->begin) {
- rc = dev->ethtool_ops->begin(dev);
+ if (ops->begin) {
+ rc = ops->begin(dev);
if (rc < 0)
return rc;
}
@@ -1504,11 +1494,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
break;
case ETHTOOL_GMSGLVL:
rc = ethtool_get_value(dev, useraddr, ethcmd,
- dev->ethtool_ops->get_msglevel);
+ ops->get_msglevel);
break;
case ETHTOOL_SMSGLVL:
rc = ethtool_set_value_void(dev, useraddr,
- dev->ethtool_ops->set_msglevel);
+ ops->set_msglevel);
break;
case ETHTOOL_GEEE:
rc = ethtool_get_eee(dev, useraddr);
@@ -1570,11 +1560,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
break;
case ETHTOOL_GPFLAGS:
rc = ethtool_get_value(dev, useraddr, ethcmd,
- dev->ethtool_ops->get_priv_flags);
+ ops->get_priv_flags);
break;
case ETHTOOL_SPFLAGS:
rc = ethtool_set_value(dev, useraddr,
- dev->ethtool_ops->set_priv_flags);
+ ops->set_priv_flags);
break;
case ETHTOOL_GRXFH:
case ETHTOOL_GRXRINGS:
@@ -1655,8 +1645,8 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
rc = -EOPNOTSUPP;
}
- if (dev->ethtool_ops->complete)
- dev->ethtool_ops->complete(dev);
+ if (ops->complete)
+ ops->complete(dev);
if (old_features != dev->features)
netdev_features_change(dev);
^ permalink raw reply related
* Re: [PATCH] Xen backend support for paged out grant targets.
From: Ian Campbell @ 2012-09-14 7:19 UTC (permalink / raw)
To: Andres Lagar-Cavilla
Cc: Andres Lagar-Cavilla, xen-devel@xen.lists.org,
Konrad Rzeszutek Wilk, David Vrabel, David Miller,
linux-kernel@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <B4C805E1-3390-4002-BECC-7D1AFF2BD56D@gmail.com>
On Thu, 2012-09-13 at 20:45 +0100, Andres Lagar-Cavilla wrote:
> On Sep 13, 2012, at 2:11 PM, Ian Campbell wrote:
>
> > On Thu, 2012-09-13 at 18:28 +0100, Andres Lagar-Cavilla wrote:
> >>
> >> * Add placeholder in array of grant table error descriptions for
> >> unrelated error code we jump over.
> >
> > Why not just define it, it's listed here:
> > http://xenbits.xen.org/docs/unstable/hypercall/include,public,grant_table.h.html#Enum_grant_status
> Well, a) we'd be defining something no one will be using (for the
> moment)
Even if no one in the kernel is using it, having "placeholder" as an
entry in GNTTABOP_error_msgs is just silly, even things which don't
understand GNTST_address_too_big directly could end up looking it up
here.
> b) I would be signing-off on something unrelated.
Let's take this patch instead then.
8<------------------------------------------------
>From cb9daaf3029accb6d5fef58b450a625b27190429 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 14 Sep 2012 08:10:06 +0100
Subject: [PATCH] xen: resynchronise grant table status codes with upstream
Adds GNTST_address_too_big and GNTST_eagain.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
include/xen/interface/grant_table.h | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index a17d844..84a8fbf 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -519,7 +519,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */
#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
#define GNTST_bad_page (-9) /* Specified page was invalid for op. */
-#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary */
+#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */
+#define GNTST_address_too_big (-11) /* transfer page address too large. */
+#define GNTST_eagain (-12) /* Operation not done; try again. */
#define GNTTABOP_error_msgs { \
"okay", \
@@ -532,7 +534,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
"no spare translation slot in the I/O MMU", \
"permission denied", \
"bad page", \
- "copy arguments cross page boundary" \
+ "copy arguments cross page boundary", \
+ "page address size too large", \
+ "operation not done; try again" \
}
#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
--
1.7.10.4
^ permalink raw reply related
* [PATCH] iproute2: bridge: finish removing replace option in man pages
From: John Fastabend @ 2012-09-14 6:50 UTC (permalink / raw)
To: shemminger; +Cc: netdev
This patch finishes removing the replace option from the bridge
man page which I missed in this commit
commit 57b9785de32404da3d2ac5483469b7fcc5a9c9e7
Author: John Fastabend <john.r.fastabend@intel.com>
Date: Mon Aug 27 10:52:31 2012 -0700
iproute2: bridge: remove replace and change options
Also add documentation for "{ self | embedded }" already shown on
the cmd line help msg.
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
man/man8/bridge.8 | 19 +++++++++++++------
1 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/man/man8/bridge.8 b/man/man8/bridge.8
index 63d166b..5ce8219 100644
--- a/man/man8/bridge.8
+++ b/man/man8/bridge.8
@@ -22,11 +22,12 @@ bridge \- show / manipulate bridge addresses and devices
\fB\-s\fR[\fItatistics\fR]
.ti -8
-.BR "bridge fdb" " { " add " | " del " | " change " | " replace " } "
+.BR "bridge fdb" " { " add " | " del " } "
.I LLADDR
.B dev
.IR DEV " { "
-.BR local " | " temp " }"
+.BR local " | " temp " } { "
+.BR self " } { " embedded " } "
.ti -8
.BR "bridge fdb" " [ " show " ] [ "
@@ -92,11 +93,9 @@ objects contain known Ethernet addresses on a link.
The corresponding commands display fdb entries, add new entries,
and delete old ones.
-.SS bridge fdb add - add a new neighbor entry
-.SS bridge fdb change - change an existing entry
-.SS bridge fdb replace - add a new entry or change an existing one
+.SS bridge fdb add - add a new fdb entry
-These commands create new neighbor records or update existing ones.
+This command creates a new fdb entry.
.TP
.BI "ADDRESS"
@@ -117,6 +116,14 @@ and is never forwarded.
- the address is a dynamic entry, and will be removed if not used.
.sp
+.B self
+- the address is associated with a software fdb (default)
+.sp
+
+.B embedded
+- the address is associated with an offloaded fdb
+.sp
+
.in -8
.SS bridge fdb delete - delete a forwarding database entry
^ permalink raw reply related
* Re: [RFC PATCH 2/5] libfcoe: Create new libfcoe control interfaces
From: Bhanu Prakash Gollapudi @ 2012-09-14 7:06 UTC (permalink / raw)
To: Robert Love; +Cc: netdev, gregkh, linux-scsi, devel
In-Reply-To: <20120910225919.13140.63240.stgit@fritz>
On 9/10/2012 3:59 PM, Robert Love wrote:
> This patch is the first in a series that will remove
> libfcoe's create, destroy, enable and disable module
> parameters and replace them with interface files in
> the new /sys/bus/fcoe subsystem.
>
> Old layout:
>
> /sys/module/libfcoe/parameters/{create,destroy,enable,disable,vn2vn_create}
>
> New layout:
>
> /sys/bus/fcoe/ctlr_{create,destroy}
> /sys/bus/fcoe/ctlr_X/{enable,disable,start}
>
> This patch moves fcoe drivers to the following
> initialization sequence-
>
> 1) create/alloc
> 2) configure
> 3) start
>
> A control sysfs interface at /sys/bus/fcoe/ctlr_create
> is added. Writing the interface name to this file
> will allocate memory and create a sysfs entry for a
> new fcoe_ctlr_device. The user may then tune the interface in
> any desired way. After configuration the user will
> echo any value into the /sys/bus/fcoe/devices/ctlr_X/start
> interface to proceed with logging in.
>
> VN2VN logins will still use the module parameters.
> A follow up patch to this one will make the 'mode'
> attribute of the fcoe_ctlr_device writable. Which will
> allow a user to change the ctlr's mode to 'VN2VN'.
>
> Signed-off-by: Robert Love <robert.w.love@intel.com>
> ---
> Documentation/ABI/testing/sysfs-bus-fcoe | 43 ++++++++++++
> drivers/scsi/fcoe/fcoe.h | 9 +++
> drivers/scsi/fcoe/fcoe_ctlr.c | 2 -
> drivers/scsi/fcoe/fcoe_sysfs.c | 78 ++++++++++++++++++++++
> drivers/scsi/fcoe/fcoe_transport.c | 105 +++++++++++++++++++++++++++++-
> include/scsi/fcoe_sysfs.h | 4 +
> include/scsi/libfcoe.h | 14 ++++
> 7 files changed, 250 insertions(+), 5 deletions(-)
>
<snip>
> diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
> index bd899bf..ccb92323 100644
> --- a/drivers/scsi/fcoe/fcoe_ctlr.c
> +++ b/drivers/scsi/fcoe/fcoe_ctlr.c
> @@ -147,7 +147,7 @@ static void fcoe_ctlr_map_dest(struct fcoe_ctlr *fip)
> */
> void fcoe_ctlr_init(struct fcoe_ctlr *fip, enum fip_state mode)
> {
> - fcoe_ctlr_set_state(fip, FIP_ST_LINK_WAIT);
> + fcoe_ctlr_set_state(fip, FIP_ST_DISABLED);
Robert, what is the reason for initializing it to DISABLED? Unless the
FIP state is FIP_ST_LINK_WAIT, fcoe_ctlr_link_up() doesn't set
lport->link_up and hence does not allow any FIP/FCoE frames to be sent out.
> fip->mode = mode;
> INIT_LIST_HEAD(&fip->fcfs);
> mutex_init(&fip->ctlr_mutex);
<snip>
> @@ -627,6 +626,108 @@ static int libfcoe_device_notification(struct notifier_block *notifier,
> return NOTIFY_OK;
> }
>
> +ssize_t fcoe_ctlr_create_store(struct bus_type *bus,
> + const char *buf, size_t count)
> +{
> + struct net_device *netdev = NULL;
> + struct fcoe_transport *ft = NULL;
> + struct fcoe_ctlr_device *ctlr_dev = NULL;
> + int rc = -ENODEV;
> + int err;
> +
> + mutex_lock(&ft_mutex);
> +
> + netdev = fcoe_if_to_netdev(buf);
> + if (!netdev) {
> + LIBFCOE_TRANSPORT_DBG("Invalid device %s.\n", buf);
> + rc = -ENODEV;
> + goto out_nodev;
> + }
> +
> + ft = fcoe_netdev_map_lookup(netdev);
> + if (ft) {
> + LIBFCOE_TRANSPORT_DBG("transport %s already has existing "
> + "FCoE instance on %s.\n",
> + ft->name, netdev->name);
> + rc = -EEXIST;
> + goto out_putdev;
> + }
> +
> + ft = fcoe_transport_lookup(netdev);
> + if (!ft) {
> + LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n",
> + netdev->name);
> + rc = -ENODEV;
> + goto out_putdev;
> + }
> +
> + /* pass to transport create */
> + err = ft->alloc ? ft->alloc(netdev) : -ENODEV;
> + if (err) {
> + fcoe_del_netdev_mapping(netdev);
> + rc = -ENOMEM;
> + goto out_putdev;
> + }
> +
> + err = fcoe_add_netdev_mapping(netdev, ft);
> + if (err) {
> + LIBFCOE_TRANSPORT_DBG("failed to add new netdev mapping "
> + "for FCoE transport %s for %s.\n",
> + ft->name, netdev->name);
> + rc = -ENODEV;
> + goto out_putdev;
> + }
> +
> + LIBFCOE_TRANSPORT_DBG("transport %s %s to create fcoe on %s.\n",
> + ft->name, (ctlr_dev) ? "succeeded" : "failed",
> + netdev->name);
Where is ctlr_dev updated? I guess you're intending to check return
status of ft->alloc() here.
> +
> +out_putdev:
> + dev_put(netdev);
> +out_nodev:
> + mutex_unlock(&ft_mutex);
> + return rc;
> +}
> +
> +ssize_t fcoe_ctlr_destroy_store(struct bus_type *bus,
> + const char *buf, size_t count)
> +{
> + int rc = -ENODEV;
> + struct net_device *netdev = NULL;
> + struct fcoe_transport *ft = NULL;
> +
> + mutex_lock(&ft_mutex);
> +
> + netdev = fcoe_if_to_netdev(buf);
> + if (!netdev) {
> + LIBFCOE_TRANSPORT_DBG("invalid device %s.\n", buf);
> + goto out_nodev;
> + }
> +
> + ft = fcoe_netdev_map_lookup(netdev);
> + if (!ft) {
> + LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n",
> + netdev->name);
> + goto out_putdev;
> + }
> +
> + /* pass to transport destroy */
> + rc = ft->destroy(netdev);
> + if (rc)
> + goto out_putdev;
> +
> + fcoe_del_netdev_mapping(netdev);
> + LIBFCOE_TRANSPORT_DBG("transport %s %s to destroy fcoe on %s.\n",
> + ft->name, (rc) ? "failed" : "succeeded",
> + netdev->name);
> + rc = count; /* required for successful return */
> +out_putdev:
> + dev_put(netdev);
> +out_nodev:
> + mutex_unlock(&ft_mutex);
> + return rc;
> +}
> +EXPORT_SYMBOL(fcoe_ctlr_destroy_store);
>
> /**
> * fcoe_transport_create() - Create a fcoe interface
> diff --git a/include/scsi/fcoe_sysfs.h b/include/scsi/fcoe_sysfs.h
> index 421ae67..8c5ea70 100644
> --- a/include/scsi/fcoe_sysfs.h
> +++ b/include/scsi/fcoe_sysfs.h
> @@ -36,6 +36,9 @@ struct fcoe_sysfs_function_template {
> void (*get_fcoe_ctlr_fcs_error)(struct fcoe_ctlr_device *);
> void (*get_fcoe_ctlr_mode)(struct fcoe_ctlr_device *);
> void (*set_fcoe_ctlr_mode)(struct fcoe_ctlr_device *);
> + int (*set_fcoe_ctlr_start)(struct fcoe_ctlr_device *);
> + int (*set_fcoe_ctlr_enable)(struct fcoe_ctlr_device *);
> + int (*set_fcoe_ctlr_disable)(struct fcoe_ctlr_device *);
> void (*get_fcoe_fcf_selected)(struct fcoe_fcf_device *);
> void (*get_fcoe_fcf_vlan_id)(struct fcoe_fcf_device *);
> };
> @@ -64,6 +67,7 @@ struct fcoe_ctlr_device {
>
> int fcf_dev_loss_tmo;
> enum fip_conn_type mode;
> + u8 started:1;
>
> /* expected in host order for displaying */
> struct fcoe_fc_els_lesb lesb;
> diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
> index 20533cc..b19a489 100644
> --- a/include/scsi/libfcoe.h
> +++ b/include/scsi/libfcoe.h
> @@ -289,8 +289,11 @@ static inline bool is_fip_mode(struct fcoe_ctlr *fip)
> * @attached: whether this transport is already attached
> * @list: list linkage to all attached transports
> * @match: handler to allow the transport driver to match up a given netdev
> + * @alloc: handler to allocate per-instance FCoE structures
> + * (no discovery or login)
> * @create: handler to sysfs entry of create for FCoE instances
> - * @destroy: handler to sysfs entry of destroy for FCoE instances
> + * @destroy: handler to delete per-instance FCoE structures
> + * (frees all memory)
> * @enable: handler to sysfs entry of enable for FCoE instances
> * @disable: handler to sysfs entry of disable for FCoE instances
> */
> @@ -299,6 +302,7 @@ struct fcoe_transport {
> bool attached;
> struct list_head list;
> bool (*match) (struct net_device *device);
> + int (*alloc) (struct net_device *device);
> int (*create) (struct net_device *device, enum fip_state fip_mode);
> int (*destroy) (struct net_device *device);
> int (*enable) (struct net_device *device);
> @@ -375,4 +379,12 @@ struct fcoe_netdev_mapping {
> int fcoe_transport_attach(struct fcoe_transport *ft);
> int fcoe_transport_detach(struct fcoe_transport *ft);
>
> +/* sysfs store handler for ctrl_control interface */
> +ssize_t fcoe_ctlr_create_store(struct bus_type *bus,
> + const char *buf, size_t count);
> +ssize_t fcoe_ctlr_destroy_store(struct bus_type *bus,
> + const char *buf, size_t count);
> +
> #endif /* _LIBFCOE_H */
> +
> +
>
>
^ permalink raw reply
* [PATCH net-next] net: dev: fix incorrect getting net device's name
From: Gao feng @ 2012-09-14 6:58 UTC (permalink / raw)
To: davem; +Cc: ebiederm, eric.dumazet, netdev, Gao feng
When moving a nic from net namespace A to net namespace B,
in dev_change_net_namespace, we call __dev_get_by_name to
decide whether netns B already has a device with the same name.
If netns B already has a device with that name, we call
dev_get_valid_name to try to get a valid name for this nic in
netns B, but net_device->nd_net still points to netns A at that point,
so the name is looked up in the wrong namespace. This patch fixes it.
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
net/core/dev.c | 28 ++++++++++++++++++++--------
1 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index b1e6d63..381ea68 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -959,18 +959,30 @@ int dev_alloc_name(struct net_device *dev, const char *name)
}
EXPORT_SYMBOL(dev_alloc_name);
-static int dev_get_valid_name(struct net_device *dev, const char *name)
+static int dev_alloc_name_ns(struct net *net,
+ struct net_device *dev,
+ const char *name)
{
- struct net *net;
+ char buf[IFNAMSIZ];
+ int ret;
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
+}
+
+static int dev_get_valid_name(struct net *net,
+ struct net_device *dev,
+ const char *name)
+{
+ BUG_ON(!net);
if (!dev_valid_name(name))
return -EINVAL;
if (strchr(name, '%'))
- return dev_alloc_name(dev, name);
+ return dev_alloc_name_ns(net, dev, name);
else if (__dev_get_by_name(net, name))
return -EEXIST;
else if (dev->name != name)
@@ -1006,7 +1018,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
- err = dev_get_valid_name(dev, newname);
+ err = dev_get_valid_name(net, dev, newname);
if (err < 0)
return err;
@@ -5585,7 +5597,7 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
- ret = dev_get_valid_name(dev, dev->name);
+ ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
goto out;
@@ -6229,7 +6241,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- if (dev_get_valid_name(dev, pat) < 0)
+ if (dev_get_valid_name(net, dev, pat) < 0)
goto out;
}
--
1.7.7.6
^ permalink raw reply related
* Re: [RFC/RFT 14/15] rtlwifi: Modify files for addition of rtl8723ae
From: Kalle Valo @ 2012-09-14 6:10 UTC (permalink / raw)
To: Larry Finger
Cc: Julian Calaby, linville-2XuSBdqkA4R54TAoqtyWWQ,
linux-wireless-u79uwXL29TY76Z2rM5mHXA,
netdev-u79uwXL29TY76Z2rM5mHXA, chaoming_li-kXabqFNEczNtrwSWzY7KCg
In-Reply-To: <505170C6.8090003-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org> writes:
>> Is this supposed to be bit #29 - I notice that COMP_USB is also bit 29.
>
> Yes, that is OK. One will only be used for PCI-based drivers, and the
> other is obviously for USB. As nearly all the bits of a 32-bit
> quantity are used, I wanted to save one if possible.
>
> In the final version, I'll code this as
>
> #define COMP_USB BIT(29)
> #define COMP_EASY_CONCURRENT COMP_USB
>
> That way will be more obvious.
You could also add a comment explaining it, just to make sure that
everyone understands it.
--
Kalle Valo
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH net-next] ipv6: recursive check rt->dst.from when call rt6_check_expired
From: roy.qing.li @ 2012-09-14 5:54 UTC (permalink / raw)
To: gaofeng, netdev
From: Li RongQing <roy.qing.li@gmail.com>
If dst cache entry dst_a is copied from dst_b, and dst_b is copied from
dst_c, then when checking whether dst_a is expired we should not stop at
dst_a->dst.from (dst_b); we should follow the chain and check dst_c.
CC: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
---
net/ipv6/route.c | 6 +-----
1 files changed, 1 insertions(+), 5 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 83dafa5..0607ee3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -369,15 +369,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static bool rt6_check_expired(const struct rt6_info *rt)
{
- struct rt6_info *ort = NULL;
-
if (rt->rt6i_flags & RTF_EXPIRES) {
if (time_after(jiffies, rt->dst.expires))
return true;
} else if (rt->dst.from) {
- ort = (struct rt6_info *) rt->dst.from;
- return (ort->rt6i_flags & RTF_EXPIRES) &&
- time_after(jiffies, ort->dst.expires);
+ return rt6_check_expired((struct rt6_info *) rt->dst.from);
}
return false;
}
--
1.7.4.1
^ permalink raw reply related
* Re: [PATCH] ipconfig: Inform user if carrier is not ready
From: Francois Romieu @ 2012-09-14 5:36 UTC (permalink / raw)
To: Erwan Velu; +Cc: David Miller, netdev
In-Reply-To: <50525758.1090609@gmail.com>
Erwan Velu <erwanaliasr1@gmail.com> :
[...]
> This patch is just adding a simple message every second telling we are
> waiting the carrier to come up.
> ---
> net/ipv4/ipconfig.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
The Signed-off-by: line is missing.
> diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
> index 67e8a6b..d9f34b7 100644
> --- a/net/ipv4/ipconfig.c
> +++ b/net/ipv4/ipconfig.c
> @@ -205,6 +205,7 @@ static int __init ic_open_devs(void)
> struct net_device *dev;
> unsigned short oflags;
> unsigned long start;
> + unsigned int loops=0;
(nit)
unsigned int loops = 0;
> last = &ic_first_dev;
> rtnl_lock();
> @@ -266,6 +267,13 @@ static int __init ic_open_devs(void)
> if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
> goto have_carrier;
>
> + loops++;
> + /* This loop is blocking the boot process until we get the
> carrier or reach the timeout.
Please split it into 80 cols max lines.
[...]
> + * Every second, we display a short message indicating we
> wait the carrier */
(you can remove this part of the comment)
--
Ueimor
^ permalink raw reply
* Re: [PATCH net-next] r8169: use unlimited DMA burst for TX
From: Francois Romieu @ 2012-09-14 5:19 UTC (permalink / raw)
To: Michal Schmidt; +Cc: netdev, Hayes Wang, Ivan Vecera
In-Reply-To: <1347234926-5263-1-git-send-email-mschmidt@redhat.com>
Michal Schmidt <mschmidt@redhat.com> :
[...]
> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Acked-by: Francois Romieu <romieu@fr.zoreil.com>
--
Ueimor
^ permalink raw reply
* linux-next: manual merge of the workqueues tree with the net tree
From: Stephen Rothwell @ 2012-09-14 5:34 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-next, linux-kernel, David Miller, netdev, Karsten Keil
[-- Attachment #1: Type: text/plain, Size: 523 bytes --]
Hi Tejun,
Today's linux-next merge of the workqueues tree got a conflict in
drivers/isdn/mISDN/hwchannel.c between commit 4b921eda5336 ("mISDN: Fix
wrong usage of flush_work_sync while holding locks") from the tree and
commit 43829731dd37 ("workqueue: deprecate flush[_delayed]_work_sync()")
from the workqueues tree.
The former supersedes the latter (I think) so I used that and can carry
the fix as necessary (no action is required).
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply
* Re: [PATCH] bnx2x: fix rx checksum validation for IPv6
From: Eilon Greenstein @ 2012-09-14 5:20 UTC (permalink / raw)
To: Michal Schmidt
Cc: Eric Dumazet, netdev, Eric Dumazet, Yaniv Rosner, Yuval Mintz,
Merav Sicron, Robert Evans, Tom Herbert, Willem de Bruijn,
David Miller, Havard Skinnemoen
In-Reply-To: <1347578079.8555.141.camel@edumazet-glaptop>
On Fri, 2012-09-14 at 01:14 +0200, Eric Dumazet wrote:
> On Fri, 2012-09-14 at 00:59 +0200, Michal Schmidt wrote:
> > Commit d6cb3e41 "bnx2x: fix checksum validation" caused a performance
> > regression for IPv6. Rx checksum offload does not work. IPv6 packets
> > are passed to the stack with CHECKSUM_NONE.
> >
> > The hardware obviously cannot perform IP checksum validation for IPv6,
> > because there is no checksum in the IPv6 header. This should not prevent
> > us from setting CHECKSUM_UNNECESSARY.
> >
> > Tested on BCM57711.
> >
> > Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
> > ---
> > drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 12 +++++++-----
> > 1 file changed, 7 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > index af20c6e..e8e97a7 100644
> > --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > @@ -662,14 +662,16 @@ void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
> > struct bnx2x_fastpath *fp,
> > struct bnx2x_eth_q_stats *qstats)
> > {
> > - /* Do nothing if no IP/L4 csum validation was done */
> > -
> > + /* Do nothing if no L4 csum validation was done.
> > + * We do not check whether IP csum was validated. For IPv4 we assume
> > + * that if the card got as far as validating the L4 csum, it also
> > + * validated the IP csum. IPv6 has no IP csum.
> > + */
> > if (cqe->fast_path_cqe.status_flags &
> > - (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
> > - ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
> > + ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)
> > return;
> >
> > - /* If both IP/L4 validation were done, check if an error was found. */
> > + /* If L4 validation was done, check if an error was found. */
> >
> > if (cqe->fast_path_cqe.type_error_flags &
> > (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
>
> Thanks for fixing this bug !
>
> Acked-by: Eric Dumazet <edumazet@google.com>
Indeed - thanks Michal!
Acked-by: Eilon Greenstein <eilong@broadcom.com>
^ permalink raw reply
* Re: [PATCH] netfilter: Allow xt_nat.c and x_tables.c to compiled in
From: Cong Wang @ 2012-09-14 3:33 UTC (permalink / raw)
To: netfilter-devel; +Cc: netdev
In-Reply-To: <87627hfi69.fsf@xmission.com>
On Fri, 14 Sep 2012 at 02:32 GMT, Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> xt_init in x_tables.c must be called before xt_nat_init in xt_nat.c
> Reorder the makefile so that x_tables.o comes before xt_nat.o in
> netfilter.o.
>
> This allows me to built a kernel with both of these modules compiled in.
>
There is a patch to fix the same issue:
http://1984.lsi.us.es/git/nf-next/commit/?id=00545bec9412d130c77f72a08d6c8b6ad21d4a1
^ permalink raw reply
* [PATCH] netfilter: Allow xt_nat.c and x_tables.c to compiled in
From: Eric W. Biederman @ 2012-09-14 2:32 UTC (permalink / raw)
To: David Miller; +Cc: netdev, netfilter-devel, Patrick McHardy
xt_init in x_tables.c must be called before xt_nat_init in xt_nat.c
Reorder the makefile so that x_tables.o comes before xt_nat.o in
netfilter.o.
This allows me to build a kernel with both of these modules compiled in.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
net/netfilter/Makefile | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 98244d4..1f652b6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -43,6 +43,9 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
+# generic X tables
+obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+
nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
@@ -64,9 +67,6 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# transparent proxy support
obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
-# generic X tables
-obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
-
# combos
obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
--
1.7.5.4
^ permalink raw reply related
* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2012-09-14 1:18 UTC (permalink / raw)
To: David Miller, netdev
Cc: linux-next, linux-kernel, Eric Dumazet, Eric W. Biederman
[-- Attachment #1: Type: text/plain, Size: 1713 bytes --]
Hi all,
Today's linux-next merge of the net-next tree got a conflict in
net/netfilter/nfnetlink_log.c between commit 0626af313957 ("netfilter:
take care of timewait sockets") from the tree and commit 9eea9515cb5f
("userns: nfnetlink_log: Report socket uids in the log sockets user
namespace") from the net-next tree.
Just context changes. I fixed it up (see below) and can carry the fix as
necessary (no action is required).
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
diff --cc net/netfilter/nfnetlink_log.c
index 5cfb5be,8cb67c4..0000000
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@@ -500,14 -501,16 +502,17 @@@ __build_packet_message(struct nfulnl_in
}
/* UID */
- if (skb->sk) {
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
- struct file *file = skb->sk->sk_socket->file;
+ sk = skb->sk;
+ if (sk && sk->sk_state != TCP_TIME_WAIT) {
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
+ struct file *file = sk->sk_socket->file;
- __be32 uid = htonl(file->f_cred->fsuid);
- __be32 gid = htonl(file->f_cred->fsgid);
+ __be32 uid = htonl(from_kuid_munged(inst->peer_user_ns,
+ file->f_cred->fsuid));
+ __be32 gid = htonl(from_kgid_munged(inst->peer_user_ns,
+ file->f_cred->fsgid));
+ /* need to unlock here since NLA_PUT may goto */
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
if (nla_put_be32(inst->skb, NFULA_UID, uid) ||
nla_put_be32(inst->skb, NFULA_GID, gid))
goto nla_put_failure;
[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply
* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2012-09-14 1:17 UTC (permalink / raw)
To: David Miller, netdev
Cc: linux-next, linux-kernel, Eric W. Biederman, Eric Dumazet
[-- Attachment #1: Type: text/plain, Size: 1409 bytes --]
Hi all,
Today's linux-next merge of the net-next tree got a conflict in
net/netfilter/xt_LOG.c between commit 0626af313957 ("netfilter: take care
of timewait sockets") from the net tree and commit 8c6e2a941ae7 ("userns:
Convert xt_LOG to print socket kuids and kgids as uids and gids") from
the net-next tree.
I fixed it up (I think - see below) and can carry the fix as necessary
(no action is required).
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
diff --cc net/netfilter/xt_LOG.c
index 91e9af4,02a2bf4..0000000
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@@ -145,19 -145,6 +145,21 @@@ static int dump_tcp_header(struct sbuf
return 0;
}
+static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk)
+{
+ if (!sk || sk->sk_state == TCP_TIME_WAIT)
+ return;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
++ const struct cred *cred = sk->sk_socket->file->f_cred;
+ sb_add(m, "UID=%u GID=%u ",
- sk->sk_socket->file->f_cred->fsuid,
- sk->sk_socket->file->f_cred->fsgid);
++ from_kuid_munged(&init_user_ns, cred->fsuid),
++ from_kgid_munged(&init_user_ns, cred->fsgid));
++ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
/* One level of recursion won't kill us */
static void dump_ipv4_packet(struct sbuff *m,
const struct nf_loginfo *info,
[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply
* Re: [PATCH net-next] r8169: use unlimited DMA burst for TX
From: Michal Schmidt @ 2012-09-13 23:27 UTC (permalink / raw)
To: 'Francois Romieu'
Cc: hayeswang, 'David Miller', netdev, ivecera
In-Reply-To: <CAF7E57619E34A17A56F6D13097342EA@realtek.com.tw>
On 09/11/2012 10:09 AM, hayeswang wrote:
> [Francois Romieu wrote:]
>> Hayes, should we:
>> - mimic Realtek's 8168, 8169 and 810x drivers ?
>> - always set TX_DMA_BURST at the max value ?
>> - do something different (per chipset) ?
>
> Our hw engineer suggests to set unlimited for both TX_DMA_BURST and RX_DMA_BURST
> for all chipsets.
Francois,
as this is exactly what the patch does, would you give an ACK?
Michal
^ permalink raw reply
* Re: [PATCH] bnx2x: fix rx checksum validation for IPv6
From: Eric Dumazet @ 2012-09-13 23:14 UTC (permalink / raw)
To: Michal Schmidt
Cc: netdev, Eilon Greenstein, Eric Dumazet, Yaniv Rosner, Yuval Mintz,
Merav Sicron, Robert Evans, Tom Herbert, Willem de Bruijn,
David Miller, Havard Skinnemoen
In-Reply-To: <1347577184-8417-1-git-send-email-mschmidt@redhat.com>
On Fri, 2012-09-14 at 00:59 +0200, Michal Schmidt wrote:
> Commit d6cb3e41 "bnx2x: fix checksum validation" caused a performance
> regression for IPv6. Rx checksum offload does not work. IPv6 packets
> are passed to the stack with CHECKSUM_NONE.
>
> The hardware obviously cannot perform IP checksum validation for IPv6,
> because there is no checksum in the IPv6 header. This should not prevent
> us from setting CHECKSUM_UNNECESSARY.
>
> Tested on BCM57711.
>
> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
> ---
> drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 12 +++++++-----
> 1 file changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> index af20c6e..e8e97a7 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> @@ -662,14 +662,16 @@ void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
> struct bnx2x_fastpath *fp,
> struct bnx2x_eth_q_stats *qstats)
> {
> - /* Do nothing if no IP/L4 csum validation was done */
> -
> + /* Do nothing if no L4 csum validation was done.
> + * We do not check whether IP csum was validated. For IPv4 we assume
> + * that if the card got as far as validating the L4 csum, it also
> + * validated the IP csum. IPv6 has no IP csum.
> + */
> if (cqe->fast_path_cqe.status_flags &
> - (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
> - ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
> + ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)
> return;
>
> - /* If both IP/L4 validation were done, check if an error was found. */
> + /* If L4 validation was done, check if an error was found. */
>
> if (cqe->fast_path_cqe.type_error_flags &
> (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
Thanks for fixing this bug !
Acked-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply
* Re: [net-next.git 3/8 (V2)] stmmac: add the initial tx coalesce schema
From: Ben Hutchings @ 2012-09-13 23:11 UTC (permalink / raw)
To: David Miller; +Cc: peppe.cavallaro, netdev
In-Reply-To: <20120913.173727.314155374058895289.davem@davemloft.net>
On Thu, 2012-09-13 at 17:37 -0400, David Miller wrote:
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Thu, 13 Sep 2012 22:10:50 +0100
>
> > On Thu, 2012-09-13 at 16:46 -0400, David Miller wrote:
> >> From: Ben Hutchings <bhutchings@solarflare.com>
> >> Date: Thu, 13 Sep 2012 21:42:51 +0100
> >>
> >> Well written NAPI drivers never need to disable hardware interrupts
> >> in their ->poll() method and its callers, neither should you.
> >
> > Perhaps you should get round to reviewing netpoll, because it does
> > exactly this.
>
> Then I don't understand the point you're trying to make.
>
> Hardware interrupt disabling has absolutely no place in the
> NAPI polling fast paths.
>
> If NAPI drivers can't be implemented without hardware interrupt
> toggling in ->poll(), we've failed.
Right.
The problem being that NAPI poll functions *are* sometimes called in
hardware interrupt context. Thus, any spinlock that may be taken by a
NAPI handler, may well need to be taken with spin_lock_irq or
spin_lock_irqsave elsewhere. (This is horrible and I think it's well
past time that we ripped the NAPI polling out of netpoll.)
I think you're right that stmmac_tx() (completion handler?) doesn't need
to disable hardware interrupts, but sadly stmmac_xmit() does right now
unless Giuseppe can work out how to make their interaction lockless.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* [PATCH] bnx2x: fix rx checksum validation for IPv6
From: Michal Schmidt @ 2012-09-13 22:59 UTC (permalink / raw)
To: netdev
Cc: Eilon Greenstein, Eric Dumazet, Yaniv Rosner, Yuval Mintz,
Merav Sicron, Robert Evans, Tom Herbert, Willem de Bruijn,
David Miller
Commit d6cb3e41 "bnx2x: fix checksum validation" caused a performance
regression for IPv6. Rx checksum offload does not work. IPv6 packets
are passed to the stack with CHECKSUM_NONE.
The hardware obviously cannot perform IP checksum validation for IPv6,
because there is no checksum in the IPv6 header. This should not prevent
us from setting CHECKSUM_UNNECESSARY.
Tested on BCM57711.
Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
---
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index af20c6e..e8e97a7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -662,14 +662,16 @@ void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
struct bnx2x_fastpath *fp,
struct bnx2x_eth_q_stats *qstats)
{
- /* Do nothing if no IP/L4 csum validation was done */
-
+ /* Do nothing if no L4 csum validation was done.
+ * We do not check whether IP csum was validated. For IPv4 we assume
+ * that if the card got as far as validating the L4 csum, it also
+ * validated the IP csum. IPv6 has no IP csum.
+ */
if (cqe->fast_path_cqe.status_flags &
- (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
- ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
+ ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)
return;
- /* If both IP/L4 validation were done, check if an error was found. */
+ /* If L4 validation was done, check if an error was found. */
if (cqe->fast_path_cqe.type_error_flags &
(ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
--
1.7.11.4
^ permalink raw reply related
* [PATCH] ipconfig: Inform user if carrier is not ready
From: Erwan Velu @ 2012-09-13 21:59 UTC (permalink / raw)
To: David Miller; +Cc: netdev
In-Reply-To: <20120913.164525.1171098883605242394.davem@davemloft.net>
From: Erwan Velu <erwanaliasr1@gmail.com>
While using the ip= option at the cmdline, the kernel can hold the boot
process for 2 minutes (CONF_CARRIER_TIMEOUT) if the carrier is not
present.
While waiting for the carrier, the user is not informed about this
situation and so could think the kernel is frozen.
This patch just adds a simple message every second saying that we are
waiting for the carrier to come up.
---
net/ipv4/ipconfig.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 67e8a6b..d9f34b7 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -205,6 +205,7 @@ static int __init ic_open_devs(void)
struct net_device *dev;
unsigned short oflags;
unsigned long start;
+ unsigned int loops=0;
last = &ic_first_dev;
rtnl_lock();
@@ -266,6 +267,13 @@ static int __init ic_open_devs(void)
if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
goto have_carrier;
+ loops++;
+ /* This loop is blocking the boot process until we get the
carrier or reach the timeout.
+ * We have to inform the user about the situation as it could
look like a kernel freeze.
+ * Every second, we display a short message indicating we wait
the carrier */
+ if ((loops % 1000) == 0) {
+ pr_info("IP-Config: Waiting Carrier (%d/%d):\n",loops /
1000, CONF_CARRIER_TIMEOUT / 1000);
+ }
msleep(1);
}
have_carrier:
--
1.7.10
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox