* [PATCH net-next 06/10] net: netcp: ethss: get phy-handle only if link interface is MAC-to-PHY
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev, linux-omap, grygorii.strashko, mugunthanvnm, linux-kernel,
arnd, davem, devicetree, mark.rutland, robh+dt
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2@ti.com>
Currently to parse phy-handle, driver doesn't check if the interface is
MAC to PHY. This patch add this check for all MAC to PHY interface types
supported by the driver.
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
drivers/net/ethernet/ti/netcp_ethss.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index cb48f88..9266961 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -2956,7 +2956,9 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
}
slave->open = false;
- slave->phy_node = of_parse_phandle(node, "phy-handle", 0);
+ if ((slave->link_interface == SGMII_LINK_MAC_PHY) ||
+ (slave->link_interface == XGMII_LINK_MAC_PHY))
+ slave->phy_node = of_parse_phandle(node, "phy-handle", 0);
slave->port_num = gbe_get_slave_port(gbe_dev, slave->slave_num);
if (slave->link_interface >= XGMII_LINK_MAC_PHY)
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 05/10] net: netcp: store network statistics in 64 bits
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev, linux-omap, grygorii.strashko, mugunthanvnm, linux-kernel,
arnd, davem, devicetree, mark.rutland, robh+dt
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2@ti.com>
From: Michael Scherban <m-scherban@ti.com>
Previously the network statistics were stored in 32 bit variable
which can cause some stats to roll over after several minutes of
high traffic. This implements 64 bit storage so larger numbers
can be stored.
Signed-off-by: Michael Scherban <m-scherban@ti.com>
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
drivers/net/ethernet/ti/netcp.h | 18 ++++++++++
drivers/net/ethernet/ti/netcp_core.c | 68 +++++++++++++++++++++++++++++-------
2 files changed, 74 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h
index a92abd6..d243c5d 100644
--- a/drivers/net/ethernet/ti/netcp.h
+++ b/drivers/net/ethernet/ti/netcp.h
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/soc/ti/knav_dma.h>
+#include <linux/u64_stats_sync.h>
/* Maximum Ethernet frame size supported by Keystone switch */
#define NETCP_MAX_FRAME_SIZE 9504
@@ -68,6 +69,20 @@ struct netcp_addr {
struct list_head node;
};
+struct netcp_stats {
+ struct u64_stats_sync syncp_rx ____cacheline_aligned_in_smp;
+ u64 rx_packets;
+ u64 rx_bytes;
+ u32 rx_errors;
+ u32 rx_dropped;
+
+ struct u64_stats_sync syncp_tx ____cacheline_aligned_in_smp;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u32 tx_errors;
+ u32 tx_dropped;
+};
+
struct netcp_intf {
struct device *dev;
struct device *ndev_dev;
@@ -88,6 +103,9 @@ struct netcp_intf {
struct napi_struct rx_napi;
struct napi_struct tx_napi;
+ /* 64-bit netcp stats */
+ struct netcp_stats stats;
+
void *rx_channel;
const char *dma_chan_name;
u32 rx_pool_size;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 286fd8d..b077ed4 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -629,6 +629,7 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp,
static void netcp_empty_rx_queue(struct netcp_intf *netcp)
{
+ struct netcp_stats *rx_stats = &netcp->stats;
struct knav_dma_desc *desc;
unsigned int dma_sz;
dma_addr_t dma;
@@ -642,16 +643,17 @@ static void netcp_empty_rx_queue(struct netcp_intf *netcp)
if (unlikely(!desc)) {
dev_err(netcp->ndev_dev, "%s: failed to unmap Rx desc\n",
__func__);
- netcp->ndev->stats.rx_errors++;
+ rx_stats->rx_errors++;
continue;
}
netcp_free_rx_desc_chain(netcp, desc);
- netcp->ndev->stats.rx_dropped++;
+ rx_stats->rx_dropped++;
}
}
static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
{
+ struct netcp_stats *rx_stats = &netcp->stats;
unsigned int dma_sz, buf_len, org_buf_len;
struct knav_dma_desc *desc, *ndesc;
unsigned int pkt_sz = 0, accum_sz;
@@ -757,8 +759,8 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
if (unlikely(ret)) {
dev_err(netcp->ndev_dev, "RX hook %d failed: %d\n",
rx_hook->order, ret);
- netcp->ndev->stats.rx_errors++;
/* Free the primary descriptor */
+ rx_stats->rx_dropped++;
knav_pool_desc_put(netcp->rx_pool, desc);
dev_kfree_skb(skb);
return 0;
@@ -767,8 +769,10 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
/* Free the primary descriptor */
knav_pool_desc_put(netcp->rx_pool, desc);
- netcp->ndev->stats.rx_packets++;
- netcp->ndev->stats.rx_bytes += skb->len;
+ u64_stats_update_begin(&rx_stats->syncp_rx);
+ rx_stats->rx_packets++;
+ rx_stats->rx_bytes += skb->len;
+ u64_stats_update_end(&rx_stats->syncp_rx);
/* push skb up the stack */
skb->protocol = eth_type_trans(skb, netcp->ndev);
@@ -777,7 +781,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
free_desc:
netcp_free_rx_desc_chain(netcp, desc);
- netcp->ndev->stats.rx_errors++;
+ rx_stats->rx_errors++;
return 0;
}
@@ -1008,6 +1012,7 @@ static void netcp_free_tx_desc_chain(struct netcp_intf *netcp,
static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
unsigned int budget)
{
+ struct netcp_stats *tx_stats = &netcp->stats;
struct knav_dma_desc *desc;
struct netcp_tx_cb *tx_cb;
struct sk_buff *skb;
@@ -1022,7 +1027,7 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
desc = knav_pool_desc_unmap(netcp->tx_pool, dma, dma_sz);
if (unlikely(!desc)) {
dev_err(netcp->ndev_dev, "failed to unmap Tx desc\n");
- netcp->ndev->stats.tx_errors++;
+ tx_stats->tx_errors++;
continue;
}
@@ -1033,7 +1038,7 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
netcp_free_tx_desc_chain(netcp, desc, dma_sz);
if (!skb) {
dev_err(netcp->ndev_dev, "No skb in Tx desc\n");
- netcp->ndev->stats.tx_errors++;
+ tx_stats->tx_errors++;
continue;
}
@@ -1050,8 +1055,10 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
netif_wake_subqueue(netcp->ndev, subqueue);
}
- netcp->ndev->stats.tx_packets++;
- netcp->ndev->stats.tx_bytes += skb->len;
+ u64_stats_update_begin(&tx_stats->syncp_tx);
+ tx_stats->tx_packets++;
+ tx_stats->tx_bytes += skb->len;
+ u64_stats_update_end(&tx_stats->syncp_tx);
dev_kfree_skb(skb);
pkts++;
}
@@ -1272,6 +1279,7 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
static int netcp_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
struct netcp_intf *netcp = netdev_priv(ndev);
+ struct netcp_stats *tx_stats = &netcp->stats;
int subqueue = skb_get_queue_mapping(skb);
struct knav_dma_desc *desc;
int desc_count, ret = 0;
@@ -1287,7 +1295,7 @@ static int netcp_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev)
/* If we get here, the skb has already been dropped */
dev_warn(netcp->ndev_dev, "padding failed (%d), packet dropped\n",
ret);
- ndev->stats.tx_dropped++;
+ tx_stats->tx_dropped++;
return ret;
}
skb->len = NETCP_MIN_PACKET_SIZE;
@@ -1315,7 +1323,7 @@ static int netcp_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK;
drop:
- ndev->stats.tx_dropped++;
+ tx_stats->tx_dropped++;
if (desc)
netcp_free_tx_desc_chain(netcp, desc, sizeof(*desc));
dev_kfree_skb(skb);
@@ -1897,12 +1905,46 @@ static int netcp_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
return 0;
}
+static struct rtnl_link_stats64 *
+netcp_get_stats(struct net_device *ndev, struct rtnl_link_stats64 *stats)
+{
+ struct netcp_intf *netcp = netdev_priv(ndev);
+ struct netcp_stats *p = &netcp->stats;
+ u64 rxpackets, rxbytes, txpackets, txbytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&p->syncp_rx);
+ rxpackets = p->rx_packets;
+ rxbytes = p->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&p->syncp_rx, start));
+
+ do {
+ start = u64_stats_fetch_begin_irq(&p->syncp_tx);
+ txpackets = p->tx_packets;
+ txbytes = p->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&p->syncp_tx, start));
+
+ stats->rx_packets = rxpackets;
+ stats->rx_bytes = rxbytes;
+ stats->tx_packets = txpackets;
+ stats->tx_bytes = txbytes;
+
+ /* The following are stored as 32 bit */
+ stats->rx_errors = p->rx_errors;
+ stats->rx_dropped = p->rx_dropped;
+ stats->tx_dropped = p->tx_dropped;
+
+ return stats;
+}
+
static const struct net_device_ops netcp_netdev_ops = {
.ndo_open = netcp_ndo_open,
.ndo_stop = netcp_ndo_stop,
.ndo_start_xmit = netcp_ndo_start_xmit,
.ndo_set_rx_mode = netcp_set_rx_mode,
.ndo_do_ioctl = netcp_ndo_ioctl,
+ .ndo_get_stats64 = netcp_get_stats,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_vlan_rx_add_vid = netcp_rx_add_vid,
@@ -1949,6 +1991,8 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
INIT_LIST_HEAD(&netcp->txhook_list_head);
INIT_LIST_HEAD(&netcp->rxhook_list_head);
INIT_LIST_HEAD(&netcp->addr_list);
+ u64_stats_init(&netcp->stats.syncp_rx);
+ u64_stats_init(&netcp->stats.syncp_tx);
netcp->netcp_device = netcp_device;
netcp->dev = netcp_device->device;
netcp->ndev = ndev;
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 03/10] net: netcp: extract eflag from desc for rx_hook handling
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev, linux-omap, grygorii.strashko, mugunthanvnm, linux-kernel,
arnd, davem, devicetree, mark.rutland, robh+dt
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2@ti.com>
Extract the eflag bits from the received desc and pass it down
the rx_hook chain to be available for netcp modules. Also the
psdata and epib data has to be inspected by the netcp modules.
So the desc can be freed only after returning from the rx_hook.
So move knav_pool_desc_put() after the rx_hook processing.
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
---
drivers/net/ethernet/ti/netcp.h | 1 +
drivers/net/ethernet/ti/netcp_core.c | 20 +++++++++++++++++---
include/linux/soc/ti/knav_dma.h | 2 ++
3 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h
index 0f58c58..a92abd6 100644
--- a/drivers/net/ethernet/ti/netcp.h
+++ b/drivers/net/ethernet/ti/netcp.h
@@ -115,6 +115,7 @@ struct netcp_packet {
struct sk_buff *skb;
__le32 *epib;
u32 *psdata;
+ u32 eflags;
unsigned int psdata_len;
struct netcp_intf *netcp;
struct netcp_tx_pipe *tx_pipe;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index c243335..a136c56 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -122,6 +122,13 @@ static void get_pkt_info(dma_addr_t *buff, u32 *buff_len, dma_addr_t *ndesc,
*ndesc = le32_to_cpu(desc->next_desc);
}
+static void get_desc_info(u32 *desc_info, u32 *pkt_info,
+ struct knav_dma_desc *desc)
+{
+ *desc_info = le32_to_cpu(desc->desc_info);
+ *pkt_info = le32_to_cpu(desc->packet_info);
+}
+
static u32 get_sw_data(int index, struct knav_dma_desc *desc)
{
/* No Endian conversion needed as this data is untouched by hw */
@@ -653,6 +660,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
struct netcp_packet p_info;
struct sk_buff *skb;
void *org_buf_ptr;
+ u32 tmp;
dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz);
if (!dma_desc)
@@ -724,9 +732,6 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
knav_pool_desc_put(netcp->rx_pool, ndesc);
}
- /* Free the primary descriptor */
- knav_pool_desc_put(netcp->rx_pool, desc);
-
/* check for packet len and warn */
if (unlikely(pkt_sz != accum_sz))
dev_dbg(netcp->ndev_dev, "mismatch in packet size(%d) & sum of fragments(%d)\n",
@@ -739,6 +744,11 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
p_info.skb = skb;
skb->dev = netcp->ndev;
p_info.rxtstamp_complete = false;
+ get_desc_info(&tmp, &p_info.eflags, desc);
+ p_info.epib = desc->epib;
+ p_info.psdata = (u32 __force *)desc->psdata;
+ p_info.eflags = ((p_info.eflags >> KNAV_DMA_DESC_EFLAGS_SHIFT) &
+ KNAV_DMA_DESC_EFLAGS_MASK);
list_for_each_entry(rx_hook, &netcp->rxhook_list_head, list) {
int ret;
@@ -748,10 +758,14 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
dev_err(netcp->ndev_dev, "RX hook %d failed: %d\n",
rx_hook->order, ret);
netcp->ndev->stats.rx_errors++;
+ /* Free the primary descriptor */
+ knav_pool_desc_put(netcp->rx_pool, desc);
dev_kfree_skb(skb);
return 0;
}
}
+ /* Free the primary descriptor */
+ knav_pool_desc_put(netcp->rx_pool, desc);
netcp->ndev->stats.rx_packets++;
netcp->ndev->stats.rx_bytes += skb->len;
diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
index 35cb926..2b78826 100644
--- a/include/linux/soc/ti/knav_dma.h
+++ b/include/linux/soc/ti/knav_dma.h
@@ -41,6 +41,8 @@
#define KNAV_DMA_DESC_RETQ_SHIFT 0
#define KNAV_DMA_DESC_RETQ_MASK MASK(14)
#define KNAV_DMA_DESC_BUF_LEN_MASK MASK(22)
+#define KNAV_DMA_DESC_EFLAGS_MASK MASK(4)
+#define KNAV_DMA_DESC_EFLAGS_SHIFT 20
#define KNAV_DMA_NUM_EPIB_WORDS 4
#define KNAV_DMA_NUM_PS_WORDS 16
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 10/10] net: netcp: ale: add proper ale entry mask bits for netcp switch ALE
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev-u79uwXL29TY76Z2rM5mHXA, linux-omap-u79uwXL29TY76Z2rM5mHXA,
grygorii.strashko-l0cyMroinI0, mugunthanvnm-l0cyMroinI0,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, arnd-r2nGTMty4D4,
davem-fT/PcQaiUtIeIZ0/mPfg9Q, devicetree-u79uwXL29TY76Z2rM5mHXA,
mark.rutland-5wv7dgnIgG8, robh+dt-DgEjT+Ai2ygdnm+yROfE0A
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2-l0cyMroinI0@public.gmane.org>
For NetCP NU Switch ALE, some of the mask bits are different than
defaults used in the driver. Add a new macro DEFINE_ALE_FIELD1 that use
a configurable mask bits and use it in the driver. These bits are set to
correct values by using the new variables added to cpsw_ale structure
and re-used in the macros. The parameter nu_switch_ale is configured by
the caller driver to indicate the ALE is for that switch and is used in
the ALE driver to do customization as needed.
Signed-off-by: Murali Karicheri <m-karicheri2-l0cyMroinI0@public.gmane.org>
Signed-off-by: Sekhar Nori <nsekhar-l0cyMroinI0@public.gmane.org>
---
drivers/net/ethernet/ti/cpsw_ale.c | 99 ++++++++++++++++++++++++++++++--------
drivers/net/ethernet/ti/cpsw_ale.h | 4 ++
2 files changed, 84 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index 62a18d6..ddd43e0 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -29,6 +29,7 @@
#define ALE_VERSION_MAJOR(rev, mask) (((rev) >> 8) & (mask))
#define ALE_VERSION_MINOR(rev) (rev & 0xff)
+#define ALE_VERSION_1R3 0x0103
#define ALE_VERSION_1R4 0x0104
/* ALE Registers */
@@ -46,6 +47,7 @@
#define ALE_UNKNOWNVLAN_UNREG_MCAST_FLOOD 0x94
#define ALE_UNKNOWNVLAN_REG_MCAST_FLOOD 0x98
#define ALE_UNKNOWNVLAN_FORCE_UNTAG_EGRESS 0x9C
+#define ALE_VLAN_MASK_MUX(reg) (0xc0 + (0x4 * (reg)))
#define ALE_TABLE_WRITE BIT(31)
@@ -96,20 +98,34 @@ static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits,
cpsw_ale_set_field(ale_entry, start, bits, value); \
}
+#define DEFINE_ALE_FIELD1(name, start) \
+static inline int cpsw_ale_get_##name(u32 *ale_entry, u32 bits) \
+{ \
+ return cpsw_ale_get_field(ale_entry, start, bits); \
+} \
+static inline void cpsw_ale_set_##name(u32 *ale_entry, u32 value, \
+ u32 bits) \
+{ \
+ cpsw_ale_set_field(ale_entry, start, bits, value); \
+}
+
DEFINE_ALE_FIELD(entry_type, 60, 2)
DEFINE_ALE_FIELD(vlan_id, 48, 12)
DEFINE_ALE_FIELD(mcast_state, 62, 2)
-DEFINE_ALE_FIELD(port_mask, 66, 3)
+DEFINE_ALE_FIELD1(port_mask, 66)
DEFINE_ALE_FIELD(super, 65, 1)
DEFINE_ALE_FIELD(ucast_type, 62, 2)
-DEFINE_ALE_FIELD(port_num, 66, 2)
+DEFINE_ALE_FIELD1(port_num, 66)
DEFINE_ALE_FIELD(blocked, 65, 1)
DEFINE_ALE_FIELD(secure, 64, 1)
-DEFINE_ALE_FIELD(vlan_untag_force, 24, 3)
-DEFINE_ALE_FIELD(vlan_reg_mcast, 16, 3)
-DEFINE_ALE_FIELD(vlan_unreg_mcast, 8, 3)
-DEFINE_ALE_FIELD(vlan_member_list, 0, 3)
+DEFINE_ALE_FIELD1(vlan_untag_force, 24)
+DEFINE_ALE_FIELD1(vlan_reg_mcast, 16)
+DEFINE_ALE_FIELD1(vlan_unreg_mcast, 8)
+DEFINE_ALE_FIELD1(vlan_member_list, 0)
DEFINE_ALE_FIELD(mcast, 40, 1)
+/* ALE NetCP nu switch specific */
+DEFINE_ALE_FIELD(vlan_unreg_mcast_idx, 20, 3)
+DEFINE_ALE_FIELD(vlan_reg_mcast_idx, 44, 3)
/* The MAC address field in the ALE entry cannot be macroized as above */
static inline void cpsw_ale_get_addr(u32 *ale_entry, u8 *addr)
@@ -235,14 +251,16 @@ static void cpsw_ale_flush_mcast(struct cpsw_ale *ale, u32 *ale_entry,
{
int mask;
- mask = cpsw_ale_get_port_mask(ale_entry);
+ mask = cpsw_ale_get_port_mask(ale_entry,
+ ale->port_mask_bits);
if ((mask & port_mask) == 0)
return; /* ports dont intersect, not interested */
mask &= ~port_mask;
/* free if only remaining port is host port */
if (mask)
- cpsw_ale_set_port_mask(ale_entry, mask);
+ cpsw_ale_set_port_mask(ale_entry, mask,
+ ale->port_mask_bits);
else
cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
}
@@ -303,7 +321,7 @@ int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port,
cpsw_ale_set_ucast_type(ale_entry, ALE_UCAST_PERSISTANT);
cpsw_ale_set_secure(ale_entry, (flags & ALE_SECURE) ? 1 : 0);
cpsw_ale_set_blocked(ale_entry, (flags & ALE_BLOCKED) ? 1 : 0);
- cpsw_ale_set_port_num(ale_entry, port);
+ cpsw_ale_set_port_num(ale_entry, port, ale->port_num_bits);
idx = cpsw_ale_match_addr(ale, addr, (flags & ALE_VLAN) ? vid : 0);
if (idx < 0)
@@ -350,9 +368,11 @@ int cpsw_ale_add_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
cpsw_ale_set_super(ale_entry, (flags & ALE_BLOCKED) ? 1 : 0);
cpsw_ale_set_mcast_state(ale_entry, mcast_state);
- mask = cpsw_ale_get_port_mask(ale_entry);
+ mask = cpsw_ale_get_port_mask(ale_entry,
+ ale->port_mask_bits);
port_mask |= mask;
- cpsw_ale_set_port_mask(ale_entry, port_mask);
+ cpsw_ale_set_port_mask(ale_entry, port_mask,
+ ale->port_mask_bits);
if (idx < 0)
idx = cpsw_ale_match_free(ale);
@@ -379,7 +399,8 @@ int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
cpsw_ale_read(ale, idx, ale_entry);
if (port_mask)
- cpsw_ale_set_port_mask(ale_entry, port_mask);
+ cpsw_ale_set_port_mask(ale_entry, port_mask,
+ ale->port_mask_bits);
else
cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
@@ -388,6 +409,21 @@ int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
}
EXPORT_SYMBOL_GPL(cpsw_ale_del_mcast);
+/* ALE NetCP NU switch specific vlan functions */
+static void cpsw_ale_set_vlan_mcast(struct cpsw_ale *ale, u32 *ale_entry,
+ int reg_mcast, int unreg_mcast)
+{
+ int idx;
+
+ /* Set VLAN registered multicast flood mask */
+ idx = cpsw_ale_get_vlan_reg_mcast_idx(ale_entry);
+ writel(reg_mcast, ale->params.ale_regs + ALE_VLAN_MASK_MUX(idx));
+
+ /* Set VLAN unregistered multicast flood mask */
+ idx = cpsw_ale_get_vlan_unreg_mcast_idx(ale_entry);
+ writel(unreg_mcast, ale->params.ale_regs + ALE_VLAN_MASK_MUX(idx));
+}
+
int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port, int untag,
int reg_mcast, int unreg_mcast)
{
@@ -401,10 +437,16 @@ int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port, int untag,
cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_VLAN);
cpsw_ale_set_vlan_id(ale_entry, vid);
- cpsw_ale_set_vlan_untag_force(ale_entry, untag);
- cpsw_ale_set_vlan_reg_mcast(ale_entry, reg_mcast);
- cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast);
- cpsw_ale_set_vlan_member_list(ale_entry, port);
+ cpsw_ale_set_vlan_untag_force(ale_entry, untag, ale->vlan_field_bits);
+ if (!ale->params.nu_switch_ale) {
+ cpsw_ale_set_vlan_reg_mcast(ale_entry, reg_mcast,
+ ale->vlan_field_bits);
+ cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast,
+ ale->vlan_field_bits);
+ } else {
+ cpsw_ale_set_vlan_mcast(ale, ale_entry, reg_mcast, unreg_mcast);
+ }
+ cpsw_ale_set_vlan_member_list(ale_entry, port, ale->vlan_field_bits);
if (idx < 0)
idx = cpsw_ale_match_free(ale);
@@ -430,7 +472,8 @@ int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask)
cpsw_ale_read(ale, idx, ale_entry);
if (port_mask)
- cpsw_ale_set_vlan_member_list(ale_entry, port_mask);
+ cpsw_ale_set_vlan_member_list(ale_entry, port_mask,
+ ale->vlan_field_bits);
else
cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
@@ -458,12 +501,15 @@ void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti)
if (type != ALE_TYPE_VLAN)
continue;
- unreg_mcast = cpsw_ale_get_vlan_unreg_mcast(ale_entry);
+ unreg_mcast =
+ cpsw_ale_get_vlan_unreg_mcast(ale_entry,
+ ale->vlan_field_bits);
if (allmulti)
unreg_mcast |= 1;
else
unreg_mcast &= ~1;
- cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast);
+ cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast,
+ ale->vlan_field_bits);
cpsw_ale_write(ale, idx, ale_entry);
}
}
@@ -769,6 +815,14 @@ void cpsw_ale_start(struct cpsw_ale *ale)
dev_info(ale->params.dev,
"ALE Table size %ld\n", ale->params.ale_entries);
+ /* set default bits for existing h/w */
+ ale->port_mask_bits = 3;
+ ale->port_num_bits = 2;
+ ale->vlan_field_bits = 3;
+
+ /* Set defaults override for ALE on NetCP NU switch and for version
+ * 1R3
+ */
if (ale->params.nu_switch_ale) {
/* Separate registers for unknown vlan configuration.
* Also there are N bits, where N is number of ale
@@ -793,6 +847,13 @@ void cpsw_ale_start(struct cpsw_ale *ale)
ale_controls[ALE_PORT_UNTAGGED_EGRESS].shift = 0;
ale_controls[ALE_PORT_UNTAGGED_EGRESS].offset =
ALE_UNKNOWNVLAN_FORCE_UNTAG_EGRESS;
+ ale->port_mask_bits = ale->params.ale_ports;
+ ale->port_num_bits = ale->params.ale_ports - 1;
+ ale->vlan_field_bits = ale->params.ale_ports;
+ } else if (ale->version == ALE_VERSION_1R3) {
+ ale->port_mask_bits = ale->params.ale_ports;
+ ale->port_num_bits = 3;
+ ale->vlan_field_bits = ale->params.ale_ports;
}
cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1);
diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h
index b1c7954..25d24e8 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.h
+++ b/drivers/net/ethernet/ti/cpsw_ale.h
@@ -39,6 +39,10 @@ struct cpsw_ale {
unsigned long ageout;
int allmulti;
u32 version;
+ /* These bits are different on NetCP NU Switch ALE */
+ u32 port_mask_bits;
+ u32 port_num_bits;
+ u32 vlan_field_bits;
};
enum cpsw_ale_control {
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH net-next 09/10] net: netcp: ale: use ale_status to size the ale table
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev-u79uwXL29TY76Z2rM5mHXA, linux-omap-u79uwXL29TY76Z2rM5mHXA,
grygorii.strashko-l0cyMroinI0, mugunthanvnm-l0cyMroinI0,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, arnd-r2nGTMty4D4,
davem-fT/PcQaiUtIeIZ0/mPfg9Q, devicetree-u79uwXL29TY76Z2rM5mHXA,
mark.rutland-5wv7dgnIgG8, robh+dt-DgEjT+Ai2ygdnm+yROfE0A
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2-l0cyMroinI0@public.gmane.org>
ALE h/w on newer version of NetCP (K2E/L/G) does provide a ALE_STATUS
register for the size of the ALE Table implemented in h/w. Currently
for example we set ALE Table size to 1024 for NetCP ALE on
K2E even though the ALE Status/Documentation shows it has 8192 entries.
So take advantage of this register to read the size of ALE table supported
and use that value in the driver for the newer version of NetCP ALE.
For NetCP lite, ALE Table size is much less (64) and indicated by a size
of zero in ALE_STATUS. So use that as a default for now. While at it,
also fix the ale table size on 10G switch to 2048 per User guide
http://www.ti.com/lit/ug/spruhj5/spruhj5.pdf
Signed-off-by: Murali Karicheri <m-karicheri2-l0cyMroinI0@public.gmane.org>
Signed-off-by: Sekhar Nori <nsekhar-l0cyMroinI0@public.gmane.org>
---
drivers/net/ethernet/ti/cpsw_ale.c | 31 ++++++++++++++++++++++++++++++-
drivers/net/ethernet/ti/netcp_ethss.c | 4 +---
2 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index e15db39..62a18d6 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -33,6 +33,7 @@
/* ALE Registers */
#define ALE_IDVER 0x00
+#define ALE_STATUS 0x04
#define ALE_CONTROL 0x08
#define ALE_PRESCALE 0x10
#define ALE_UNKNOWNVLAN 0x18
@@ -58,6 +59,10 @@
#define ALE_UCAST_OUI 2
#define ALE_UCAST_TOUCHED 3
+#define ALE_TABLE_SIZE_MULTIPLIER 1024
+#define ALE_STATUS_SIZE_MASK 0x1f
+#define ALE_TABLE_SIZE_DEFAULT 64
+
static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
{
int idx;
@@ -728,7 +733,7 @@ static void cpsw_ale_timer(unsigned long arg)
void cpsw_ale_start(struct cpsw_ale *ale)
{
- u32 rev;
+ u32 rev, ale_entries;
rev = __raw_readl(ale->params.ale_regs + ALE_IDVER);
if (!ale->params.major_ver_mask)
@@ -740,6 +745,30 @@ void cpsw_ale_start(struct cpsw_ale *ale)
ALE_VERSION_MAJOR(rev, ale->params.major_ver_mask),
ALE_VERSION_MINOR(rev));
+ if (!ale->params.ale_entries) {
+ ale_entries =
+ __raw_readl(ale->params.ale_regs + ALE_STATUS) &
+ ALE_STATUS_SIZE_MASK;
+ /* ALE available on newer NetCP switches has introduced
+ * a register, ALE_STATUS, to indicate the size of ALE
+ * table which shows the size as a multiple of 1024 entries.
+ * For these, params.ale_entries will be set to zero. So
+ * read the register and update the value of ale_entries.
+ * ALE table on NetCP lite, is much smaller and is indicated
+ * by a value of zero in ALE_STATUS. So use a default value
+ * of ALE_TABLE_SIZE_DEFAULT for this. Caller is expected
+ * to set the value of ale_entries for all other versions
+ * of ALE.
+ */
+ if (!ale_entries)
+ ale_entries = ALE_TABLE_SIZE_DEFAULT;
+ else
+ ale_entries *= ALE_TABLE_SIZE_MULTIPLIER;
+ ale->params.ale_entries = ale_entries;
+ }
+ dev_info(ale->params.dev,
+ "ALE Table size %ld\n", ale->params.ale_entries);
+
if (ale->params.nu_switch_ale) {
/* Separate registers for unknown vlan configuration.
* Also there are N bits, where N is number of ale
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index b37fb73..80d68cb 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -94,7 +94,6 @@
#define GBENU_CPTS_OFFSET 0x1d000
#define GBENU_ALE_OFFSET 0x1e000
#define GBENU_HOST_PORT_NUM 0
-#define GBENU_NUM_ALE_ENTRIES 1024
/* 10G Ethernet SS defines */
#define XGBE_MODULE_NAME "netcp-xgbe"
@@ -114,7 +113,7 @@
#define XGBE10_ALE_OFFSET 0x700
#define XGBE10_HW_STATS_OFFSET 0x800
#define XGBE10_HOST_PORT_NUM 0
-#define XGBE10_NUM_ALE_ENTRIES 1024
+#define XGBE10_NUM_ALE_ENTRIES 2048
#define GBE_TIMER_INTERVAL (HZ / 2)
@@ -3548,7 +3547,6 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
gbe_dev->ale_reg = gbe_dev->switch_regs + GBENU_ALE_OFFSET;
gbe_dev->ale_ports = gbe_dev->max_num_ports;
gbe_dev->host_port = GBENU_HOST_PORT_NUM;
- gbe_dev->ale_entries = GBE13_NUM_ALE_ENTRIES;
gbe_dev->stats_en_mask = (1 << (gbe_dev->max_num_ports)) - 1;
/* Subsystem registers */
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH net-next 08/10] net: netcp: ale: update to support unknown vlan controls for NU switch
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev, linux-omap, grygorii.strashko, mugunthanvnm, linux-kernel,
arnd, davem, devicetree, mark.rutland, robh+dt
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2@ti.com>
In NU Ethernet switch used on some of the Keystone SoCs, there is
separate UNKNOWNVLAN register for membership, unreg mcast flood, reg
mcast flood and force untag egress bits in ALE. So control for these
fields require different address offset, shift and size of field.
As this ALE has the same version number as ALE in CPSW found on other
SoCs, customazation based on version number is not possible. So
use a configuration parameter, nu_switch_ale, to identify the ALE
ALE found in NU Switch. Different treatment is needed for NU Switch
ALE due to difference in the ale table bits, separate unknown vlan
registers etc. The register information available in ale_controls,
needs to be updated to support the netcp NU switch h/w. So it is not
constant array any more since it needs to be updated based
on ALE type. The header of the file is also updated to indicate it
supports N port switch ALE, not just 3 port. The version mask is
3 bits in NU Switch ALE vs 8 bits on other ALE types.
While at it, change the debug print to info print so that ALE
version gets displayed in boot log.
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
drivers/net/ethernet/ti/cpsw_ale.c | 50 +++++++++++++++++++++++++++++++----
drivers/net/ethernet/ti/cpsw_ale.h | 13 ++++++++-
drivers/net/ethernet/ti/netcp_ethss.c | 5 +++-
3 files changed, 61 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index 43b061b..e15db39 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -1,5 +1,5 @@
/*
- * Texas Instruments 3-Port Ethernet Switch Address Lookup Engine
+ * Texas Instruments N-Port Ethernet Switch Address Lookup Engine
*
* Copyright (C) 2012 Texas Instruments
*
@@ -27,8 +27,9 @@
#define BITMASK(bits) (BIT(bits) - 1)
-#define ALE_VERSION_MAJOR(rev) ((rev >> 8) & 0xff)
+#define ALE_VERSION_MAJOR(rev, mask) (((rev) >> 8) & (mask))
#define ALE_VERSION_MINOR(rev) (rev & 0xff)
+#define ALE_VERSION_1R4 0x0104
/* ALE Registers */
#define ALE_IDVER 0x00
@@ -39,6 +40,12 @@
#define ALE_TABLE 0x34
#define ALE_PORTCTL 0x40
+/* ALE NetCP NU switch specific Registers */
+#define ALE_UNKNOWNVLAN_MEMBER 0x90
+#define ALE_UNKNOWNVLAN_UNREG_MCAST_FLOOD 0x94
+#define ALE_UNKNOWNVLAN_REG_MCAST_FLOOD 0x98
+#define ALE_UNKNOWNVLAN_FORCE_UNTAG_EGRESS 0x9C
+
#define ALE_TABLE_WRITE BIT(31)
#define ALE_TYPE_FREE 0
@@ -464,7 +471,7 @@ struct ale_control_info {
int bits;
};
-static const struct ale_control_info ale_controls[ALE_NUM_CONTROLS] = {
+static struct ale_control_info ale_controls[ALE_NUM_CONTROLS] = {
[ALE_ENABLE] = {
.name = "enable",
.offset = ALE_CONTROL,
@@ -724,8 +731,41 @@ void cpsw_ale_start(struct cpsw_ale *ale)
u32 rev;
rev = __raw_readl(ale->params.ale_regs + ALE_IDVER);
- dev_dbg(ale->params.dev, "initialized cpsw ale revision %d.%d\n",
- ALE_VERSION_MAJOR(rev), ALE_VERSION_MINOR(rev));
+ if (!ale->params.major_ver_mask)
+ ale->params.major_ver_mask = 0xff;
+ ale->version =
+ (ALE_VERSION_MAJOR(rev, ale->params.major_ver_mask) << 8) |
+ ALE_VERSION_MINOR(rev);
+ dev_info(ale->params.dev, "initialized cpsw ale version %d.%d\n",
+ ALE_VERSION_MAJOR(rev, ale->params.major_ver_mask),
+ ALE_VERSION_MINOR(rev));
+
+ if (ale->params.nu_switch_ale) {
+ /* Separate registers for unknown vlan configuration.
+ * Also there are N bits, where N is number of ale
+ * ports and shift value should be 0
+ */
+ ale_controls[ALE_PORT_UNKNOWN_VLAN_MEMBER].bits =
+ ale->params.ale_ports;
+ ale_controls[ALE_PORT_UNKNOWN_VLAN_MEMBER].offset =
+ ALE_UNKNOWNVLAN_MEMBER;
+ ale_controls[ALE_PORT_UNKNOWN_MCAST_FLOOD].bits =
+ ale->params.ale_ports;
+ ale_controls[ALE_PORT_UNKNOWN_MCAST_FLOOD].shift = 0;
+ ale_controls[ALE_PORT_UNKNOWN_MCAST_FLOOD].offset =
+ ALE_UNKNOWNVLAN_UNREG_MCAST_FLOOD;
+ ale_controls[ALE_PORT_UNKNOWN_REG_MCAST_FLOOD].bits =
+ ale->params.ale_ports;
+ ale_controls[ALE_PORT_UNKNOWN_REG_MCAST_FLOOD].shift = 0;
+ ale_controls[ALE_PORT_UNKNOWN_REG_MCAST_FLOOD].offset =
+ ALE_UNKNOWNVLAN_REG_MCAST_FLOOD;
+ ale_controls[ALE_PORT_UNTAGGED_EGRESS].bits =
+ ale->params.ale_ports;
+ ale_controls[ALE_PORT_UNTAGGED_EGRESS].shift = 0;
+ ale_controls[ALE_PORT_UNTAGGED_EGRESS].offset =
+ ALE_UNKNOWNVLAN_FORCE_UNTAG_EGRESS;
+ }
+
cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1);
cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1);
diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h
index a700189..b1c7954 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.h
+++ b/drivers/net/ethernet/ti/cpsw_ale.h
@@ -1,5 +1,5 @@
/*
- * Texas Instruments 3-Port Ethernet Switch Address Lookup Engine APIs
+ * Texas Instruments N-Port Ethernet Switch Address Lookup Engine APIs
*
* Copyright (C) 2012 Texas Instruments
*
@@ -21,6 +21,16 @@ struct cpsw_ale_params {
unsigned long ale_ageout; /* in secs */
unsigned long ale_entries;
unsigned long ale_ports;
+ /* NU Switch has specific handling as number of bits in ALE entries
+ * are different than other versions of ALE. Also there are specific
+ * registers for unknown vlan specific fields. So use nu_switch_ale
+ * to identify this hardware.
+ */
+ bool nu_switch_ale;
+ /* mask bit used in NU Switch ALE is 3 bits instead of 8 bits. So
+ * pass it from caller.
+ */
+ u32 major_ver_mask;
};
struct cpsw_ale {
@@ -28,6 +38,7 @@ struct cpsw_ale {
struct timer_list timer;
unsigned long ageout;
int allmulti;
+ u32 version;
};
enum cpsw_ale_control {
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 4b2a911..b37fb73 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -3716,7 +3716,10 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
ale_params.ale_ageout = GBE_DEFAULT_ALE_AGEOUT;
ale_params.ale_entries = gbe_dev->ale_entries;
ale_params.ale_ports = gbe_dev->ale_ports;
-
+ if (IS_SS_ID_MU(gbe_dev)) {
+ ale_params.major_ver_mask = 0x7;
+ ale_params.nu_switch_ale = true;
+ }
gbe_dev->ale = cpsw_ale_create(&ale_params);
if (!gbe_dev->ale) {
dev_err(gbe_dev->dev, "error initializing ale engine\n");
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 07/10] net: netcp: use hw capability to remove FCS word from rx packets
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev, linux-omap, grygorii.strashko, mugunthanvnm, linux-kernel,
arnd, davem, devicetree, mark.rutland, robh+dt
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2@ti.com>
Some of the newer Ethernet switch hw (such as that on k2e/l/g) can
strip the Etherenet FCS from packet at the port 0 egress of the switch.
So use this capability instead of doing it in software.
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
drivers/net/ethernet/ti/netcp.h | 2 ++
drivers/net/ethernet/ti/netcp_core.c | 8 ++++++--
drivers/net/ethernet/ti/netcp_ethss.c | 10 ++++++++--
3 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h
index d243c5d..8900a6f 100644
--- a/drivers/net/ethernet/ti/netcp.h
+++ b/drivers/net/ethernet/ti/netcp.h
@@ -102,6 +102,8 @@ struct netcp_intf {
void *rx_fdq[KNAV_DMA_FDQ_PER_CHAN];
struct napi_struct rx_napi;
struct napi_struct tx_napi;
+#define ETH_SW_CAN_REMOVE_ETH_FCS BIT(0)
+ u32 hw_cap;
/* 64-bit netcp stats */
struct netcp_stats stats;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index b077ed4..68a75cc 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -739,8 +739,12 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
dev_dbg(netcp->ndev_dev, "mismatch in packet size(%d) & sum of fragments(%d)\n",
pkt_sz, accum_sz);
- /* Remove ethernet FCS from the packet */
- __pskb_trim(skb, skb->len - ETH_FCS_LEN);
+ /* Newer version of the Ethernet switch can trim the Ethernet FCS
+ * from the packet and is indicated in hw_cap. So trim it only for
+ * older h/w
+ */
+ if (!(netcp->hw_cap & ETH_SW_CAN_REMOVE_ETH_FCS))
+ __pskb_trim(skb, skb->len - ETH_FCS_LEN);
/* Call each of the RX hooks */
p_info.skb = skb;
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 9266961..4b2a911 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -133,6 +133,7 @@
#define MACSL_FULLDUPLEX BIT(0)
#define GBE_CTL_P0_ENABLE BIT(2)
+#define ETH_SW_CTL_P0_TX_CRC_REMOVE BIT(13)
#define GBE13_REG_VAL_STAT_ENABLE_ALL 0xff
#define XGBE_REG_VAL_STAT_ENABLE_ALL 0xf
#define GBE_STATS_CD_SEL BIT(28)
@@ -2847,7 +2848,7 @@ static int gbe_open(void *intf_priv, struct net_device *ndev)
struct netcp_intf *netcp = netdev_priv(ndev);
struct gbe_slave *slave = gbe_intf->slave;
int port_num = slave->port_num;
- u32 reg;
+ u32 reg, val;
int ret;
reg = readl(GBE_REG_ADDR(gbe_dev, switch_regs, id_ver));
@@ -2877,7 +2878,12 @@ static int gbe_open(void *intf_priv, struct net_device *ndev)
writel(0, GBE_REG_ADDR(gbe_dev, switch_regs, ptype));
/* Control register */
- writel(GBE_CTL_P0_ENABLE, GBE_REG_ADDR(gbe_dev, switch_regs, control));
+ val = GBE_CTL_P0_ENABLE;
+ if (IS_SS_ID_MU(gbe_dev)) {
+ val |= ETH_SW_CTL_P0_TX_CRC_REMOVE;
+ netcp->hw_cap = ETH_SW_CAN_REMOVE_ETH_FCS;
+ }
+ writel(val, GBE_REG_ADDR(gbe_dev, switch_regs, control));
/* All statistics enabled and STAT AB visible by default */
writel(gbe_dev->stats_en_mask, GBE_REG_ADDR(gbe_dev, switch_regs,
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 04/10] net: netcp: remove the redundant memmov()
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev, linux-omap, grygorii.strashko, mugunthanvnm, linux-kernel,
arnd, davem, devicetree, mark.rutland, robh+dt
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2@ti.com>
The psdata is populated with command data by netcp modules
to the tail of the buffer and set_words() copy the same
to the front of the psdata. So remove the redundant memmov
function call.
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
---
drivers/net/ethernet/ti/netcp_core.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index a136c56..286fd8d 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1226,9 +1226,9 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
/* psdata points to both native-endian and device-endian data */
__le32 *psdata = (void __force *)p_info.psdata;
- memmove(p_info.psdata, p_info.psdata + p_info.psdata_len,
- p_info.psdata_len);
- set_words(p_info.psdata, p_info.psdata_len, psdata);
+ set_words((u32 *)psdata +
+ (KNAV_DMA_NUM_PS_WORDS - p_info.psdata_len),
+ p_info.psdata_len, psdata);
tmp |= (p_info.psdata_len & KNAV_DMA_DESC_PSLEN_MASK) <<
KNAV_DMA_DESC_PSLEN_SHIFT;
}
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 02/10] net: netcp: ethss: add support of 10gbe pcsr link status
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev, linux-omap, grygorii.strashko, mugunthanvnm, linux-kernel,
arnd, davem, devicetree, mark.rutland, robh+dt
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2@ti.com>
From: WingMan Kwok <w-kwok2@ti.com>
The 10GBASE-R Physical Coding Sublayer (PCS-R) module provides
functionality of a physical coding sublayer (PCS) on data being
transferred between a demuxed XGMII and SerDes supporting a 16
or 32 bit interface. From the driver point of view, whether
a ethernet link is up or not depends also on the status of the
block-lock bit of the PCSR. This patch adds the checking of that
bit in order to determine the link status.
Signed-off-by: WingMan Kwok <w-kwok2@ti.com>
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
.../devicetree/bindings/net/keystone-netcp.txt | 3 ++
drivers/net/ethernet/ti/netcp_ethss.c | 37 ++++++++++++++++++++--
2 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt b/Documentation/devicetree/bindings/net/keystone-netcp.txt
index 0854a73..57fc13f 100644
--- a/Documentation/devicetree/bindings/net/keystone-netcp.txt
+++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt
@@ -75,6 +75,9 @@ Required properties:
- syscon-subsys: phandle to syscon node of the switch
subsystem registers.
+- syscon-pcsr: (10gbe only) phandle to syscon node of the
+ switch PCSR registers.
+
- reg: register location and the size for the following register
regions in the specified order.
- switch subsystem registers
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 473edda1..cb48f88 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -63,6 +63,12 @@
#define GBE13_ALE_OFFSET 0x600
#define GBE13_HOST_PORT_NUM 0
#define GBE13_NUM_ALE_ENTRIES 1024
+/* offset relative to PCSR regmap */
+#define XGBE10_PCSR_OFFSET(x) ((x) * 0x80)
+#define XGBE10_PCSR_RX_STATUS(x) (XGBE10_PCSR_OFFSET(x) + 0x0C)
+
+#define XGBE10_PCSR_BLOCK_LOCK_MASK BIT(30)
+#define XGBE10_PCSR_BLOCK_LOCK_SHIFT 30
/* 1G Ethernet NU SS defines */
#define GBENU_MODULE_NAME "netcp-gbenu"
@@ -2111,6 +2117,10 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev,
if (phy)
phy_print_status(phy);
+ else if (slave->link_interface == XGMII_LINK_MAC_MAC_FORCED) {
+ netdev_printk(KERN_INFO, ndev,
+ "Link is %s\n", (up ? "Up" : "Down"));
+ }
}
static bool gbe_phy_link_status(struct gbe_slave *slave)
@@ -2123,18 +2133,29 @@ static void netcp_ethss_update_link_state(struct gbe_priv *gbe_dev,
struct net_device *ndev)
{
int sp = slave->slave_num;
- int phy_link_state, sgmii_link_state = 1, link_state;
+ int phy_link_state, sw_link_state = 1, link_state, ret;
+ u32 pcsr_rx_stat;
if (!slave->open)
return;
if (!SLAVE_LINK_IS_XGMII(slave)) {
- sgmii_link_state =
+ sw_link_state =
netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp);
+ } else if (slave->link_interface == XGMII_LINK_MAC_MAC_FORCED) {
+ /* read status from pcsr status reg */
+ ret = regmap_read(gbe_dev->pcsr_regmap,
+ XGBE10_PCSR_RX_STATUS(sp), &pcsr_rx_stat);
+
+ if (ret)
+ return;
+
+ sw_link_state = (pcsr_rx_stat & XGBE10_PCSR_BLOCK_LOCK_MASK) >>
+ XGBE10_PCSR_BLOCK_LOCK_SHIFT;
}
phy_link_state = gbe_phy_link_status(slave);
- link_state = phy_link_state & sgmii_link_state;
+ link_state = phy_link_state & sw_link_state;
if (atomic_xchg(&slave->link_state, link_state) != link_state)
netcp_ethss_link_state_action(gbe_dev, ndev, slave,
@@ -3154,6 +3175,16 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev,
return PTR_ERR(gbe_dev->ss_regmap);
}
+ gbe_dev->pcsr_regmap = syscon_regmap_lookup_by_phandle(node,
+ "syscon-pcsr");
+
+ if (IS_ERR(gbe_dev->pcsr_regmap)) {
+ dev_err(gbe_dev->dev,
+ "pcsr regmap lookup failed: %ld\n",
+ PTR_ERR(gbe_dev->pcsr_regmap));
+ return PTR_ERR(gbe_dev->pcsr_regmap);
+ }
+
ret = of_address_to_resource(node, XGBE_SM_REG_INDEX, &res);
if (ret) {
dev_err(gbe_dev->dev,
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 01/10] net: netcp: ethss: add support of subsystem register region regmap
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev, linux-omap, grygorii.strashko, mugunthanvnm, linux-kernel,
arnd, davem, devicetree, mark.rutland, robh+dt
In-Reply-To: <1482271793-7671-1-git-send-email-m-karicheri2@ti.com>
From: WingMan Kwok <w-kwok2@ti.com>
10gbe phy driver needs to access the 10gbe subsystem control
register during phy initialization. To facilitate the shared
access of the subsystem register region between the 10gbe Ethernet
driver and the phy driver, this patch adds support of the
subsystem register region defined by a syscon node in the dts.
Although there is no shared access to the gbe subsystem register
region, using syscon for that is for the sake of consistency.
This change is backward compatible with previously released gbe
devicetree bindings.
Signed-off-by: WingMan Kwok <w-kwok2@ti.com>
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
.../devicetree/bindings/net/keystone-netcp.txt | 16 ++-
drivers/net/ethernet/ti/netcp_ethss.c | 140 +++++++++++++++++----
2 files changed, 127 insertions(+), 29 deletions(-)
diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt b/Documentation/devicetree/bindings/net/keystone-netcp.txt
index 04ba1dc..0854a73 100644
--- a/Documentation/devicetree/bindings/net/keystone-netcp.txt
+++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt
@@ -72,20 +72,24 @@ Required properties:
"ti,netcp-gbe-2" for 1GbE N NetCP 1.5 (N=2)
"ti,netcp-xgbe" for 10 GbE
+- syscon-subsys: phandle to syscon node of the switch
+ subsystem registers.
+
- reg: register location and the size for the following register
regions in the specified order.
- switch subsystem registers
+ - sgmii module registers
- sgmii port3/4 module registers (only for NetCP 1.4)
- switch module registers
- serdes registers (only for 10G)
NetCP 1.4 ethss, here is the order
- index #0 - switch subsystem registers
+ index #0 - sgmii module registers
index #1 - sgmii port3/4 module registers
index #2 - switch module registers
NetCP 1.5 ethss 9 port, 5 port and 2 port
- index #0 - switch subsystem registers
+ index #0 - sgmii module registers
index #1 - switch module registers
index #2 - serdes registers
@@ -145,6 +149,11 @@ Optional properties:
Example binding:
+gbe_subsys: subsys@2090000 {
+ compatible = "syscon";
+ reg = <0x02090000 0x100>;
+};
+
netcp: netcp@2000000 {
reg = <0x2620110 0x8>;
reg-names = "efuse";
@@ -163,7 +172,8 @@ netcp: netcp@2000000 {
ranges;
gbe@90000 {
label = "netcp-gbe";
- reg = <0x90000 0x300>, <0x90400 0x400>, <0x90800 0x700>;
+ syscon-subsys = <&gbe_subsys>;
+ reg = <0x90100 0x200>, <0x90400 0x200>, <0x90800 0x700>;
/* enable-ale; */
tx-queue = <648>;
tx-channel = <8>;
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index c7e547e..473edda1 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -19,9 +19,11 @@
*/
#include <linux/io.h>
+#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_mdio.h>
#include <linux/of_address.h>
+#include <linux/regmap.h>
#include <linux/if_vlan.h>
#include <linux/ptp_classify.h>
#include <linux/net_tstamp.h>
@@ -43,7 +45,10 @@
#define GBE_MODULE_NAME "netcp-gbe"
#define GBE_SS_VERSION_14 0x4ed21104
+/* for devicetree backward compatible only */
#define GBE_SS_REG_INDEX 0
+
+#define GBE_SGMII_REG_INDEX 0
#define GBE_SGMII34_REG_INDEX 1
#define GBE_SM_REG_INDEX 2
/* offset relative to base of GBE_SS_REG_INDEX */
@@ -71,9 +76,11 @@
#define IS_SS_ID_NU(d) \
(GBE_IDENT((d)->ss_version) == GBE_SS_ID_NU)
-#define GBENU_SS_REG_INDEX 0
+#define GBENU_SGMII_REG_INDEX 0
#define GBENU_SM_REG_INDEX 1
+/* offset relative to base of GBE_SS_REG_INDEX */
#define GBENU_SGMII_MODULE_OFFSET 0x100
+/* offset relative to base of GBENU_SM_REG_INDEX */
#define GBENU_HOST_PORT_OFFSET 0x1000
#define GBENU_SLAVE_PORT_OFFSET 0x2000
#define GBENU_EMAC_OFFSET 0x2330
@@ -82,13 +89,12 @@
#define GBENU_ALE_OFFSET 0x1e000
#define GBENU_HOST_PORT_NUM 0
#define GBENU_NUM_ALE_ENTRIES 1024
-#define GBENU_SGMII_MODULE_SIZE 0x100
/* 10G Ethernet SS defines */
#define XGBE_MODULE_NAME "netcp-xgbe"
#define XGBE_SS_VERSION_10 0x4ee42100
-#define XGBE_SS_REG_INDEX 0
+#define XGBE_SGMII_REG_INDEX 0
#define XGBE_SM_REG_INDEX 1
#define XGBE_SERDES_REG_INDEX 2
@@ -173,6 +179,7 @@
#define XGBE_SET_REG_OFS(p, rb, rn) p->rb##_ofs.rn = \
offsetof(struct xgbe##_##rb, rn)
#define GBE_REG_ADDR(p, rb, rn) (p->rb + p->rb##_ofs.rn)
+#define GBE_REG_OFS(p, rb, rn) ((p)->rb##_ofs.rn)
#define HOST_TX_PRI_MAP_DEFAULT 0x00000000
@@ -225,6 +232,7 @@
/* The PTP event messages - Sync, Delay_Req, Pdelay_Req, and Pdelay_Resp. */
#define EVENT_MSG_BITS (BIT(0) | BIT(1) | BIT(2) | BIT(3))
#endif /* CONFIG_TI_CPTS */
+#define SGMII_MODULE_SIZE 0x100
struct xgbe_ss_regs {
u32 id_ver;
@@ -716,7 +724,9 @@ struct gbe_priv {
u32 ss_version;
u32 stats_en_mask;
- void __iomem *ss_regs;
+ struct regmap *ss_regmap;
+ struct regmap *pcsr_regmap;
+ void __iomem *ss_regs;
void __iomem *switch_regs;
void __iomem *host_port_regs;
void __iomem *ale_reg;
@@ -2192,7 +2202,7 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
int max_rx_len)
{
void __iomem *rx_maxlen_reg;
- u32 xgmii_mode;
+ int ret;
if (max_rx_len > NETCP_MAX_FRAME_SIZE)
max_rx_len = NETCP_MAX_FRAME_SIZE;
@@ -2200,9 +2210,16 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
/* Enable correct MII mode at SS level */
if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) &&
(slave->link_interface >= XGMII_LINK_MAC_PHY)) {
- xgmii_mode = readl(GBE_REG_ADDR(gbe_dev, ss_regs, control));
- xgmii_mode |= (1 << slave->slave_num);
- writel(xgmii_mode, GBE_REG_ADDR(gbe_dev, ss_regs, control));
+ ret = regmap_update_bits(gbe_dev->ss_regmap,
+ GBE_REG_OFS(gbe_dev, ss_regs, control),
+ 1 << slave->slave_num,
+ 1 << slave->slave_num);
+
+ if (ret) {
+ dev_err(gbe_dev->dev,
+ "regmap update xgmii mode bit Failed\n");
+ return;
+ }
}
if (IS_SS_ID_MU(gbe_dev))
@@ -3127,35 +3144,46 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev,
void __iomem *regs;
int ret, i;
- ret = of_address_to_resource(node, XGBE_SS_REG_INDEX, &res);
+ gbe_dev->ss_regmap = syscon_regmap_lookup_by_phandle(node,
+ "syscon-subsys");
+
+ if (IS_ERR(gbe_dev->ss_regmap)) {
+ dev_err(gbe_dev->dev,
+ "subsys regmap lookup failed: %ld\n",
+ PTR_ERR(gbe_dev->ss_regmap));
+ return PTR_ERR(gbe_dev->ss_regmap);
+ }
+
+ ret = of_address_to_resource(node, XGBE_SM_REG_INDEX, &res);
if (ret) {
dev_err(gbe_dev->dev,
- "Can't xlate xgbe of node(%s) ss address at %d\n",
- node->name, XGBE_SS_REG_INDEX);
+ "Can't xlate xgbe of node(%s) sm address at %d\n",
+ node->name, XGBE_SM_REG_INDEX);
return ret;
}
regs = devm_ioremap_resource(gbe_dev->dev, &res);
if (IS_ERR(regs)) {
- dev_err(gbe_dev->dev, "Failed to map xgbe ss register base\n");
+ dev_err(gbe_dev->dev, "Failed to map xgbe sm register base\n");
return PTR_ERR(regs);
}
- gbe_dev->ss_regs = regs;
+ gbe_dev->switch_regs = regs;
- ret = of_address_to_resource(node, XGBE_SM_REG_INDEX, &res);
+ ret = of_address_to_resource(node, XGBE_SGMII_REG_INDEX, &res);
if (ret) {
dev_err(gbe_dev->dev,
- "Can't xlate xgbe of node(%s) sm address at %d\n",
- node->name, XGBE_SM_REG_INDEX);
+ "Can't xlate xgbe of node(%s) sgmii address at %d\n",
+ node->name, XGBE_SGMII_REG_INDEX);
return ret;
}
regs = devm_ioremap_resource(gbe_dev->dev, &res);
if (IS_ERR(regs)) {
- dev_err(gbe_dev->dev, "Failed to map xgbe sm register base\n");
+ dev_err(gbe_dev->dev,
+ "Failed to map xgbe sgmii register base\n");
return PTR_ERR(regs);
}
- gbe_dev->switch_regs = regs;
+ gbe_dev->sgmii_port_regs = regs;
ret = of_address_to_resource(node, XGBE_SERDES_REG_INDEX, &res);
if (ret) {
@@ -3171,6 +3199,8 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev,
return PTR_ERR(regs);
}
gbe_dev->xgbe_serdes_regs = regs;
+ gbe_dev->sgmii_port34_regs = gbe_dev->sgmii_port_regs +
+ (2 * SGMII_MODULE_SIZE);
gbe_dev->num_stats_mods = gbe_dev->max_num_ports;
gbe_dev->et_stats = xgbe10_et_stats;
@@ -3195,9 +3225,9 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev,
}
gbe_dev->ss_version = XGBE_SS_VERSION_10;
- gbe_dev->sgmii_port_regs = gbe_dev->ss_regs +
- XGBE10_SGMII_MODULE_OFFSET;
- gbe_dev->host_port_regs = gbe_dev->ss_regs + XGBE10_HOST_PORT_OFFSET;
+
+ gbe_dev->host_port_regs = gbe_dev->switch_regs +
+ XGBE10_HOST_PORT_OFFSET;
for (i = 0; i < gbe_dev->max_num_ports; i++)
gbe_dev->hw_stats_regs[i] = gbe_dev->switch_regs +
@@ -3228,8 +3258,8 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev,
return 0;
}
-static int get_gbe_resource_version(struct gbe_priv *gbe_dev,
- struct device_node *node)
+static int get_gbe_resource_version_ss_regs(struct gbe_priv *gbe_dev,
+ struct device_node *node)
{
struct resource res;
void __iomem *regs;
@@ -3248,8 +3278,27 @@ static int get_gbe_resource_version(struct gbe_priv *gbe_dev,
dev_err(gbe_dev->dev, "Failed to map gbe register base\n");
return PTR_ERR(regs);
}
+
gbe_dev->ss_regs = regs;
gbe_dev->ss_version = readl(gbe_dev->ss_regs);
+ gbe_dev->ss_regmap = NULL;
+ return 0;
+}
+
+static int get_gbe_resource_version(struct gbe_priv *gbe_dev,
+ struct device_node *node)
+{
+ gbe_dev->ss_regmap = syscon_regmap_lookup_by_phandle(node,
+ "syscon-subsys");
+ if (IS_ERR(gbe_dev->ss_regmap)) {
+ dev_dbg(gbe_dev->dev,
+ "subsys regmap lookup failed: %ld. try reg property\n",
+ PTR_ERR(gbe_dev->ss_regmap));
+ return get_gbe_resource_version_ss_regs(gbe_dev, node);
+ }
+
+ regmap_read(gbe_dev->ss_regmap, 0, &gbe_dev->ss_version);
+ gbe_dev->ss_regs = NULL;
return 0;
}
@@ -3260,6 +3309,27 @@ static int set_gbe_ethss14_priv(struct gbe_priv *gbe_dev,
void __iomem *regs;
int i, ret;
+ if (gbe_dev->ss_regs) {
+ gbe_dev->sgmii_port_regs = gbe_dev->ss_regs +
+ GBE13_SGMII_MODULE_OFFSET;
+ } else {
+ ret = of_address_to_resource(node, GBE_SGMII_REG_INDEX, &res);
+ if (ret) {
+ dev_err(gbe_dev->dev,
+ "Can't translate of gbe node(%s) address at index %d\n",
+ node->name, GBE_SGMII_REG_INDEX);
+ return ret;
+ }
+
+ regs = devm_ioremap_resource(gbe_dev->dev, &res);
+ if (IS_ERR(regs)) {
+ dev_err(gbe_dev->dev,
+ "Failed to map gbe sgmii port register base\n");
+ return PTR_ERR(regs);
+ }
+ gbe_dev->sgmii_port_regs = regs;
+ }
+
ret = of_address_to_resource(node, GBE_SGMII34_REG_INDEX, &res);
if (ret) {
dev_err(gbe_dev->dev,
@@ -3314,7 +3384,6 @@ static int set_gbe_ethss14_priv(struct gbe_priv *gbe_dev,
return -ENOMEM;
}
- gbe_dev->sgmii_port_regs = gbe_dev->ss_regs + GBE13_SGMII_MODULE_OFFSET;
gbe_dev->host_port_regs = gbe_dev->switch_regs + GBE13_HOST_PORT_OFFSET;
/* K2HK has only 2 hw stats modules visible at a time, so
@@ -3402,14 +3471,33 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
}
gbe_dev->switch_regs = regs;
- gbe_dev->sgmii_port_regs = gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET;
+ if (gbe_dev->ss_regs) {
+ gbe_dev->sgmii_port_regs = gbe_dev->ss_regs +
+ GBENU_SGMII_MODULE_OFFSET;
+ } else {
+ ret = of_address_to_resource(node, GBENU_SGMII_REG_INDEX, &res);
+ if (ret) {
+ dev_err(gbe_dev->dev,
+ "Can't translate of gbenu node(%s) addr at index %d\n",
+ node->name, GBENU_SGMII_REG_INDEX);
+ return ret;
+ }
+
+ regs = devm_ioremap_resource(gbe_dev->dev, &res);
+ if (IS_ERR(regs)) {
+ dev_err(gbe_dev->dev,
+ "Failed to map gbenu sgmii port register base\n");
+ return PTR_ERR(regs);
+ }
+ gbe_dev->sgmii_port_regs = regs;
+ }
/* Although sgmii modules are mem mapped to one contiguous
* region on GBENU devices, setting sgmii_port34_regs allows
* consistent code when accessing sgmii api
*/
gbe_dev->sgmii_port34_regs = gbe_dev->sgmii_port_regs +
- (2 * GBENU_SGMII_MODULE_SIZE);
+ (2 * SGMII_MODULE_SIZE);
gbe_dev->host_port_regs = gbe_dev->switch_regs + GBENU_HOST_PORT_OFFSET;
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 00/10] netcp: enhancements and minor fixes
From: Murali Karicheri @ 2016-12-20 22:09 UTC (permalink / raw)
To: netdev-u79uwXL29TY76Z2rM5mHXA, linux-omap-u79uwXL29TY76Z2rM5mHXA,
grygorii.strashko-l0cyMroinI0, mugunthanvnm-l0cyMroinI0,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, arnd-r2nGTMty4D4,
davem-fT/PcQaiUtIeIZ0/mPfg9Q, devicetree-u79uwXL29TY76Z2rM5mHXA,
mark.rutland-5wv7dgnIgG8, robh+dt-DgEjT+Ai2ygdnm+yROfE0A
This series is for net-next. This propagates enhancements and minor
bug fixes from internal version of the driver to keep the upstream
in sync. Please review and apply if this looks good.
Tested on all of K2HK/E/L boards.
Thanks
Murali Karicheri
Michael Scherban (1):
net: netcp: store network statistics in 64 bits
Murali Karicheri (7):
net: netcp: extract eflag from desc for rx_hook handling
net: netcp: remove the redundant memmov()
net: netcp: ethss: get phy-handle only if link interface is MAC-to-PHY
net: netcp: use hw capability to remove FCS word from rx packets
net: netcp: ale: update to support unknown vlan controls for NU switch
net: netcp: ale: use ale_status to size the ale table
net: netcp: ale: add proper ale entry mask bits for netcp switch ALE
WingMan Kwok (2):
net: netcp: ethss: add support of subsystem register region regmap
net: netcp: ethss: add support of 10gbe pcsr link status
.../devicetree/bindings/net/keystone-netcp.txt | 19 +-
drivers/net/ethernet/ti/cpsw_ale.c | 180 ++++++++++++++++---
drivers/net/ethernet/ti/cpsw_ale.h | 17 +-
drivers/net/ethernet/ti/netcp.h | 21 +++
drivers/net/ethernet/ti/netcp_core.c | 102 ++++++++---
drivers/net/ethernet/ti/netcp_ethss.c | 200 +++++++++++++++++----
include/linux/soc/ti/knav_dma.h | 2 +
7 files changed, 456 insertions(+), 85 deletions(-)
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: HalfSipHash Acceptable Usage
From: Theodore Ts'o @ 2016-12-20 21:36 UTC (permalink / raw)
To: Jason A. Donenfeld
Cc: Jean-Philippe Aumasson, Hannes Frederic Sowa, LKML, Eric Biggers,
Daniel J . Bernstein, David Laight, David Miller, Andi Kleen,
George Spelvin, kernel-hardening, Andy Lutomirski,
Linux Crypto Mailing List, Tom Herbert, Vegard Nossum, Netdev,
Linus Torvalds
In-Reply-To: <CAHmME9rPmH=wP_eHYopt8ZPG9TSN7bos3fGOuqKL2HjQW-2SWA@mail.gmail.com>
On Mon, Dec 19, 2016 at 06:32:44PM +0100, Jason A. Donenfeld wrote:
> 1) Anything that requires actual long-term security will use
> SipHash2-4, with the 64-bit output and the 128-bit key. This includes
> things like TCP sequence numbers. This seems pretty uncontroversial to
> me. Seem okay to you?
Um, why do TCP sequence numbers need long-term security? So long as
you rekey every 5 minutes or so, TCP sequence numbers don't need any
more security than that, since even if you break the key used to
generate initial sequence numbers seven a minute or two later, any
pending TCP connections will have timed out long before.
See the security analysis done in RFC 6528[1], where among other
things, it points out why MD5 is acceptable with periodic rekeying,
although there is the concern that this could break certain hueristics
used when establishing new connections during the TIME-WAIT state.
[1] https://tools.ietf.org/html/rfc6528
- Ted
^ permalink raw reply
* [GIT] Networking
From: David Miller @ 2016-12-20 21:02 UTC (permalink / raw)
To: torvalds; +Cc: akpm, netdev, linux-kernel
1) Use rb_entry() instead of hardcoded container_of(), from Geliang Tang.
2) Use correct memory barriers in stammac driver, from Pavel Machek.
3) Fix assoc bind address handling in SCTP, from Xin Long.
4) Make the length check for UFO handling consistent between
__ip_append_data() and ip_finish_output(), from Zheng Li.
5) HSI driver compatible strings were busted fro hix5hd2, from Dongpo
Li.
6) Handle devm_ioremap() errors properly in cavium driver, from Arvind
Yadav.
Please pull, thanks a lot!
The following changes since commit 52f40e9d657cc126b766304a5dd58ad73b02ff46:
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2016-12-17 20:17:04 -0800)
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
for you to fetch changes up to a763f78cea845c91b8d91f93dabf70c407635dc5:
RDS: use rb_entry() (2016-12-20 14:22:49 -0500)
----------------------------------------------------------------
Arvind Yadav (1):
net: ethernet: cavium: octeon: octeon_mgmt: Handle return NULL error from devm_ioremap
David S. Miller (4):
Merge branch 'phy-broken-modes'
Merge branch 'fsl-fixes'
Merge branch 'hix5hd2_gmac-compatible-string'
Merge branch 'sctp-fixes'
Dongpo Li (2):
net: hix5hd2_gmac: fix compatible strings name
ARM: dts: hix5hd2: don't change the existing compatible string
Geliang Tang (4):
net/mlx5: use rb_entry()
net_sched: sch_fq: use rb_entry()
net_sched: sch_netem: use rb_entry()
RDS: use rb_entry()
Jarno Rajahalme (1):
openvswitch: Add a missing break statement.
Madalin Bucur (4):
fsl/fman: fix 1G support for QSGMII interfaces
powerpc: fsl/fman: remove fsl,fman from of_device_ids[]
fsl/fman: A007273 only applies to PPC SoCs
fsl/fman: enable compilation on ARM64
Pavel Machek (1):
stmmac: fix memory barriers
Tobias Klauser (1):
ethernet: sfc: Add Kconfig entry for vendor Solarflare
WingMan Kwok (2):
net: netcp: ethss: fix errors in ethtool ops
net: netcp: ethss: fix 10gbe host port tx pri map configuration
Xin Long (2):
sctp: reduce indent level in sctp_copy_local_addr_list
sctp: not copying duplicate addrs to the assoc's bind address list
jbrunet (3):
net: phy: fix sign type error in genphy_config_eee_advert
net: phy: use boolean dt properties for eee broken modes
dt: bindings: net: use boolean dt properties for eee broken modes
zheng li (1):
ipv4: Should use consistent conditional judgement for ip fragment in __ip_append_data and ip_finish_output
Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt | 13 ++++++++-----
Documentation/devicetree/bindings/net/phy.txt | 10 ++++++++--
arch/arm/boot/dts/hisi-x5hd2.dtsi | 4 ++--
arch/powerpc/platforms/85xx/corenet_generic.c | 3 ---
drivers/net/ethernet/Kconfig | 1 -
drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 6 ++++++
drivers/net/ethernet/freescale/fman/Kconfig | 2 +-
drivers/net/ethernet/freescale/fman/fman.c | 15 +++++++++++++++
drivers/net/ethernet/freescale/fman/mac.c | 1 +
drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 13 +++++++------
drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 2 +-
drivers/net/ethernet/sfc/Kconfig | 21 +++++++++++++++++++++
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 4 ++--
drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 2 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 ++++----
drivers/net/ethernet/ti/netcp_ethss.c | 24 ++++++++++++++++++------
drivers/net/phy/phy_device.c | 22 +++++++++++++++++-----
include/dt-bindings/net/mdio.h | 19 -------------------
net/ipv4/ip_output.c | 2 +-
net/openvswitch/flow_netlink.c | 1 +
net/rds/rdma.c | 2 +-
net/sched/sch_fq.c | 14 +++++++-------
net/sched/sch_netem.c | 2 +-
net/sctp/bind_addr.c | 3 +++
net/sctp/protocol.c | 40 ++++++++++++++++++++++------------------
25 files changed, 148 insertions(+), 86 deletions(-)
delete mode 100644 include/dt-bindings/net/mdio.h
^ permalink raw reply
* [ANNOUNCE] nftables 0.7 release
From: Pablo Neira Ayuso @ 2016-12-20 20:46 UTC (permalink / raw)
To: netfilter-devel; +Cc: netdev, netfilter, netfilter-announce, lwn
[-- Attachment #1: Type: text/plain, Size: 10356 bytes --]
Hi!
The Netfilter project proudly presents:
nftables 0.7
This release contains many accumulated bug fixes and new features
available up to the (upcoming) Linux 4.10-rc1 kernel release.
* Facilitate migration from iptables to nftables:
At compilation time, you have to pass this option.
# ./configure --with-xtables
And libxtables needs to be installed in your system. This allows you
to list a ruleset containing xt extensions loaded through
iptables-compat-restore tool. The nft tool provides a native
translation for iptables extensions (if available).
* Add new fib expression, which can be used to obtain the output
interface from the route table based on either source or destination
address of a packet. This can be used to e.g. add reverse path
filtering, eg. drop if not coming from the same interface packet
arrived on:
# nft add rule x prerouting fib saddr . iif oif eq 0 drop
Accept only if from eth:
# nft add rule x prerouting fib saddr . iif oif eq "eth0" accept
Accept if from any valid interface:
# nft add rule x prerouting fib saddr oif accept
Querying of address type is also supported, this can be used
to only accept packets to addresses configured in the same
interface, eg.
# nft add rule x prerouting fib daddr . iif type local accept
Its also possible to use mark and verdict map, eg,
# nft add rule x prerouting \
meta mark set 0xdead fib daddr . mark type vmap {
blackhole : drop,
prohibit : drop,
unicast : accept
}
* Support hashing of any arbitrary key combination, eg.
# nft add rule x y \
dnat to jhash ip saddr . tcp dport mod 2 map { \
0 : 192.168.20.100, \
1 : 192.168.30.100 \
}
Another usecase: Set packet marks based on any arbitrary hashing.
* Add number generation support. Useful for round-robin packet mark
setting, eg.
# nft add rule filter prerouting meta mark set numgen inc mod 2
You can also specify an offset to indicate from what value you want
to start from.
The modulus provides the scale of the counting sequence. You can
also use this from maps, eg.
# nft add rule nat prerouting \
dnat to numgen inc mod 2 map { 0 : 192.168.10.100, 1 : 192.168.20.200 }
So this is distributing new connections in a round-robin fashion
between 192.168.10.100 and 192.168.20.200. Don't forget the special NAT
chain semantics: Only the first packet evaluates the rule, follow up
packets rely on conntrack to apply the NAT information.
You can also emulate flow distribution with different backend weights
using intervals, eg.
# nft add rule nat prerouting \
dnat to numgen inc mod 10 map { 0-5 : 192.168.10.100, 6-9 : 192.168.20.200 }
* Add quota support, eg.
# nft add rule filter input \
flow table http { ip saddr timeout 60s quota over 50 mbytes } drop
This creates a flow table, where every flow gets a quota of 50
mbytes. You can also from use simple rules too to enforce quotas, of
course.
* Introduce routing expression, for routing related data with support
for nexthop (i.e. the directly connected IP address that an outgoing
packet is sent to), which can be used either for matching or accounting, eg.
# nft add rule filter postrouting \
ip daddr 192.168.1.0/24 rt nexthop != 192.168.0.1 drop
This will drop any traffic to 192.168.1.0/24 that is not routed via
192.168.0.1.
# nft add rule filter postrouting \
flow table acct { rt nexthop timeout 600s counter }
# nft add rule ip6 filter postrouting \
flow table acct { rt nexthop timeout 600s counter }
These rules count outgoing traffic per nexthop. Note that the timeout
releases an entry if no traffic is seen for this nexthop within 10
minutes.
* Notrack support, to explicitly skip connection tracking for matching
packets, eg.
# nft add rule ip raw prerouting tcp dport { 80, 443 } notrack
So you can skip tracking for http and https traffic.
* Support to set non-byte bound packet header fields, including
checksum adjustment, eg. ip6 ecn set 1.
* Add 'create set' and 'create element' commands, eg.
# nft add set x y { type ipv4_addr\; }
# nft create set x y { type ipv4_addr\; }
<cmdline>:1:1-35: Error: Could not process rule: File exists
create set x y { type ipv4_addr; }
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# nft add set x y { type ipv4_addr\; }
#
So 'create' bails out if the set already exists, while 'add'
doesn't, for more ergonomic usage as several users requested on
the mailing list.
* Allow to use variable reference for set element definitions, eg.
# cat ruleset.nft
define s-ext-2-int = { 10.10.10.10 . 25, 10.10.10.10 . 143 }
table inet forward {
set s-ext-2-int {
type ipv4_addr . inet_service
elements = $s-ext-2-int
}
}
# nft -f ruleset.nft
Useful to improve ruleset maintainability, as you can split out
variable and set definitions from the filtering policy itself.
* Allow to use variable definitions from element commands, eg.
define whitelist_v4 = { 1.1.1.1 }
table inet filter {
set whitelist_v4 { type ipv4_addr; }
}
add element inet filter whitelist_v4 $whitelist_v4
* Add support to flush set. You can use this new command to remove all
existing elements in a set, eg.
# nft flush set filter xyz
Note that this requires (upcoming) Linux kernel 4.10-rc versions.
* Inverted set lookups, eg. tcp dport != { 80, 443 }.
* Honor absolute and relative paths via include file, where:
include "./ruleset.nft"
refers to a file in the working directory.
include "ruleset.nft"
refers to a file in the nftables root path (via sysconfdir), and:
include "/etc/nftables/ruleset.nft"
provides an absolute reference to the file that need to be included.
This also solves an ambiguity if the same file name is used both under
sysconfdir and the current working directory.
* Support log flags, to enable logging TCP sequence and options:
# nft add rule x y log flags tcp sequence,options
... IP options, eg:
# nft add rule x y log flags ip options
... socket UID, eg.
# nft add rule x y log flags skuid
... decide ethernet link layer address, eg.
# nft add rule x y log flags ether
... or simply set on all flags:
# nft add rule x y log flags all
* tc classid parser support, eg.
nft add rule filter forward meta priority abcd:1234
* Allow numeric connlabels, so if connlabel still works with undefined
labels, eg. ct label set 2.
* Document log, reject, counter, meta, limit, nat, ct, payload and
queue statements from nft(8) manpage.
Bugfixes
========
Not strictly limited to this list below, but some highlights:
* Allow split table definitions, eg.
# cat ruleset.nft
table inet filter {
chain ssh {
type filter hook input priority 0; policy accept;
tcp dport ssh accept;
}
}
table inet filter {
chain input {
type filter hook input priority 1; policy drop;
}
}
# nft -f ruleset.nft
* Use new range expression to represent inverted intervals, eg.
ip saddr != 1.1.1.1-2.2.2.2, since previously generated bytecode was
not correct.
* Solve endianness problems with link layer address.
* Fix parser to keep map flag around on definition.
* Skip timeout attribute in dynamic set updates, other kernel bails
out with EINVAL.
* Restore parsing of dynamic set element updates.
* The time datatype now uses milliseconds, as the kernel expects.
* Allow numeric interface index numbers, eg. in meta iif, oif.
* Fix monitor trace crash with netdev family.
* Flow table with concatenation fixes.
* Keep element comments around when using set intervals.
* Fixed memory corruption in userspace when deleting lots of elements
in one go via nft -f.
* Several nft internal cache fixes, including cache reset on 'flush
ruleset'.
* Restore parens on right-hand side of relational expression.
* Replace getnameinfo() by internal lookup table, so we don't rely on
/etc/services anymore for service names, so we restrict them to
a well-known set that is supported by our scanner. You can list
service names via 'nft describe tcp dport'.
* Display symbol table values in the right hostbyte order and
decimal/hexadecimal representation.
* Fix a nasty bug in the set interval code triggering huge memory
consumption in userspace for set and map intervals with runtime
updates.
We also got lots more tests added to our infrastructure to catch up
regressions.
Syntax updates
==============
Several minor syntax updates, although previous syntax has been
preserved by now to facilitate transition, the new one is prefered:
* Consistency grammar fixes: 'snat' and 'dnat' now require 'to', eg.
snat to 1.2.3.4. For consistency with existing statements such as
redirect, masquerade, dup and fwd. Moreover, add colon after 'to' in
'redirect' for consistency with nat and masq statements.
* Allow ct l3proto/protocol without direction since they are unrelated
to the direction.
* Explicit ruleset exportation, eg. nft export ruleset json, for
consistency with other existing ruleset commands.
* Always quote user-defined strings from rules when listing them.
* Support for RFC2732 IPv6 address format with brackets, eg.
dnat to [2001:838:35f:1::]:80
* Allow strings starting by underscores and dots in user-define
strings, conforming with POSIX.1-2008 (which is simultaneously IEEE
Std 1003.1-2008).
Resources
=========
The nftables code can be obtained from:
* http://netfilter.org/projects/nftables/downloads.html
* ftp://ftp.netfilter.org/pub/nftables
* git://git.netfilter.org/nftables
To build the code, libnftnl 1.0.7 and libmnl >= 1.0.2 are required:
* http://netfilter.org/projects/libnftnl/index.html
* http://netfilter.org/projects/libmnl/index.html
Visit our wikipage for user documentation at:
* http://wiki.nftables.org
For the manpage reference, check man(8) nft.
In case of bugs and feature request, file them via:
* https://bugzilla.netfilter.org
Make sure you create no duplicates already, thanks!
Happy holidays!
[-- Attachment #2: changes-nftables-0.7.txt --]
[-- Type: text/plain, Size: 9551 bytes --]
Anatole Denis (7):
evaluate: Add set to cache only when well-formed
tests: Add regression test for malformed sets
Revert "evaluate: check for NULL datatype in rhs in lookup expr"
src: Interpret OP_NEQ against a set as OP_LOOKUP
tests/py: Unmask negative set lookup
rule: Introduce helper function cache_flush
evaluate: Update cache on flush ruleset
Anders K. Pedersen (4):
rt: introduce routing expression
Replace tests/files/expr-rt with Python based tests, and replace ether type with meta nfproto, which generates a bit fewer instructions.
evaluate: Allow concatenation of rt nexthop etc.
doc: fix synopsis for ct expression
Arturo Borrero (3):
tests: shell: delete unused variable in run-tests.sh
tests: shell: cleanup tempfile handling in testcases/sets/cache_handling_0
tests: shell: run-tests.sh: use src/nft binary by default
Arturo Borrero Gonzalez (12):
tests: shell: update kernel modules to clean
xt: update Arturo Borrero Gonzalez email address
tests: shell: delete useless stderr output in testcase
tests: shell: introduce the cache testcases directory
tests: shell: add a new testcase for ruleset loading bug
tests: shell: add testcases for comments in set elements
tests: shell: allow to execute a single testcase
tests: shell: testcase for adding many set elements
tests: shell: testcase for deleting many set elements
tests: shell: another testcase for deleting many set elements
tests: shell: add a testcase for many defines
tests: shell: add testcase for different defines usage
Carlos Falgueras García (1):
src: Simplify parser rule_spec tree
Elise Lennion (4):
datatype: Replace getnameinfo() by internal lookup table
datatype: Display pre-defined inet_service values in host byte order
datatype: Display pre-defined inet_service values in decimal base
expression: Show the base which pre-defined constants are displayed
Florian Westphal (30):
payload: don't update protocol context if we can't find a description
meta: add random support
meta: add tests for meta random
ct: use nftables sysconf location for connlabel configuration
tests: add basic payload tests
tests: add ether payload set test
netlink: add __binop_adjust helper
payload: print base and raw values for unknown payloads
evaluate: add small helper to check if payload expr needs binop adjustment
evaluate: add support to set IPv6 non-byte header fields
netlink: decode payload statment
tests: ip6 dscp, flowlabel and ecn test cases
netlink: make checksum fixup work with odd-sized header fields
tests: ip payload set support for ecn and dscp
ct: allow numeric conntrack labels
ct: display bit number instead of raw value
doc: update meta expression
doc: payload and conntrack statement
datatype: ll: use big endian byte ordering
tests: catch ordering issue w. ether set
payload: remove byteorder conversion
meta: permit numeric interface type
netlink: fix monitor trace crash with netdev family
meta: fix pkttype name and add 'other' symbol
utils: provide snprintf helper macro
ct: allow resolving ct keys at run time
meta: allow resolving meta keys at run time
src: add fib expression
Revert "tests: py: nft-tests.py: Add function for loading and removing kernel modules"
bison: remove old log level tokens
Jon Jensen (1):
Correct description of -n/--numeric option
Laura Garcia Liebana (5):
doc: Update datatypes
src: add offset attribute for numgen expression
netlink: fix linearize numgen type
src: make hash seed attribute optional
src: add offset attribute for hash expression
Liping Zhang (14):
tests: shell: make testcases which using tcp/udp port more rubost
tests: shell: add endless jump loop tests
parser_bison: keep snat/dnat existing syntax unchanged
tests: shell: add testcase for reject expr
meta: fix memory leak in tc classid parser
tests: py: replace "eth0" with "lo" in dup expr tests
src: fix compile error due to _UNTIL renamed to _MODULUS in libnftnl
tests: py: add more test cases for queue expr
tests: py: fix numgen case failed due to changes in libnftnl
src: support ct l3proto/protocol without direction syntax
ct: fix "ct l3proto/protocol" syntax broken
log: rename the log level "warning" to "warn"
src: add log flags syntax support
tests: shell: add test case for inserting element into verdict map
Manuel Johannes Messner (3):
tests: py: nft-tests.py: Add function for loading and removing kernel modules
tests: py: any: Make tests more generic by using other interfaces
tests: py: any: Remove duplicate tests
Nicholas Vinson (1):
nft: configure.ac: Replace magic dblatex dep.
Pablo Neira (2):
src: expose delinearize/linearize structures and stmt_error()
src: trigger layer 4 checksum when pseudoheader fields are modified
Pablo Neira Ayuso (71):
src: use new definitions from libnftnl
segtree: don't check for overlaps if set definition is empty
tests: shell: cover transactions via nft -f using flat syntax
datatype: time_type should send milliseconds to userspace
parser_bison: restore parsing of dynamic set element updates
netlink_linearize: skip NFTNL_EXPR_DYNSET_TIMEOUT attribute if timeout is unset
include: cache ip_tables.h, ip6_tables.h, arp_tables.h and ebtables.h
src: add xt compat support
parser_bison: fix typo in symbol redefinition error reporting
tests: shell: make sure split table definition works via nft -f
xt: use struct xt_xlate_{mt,tg}_params
parser_bison: keep map flag around when flags are specified
scanner: honor absolute and relative paths via include file
scanner: don't fall back on current directory if include is not found
scanner: don't break line on include error message
tests: tests to include files
ct: add missing slash to connlabel path
ct: release ct_label table on exit
src: quote user-defined strings when used from rule selectors
src: add 'to' for snat and dnat
src: support for RFC2732 IPv6 address format with brackets
parser_bison: missing token string in QUOTED_ASTERISK and ASTERISK_STRING
scanner: allow strings starting by underscores and dots
scanner: remove range expression
src: rename datatype name from tc_handle to classid
src: simplify classid printing using %x instead of %04x
src: meta priority support using tc classid
parser_bison: redirect to :port for consistency with nat/masq statement
parser_bison: explicit indication on export ruleset
src: add create set command
tests: shell: cover add and create set command
src: create element command
tests: shell: cover add and create set command
include: refresh uapi/linux/netfilter/nf_tables.h copy
tests: py: adapt it to new add element command semantics
src: add quota statement
src: add numgen expression
src: add hash expression
evaluate: add expr_evaluate_integer()
evaluate: validate maximum hash and numgen value
parser_bison: add variable_expr rule
parser_bison: allow variable references in set elements definition
tests: py: adapt netlink bytecode output of numgen and hash
evaluate: display expression, statement and command name on debug
netlink_delinearize: Avoid potential null pointer deref
doc: nft: add my copyright statement to the manpage
doc: nft: document log, reject, counter, meta, limit, nat and queue statements
src: use new range expression for != [a,b] intervals
parser_bison: allow to use variable to add/create/delete elements
src: don't need keyword for log level
parser: add offset keyword and parser rule
tests/py: add missing payload test for numgen offset
netlink_linearize: skip set element expression in flow table key
segtree: keep element comments in set intervals
tests: py: add some testcases for log flags
tests: py: missing range conversion in icmpv6
src: add notrack support
mnl: use nftnl_set_elems_nlmsg_build_payload_iter() when deleting elements
include: refresh nf_tables.h header
datatype: honor -nn option from inet_service_type_print()
evaluate: return ctx->table from table_lookup_global()
src: add support to flush sets
segtree: wrong prefix expression length on interval_map_decompose()
segtree: don't trigger error on exact overlaps
mnl: don't send empty set elements netlink message to kernel
tests: py: update quota and payload
netlink_linearize: fix IPv6 layer 4 checksum mangling
mnl: add mnl_nft_setelem_batch_flush() and use it from netlink_flush_setelems()
xt: use NFTNL_* definitions
configure: Bump version to v0.7
include: Missing noinst_HEADERS updates
Phil Sutter (5):
evaluate: Fix datalen checks in expr_evaluate_string()
evaluate: reject: Have a generic fix for missing network context
evaluate: Avoid undefined behaviour in concat_subtype_id()
parser_bison: Allow parens on RHS of relational_expr
tests: py: Test TCP flags match with parentheses
^ permalink raw reply
* [PATCH 5/5 net-next] inet: reset tb->fastreuseport when adding a reuseport sk
From: Josef Bacik @ 2016-12-20 20:07 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
In-Reply-To: <1482264424-15439-1-git-send-email-jbacik@fb.com>
If we have non reuseport sockets on a tb we will set tb->fastreuseport to 0 and
never set it again. Which means that in the future if we end up adding a bunch
of reuseport sk's to that tb we'll have to do the expensive scan every time.
Instead add a sock_common to the tb so we know what reuseport sk succeeded last.
Once one sk has made it onto the list we know that there are no potential bind
conflicts on the owners list that match that sk's rcv_addr. So copy the sk's
common into our tb->fastsock and set tb->fastruseport to FASTREUSESOCK_STRICT so
we know we have to do an extra check for subsequent reuseport sockets and skip
the expensive bind conflict check.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
include/net/inet_hashtables.h | 4 ++++
net/ipv4/inet_connection_sock.c | 53 +++++++++++++++++++++++++++++++++++++----
2 files changed, 53 insertions(+), 4 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 50f635c..b776401 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -74,12 +74,16 @@ struct inet_ehash_bucket {
* users logged onto your box, isn't it nice to know that new data
* ports are created in O(1) time? I thought so. ;-) -DaveM
*/
+#define FASTREUSEPORT_ANY 1
+#define FASTREUSEPORT_STRICT 2
+
struct inet_bind_bucket {
possible_net_t ib_net;
unsigned short port;
signed char fastreuse;
signed char fastreuseport;
kuid_t fastuid;
+ struct sock_common fastsock;
int num_owners;
struct hlist_node node;
struct hlist_head owners;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d3ccf62..9e29fad 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -164,6 +164,32 @@ success:
return head;
}
+static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
+ struct sock *sk)
+{
+ struct sock *sk2 = (struct sock *)&tb->fastsock;
+ kuid_t uid = sock_i_uid(sk);
+
+ if (tb->fastreuseport <= 0)
+ return 0;
+ if (!sk->sk_reuseport)
+ return 0;
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ return 0;
+ if (!uid_eq(tb->fastuid, uid))
+ return 0;
+ /* We only need to check the rcv_saddr if this tb was once marked
+ * without fastreuseport and then was reset, as we can only know that
+ * the fastsock has no potential bind conflicts with the rest of the
+ * possible socks on the owners list.
+ */
+ if (tb->fastreuseport == FASTREUSEPORT_ANY)
+ return 1;
+ if (!inet_csk(sk)->icsk_af_ops->rcv_saddr_equal(sk, sk2, true))
+ return 0;
+ return 1;
+}
+
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
* We try to allocate an odd port (and leave even ports for connect())
@@ -206,9 +232,7 @@ tb_found:
goto success;
if ((tb->fastreuse > 0 && reuse) ||
- (tb->fastreuseport > 0 &&
- !rcu_access_pointer(sk->sk_reuseport_cb) &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
+ sk_reuseport_match(tb, sk))
goto success;
if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
@@ -220,14 +244,35 @@ success:
if (sk->sk_reuseport) {
tb->fastreuseport = 1;
tb->fastuid = uid;
+ memcpy(&tb->fastsock, &sk->__sk_common,
+ sizeof(struct sock_common));
} else {
tb->fastreuseport = 0;
}
} else {
if (!reuse)
tb->fastreuse = 0;
- if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
+ if (sk->sk_reuseport) {
+ /* We didn't match or we don't have fastreuseport set on
+ * the tb, but we have sk_reuseport set on this socket
+ * and we know that there are no bind conflicts with
+ * this socket in this tb, so reset our tb's reuseport
+ * settings so that any subsequent sockets that match
+ * our current socket will be put on the fast path.
+ *
+ * If we reset we need to set FASTREUSEPORT_STRICT so we
+ * do extra checking for all subsequent sk_reuseport
+ * socks.
+ */
+ if (!sk_reuseport_match(tb, sk)) {
+ tb->fastreuseport = FASTREUSEPORT_STRICT;
+ tb->fastuid = uid;
+ memcpy(&tb->fastsock, &sk->__sk_common,
+ sizeof(struct sock_common));
+ }
+ } else {
tb->fastreuseport = 0;
+ }
}
if (!inet_csk(sk)->icsk_bind_hash)
inet_bind_hash(sk, tb, port);
--
2.9.3
^ permalink raw reply related
* [PATCH 4/5 net-next] inet: split inet_csk_get_port into two functions
From: Josef Bacik @ 2016-12-20 20:07 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
In-Reply-To: <1482264424-15439-1-git-send-email-jbacik@fb.com>
inet_csk_get_port does two different things, it either scans for an open port,
or it tries to see if the specified port is available for use. Since these two
operations have different rules and are basically independent lets split them
into two different functions to make them both more readable.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
net/ipv4/inet_connection_sock.c | 72 +++++++++++++++++++++++++++--------------
1 file changed, 47 insertions(+), 25 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fc9bfe1..d3ccf62 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -84,34 +84,21 @@ static int inet_csk_bind_conflict(const struct sock *sk,
return sk2 != NULL;
}
-/* Obtain a reference to a local port for the given sock,
- * if snum is zero it means select any available local port.
- * We try to allocate an odd port (and leave even ports for connect())
+/*
+ * Find an open port number for the socket. Returns with the
+ * inet_bind_hashbucket lock held.
*/
-int inet_csk_get_port(struct sock *sk, unsigned short snum)
+static struct inet_bind_hashbucket *
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
- bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- int ret = 1, port = snum;
+ int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
int i, low, high, attempt_half;
struct inet_bind_bucket *tb;
- kuid_t uid = sock_i_uid(sk);
u32 remaining, offset;
- bool reuseport_ok = !!snum;
- bool empty_tb = true;
- if (port) {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->port == port)
- goto tb_found;
-
- goto tb_not_found;
- }
attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
inet_get_local_port_range(net, &low, &high);
@@ -150,13 +137,12 @@ other_parity_scan:
if (net_eq(ib_net(tb), net) && tb->port == port) {
if (hlist_empty(&tb->owners))
goto success;
- if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok)) {
- empty_tb = false;
+ if (!inet_csk_bind_conflict(sk, tb, false, false))
goto success;
- }
goto next_port;
}
- goto tb_not_found;
+ tb = NULL;
+ goto success;
next_port:
spin_unlock_bh(&head->lock);
cond_resched();
@@ -171,8 +157,44 @@ next_port:
attempt_half = 2;
goto other_half_scan;
}
- return ret;
+ return NULL;
+success:
+ *port_ret = port;
+ *tb_ret = tb;
+ return head;
+}
+/* Obtain a reference to a local port for the given sock,
+ * if snum is zero it means select any available local port.
+ * We try to allocate an odd port (and leave even ports for connect())
+ */
+int inet_csk_get_port(struct sock *sk, unsigned short snum)
+{
+ bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
+ struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
+ int ret = 1, port = snum;
+ struct inet_bind_hashbucket *head;
+ struct net *net = sock_net(sk);
+ struct inet_bind_bucket *tb = NULL;
+ kuid_t uid = sock_i_uid(sk);
+ bool empty_tb = true;
+
+ if (!port) {
+ head = inet_csk_find_open_port(sk, &tb, &port);
+ if (!head)
+ return 1;
+ if (!tb)
+ goto tb_not_found;
+ if (!hlist_empty(&tb->owners))
+ empty_tb = false;
+ goto success;
+ }
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->port == port)
+ goto tb_found;
tb_not_found:
tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
net, head, port);
@@ -188,7 +210,7 @@ tb_found:
!rcu_access_pointer(sk->sk_reuseport_cb) &&
sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
goto success;
- if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok))
+ if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
empty_tb = false;
}
--
2.9.3
^ permalink raw reply related
* [PATCH 3/5 net-next] inet: don't check for bind conflicts twice when searching for a port
From: Josef Bacik @ 2016-12-20 20:07 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
In-Reply-To: <1482264424-15439-1-git-send-email-jbacik@fb.com>
This is just wasted time, we've already found a tb that doesn't have a bind
conflict, and we don't drop the head lock so scanning again isn't going to give
us a different answer. Instead move the tb->reuse setting logic outside of the
found_tb path and put it in the success: path. Then make it so that we don't
goto again if we find a bind conflict in the found_tb path as we won't reach
this anymore when we are scanning for an ephemeral port.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
net/ipv4/inet_connection_sock.c | 39 ++++++++++++++++++---------------------
1 file changed, 18 insertions(+), 21 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1a1a94bd..fc9bfe1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -92,7 +92,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- int ret = 1, attempts = 5, port = snum;
+ int ret = 1, port = snum;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
int i, low, high, attempt_half;
@@ -100,6 +100,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
kuid_t uid = sock_i_uid(sk);
u32 remaining, offset;
bool reuseport_ok = !!snum;
+ bool empty_tb = true;
if (port) {
head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -111,7 +112,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
goto tb_not_found;
}
-again:
attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
inet_get_local_port_range(net, &low, &high);
@@ -148,8 +148,12 @@ other_parity_scan:
spin_lock_bh(&head->lock);
inet_bind_bucket_for_each(tb, &head->chain)
if (net_eq(ib_net(tb), net) && tb->port == port) {
- if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok))
- goto tb_found;
+ if (hlist_empty(&tb->owners))
+ goto success;
+ if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok)) {
+ empty_tb = false;
+ goto success;
+ }
goto next_port;
}
goto tb_not_found;
@@ -184,23 +188,12 @@ tb_found:
!rcu_access_pointer(sk->sk_reuseport_cb) &&
sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
goto success;
- if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok)) {
- if ((reuse ||
- (tb->fastreuseport > 0 &&
- sk->sk_reuseport &&
- !rcu_access_pointer(sk->sk_reuseport_cb) &&
- uid_eq(tb->fastuid, uid))) && !snum &&
- --attempts >= 0) {
- spin_unlock_bh(&head->lock);
- goto again;
- }
+ if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok))
goto fail_unlock;
- }
- if (!reuse)
- tb->fastreuse = 0;
- if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
- tb->fastreuseport = 0;
- } else {
+ empty_tb = false;
+ }
+success:
+ if (empty_tb) {
tb->fastreuse = reuse;
if (sk->sk_reuseport) {
tb->fastreuseport = 1;
@@ -208,8 +201,12 @@ tb_found:
} else {
tb->fastreuseport = 0;
}
+ } else {
+ if (!reuse)
+ tb->fastreuse = 0;
+ if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
+ tb->fastreuseport = 0;
}
-success:
if (!inet_csk(sk)->icsk_bind_hash)
inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
--
2.9.3
^ permalink raw reply related
* [PATCH 1/5 net-next] inet: replace ->bind_conflict with ->rcv_saddr_equal
From: Josef Bacik @ 2016-12-20 20:07 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
In-Reply-To: <1482264424-15439-1-git-send-email-jbacik@fb.com>
The only difference between inet6_csk_bind_conflict and inet_csk_bind_conflict
is how they check the rcv_saddr. Since we want to be able to check the saddr in
other places just drop the protocol specific ->bind_conflict and replace it with
->rcv_saddr_equal, then make inet_csk_bind_conflict the one true bind conflict
function.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
include/net/inet6_connection_sock.h | 5 -----
include/net/inet_connection_sock.h | 9 +++------
net/dccp/ipv4.c | 3 ++-
net/dccp/ipv6.c | 2 +-
net/ipv4/inet_connection_sock.c | 22 +++++++-------------
net/ipv4/tcp_ipv4.c | 3 ++-
net/ipv4/udp.c | 1 +
net/ipv6/inet6_connection_sock.c | 40 -------------------------------------
net/ipv6/tcp_ipv6.c | 4 ++--
9 files changed, 18 insertions(+), 71 deletions(-)
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 3212b39..8ec87b6 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -15,16 +15,11 @@
#include <linux/types.h>
-struct inet_bind_bucket;
struct request_sock;
struct sk_buff;
struct sock;
struct sockaddr;
-int inet6_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax,
- bool soreuseport_ok);
-
struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6,
const struct request_sock *req, u8 proto);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index ec0479a..9cd43c5 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -62,9 +62,9 @@ struct inet_connection_sock_af_ops {
char __user *optval, int __user *optlen);
#endif
void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
- int (*bind_conflict)(const struct sock *sk,
- const struct inet_bind_bucket *tb,
- bool relax, bool soreuseport_ok);
+ int (*rcv_saddr_equal)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard);
void (*mtu_reduced)(struct sock *sk);
};
@@ -261,9 +261,6 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
-int inet_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax,
- bool soreuseport_ok);
int inet_csk_get_port(struct sock *sk, unsigned short snum);
struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 9c67a96..1931324 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -17,6 +17,7 @@
#include <linux/skbuff.h>
#include <linux/random.h>
+#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -901,7 +902,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
- .bind_conflict = inet_csk_bind_conflict,
+ .rcv_saddr_equal = ipv4_rcv_saddr_equal,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4663a01..45242b8 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -926,7 +926,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
- .bind_conflict = inet6_csk_bind_conflict,
+ .rcv_saddr_equal = ipv6_rcv_saddr_equal,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5f44fa1..74f6a57 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -44,9 +44,9 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
-int inet_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax,
- bool reuseport_ok)
+static int inet_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb,
+ bool relax, bool reuseport_ok)
{
struct sock *sk2;
bool reuse = sk->sk_reuse;
@@ -62,7 +62,6 @@ int inet_csk_bind_conflict(const struct sock *sk,
sk_for_each_bound(sk2, &tb->owners) {
if (sk != sk2 &&
- !inet_v6_ipv6only(sk2) &&
(!sk->sk_bound_dev_if ||
!sk2->sk_bound_dev_if ||
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
@@ -72,23 +71,18 @@ int inet_csk_bind_conflict(const struct sock *sk,
rcu_access_pointer(sk->sk_reuseport_cb) ||
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
-
- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
+ if (inet_csk(sk)->icsk_af_ops->rcv_saddr_equal(sk, sk2, true))
break;
}
if (!relax && reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN) {
-
- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
+ if (inet_csk(sk)->icsk_af_ops->rcv_saddr_equal(sk, sk2, true))
break;
}
}
}
return sk2 != NULL;
}
-EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
@@ -167,8 +161,7 @@ other_parity_scan:
smallest_size = tb->num_owners;
smallest_port = port;
}
- if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false,
- reuseport_ok))
+ if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok))
goto tb_found;
goto next_port;
}
@@ -209,8 +202,7 @@ tb_found:
sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
smallest_size == -1)
goto success;
- if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true,
- reuseport_ok)) {
+ if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok)) {
if ((reuse ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 029708f..7608012 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -63,6 +63,7 @@
#include <linux/times.h>
#include <linux/slab.h>
+#include <net/addrconf.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
@@ -1781,7 +1782,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
- .bind_conflict = inet_csk_bind_conflict,
+ .rcv_saddr_equal = ipv4_rcv_saddr_equal,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2a70c05..6089ea8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -374,6 +374,7 @@ int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
}
return 0;
}
+EXPORT_SYMBOL(ipv4_rcv_saddr_equal);
static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
unsigned int port)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 71939a2..7538715 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -28,46 +28,6 @@
#include <net/inet6_connection_sock.h>
#include <net/sock_reuseport.h>
-int inet6_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax,
- bool reuseport_ok)
-{
- const struct sock *sk2;
- bool reuse = !!sk->sk_reuse;
- bool reuseport = !!sk->sk_reuseport && reuseport_ok;
- kuid_t uid = sock_i_uid((struct sock *)sk);
-
- /* We must walk the whole port owner list in this case. -DaveM */
- /*
- * See comment in inet_csk_bind_conflict about sock lookup
- * vs net namespaces issues.
- */
- sk_for_each_bound(sk2, &tb->owners) {
- if (sk != sk2 &&
- (!sk->sk_bound_dev_if ||
- !sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
- if ((!reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) &&
- (!reuseport || !sk2->sk_reuseport ||
- rcu_access_pointer(sk->sk_reuseport_cb) ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(uid,
- sock_i_uid((struct sock *)sk2))))) {
- if (ipv6_rcv_saddr_equal(sk, sk2, true))
- break;
- }
- if (!relax && reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN &&
- ipv6_rcv_saddr_equal(sk, sk2, true))
- break;
- }
- }
-
- return sk2 != NULL;
-}
-EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
-
struct dst_entry *inet6_csk_route_req(const struct sock *sk,
struct flowi6 *fl6,
const struct request_sock *req,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bee59a6..2f40b98 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1603,7 +1603,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
- .bind_conflict = inet6_csk_bind_conflict,
+ .rcv_saddr_equal = ipv6_rcv_saddr_equal,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
@@ -1634,7 +1634,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
- .bind_conflict = inet6_csk_bind_conflict,
+ .rcv_saddr_equal = ipv6_rcv_saddr_equal,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
--
2.9.3
^ permalink raw reply related
* [PATCH 2/5 net-next] inet: kill smallest_size and smallest_port
From: Josef Bacik @ 2016-12-20 20:07 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
In-Reply-To: <1482264424-15439-1-git-send-email-jbacik@fb.com>
In inet_csk_get_port we seem to be using smallest_port to figure out where the
best place to look for a SO_REUSEPORT sk that matches with an existing set of
SO_REUSEPORT's. However if we get to the logic
if (smallest_size != -1) {
port = smallest_port;
goto have_port;
}
we will do a useless search, because we would have already done the
inet_csk_bind_conflict for that port and it would have returned 1, otherwise we
would have gone to found_tb and succeeded. Since this logic makes us do yet
another trip through inet_csk_bind_conflict for a port we know won't work just
delete this code and save us the time.
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
net/ipv4/inet_connection_sock.c | 26 ++++----------------------
1 file changed, 4 insertions(+), 22 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 74f6a57..1a1a94bd 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -93,7 +93,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
int ret = 1, attempts = 5, port = snum;
- int smallest_size = -1, smallest_port;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
int i, low, high, attempt_half;
@@ -103,7 +102,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
bool reuseport_ok = !!snum;
if (port) {
-have_port:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
@@ -137,8 +135,6 @@ other_half_scan:
* We do the opposite to not pollute connect() users.
*/
offset |= 1U;
- smallest_size = -1;
- smallest_port = low; /* avoid compiler warning */
other_parity_scan:
port = low + offset;
@@ -152,15 +148,6 @@ other_parity_scan:
spin_lock_bh(&head->lock);
inet_bind_bucket_for_each(tb, &head->chain)
if (net_eq(ib_net(tb), net) && tb->port == port) {
- if (((tb->fastreuse > 0 && reuse) ||
- (tb->fastreuseport > 0 &&
- sk->sk_reuseport &&
- !rcu_access_pointer(sk->sk_reuseport_cb) &&
- uid_eq(tb->fastuid, uid))) &&
- (tb->num_owners < smallest_size || smallest_size == -1)) {
- smallest_size = tb->num_owners;
- smallest_port = port;
- }
if (!inet_csk_bind_conflict(sk, tb, false, reuseport_ok))
goto tb_found;
goto next_port;
@@ -171,10 +158,6 @@ next_port:
cond_resched();
}
- if (smallest_size != -1) {
- port = smallest_port;
- goto have_port;
- }
offset--;
if (!(offset & 1))
goto other_parity_scan;
@@ -196,19 +179,18 @@ tb_found:
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
- if (((tb->fastreuse > 0 && reuse) ||
+ if ((tb->fastreuse > 0 && reuse) ||
(tb->fastreuseport > 0 &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size == -1)
+ sk->sk_reuseport && uid_eq(tb->fastuid, uid)))
goto success;
if (inet_csk_bind_conflict(sk, tb, true, reuseport_ok)) {
if ((reuse ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
- uid_eq(tb->fastuid, uid))) &&
- !snum && smallest_size != -1 && --attempts >= 0) {
+ uid_eq(tb->fastuid, uid))) && !snum &&
+ --attempts >= 0) {
spin_unlock_bh(&head->lock);
goto again;
}
--
2.9.3
^ permalink raw reply related
* [RFC][PATCH 0/5 net-next] Rework inet_csk_get_port
From: Josef Bacik @ 2016-12-20 20:06 UTC (permalink / raw)
To: davem, hannes, kraigatgoog, eric.dumazet, tom, netdev,
kernel-team
At some point recently the guys working on our load balancer added the ability
to use SO_REUSEPORT. When they restarted their app with this option enabled
they immediately hit a softlockup on what appeared to be the
inet_bind_bucket->lock. Eventually what all of our debugging and discussion led
us to was the fact that the application comes up without SO_REUSEPORT, shuts
down which creates around 100k twsk's, and then comes up and tries to open a
bunch of sockets using SO_REUSEPORT, which meant traversing the inet_bind_bucket
owners list under the lock. Since this lock is needed for dealing with the
twsk's and basically anything else related to connections we would softlockup,
and sometimes not ever recover.
To solve this problem I did what you see in Path 5/5. Once we have a
SO_REUSEPORT socket on the tb->owners list we know that the socket has no
conflicts with any of the other sockets on that list. So we can add a copy of
the sock_common (really all we need is the recv_saddr but it seemed ugly to copy
just the ipv6, ipv4, and flag to indicate if we were ipv6 only in there so I've
copied the whole common) in order to check subsequent SO_REUSEPORT sockets. If
they match the previous one then we can skip the expensive
inet_csk_bind_conflict check. This is what eliminated the soft lockup that we
were seeing.
Patches 1-4 are cleanups and re-workings. For instance when we specify port ==
0 we need to find an open port, but we would do two passes through
inet_csk_bind_conflict every time we found a possible port. We would also keep
track of the smallest_port value in order to try and use it if we found no
port our first run through. This however made no sense as it would have had to
fail the first pass through inet_csk_bind_conflict, so would not actually pass
the second pass through either. Finally I split the function into two functions
in order to make it easier to read and to distinguish between the two behaviors.
I have tested this on one of our load balancing boxes during peak traffic and it
hasn't fallen over. But this is not my area, so obviously feel free to point
out where I'm being stupid and I'll get it fixed up and retested. Thanks,
Josef
^ permalink raw reply
* Re: nfc: trf7970a: Prevent repeated polling from crashing the kernel
From: Mark Greer @ 2016-12-20 19:56 UTC (permalink / raw)
To: Justin Bronder
Cc: Geoff Lansberry, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
lauro.venancio-430g2QfJUUCGglJvpFV4uA,
aloisio.almeida-430g2QfJUUCGglJvpFV4uA,
sameo-VuQAYsv1563Yd54FQh9/CA, robh+dt-DgEjT+Ai2ygdnm+yROfE0A,
mark.rutland-5wv7dgnIgG8, netdev-u79uwXL29TY76Z2rM5mHXA,
devicetree-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Jaret Cantu
In-Reply-To: <20161220191352.GB23496-WrO9gjaJVAZ9BIO3fBtL+FdjMaeQq/Z1QQ4Iyu8u01E@public.gmane.org>
On Tue, Dec 20, 2016 at 02:13:52PM -0500, Justin Bronder wrote:
> On 20/12/16 11:59 -0700, Mark Greer wrote:
> > On Tue, Dec 20, 2016 at 11:16:32AM -0500, Geoff Lansberry wrote:
> > > From: Jaret Cantu <jaret.cantu-jEh4hwF5bVhBDgjK7y7TUQ@public.gmane.org>
> > >
> > > Repeated polling attempts cause a NULL dereference error to occur.
> > > This is because the state of the trf7970a is currently reading but
> > > another request has been made to send a command before it has finished.
> >
> > How is this happening? Was trf7970a_abort_cmd() called and it didn't
> > work right? Was it not called at all and there is a bug in the digital
> > layer? More details please.
> >
> > > The solution is to properly kill the waiting reading (workqueue)
> > > before failing on the send.
> >
> > If the bug is in the calling code, then that is what should get fixed.
> > This seems to be a hack to work-around a digital layer bug.
>
> One of our uses of NFC is to begin polling to read a tag and then stop polling
> (in order to save power) until we know via user interaction that we need to poll
> again. This is typically many minutes later so the power saving is pretty
> significant. However, it's possible that a user will remove the tag before
> reading has completed. We also detect this case and stop polling. I can go
> more into this if necessary but that is what exposed a panic.
>
> You can reproduce using neard and python, in our testing it was very likely to
> occur in 10-100 iterations of the following.:
>
> #!/usr/bin/python
> import time
>
> import dbus
>
> bus = dbus.SystemBus()
> nfc0 = bus.get_object('org.neard', '/org/neard/nfc0')
> props = dbus.Interface(nfc0, 'org.freedesktop.DBus.Properties')
>
> try:
> props.Set('org.neard.Adapter', 'Powered', dbus.Boolean(1))
> except:
> pass
>
> adapter = dbus.Interface(nfc0, 'org.neard.Adapter')
>
> for i in range(1000):
> adapter.StartPollLoop('Initiator')
> time.sleep(0.1)
> adapter.StopPollLoop()
> print(i)
>
> I believe the last time we tested this was around the 4.1 release.
Thanks for the info, Justin, but I was also seeking more information
at the kernel NFC subsystem and trf7970a driver level. This patch
adds code inside an 'if' in the driver whose condition should never
be evaluate to true but apparently it did. How?
Thanks,
Mark
--
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: ipv6: handle -EFAULT from skb_copy_bits
From: Dave Jones @ 2016-12-20 19:34 UTC (permalink / raw)
To: David Miller; +Cc: netdev
In-Reply-To: <20161220.132813.435056880928769245.davem@davemloft.net>
On Tue, Dec 20, 2016 at 01:28:13PM -0500, David Miller wrote:
> This has to do with the SKB buffer layout and geometry, not whether
> the packet is "fragmented" in the protocol sense.
>
> So no, this isn't a criteria for packets being filtered out by this
> point.
>
> Can you try to capture what sk->sk_socket->type and
> inet_sk(sk)->hdrincl are set to at the time of the crash?
>
type:3 hdrincl:0
Dave
^ permalink raw reply
* Re: ipv6: handle -EFAULT from skb_copy_bits
From: Cong Wang @ 2016-12-20 19:31 UTC (permalink / raw)
To: Dave Jones; +Cc: David Miller, Linux Kernel Network Developers
In-Reply-To: <20161220181728.dd2cynjwrceruwcu@codemonkey.org.uk>
On Tue, Dec 20, 2016 at 10:17 AM, Dave Jones <davej@codemonkey.org.uk> wrote:
> On Mon, Dec 19, 2016 at 08:36:23PM -0500, David Miller wrote:
> > From: Dave Jones <davej@codemonkey.org.uk>
> > Date: Mon, 19 Dec 2016 19:40:13 -0500
> >
> > > On Mon, Dec 19, 2016 at 07:31:44PM -0500, Dave Jones wrote:
> > >
> > > > Unfortunately, this made no difference. I spent some time today trying
> > > > to make a better reproducer, but failed. I'll revisit again tomorrow.
> > > >
> > > > Maybe I need >1 process/thread to trigger this. That would explain why
> > > > I can trigger it with Trinity.
> > >
> > > scratch that last part, I finally just repro'd it with a single process.
> >
> > Thanks for the info, I'll try to think about this some more.
>
> I threw in some debug printks right before that BUG_ON.
> it's always this:
>
> skb->len=31 skb->data_len=0 offset:30 total_len:9
Clearly we fail because 30 > 31 - 2, seems 'offset' is not correct here,
off-by-one?
^ permalink raw reply
* Re: [PATCH net] be2net: Increase skb headroom size to 256 bytes
From: David Miller @ 2016-12-20 19:30 UTC (permalink / raw)
To: suresh.reddy; +Cc: netdev, kalesh-anakkur.purayil
In-Reply-To: <20161220151430.11115-1-suresh.reddy@broadcom.com>
From: Suresh Reddy <suresh.reddy@broadcom.com>
Date: Tue, 20 Dec 2016 10:14:30 -0500
> From: Kalesh A P <kalesh-anakkur.purayil@broadcom.com>
>
> The driver currently allocates 128 bytes of skb headroom.
> This was found to be insufficient with some configurations
> like Geneve tunnels, which resulted in skb head reallocations.
>
> Increase the headroom to 256 bytes to fix this.
>
> Signed-off-by: Kalesh A P <kalesh-anakkur.purayil@broadcom.com>
> Signed-off-by: Suresh Reddy <suresh.reddy@broadcom.com>
Adding 128 bytes of headroom just for geneve seems excessive.
Do you really need to add that much?
^ permalink raw reply
* Re: [PATCH] net/mlx5: use rb_entry()
From: David Miller @ 2016-12-20 19:23 UTC (permalink / raw)
To: geliangtang; +Cc: saeedm, matanb, leonro, netdev, linux-rdma, linux-kernel
In-Reply-To: <8443fa3fa03d82c2b829375d8020762e5236dc6d.1482203930.git.geliangtang@gmail.com>
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 20 Dec 2016 22:02:14 +0800
> To make the code clearer, use rb_entry() instead of container_of() to
> deal with rbtree.
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Applied.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox