* [PATCH V2 net 1/2] net: hns: add the code for cleaning pkt in chip
From: Peng Li @ 2018-08-27 1:59 UTC (permalink / raw)
To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
lipeng321
In-Reply-To: <1535335170-111030-1-git-send-email-lipeng321@huawei.com>
If there are packets in hardware when changing the speed
or duplex, it may cause hardware hang up.
This patch adds code that waits for the chip to clean out all
packets (TX & RX) in the chip when the driver uses the function named
"adjust link".
This patch cleans the pkts as follows:
1) close rx of chip, close tx of protocol stack.
2) wait rcb, ppe, mac to clean.
3) adjust link
4) open rx of chip, open tx of protocol stack.
Signed-off-by: Peng Li <lipeng321@huawei.com>
---
drivers/net/ethernet/hisilicon/hns/hnae.h | 2 +
drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 67 +++++++++++++++++++++-
drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 36 ++++++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 44 ++++++++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 8 +++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 29 ++++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h | 3 +
drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 23 ++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h | 1 +
drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 23 ++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h | 1 +
drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 1 +
drivers/net/ethernet/hisilicon/hns/hns_enet.c | 21 ++++++-
13 files changed, 255 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index cad52bd..08a750f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -486,6 +486,8 @@ struct hnae_ae_ops {
u8 *auto_neg, u16 *speed, u8 *duplex);
void (*toggle_ring_irq)(struct hnae_ring *ring, u32 val);
void (*adjust_link)(struct hnae_handle *handle, int speed, int duplex);
+ bool (*need_adjust_link)(struct hnae_handle *handle,
+ int speed, int duplex);
int (*set_loopback)(struct hnae_handle *handle,
enum hnae_loop loop_mode, int en);
void (*get_ring_bdnum_limit)(struct hnae_queue *queue,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index e6aad30..b52029e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -155,6 +155,41 @@ static void hns_ae_put_handle(struct hnae_handle *handle)
hns_ae_get_ring_pair(handle->qs[i])->used_by_vf = 0;
}
+static int hns_ae_wait_flow_down(struct hnae_handle *handle)
+{
+ struct dsaf_device *dsaf_dev;
+ struct hns_ppe_cb *ppe_cb;
+ struct hnae_vf_cb *vf_cb;
+ int ret;
+ int i;
+
+ for (i = 0; i < handle->q_num; i++) {
+ ret = hns_rcb_wait_tx_ring_clean(handle->qs[i]);
+ if (ret)
+ return ret;
+ }
+
+ ppe_cb = hns_get_ppe_cb(handle);
+ ret = hns_ppe_wait_tx_fifo_clean(ppe_cb);
+ if (ret)
+ return ret;
+
+ dsaf_dev = hns_ae_get_dsaf_dev(handle->dev);
+ if (!dsaf_dev)
+ return -EINVAL;
+ ret = hns_dsaf_wait_pkt_clean(dsaf_dev, handle->dport_id);
+ if (ret)
+ return ret;
+
+ vf_cb = hns_ae_get_vf_cb(handle);
+ ret = hns_mac_wait_fifo_clean(vf_cb->mac_cb);
+ if (ret)
+ return ret;
+
+ mdelay(10);
+ return 0;
+}
+
static void hns_ae_ring_enable_all(struct hnae_handle *handle, int val)
{
int q_num = handle->q_num;
@@ -399,12 +434,41 @@ static int hns_ae_get_mac_info(struct hnae_handle *handle,
return hns_mac_get_port_info(mac_cb, auto_neg, speed, duplex);
}
+static bool hns_ae_need_adjust_link(struct hnae_handle *handle, int speed,
+ int duplex)
+{
+ struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
+
+ return hns_mac_need_adjust_link(mac_cb, speed, duplex);
+}
+
static void hns_ae_adjust_link(struct hnae_handle *handle, int speed,
int duplex)
{
struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
- hns_mac_adjust_link(mac_cb, speed, duplex);
+ switch (mac_cb->dsaf_dev->dsaf_ver) {
+ case AE_VERSION_1:
+ hns_mac_adjust_link(mac_cb, speed, duplex);
+ break;
+
+ case AE_VERSION_2:
+ /* chip need to clear all pkt inside */
+ hns_mac_disable(mac_cb, MAC_COMM_MODE_RX);
+ if (hns_ae_wait_flow_down(handle)) {
+ hns_mac_enable(mac_cb, MAC_COMM_MODE_RX);
+ break;
+ }
+
+ hns_mac_adjust_link(mac_cb, speed, duplex);
+ hns_mac_enable(mac_cb, MAC_COMM_MODE_RX);
+ break;
+
+ default:
+ break;
+ }
+
+ return;
}
static void hns_ae_get_ring_bdnum_limit(struct hnae_queue *queue,
@@ -902,6 +966,7 @@ static struct hnae_ae_ops hns_dsaf_ops = {
.get_status = hns_ae_get_link_status,
.get_info = hns_ae_get_mac_info,
.adjust_link = hns_ae_adjust_link,
+ .need_adjust_link = hns_ae_need_adjust_link,
.set_loopback = hns_ae_config_loopback,
.get_ring_bdnum_limit = hns_ae_get_ring_bdnum_limit,
.get_pauseparam = hns_ae_get_pauseparam,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 5488c6e..09e4061 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -257,6 +257,16 @@ static void hns_gmac_get_pausefrm_cfg(void *mac_drv, u32 *rx_pause_en,
*tx_pause_en = dsaf_get_bit(pause_en, GMAC_PAUSE_EN_TX_FDFC_B);
}
+static bool hns_gmac_need_adjust_link(void *mac_drv, enum mac_speed speed,
+ int duplex)
+{
+ struct mac_driver *drv = (struct mac_driver *)mac_drv;
+ struct hns_mac_cb *mac_cb = drv->mac_cb;
+
+ return (mac_cb->speed != speed) ||
+ (mac_cb->half_duplex == duplex);
+}
+
static int hns_gmac_adjust_link(void *mac_drv, enum mac_speed speed,
u32 full_duplex)
{
@@ -309,6 +319,30 @@ static void hns_gmac_set_promisc(void *mac_drv, u8 en)
hns_gmac_set_uc_match(mac_drv, en);
}
+int hns_gmac_wait_fifo_clean(void *mac_drv)
+{
+ struct mac_driver *drv = (struct mac_driver *)mac_drv;
+ int wait_cnt;
+ u32 val;
+
+ wait_cnt = 0;
+ while (wait_cnt++ < HNS_MAX_WAIT_CNT) {
+ val = dsaf_read_dev(drv, GMAC_FIFO_STATE_REG);
+ /* bit5~bit0 is not send complete pkts */
+ if ((val & 0x3f) == 0)
+ break;
+ usleep_range(100, 200);
+ }
+
+ if (wait_cnt >= HNS_MAX_WAIT_CNT) {
+ dev_err(drv->dev,
+ "hns ge %d fifo was not idle.\n", drv->mac_id);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
static void hns_gmac_init(void *mac_drv)
{
u32 port;
@@ -690,6 +724,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
mac_drv->mac_disable = hns_gmac_disable;
mac_drv->mac_free = hns_gmac_free;
mac_drv->adjust_link = hns_gmac_adjust_link;
+ mac_drv->need_adjust_link = hns_gmac_need_adjust_link;
mac_drv->set_tx_auto_pause_frames = hns_gmac_set_tx_auto_pause_frames;
mac_drv->config_max_frame_length = hns_gmac_config_max_frame_length;
mac_drv->mac_pausefrm_cfg = hns_gmac_pause_frm_cfg;
@@ -717,6 +752,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
mac_drv->get_strings = hns_gmac_get_strings;
mac_drv->update_stats = hns_gmac_update_stats;
mac_drv->set_promiscuous = hns_gmac_set_promisc;
+ mac_drv->wait_fifo_clean = hns_gmac_wait_fifo_clean;
return (void *)mac_drv;
}
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 1c2326b..6ed6f14 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -114,6 +114,26 @@ int hns_mac_get_port_info(struct hns_mac_cb *mac_cb,
return 0;
}
+/**
+ *hns_mac_need_adjust_link - check whether mac speed/duplex register must change
+ *@mac_cb: mac device
+ *@speed: phy device speed
+ *@duplex:phy device duplex
+ *
+ */
+bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex)
+{
+ struct mac_driver *mac_ctrl_drv;
+
+ mac_ctrl_drv = (struct mac_driver *)(mac_cb->priv.mac);
+
+ if (mac_ctrl_drv->need_adjust_link)
+ return mac_ctrl_drv->need_adjust_link(mac_ctrl_drv,
+ (enum mac_speed)speed, duplex);
+ else
+ return true;
+}
+
void hns_mac_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex)
{
int ret;
@@ -430,6 +450,16 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable)
return 0;
}
+int hns_mac_wait_fifo_clean(struct hns_mac_cb *mac_cb)
+{
+ struct mac_driver *drv = hns_mac_get_drv(mac_cb);
+
+ if (drv->wait_fifo_clean)
+ return drv->wait_fifo_clean(drv);
+
+ return 0;
+}
+
void hns_mac_reset(struct hns_mac_cb *mac_cb)
{
struct mac_driver *drv = hns_mac_get_drv(mac_cb);
@@ -998,6 +1028,20 @@ static int hns_mac_get_max_port_num(struct dsaf_device *dsaf_dev)
return DSAF_MAX_PORT_NUM;
}
+void hns_mac_enable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode)
+{
+ struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
+
+ mac_ctrl_drv->mac_enable(mac_cb->priv.mac, mode);
+}
+
+void hns_mac_disable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode)
+{
+ struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
+
+ mac_ctrl_drv->mac_disable(mac_cb->priv.mac, mode);
+}
+
/**
* hns_mac_init - init mac
* @dsaf_dev: dsa fabric device struct pointer
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index bbc0a98..fbc7534 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -356,6 +356,9 @@ struct mac_driver {
/*adjust mac mode of port,include speed and duplex*/
int (*adjust_link)(void *mac_drv, enum mac_speed speed,
u32 full_duplex);
+ /* need adjust link */
+ bool (*need_adjust_link)(void *mac_drv, enum mac_speed speed,
+ int duplex);
/* config autoegotaite mode of port*/
void (*set_an_mode)(void *mac_drv, u8 enable);
/* config loopbank mode */
@@ -394,6 +397,7 @@ struct mac_driver {
void (*get_info)(void *mac_drv, struct mac_info *mac_info);
void (*update_stats)(void *mac_drv);
+ int (*wait_fifo_clean)(void *mac_drv);
enum mac_mode mac_mode;
u8 mac_id;
@@ -427,6 +431,7 @@ void *hns_xgmac_config(struct hns_mac_cb *mac_cb,
int hns_mac_init(struct dsaf_device *dsaf_dev);
void mac_adjust_link(struct net_device *net_dev);
+bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex);
void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status);
int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, char *addr);
int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
@@ -463,5 +468,8 @@ int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
const unsigned char *addr);
int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn);
+void hns_mac_enable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode);
+void hns_mac_disable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode);
+int hns_mac_wait_fifo_clean(struct hns_mac_cb *mac_cb);
#endif /* _HNS_DSAF_MAC_H */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index ca50c25..e557a4e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -2727,6 +2727,35 @@ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX;
}
+int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port)
+{
+ u32 val, val_tmp;
+ int wait_cnt;
+
+ if (port >= DSAF_SERVICE_NW_NUM)
+ return 0;
+
+ wait_cnt = 0;
+ while (wait_cnt++ < HNS_MAX_WAIT_CNT) {
+ val = dsaf_read_dev(dsaf_dev, DSAF_VOQ_IN_PKT_NUM_0_REG +
+ (port + DSAF_XGE_NUM) * 0x40);
+ val_tmp = dsaf_read_dev(dsaf_dev, DSAF_VOQ_OUT_PKT_NUM_0_REG +
+ (port + DSAF_XGE_NUM) * 0x40);
+ if (val == val_tmp)
+ break;
+
+ usleep_range(100, 200);
+ }
+
+ if (wait_cnt >= HNS_MAX_WAIT_CNT) {
+ dev_err(dsaf_dev->dev, "hns dsaf clean wait timeout(%u - %u).\n",
+ val, val_tmp);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
/**
* dsaf_probe - probo dsaf dev
* @pdev: dasf platform device
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index 4507e82..0e1cd99 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -44,6 +44,8 @@ struct hns_mac_cb;
#define DSAF_ROCE_CREDIT_CHN 8
#define DSAF_ROCE_CHAN_MODE 3
+#define HNS_MAX_WAIT_CNT 10000
+
enum dsaf_roce_port_mode {
DSAF_ROCE_6PORT_MODE,
DSAF_ROCE_4PORT_MODE,
@@ -463,5 +465,6 @@ int hns_dsaf_rm_mac_addr(
int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev,
u8 mac_id, u8 port_num);
+int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port);
#endif /* __HNS_DSAF_MAIN_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index d160d8c..0942e49 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -275,6 +275,29 @@ static void hns_ppe_exc_irq_en(struct hns_ppe_cb *ppe_cb, int en)
dsaf_write_dev(ppe_cb, PPE_INTEN_REG, msk_vlue & vld_msk);
}
+int hns_ppe_wait_tx_fifo_clean(struct hns_ppe_cb *ppe_cb)
+{
+ int wait_cnt;
+ u32 val;
+
+ wait_cnt = 0;
+ while (wait_cnt++ < HNS_MAX_WAIT_CNT) {
+ val = dsaf_read_dev(ppe_cb, PPE_CURR_TX_FIFO0_REG) & 0x3ffU;
+ if (!val)
+ break;
+
+ usleep_range(100, 200);
+ }
+
+ if (wait_cnt >= HNS_MAX_WAIT_CNT) {
+ dev_err(ppe_cb->dev, "hns ppe tx fifo clean wait timeout, still has %u pkt.\n",
+ val);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
/**
* ppe_init_hw - init ppe
* @ppe_cb: ppe device
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
index 9d8e643..f670e63 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
@@ -100,6 +100,7 @@ struct ppe_common_cb {
};
+int hns_ppe_wait_tx_fifo_clean(struct hns_ppe_cb *ppe_cb);
int hns_ppe_init(struct dsaf_device *dsaf_dev);
void hns_ppe_uninit(struct dsaf_device *dsaf_dev);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 9d76e2e..5d64519 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -66,6 +66,29 @@ void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag)
"queue(%d) wait fbd(%d) clean fail!!\n", i, fbd_num);
}
+int hns_rcb_wait_tx_ring_clean(struct hnae_queue *qs)
+{
+ u32 head, tail;
+ int wait_cnt;
+
+ tail = dsaf_read_dev(&qs->tx_ring, RCB_REG_TAIL);
+ wait_cnt = 0;
+ while (wait_cnt++ < HNS_MAX_WAIT_CNT) {
+ head = dsaf_read_dev(&qs->tx_ring, RCB_REG_HEAD);
+ if (tail == head)
+ break;
+
+ usleep_range(100, 200);
+ }
+
+ if (wait_cnt >= HNS_MAX_WAIT_CNT) {
+ dev_err(qs->dev->dev, "rcb wait timeout, head not equal to tail.\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
/**
*hns_rcb_reset_ring_hw - ring reset
*@q: ring struct pointer
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
index 6028164..2319b77 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
@@ -136,6 +136,7 @@ void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag);
void hns_rcb_init_hw(struct ring_pair_cb *ring);
void hns_rcb_reset_ring_hw(struct hnae_queue *q);
void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag);
+int hns_rcb_wait_tx_ring_clean(struct hnae_queue *qs);
u32 hns_rcb_get_rx_coalesced_frames(
struct rcb_common_cb *rcb_common, u32 port_idx);
u32 hns_rcb_get_tx_coalesced_frames(
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index 886cbbf..74d935d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -464,6 +464,7 @@
#define RCB_RING_INTMSK_TX_OVERTIME_REG 0x000C4
#define RCB_RING_INTSTS_TX_OVERTIME_REG 0x000C8
+#define GMAC_FIFO_STATE_REG 0x0000UL
#define GMAC_DUPLEX_TYPE_REG 0x0008UL
#define GMAC_FD_FC_TYPE_REG 0x000CUL
#define GMAC_TX_WATER_LINE_REG 0x0010UL
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 02a0ba2..f56855e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1112,11 +1112,26 @@ static void hns_nic_adjust_link(struct net_device *ndev)
struct hnae_handle *h = priv->ae_handle;
int state = 1;
+ /* If there is no phy, do not need adjust link */
if (ndev->phydev) {
- h->dev->ops->adjust_link(h, ndev->phydev->speed,
- ndev->phydev->duplex);
- state = ndev->phydev->link;
+ /* When phy link down, do nothing */
+ if (ndev->phydev->link == 0)
+ return;
+
+ if (h->dev->ops->need_adjust_link(h, ndev->phydev->speed,
+ ndev->phydev->duplex)) {
+ /* because Hi161X chip don't support to change gmac
+ * speed and duplex with traffic. Delay 200ms to
+ * make sure there is no more data in chip FIFO.
+ */
+ netif_carrier_off(ndev);
+ msleep(200);
+ h->dev->ops->adjust_link(h, ndev->phydev->speed,
+ ndev->phydev->duplex);
+ netif_carrier_on(ndev);
+ }
}
+
state = state && h->dev->ops->get_status(h);
if (state != priv->link) {
--
2.9.3
^ permalink raw reply related
* Re: [PATCH] net: sched: Fix memory exposure from short TCA_U32_SEL
From: Julia Lawall @ 2018-08-27 1:59 UTC (permalink / raw)
To: Joe Perches
Cc: Al Viro, Kees Cook, LKML, Jamal Hadi Salim, Cong Wang, Jiri Pirko,
David S. Miller, Network Development
In-Reply-To: <eca48539a3dede3bfaed9ab9a6c06794cf8160e0.camel@perches.com>
On Sun, 26 Aug 2018, Joe Perches wrote:
> On Sun, 2018-08-26 at 22:24 +0100, Al Viro wrote:
> > On Sun, Aug 26, 2018 at 11:57:57AM -0700, Joe Perches wrote:
> >
> > > > That, BTW, is why I hate the use of sizeof(*p) in kmalloc, etc.
> > > > arguments. typeof is even worse in that respect.
> > >
> > > True. Semantic searches via tools like coccinelle could help here
> > > but those searches are quite a bit slower than straightforward greps.
> >
> > Those searches are .config-sensitive as well, which can be much more
> > unpleasant than being slow...
>
> Are they? Julia?
I don't completely understand the question. Coccinelle doesn't know
anything about the configuration.
julia
^ permalink raw reply
* [PATCH V2 net 0/2] net: hns: fix some bugs about speed and duplex change
From: Peng Li @ 2018-08-27 1:59 UTC (permalink / raw)
To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
lipeng321
If there are packets in hardware when changing the speed
or duplex, it may cause hardware hang up.
This patchset adds code that waits for the chip to clean out all
packets (TX & RX) in the chip when the driver uses the function named
"adjust link".
This patchset cleans the pkts as follows:
1) close rx of chip, close tx of protocol stack.
2) wait rcb, ppe, mac to clean.
3) adjust link
4) open rx of chip, open tx of protocol stack.
---
Change log:
V1 -> V2:
1, remove a patch according to the comment reported by Andrew Lunn.
---
Peng Li (2):
net: hns: add the code for cleaning pkt in chip
net: hns: add netif_carrier_off before change speed and duplex
drivers/net/ethernet/hisilicon/hns/hnae.h | 2 +
drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 67 +++++++++++++++++++++-
drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 36 ++++++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 44 ++++++++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 8 +++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 29 ++++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h | 3 +
drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 23 ++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h | 1 +
drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 23 ++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h | 1 +
drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 1 +
drivers/net/ethernet/hisilicon/hns/hns_enet.c | 21 ++++++-
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 2 +
14 files changed, 257 insertions(+), 4 deletions(-)
--
2.9.3
^ permalink raw reply
* Re: [PATCH] net: sched: Fix memory exposure from short TCA_U32_SEL
From: Kees Cook @ 2018-08-26 21:56 UTC (permalink / raw)
To: Jamal Hadi Salim
Cc: Al Viro, LKML, Cong Wang, Jiri Pirko, David S. Miller,
Network Development
In-Reply-To: <5c88b08d-b9ca-f3df-ae78-cf685ee6723a@mojatatu.com>
On Sun, Aug 26, 2018 at 10:30 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> We should add an nla_policy later.
What's the right way to do that for cases like this?
-Kees
--
Kees Cook
Pixel Security
^ permalink raw reply
* [V2][PATCH net] tipc: fix the big/little endian issue in tipc_dest
From: Haiqing Bai @ 2018-08-27 1:32 UTC (permalink / raw)
To: netdev, jon.maloy, ying.xue, davem; +Cc: zhenbo.gao, haiqing.bai, linux-kernel
In function tipc_dest_push, the 32bit variables 'node' and 'port'
are stored separately in upper and lower part of 64bit 'value'.
Then this value is assigned to dst->value which is a union like:
union
{
struct {
u32 port;
u32 node;
};
u64 value;
}
This works on little-endian machines like x86 but fails on big-endian
machines.
The fix removes the 'value' stack variable and even the 'value'
member of the union in tipc_dest, and assigns the 'node' and 'port'
members directly from the input parameters to avoid the endian issue.
Fixes: a80ae5306a73 ("tipc: improve destination linked list")
Signed-off-by: Zhenbo Gao <zhenbo.gao@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Haiqing Bai <Haiqing.Bai@windriver.com>
---
net/tipc/name_table.c | 10 ++++------
net/tipc/name_table.h | 9 ++-------
2 files changed, 6 insertions(+), 13 deletions(-)
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 88f027b..66d5b2c 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -980,20 +980,17 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
{
- u64 value = (u64)node << 32 | port;
struct tipc_dest *dst;
list_for_each_entry(dst, l, list) {
- if (dst->value != value)
- continue;
- return dst;
+ if (dst->node == node && dst->port == port)
+ return dst;
}
return NULL;
}
bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
{
- u64 value = (u64)node << 32 | port;
struct tipc_dest *dst;
if (tipc_dest_find(l, node, port))
@@ -1002,7 +999,8 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
if (unlikely(!dst))
return false;
- dst->value = value;
+ dst->node = node;
+ dst->port = port;
list_add(&dst->list, l);
return true;
}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 0febba4..892bd75 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -133,13 +133,8 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
struct tipc_dest {
struct list_head list;
- union {
- struct {
- u32 port;
- u32 node;
- };
- u64 value;
- };
+ u32 port;
+ u32 node;
};
struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);
--
1.9.1
^ permalink raw reply related
* [GIT] Networking
From: David Miller @ 2018-08-26 21:37 UTC (permalink / raw)
To: torvalds; +Cc: akpm, netdev, linux-kernel
1) ICE, E1000, IGB, IXGBE, and I40E bug fixes from the Intel folks.
2) Better fix for AB-BA deadlock in packet scheduler code, from Cong
Wang.
3) bpf sockmap fixes (zero sized key handling, etc.) from Daniel
Borkmann.
4) Send zero IPID in TCP resets and SYN-RECV state ACKs, to prevent
attackers using it as a side-channel. From Eric Dumazet.
5) Memory leak in mediatek bluetooth driver, from Gustavo A. R. Silva.
6) Hook up rt->dst.input of ipv6 anycast routes properly, from Hangbin
Liu.
7) hns and hns3 bug fixes from Huazhong Tan.
8) Fix RIF leak in mlxsw driver, from Ido Schimmel.
9) iova range check fix in vhost, from Jason Wang.
10) Fix hang in do_tcp_sendpages() with tls, from John Fastabend.
11) More r8152 chips need to disable RX aggregation, from Kai-Heng
Feng.
12) Memory exposure in TCA_U32_SEL handling, from Kees Cook.
13) TCP BBR congestion control fixes from Kevin Yang.
14) hv_netvsc, ignore non-PCI devices, from Stephen Hemminger.
15) qed driver fixes from Tomer Tayar.
Please pull, thanks a lot!
The following changes since commit 2ad0d52699700a91660a406a4046017a2d7f246a:
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2018-08-19 11:51:45 -0700)
are available in the Git repository at:
gitolite@ra.kernel.org:/pub/scm/linux/kernel/git/davem/net.git
for you to fetch changes up to 98c8f125fd8a6240ea343c1aa50a1be9047791b8:
net: sched: Fix memory exposure from short TCA_U32_SEL (2018-08-26 14:21:50 -0700)
----------------------------------------------------------------
Ahmad Fatoum (1):
net: macb: Fix regression breaking non-MDIO fixed-link PHYs
Anirudh Venkataramanan (5):
ice: Fix multiple static analyser warnings
ice: Cleanup magic number
ice: Fix bugs in control queue processing
ice: Fix a few null pointer dereference issues
ice: Trivial formatting fixes
Anssi Hannula (1):
net: macb: do not disable MDIO bus at open/close time
Arnd Bergmann (1):
net_sched: fix unused variable warning in stmmac
Bo Chen (2):
e1000: check on netif_running() before calling e1000_up()
e1000: ensure to free old tx/rx rings in set_ringparam()
Brett Creeley (1):
ice: Set VLAN flags correctly
Bruce Allan (3):
ice: Remove unnecessary node owner check
ice: Update to interrupts enabled in OICR
ice: Change struct members from bool to u8
Colin Ian King (1):
qed: fix spelling mistake "comparsion" -> "comparison"
Cong Wang (10):
net_sched: improve and refactor tcf_action_put_many()
net_sched: remove unnecessary ops->delete()
net_sched: remove unused parameter for tcf_action_delete()
net_sched: remove unused tcf_idr_check()
net_sched: remove list_head from tc_action
net_sched: remove unused tcfa_capab
Revert "net: sched: act_ife: disable bh when taking ife_mod_lock"
act_ife: move tcfa_lock down to where necessary
act_ife: fix a potential deadlock
addrconf: reduce unnecessary atomic allocations
Daniel Borkmann (3):
bpf, sockmap: fix sock_hash_alloc and reject zero-sized keys
bpf, sockmap: fix sock hash count in alloc_sock_hash_elem
bpf: use per htab salt for bucket hash
David Ahern (1):
net/ipv6: Put lwtstate when destroying fib6_info
David S. Miller (9):
Merge branch 'qed-Misc-fixes-in-the-interface-with-the-MFW'
Merge branch 'net_sched-fixes'
Merge branch 'tcp_bbr-PROBE_RTT-minor-bug-fixes'
Merge branch 'hns-fixes'
Merge branch 'hns3-fixes'
Merge branch 'for-upstream' of git://git.kernel.org/.../bluetooth/bluetooth
Merge branch '100GbE' of git://git.kernel.org/.../jkirsher/net-queue
Merge git://git.kernel.org/.../bpf/bpf
Merge branch '1GbE' of git://git.kernel.org/.../jkirsher/net-queue
Eric Dumazet (1):
ipv4: tcp: send zero IPID for RST and ACK sent in SYN-RECV and TIME-WAIT state
Geert Uytterhoeven (1):
Revert "net: stmmac: fix build failure due to missing COMMON_CLK dependency"
Gustavo A. R. Silva (1):
Bluetooth: mediatek: Fix memory leak
Haishuang Yan (1):
ip6_vti: fix a null pointer deference when destroy vti6 tunnel
Hangbin Liu (1):
net/ipv6: init ip6 anycast rt->dst.input as ip6_input
Hans de Goede (1):
Bluetooth: Make BT_HCIUART_RTL configuration option depend on ACPI
Huazhong Tan (6):
net: hns: fix length and page_offset overflow when CONFIG_ARM64_64K_PAGES
net: hns: modify variable type in hns_nic_reuse_page
net: hns: fix skb->truesize underestimation
net: hns: use eth_get_headlen interface instead of hns_nic_get_headlen
net: hns3: fix page_offset overflow when CONFIG_ARM64_64K_PAGES
net: hns3: modify variable type in hns3_nic_reuse_page
Ido Schimmel (1):
mlxsw: spectrum_switchdev: Do not leak RIFs when removing bridge
Jacob Keller (3):
ice: Report stats for allocated queues via ethtool stats
ice: Use order_base_2 to calculate higher power of 2
i40e: fix condition of WARN_ONCE for stat strings
Jason Wang (1):
vhost: correctly check the iova range when waking virtqueue
Jesse Brandeburg (1):
ice: Fix potential return of uninitialized value
Jesus Sanchez-Palencia (1):
igb: Use an advanced ctx descriptor for launchtime
Jia-Ju Bai (3):
igb: Replace GFP_ATOMIC with GFP_KERNEL in igb_sw_init()
igb: Replace mdelay() with msleep() in igb_integrated_phy_loopback()
ixgbe: Replace GFP_ATOMIC with GFP_KERNEL
John Fastabend (2):
tls: possible hang when do_tcp_sendpages hits sndbuf is full case
bpf: sockmap: write_space events need to be passed to TCP handler
Kai-Heng Feng (1):
r8152: disable RX aggregation on new Dell TB16 dock
Kees Cook (1):
net: sched: Fix memory exposure from short TCA_U32_SEL
Kevin Yang (3):
tcp_bbr: add bbr_check_probe_rtt_done() helper
tcp_bbr: in restart from idle, see if we should exit PROBE_RTT
tcp_bbr: apply PROBE_RTT cwnd cap even if acked==0
Manish Chopra (1):
qlge: Fix netdev features configuration.
Martyna Szapar (1):
i40e: Fix for Tx timeouts when interface is brought up if DCB is enabled
Prashant Bhole (1):
xsk: fix return value of xdp_umem_assign_dev()
Preethi Banala (1):
ice: Clean control queues only when they are initialized
Quentin Monnet (1):
tools: bpftool: return from do_event_pipe() on bad arguments
Samuel Mendoza-Jonas (1):
net/ncsi: Fixup .dumpit message flags and ID check in Netlink handler
Sebastian Basierski (1):
ixgbe: fix driver behaviour after issuing VFLR
Stephen Hemminger (1):
hv_netvsc: ignore devices that are not PCI
Toke Høiland-Jørgensen (1):
sch_cake: Fix TC filter flow override and expand it to hosts as well
Tomer Tayar (4):
qed: Wait for ready indication before rereading the shmem
qed: Wait for MCP halt and resume commands to take place
qed: Prevent a possible deadlock during driver load and unload
qed: Avoid sending mailbox commands when MFW is not responsive
Tony Nguyen (1):
ixgbe: Prevent unsupported configurations with XDP
Wolfram Sang (1):
net: ethernet: renesas: use SPDX identifier for Renesas drivers
Yue Haibing (3):
rhashtable: remove duplicated include from rhashtable.c
sch_cake: Remove unused including <linux/version.h>
rds: tcp: remove duplicated include from tcp.c
YueHaibing (1):
ixgb: use dma_zalloc_coherent instead of allocator/memset
drivers/bluetooth/Kconfig | 1 +
drivers/bluetooth/btmtkuart.c | 8 ++--
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 6 +--
drivers/net/ethernet/cadence/macb_main.c | 36 ++++++++++------
drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 10 ++---
drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 5 +--
drivers/net/ethernet/hisilicon/hns/hnae.h | 6 +--
drivers/net/ethernet/hisilicon/hns/hns_enet.c | 108 ++----------------------------------------------
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 +-
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 6 +--
drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 7 ++--
drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +-
drivers/net/ethernet/intel/i40e/i40e_main.c | 15 +++----
drivers/net/ethernet/intel/ice/ice.h | 15 +++++--
drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 25 ++++++------
drivers/net/ethernet/intel/ice/ice_common.c | 30 ++++++++------
drivers/net/ethernet/intel/ice/ice_controlq.c | 29 +++++++++----
drivers/net/ethernet/intel/ice/ice_ethtool.c | 52 ++++++++++++++++++------
drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 8 ----
drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 1 +
drivers/net/ethernet/intel/ice/ice_main.c | 115 ++++++++++++++++++++++++++++++----------------------
drivers/net/ethernet/intel/ice/ice_nvm.c | 5 +--
drivers/net/ethernet/intel/ice/ice_sched.c | 3 +-
drivers/net/ethernet/intel/ice/ice_switch.c | 4 +-
drivers/net/ethernet/intel/ice/ice_switch.h | 6 +--
drivers/net/ethernet/intel/ice/ice_txrx.h | 2 +-
drivers/net/ethernet/intel/ice/ice_type.h | 16 ++++----
drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +-
drivers/net/ethernet/intel/igb/igb_main.c | 7 ++--
drivers/net/ethernet/intel/ixgb/ixgb_main.c | 5 +--
drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 4 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 36 ++++++++++++----
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 31 ++++++++++++++
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 1 +
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 19 ++++-----
drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 3 +-
drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 +
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 +--
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 11 +++++
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 20 +++++++++
drivers/net/ethernet/netronome/nfp/flower/action.c | 6 +--
drivers/net/ethernet/qlogic/qed/qed_init_ops.c | 2 +-
drivers/net/ethernet/qlogic/qed/qed_mcp.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
drivers/net/ethernet/qlogic/qed/qed_mcp.h | 27 ++++++++----
drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 2 +
drivers/net/ethernet/qlogic/qede/qede_filter.c | 6 +--
drivers/net/ethernet/qlogic/qlge/qlge_main.c | 23 ++++-------
drivers/net/ethernet/renesas/ravb.h | 5 +--
drivers/net/ethernet/renesas/ravb_main.c | 5 +--
drivers/net/ethernet/renesas/sh_eth.c | 13 +-----
drivers/net/ethernet/renesas/sh_eth.h | 13 +-----
drivers/net/ethernet/stmicro/stmmac/Kconfig | 10 ++---
drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 5 +--
drivers/net/hyperv/netvsc_drv.c | 5 +++
drivers/net/usb/r8152.c | 4 +-
drivers/vhost/vhost.c | 2 +-
include/net/act_api.h | 7 ----
include/net/pkt_cls.h | 25 +++++++-----
kernel/bpf/hashtab.c | 23 ++++++-----
kernel/bpf/sockmap.c | 11 ++++-
lib/rhashtable.c | 1 -
net/dsa/slave.c | 4 +-
net/ipv4/tcp_bbr.c | 42 +++++++++++--------
net/ipv4/tcp_ipv4.c | 6 +++
net/ipv6/addrconf.c | 6 +--
net/ipv6/ip6_fib.c | 2 +
net/ipv6/ip6_vti.c | 3 +-
net/ipv6/route.c | 2 +-
net/ncsi/ncsi-netlink.c | 4 +-
net/rds/tcp.c | 1 -
net/sched/act_api.c | 70 ++++++++++++--------------------
net/sched/act_bpf.c | 8 ----
net/sched/act_connmark.c | 8 ----
net/sched/act_csum.c | 8 ----
net/sched/act_gact.c | 8 ----
net/sched/act_ife.c | 92 ++++++++++++++++++-----------------------
net/sched/act_ipt.c | 16 --------
net/sched/act_mirred.c | 8 ----
net/sched/act_nat.c | 8 ----
net/sched/act_pedit.c | 8 ----
net/sched/act_police.c | 8 ----
net/sched/act_sample.c | 8 ----
net/sched/act_simple.c | 8 ----
net/sched/act_skbedit.c | 8 ----
net/sched/act_skbmod.c | 8 ----
net/sched/act_tunnel_key.c | 8 ----
net/sched/act_vlan.c | 8 ----
net/sched/cls_u32.c | 10 ++++-
net/sched/sch_cake.c | 24 ++++++++---
net/tls/tls_main.c | 9 +++-
net/xdp/xdp_umem.c | 4 +-
tools/bpf/bpftool/map_perf_ring.c | 5 ++-
92 files changed, 754 insertions(+), 711 deletions(-)
^ permalink raw reply
* Re: [PATCH NET 3/3] net: hns: add configuration constraints for 1000M half
From: lipeng (Y) @ 2018-08-27 1:08 UTC (permalink / raw)
To: Andrew Lunn
Cc: davem, netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta
In-Reply-To: <20180825180721.GB23992@lunn.ch>
On 2018/8/26 2:07, Andrew Lunn wrote:
>> This patch is a theoretical protect, and the problem does not really
>> happen.
>>
>> I think you really get the point, do you think we need this patch?
> I think it is not needed.
>
> And if it was needed, it would indicate there is a bug somewhere else.
Hi, Andrew
It is only a theoretical protection, so we can remove this patch from the patchset.
Thanks.
>
> Andrew
>
> .
>
^ permalink raw reply
* Re: confusing comment, explanation of @IFF_RUNNING in if.h
From: Stephen Hemminger @ 2018-08-26 20:51 UTC (permalink / raw)
To: Robert P. J. Day; +Cc: Andrew Lunn, Linux kernel netdev mailing list
In-Reply-To: <alpine.LFD.2.21.1808261518250.8901@localhost.localdomain>
On Sun, 26 Aug 2018 15:20:24 -0400 (EDT)
"Robert P. J. Day" <rpjday@crashcourse.ca> wrote:
> On Sun, 26 Aug 2018, Andrew Lunn wrote:
>
> > > i ask since, in my testing, when the interface should have been
> > > up, the attribute file "operstate" for that interface showed
> > > "unknown", and i wondered how worried i should be about that.
> >
> > Hi Robert
> >
> > You should probably post the driver for review. A well written
> > driver should not even need to care about any of this. phylib and
> > the netdev driver code does all the work. It only gets interesting
> > when you don't have a PHY, e.g. a stacked device, like bonding, or a
> > virtual device like tun/tap.
>
> i wish, but i'm on contract, and proprietary, and NDA and all that.
> so i am reduced to crawling through the code, trying to figure out
> what is misconfigured that is causing all this grief.
>
> rday
>
So you expect FOSS developers to help you with a proprietary licensed
driver? Good luck with that.
^ permalink raw reply
* Re: followup: what's responsible for setting netdev->operstate to IF_OPER_DOWN?
From: Stephen Hemminger @ 2018-08-26 20:50 UTC (permalink / raw)
To: Robert P. J. Day; +Cc: Linux kernel netdev mailing list
In-Reply-To: <alpine.LFD.2.21.1808261102450.13415@localhost.localdomain>
On Sun, 26 Aug 2018 11:14:33 -0400 (EDT)
"Robert P. J. Day" <rpjday@crashcourse.ca> wrote:
> apologies for the constant pleas for assistance, but i think i'm
> zeroing in on the problem that started all this. recap: custom
> FPGA-based linux box with multiple ports, where the current symptom is
> that there is no userspace notification when someone simply unplugs
> one of the ports ("ifconfig" shows that interface still RUNNING).
>
> as i read it, an active ethernet interface should be both UP (the
> administrative state) and RUNNING (the RFC 2863-defined operational
> state). if i unplug, i've verified on a standard net port on my laptop
> that the interface is still UP, but no longer RUNNING, which makes
> perfect sense. i plug back in, interface starts RUNNING again. so
> where's the problem?
>
> i can see that whether ifconfig shows an interface RUNNING is
> defined in net/core/dev.c:
>
> unsigned int dev_get_flags(const struct net_device *dev)
> {
> unsigned int flags;
>
> flags = (dev->flags & ~(IFF_PROMISC |
> IFF_ALLMULTI |
> IFF_RUNNING |
> IFF_LOWER_UP |
> IFF_DORMANT)) |
> (dev->gflags & (IFF_PROMISC |
> IFF_ALLMULTI));
>
> if (netif_running(dev)) {
> if (netif_oper_up(dev))
> flags |= IFF_RUNNING; <---- THERE
> if (netif_carrier_ok(dev))
> flags |= IFF_LOWER_UP;
> if (netif_dormant(dev))
> flags |= IFF_DORMANT;
> }
>
> return flags;
> }
>
> where netif_oper_up() is defined as:
>
> static inline bool netif_oper_up(const struct net_device *dev)
> {
> return (dev->operstate == IF_OPER_UP ||
> dev->operstate == IF_OPER_UNKNOWN /* backward compat */);
> }
>
> so i am simply assuming that the underlying problem is that,
> somewhere down below, the unplugging of a port is somehow not setting
> dev->operstate to its proper value of IF_OPER_DOWN.
>
> that would clearly explain everything, and i'm about to dig even
> further to see where the event of unplugging a port *should* be
> recognized, but does this sound like a reasonable diagnosis? there
> have been other problems with the programming of the FPGA, so it would
> surprise absolutely no one to learn that this aspect was
> misprogrammed.
>
> rday
>
There is no reason drivers should ever muck with flags directly.
You probably are looking for netif_detach
^ permalink raw reply
* Re: KASAN: use-after-free Read in sctp_transport_get_next
From: syzbot @ 2018-08-26 19:36 UTC (permalink / raw)
To: davem, linux-kernel, linux-sctp, lucien.xin, marcelo.leitner,
netdev, nhorman, syzkaller-bugs, vyasevich
In-Reply-To: <000000000000ccaf1d0574297b6b@google.com>
syzbot has found a reproducer for the following crash on:
HEAD commit: 2923b27e5424 Merge tag 'libnvdimm-for-4.19_dax-memory-fail..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=10f01a1e400000
kernel config: https://syzkaller.appspot.com/x/.config?x=3b576e333ca31bb2
dashboard link: https://syzkaller.appspot.com/bug?extid=fe62a0c9aa6a85c6de16
compiler: gcc (GCC) 8.0.1 20180413 (experimental)
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=1022f96a400000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=13be8b32400000
IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+fe62a0c9aa6a85c6de16@syzkaller.appspotmail.com
==================================================================
BUG: KASAN: use-after-free in sctp_transport_get_next+0x11c/0x140
net/sctp/socket.c:5008
Read of size 8 at addr ffff8801b09a23e0 by task syz-executor305/13769
CPU: 0 PID: 13769 Comm: syz-executor305 Not tainted 4.18.0+ #209
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
print_address_description+0x6c/0x20b mm/kasan/report.c:256
kasan_report_error mm/kasan/report.c:354 [inline]
kasan_report.cold.7+0x242/0x30d mm/kasan/report.c:412
__asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
sctp_transport_get_next+0x11c/0x140 net/sctp/socket.c:5008
sctp_transport_get_idx net/sctp/socket.c:5022 [inline]
sctp_for_each_transport+0x152/0x370 net/sctp/socket.c:5083
sctp_diag_dump+0x3a7/0x620 net/sctp/diag.c:527
__inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049
inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065
netlink_dump+0x519/0xd50 net/netlink/af_netlink.c:2233
__netlink_dump_start+0x4f1/0x6f0 net/netlink/af_netlink.c:2329
netlink_dump_start include/linux/netlink.h:213 [inline]
inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170
__sock_diag_cmd net/core/sock_diag.c:232 [inline]
sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263
netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274
netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
sock_sendmsg_nosec net/socket.c:621 [inline]
sock_sendmsg+0xd5/0x120 net/socket.c:631
sock_write_iter+0x362/0x5c0 net/socket.c:900
call_write_iter include/linux/fs.h:1807 [inline]
do_iter_readv_writev+0x8b0/0xa80 fs/read_write.c:680
do_iter_write+0x185/0x5f0 fs/read_write.c:959
vfs_writev+0x1f1/0x360 fs/read_write.c:1004
do_writev+0x11a/0x310 fs/read_write.c:1039
__do_sys_writev fs/read_write.c:1112 [inline]
__se_sys_writev fs/read_write.c:1109 [inline]
__x64_sys_writev+0x75/0xb0 fs/read_write.c:1109
do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x446679
Code: e8 0c e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
ff 0f 83 7b 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f5b0050bda8 EFLAGS: 00000246 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 00000000006dbc48 RCX: 0000000000446679
RDX: 0000000000000001 RSI: 0000000020000000 RDI: 0000000000000005
RBP: 00000000006dbc40 R08: 000000000000001c R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dbc4c
R13: 0100000000000000 R14: 00007f5b0050c9c0 R15: 00000000006dbd4c
Allocated by task 13750:
save_stack+0x43/0xd0 mm/kasan/kasan.c:448
set_track mm/kasan/kasan.c:460 [inline]
kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
kmem_cache_alloc_trace+0x152/0x730 mm/slab.c:3620
kmalloc include/linux/slab.h:513 [inline]
kzalloc include/linux/slab.h:707 [inline]
sctp_association_new+0x127/0x2290 net/sctp/associola.c:311
sctp_sendmsg_new_asoc+0x39e/0x1200 net/sctp/socket.c:1742
sctp_sendmsg+0x18a5/0x1da0 net/sctp/socket.c:2103
inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
sock_sendmsg_nosec net/socket.c:621 [inline]
sock_sendmsg+0xd5/0x120 net/socket.c:631
__sys_sendto+0x3d7/0x670 net/socket.c:1786
__do_sys_sendto net/socket.c:1798 [inline]
__se_sys_sendto net/socket.c:1794 [inline]
__x64_sys_sendto+0xe1/0x1a0 net/socket.c:1794
do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe
Freed by task 13747:
save_stack+0x43/0xd0 mm/kasan/kasan.c:448
set_track mm/kasan/kasan.c:460 [inline]
__kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
__cache_free mm/slab.c:3498 [inline]
kfree+0xd9/0x210 mm/slab.c:3813
sctp_association_destroy net/sctp/associola.c:437 [inline]
sctp_association_put+0x264/0x350 net/sctp/associola.c:885
sctp_transport_destroy net/sctp/transport.c:180 [inline]
sctp_transport_put+0x193/0x200 net/sctp/transport.c:332
sctp_backlog_rcv+0x28e/0xd50 net/sctp/input.c:357
sk_backlog_rcv include/net/sock.h:931 [inline]
__release_sock+0x12f/0x3a0 net/core/sock.c:2336
release_sock+0xad/0x2c0 net/core/sock.c:2849
sctp_close+0x49c/0xa80 net/sctp/socket.c:1569
inet_release+0x104/0x1f0 net/ipv4/af_inet.c:428
inet6_release+0x50/0x70 net/ipv6/af_inet6.c:457
__sock_release+0xd7/0x250 net/socket.c:579
sock_close+0x19/0x20 net/socket.c:1139
__fput+0x36e/0x8c0 fs/file_table.c:278
____fput+0x15/0x20 fs/file_table.c:309
task_work_run+0x1e8/0x2a0 kernel/task_work.c:113
tracehook_notify_resume include/linux/tracehook.h:193 [inline]
exit_to_usermode_loop+0x318/0x380 arch/x86/entry/common.c:166
prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
entry_SYSCALL_64_after_hwframe+0x49/0xbe
The buggy address belongs to the object at ffff8801b09a23c0
which belongs to the cache kmalloc-4096 of size 4096
The buggy address is located 32 bytes inside of
4096-byte region [ffff8801b09a23c0, ffff8801b09a33c0)
The buggy address belongs to the page:
page:ffffea0006c26880 count:1 mapcount:0 mapping:ffff8801dac00dc0 index:0x0
compound_mapcount: 0
flags: 0x2fffc0000008100(slab|head)
raw: 02fffc0000008100 ffffea0006c0f188 ffffea0006bf8108 ffff8801dac00dc0
raw: 0000000000000000 ffff8801b09a23c0 0000000100000001 0000000000000000
page dumped because: kasan: bad access detected
Memory state around the buggy address:
ffff8801b09a2280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff8801b09a2300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> ffff8801b09a2380: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
^
ffff8801b09a2400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
ffff8801b09a2480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================
^ permalink raw reply
* Re: confusing comment, explanation of @IFF_RUNNING in if.h
From: Robert P. J. Day @ 2018-08-26 19:30 UTC (permalink / raw)
To: Andrew Lunn; +Cc: Linux kernel netdev mailing list
In-Reply-To: <20180826192854.GE7359@lunn.ch>
On Sun, 26 Aug 2018, Andrew Lunn wrote:
> On Sun, Aug 26, 2018 at 03:20:24PM -0400, Robert P. J. Day wrote:
> > On Sun, 26 Aug 2018, Andrew Lunn wrote:
> >
> > > > i ask since, in my testing, when the interface should have been
> > > > up, the attribute file "operstate" for that interface showed
> > > > "unknown", and i wondered how worried i should be about that.
> > >
> > > Hi Robert
> > >
> > > You should probably post the driver for review. A well written
> > > driver should not even need to care about any of this. phylib and
> > > the netdev driver code does all the work. It only gets interesting
> > > when you don't have a PHY, e.g. a stacked device, like bonding, or a
> > > virtual device like tun/tap.
> >
> > i wish, but i'm on contract, and proprietary, and NDA and all that.
> > so i am reduced to crawling through the code, trying to figure out
> > what is misconfigured that is causing all this grief.
>
> I would say proprietary and NDA is causing you all this grief.
>
> There is also the point that if you are not going to contribute the
> code to mainline, why should we help you?
>
> The code is GPL after all, so you can post it.
i'm confident that it will *eventually* be GPLed (i can't imagine
there is any other outcome), but for now, there's nothing i can do.
rday
--
========================================================================
Robert P. J. Day Ottawa, Ontario, CANADA
http://crashcourse.ca/dokuwiki
Twitter: http://twitter.com/rpjday
LinkedIn: http://ca.linkedin.com/in/rpjday
========================================================================
^ permalink raw reply
* Re: confusing comment, explanation of @IFF_RUNNING in if.h
From: Andrew Lunn @ 2018-08-26 19:28 UTC (permalink / raw)
To: Robert P. J. Day; +Cc: Linux kernel netdev mailing list
In-Reply-To: <alpine.LFD.2.21.1808261518250.8901@localhost.localdomain>
On Sun, Aug 26, 2018 at 03:20:24PM -0400, Robert P. J. Day wrote:
> On Sun, 26 Aug 2018, Andrew Lunn wrote:
>
> > > i ask since, in my testing, when the interface should have been
> > > up, the attribute file "operstate" for that interface showed
> > > "unknown", and i wondered how worried i should be about that.
> >
> > Hi Robert
> >
> > You should probably post the driver for review. A well written
> > driver should not even need to care about any of this. phylib and
> > the netdev driver code does all the work. It only gets interesting
> > when you don't have a PHY, e.g. a stacked device, like bonding, or a
> > virtual device like tun/tap.
>
> i wish, but i'm on contract, and proprietary, and NDA and all that.
> so i am reduced to crawling through the code, trying to figure out
> what is misconfigured that is causing all this grief.
I would say proprietary and NDA is causing you all this grief.
There is also the point that if you are not going to contribute the
code to mainline, why should we help you?
The code is GPL after all, so you can post it.
Andrew
^ permalink raw reply
* Re: followup: what's responsible for setting netdev->operstate to IF_OPER_DOWN?
From: Robert P. J. Day @ 2018-08-26 19:26 UTC (permalink / raw)
To: Andrew Lunn; +Cc: Linux kernel netdev mailing list
In-Reply-To: <20180826192445.GD7359@lunn.ch>
On Sun, 26 Aug 2018, Andrew Lunn wrote:
> On Sun, Aug 26, 2018 at 11:14:33AM -0400, Robert P. J. Day wrote:
> >
> > apologies for the constant pleas for assistance, but i think i'm
> > zeroing in on the problem that started all this. recap: custom
> > FPGA-based linux box with multiple ports, where the current
> > symptom is that there is no userspace notification when someone
> > simply unplugs one of the ports ("ifconfig" shows that interface
> > still RUNNING).
>
> What are you using for a PHY? Are you using phylib or phylink? I
> strongly suggest you do, and then you don't need to care about any
> of this.
i'm not sure i'm even allowed to talk about that given the NDA --
i'll just muddle through following the code until i figure out what is
failing to notify userspace. le *sigh*.
rday
--
========================================================================
Robert P. J. Day Ottawa, Ontario, CANADA
http://crashcourse.ca/dokuwiki
Twitter: http://twitter.com/rpjday
LinkedIn: http://ca.linkedin.com/in/rpjday
========================================================================
^ permalink raw reply
* Re: followup: what's responsible for setting netdev->operstate to IF_OPER_DOWN?
From: Andrew Lunn @ 2018-08-26 19:24 UTC (permalink / raw)
To: Robert P. J. Day; +Cc: Linux kernel netdev mailing list
In-Reply-To: <alpine.LFD.2.21.1808261102450.13415@localhost.localdomain>
On Sun, Aug 26, 2018 at 11:14:33AM -0400, Robert P. J. Day wrote:
>
> apologies for the constant pleas for assistance, but i think i'm
> zeroing in on the problem that started all this. recap: custom
> FPGA-based linux box with multiple ports, where the current symptom is
> that there is no userspace notification when someone simply unplugs
> one of the ports ("ifconfig" shows that interface still RUNNING).
What are you using for a PHY? Are you using phylib or phylink? I
strongly suggest you do, and then you don't need to care about any of
this.
Andrew
^ permalink raw reply
* Re: confusing comment, explanation of @IFF_RUNNING in if.h
From: Robert P. J. Day @ 2018-08-26 19:20 UTC (permalink / raw)
To: Andrew Lunn; +Cc: Linux kernel netdev mailing list
In-Reply-To: <20180826192001.GC7359@lunn.ch>
On Sun, 26 Aug 2018, Andrew Lunn wrote:
> > i ask since, in my testing, when the interface should have been
> > up, the attribute file "operstate" for that interface showed
> > "unknown", and i wondered how worried i should be about that.
>
> Hi Robert
>
> You should probably post the driver for review. A well written
> driver should not even need to care about any of this. phylib and
> the netdev driver code does all the work. It only gets interesting
> when you don't have a PHY, e.g. a stacked device, like bonding, or a
> virtual device like tun/tap.
i wish, but i'm on contract, and proprietary, and NDA and all that.
so i am reduced to crawling through the code, trying to figure out
what is misconfigured that is causing all this grief.
rday
--
========================================================================
Robert P. J. Day Ottawa, Ontario, CANADA
http://crashcourse.ca/dokuwiki
Twitter: http://twitter.com/rpjday
LinkedIn: http://ca.linkedin.com/in/rpjday
========================================================================
^ permalink raw reply
* Re: confusing comment, explanation of @IFF_RUNNING in if.h
From: Andrew Lunn @ 2018-08-26 19:20 UTC (permalink / raw)
To: Robert P. J. Day; +Cc: Linux kernel netdev mailing list
In-Reply-To: <alpine.LFD.2.21.1808260410530.31293@localhost.localdomain>
> i ask since, in my testing, when the interface should have been up,
> the attribute file "operstate" for that interface showed "unknown",
> and i wondered how worried i should be about that.
Hi Robert
You should probably post the driver for review. A well written driver
should not even need to care about any of this. phylib and the netdev
driver code does all the work. It only gets interesting when you don't
have a PHY, e.g. a stacked device, like bonding, or a virtual device
like tun/tap.
Andrew
^ permalink raw reply
* Re: [PATCH] net: sched: Fix memory exposure from short TCA_U32_SEL
From: Al Viro @ 2018-08-26 22:57 UTC (permalink / raw)
To: Kees Cook
Cc: LKML, Jamal Hadi Salim, Cong Wang, Jiri Pirko, David S. Miller,
Network Development
In-Reply-To: <20180826173236.GU6515@ZenIV.linux.org.uk>
On Sun, Aug 26, 2018 at 06:32:37PM +0100, Al Viro wrote:
> As far as I can tell, the solution is
[snip long and painful reasoning]
> pointers, and not in provably opaque fashion. Theoretically, the three tcf_...
> inlines above need another look; fortunately, they don't use ->next at all, not to
> mention not being used anywhere outside of net/sched/*.c
>
> The 80 lines above prove that we only need to grep net/sched/*.c for
> tcf_proto_ops method calls. And only because we don't have (thank $DEITY)
> anything that could deconstruct types - as soon as some bastard grows means
> to say "type of the second argument of the function pointed to by p", this
> kind of analysis, painful as it is, goes out of window. Even as it is,
> do you really like the idea of newbies trying to get through the exercises
> like the one above?
BTW, would there be any problem if we took the definitions of tcf_proto and
tcf_proto_ops to e.g. net/sched/tcf_proto.h (along with the three inlines in
pkt_cls.h), left forward declarations in sch_generic.h and added includes of "tcf_proto.h"
where needed in net/sched/*.c?
That would make tcf_proto/tcf_proto_ops opaque outside of net/sched, reducing
the exposure of internals. Something like a diff below (against net/master,
builds clean, ought to result in identical binary):
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ef727f71336e..35f8eec3f7c0 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -217,35 +217,6 @@ cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl)
return old_cl;
}
-static inline void
-tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base)
-{
- struct Qdisc *q = tp->chain->block->q;
- unsigned long cl;
-
- /* Check q as it is not set for shared blocks. In that case,
- * setting class is not supported.
- */
- if (!q)
- return;
- cl = q->ops->cl_ops->bind_tcf(q, base, r->classid);
- cl = cls_set_class(q, &r->class, cl);
- if (cl)
- q->ops->cl_ops->unbind_tcf(q, cl);
-}
-
-static inline void
-tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
-{
- struct Qdisc *q = tp->chain->block->q;
- unsigned long cl;
-
- if (!q)
- return;
- if ((cl = __cls_set_class(&r->class, 0)) != 0)
- q->ops->cl_ops->unbind_tcf(q, cl);
-}
-
struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
@@ -708,18 +679,6 @@ static inline bool tc_in_hw(u32 flags)
return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false;
}
-static inline void
-tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
- const struct tcf_proto *tp, u32 flags,
- struct netlink_ext_ack *extack)
-{
- cls_common->chain_index = tp->chain->index;
- cls_common->protocol = tp->protocol;
- cls_common->prio = tp->prio;
- if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
- cls_common->extack = extack;
-}
-
enum tc_fl_command {
TC_CLSFLOWER_REPLACE,
TC_CLSFLOWER_DESTROY,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a6d00093f35e..72dbb96fc549 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -246,65 +246,7 @@ struct tcf_result {
struct tcf_chain;
-struct tcf_proto_ops {
- struct list_head head;
- char kind[IFNAMSIZ];
-
- int (*classify)(struct sk_buff *,
- const struct tcf_proto *,
- struct tcf_result *);
- int (*init)(struct tcf_proto*);
- void (*destroy)(struct tcf_proto *tp,
- struct netlink_ext_ack *extack);
-
- void* (*get)(struct tcf_proto*, u32 handle);
- int (*change)(struct net *net, struct sk_buff *,
- struct tcf_proto*, unsigned long,
- u32 handle, struct nlattr **,
- void **, bool,
- struct netlink_ext_ack *);
- int (*delete)(struct tcf_proto *tp, void *arg,
- bool *last,
- struct netlink_ext_ack *);
- void (*walk)(struct tcf_proto*, struct tcf_walker *arg);
- int (*reoffload)(struct tcf_proto *tp, bool add,
- tc_setup_cb_t *cb, void *cb_priv,
- struct netlink_ext_ack *extack);
- void (*bind_class)(void *, u32, unsigned long);
- void * (*tmplt_create)(struct net *net,
- struct tcf_chain *chain,
- struct nlattr **tca,
- struct netlink_ext_ack *extack);
- void (*tmplt_destroy)(void *tmplt_priv);
-
- /* rtnetlink specific */
- int (*dump)(struct net*, struct tcf_proto*, void *,
- struct sk_buff *skb, struct tcmsg*);
- int (*tmplt_dump)(struct sk_buff *skb,
- struct net *net,
- void *tmplt_priv);
-
- struct module *owner;
-};
-
-struct tcf_proto {
- /* Fast access part */
- struct tcf_proto __rcu *next;
- void __rcu *root;
-
- /* called under RCU BH lock*/
- int (*classify)(struct sk_buff *,
- const struct tcf_proto *,
- struct tcf_result *);
- __be16 protocol;
-
- /* All the rest */
- u32 prio;
- void *data;
- const struct tcf_proto_ops *ops;
- struct tcf_chain *chain;
- struct rcu_head rcu;
-};
+struct tcf_proto_ops;
struct qdisc_skb_cb {
unsigned int pkt_len;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 229d63c99be2..e946ada18299 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -25,11 +25,12 @@
#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
-#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
#include <net/netlink.h>
+#include "tcf_proto.h"
+
static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
{
u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 31bd1439cf60..be5fba6355c5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,6 +31,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6a5dce8baf19..3772432889f2 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -22,6 +22,8 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
struct basic_head {
struct list_head flist;
struct idr handle_idr;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index fa6fe2fe0f32..fb2478e357cd 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -23,6 +23,8 @@
#include <net/pkt_cls.h>
#include <net/sock.h>
+#include "tcf_proto.h"
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3bc01bdde165..5638c711e53c 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -18,6 +18,8 @@
#include <net/sock.h>
#include <net/cls_cgroup.h>
+#include "tcf_proto.h"
+
struct cls_cgroup_head {
u32 handle;
struct tcf_exts exts;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2bb043cd436b..7e60e432e3a8 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -33,6 +33,8 @@
#include <net/netfilter/nf_conntrack.h>
#endif
+#include "tcf_proto.h"
+
struct flow_head {
struct list_head filters;
struct rcu_head rcu;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 6fd9bdd93796..b36c61f7ee44 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -20,7 +20,6 @@
#include <linux/ip.h>
#include <linux/mpls.h>
-#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/flow_dissector.h>
@@ -29,6 +28,8 @@
#include <net/dst.h>
#include <net/dst_metadata.h>
+#include "tcf_proto.h"
+
struct fl_flow_key {
int indev_ifindex;
struct flow_dissector_key_control control;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 29eeeaf3ea44..be872b1653f5 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -28,7 +28,8 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
-#include <net/sch_generic.h>
+
+#include "tcf_proto.h"
#define HTSIZE 256
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 856fa79d4ffd..708faf62ecab 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -13,9 +13,10 @@
#include <linux/init.h>
#include <linux/module.h>
-#include <net/sch_generic.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
struct cls_mall_head {
struct tcf_exts exts;
struct tcf_result res;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0404aa5fa7cb..d40ae6d14b2d 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -22,6 +22,8 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
/*
* 1. For now we assume that route tags < 256.
* It allows to use direct table lookups, instead of hash tables.
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index cbb5e0d600f3..131a81aeaa4e 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -20,6 +20,8 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
#define RSVP_DST_LEN 1
#define RSVP_ID "rsvp"
#define RSVP_OPS cls_rsvp_ops
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index dd08aea2aee5..159dc01cf251 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -20,6 +20,8 @@
#include <net/pkt_cls.h>
#include <net/netlink.h>
+#include "tcf_proto.h"
+
#define RSVP_DST_LEN 4
#define RSVP_ID "rsvp6"
#define RSVP_OPS cls_rsvp6_ops
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 9ccc93f257db..e7d06c3d40a3 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,7 +13,8 @@
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
-#include <net/sch_generic.h>
+
+#include "tcf_proto.h"
/*
* Passing parameters to the root seems to be done more awkwardly than really
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d5d2a6dc3921..7b3bdfd80001 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -47,6 +47,8 @@
#include <net/pkt_cls.h>
#include <linux/idr.h>
+#include "tcf_proto.h"
+
struct tc_u_knode {
struct tc_u_knode __rcu *next;
u32 handle;
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 1331a4c2d8ff..b123880fbe07 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -90,6 +90,8 @@
#include <linux/skbuff.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
static LIST_HEAD(ematch_ops);
static DEFINE_RWLOCK(ematch_mod_lock);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 98541c6399db..d6ac218811d0 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -37,6 +37,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
/*
Short review.
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index cd49afca9617..6bf259e55319 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -17,6 +17,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
/*
* The ATM queuing discipline provides a framework for invoking classifiers
* (aka "filters"), which in turn select classes of this queuing discipline.
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 35fc7252187c..fcfd5f321447 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -75,6 +75,8 @@
#include <net/netfilter/nf_conntrack_core.h>
#endif
+#include "tcf_proto.h"
+
#define CAKE_SET_WAYS (8)
#define CAKE_MAX_TINS (8)
#define CAKE_QUEUES (1024)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index f42025d53cfe..8021ba377dfd 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -21,6 +21,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
/* Class-Based Queueing (CBQ) algorithm.
=======================================
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e0b0cf8a9939..19a48fa95b9b 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -18,6 +18,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
struct drr_class {
struct Qdisc_class_common common;
unsigned int filter_cnt;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 049714c57075..b3a4537afbcb 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -18,6 +18,8 @@
#include <net/inet_ecn.h>
#include <asm/byteorder.h>
+#include "tcf_proto.h"
+
/*
* classid class marking
* ------- ----- -------
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 6c0a9d5dbf94..8868a8e1a81f 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -28,6 +28,8 @@
#include <net/codel_impl.h>
#include <net/codel_qdisc.h>
+#include "tcf_proto.h"
+
/* Fair Queue CoDel.
*
* Principles :
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3278a76f6861..9c75b77da56e 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -68,6 +68,8 @@
#include <net/pkt_cls.h>
#include <asm/div64.h>
+#include "tcf_proto.h"
+
/*
* kernel internal service curve representation:
* coordinates are given by 64 bit unsigned integers.
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 43c4bfe625a9..c206b3cfdfb2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -38,10 +38,10 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
-#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
/* HTB algorithm.
Author: devik@cdi.cz
========================================================================
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 1da7ea8de0ad..107563c14e24 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -27,6 +27,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
struct multiq_sched_data {
u16 bands;
u16 max_bands;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 222e53d3d27a..4fed3fd38dd3 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -22,6 +22,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
struct prio_sched_data {
int bands;
struct tcf_proto __rcu *filter_list;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index bb1a9c11fc54..32f68e639037 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -19,6 +19,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include "tcf_proto.h"
+
/* Quick Fair Queueing Plus
========================
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 7cbdad8419b7..5465249c600f 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -28,6 +28,8 @@
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
+#include "tcf_proto.h"
+
/*
* SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
* This implementation uses L = 8 and N = 16
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 2f2678197760..abc1598e87e7 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -26,6 +26,8 @@
#include <net/pkt_cls.h>
#include <net/red.h>
+#include "tcf_proto.h"
+
/* Stochastic Fairness Queuing algorithm.
=======================================
diff --git a/net/sched/tcf_proto.h b/net/sched/tcf_proto.h
new file mode 100644
index 000000000000..b8d0e15e7f26
--- /dev/null
+++ b/net/sched/tcf_proto.h
@@ -0,0 +1,104 @@
+/* struct tcf_proto internal details - outside of net/sched it's opaque */
+
+#include <net/sch_generic.h>
+
+struct tcf_proto {
+ /* Fast access part */
+ struct tcf_proto __rcu *next;
+ void __rcu *root;
+
+ /* called under RCU BH lock*/
+ int (*classify)(struct sk_buff *,
+ const struct tcf_proto *,
+ struct tcf_result *);
+ __be16 protocol;
+
+ /* All the rest */
+ u32 prio;
+ void *data;
+ const struct tcf_proto_ops *ops;
+ struct tcf_chain *chain;
+ struct rcu_head rcu;
+};
+
+struct tcf_proto_ops {
+ struct list_head head;
+ char kind[IFNAMSIZ];
+
+ int (*classify)(struct sk_buff *,
+ const struct tcf_proto *,
+ struct tcf_result *);
+ int (*init)(struct tcf_proto*);
+ void (*destroy)(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack);
+
+ void* (*get)(struct tcf_proto*, u32 handle);
+ int (*change)(struct net *net, struct sk_buff *,
+ struct tcf_proto*, unsigned long,
+ u32 handle, struct nlattr **,
+ void **, bool,
+ struct netlink_ext_ack *);
+ int (*delete)(struct tcf_proto *tp, void *arg,
+ bool *last,
+ struct netlink_ext_ack *);
+ void (*walk)(struct tcf_proto*, struct tcf_walker *arg);
+ int (*reoffload)(struct tcf_proto *tp, bool add,
+ tc_setup_cb_t *cb, void *cb_priv,
+ struct netlink_ext_ack *extack);
+ void (*bind_class)(void *, u32, unsigned long);
+ void * (*tmplt_create)(struct net *net,
+ struct tcf_chain *chain,
+ struct nlattr **tca,
+ struct netlink_ext_ack *extack);
+ void (*tmplt_destroy)(void *tmplt_priv);
+
+ /* rtnetlink specific */
+ int (*dump)(struct net*, struct tcf_proto*, void *,
+ struct sk_buff *skb, struct tcmsg*);
+ int (*tmplt_dump)(struct sk_buff *skb,
+ struct net *net,
+ void *tmplt_priv);
+
+ struct module *owner;
+};
+
+static inline void
+tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base)
+{
+ struct Qdisc *q = tp->chain->block->q;
+ unsigned long cl;
+
+ /* Check q as it is not set for shared blocks. In that case,
+ * setting class is not supported.
+ */
+ if (!q)
+ return;
+ cl = q->ops->cl_ops->bind_tcf(q, base, r->classid);
+ cl = cls_set_class(q, &r->class, cl);
+ if (cl)
+ q->ops->cl_ops->unbind_tcf(q, cl);
+}
+
+static inline void
+tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
+{
+ struct Qdisc *q = tp->chain->block->q;
+ unsigned long cl;
+
+ if (!q)
+ return;
+ if ((cl = __cls_set_class(&r->class, 0)) != 0)
+ q->ops->cl_ops->unbind_tcf(q, cl);
+}
+
+static inline void
+tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
+ const struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ cls_common->chain_index = tp->chain->index;
+ cls_common->protocol = tp->protocol;
+ cls_common->prio = tp->prio;
+ if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
+ cls_common->extack = extack;
+}
^ permalink raw reply related
* Re: [PATCH] net: sched: Fix memory exposure from short TCA_U32_SEL
From: Al Viro @ 2018-08-26 22:43 UTC (permalink / raw)
To: Joe Perches
Cc: Julia Lawall, Kees Cook, LKML, Jamal Hadi Salim, Cong Wang,
Jiri Pirko, David S. Miller, Network Development
In-Reply-To: <eca48539a3dede3bfaed9ab9a6c06794cf8160e0.camel@perches.com>
On Sun, Aug 26, 2018 at 03:26:54PM -0700, Joe Perches wrote:
> On Sun, 2018-08-26 at 22:24 +0100, Al Viro wrote:
> > On Sun, Aug 26, 2018 at 11:57:57AM -0700, Joe Perches wrote:
> >
> > > > That, BTW, is why I hate the use of sizeof(*p) in kmalloc, etc.
> > > > arguments. typeof is even worse in that respect.
> > >
> > > True. Semantic searches via tools like coccinelle could help here
> > > but those searches are quite a bit slower than straightforward greps.
> >
> > Those searches are .config-sensitive as well, which can be much more
> > unpleasant than being slow...
>
> Are they? Julia?
They work pretty much on preprocessor output level; if something is ifdef'ed
out on a given config, it won't be seen...
^ permalink raw reply
* [PATCH] iwlwifi: mvm: fix spelling mistake "Recieved" -> "Received"
From: Colin King @ 2018-08-26 22:31 UTC (permalink / raw)
To: Johannes Berg, Emmanuel Grumbach, Luca Coelho,
Intel Linux Wireless, Kalle Valo, David S . Miller,
linux-wireless, netdev
Cc: kernel-janitors, linux-kernel
From: Colin Ian King <colin.king@canonical.com>
Trivial fix to spelling mistake in debug message.
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index b15b0d84bb7e..5f32d3131d62 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -3320,7 +3320,7 @@ static bool iwl_mvm_rx_aux_roc(struct iwl_notif_wait_data *notif_wait,
resp = (void *)pkt->data;
IWL_DEBUG_TE(mvm,
- "Aux ROC: Recieved response from ucode: status=%d uid=%d\n",
+ "Aux ROC: Received response from ucode: status=%d uid=%d\n",
resp->status, resp->event_unique_id);
te_data->uid = le32_to_cpu(resp->event_unique_id);
--
2.17.1
^ permalink raw reply related
* Re: [PATCH] net: sched: Fix memory exposure from short TCA_U32_SEL
From: Joe Perches @ 2018-08-26 22:26 UTC (permalink / raw)
To: Al Viro, Julia Lawall
Cc: Kees Cook, LKML, Jamal Hadi Salim, Cong Wang, Jiri Pirko,
David S. Miller, Network Development
In-Reply-To: <20180826212421.GW6515@ZenIV.linux.org.uk>
On Sun, 2018-08-26 at 22:24 +0100, Al Viro wrote:
> On Sun, Aug 26, 2018 at 11:57:57AM -0700, Joe Perches wrote:
>
> > > That, BTW, is why I hate the use of sizeof(*p) in kmalloc, etc.
> > > arguments. typeof is even worse in that respect.
> >
> > True. Semantic searches via tools like coccinelle could help here
> > but those searches are quite a bit slower than straightforward greps.
>
> Those searches are .config-sensitive as well, which can be much more
> unpleasant than being slow...
Are they? Julia?
^ permalink raw reply
* Re: broken behaviour of TC filter delete
From: Jamal Hadi Salim @ 2018-08-26 17:48 UTC (permalink / raw)
To: Jiri Pirko, Cong Wang
Cc: Roman Mashak, Linux Kernel Network Developers, Jiri Pirko
In-Reply-To: <20180825130243.GE2931@nanopsycho>
On 2018-08-25 9:02 a.m., Jiri Pirko wrote:
> Fri, Aug 24, 2018 at 08:11:07PM CEST, xiyou.wangcong@gmail.com wrote:
>
>>> ENOENT seems to be more logical to return when there's no more filter to delete.
>>
>> Yeah, at least we should keep ENOENT for compatibility.
>>
>> The bug here is chain 0 is gone after the last filter is gone,
>> so when you delete the filter again, it treats it as you specify
>> chain 0 which does not exist, so it hits EINVAL before ENOENT.
>
> I understand. My concern is about consistency with other chains. Perhaps
> -ENOENT for all chains in this case would be doable. What do you think?
>
ENOENT with extack describing whether chain or filter not found.
cheers,
jamal
^ permalink raw reply
* Re: [PATCH] net: sched: Fix memory exposure from short TCA_U32_SEL
From: Al Viro @ 2018-08-26 21:24 UTC (permalink / raw)
To: Joe Perches
Cc: Kees Cook, LKML, Jamal Hadi Salim, Cong Wang, Jiri Pirko,
David S. Miller, Network Development
In-Reply-To: <d31fe59160e0b7d40e09536a3c74619ebb1f3b13.camel@perches.com>
On Sun, Aug 26, 2018 at 11:57:57AM -0700, Joe Perches wrote:
> > That, BTW, is why I hate the use of sizeof(*p) in kmalloc, etc.
> > arguments. typeof is even worse in that respect.
>
> True. Semantic searches via tools like coccinelle could help here
> but those searches are quite a bit slower than straightforward greps.
Those searches are .config-sensitive as well, which can be much more
unpleasant than being slow...
^ permalink raw reply
* Re: [PATCH] net: sched: Fix memory exposure from short TCA_U32_SEL
From: David Miller @ 2018-08-26 21:22 UTC (permalink / raw)
To: keescook; +Cc: linux-kernel, viro, jhs, xiyou.wangcong, jiri, netdev
In-Reply-To: <20180826055801.GA42063@beast>
From: Kees Cook <keescook@chromium.org>
Date: Sat, 25 Aug 2018 22:58:01 -0700
> Via u32_change(), TCA_U32_SEL has an unspecified type in the netlink
> policy, so max length isn't enforced, only minimum. This means nkeys
> (from userspace) was being trusted without checking the actual size of
> nla_len(), which could lead to a memory over-read, and ultimately an
> exposure via a call to u32_dump(). Reachability is CAP_NET_ADMIN within
> a namespace.
>
> Reported-by: Al Viro <viro@zeniv.linux.org.uk>
> Signed-off-by: Kees Cook <keescook@chromium.org>
I'll apply this as-is and queue it up for -stable.
If we want to avoid sizeof(*p) type stuff, it can be done as a follow-up.
Thanks.
^ permalink raw reply
* Re: [PATCH] net: sched: Fix memory exposure from short TCA_U32_SEL
From: Jamal Hadi Salim @ 2018-08-26 17:30 UTC (permalink / raw)
To: Kees Cook, Al Viro
Cc: LKML, Cong Wang, Jiri Pirko, David S. Miller, Network Development
In-Reply-To: <CAGXu5jK7VzayzZTcxgZBf-+YHWO+Hv7s8utj2rzTc3gFtA8pFQ@mail.gmail.com>
On 2018-08-26 2:19 a.m., Kees Cook wrote:
> On Sat, Aug 25, 2018 at 11:15 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>> On Sat, Aug 25, 2018 at 10:58:01PM -0700, Kees Cook wrote:
>> Saner approach would be sel_size = offsetof(struct tc_u32_sel, keys[s->nkeys])...
>
> Either is fine by me.
>
>>> + sel_size = struct_size(s, keys, s->nkeys);
>>> + if (nla_len(tb[TCA_U32_SEL]) < sel_size) {
>>> + err = -EINVAL;
>>> + goto erridr;
>>> + }
>>>
>>> - n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
>>> + n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL);
>>
>> ITYM
>> n = kzalloc(offsetof(struct tc_u_knode, sel.keys[s->nkeys]), GFP_KERNEL);
>
> I prefer to reuse sel_size and keep typeof() to keep things tied to
> "n" more directly. *shrug*
Looks good to me.
We should add an nla_policy later.
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
cheers,
jamal
^ permalink raw reply
* bpfilter causes a leftover kernel process
From: Olivier Brunel @ 2018-08-26 16:08 UTC (permalink / raw)
To: netdev
Hi,
(Please cc me as I'm not subscribed to the list, thanks.)
I'm running an Arch Linux x86_64 system, and recently updated to a 4.18
kernel, which led me to encounter what I believe to be a kernel bug
related to the bpfilter framework.
What happens is that upon boot, there's a "leftover kernel process"
running (shown as "[none]" in ps), which doesn't seem to do anything
(anymore) but does have references/fds open to the root fs, and so when
trying to shutdown the system umounting the root fs fails (EBUSY)
because of it, leading to expected issues.
Simply killing that process allows umounting the root fs fine and
"resolves" all issues. This process/behavior wasn't there with any
previous kernel, and is there with all tried kernels from 4.18.0 to
4.18.4, without any other change to the system -- although this is due
to CONFIG_BPFILTER=y in the kernel config.
Indeed I managed to compile a kernel 4.18.4 myself using the Arch Linux
config[1] with a single change of unsetting CONFIG_BPFILTER, and with
the resulting kernel I don't have this "leftover kernel process"
anymore, everything is back to normal.
Now, about this process, here's a few outputs to try and describe what
it is:
rafus# pgrep none
920
rafus# cd /proc/920
rafus# readlink exe
/ (deleted)
rafus# ls -l fd
total 0
lr-x------ 1 root root 64 Aug 26 17:17 0 -> 'pipe:[13366]'
l-wx------ 1 root root 64 Aug 26 17:17 1 -> 'pipe:[13367]'
lrwx------ 1 root root 64 Aug 26 17:17 2 -> /dev/console
rafus# cat status
Name: none
Umask: 0022
State: S (sleeping)
Tgid: 920
Ngid: 0
Pid: 920
PPid: 2
TracerPid: 0
Uid: 0 0 0 0
Gid: 0 0 0 0
FDSize: 64
Groups:
NStgid: 920
NSpid: 920
NSpgid: 0
NSsid: 0
VmPeak: 2296 kB
VmSize: 2296 kB
VmLck: 0 kB
VmPin: 0 kB
VmHWM: 748 kB
VmRSS: 748 kB
RssAnon: 60 kB
RssFile: 684 kB
RssShmem: 4 kB
VmData: 176 kB
VmStk: 132 kB
VmExe: 8 kB
VmLib: 1452 kB
VmPTE: 44 kB
VmSwap: 0 kB
HugetlbPages: 0 kB
CoreDumping: 0
Threads: 1
SigQ: 0/7861
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000000
SigIgn: 0000000000000000
SigCgt: 0000000000000000
CapInh: 0000000000000000
CapPrm: 0000003fffffffff
CapEff: 0000003fffffffff
CapBnd: 0000003fffffffff
CapAmb: 0000000000000000
NoNewPrivs: 0
Seccomp: 0
Speculation_Store_Bypass: vulnerable
Cpus_allowed: 1
Cpus_allowed_list: 0
Mems_allowed: 00000001
Mems_allowed_list: 0
voluntary_ctxt_switches: 65
nonvoluntary_ctxt_switches: 1
rafus# cat stack
[<0>] pipe_wait+0x6c/0xb0
[<0>] pipe_read+0x20a/0x2d0
[<0>] __vfs_read+0x13a/0x180
[<0>] vfs_read+0x8a/0x130
[<0>] ksys_read+0x4f/0xb0
[<0>] do_syscall_64+0x5b/0x170
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[<0>] 0xffffffffffffffff
rafus# file -L exe
exe: ELF 64-bit LSB pie executable x86-64, version 1 (SYSV),
dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for
GNU/Linux 3.2.0,
BuildID[sha1]=b247cedd3f8daaea3eee38477aa641d84b77f0ba, not stripped
rafus# stat -L exe
File: exe
Size: 16832 Blocks: 40 IO Block: 4096 regular
file Device: 1h/1d Inode: 13361 Links: 0
Access: (0777/-rwxrwxrwx) Uid: ( 0/ root) Gid: ( 0/ root)
Access: 2018-08-26 17:17:37.334261924 +0200
Modify: 2018-08-26 17:14:27.787595262 +0200
Change: 2018-08-26 17:14:27.787595262 +0200
Birth: -
rafus# sha1sum exe
723d59584abe5e1e9917f0ec41d7e9120514afe7 exe
rafus# strings exe|grep bpf
Started bpfilter
I'm not actually sure what the process is, I'm guessing some kind of
helper is spawned at some point during boot, and for some reason it
never ends.
Although I can reproduce it (it happens on every boot with a kernel
4.18 and CONFIG_BPFILTER=y), I'm unfortunately not sure what is
actually needed to be done in order to trigger it.
I don't use bpfilter myself/directly, as said this happens with the
exact same system as with previous kernels, but I obviously have some
network configuration (done using iptables/iproute2) set up during boot.
Let me know if you need more information or need me to test things, and
I'll do my best.
Thank you.
[1]
https://git.archlinux.org/svntogit/packages.git/tree/trunk?h=packages/linux
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox