Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 3/5] qede: Add support for PTP resource locking.
From: Sudarsana Reddy Kalluru @ 2017-04-26 16:00 UTC (permalink / raw)
  To: davem; +Cc: richardcochran, netdev, Yuval.Mintz
In-Reply-To: <20170426160053.8356-1-sudarsana.kalluru@cavium.com>

The patch adds the necessary changes to the driver to use the qed resource
locking functionality. Currently the PTP initialization is spread
between the driver probe/open implementations; the associated APIs are
qede_ptp_register_phc()/qede_ptp_start(). This functionality is combined
into a single API, qede_ptp_enable(), to simplify the usage of the qed
resource locking implementation. The new API is invoked in the probe path.
Similarly, the PTP clean-up code is moved to qede_ptp_disable(), which
gets invoked in the driver unload path.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qede/qede_main.c |  18 +---
 drivers/net/ethernet/qlogic/qede/qede_ptp.c  | 150 +++++++++++++--------------
 drivers/net/ethernet/qlogic/qede/qede_ptp.h  |   6 +-
 3 files changed, 75 insertions(+), 99 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 292e2dc..b9ba23d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -907,13 +907,8 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	edev->ops->common->set_id(cdev, edev->ndev->name, DRV_MODULE_VERSION);
 
 	/* PTP not supported on VFs */
-	if (!is_vf) {
-		rc = qede_ptp_register_phc(edev);
-		if (rc) {
-			DP_NOTICE(edev, "Cannot register PHC\n");
-			goto err5;
-		}
-	}
+	if (!is_vf)
+		qede_ptp_enable(edev, true);
 
 	edev->ops->register_ops(cdev, &qede_ll_ops, edev);
 
@@ -928,8 +923,6 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 
 	return 0;
 
-err5:
-	unregister_netdev(edev->ndev);
 err4:
 	qede_roce_dev_remove(edev);
 err3:
@@ -980,7 +973,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 	unregister_netdev(ndev);
 	cancel_delayed_work_sync(&edev->sp_task);
 
-	qede_ptp_remove(edev);
+	qede_ptp_disable(edev);
 
 	qede_roce_dev_remove(edev);
 
@@ -1877,8 +1870,6 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 	qede_roce_dev_event_close(edev);
 	edev->state = QEDE_STATE_CLOSED;
 
-	qede_ptp_stop(edev);
-
 	/* Close OS Tx */
 	netif_tx_disable(edev->ndev);
 	netif_carrier_off(edev->ndev);
@@ -1987,13 +1978,10 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
 
 	qede_roce_dev_event_open(edev);
 
-	qede_ptp_start(edev, (mode == QEDE_LOAD_NORMAL));
-
 	edev->state = QEDE_STATE_OPEN;
 
 	DP_INFO(edev, "Ending successfully qede load\n");
 
-
 	goto out;
 err4:
 	qede_sync_free_irqs(edev);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
index 2e62dec..6396363 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
@@ -206,21 +206,6 @@ static u64 qede_ptp_read_cc(const struct cyclecounter *cc)
 	return phc_cycles;
 }
 
-static void qede_ptp_init_cc(struct qede_dev *edev)
-{
-	struct qede_ptp *ptp;
-
-	ptp = edev->ptp;
-	if (!ptp)
-		return;
-
-	memset(&ptp->cc, 0, sizeof(ptp->cc));
-	ptp->cc.read = qede_ptp_read_cc;
-	ptp->cc.mask = CYCLECOUNTER_MASK(64);
-	ptp->cc.shift = 0;
-	ptp->cc.mult = 1;
-}
-
 static int qede_ptp_cfg_filters(struct qede_dev *edev)
 {
 	struct qede_ptp *ptp = edev->ptp;
@@ -324,61 +309,6 @@ int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *ifr)
 			    sizeof(config)) ? -EFAULT : 0;
 }
 
-/* Called during load, to initialize PTP-related stuff */
-static void qede_ptp_init(struct qede_dev *edev, bool init_tc)
-{
-	struct qede_ptp *ptp;
-	int rc;
-
-	ptp = edev->ptp;
-	if (!ptp)
-		return;
-
-	spin_lock_init(&ptp->lock);
-
-	/* Configure PTP in HW */
-	rc = ptp->ops->enable(edev->cdev);
-	if (rc) {
-		DP_ERR(edev, "Stopping PTP initialization\n");
-		return;
-	}
-
-	/* Init work queue for Tx timestamping */
-	INIT_WORK(&ptp->work, qede_ptp_task);
-
-	/* Init cyclecounter and timecounter. This is done only in the first
-	 * load. If done in every load, PTP application will fail when doing
-	 * unload / load (e.g. MTU change) while it is running.
-	 */
-	if (init_tc) {
-		qede_ptp_init_cc(edev);
-		timecounter_init(&ptp->tc, &ptp->cc,
-				 ktime_to_ns(ktime_get_real()));
-	}
-
-	DP_VERBOSE(edev, QED_MSG_DEBUG, "PTP initialization is successful\n");
-}
-
-void qede_ptp_start(struct qede_dev *edev, bool init_tc)
-{
-	qede_ptp_init(edev, init_tc);
-	qede_ptp_cfg_filters(edev);
-}
-
-void qede_ptp_remove(struct qede_dev *edev)
-{
-	struct qede_ptp *ptp;
-
-	ptp = edev->ptp;
-	if (ptp && ptp->clock) {
-		ptp_clock_unregister(ptp->clock);
-		ptp->clock = NULL;
-	}
-
-	kfree(ptp);
-	edev->ptp = NULL;
-}
-
 int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *info)
 {
 	struct qede_ptp *ptp = edev->ptp;
@@ -417,8 +347,7 @@ int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *info)
 	return 0;
 }
 
-/* Called during unload, to stop PTP-related stuff */
-void qede_ptp_stop(struct qede_dev *edev)
+void qede_ptp_disable(struct qede_dev *edev)
 {
 	struct qede_ptp *ptp;
 
@@ -426,6 +355,11 @@ void qede_ptp_stop(struct qede_dev *edev)
 	if (!ptp)
 		return;
 
+	if (ptp->clock) {
+		ptp_clock_unregister(ptp->clock);
+		ptp->clock = NULL;
+	}
+
 	/* Cancel PTP work queue. Should be done after the Tx queues are
 	 * drained to prevent additional scheduling.
 	 */
@@ -439,11 +373,54 @@ void qede_ptp_stop(struct qede_dev *edev)
 	spin_lock_bh(&ptp->lock);
 	ptp->ops->disable(edev->cdev);
 	spin_unlock_bh(&ptp->lock);
+
+	kfree(ptp);
+	edev->ptp = NULL;
 }
 
-int qede_ptp_register_phc(struct qede_dev *edev)
+static int qede_ptp_init(struct qede_dev *edev, bool init_tc)
 {
 	struct qede_ptp *ptp;
+	int rc;
+
+	ptp = edev->ptp;
+	if (!ptp)
+		return -EINVAL;
+
+	spin_lock_init(&ptp->lock);
+
+	/* Configure PTP in HW */
+	rc = ptp->ops->enable(edev->cdev);
+	if (rc) {
+		DP_INFO(edev, "PTP HW enable failed\n");
+		return rc;
+	}
+
+	/* Init work queue for Tx timestamping */
+	INIT_WORK(&ptp->work, qede_ptp_task);
+
+	/* Init cyclecounter and timecounter. This is done only in the first
+	 * load. If done in every load, PTP application will fail when doing
+	 * unload / load (e.g. MTU change) while it is running.
+	 */
+	if (init_tc) {
+		memset(&ptp->cc, 0, sizeof(ptp->cc));
+		ptp->cc.read = qede_ptp_read_cc;
+		ptp->cc.mask = CYCLECOUNTER_MASK(64);
+		ptp->cc.shift = 0;
+		ptp->cc.mult = 1;
+
+		timecounter_init(&ptp->tc, &ptp->cc,
+				 ktime_to_ns(ktime_get_real()));
+	}
+
+	return rc;
+}
+
+int qede_ptp_enable(struct qede_dev *edev, bool init_tc)
+{
+	struct qede_ptp *ptp;
+	int rc;
 
 	ptp = kzalloc(sizeof(*ptp), GFP_KERNEL);
 	if (!ptp) {
@@ -454,14 +431,19 @@ int qede_ptp_register_phc(struct qede_dev *edev)
 	ptp->edev = edev;
 	ptp->ops = edev->ops->ptp;
 	if (!ptp->ops) {
-		kfree(ptp);
-		edev->ptp = NULL;
-		DP_ERR(edev, "PTP clock registeration failed\n");
-		return -EIO;
+		DP_INFO(edev, "PTP enable failed\n");
+		rc = -EIO;
+		goto err1;
 	}
 
 	edev->ptp = ptp;
 
+	rc = qede_ptp_init(edev, init_tc);
+	if (rc)
+		goto err1;
+
+	qede_ptp_cfg_filters(edev);
+
 	/* Fill the ptp_clock_info struct and register PTP clock */
 	ptp->clock_info.owner = THIS_MODULE;
 	snprintf(ptp->clock_info.name, 16, "%s", edev->ndev->name);
@@ -478,13 +460,21 @@ int qede_ptp_register_phc(struct qede_dev *edev)
 
 	ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev);
 	if (IS_ERR(ptp->clock)) {
-		ptp->clock = NULL;
-		kfree(ptp);
-		edev->ptp = NULL;
+		rc = -EINVAL;
 		DP_ERR(edev, "PTP clock registeration failed\n");
+		goto err2;
 	}
 
 	return 0;
+
+err2:
+	qede_ptp_disable(edev);
+	ptp->clock = NULL;
+err1:
+	kfree(ptp);
+	edev->ptp = NULL;
+
+	return rc;
 }
 
 void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.h b/drivers/net/ethernet/qlogic/qede/qede_ptp.h
index f328f9b..691a14c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.h
+++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.h
@@ -40,10 +40,8 @@
 void qede_ptp_rx_ts(struct qede_dev *edev, struct sk_buff *skb);
 void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb);
 int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *req);
-void qede_ptp_start(struct qede_dev *edev, bool init_tc);
-void qede_ptp_stop(struct qede_dev *edev);
-void qede_ptp_remove(struct qede_dev *edev);
-int qede_ptp_register_phc(struct qede_dev *edev);
+void qede_ptp_disable(struct qede_dev *edev);
+int qede_ptp_enable(struct qede_dev *edev, bool init_tc);
 int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *ts);
 
 static inline void qede_ptp_record_rx_ts(struct qede_dev *edev,
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 5/5] qed: Acquire/release ptt_ptp lock when enabling/disabling PTP.
From: Sudarsana Reddy Kalluru @ 2017-04-26 16:00 UTC (permalink / raw)
  To: davem; +Cc: richardcochran, netdev, Yuval.Mintz
In-Reply-To: <20170426160053.8356-1-sudarsana.kalluru@cavium.com>

Move the code for acquiring/releasing the ptt_ptp lock to the PTP-specific
implementations, i.e., ptp_enable()/disable() respectively.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed.h      |  7 +++++--
 drivers/net/ethernet/qlogic/qed/qed_main.c | 12 ------------
 drivers/net/ethernet/qlogic/qed/qed_ptp.c  | 15 ++++++++++++++-
 3 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 3f8d07b..c07191c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -474,6 +474,11 @@ struct qed_hwfn {
 	struct qed_ptt			*p_main_ptt;
 	struct qed_ptt			*p_dpc_ptt;
 
+	/* PTP will be used only by the leading function.
+	 * Usage of all PTP-apis should be synchronized as result.
+	 */
+	struct qed_ptt *p_ptp_ptt;
+
 	struct qed_sb_sp_info		*p_sp_sb;
 	struct qed_sb_attn_info		*p_sb_attn;
 
@@ -532,8 +537,6 @@ struct qed_hwfn {
 
 	struct qed_ptt *p_arfs_ptt;
 
-	/* p_ptp_ptt is valid for leading HWFN only */
-	struct qed_ptt *p_ptp_ptt;
 	struct qed_simd_fp_handler	simd_proto_handler[64];
 
 #ifdef CONFIG_QED_SRIOV
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index a919260..8a5a064 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -956,13 +956,6 @@ static int qed_slowpath_start(struct qed_dev *cdev,
 			}
 		}
 #endif
-		p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
-		if (p_ptt) {
-			QED_LEADING_HWFN(cdev)->p_ptp_ptt = p_ptt;
-		} else {
-			DP_NOTICE(cdev, "Failed to acquire PTT for PTP\n");
-			goto err;
-		}
 	}
 
 	cdev->rx_coalesce_usecs = QED_DEFAULT_RX_USECS;
@@ -1076,9 +1069,6 @@ static int qed_slowpath_start(struct qed_dev *cdev,
 		qed_ptt_release(QED_LEADING_HWFN(cdev),
 				QED_LEADING_HWFN(cdev)->p_arfs_ptt);
 #endif
-	if (IS_PF(cdev) && QED_LEADING_HWFN(cdev)->p_ptp_ptt)
-		qed_ptt_release(QED_LEADING_HWFN(cdev),
-				QED_LEADING_HWFN(cdev)->p_ptp_ptt);
 
 	qed_iov_wq_stop(cdev, false);
 
@@ -1098,8 +1088,6 @@ static int qed_slowpath_stop(struct qed_dev *cdev)
 			qed_ptt_release(QED_LEADING_HWFN(cdev),
 					QED_LEADING_HWFN(cdev)->p_arfs_ptt);
 #endif
-		qed_ptt_release(QED_LEADING_HWFN(cdev),
-				QED_LEADING_HWFN(cdev)->p_ptp_ptt);
 		qed_free_stream_mem(cdev);
 		if (IS_QED_ETH_IF(cdev))
 			qed_sriov_disable(cdev, true);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
index c0a3cbd..1871ebf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
@@ -324,13 +324,23 @@ static int qed_ptp_hw_adjfreq(struct qed_dev *cdev, s32 ppb)
 static int qed_ptp_hw_enable(struct qed_dev *cdev)
 {
 	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt;
+	struct qed_ptt *p_ptt;
 	int rc;
 
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt) {
+		DP_NOTICE(p_hwfn, "Failed to acquire PTT for PTP\n");
+		return -EBUSY;
+	}
+
+	p_hwfn->p_ptp_ptt = p_ptt;
+
 	rc = qed_ptp_res_lock(p_hwfn, p_ptt);
 	if (rc) {
 		DP_INFO(p_hwfn,
 			"Couldn't acquire the resource lock, skip ptp enable for this PF\n");
+		qed_ptt_release(p_hwfn, p_ptt);
+		p_hwfn->p_ptp_ptt = NULL;
 		return rc;
 	}
 
@@ -402,6 +412,9 @@ static int qed_ptp_hw_disable(struct qed_dev *cdev)
 	qed_wr(p_hwfn, p_ptt, NIG_REG_RX_PTP_EN, 0x0);
 	qed_wr(p_hwfn, p_ptt, NIG_REG_TX_PTP_EN, 0x0);
 
+	qed_ptt_release(p_hwfn, p_ptt);
+	p_hwfn->p_ptp_ptt = NULL;
+
 	return 0;
 }
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 2/5] qed: Add support for PTP resource locking.
From: Sudarsana Reddy Kalluru @ 2017-04-26 16:00 UTC (permalink / raw)
  To: davem; +Cc: richardcochran, netdev, Yuval.Mintz
In-Reply-To: <20170426160053.8356-1-sudarsana.kalluru@cavium.com>

The patch adds support for a per-port resource lock for PTP.
The PTP module acquires/releases the MFW resource lock while enabling/
disabling PTP on the interface. The PF instance which has
ownership of this resource lock gets exclusive access to the
PTP clock functionality on the port.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed.h     |  1 +
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 14 +++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h |  4 ++
 drivers/net/ethernet/qlogic/qed/qed_ptp.c | 87 +++++++++++++++++++++++++++++++
 4 files changed, 106 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 8a8f139..3f8d07b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -767,6 +767,7 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
 
 void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 int qed_device_num_engines(struct qed_dev *cdev);
+int qed_device_get_port_id(struct qed_dev *cdev);
 
 #define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 2a3ae00..aa1a4d5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -4064,3 +4064,17 @@ int qed_device_num_engines(struct qed_dev *cdev)
 {
 	return QED_IS_BB(cdev) ? 2 : 1;
 }
+
+static int qed_device_num_ports(struct qed_dev *cdev)
+{
+	/* in CMT always only one port */
+	if (cdev->num_hwfns > 1)
+		return 1;
+
+	return cdev->num_ports_in_engines * qed_device_num_engines(cdev);
+}
+
+int qed_device_get_port_id(struct qed_dev *cdev)
+{
+	return (QED_LEADING_HWFN(cdev)->abs_pf_id) % qed_device_num_ports(cdev);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index e8cf597..5ae35d6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -795,6 +795,10 @@ int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
 
 enum qed_resc_lock {
 	QED_RESC_LOCK_DBG_DUMP = QED_MCP_RESC_LOCK_MIN_VAL,
+	QED_RESC_LOCK_PTP_PORT0,
+	QED_RESC_LOCK_PTP_PORT1,
+	QED_RESC_LOCK_PTP_PORT2,
+	QED_RESC_LOCK_PTP_PORT3,
 	QED_RESC_LOCK_RESC_ALLOC = QED_MCP_RESC_LOCK_MAX_VAL,
 	QED_RESC_LOCK_RESC_INVALID
 };
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
index 80c9c0b..26a9baf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
@@ -34,6 +34,7 @@
 #include "qed_dev_api.h"
 #include "qed_hw.h"
 #include "qed_l2.h"
+#include "qed_mcp.h"
 #include "qed_ptp.h"
 #include "qed_reg_addr.h"
 
@@ -45,6 +46,82 @@
 #define QED_DRIFT_CNTR_DIRECTION_SHIFT		31
 #define QED_TIMESTAMP_MASK			BIT(16)
 
+static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn)
+{
+	switch (qed_device_get_port_id(p_hwfn->cdev)) {
+	case 0:
+		return QED_RESC_LOCK_PTP_PORT0;
+	case 1:
+		return QED_RESC_LOCK_PTP_PORT1;
+	case 2:
+		return QED_RESC_LOCK_PTP_PORT2;
+	case 3:
+		return QED_RESC_LOCK_PTP_PORT3;
+	default:
+		return QED_RESC_LOCK_RESC_INVALID;
+	}
+}
+
+static int qed_ptp_res_lock(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_resc_lock_params params;
+	enum qed_resc_lock resource;
+	int rc;
+
+	resource = qed_ptcdev_to_resc(p_hwfn);
+	if (resource == QED_RESC_LOCK_RESC_INVALID)
+		return -EINVAL;
+
+	qed_mcp_resc_lock_default_init(&params, NULL, resource, true);
+
+	rc = qed_mcp_resc_lock(p_hwfn, p_ptt, &params);
+	if (rc && rc != -EINVAL) {
+		return rc;
+	} else if (rc == -EINVAL) {
+		/* MFW doesn't support resource locking, first PF on the port
+		 * has lock ownership.
+		 */
+		if (p_hwfn->abs_pf_id < p_hwfn->cdev->num_ports_in_engines)
+			return 0;
+
+		DP_INFO(p_hwfn, "PF doesn't have lock ownership\n");
+		return -EBUSY;
+	} else if (!rc && !params.b_granted) {
+		DP_INFO(p_hwfn, "Failed to acquire ptp resource lock\n");
+		return -EBUSY;
+	}
+
+	return rc;
+}
+
+static int qed_ptp_res_unlock(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_resc_unlock_params params;
+	enum qed_resc_lock resource;
+	int rc;
+
+	resource = qed_ptcdev_to_resc(p_hwfn);
+	if (resource == QED_RESC_LOCK_RESC_INVALID)
+		return -EINVAL;
+
+	qed_mcp_resc_lock_default_init(NULL, &params, resource, true);
+
+	rc = qed_mcp_resc_unlock(p_hwfn, p_ptt, &params);
+	if (rc == -EINVAL) {
+		/* MFW doesn't support locking, first PF has lock ownership */
+		if (p_hwfn->abs_pf_id < p_hwfn->cdev->num_ports_in_engines) {
+			rc = 0;
+		} else {
+			DP_INFO(p_hwfn, "PF doesn't have lock ownership\n");
+			return -EINVAL;
+		}
+	} else if (rc) {
+		DP_INFO(p_hwfn, "Failed to release the ptp resource lock\n");
+	}
+
+	return rc;
+}
+
 /* Read Rx timestamp */
 static int qed_ptp_hw_read_rx_ts(struct qed_dev *cdev, u64 *timestamp)
 {
@@ -249,6 +326,14 @@ static int qed_ptp_hw_enable(struct qed_dev *cdev)
 {
 	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt;
+	int rc;
+
+	rc = qed_ptp_res_lock(p_hwfn, p_ptt);
+	if (rc) {
+		DP_INFO(p_hwfn,
+			"Couldn't acquire the resource lock, skip ptp enable for this PF\n");
+		return rc;
+	}
 
 	/* Reset PTP event detection rules - will be configured in the IOCTL */
 	qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, 0x7FF);
@@ -305,6 +390,8 @@ static int qed_ptp_hw_disable(struct qed_dev *cdev)
 	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt;
 
+	qed_ptp_res_unlock(p_hwfn, p_ptt);
+
 	/* Reset PTP event detection rules */
 	qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, 0x7FF);
 	qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_RULE_MASK, 0x3FFF);
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH net-next 01/18] net: dsa: mv88e6xxx: add max VID to info
From: Andrew Lunn @ 2017-04-26 16:04 UTC (permalink / raw)
  To: Vivien Didelot
  Cc: netdev, linux-kernel, kernel, David S. Miller, Florian Fainelli
In-Reply-To: <20170426155336.5937-2-vivien.didelot@savoirfairelinux.com>

On Wed, Apr 26, 2017 at 11:53:19AM -0400, Vivien Didelot wrote:
> Some chips don't have a VLAN Table Unit, most of them do have a 4K
> table, some others as the 88E6390 family has a 13th bit for the VID.
> 
> Add a new max_vid member to the info structure, used to check the
> presence of a VTU as well as the value used to iterate from in VTU
> GetNext operations.
> 
> This makes the MV88E6XXX_FLAG_VTU obsolete, thus remove it.
> 
> Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
> ---
>  drivers/net/dsa/mv88e6xxx/chip.c      | 38 ++++++++++++++++++++++++++---------
>  drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 13 ++----------
>  2 files changed, 31 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
> index 44ba8cff5631..e45ddf3e90e8 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.c
> +++ b/drivers/net/dsa/mv88e6xxx/chip.c
> @@ -1440,7 +1440,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port,
>  	u16 pvid;
>  	int err;
>  
> -	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU))
> +	if (!chip->info->max_vid)
>  		return -EOPNOTSUPP;
>  
>  	mutex_lock(&chip->reg_lock);
> @@ -1478,7 +1478,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port,
>  		err = cb(&vlan->obj);
>  		if (err)
>  			break;
> -	} while (next.vid < GLOBAL_VTU_VID_MASK);
> +	} while (next.vid < chip->info->max_vid);

This change in itself is worth it. We should not be using a mask for
this sort of comparison.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* [PATCH net] net: adjust skb->truesize in ___pskb_trim()
From: Eric Dumazet @ 2017-04-26 16:07 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Andrey Konovalov, Willem de Bruijn

From: Eric Dumazet <edumazet@google.com>

Andrey found a way to trigger the WARN_ON_ONCE(delta < len) in
skb_try_coalesce() using syzkaller and a filter attached to a TCP
socket.

As we did recently in commit 158f323b9868 ("net: adjust skb->truesize in
pskb_expand_head()") we can adjust skb->truesize from ___pskb_trim(),
via a call to skb_condense().

If all frags were freed, then skb->truesize can be recomputed.

This call can be done if skb is not yet owned, or destructor is
sock_edemux().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Cc: Willem de Bruijn <willemb@google.com>
---
 net/core/skbuff.c |    2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f86bf69cfb8d8bc17262cdba5d9f57a4726cd476..f1d04592ace02f32efa6e05df89c9a5e0023157f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1576,6 +1576,8 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
 		skb_set_tail_pointer(skb, len);
 	}
 
+	if (!skb->sk || skb->destructor == sock_edemux)
+		skb_condense(skb);
 	return 0;
 }
 EXPORT_SYMBOL(___pskb_trim);

^ permalink raw reply related

* [PATCH net-next] bpf: restore skb->sk before pskb_trim() call
From: Eric Dumazet @ 2017-04-26 16:09 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Andrey Konovalov, Willem de Bruijn

From: Eric Dumazet <edumazet@google.com>

While testing a fix [1] in ___pskb_trim(), addressing the WARN_ON_ONCE()
in skb_try_coalesce() reported by Andrey, I found that we had an skb
with skb->sk set but no skb->destructor.

This invalidated the heuristic found in commit 158f323b9868 ("net: adjust
skb->truesize in pskb_expand_head()") and in the cited patch.

Considering the BUG_ON(skb->sk) we have in skb_orphan(), we should
restrain the temporary setting to a minimal section.

[1] https://patchwork.ozlabs.org/patch/755570/ 
    net: adjust skb->truesize in ___pskb_trim()

Fixes: 8f917bba0042 ("bpf: pass sk to helper functions")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
---
 net/core/filter.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 9a37860a80fc78378705b681ec3b0468824cbcf4..a253a6197e6b37a7ae2fe451c646b01c861a3e22 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -98,8 +98,8 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
 
 		skb->sk = sk;
 		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
-		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
 		skb->sk = save_sk;
+		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
 	}
 	rcu_read_unlock();
 

^ permalink raw reply related

* Re: [Patch net 3/3] team: use a larger struct for mac address
From: Cong Wang @ 2017-04-26 16:10 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: Linux Kernel Network Developers, Andrey Konovalov
In-Reply-To: <20170426054033.GA1867@nanopsycho.orion>

On Tue, Apr 25, 2017 at 10:40 PM, Jiri Pirko <jiri@resnulli.us> wrote:
> Wed, Apr 26, 2017 at 07:03:23AM CEST, xiyou.wangcong@gmail.com wrote:
>>IPv6 tunnels use sizeof(struct in6_addr) as dev->addr_len,
>>but in many places especially bonding, we use struct sockaddr
>>to copy and set mac addr, this could lead to stack out-of-bounds
>>access.
>>
>>Fix it by using a larger address storage.
>>
>>Reported-by: Andrey Konovalov <andreyknvl@google.com>
>>Cc: Jiri Pirko <jiri@resnulli.us>
>>Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
>>---
>> drivers/net/team/team.c | 9 ++++++---
>> 1 file changed, 6 insertions(+), 3 deletions(-)
>>
>>diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
>>index 85c0124..88878f1 100644
>>--- a/drivers/net/team/team.c
>>+++ b/drivers/net/team/team.c
>>@@ -60,10 +60,13 @@ static struct team_port *team_port_get_rtnl(const struct net_device *dev)
>> static int __set_port_dev_addr(struct net_device *port_dev,
>>                              const unsigned char *dev_addr)
>> {
>>-      struct sockaddr addr;
>>+      struct {
>>+              unsigned short type;
>>+              unsigned char addr[MAX_ADDR_LEN];
>>+      } addr;
>
> Wouldn't it make sense to define this struct somewhere in the core
> headers?

I _did_ use a struct mac_addr until I found there are multiple places
in the tree already defining it... We are in a similar situation to the union
of struct in_addr and struct in6_addr, unfortunately.

We can always clean up these for net-next.

^ permalink raw reply

* Re: [Patch net 3/3] team: use a larger struct for mac address
From: Cong Wang @ 2017-04-26 16:11 UTC (permalink / raw)
  To: Jarod Wilson
  Cc: Jiri Pirko, Linux Kernel Network Developers, Andrey Konovalov
In-Reply-To: <7103fb6b-2483-37c0-48e5-19aa9fd4f386@redhat.com>

On Wed, Apr 26, 2017 at 8:55 AM, Jarod Wilson <jarod@redhat.com> wrote:
>
> We already have struct sockaddr_storage that could be used throughout this
> set as well. We just converted a few pieces of the bonding driver over to
> using it for better support of ipoib bonds, via commit
> faeeb317a5615076dff1ff44b51e862e6064dbd0. Might be better to just use that
> in both bonding and team, rather than having different per-driver structs,
> or Yet Another Address Storage implementation.

Technically, struct sockaddr_storage is not enough either, given the
max is MAX_ADDR_LEN. This is why I gave up on sockaddr_storage.

^ permalink raw reply

* Re: [PATCH net-next] bpf: restore skb->sk before pskb_trim() call
From: Daniel Borkmann @ 2017-04-26 16:14 UTC (permalink / raw)
  To: Eric Dumazet, David Miller; +Cc: netdev, Andrey Konovalov, Willem de Bruijn
In-Reply-To: <1493222963.6453.77.camel@edumazet-glaptop3.roam.corp.google.com>

On 04/26/2017 06:09 PM, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> While testing a fix [1] in ___pskb_trim(), addressing the WARN_ON_ONCE()
> in skb_try_coalesce() reported by Andrey, I found that we had an skb
> with skb->sk set but no skb->destructor.
>
> This invalidated the heuristic found in commit 158f323b9868 ("net: adjust
> skb->truesize in pskb_expand_head()") and in the cited patch.
>
> Considering the BUG_ON(skb->sk) we have in skb_orphan(), we should
> restrain the temporary setting to a minimal section.
>
> [1] https://patchwork.ozlabs.org/patch/755570/
>      net: adjust skb->truesize in ___pskb_trim()
>
> Fixes: 8f917bba0042 ("bpf: pass sk to helper functions")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Willem de Bruijn <willemb@google.com>
> Cc: Andrey Konovalov <andreyknvl@google.com>

Good point, thanks!

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply

* [REGRESSION next-20170426] Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe time for platform/amba/pci bus devices") causes oops in mvneta
From: Ralph Sennhauser @ 2017-04-26 16:15 UTC (permalink / raw)
  To: Sricharan R
  Cc: Rafael J. Wysocki, Joerg Roedel, Bjorn Helgaas, linux-acpi,
	linux-kernel, linux-pci, Thomas Petazzoni, netdev

Hi Sricharan R,

Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe time
for platform/amba/pci bus devices") causes a kernel panic as in the log
below on an armada-385. Reverting the commit fixes the issue.

Regards
Ralph

---

[   18.288244] [c06d8480] *pgd=0061941e(bad)
[   18.292271] Internal error: Oops: 80d [#1] SMP ARM
[   18.297080] Modules linked in:
[   18.471175] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           O    4.11.0-rc8-next-20170426 #3
[   18.479909] Hardware name: Marvell Armada 380/385 (Device Tree)
[   18.485850] task: c0a07000 task.stack: c0a00000
[   18.490401] PC is at __memzero+0x40/0x80
[   18.494336] LR is at 0x0
[   18.496878] pc : [<c0317920>]    lr : [<00000000>]    psr: 00000113
[   18.496878] sp : c0a01d0c  ip : 00000000  fp : c0a01d34
[   18.508402] r10: df43f800  r9 : df43f800  r8 : 00000001
[   18.513645] r7 : c06d7e40  r6 : 000007c0  r5 : c06d8480  r4 : de14aa80
[   18.520196] r3 : 00000000  r2 : 00000000  r1 : ffffffe4  r0 : c06d8480
[   18.526750] Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[   18.533912] Control: 10c5387d  Table: 16f0004a  DAC: 00000051
[   18.539679] Process swapper/0 (pid: 0, stack limit = 0xc0a00210)
[   18.545708] Stack: (0xc0a01d0c to 0xc0a02000)
[   18.550082] 1d00:                            c04caef0 c06d7e40 00000700 0cc04000 00000001
[   18.558292] 1d20: df691810 df43f800 c0a01d4c c0a01d38 c04caf20 c04cae7c e1b1d000 df5bd980
[   18.566503] 1d40: c0a01dc4 c0a01d50 c043d3d8 c04caf14 df41e494 00000000 1e652500 c0601460
[   18.574714] 1d60: dfbd8880 c0a04680 c06d7e40 00000000 dfbd8888 00000000 00000000 00000188
[   18.582924] 1d80: 1e652500 dfbd8880 00000040 00000100 df43fc80 00000001 00000000 dfbd8888
[   18.591134] 1da0: c043cfd4 00000040 0000012c ffff91f5 c0a02d00 c0a01de8 c0a01e24 c0a01dc8
[   18.599345] 1dc0: c04db2d4 c043cfe0 c015d81c c06db4d4 c0a04990 c0a04990 c0a302b2 1f267000
[   18.607555] 1de0: c096ab40 dfbd1b40 c0a01de8 c0a01de8 c0a01df0 c0a01df0 c0a01e1c 00000000
[   18.615766] 1e00: c0a0208c c0a00000 00000100 00000003 c0a02080 40000003 c0a01e84 c0a01e28
[   18.623977] 1e20: c0122e9c c04db0cc c0a01e54 00000101 c0a01e4c 00200102 c0a02d00 ffff91f5
[   18.632187] 1e40: 0000000a c06025b4 c0a31740 c09632a8 c0a02080 c0a01e28 c0169788 c0968420
[   18.640397] 1e60: 00000000 00000000 00000001 df408000 e0803100 c0a01f58 c0a01e94 c0a01e88
[   18.648607] 1e80: c01232d0 c0122d84 c0a01ebc c0a01e98 c015d6e4 c012322c c0a169c0 c0a03fac
[   18.656818] 1ea0: e080210c c0a01ee8 e0802100 e0803100 c0a01ee4 c0a01ec0 c01014a4 c015d688
[   18.665029] 1ec0: c01085b0 60000013 ffffffff c0a01f1c 00000000 c0a00000 c0a01f44 c0a01ee8
[   18.673240] 1ee0: c010c86c c0101460 00000001 00000000 00000000 c0118e40 c0a00000 c0a03cf8
[   18.681451] 1f00: c0a03cac c09696f8 00000000 00000000 c0a01f58 c0a01f44 c0a01f48 c0a01f38
[   18.689661] 1f20: c01085ac c01085b0 60000013 ffffffff 00000051 00000000 c0a01f54 c0a01f48
[   18.697871] 1f40: c05e3634 c010857c c0a01f8c c0a01f58 c0153458 c05e3618 c0a03c80 c0a0f30a
[   18.706082] 1f60: c0a30c00 000000bd c0a30c00 c0a03c80 ffffffff c0a30c00 c0833a28 dfffcb40
[   18.714292] 1f80: c0a01f9c c0a01f90 c0153718 c01532c0 c0a01fac c0a01fa0 c05dd758 c0153704
[   18.722503] 1fa0: c0a01ff4 c0a01fb0 c0800d68 c05dd6e8 ffffffff ffffffff 00000000 c08006f8
[   18.730713] 1fc0: 00000000 c0833a28 00000000 c0a30e94 c0a03c9c c0833a24 c0a081e8 0000406a
[   18.738923] 1fe0: 414fc091 00000000 00000000 c0a01ff8 0000807c c08009b4 00000000 00000000
[   18.747132] Backtrace:
[   18.749591] [<c04cae70>] (__build_skb) from [<c04caf20>] (build_skb+0x18/0x6c)
[   18.756843]  r9:df43f800 r8:df691810 r7:00000001 r6:0cc04000 r5:00000700 r4:c06d7e40
[   18.764618] [<c04caf08>] (build_skb) from [<c043d3d8>] (mvneta_poll+0x404/0xc18)
[   18.772042]  r5:df5bd980 r4:e1b1d000
[   18.775632] [<c043cfd4>] (mvneta_poll) from [<c04db2d4>] (net_rx_action+0x214/0x308)
[   18.783406]  r10:c0a01de8 r9:c0a02d00 r8:ffff91f5 r7:0000012c r6:00000040 r5:c043cfd4
[   18.791266]  r4:dfbd8888
[   18.793810] [<c04db0c0>] (net_rx_action) from [<c0122e9c>] (__do_softirq+0x124/0x248)
[   18.801672]  r10:40000003 r9:c0a02080 r8:00000003 r7:00000100 r6:c0a00000 r5:c0a0208c
[   18.809531]  r4:00000000
[   18.812074] [<c0122d78>] (__do_softirq) from [<c01232d0>] (irq_exit+0xb0/0xe4)
[   18.819325]  r10:c0a01f58 r9:e0803100 r8:df408000 r7:00000001 r6:00000000 r5:00000000
[   18.827184]  r4:c0968420
[   18.829729] [<c0123220>] (irq_exit) from [<c015d6e4>] (__handle_domain_irq+0x68/0xbc)
[   18.837591] [<c015d67c>] (__handle_domain_irq) from [<c01014a4>] (gic_handle_irq+0x50/0x94)
[   18.845976]  r9:e0803100 r8:e0802100 r7:c0a01ee8 r6:e080210c r5:c0a03fac r4:c0a169c0
[   18.853749] [<c0101454>] (gic_handle_irq) from [<c010c86c>] (__irq_svc+0x6c/0x90)
[   18.861261] Exception stack(0xc0a01ee8 to 0xc0a01f30)
[   18.866332] 1ee0:                   00000001 00000000 00000000 c0118e40 c0a00000 c0a03cf8
[   18.874543] 1f00: c0a03cac c09696f8 00000000 00000000 c0a01f58 c0a01f44 c0a01f48 c0a01f38
[   18.882753] 1f20: c01085ac c01085b0 60000013 ffffffff
[   18.887823]  r9:c0a00000 r8:00000000 r7:c0a01f1c r6:ffffffff r5:60000013 r4:c01085b0
[   18.895601] [<c0108570>] (arch_cpu_idle) from [<c05e3634>] (default_idle_call+0x28/0x34)
[   18.903727] [<c05e360c>] (default_idle_call) from [<c0153458>] (do_idle+0x1a4/0x1d0)
[   18.911503] [<c01532b4>] (do_idle) from [<c0153718>] (cpu_startup_entry+0x20/0x24)
[   18.919103]  r10:dfffcb40 r9:c0833a28 r8:c0a30c00 r7:ffffffff r6:c0a03c80 r5:c0a30c00
[   18.926962]  r4:000000bd
[   18.929508] [<c01536f8>] (cpu_startup_entry) from [<c05dd758>] (rest_init+0x7c/0x80)
[   18.937284] [<c05dd6dc>] (rest_init) from [<c0800d68>] (start_kernel+0x3c0/0x3cc)
[   18.944796] [<c08009a8>] (start_kernel) from [<0000807c>] (0x807c)
[   18.951001] Code: a8a0500c cafffff9 08bd8000 e3110020 (18a0500c)
[   18.957119] ---[ end trace 4e5c1e66e49610b0 ]---
[   18.961753] Kernel panic - not syncing: Fatal exception in interrupt
[   18.968133] CPU1: stopping
[   18.970852] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G      D    O    4.11.0-rc8-next-20170426 #3
[   18.979585] Hardware name: Marvell Armada 380/385 (Device Tree)
[   18.985527] Backtrace:
[   18.987986] [<c010ba4c>] (dump_backtrace) from [<c010bd20>] (show_stack+0x18/0x1c)
[   18.995586]  r7:df467f28 r6:60000193 r5:c0a165b0 r4:00000000
[   19.001268] [<c010bd08>] (show_stack) from [<c031883c>] (dump_stack+0x94/0xa8)
[   19.008520] [<c03187a8>] (dump_stack) from [<c010ecec>] (handle_IPI+0x178/0x198)
[   19.015945]  r7:df467f28 r6:00000000 r5:00000001 r4:c0a30ef0
[   19.021627] [<c010eb74>] (handle_IPI) from [<c01014e4>] (gic_handle_irq+0x90/0x94)
[   19.029227]  r7:df467f28 r6:e080210c r5:c0a03fac r4:c0a169c0
[   19.034908] [<c0101454>] (gic_handle_irq) from [<c010c86c>] (__irq_svc+0x6c/0x90)
[   19.042419] Exception stack(0xdf467f28 to 0xdf467f70)
[   19.047490] 7f20:                   00000001 00000000 00000000 c0118e40 df466000 c0a03cf8
[   19.055701] 7f40: c0a03cac c09696f8 00000000 00000000 df467f98 df467f84 df467f88 df467f78
[   19.063911] 7f60: c01085ac c01085b0 60000013 ffffffff
[   19.068982]  r9:df466000 r8:00000000 r7:df467f5c r6:ffffffff r5:60000013 r4:c01085b0
[   19.076758] [<c0108570>] (arch_cpu_idle) from [<c05e3634>] (default_idle_call+0x28/0x34)
[   19.084882] [<c05e360c>] (default_idle_call) from [<c0153458>] (do_idle+0x1a4/0x1d0)
[   19.092657] [<c01532b4>] (do_idle) from [<c0153718>] (cpu_startup_entry+0x20/0x24)
[   19.100258]  r10:00000000 r9:414fc091 r8:0000406a r7:c0a30f00 r6:10c0387d r5:00000001
[   19.108118]  r4:00000087
[   19.110661] [<c01536f8>] (cpu_startup_entry) from [<c010e918>] (secondary_start_kernel+0x150/0x15c)
[   19.119743] [<c010e7c8>] (secondary_start_kernel) from [<0010162c>] (0x10162c)
[   19.126993]  r5:00000051 r4:1f45c06a
[   19.130583] Rebooting in 3 seconds..

---

git bisect start
# bad: [e0a8aa40bd2c7d973b6520293f3fd86dcbca847b] Add linux-next specific files for 20170426
git bisect bad e0a8aa40bd2c7d973b6520293f3fd86dcbca847b
# good: [c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201] Linux 4.11-rc1
git bisect good c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201
# good: [221c3c382a529418d3a2acc58f53101103f6ff13] Merge remote-tracking branch 'l2-mtd/master'
git bisect good 221c3c382a529418d3a2acc58f53101103f6ff13
# bad: [ffcefbbe2f8c0af5f22af921afd2756baceddd74] Merge remote-tracking branch 'spi/for-next'
git bisect bad ffcefbbe2f8c0af5f22af921afd2756baceddd74
# good: [c2e7f82d336a451ebb904b8bf9a5a558cf16c39b] drm: mali-dp: Check the mclk rate and allow up/down scaling
git bisect good c2e7f82d336a451ebb904b8bf9a5a558cf16c39b
# good: [03b600f368cfa6f94e9622dda82e60f55b5e6224] Merge remote-tracking branch 'block/for-next'
git bisect good 03b600f368cfa6f94e9622dda82e60f55b5e6224
# good: [8b1e05cbcdfc59496eff5870cb6b6ab964ecc733] Merge remote-tracking branch 'battery/for-next'
git bisect good 8b1e05cbcdfc59496eff5870cb6b6ab964ecc733
# good: [e765d496d3adb3d69bd8c53df6fd3f3b77e5b1d2] Merge remote-tracking branch 'watchdog/master'
git bisect good e765d496d3adb3d69bd8c53df6fd3f3b77e5b1d2
# bad: [858eed97e369df7af0993463f355aa9755227136] Merge remote-tracking branch 'audit/next'
git bisect bad 858eed97e369df7af0993463f355aa9755227136
# bad: [efc2195bcc35eebf06805806eb525893f3b9ab5c] Merge branches 'arm/exynos', 'arm/omap', 'arm/rockchip', 'arm/mediatek', 'arm/smmu', 'arm/core', 'x86/vt-d', 'x86/amd' and 'core' into next
git bisect bad efc2195bcc35eebf06805806eb525893f3b9ab5c
# bad: [316ca8804ea84a782d5ba2163711ebb22116ff5a] ACPI/IORT: Remove linker section for IORT entries probing
git bisect bad 316ca8804ea84a782d5ba2163711ebb22116ff5a
# good: [d7b0558230e444f29488fcee0b0b561015d16f8a] iommu/of: Prepare for deferred IOMMU configuration
git bisect good d7b0558230e444f29488fcee0b0b561015d16f8a
# bad: [09515ef5ddad71c7820e5e428da418b709feeb26] of/acpi: Configure dma operations at probe time for platform/amba/pci bus devices
git bisect bad 09515ef5ddad71c7820e5e428da418b709feeb26
# good: [1d9029d440e40b276c0691caed1de10c42d96bef] ACPI/IORT: Add function to check SMMUs drivers presence
git bisect good 1d9029d440e40b276c0691caed1de10c42d96bef
# good: [efc8551a276faab19d85079da02c5fb602b0dcbe] of: device: Fix overflow of coherent_dma_mask
git bisect good efc8551a276faab19d85079da02c5fb602b0dcbe
# first bad commit: [09515ef5ddad71c7820e5e428da418b709feeb26] of/acpi:
Configure dma operations at probe time for platform/amba/pci bus devices

^ permalink raw reply

* Re: [PATCH net-next 1/4] ixgbe: sparc: rename the ARCH_WANT_RELAX_ORDER to IXGBE_ALLOW_RELAXED_ORDER
From: Alexander Duyck @ 2017-04-26 16:18 UTC (permalink / raw)
  To: Ding Tianhong
  Cc: Amir Ancel, David Laight, Gabriele Paoloni, davem@davemloft.net,
	Catalin Marinas, Will Deacon, Mark Rutland, Robin Murphy,
	jeffrey.t.kirsher@intel.com, linux-arm-kernel@lists.infradead.org,
	netdev@vger.kernel.org, LinuxArm, linux-pci@vger.kernel.org
In-Reply-To: <a2239141-a185-1bb8-0abd-7a05b9fde015@huawei.com>

On Wed, Apr 26, 2017 at 2:26 AM, Ding Tianhong <dingtianhong@huawei.com> wrote:
> Hi Amir:
>
> I am really glad to hear that mlx5 will support RO mode this year. If so, would you agree to enabling it dynamically via ethtool -s xxx?
> We tried that several months ago, but at that time only one driver would have used it, so the maintainer was against it. If mlx5 is going to support RO,
> we could try to restart this solution. What do you think about it? :)
>
> Thanks
> Ding

Hi Ding,

Enabling relaxed ordering really doesn't have any place in ethtool. It
is a PCIe attribute that you are essentially wanting to enable.

It might be worth while to take a look at updating the PCIe code path
to handle this. Really what we should probably do is guarantee that
the architectures that need relaxed ordering are setting it in the
PCIe Device Control register and that the ones that don't are clearing
the bit. It's possible that this is already occurring, but I don't
know what the state of handling those bits is in the kernel. Once we can
guarantee that we could use that to have the drivers determine their
behavior in regards to relaxed ordering. For example in the case of
igb/ixgbe we could probably change the behavior so that it will by
default try to use relaxed ordering but if it is not enabled in PCIe
Device Control register the hardware should not request to use it. It
would simplify things in the drivers and allow for each architecture
to control things as needed in their PCIe code.

- Alex

^ permalink raw reply

* Re: [PATCH net-next] tcp: memset ca_priv data to 0 properly
From: Wei Wang @ 2017-04-26 16:25 UTC (permalink / raw)
  To: Linux Kernel Network Developers, David Miller
  Cc: Eric Dumazet, Yuchung Cheng, Neal Cardwell, Wei Wang
In-Reply-To: <20170426003802.40091-1-tracywwnj@gmail.com>

This fix should target the net tree instead of net-next.
Sorry for the wrong title.

On Tue, Apr 25, 2017 at 5:38 PM, Wei Wang <weiwan@google.com> wrote:
> From: Wei Wang <weiwan@google.com>
>
> Always zero out ca_priv data in tcp_assign_congestion_control() so that
> ca_priv data is cleared out during socket creation.
> Also always zero out ca_priv data in tcp_reinit_congestion_control() so
> that when cc algorithm is changed, ca_priv data is cleared out as well.
> We should still zero out ca_priv data even in TCP_CLOSE state because
> user could call connect() on AF_UNSPEC to disconnect the socket and
> leave it in TCP_CLOSE state and later call setsockopt() to switch cc
> algorithm on this socket.
>
> Fixes: 2b0a8c9ee ("tcp: add CDG congestion control")
> Reported-by: Andrey Konovalov  <andreyknvl@google.com>
> Signed-off-by: Wei Wang <weiwan@google.com>
> Acked-by: Eric Dumazet <edumazet@google.com>
> Acked-by: Yuchung Cheng <ycheng@google.com>
> Acked-by: Neal Cardwell <ncardwell@google.com>
> ---
>  net/ipv4/tcp_cong.c | 11 +++--------
>  1 file changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
> index 79c4817abc94..6e3c512054a6 100644
> --- a/net/ipv4/tcp_cong.c
> +++ b/net/ipv4/tcp_cong.c
> @@ -168,12 +168,8 @@ void tcp_assign_congestion_control(struct sock *sk)
>         }
>  out:
>         rcu_read_unlock();
> +       memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
>
> -       /* Clear out private data before diag gets it and
> -        * the ca has not been initialized.
> -        */
> -       if (ca->get_info)
> -               memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
>         if (ca->flags & TCP_CONG_NEEDS_ECN)
>                 INET_ECN_xmit(sk);
>         else
> @@ -200,11 +196,10 @@ static void tcp_reinit_congestion_control(struct sock *sk,
>         tcp_cleanup_congestion_control(sk);
>         icsk->icsk_ca_ops = ca;
>         icsk->icsk_ca_setsockopt = 1;
> +       memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
>
> -       if (sk->sk_state != TCP_CLOSE) {
> -               memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
> +       if (sk->sk_state != TCP_CLOSE)
>                 tcp_init_congestion_control(sk);
> -       }
>  }
>
>  /* Manage refcounts on socket close. */
> --
> 2.13.0.rc0.306.g87b477812d-goog
>

^ permalink raw reply

* Re: xdp_redirect ifindex vs port. Was: best API for returning/setting egress port?
From: John Fastabend @ 2017-04-26 16:35 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Alexei Starovoitov, Daniel Borkmann, Andy Gospodarek,
	Daniel Borkmann, Alexei Starovoitov, netdev@vger.kernel.org,
	xdp-newbies@vger.kernel.org
In-Reply-To: <20170426111158.578b925e@redhat.com>

[...]

>> Jesper, I was working up the code for the redirect piece for ixgbe and
>> virtio, please use this as a base for your virtual port number table. I'll
>> push an update onto github tomorrow. I think the table should drop in fairly
>> nicely.
> 
> Cool, I will do that. Then, I'll also have a redirect method to shape
> this around, and I would have to benchmark/test your ixgbe redirect.
> 
> (John please let me know, what github tree we are talking about, and
> what branch)
> 
> 
>> One piece that isn't clear to me is how do you plan to instantiate and
>> program this table. Is it a new static bpf map that is created any
>> time we see the redirect command? I think this would be preferred.
> 
> (This is difficult to explain without us misunderstanding each-other)
> 

Yep and I'm not sure I follow :)

> As Alexei also mentioned before, ifindex vs port makes no real
> difference seen from the bpf program side.  It is userspace's
> responsibility to add ifindex/port's to the bpf-maps, according to how
> the bpf program "policy" wants to "connect" these ports.  The
> port-table system adds one extra step, of also adding this port to the
> port-table (which lives inside the kernel). 
> 

I'm not sure I understand the "lives inside the kernel" bit. I assumed
the 'map' should be a bpf map and behave like any other bpf map.

I wanted a new map to be defined, something like this from the bpf programmer
side.

struct bpf_map_def SEC("maps") port_table = {
	.type = BPF_MAP_TYPE_PORT_CONNECTION,
	.key_size = sizeof(u32),
	.value_size = BPF_PORT_CONNECTION_SIZE,
	.max_entries = 256,
};

> When loading the XDP program, we also need to pass along a port table
> "id" this XDP program is associated with (and if it doesn't exist you
> create it).  And your userspace "control-plane" application also needs
> to know this port table "id", when adding a new port.

So the user space application that is loading the program also needs
to handle this map. This seems correct to me. But I don't see the
value in making some new port table when we already have a well-understood
framework for maps.

> 
> The concept of having multiple port tables is key.  As this implies we
> can have several simultaneous "data-planes" that are *isolated* from
> each-other.  Think about how network-namespaces/containers want
> isolation. A subtle thing I'm afraid to mention is that, as opposed to the
> ifindex model, a port table with mapping to a net_device pointer, would
> allow (faster) delivery into the container's inner net_device, which
> sort of violates the isolation, but I would argue it is not a problem
> as this net_device pointer could only be added from a process within the
> namespace.  I like this feature, but it could easily be disallowed via
> port insertion-time validation.
> 

I think the above optimization should be allowed. And I agree multiple port
tables (maps?) are needed. Again all this points to using standard maps
logic in my mind. For permissions and different domains, which I think
you were starting to touch on, it looks like we could extend the pinning API.
At the moment it does an inode_permission(inode, MAY_WRITE) check but I
presume this could be extended. None of this would be needed in v1 and
could be added subsequently. Read-only maps seem doable.

>    
>>>> I'm not worried about the DROP case, I agree that is fine (as you
>>>> also say).  The problem is unintentionally sending a packet to a
>>>> wrong ifindex.  This is clearly an eBPF program error, BUT with
>>>> XDP this becomes a very hard to debug program error.  With
>>>> TC-redirect/cls_bpf we can tcpdump the packets, with XDP there is
>>>> no visibility into this happening (the NSA is going to love this
>>>> "feature").  Maybe we could add yet-another tracepoint to allow
>>>> debugging this.  My proposal that we simply remove the possibility
>>>> for such program errors, by as you say move the validation from
>>>> run-time into static insertion-time, via a port table.  
>>>
>>> I think lack of tcpdump-like debugging in xdp is a separate issue.
>>> As I was saying in the other thread we have trivial 'xdpdump'
>>> kern+user app that emits pcap file, but it's too specific to how we
>>> use tail_calls+prog_array in our xdp setup. I'm working on the
>>> program chaining that will be generic and allow us transparently
>>> add multiple xdp or tc progs to the same attachment point and will
>>> allow us to do 'xdpdump' at any point of this pipeline, so
>>> debugging of what happened to the packet will be easier and done in
>>> the same way for both tc and xdp.
>>> btw in our experience working with both tc and xdp the tc+bpf was
>>> actually harder to use and more bug prone.
>>>   
>>
>> Nice, the tcpdump-like debugging looks interesting.
> 
> Yes, this xdpdump sounds like a very useful tool.
> 

^ permalink raw reply

* Re: [Patch net 3/3] team: use a larger struct for mac address
From: Jarod Wilson @ 2017-04-26 16:46 UTC (permalink / raw)
  To: Cong Wang; +Cc: Jiri Pirko, Linux Kernel Network Developers, Andrey Konovalov
In-Reply-To: <CAM_iQpVXBuXNfVM_h894HEF_9hv2rWgDJcMbvEJG25xfm7DGUA@mail.gmail.com>

On 2017-04-26 12:11 PM, Cong Wang wrote:
> On Wed, Apr 26, 2017 at 8:55 AM, Jarod Wilson <jarod@redhat.com> wrote:
>>
>> We already have struct sockaddr_storage that could be used throughout this
>> set as well. We just converted a few pieces of the bonding driver over to
>> using it for better support of ipoib bonds, via commit
>> faeeb317a5615076dff1ff44b51e862e6064dbd0. Might be better to just use that
>> in both bonding and team, rather than having different per-driver structs,
>> or Yet Another Address Storage implementation.
> 
> Technically, struct sockaddr_storage is not enough either, given the
> max is MAX_ADDR_LEN. This is why I gave up on sockaddr_storage.

Wait, what? Am I missing something? MAX_ADDR_LEN is 32, and 
sockaddr_storage is a #define for __kernel_sockaddr_storage, which has 
its __data member defined as being of size 128 - sizeof(unsigned short).

-- 
Jarod Wilson
jarod@redhat.com

^ permalink raw reply

* Re: bluetooth 6lowpan interfaces are not virtual anymore
From: Jukka Rissanen @ 2017-04-26 16:52 UTC (permalink / raw)
  To: Michael Richardson, Alexander Aring
  Cc: Network Development, Luiz Augusto von Dentz,
	linux-wpan-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Linux Bluetooth
In-Reply-To: <383.1493218546-VaGMqW6d0iXFptTlUKWvmrDks+cytr/Z@public.gmane.org>

Hi Michael,

On Wed, 2017-04-26 at 10:55 -0400, Michael Richardson wrote:
> Alexander Aring <aar-bIcnvbaLZ9MEGnE8C9+IrQ@public.gmane.org> wrote:
>     >> In a classic SVR4 STREAMS works, it would have been just
> another
>     >> module.  (No, I'm not a fan of *STREAMS* or of SVR4 in
> general,
>     >> although I liked some of the ideas).
>     >>
> 
>     > ok, I see you complain about "having a virtual on top of wpan
>     > interface", or?
> 
>     > I wanted to talk at first about the queue handling which is
> introduced
>     > when 6LoWPAN is not a virtual interface anymore. Or do you want
> to have
>     > a queue in front of 6lowpan adaptation (see other mail reply
> with ASCII
>     > graphics).
> 
> I would like to have a single queue, as close to the hardware as
> possible,
> such that BQL can do its thing easily.  Should we rethink outgoing
> fragment
> handling for 6lowpan?  Clearly the BT people had a need.
> I don't think they've had a chance to respond to your complaints.

Note that the BT fragmentation (or actually it is called segmentation
in BT) is totally different from what 802.15.4 is doing. I do not think
there is any need to add fragmentation handling into 6lo.

Actually the 6lowpan kernel module could probably be simplified to be a
library. We did this in Zephyr where we have compress() and
uncompress() functions that do all the magic.  

> 
>     > We can change that you can run multiple interfaces on one
>     > PHY. Currently we just allow one, because address filtering.
> Disable
>     > address filtering
>     > we will lose ACK handling on hardware.
> 
> Yes, that's a limitation of some hardware, and if you enable multiple
> PANIDs,
> that might be the consequence....
> 
>     > I can try to implement all stuff in software "for fun, maybe
> see what
>     > we can do to handle ACK in software, etc" Then you can run
> multiple
> 
> I'm not asking you to do it, I'm asking, now that we've gotten to a
> certain
> point, we have a better idea what the various requirements are, and
> can we
> re-evaluate things and maybe tweak some things.
> 
> --
> ]               Never tell me the odds!                 | ipv6 mesh
> networks [
> ]   Michael Richardson, Sandelman Software Works        | network
> architect  [
> ]     mcr-SWp7JaYWvAQV+D8aMU/kSg@public.gmane.org  http://www.sandelman.ca/        |   ruby on
> rails    [
> 


Cheers,
Jukka

^ permalink raw reply

* Re: [PATCH v1 net-next 0/6] Extend socket timestamping API
From: Richard Cochran @ 2017-04-26 16:54 UTC (permalink / raw)
  To: Miroslav Lichvar
  Cc: netdev, Willem de Bruijn, Soheil Hassas Yeganeh, Keller, Jacob E,
	Denny Page, Jiri Benc
In-Reply-To: <20170426145035.25846-1-mlichvar@redhat.com>

On Wed, Apr 26, 2017 at 04:50:29PM +0200, Miroslav Lichvar wrote:
> This patchset adds new options to the timestamping API that will be
> useful for NTP implementations and possibly other applications.

Are there any userland ntp patches floating around to exercise the new
HW time stamping option?

Thanks,
Richard

^ permalink raw reply

* [PATCH net-next] ip6_tunnel: Fix missing tunnel encapsulation limit option
From: Craig Gallek @ 2017-04-26 17:07 UTC (permalink / raw)
  To: Hideaki YOSHIFUJI, Alexey Kuznetsov, David S . Miller; +Cc: netdev

From: Craig Gallek <kraig@google.com>

The IPv6 tunneling code tries to insert IPV6_TLV_TNL_ENCAP_LIMIT and
IPV6_TLV_PADN options when an encapsulation limit is defined (the
default is a limit of 4).  An MTU adjustment is done to account for
these options as well.  However, the options are never present in the
generated packets.

ipv6_push_nfrag_opts requires that IPV6_RTHDR be present in order to
include any IPV6_DSTOPTS options.  The v6 tunnel code does not
use routing options, so the encap limit options are not included.

A brief reading of RFC 3542 section 9.2 (specifically the 4th paragraph)
makes me believe that this requirement in the kernel is incorrect.

Fixes: 333fad5364d6 ("[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542)")
Signed-off-by: Craig Gallek <kraig@google.com>
---
 net/ipv6/exthdrs.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 25192a3b0cd7..224a89e68a42 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -932,15 +932,12 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
 			  u8 *proto,
 			  struct in6_addr **daddr, struct in6_addr *saddr)
 {
-	if (opt->srcrt) {
+	if (opt->srcrt)
 		ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr);
-		/*
-		 * IPV6_RTHDRDSTOPTS is ignored
-		 * unless IPV6_RTHDR is set (RFC3542).
-		 */
-		if (opt->dst0opt)
-			ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
-	}
+
+	if (opt->dst0opt)
+		ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
+
 	if (opt->hopopt)
 		ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
 }
-- 
2.13.0.rc0.306.g87b477812d-goog

^ permalink raw reply related

* [PATCH net] tcp: fix access to sk->sk_state in tcp_poll()
From: Davide Caratti @ 2017-04-26 17:07 UTC (permalink / raw)
  To: David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, Wei Wang
  Cc: netdev

Avoid direct access to sk->sk_state when tcp_poll() is called on a socket
using active TCP fastopen with deferred connect. Use local variable
'state', which stores the result of sk_state_load(), like it was done in
commit 00fd38d938db ("tcp: ensure proper barriers in lockless contexts").

Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 40ba424..2dc7fcf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -533,7 +533,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 		if (tp->urg_data & TCP_URG_VALID)
 			mask |= POLLPRI;
-	} else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+	} else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
 		/* Active TCP fastopen socket with defer_connect
 		 * Return POLLOUT so application can call write()
 		 * in order for kernel to generate SYN+data
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH net] net: adjust skb->truesize in ___pskb_trim()
From: Andrey Konovalov @ 2017-04-26 17:08 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, Willem de Bruijn
In-Reply-To: <1493222866.6453.75.camel@edumazet-glaptop3.roam.corp.google.com>

On Wed, Apr 26, 2017 at 6:07 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> Andrey found a way to trigger the WARN_ON_ONCE(delta < len) in
> skb_try_coalesce() using syzkaller and a filter attached to a TCP
> socket.
>
> As we did recently in commit 158f323b9868 ("net: adjust skb->truesize in
> pskb_expand_head()") we can adjust skb->truesize from ___pskb_trim(),
> via a call to skb_condense().
>
> If all frags were freed, then skb->truesize can be recomputed.
>
> This call can be done if skb is not yet owned, or destructor is
> sock_edemux().

Hi Eric,

I still see the warning even with your patch.

Thanks!

>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Andrey Konovalov <andreyknvl@google.com>
> Cc: Willem de Bruijn <willemb@google.com>
> ---
>  net/core/skbuff.c |    2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index f86bf69cfb8d8bc17262cdba5d9f57a4726cd476..f1d04592ace02f32efa6e05df89c9a5e0023157f 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1576,6 +1576,8 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
>                 skb_set_tail_pointer(skb, len);
>         }
>
> +       if (!skb->sk || skb->destructor == sock_edemux)
> +               skb_condense(skb);
>         return 0;
>  }
>  EXPORT_SYMBOL(___pskb_trim);
>
>

^ permalink raw reply

* Re: blocking ops when !TASK_RUNNING in vsock_stream_sendmsg() (again)
From: Cong Wang @ 2017-04-26 17:18 UTC (permalink / raw)
  To: Michal Kubecek
  Cc: Claudio Imbrenda, Linux Kernel Network Developers, Andy King,
	George Zhang
In-Reply-To: <20170421081458.GI13789@unicorn.suse.cz>

[-- Attachment #1: Type: text/plain, Size: 532 bytes --]

Hi,

On Fri, Apr 21, 2017 at 1:14 AM, Michal Kubecek <mkubecek@suse.cz> wrote:
> I tried to think about a solution but there doesn't seem to be an easy
> way to fix this in vsock_stream_sendmsg() as moving prepare_to_wait()
> inside the loop would result in missed wake-ups (that was the problem
> with the original fix); IMHO the right way to resolve the issue would be
> rewriting the vmci queue pair code to allow performing the has_space()
> check without taking a mutex.


Can you try the attached patch (compile-tested only)?

Thanks.

[-- Attachment #2: vsock-wait.diff --]
[-- Type: text/plain, Size: 2095 bytes --]

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 6f7f675..dfc8c51e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	long timeout;
 	int err;
 	struct vsock_transport_send_notify_data send_data;
-
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	sk = sock->sk;
 	vsk = vsock_sk(sk);
@@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	if (err < 0)
 		goto out;
 
-
 	while (total_written < len) {
 		ssize_t written;
 
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		add_wait_queue(sk_sleep(sk), &wait);
 		while (vsock_stream_has_space(vsk) == 0 &&
 		       sk->sk_err == 0 &&
 		       !(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 			/* Don't wait for non-blocking sockets. */
 			if (timeout == 0) {
 				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
 
 			err = transport->notify_send_pre_block(vsk, &send_data);
 			if (err < 0) {
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
 
 			release_sock(sk);
-			timeout = schedule_timeout(timeout);
+			timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 			lock_sock(sk);
 			if (signal_pending(current)) {
 				err = sock_intr_errno(timeout);
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			} else if (timeout == 0) {
 				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
-
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
 		}
-		finish_wait(sk_sleep(sk), &wait);
+		remove_wait_queue(sk_sleep(sk), &wait);
 
 		/* These checks occur both as part of and after the loop
 		 * conditional since we need to check before and after

^ permalink raw reply related

* Re: [PATCH net] tcp: fix access to sk->sk_state in tcp_poll()
From: Wei Wang @ 2017-04-26 17:24 UTC (permalink / raw)
  To: Davide Caratti
  Cc: David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy,
	Linux Kernel Network Developers
In-Reply-To: <0a1710138c3e55c388a52dba817b2d1b1996c899.1493226034.git.dcaratti@redhat.com>

On Wed, Apr 26, 2017 at 10:07 AM, Davide Caratti <dcaratti@redhat.com> wrote:
> avoid direct access to sk->sk_state when tcp_poll() is called on a socket
> using active TCP fastopen with deferred connect. Use local variable
> 'state', which stores the result of sk_state_load(), like it was done in
> commit 00fd38d938db ("tcp: ensure proper barriers in lockless contexts").
>
> Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support")
> Signed-off-by: Davide Caratti <dcaratti@redhat.com>

Acked-by: Wei Wang <weiwan@google.com>
Thanks for the fix.

^ permalink raw reply

* Re: [PATCH v2 2/2] ARM: dts: Add the ethernet and ethernet PHY to the cygnus core DT.
From: Eric Anholt @ 2017-04-26 17:25 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Florian Fainelli, Vivien Didelot, netdev, Rob Herring,
	Mark Rutland, devicetree, linux-arm-kernel, linux-kernel,
	bcm-kernel-feedback-list, Ray Jui, Scott Branden, Jon Mason
In-Reply-To: <20170426004907.GA9453@lunn.ch>

[-- Attachment #1: Type: text/plain, Size: 469 bytes --]

Andrew Lunn <andrew@lunn.ch> writes:

>> +		eth0: ethernet@18042000 {
>> +			compatible = "brcm,amac";
>> +			reg = <0x18042000 0x1000>,
>> +			      <0x18110000 0x1000>;
>> +			reg-names = "amac_base", "idm_base";
>> +			interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>;
>> +			max-speed = <1000>;
>
> Hi Eric
>
> Sorry I missed this the first time. Does this Ethernet controller do
> more than 1Gbps? Does this max-speed do anything useful?

It doesn't look like it.  Dropped.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply

* Re: hmmm...
From: David Miller @ 2017-04-26 17:25 UTC (permalink / raw)
  To: ast; +Cc: netdev
In-Reply-To: <133d8e46-c5c1-1e0e-86a1-a7b5a2737bff@fb.com>

From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 25 Apr 2017 22:31:06 -0700

> On 4/25/17 8:38 PM, David Miller wrote:
> jgt/jge/jsgt/jsge was a stumbling block for me as well,
> since it still takes me longer than necessary to disambiguate
> into > vs >= and signed/unsigned

I had this problem while writing Sparc JIT :)

> Though I think Daniel still prefers old classic bpf asm ;)

I do too.

> Anyway, back to the question...
> since BFD and GCC are so much entrenched into canonical style
> of asm code, I don't mind that gnu toolchain will be using it.

Ok.  All data flows from right to left in the instructions so it will
be familiar for x86 assembler hackers.

> I like that you used 'dw' in 'ldxdw' instead of just 'd'
> though 'x' can probably be dropped.

Ok, dropped.

> 'x' should be added here instead:
>  { "stb", BPF_OPC_ST    | BPF_OPC_MEM  | BPF_OPC_B, "[1+O],i" },
>  { "stb", BPF_OPC_STX   | BPF_OPC_MEM  | BPF_OPC_B, "[1+O],2" },

The 'x' really isn't necessary, I would say.  Assembler can tell from
context whether immediate or register variant is wanted and thus:

	stb	[r1+8], 2
	stb	[r1+8], r4

are both assembled correctly.

^ permalink raw reply

* Re: [PATCH v2 2/2] ARM: dts: Add the ethernet and ethernet PHY to the cygnus core DT.
From: Eric Anholt @ 2017-04-26 17:26 UTC (permalink / raw)
  To: Florian Fainelli, Vivien Didelot, Andrew Lunn,
	netdev-u79uwXL29TY76Z2rM5mHXA, Rob Herring, Mark Rutland,
	devicetree-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	bcm-kernel-feedback-list-dY08KVG/lbpWk0Htik3J/w, Ray Jui,
	Scott Branden, Jon Mason
In-Reply-To: <0cb00eb7-41d0-0390-4687-d966499ed9f4-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 1646 bytes --]

Florian Fainelli <f.fainelli-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> On 04/25/2017 04:53 PM, Eric Anholt wrote:
>> Cygnus has a single AMAC controller connected to the B53 switch with 2
>> PHYs.  On the BCM911360_EP platform, those two PHYs are connected to
>> the external ethernet jacks.
>> 
>> Signed-off-by: Eric Anholt <eric-WhKQ6XTQaPysTnJN9+BGXg@public.gmane.org>
>> Reviewed-by: Florian Fainelli <f.fainelli-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>> ---
>> 
>> v2: Call the node "switch", just call the ports "port" (suggestions by
>>     Florian), drop max-speed on the phys (suggestion by Andrew Lunn),
>>     call the other nodes "ethernet" and "ethernet-phy" (suggestions by
>>     Sergei Shtylyov)
>> 
>>  arch/arm/boot/dts/bcm-cygnus.dtsi      | 58 ++++++++++++++++++++++++++++++++++
>>  arch/arm/boot/dts/bcm911360_entphn.dts |  8 +++++
>>  2 files changed, 66 insertions(+)
>> 
>> diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi
>> index 009f1346b817..9fd89be0f5e0 100644
>> --- a/arch/arm/boot/dts/bcm-cygnus.dtsi
>> +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi
>> @@ -142,6 +142,54 @@
>>  			interrupts = <0>;
>>  		};
>>  
>> +		mdio: mdio@18002000 {
>> +			compatible = "brcm,iproc-mdio";
>> +			reg = <0x18002000 0x8>;
>> +			#size-cells = <1>;
>> +			#address-cells = <0>;
>
> Sorry for not noticing earlier, since you override this correctly in the
> board-level DTS file can you put a:
>
> 			status = "disabled"
>
> property in there by default?

I didn't have the override in the board file either, just switch and
ethernet.  Fixed.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply

* Re: [PATCH v2 2/2] ARM: dts: Add the ethernet and ethernet PHY to the cygnus core DT.
From: Eric Anholt @ 2017-04-26 17:26 UTC (permalink / raw)
  To: Florian Fainelli, Vivien Didelot, Andrew Lunn,
	netdev-u79uwXL29TY76Z2rM5mHXA, Rob Herring, Mark Rutland,
	devicetree-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	bcm-kernel-feedback-list-dY08KVG/lbpWk0Htik3J/w, Ray Jui,
	Scott Branden, Jon Mason
In-Reply-To: <0cb00eb7-41d0-0390-4687-d966499ed9f4-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 1564 bytes --]

Florian Fainelli <f.fainelli-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> On 04/25/2017 04:53 PM, Eric Anholt wrote:
>> Cygnus has a single AMAC controller connected to the B53 switch with 2
>> PHYs.  On the BCM911360_EP platform, those two PHYs are connected to
>> the external ethernet jacks.
>> 
>> Signed-off-by: Eric Anholt <eric-WhKQ6XTQaPysTnJN9+BGXg@public.gmane.org>
>> Reviewed-by: Florian Fainelli <f.fainelli-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>> ---
>> 
>> v2: Call the node "switch", just call the ports "port" (suggestions by
>>     Florian), drop max-speed on the phys (suggestion by Andrew Lunn),
>>     call the other nodes "ethernet" and "ethernet-phy" (suggestions by
>>     Sergei Shtylyov)
>> 
>>  arch/arm/boot/dts/bcm-cygnus.dtsi      | 58 ++++++++++++++++++++++++++++++++++
>>  arch/arm/boot/dts/bcm911360_entphn.dts |  8 +++++
>>  2 files changed, 66 insertions(+)
>> 
>> diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi
>> index 009f1346b817..9fd89be0f5e0 100644
>> --- a/arch/arm/boot/dts/bcm-cygnus.dtsi
>> +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi
>> @@ -142,6 +142,54 @@
>>  			interrupts = <0>;
>>  		};
>>  
>> +		mdio: mdio@18002000 {
>> +			compatible = "brcm,iproc-mdio";
>> +			reg = <0x18002000 0x8>;
>> +			#size-cells = <1>;
>> +			#address-cells = <0>;
>
> Sorry for not noticing earlier, since you override this correctly in the
> board-level DTS file can you put a:
>
> 			status = "disabled"
>
> property in there by default?

Done.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox