Netdev List
 help / color / mirror / Atom feed
* [RFC PATCH 3/6] net: ethernet: ti: cpsw: add MQPRIO Qdisc offload
From: Ivan Khoronzhuk @ 2018-05-18 21:15 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, Ivan Khoronzhuk
In-Reply-To: <20180518211510.13341-1-ivan.khoronzhuk@linaro.org>

It's possible to offload the vlan to tc priority mapping, with the
assumption that sk_prio == L2 prio.

Example:
$ ethtool -L eth0 rx 1 tx 4

$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1

$ tc -g class show dev eth0
+---(100:ffe2) mqprio
|    +---(100:3) mqprio
|    +---(100:4) mqprio
|    
+---(100:ffe1) mqprio
|    +---(100:2) mqprio
|    
+---(100:ffe0) mqprio
     +---(100:1) mqprio

Here, 100:1 is txq0, 100:2 is txq1, 100:3 is txq2, 100:4 is txq3
txq0 belongs to tc0, txq1 to tc1, txq2 and txq3 to tc2
The offload part only maps L2 prio to traffic classes, but not
to transmit queues, so to direct traffic to a traffic class a vlan
has to be created with an appropriate egress map.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/ti/cpsw.c | 82 ++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 9bd615da04d3..4b232cda5436 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -39,6 +39,7 @@
 #include <linux/sys_soc.h>
 
 #include <linux/pinctrl/consumer.h>
+#include <net/pkt_cls.h>
 
 #include "cpsw.h"
 #include "cpsw_ale.h"
@@ -153,6 +154,8 @@ do {								\
 #define IRQ_NUM			2
 #define CPSW_MAX_QUEUES		8
 #define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_TC_NUM			4
+#define CPSW_FIFO_SHAPERS_NUM		(CPSW_TC_NUM - 1)
 
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT	29
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK		GENMASK(2, 0)
@@ -453,6 +456,7 @@ struct cpsw_priv {
 	u8				mac_addr[ETH_ALEN];
 	bool				rx_pause;
 	bool				tx_pause;
+	bool				mqprio_hw;
 	u32 emac_port;
 	struct cpsw_common *cpsw;
 };
@@ -1577,6 +1581,14 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw)
 	soft_reset_slave(slave);
 }
 
+static int cpsw_tc_to_fifo(int tc, int num_tc) /* traffic class -> FIFO number */
+{
+	if (tc == num_tc - 1)	/* the last configured class ... */
+		return 0;	/* ... is always placed on FIFO 0 */
+
+	return CPSW_FIFO_SHAPERS_NUM - tc; /* the rest count down: tc0 -> 3, tc1 -> 2 */
+}
+
 static int cpsw_ndo_open(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
@@ -2190,6 +2202,75 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate)
 	return ret;
 }
 
+static int cpsw_set_tc(struct net_device *ndev, void *type_data) /* MQPRIO offload */
+{
+	struct tc_mqprio_qopt_offload *mqprio = type_data;
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int fifo, num_tc, count, offset;
+	struct cpsw_slave *slave;
+	u32 tx_prio_map = 0;
+	int i, tc, ret;
+
+	num_tc = mqprio->qopt.num_tc;
+	if (num_tc > CPSW_TC_NUM)	/* at most CPSW_TC_NUM classes are accepted */
+		return -EINVAL;
+
+	if (mqprio->mode != TC_MQPRIO_MODE_DCB)	/* no other mqprio mode is handled */
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(cpsw->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(cpsw->dev); /* undo the get before bailing out */
+		return ret;
+	}
+
+	if (num_tc) {
+		for (i = 0; i < 8; i++) {	/* priorities 0..7, one 4-bit field each */
+			tc = mqprio->qopt.prio_tc_map[i];
+			fifo = cpsw_tc_to_fifo(tc, num_tc);
+			tx_prio_map |= fifo << (4 * i);
+		}
+
+		netdev_set_num_tc(ndev, num_tc);
+		for (i = 0; i < num_tc; i++) {	/* give each class its count/offset of tx queues */
+			count = mqprio->qopt.count[i];
+			offset = mqprio->qopt.offset[i];
+			netdev_set_tc_queue(ndev, i, count, offset);
+		}
+	}
+
+	if (!mqprio->qopt.hw) {	/* hw == 0: the map built above is replaced by the default */
+		/* restore default configuration */
+		netdev_reset_tc(ndev);
+		tx_prio_map = TX_PRIORITY_MAPPING;
+	}
+
+	priv->mqprio_hw = mqprio->qopt.hw;
+
+	offset = cpsw->version == CPSW_VERSION_1 ?	/* 'offset' is reused as a register offset */
+		 CPSW1_TX_PRI_MAP : CPSW2_TX_PRI_MAP;
+
+	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+	slave_write(slave, tx_prio_map, offset);	/* program the map for this slave */
+
+	pm_runtime_put_sync(cpsw->dev);
+
+	return 0;
+}
+
+static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+			     void *type_data)	/* installed as .ndo_setup_tc */
+{
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:	/* the only offload type handled here */
+		return cpsw_set_tc(ndev, type_data);
+
+	default:
+		return -EOPNOTSUPP;	/* every other setup type is refused */
+	}
+}
+
 static const struct net_device_ops cpsw_netdev_ops = {
 	.ndo_open		= cpsw_ndo_open,
 	.ndo_stop		= cpsw_ndo_stop,
@@ -2205,6 +2286,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
 #endif
 	.ndo_vlan_rx_add_vid	= cpsw_ndo_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= cpsw_ndo_vlan_rx_kill_vid,
+	.ndo_setup_tc           = cpsw_ndo_setup_tc,
 };
 
 static int cpsw_get_regs_len(struct net_device *ndev)
-- 
2.17.0

^ permalink raw reply related

* [RFC PATCH 6/6] Documentation: networking: cpsw: add MQPRIO & CBS offload examples
From: Ivan Khoronzhuk @ 2018-05-18 21:15 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, Ivan Khoronzhuk
In-Reply-To: <20180518211510.13341-1-ivan.khoronzhuk@linaro.org>

This document describes MQPRIO and CBS Qdisc offload configuration
for cpsw driver based on examples. It potentially can be used in
audio video bridging (AVB) and time sensitive networking (TSN).

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 Documentation/networking/cpsw.txt | 540 ++++++++++++++++++++++++++++++
 1 file changed, 540 insertions(+)
 create mode 100644 Documentation/networking/cpsw.txt

diff --git a/Documentation/networking/cpsw.txt b/Documentation/networking/cpsw.txt
new file mode 100644
index 000000000000..28c64896d59d
--- /dev/null
+++ b/Documentation/networking/cpsw.txt
@@ -0,0 +1,540 @@
+* Texas Instruments CPSW ethernet driver
+
+Multiqueue & CBS & MQPRIO
+=====================================================================
+=====================================================================
+
+The cpsw has 3 CBS shapers for each external port. This document
+describes MQPRIO and CBS Qdisc offload configuration for cpsw driver
+based on examples. It potentially can be used in audio video bridging
+(AVB) and time sensitive networking (TSN).
+
+The following examples were tested on AM572x EVM and BBB boards.
+
+Test setup
+==========
+
+Two examples are considered, with an AM572x EVM running the cpsw driver
+in dual_emac mode.
+
+Several prerequisites:
+- TX queues must be rated starting from txq0 that has highest priority
+- Traffic classes are used starting from 0, that has highest priority
+- CBS shapers should be used with rated queues
+- The bandwidth for CBS shapers has to be set a little bit more than
+  the potential incoming rate, thus, the rate of all incoming tx queues
+  has to be a little less
+- Real rates can differ, due to discreteness
+- Mapping skb-priority to txq is not enough, a skb-priority to l2 prio
+  map also has to be created with the ip or vconfig tool
+- Any l2/socket prio (0 - 7) for classes can be used, but for
+  simplicity default values are used: 3 and 2
+- Only 2 classes were tested: A and B, but more can work; the maximum
+  allowed is 4, and a rate can be set for only 3 of them.
+
+Test setup for examples
+=======================
+                                    +-------------------------------+
+                                    |--+                            |
+                                    |  |      Workstation0          |
+                                    |E |  MAC 18:03:73:66:87:42     |
++-----------------------------+  +--|t |                            |
+|                    | 1  | E |  |  |h |./tsn_listener -d \         |
+|  Target board:     | 0  | t |--+  |0 | 18:03:73:66:87:42 -i eth0 \|
+|  AM572x EVM        | 0  | h |     |  | -s 1500                    |
+|                    | 0  | 0 |     |--+                            |
+|  Only 2 classes:   |Mb  +---|     +-------------------------------+
+|  class A, class B  |        |
+|                    |    +---|     +-------------------------------+
+|                    | 1  | E |     |--+                            |
+|                    | 0  | t |     |  |      Workstation1          |
+|                    | 0  | h |--+  |E |  MAC 20:cf:30:85:7d:fd     |
+|                    |Mb  | 1 |  +--|t |                            |
++-----------------------------+     |h |./tsn_listener -d \         |
+                                    |0 | 20:cf:30:85:7d:fd -i eth0 \|
+                                    |  | -s 1500                    |
+                                    |--+                            |
+                                    +-------------------------------+
+
+*********************************************************************
+*********************************************************************
+*********************************************************************
+Example 1: One port tx AVB configuration scheme for target board
+----------------------------------------------------------------------
+(prints and scheme for AM572x evm, applicable for single port boards)
+
+tc - traffic class
+txq - transmit queue
+p - priority
+f - fifo (cpsw fifo)
+S - shaper configured
+
++------------------------------------------------------------------+ u
+| +---------------+  +---------------+  +------+ +------+          | s
+| |               |  |               |  |      | |      |          | e
+| | App 1         |  | App 2         |  | Apps | | Apps |          | r
+| | Class A       |  | Class B       |  | Rest | | Rest |          |
+| | Eth0          |  | Eth0          |  | Eth0 | | Eth1 |          | s
+| | VLAN100       |  | VLAN100       |  |   |  | |   |  |          | p
+| | 40 Mb/s       |  | 20 Mb/s       |  |   |  | |   |  |          | a
+| | SO_PRIORITY=3 |  | SO_PRIORITY=2 |  |   |  | |   |  |          | c
+| |   |           |  |   |           |  |   |  | |   |  |          | e
+| +---|-----------+  +---|-----------+  +---|--+ +---|--+          |
++-----|------------------|------------------|--------|-------------+
+    +-+     +------------+                  |        |
+    |       |             +-----------------+     +--+
+    |       |             |                       |
++---|-------|-------------|-----------------------|----------------+
+| +----+ +----+ +----+ +----+                   +----+             |
+| | p3 | | p2 | | p1 | | p0 |                   | p0 |             | k
+| \    / \    / \    / \    /                   \    /             | e
+|  \  /   \  /   \  /   \  /                     \  /              | r
+|   \/     \/     \/     \/                       \/               | n
+|    |     |             |                        |                | e
+|    |     |       +-----+                        |                | l
+|    |     |       |                              |                |
+| +----+ +----+ +----+                          +----+             | s
+| |tc0 | |tc1 | |tc2 |                          |tc0 |             | p
+| \    / \    / \    /                          \    /             | a
+|  \  /   \  /   \  /                            \  /              | c
+|   \/     \/     \/                              \/               | e
+|   |      |       +-----+                        |                |
+|   |      |       |     |                        |                |
+|   |      |       |     |                        |                |
+|   |      |       |     |                        |                |
+| +----+ +----+ +----+ +----+                   +----+             |
+| |txq0| |txq1| |txq2| |txq3|                   |txq4|             |
+| \    / \    / \    / \    /                   \    /             |
+|  \  /   \  /   \  /   \  /                     \  /              |
+|   \/     \/     \/     \/                       \/               |
+| +-|------|------|------|--+                  +--|--------------+ |
+| | |      |      |      |  | Eth0.100         |  |     Eth1     | |
++---|------|------|------|------------------------|----------------+
+    |      |      |      |                        |
+    p      p      p      p                        |
+    3      2      0-1, 4-7  <- L2 priority        |
+    |      |      |      |                        |
+    |      |      |      |                        |
++---|------|------|------|------------------------|----------------+
+|   |      |      |      |             |----------+                |
+| +----+ +----+ +----+ +----+       +----+                         |
+| |dma7| |dma6| |dma5| |dma4|       |dma4|                         |
+| \    / \    / \    / \    /       \    /                         | c
+|  \S /   \S /   \  /   \  /         \  /                          | p
+|   \/     \/     \/     \/           \/                           | s
+|   |      |      | +-----            |                            | w
+|   |      |      | |                 |                            |
+|   |      |      | |                 |                            | d
+| +----+ +----+ +----+p            p+----+                         | r
+| |    | |    | |    |o            o|    |                         | i
+| | f3 | | f2 | | f0 |r            r| f0 |                         | v
+| |tc0 | |tc1 | |tc2 |t            t|tc0 |                         | e
+| \CBS / \CBS / \CBS /1            2\CBS /                         | r
+|  \S /   \S /   \  /                \  /                          |
+|   \/     \/     \/                  \/                           |
++------------------------------------------------------------------+
+========================================Eth==========================>
+
+1)
+// Add 4 tx queues, for interface Eth0, and 1 tx queue for Eth1
+$ ethtool -L eth0 rx 1 tx 5
+rx unmodified, ignoring
+
+2)
+// Check if num of queues is set correctly:
+$ ethtool -l eth0
+Channel parameters for eth0:
+Pre-set maximums:
+RX:             8
+TX:             8
+Other:          0
+Combined:       0
+Current hardware settings:
+RX:             1
+TX:             5
+Other:          0
+Combined:       0
+
+3)
+// TX queues must be rated starting from 0, so set bws for tx0 and tx1
+// Set rates 40 and 20 Mb/s appropriately.
+// Pay attention, real speed can differ a bit due to discreteness.
+// Leave last 2 tx queues not rated.
+$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
+$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
+
+4)
+// Check maximum rate of tx (cpdma) queues:
+$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
+40
+20
+0
+0
+0
+
+5)
+// Map skb->priority to traffic class:
+// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+// Map traffic class to transmit queue:
+// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq2, txq3)
+$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
+map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1
+
+5a)
+// As the two interfaces share the same set of tx queues, assign all
+// traffic coming to interface Eth1 to a separate queue in order not to
+// mix it with traffic from interface Eth0: use a separate txq to send
+// packets to Eth1, so all prio -> tc0 and tc0 -> txq4
+// Here hw is 0, so eth1 still keeps the default configuration in hw
+$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 1 \
+map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@4 hw 0
+
+6)
+// Check classes settings
+$ tc -g class show dev eth0
++---(100:ffe2) mqprio
+|    +---(100:3) mqprio
+|    +---(100:4) mqprio
+|
++---(100:ffe1) mqprio
+|    +---(100:2) mqprio
+|
++---(100:ffe0) mqprio
+     +---(100:1) mqprio
+
+$ tc -g class show dev eth1
++---(100:ffe0) mqprio
+     +---(100:5) mqprio
+
+7)
+// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc
+// Set it +1 Mb for reserve (important!)
+// here only idle slope is important, other args are ignored
+// Pay attention, real speed can differ a bit due to discreteness
+$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1438 \
+hicredit 62 sendslope -959000 idleslope 41000 offload 1
+net eth0: set FIFO3 bw = 50
+
+8)
+// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc:
+// Set it +1 Mb for reserve (important!)
+$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1468 \
+hicredit 65 sendslope -979000 idleslope 21000 offload 1
+net eth0: set FIFO2 bw = 30
+
+9)
+// Create vlan 100 to map sk->priority to vlan qos
+$ ip link add link eth0 name eth0.100 type vlan id 100
+8021q: 802.1Q VLAN Support v1.8
+8021q: adding VLAN 0 to HW filter on device eth0
+8021q: adding VLAN 0 to HW filter on device eth1
+net eth0: Adding vlanid 100 to vlan filter
+
+10)
+// Map skb->priority to L2 prio, 1 to 1
+$ ip link set eth0.100 type vlan \
+egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+11)
+// Check egress map for vlan 100
+$ cat /proc/net/vlan/eth0.100
+[...]
+INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
+EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+12)
+// Run your appropriate tools with socket option "SO_PRIORITY"
+// to 3 for class A and/or to 2 for class B
+// (taken from https://www.spinics.net/lists/netdev/msg460869.html)
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
+
+13)
+// run your listener on workstation
+// (taken from https://www.spinics.net/lists/netdev/msg460869.html)
+./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39000 kbps
+
+14)
+// Restore default configuration if needed
+$ ip link del eth0.100
+$ tc qdisc del dev eth1 root
+$ tc qdisc del dev eth0 root
+net eth0: Prev FIFO2 is shaped
+net eth0: set FIFO3 bw = 0
+net eth0: set FIFO2 bw = 0
+$ ethtool -L eth0 rx 1 tx 1
+
+*********************************************************************
+*********************************************************************
+*********************************************************************
+Example 2: Two port tx AVB configuration scheme for target board
+----------------------------------------------------------------------
+(prints and scheme for AM572x evm, for dual emac boards only)
+
++------------------------------------------------------------------+ u
+| +----------+  +----------+  +------+  +----------+  +----------+ | s
+| |          |  |          |  |      |  |          |  |          | | e
+| | App 1    |  | App 2    |  | Apps |  | App 3    |  | App 4    | | r
+| | Class A  |  | Class B  |  | Rest |  | Class B  |  | Class A  | |
+| | Eth0     |  | Eth0     |  |   |  |  | Eth1     |  | Eth1     | | s
+| | VLAN100  |  | VLAN100  |  |   |  |  | VLAN100  |  | VLAN100  | | p
+| | 40 Mb/s  |  | 20 Mb/s  |  |   |  |  | 10 Mb/s  |  | 30 Mb/s  | | a
+| | SO_PRI=3 |  | SO_PRI=2 |  |   |  |  | SO_PRI=3 |  | SO_PRI=2 | | c
+| |   |      |  |   |      |  |   |  |  |   |      |  |   |      | | e
+| +---|------+  +---|------+  +---|--+  +---|------+  +---|------+ |
++-----|-------------|-------------|---------|-------------|--------+
+    +-+     +-------+             |         +----------+  +----+
+    |       |             +-------+------+             |       |
+    |       |             |              |             |       |
++---|-------|-------------|--------------|-------------|-------|---+
+| +----+ +----+ +----+ +----+          +-+--+ +----+ +----+ +----+ |
+| | p3 | | p2 | | p1 | | p0 |          | p0 | | p1 | | p2 | | p3 | | k
+| \    / \    / \    / \    /          \    / \    / \    / \    / | e
+|  \  /   \  /   \  /   \  /            \  /   \  /   \  /   \  /  | r
+|   \/     \/     \/     \/              \/     \/     \/     \/   | n
+|   |      |             |                |             |      |   | e
+|   |      |        +----+                +----+        |      |   | l
+|   |      |        |                          |        |      |   |
+| +----+ +----+ +----+                        +----+ +----+ +----+ | s
+| |tc0 | |tc1 | |tc2 |                        |tc2 | |tc1 | |tc0 | | p
+| \    / \    / \    /                        \    / \    / \    / | a
+|  \  /   \  /   \  /                          \  /   \  /   \  /  | c
+|   \/     \/     \/                            \/     \/     \/   | e
+|   |      |       +-----+                +-----+      |       |   |
+|   |      |       |     |                |     |      |       |   |
+|   |      |       |     |                |     |      |       |   |
+|   |      |       |     |    E      E    |     |      |       |   |
+| +----+ +----+ +----+ +----+ t      t +----+ +----+ +----+ +----+ |
+| |txq0| |txq1| |txq4| |txq5| h      h |txq6| |txq7| |txq3| |txq2| |
+| \    / \    / \    / \    / 0      1 \    / \    / \    / \    / |
+|  \  /   \  /   \  /   \  /  .      .  \  /   \  /   \  /   \  /  |
+|   \/     \/     \/     \/   1      1   \/     \/     \/     \/   |
+| +-|------|------|------|--+ 0      0 +-|------|------|------|--+ |
+| | |      |      |      |  | 0      0 | |      |      |      |  | |
++---|------|------|------|---------------|------|------|------|----+
+    |      |      |      |               |      |      |      |
+    p      p      p      p               p      p      p      p
+    3      2      0-1, 4-7   <-L2 pri->  0-1, 4-7      2      3
+    |      |      |      |               |      |      |      |
+    |      |      |      |               |      |      |      |
++---|------|------|------|---------------|------|------|------|----+
+|   |      |      |      |               |      |      |      |    |
+| +----+ +----+ +----+ +----+          +----+ +----+ +----+ +----+ |
+| |dma7| |dma6| |dma3| |dma2|          |dma1| |dma0| |dma4| |dma5| |
+| \    / \    / \    / \    /          \    / \    / \    / \    / | c
+|  \S /   \S /   \  /   \  /            \  /   \  /   \S /   \S /  | p
+|   \/     \/     \/     \/              \/     \/     \/     \/   | s
+|   |      |      | +-----                |      |      |      |   | w
+|   |      |      | |                     +----+ |      |      |   |
+|   |      |      | |                          | |      |      |   | d
+| +----+ +----+ +----+p                      p+----+ +----+ +----+ | r
+| |    | |    | |    |o                      o|    | |    | |    | | i
+| | f3 | | f2 | | f0 |r        CPSW          r| f3 | | f2 | | f0 | | v
+| |tc0 | |tc1 | |tc2 |t                      t|tc0 | |tc1 | |tc2 | | e
+| \CBS / \CBS / \CBS /1                      2\CBS / \CBS / \CBS / | r
+|  \S /   \S /   \  /                          \S /   \S /   \  /  |
+|   \/     \/     \/                            \/     \/     \/   |
++------------------------------------------------------------------+
+========================================Eth==========================>
+
+1)
+// Add 8 tx queues, for interface Eth0, but they are common, so are accessed
+// by two interfaces Eth0 and Eth1.
+$ ethtool -L eth1 rx 1 tx 8
+rx unmodified, ignoring
+
+2)
+// Check if num of queues is set correctly:
+$ ethtool -l eth0
+Channel parameters for eth0:
+Pre-set maximums:
+RX:             8
+TX:             8
+Other:          0
+Combined:       0
+Current hardware settings:
+RX:             1
+TX:             8
+Other:          0
+Combined:       0
+
+3)
+// TX queues must be rated starting from 0, so set bws for tx0 and tx1 for Eth0
+// and for tx2 and tx3 for Eth1. That is, rates 40 and 20 Mb/s appropriately
+// for Eth0 and 30 and 10 Mb/s for Eth1.
+// Real speed can differ a bit due to discreteness
+// Leave last 4 tx queues as not rated
+$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
+$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
+$ echo 30 > /sys/class/net/eth1/queues/tx-2/tx_maxrate
+$ echo 10 > /sys/class/net/eth1/queues/tx-3/tx_maxrate
+
+4)
+// Check maximum rate of tx (cpdma) queues:
+$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
+40
+20
+30
+10
+0
+0
+0
+0
+
+5)
+// Map skb->priority to traffic class for Eth0:
+// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+// Map traffic class to transmit queue:
+// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq4, txq5)
+$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
+map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@4 hw 1
+
+6)
+// Check classes settings
+$ tc -g class show dev eth0
++---(100:ffe2) mqprio
+|    +---(100:5) mqprio
+|    +---(100:6) mqprio
+|
++---(100:ffe1) mqprio
+|    +---(100:2) mqprio
+|
++---(100:ffe0) mqprio
+     +---(100:1) mqprio
+
+7)
+// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc for Eth0
+// here only idle slope is important, others ignored
+// Real speed can differ a bit due to discreteness
+$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1470 \
+hicredit 62 sendslope -959000 idleslope 41000 offload 1
+net eth0: set FIFO3 bw = 50
+
+8)
+// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc for Eth0
+$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1470 \
+hicredit 65 sendslope -979000 idleslope 21000 offload 1
+net eth0: set FIFO2 bw = 30
+
+9)
+// Create vlan 100 to map sk->priority to vlan qos for Eth0
+$ ip link add link eth0 name eth0.100 type vlan id 100
+net eth0: Adding vlanid 100 to vlan filter
+
+10)
+// Map skb->priority to L2 prio for Eth0.100, one to one
+$ ip link set eth0.100 type vlan \
+egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+11)
+// Check egress map for vlan 100
+$ cat /proc/net/vlan/eth0.100
+[...]
+INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
+EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+12)
+// Map skb->priority to traffic class for Eth1:
+// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+// Map traffic class to transmit queue:
+// tc0 -> txq2, tc1 -> txq3, tc2 -> (txq6, txq7)
+$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 3 \
+map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@2 1@3 2@6 hw 1
+
+13)
+// Check classes settings
+$ tc -g class show dev eth1
++---(100:ffe2) mqprio
+|    +---(100:7) mqprio
+|    +---(100:8) mqprio
+|
++---(100:ffe1) mqprio
+|    +---(100:4) mqprio
+|
++---(100:ffe0) mqprio
+     +---(100:3) mqprio
+
+14)
+// Set rate for class A - 31 Mbit (tc0, txq2) using CBS Qdisc for Eth1
+// here only idle slope is important, others ignored
+// Set it +1 Mb for reserve (important!)
+$ tc qdisc add dev eth1 parent 100:3 cbs locredit -1453 \
+hicredit 47 sendslope -969000 idleslope 31000 offload 1
+net eth1: set FIFO3 bw = 40
+
+15)
+// Set rate for class B - 11 Mbit (tc1, txq3) using CBS Qdisc for Eth1
+// Set it +1 Mb for reserve (important!)
+$ tc qdisc add dev eth1 parent 100:4 cbs locredit -1483 \
+hicredit 34 sendslope -989000 idleslope 11000 offload 1
+net eth1: set FIFO2 bw = 20
+
+16)
+// Create vlan 100 to map sk->priority to vlan qos for Eth1
+$ ip link add link eth1 name eth1.100 type vlan id 100
+net eth1: Adding vlanid 100 to vlan filter
+
+17)
+// Map skb->priority to L2 prio for Eth1.100, one to one
+$ ip link set eth1.100 type vlan \
+egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+18)
+// Check egress map for vlan 100
+$ cat /proc/net/vlan/eth1.100
+[...]
+INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
+EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+19)
+// Run appropriate tools with socket option "SO_PRIORITY" to 3
+// for class A and to 2 for class B. For both interfaces
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
+./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p2 -s 1500&
+./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p3 -s 1500&
+
+20)
+// run your listeners on workstations
+// (taken from https://www.spinics.net/lists/netdev/msg460869.html)
+./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39000 kbps
+
+21)
+// Restore default configuration if needed
+$ ip link del eth1.100
+$ ip link del eth0.100
+$ tc qdisc del dev eth1 root
+net eth1: Prev FIFO2 is shaped
+net eth1: set FIFO3 bw = 0
+net eth1: set FIFO2 bw = 0
+$ tc qdisc del dev eth0 root
+net eth0: Prev FIFO2 is shaped
+net eth0: set FIFO3 bw = 0
+net eth0: set FIFO2 bw = 0
+$ ethtool -L eth0 rx 1 tx 1
-- 
2.17.0

^ permalink raw reply related

* [RFC PATCH 4/6] net: ethernet: ti: cpsw: add CBS Qdisc offload
From: Ivan Khoronzhuk @ 2018-05-18 21:15 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, Ivan Khoronzhuk
In-Reply-To: <20180518211510.13341-1-ivan.khoronzhuk@linaro.org>

The cpsw has up to 4 FIFOs per port and the upper 3 FIFOs can feed a
rate limited queue with shaping. In order to set and enable shaping
for those 3 FIFOs, a network device with a CBS qdisc attached is
needed. The CBS configuration is added for dual-emac/single port mode
only, but potentially can be used in switch mode also, based on
switchdev for instance.

Although the FIFO shapers can work w/o cpdma level shapers, the base
usage must be in combination with cpdma level shapers as described in
the TRM; those are set as maximum rates for interface queues via sysfs.

One of the possible configuration with txq shapers and CBS shapers:

                      Configured with echo RATE >
                  /sys/class/net/eth0/queues/tx-0/tx_maxrate
             /---------------------------------------------------
            /
           /            cpdma level shapers
        +----+ +----+ +----+ +----+ +----+ +----+ +----+ +----+
        | c7 | | c6 | | c5 | | c4 | | c3 | | c2 | | c1 | | c0 |
        \    / \    / \    / \    / \    / \    / \    / \    /
         \  /   \  /   \  /   \  /   \  /   \  /   \  /   \  /
          \/     \/     \/     \/     \/     \/     \/     \/
+---------|------|------|------|-------------------------------------+
|    +----+      |      |  +---+                                     |
|    |      +----+      |  |                                         |
|    v      v           v  v                                         |
| +----+ +----+ +----+ +----+ p        p+----+ +----+ +----+ +----+  |
| |    | |    | |    | |    | o        o|    | |    | |    | |    |  |
| | f3 | | f2 | | f1 | | f0 | r  CPSW  r| f3 | | f2 | | f1 | | f0 |  |
| |    | |    | |    | |    | t        t|    | |    | |    | |    |  |
| \    / \    / \    / \    / 0        1\    / \    / \    / \    /  |
|  \  X   \  /   \  /   \  /             \  /   \  /   \  /   \  /   |
|   \/ \   \/     \/     \/               \/     \/     \/     \/    |
+-------\------------------------------------------------------------+
         \
          \ FIFO shaper, set with CBS offload added in this patch,
           \ FIFO0 cannot be rate limited
            ------------------------------------------------------

CBS shaper configuration is supposed to be used with root MQPRIO Qdisc
offload allowing to add sk_prio->tc->txq maps that direct traffic to
appropriate tx queue and maps L2 priority to FIFO shaper.

The CBS shaper is intended to be used for AVB where L2 priority
(pcp field) is used to differentiate class of traffic. So additionally
vlan needs to be created with appropriate egress sk_prio->l2 prio map.

If CBS has several tx queues assigned to it, the sum of their
bandwidths must not exceed the bandwidth set for CBS. It's recommended
that the CBS bandwidth be a little bit more.

The CBS shaper is configured with the CBS qdisc offload interface using
the tc tool from the iproute2 package.

For instance:

$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1

$ tc -g class show dev eth0
+---(100:ffe2) mqprio
|    +---(100:3) mqprio
|    +---(100:4) mqprio
|    
+---(100:ffe1) mqprio
|    +---(100:2) mqprio
|    
+---(100:ffe0) mqprio
     +---(100:1) mqprio

$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1440 \
hicredit 60 sendslope -960000 idleslope 40000 offload 1

$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1470 \
hicredit 62 sendslope -980000 idleslope 20000 offload 1

The above commands set CBS shapers for tc0 and tc1, for which txq0 and
txq1 are used. Pay attention, the real set bandwidth can differ a bit
due to discreteness of configuration parameters.

Here parameters like locredit, hicredit and sendslope are ignored
internally and are supposed to be set with the assumption that the
maximum frame size is 1500.

It's assumed that the interface speed does not change on reconnection,
which is not always true, so inform the user in case the speed of the
interface has changed, as it can impact dependent shaper configuration.

For more examples see Documentation.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/ti/cpsw.c | 221 +++++++++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 4b232cda5436..c7710b0e1c17 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -46,6 +46,8 @@
 #include "cpts.h"
 #include "davinci_cpdma.h"
 
+#include <net/pkt_sched.h>
+
 #define CPSW_DEBUG	(NETIF_MSG_HW		| NETIF_MSG_WOL		| \
 			 NETIF_MSG_DRV		| NETIF_MSG_LINK	| \
 			 NETIF_MSG_IFUP		| NETIF_MSG_INTR	| \
@@ -154,8 +156,12 @@ do {								\
 #define IRQ_NUM			2
 #define CPSW_MAX_QUEUES		8
 #define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_FIFO_QUEUE_TYPE_SHIFT	16
+#define CPSW_FIFO_SHAPE_EN_SHIFT	16
+#define CPSW_FIFO_RATE_EN_SHIFT		20
 #define CPSW_TC_NUM			4
 #define CPSW_FIFO_SHAPERS_NUM		(CPSW_TC_NUM - 1)
+#define CPSW_PCT_MASK			0x7f
 
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT	29
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK		GENMASK(2, 0)
@@ -457,6 +463,8 @@ struct cpsw_priv {
 	bool				rx_pause;
 	bool				tx_pause;
 	bool				mqprio_hw;
+	int				fifo_bw[CPSW_TC_NUM];
+	int				shp_cfg_speed;
 	u32 emac_port;
 	struct cpsw_common *cpsw;
 };
@@ -1081,6 +1089,38 @@ static void cpsw_set_slave_mac(struct cpsw_slave *slave,
 	slave_write(slave, mac_lo(priv->mac_addr), SA_LO);
 }
 
+static bool cpsw_shp_is_off(struct cpsw_priv *priv)
+{
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_slave *slave;
+	u32 shift, mask, val;
+
+	val = readl_relaxed(&cpsw->regs->ptype);
+
+	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+	shift = CPSW_FIFO_SHAPE_EN_SHIFT + 3 * slave->slave_num;
+	mask = 7 << shift;
+	val = val & mask;
+
+	return !val;
+}
+
+static void cpsw_fifo_shp_on(struct cpsw_priv *priv, int fifo, int on)
+{
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_slave *slave;
+	u32 shift, mask, val;
+
+	val = readl_relaxed(&cpsw->regs->ptype);
+
+	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+	shift = CPSW_FIFO_SHAPE_EN_SHIFT + 3 * slave->slave_num;
+	mask = (1 << --fifo) << shift;
+	val = on ? val | mask : val & ~mask;
+
+	writel_relaxed(val, &cpsw->regs->ptype);
+}
+
 static void _cpsw_adjust_link(struct cpsw_slave *slave,
 			      struct cpsw_priv *priv, bool *link)
 {
@@ -1120,6 +1160,12 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
 			mac_control |= BIT(4);
 
 		*link = true;
+
+		if (priv->shp_cfg_speed &&
+		    priv->shp_cfg_speed != slave->phy->speed &&
+		    !cpsw_shp_is_off(priv))
+			dev_warn(priv->dev,
+				 "Speed was changed, CBS sahper speeds are changed!");
 	} else {
 		mac_control = 0;
 		/* disable forwarding */
@@ -1589,6 +1635,178 @@ static int cpsw_tc_to_fifo(int tc, int num_tc)
 	return CPSW_FIFO_SHAPERS_NUM - tc;
 }
 
+static int cpsw_set_fifo_bw(struct cpsw_priv *priv, int fifo, int bw)
+{
+	struct cpsw_common *cpsw = priv->cpsw;
+	u32 val = 0, send_pct, shift;
+	struct cpsw_slave *slave;
+	int pct = 0, i;
+
+	if (bw > priv->shp_cfg_speed * 1000)
+		goto err;
+
+	/* shaping has to stay enabled for highest fifos linearly
+	 * and fifo bw no more then interface can allow
+	 */
+	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+	send_pct = slave_read(slave, SEND_PERCENT);
+	for (i = CPSW_FIFO_SHAPERS_NUM; i > 0; i--) {
+		if (!bw) {
+			if (i >= fifo || !priv->fifo_bw[i])
+				continue;
+
+			dev_warn(priv->dev, "Prev FIFO%d is shaped", i);
+			continue;
+		}
+
+		if (!priv->fifo_bw[i] && i > fifo) {
+			dev_err(priv->dev, "Upper FIFO%d is not shaped", i);
+			return -EINVAL;
+		}
+
+		shift = (i - 1) * 8;
+		if (i == fifo) {
+			send_pct &= ~(CPSW_PCT_MASK << shift);
+			val = DIV_ROUND_UP(bw, priv->shp_cfg_speed * 10);
+			if (!val)
+				val = 1;
+
+			send_pct |= val << shift;
+			pct += val;
+			continue;
+		}
+
+		if (priv->fifo_bw[i])
+			pct += (send_pct >> shift) & CPSW_PCT_MASK;
+	}
+
+	if (pct >= 100)
+		goto err;
+
+	slave_write(slave, send_pct, SEND_PERCENT);
+	priv->fifo_bw[fifo] = bw;
+
+	dev_warn(priv->dev, "set FIFO%d bw = %d\n", fifo,
+		 DIV_ROUND_CLOSEST(val * priv->shp_cfg_speed, 100));
+
+	return 0;
+err:
+	dev_err(priv->dev, "Bandwidth doesn't fit in tc configuration");
+	return -EINVAL;
+}
+
+static int cpsw_set_fifo_rlimit(struct cpsw_priv *priv, int fifo, int bw)
+{
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_slave *slave;
+	u32 tx_in_ctl_rg, val;
+	int ret;
+
+	ret = cpsw_set_fifo_bw(priv, fifo, bw);
+	if (ret)
+		return ret;
+
+	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+	tx_in_ctl_rg = cpsw->version == CPSW_VERSION_1 ?
+		       CPSW1_TX_IN_CTL : CPSW2_TX_IN_CTL;
+
+	if (!bw)
+		cpsw_fifo_shp_on(priv, fifo, bw);
+
+	val = slave_read(slave, tx_in_ctl_rg);
+	if (cpsw_shp_is_off(priv)) {
+		/* disable FIFOs rate limited queues */
+		val &= ~(0xf << CPSW_FIFO_RATE_EN_SHIFT);
+
+		/* set type of FIFO queues to normal priority mode */
+		val &= ~(3 << CPSW_FIFO_QUEUE_TYPE_SHIFT);
+
+		/* set type of FIFO queues to be rate limited */
+		if (bw)
+			val |= 2 << CPSW_FIFO_QUEUE_TYPE_SHIFT;
+		else
+			priv->shp_cfg_speed = 0;
+	}
+
+	/* toggle a FIFO rate limited queue */
+	if (bw)
+		val |= BIT(fifo + CPSW_FIFO_RATE_EN_SHIFT);
+	else
+		val &= ~BIT(fifo + CPSW_FIFO_RATE_EN_SHIFT);
+	slave_write(slave, val, tx_in_ctl_rg);
+
+	/* FIFO transmit shape enable */
+	cpsw_fifo_shp_on(priv, fifo, bw);
+	return 0;
+}
+
+/* Defaults:
+ * class A - prio 3
+ * class B - prio 2
+ * shaping for class A should be set first
+ */
+static int cpsw_set_cbs(struct net_device *ndev,
+			struct tc_cbs_qopt_offload *qopt)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_slave *slave;
+	int prev_speed = 0;
+	int tc, ret, fifo;
+	u32 bw = 0;
+
+	tc = netdev_txq_to_tc(priv->ndev, qopt->queue);
+
+	/* enable channels in backward order, as highest FIFOs must be rate
+	 * limited first and for compliance with CPDMA rate limited channels
+	 * that also used in bacward order. FIFO0 cannot be rate limited.
+	 */
+	fifo = cpsw_tc_to_fifo(tc, ndev->num_tc);
+	if (!fifo) {
+		dev_err(priv->dev, "Last tc%d can't be rate limited", tc);
+		return -EINVAL;
+	}
+
+	/* do nothing, it's disabled anyway */
+	if (!qopt->enable && !priv->fifo_bw[fifo])
+		return 0;
+
+	/* shapers can be set if link speed is known */
+	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+	if (slave->phy && slave->phy->link) {
+		if (priv->shp_cfg_speed &&
+		    priv->shp_cfg_speed != slave->phy->speed)
+			prev_speed = priv->shp_cfg_speed;
+
+		priv->shp_cfg_speed = slave->phy->speed;
+	}
+
+	if (!priv->shp_cfg_speed) {
+		dev_err(priv->dev, "Link speed is not known");
+		return -1;
+	}
+
+	ret = pm_runtime_get_sync(cpsw->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(cpsw->dev);
+		return ret;
+	}
+
+	bw = qopt->enable ? qopt->idleslope : 0;
+	ret = cpsw_set_fifo_rlimit(priv, fifo, bw);
+	if (ret) {
+		priv->shp_cfg_speed = prev_speed;
+		prev_speed = 0;
+	}
+
+	if (bw && prev_speed)
+		dev_warn(priv->dev,
+			 "Speed was changed, CBS sahper speeds are changed!");
+
+	pm_runtime_put_sync(cpsw->dev);
+	return ret;
+}
+
 static int cpsw_ndo_open(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
@@ -2263,6 +2481,9 @@ static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 			     void *type_data)
 {
 	switch (type) {
+	case TC_SETUP_QDISC_CBS:
+		return cpsw_set_cbs(ndev, type_data);
+
 	case TC_SETUP_QDISC_MQPRIO:
 		return cpsw_set_tc(ndev, type_data);
 
-- 
2.17.0

^ permalink raw reply related

* [RFC PATCH 2/6] net: ethernet: ti: cpdma: fit rated channels in backward order
From: Ivan Khoronzhuk @ 2018-05-18 21:15 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, Ivan Khoronzhuk
In-Reply-To: <20180518211510.13341-1-ivan.khoronzhuk@linaro.org>

According to TRM tx rated channels should be in 7..0 order,
so correct it.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/ti/davinci_cpdma.c | 31 ++++++++++++-------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 31ae04117f0a..37fbdc668cc7 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -406,37 +406,36 @@ static int cpdma_chan_fit_rate(struct cpdma_chan *ch, u32 rate,
 	struct cpdma_chan *chan;
 	u32 old_rate = ch->rate;
 	u32 new_rmask = 0;
-	int rlim = 1;
+	int rlim = 0;
 	int i;
 
-	*prio_mode = 0;
 	for (i = tx_chan_num(0); i < tx_chan_num(CPDMA_MAX_CHANNELS); i++) {
 		chan = ctlr->channels[i];
-		if (!chan) {
-			rlim = 0;
+		if (!chan)
 			continue;
-		}
 
 		if (chan == ch)
 			chan->rate = rate;
 
 		if (chan->rate) {
-			if (rlim) {
-				new_rmask |= chan->mask;
-			} else {
-				ch->rate = old_rate;
-				dev_err(ctlr->dev, "Prev channel of %dch is not rate limited\n",
-					chan->chan_num);
-				return -EINVAL;
-			}
-		} else {
-			*prio_mode = 1;
-			rlim = 0;
+			rlim = 1;
+			new_rmask |= chan->mask;
+			continue;
 		}
+
+		if (rlim)
+			goto err;
 	}
 
 	*rmask = new_rmask;
+	*prio_mode = rlim;
 	return 0;
+
+err:
+	ch->rate = old_rate;
+	dev_err(ctlr->dev, "Upper cpdma ch%d is not rate limited\n",
+		chan->chan_num);
+	return -EINVAL;
 }
 
 static u32 cpdma_chan_set_factors(struct cpdma_ctlr *ctlr,
-- 
2.17.0

^ permalink raw reply related

* [RFC PATCH 1/6] net: ethernet: ti: cpsw: use cpdma channels in backward order for txq
From: Ivan Khoronzhuk @ 2018-05-18 21:15 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, Ivan Khoronzhuk
In-Reply-To: <20180518211510.13341-1-ivan.khoronzhuk@linaro.org>

The cpdma channel priority goes from the highest channel number to the
lowest. The driver has a limited number of descriptors that are shared
between the cpdma channels. The number of queues can be tuned with
ethtool, which allows not spending descriptors on unneeded cpdma
channels. In AVB usually only 2 tx queues with rate limitation are
enough. The rate limitation can be used only for high priority queues.
Thus, to use only 2 such queues, all 8 have to be created. It's wasteful.

So, in order to allow using only the needed number of rate limited
tx queues, save resources, and be able to set rate limitation for
them, assign tx cpdma channels to queues in backward order.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/ti/cpsw.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index a7285dddfd29..9bd615da04d3 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -967,8 +967,8 @@ static int cpsw_tx_mq_poll(struct napi_struct *napi_tx, int budget)
 
 	/* process every unprocessed channel */
 	ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
-	for (ch = 0, num_tx = 0; ch_map; ch_map >>= 1, ch++) {
-		if (!(ch_map & 0x01))
+	for (ch = 0, num_tx = 0; ch_map & 0xff; ch_map <<= 1, ch++) {
+		if (!(ch_map & 0x80))
 			continue;
 
 		txv = &cpsw->txv[ch];
@@ -2431,7 +2431,7 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
 	void (*handler)(void *, int, int);
 	struct netdev_queue *queue;
 	struct cpsw_vector *vec;
-	int ret, *ch;
+	int ret, *ch, vch;
 
 	if (rx) {
 		ch = &cpsw->rx_ch_num;
@@ -2444,7 +2444,8 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
 	}
 
 	while (*ch < ch_num) {
-		vec[*ch].ch = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+		vch = rx ? *ch : 7 - *ch;
+		vec[*ch].ch = cpdma_chan_create(cpsw->dma, vch, handler, rx);
 		queue = netdev_get_tx_queue(priv->ndev, *ch);
 		queue->tx_maxrate = 0;
 
@@ -2980,7 +2981,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	u32 slave_offset, sliver_offset, slave_size;
 	const struct soc_device_attribute *soc;
 	struct cpsw_common		*cpsw;
-	int ret = 0, i;
+	int ret = 0, i, ch;
 	int irq;
 
 	cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
@@ -3155,7 +3156,8 @@ static int cpsw_probe(struct platform_device *pdev)
 	if (soc)
 		cpsw->quirk_irq = 1;
 
-	cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
+	ch = cpsw->quirk_irq ? 0 : 7;
+	cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, ch, cpsw_tx_handler, 0);
 	if (IS_ERR(cpsw->txv[0].ch)) {
 		dev_err(priv->dev, "error initializing tx dma channel\n");
 		ret = PTR_ERR(cpsw->txv[0].ch);
-- 
2.17.0

^ permalink raw reply related

* [RFC PATCH 0/6] net: ethernet: ti: cpsw: add MQPRIO and CBS Qdisc offload
From: Ivan Khoronzhuk @ 2018-05-18 21:15 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, Ivan Khoronzhuk

This series adds MQPRIO and CBS Qdisc offload for TI cpsw driver.
It potentially can be used in audio video bridging (AVB) and time
sensitive networking (TSN).

Patchset was tested on AM572x EVM and BBB boards. Last patch from this
series adds detailed description of configuration with examples. For
consistency reasons, in role of talker and listener, tools from
patchset "TSN: Add qdisc based config interface for CBS" were used and
can be seen here: https://www.spinics.net/lists/netdev/msg460869.html

Based on net-next/master

Ivan Khoronzhuk (6):
  net: ethernet: ti: cpsw: use cpdma channels in backward order for txq
  net: ethernet: ti: cpdma: fit rated channels in backward order
  net: ethernet: ti: cpsw: add MQPRIO Qdisc offload
  net: ethernet: ti: cpsw: add CBS Qdisc offload
  net: ethernet: ti: cpsw: restore shaper configuration while down/up
  Documentation: networking: cpsw: add MQPRIO & CBS offload examples

 Documentation/networking/cpsw.txt       | 540 ++++++++++++++++++++++++
 drivers/net/ethernet/ti/cpsw.c          | 364 +++++++++++++++-
 drivers/net/ethernet/ti/davinci_cpdma.c |  31 +-
 3 files changed, 913 insertions(+), 22 deletions(-)
 create mode 100644 Documentation/networking/cpsw.txt

-- 
2.17.0

^ permalink raw reply

* Re: [bpf-next PATCH v2 0/2] SK_MSG programs: read sock fields
From: Daniel Borkmann @ 2018-05-18 21:11 UTC (permalink / raw)
  To: John Fastabend, ast; +Cc: netdev
In-Reply-To: <20180517211452.14426.98480.stgit@john-Precision-Tower-5810>

On 05/17/2018 11:16 PM, John Fastabend wrote:
> In this series we add the ability for sk msg programs to read basic
> sock information about the sock they are attached to. The second
> patch adds the tests to the selftest test_verifier.
> 
> One observation that I had from writing this series is lots of the
> ./net/core/filter.c code is almost duplicated across program types.
> I thought about building a template/macro that we could use as a
> single block of code to read sock data out for multiple programs,
> but I wasn't convinced it was worth it yet. The result was using a
> macro saved a couple lines of code per block but made the code
> a bit harder to read IMO. We can probably revisit the idea later
> if we get more duplication.
> 
> v2: add errstr field to negative test_verifier test cases to ensure
>     we get the expected err string back from the verifier.
> 
> ---
> 
> John Fastabend (2):
>       bpf: allow sk_msg programs to read sock fields
>       bpf: add sk_msg prog sk access tests to test_verifier
> 
> 
>  include/linux/filter.h                      |    1 
>  include/uapi/linux/bpf.h                    |    8 ++
>  kernel/bpf/sockmap.c                        |    1 
>  net/core/filter.c                           |  114 ++++++++++++++++++++++++++-
>  tools/include/uapi/linux/bpf.h              |    8 ++
>  tools/testing/selftests/bpf/test_verifier.c |  115 +++++++++++++++++++++++++++
>  6 files changed, 244 insertions(+), 3 deletions(-)
> 
> --
> Signature
> 

Applied to bpf-next, thanks John!

^ permalink raw reply

* Re: [PATCH bpf-next v2 7/7] tools/bpftool: add perf subcommand
From: Jakub Kicinski @ 2018-05-18 20:51 UTC (permalink / raw)
  To: Yonghong Song, Quentin Monnet; +Cc: peterz, ast, daniel, netdev, kernel-team
In-Reply-To: <20180518050310.2814608-8-yhs@fb.com>

On Thu, 17 May 2018 22:03:10 -0700, Yonghong Song wrote:
> The new command "bpftool perf [show | list]" will traverse
> all processes under /proc, and if any fd is associated
> with a perf event, it will print out related perf event
> information. Documentation is also added.

Thanks for the changes, it looks good with some minor nits which can be
addressed as follow up if there is no other need to respin.  Please
consider it:

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>

> Below is an example to show the results using bcc commands.
> Running the following 4 bcc commands:
>   kprobe:     trace.py '__x64_sys_nanosleep'
>   kretprobe:  trace.py 'r::__x64_sys_nanosleep'
>   tracepoint: trace.py 't:syscalls:sys_enter_nanosleep'
>   uprobe:     trace.py 'p:/home/yhs/a.out:main'
> 
> The bpftool command line and result:
> 
>   $ bpftool perf
>   pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
>   pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
>   pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
>   pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159
> 
>   $ bpftool -j perf
>   {"pid":21711,"fd":5,"prog_id":5,"attach_info":"kprobe","func":"__x64_sys_write","offset":0}, \
>   {"pid":21765,"fd":5,"prog_id":7,"attach_info":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
>   {"pid":21767,"fd":5,"prog_id":8,"attach_info":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
>   {"pid":21800,"fd":5,"prog_id":9,"attach_info":"uprobe","filename":"/home/yhs/a.out","offset":1159}

nit: this is now an array

>   $ bpftool prog
>   5: kprobe  name probe___x64_sys  tag e495a0c82f2c7a8d  gpl
> 	  loaded_at 2018-05-15T04:46:37-0700  uid 0
> 	  xlated 200B  not jited  memlock 4096B  map_ids 4
>   7: kprobe  name probe___x64_sys  tag f2fdee479a503abf  gpl
> 	  loaded_at 2018-05-15T04:48:32-0700  uid 0
> 	  xlated 200B  not jited  memlock 4096B  map_ids 7
>   8: tracepoint  name tracepoint__sys  tag 5390badef2395fcf  gpl
> 	  loaded_at 2018-05-15T04:48:48-0700  uid 0
> 	  xlated 200B  not jited  memlock 4096B  map_ids 8
>   9: kprobe  name probe_main_1  tag 0a87bdc2e2953b6d  gpl
> 	  loaded_at 2018-05-15T04:49:52-0700  uid 0
> 	  xlated 200B  not jited  memlock 4096B  map_ids 9
> 
>   $ ps ax | grep "python ./trace.py"
>   21711 pts/0    T      0:03 python ./trace.py __x64_sys_write
>   21765 pts/0    S+     0:00 python ./trace.py r::__x64_sys_nanosleep
>   21767 pts/2    S+     0:00 python ./trace.py t:syscalls:sys_enter_nanosleep
>   21800 pts/3    S+     0:00 python ./trace.py p:/home/yhs/a.out:main
>   22374 pts/1    S+     0:00 grep --color=auto python ./trace.py
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>

> diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
> index b301c9b..3680ad4 100644
> --- a/tools/bpf/bpftool/bash-completion/bpftool
> +++ b/tools/bpf/bpftool/bash-completion/bpftool
> @@ -448,6 +448,15 @@ _bpftool()
>                      ;;
>              esac
>              ;;
> +        cgroup)

s/cgroup/perf/ :)

> +            case $command in
> +                *)
> +                    [[ $prev == $object ]] && \
> +                        COMPREPLY=( $( compgen -W 'help \
> +                            show list' -- "$cur" ) )
> +                    ;;
> +            esac
> +            ;;
>      esac
>  } &&
>  complete -F _bpftool bpftool

> +static int show_proc(const char *fpath, const struct stat *sb,
> +		     int tflag, struct FTW *ftwbuf)
> +{
> +	__u64 probe_offset, probe_addr;
> +	__u32 prog_id, attach_info;
> +	int err, pid = 0, fd = 0;
> +	const char *pch;
> +	char buf[4096];
> +
> +	/* prefix always /proc */
> +	pch = fpath + 5;
> +	if (*pch == '\0')
> +		return 0;
> +
> +	/* pid should be all numbers */
> +	pch++;
> +	while (isdigit(*pch)) {
> +		pid = pid * 10 + *pch - '0';
> +		pch++;
> +	}
> +	if (*pch == '\0')
> +		return 0;
> +	if (*pch != '/')
> +		return FTW_SKIP_SUBTREE;
> +
> +	/* check /proc/<pid>/fd directory */
> +	pch++;
> +	if (strncmp(pch, "fd", 2))
> +		return FTW_SKIP_SUBTREE;
> +	pch += 2;
> +	if (*pch == '\0')
> +		return 0;
> +	if (*pch != '/')
> +		return FTW_SKIP_SUBTREE;
> +
> +	/* check /proc/<pid>/fd/<fd_num> */
> +	pch++;
> +	while (isdigit(*pch)) {
> +		fd = fd * 10 + *pch - '0';
> +		pch++;
> +	}
> +	if (*pch != '\0')
> +		return FTW_SKIP_SUBTREE;
> +
> +	/* query (pid, fd) for potential perf events */
> +	err = bpf_task_fd_query(pid, fd, 0, buf, sizeof(buf), &prog_id,
> +				&attach_info, &probe_offset, &probe_addr);
> +	if (err < 0)
> +		return 0;

nit: it could be nice from user perspective to detect whether kernel
     supports the command and fail if not.  Otherwise user is not sure
     if there is no output because kernel lacks support or because
     there were really no attached progs.  Just a thought, not really
     a requirement.

> +	if (json_output)
> +		print_perf_json(pid, fd, prog_id, attach_info, buf, probe_offset,
> +				probe_addr);
> +	else
> +		print_perf_plain(pid, fd, prog_id, attach_info, buf, probe_offset,
> +				 probe_addr);
> +
> +	return 0;
> +}
> +
> +static int do_show(int argc, char **argv)
> +{
> +	int err = 0, nopenfd = 16;
> +	int flags = FTW_ACTIONRETVAL | FTW_PHYS;

nit: reverse xmas tree

> +	if (json_output)
> +		jsonw_start_array(json_wtr);
> +	if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
> +		p_err("%s", strerror(errno));
> +		err = -1;
> +	}
> +	if (json_output)
> +		jsonw_end_array(json_wtr);
> +
> +	return err;
> +}

^ permalink raw reply

* Re: cascaded switch
From: Andrew Lunn @ 2018-05-18 20:51 UTC (permalink / raw)
  To: Ran Shalit; +Cc: netdev
In-Reply-To: <CAJ2oMh+ciCMb_thn5TV9n_N+mgcUj49sg+4m9O6BOVjpYLEVdg@mail.gmail.com>

> So, it is used so that the 2 switch will behave as if it is one big switch.

Yes. This is particularly important with offloading. When you offload a
bridge, you don't need to care which switch the ports are on. If
traffic needs to go from one switch to the other, it will. If you
modelled it as two switches, you would need to manually set up that
cross switch connection.

      Andrew

^ permalink raw reply

* Re: [PATCH v4 3/3] bpf: add selftest for lirc_mode2 type program
From: Y Song @ 2018-05-18 20:51 UTC (permalink / raw)
  To: Sean Young
  Cc: linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller, Quentin Monnet
In-Reply-To: <CAH3MdRW=-fwgRzfePvg_f_n81yenvEGBwB_E2FRT4f9ah04xUg@mail.gmail.com>

On Fri, May 18, 2018 at 1:17 PM, Y Song <ys114321@gmail.com> wrote:
> On Fri, May 18, 2018 at 7:07 AM, Sean Young <sean@mess.org> wrote:
>> This is simple test over rc-loopback.
>>
>> Signed-off-by: Sean Young <sean@mess.org>
>
> Acked-by: Yonghong Song <yhs@fb.com>

Just one minor thing. You need to add "test_lirc_mode2_user"
in tools/testing/selftests/bpf/.gitignore
so it will not show up when you do "git status".

If the patch needs respin, you can add this in the new revision.
Otherwise, I think a followup patch to fix this should be fine.

>
>> ---
>>  tools/bpf/bpftool/prog.c                      |   1 +
>>  tools/include/uapi/linux/bpf.h                |  53 ++++-
>>  tools/include/uapi/linux/lirc.h               | 217 ++++++++++++++++++
>>  tools/lib/bpf/libbpf.c                        |   1 +
>>  tools/testing/selftests/bpf/Makefile          |   8 +-
>>  tools/testing/selftests/bpf/bpf_helpers.h     |   6 +
>>  .../testing/selftests/bpf/test_lirc_mode2.sh  |  28 +++
>>  .../selftests/bpf/test_lirc_mode2_kern.c      |  23 ++
>>  .../selftests/bpf/test_lirc_mode2_user.c      | 154 +++++++++++++
>>  9 files changed, 487 insertions(+), 4 deletions(-)
>>  create mode 100644 tools/include/uapi/linux/lirc.h
>>  create mode 100755 tools/testing/selftests/bpf/test_lirc_mode2.sh
>>  create mode 100644 tools/testing/selftests/bpf/test_lirc_mode2_kern.c
>>  create mode 100644 tools/testing/selftests/bpf/test_lirc_mode2_user.c
>>
>> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
>> index 9bdfdf2d3fbe..07f1ace39a46 100644
>> --- a/tools/bpf/bpftool/prog.c
>> +++ b/tools/bpf/bpftool/prog.c
>> @@ -71,6 +71,7 @@ static const char * const prog_type_name[] = {
>>         [BPF_PROG_TYPE_SK_MSG]          = "sk_msg",
>>         [BPF_PROG_TYPE_RAW_TRACEPOINT]  = "raw_tracepoint",
>>         [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
>> +       [BPF_PROG_TYPE_LIRC_MODE2]      = "lirc_mode2",
>>  };
>>
>>  static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
>> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
>> index d94d333a8225..8227832b713e 100644
>> --- a/tools/include/uapi/linux/bpf.h
>> +++ b/tools/include/uapi/linux/bpf.h
>> @@ -141,6 +141,7 @@ enum bpf_prog_type {
>>         BPF_PROG_TYPE_SK_MSG,
>>         BPF_PROG_TYPE_RAW_TRACEPOINT,
>>         BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
>> +       BPF_PROG_TYPE_LIRC_MODE2,
>>  };
>>
>>  enum bpf_attach_type {
>> @@ -158,6 +159,7 @@ enum bpf_attach_type {
>>         BPF_CGROUP_INET6_CONNECT,
>>         BPF_CGROUP_INET4_POST_BIND,
>>         BPF_CGROUP_INET6_POST_BIND,
>> +       BPF_LIRC_MODE2,
>>         __MAX_BPF_ATTACH_TYPE
>>  };
>>
>> @@ -1902,6 +1904,53 @@ union bpf_attr {
>>   *             egress otherwise). This is the only flag supported for now.
>>   *     Return
>>   *             **SK_PASS** on success, or **SK_DROP** on error.
>> + *
>> + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
>> + *     Description
>> + *             This helper is used in programs implementing IR decoding, to
>> + *             report a successfully decoded key press with *scancode*,
>> + *             *toggle* value in the given *protocol*. The scancode will be
>> + *             translated to a keycode using the rc keymap, and reported as
>> + *             an input key down event. After a period a key up event is
>> + *             generated. This period can be extended by calling either
>> + *             **bpf_rc_keydown** () with the same values, or calling
>> + *             **bpf_rc_repeat** ().
>> + *
>> + *             Some protocols include a toggle bit, in case the button
>> + *             was released and pressed again between consecutive scancodes
>> + *
>> + *             The *ctx* should point to the lirc sample as passed into
>> + *             the program.
>> + *
>> + *             The *protocol* is the decoded protocol number (see
>> + *             **enum rc_proto** for some predefined values).
>> + *
>> + *             This helper is only available is the kernel was compiled with
>> + *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
>> + *             "**y**".
>> + *
>> + *     Return
>> + *             0
>> + *
>> + * int bpf_rc_repeat(void *ctx)
>> + *     Description
>> + *             This helper is used in programs implementing IR decoding, to
>> + *             report a successfully decoded repeat key message. This delays
>> + *             the generation of a key up event for previously generated
>> + *             key down event.
>> + *
>> + *             Some IR protocols like NEC have a special IR message for
>> + *             repeating last button, for when a button is held down.
>> + *
>> + *             The *ctx* should point to the lirc sample as passed into
>> + *             the program.
>> + *
>> + *             This helper is only available is the kernel was compiled with
>> + *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
>> + *             "**y**".
>> + *
>> + *     Return
>> + *             0
>>   */
>>  #define __BPF_FUNC_MAPPER(FN)          \
>>         FN(unspec),                     \
>> @@ -1976,7 +2025,9 @@ union bpf_attr {
>>         FN(fib_lookup),                 \
>>         FN(sock_hash_update),           \
>>         FN(msg_redirect_hash),          \
>> -       FN(sk_redirect_hash),
>> +       FN(sk_redirect_hash),           \
>> +       FN(rc_repeat),                  \
>> +       FN(rc_keydown),
>>
>>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>>   * function eBPF program intends to call
>> diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h
>> new file mode 100644
>> index 000000000000..f189931042a7
>> --- /dev/null
>> +++ b/tools/include/uapi/linux/lirc.h
>> @@ -0,0 +1,217 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +/*
>> + * lirc.h - linux infrared remote control header file
>> + * last modified 2010/07/13 by Jarod Wilson
>> + */
>> +
>> +#ifndef _LINUX_LIRC_H
>> +#define _LINUX_LIRC_H
>> +
>> +#include <linux/types.h>
>> +#include <linux/ioctl.h>
>> +
>> +#define PULSE_BIT       0x01000000
>> +#define PULSE_MASK      0x00FFFFFF
>> +
>> +#define LIRC_MODE2_SPACE     0x00000000
>> +#define LIRC_MODE2_PULSE     0x01000000
>> +#define LIRC_MODE2_FREQUENCY 0x02000000
>> +#define LIRC_MODE2_TIMEOUT   0x03000000
>> +
>> +#define LIRC_VALUE_MASK      0x00FFFFFF
>> +#define LIRC_MODE2_MASK      0xFF000000
>> +
>> +#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
>> +#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
>> +#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
>> +#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
>> +
>> +#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK)
>> +#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK)
>> +
>> +#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE)
>> +#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE)
>> +#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
>> +#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
>> +
>> +/* used heavily by lirc userspace */
>> +#define lirc_t int
>> +
>> +/*** lirc compatible hardware features ***/
>> +
>> +#define LIRC_MODE2SEND(x) (x)
>> +#define LIRC_SEND2MODE(x) (x)
>> +#define LIRC_MODE2REC(x) ((x) << 16)
>> +#define LIRC_REC2MODE(x) ((x) >> 16)
>> +
>> +#define LIRC_MODE_RAW                  0x00000001
>> +#define LIRC_MODE_PULSE                0x00000002
>> +#define LIRC_MODE_MODE2                0x00000004
>> +#define LIRC_MODE_SCANCODE             0x00000008
>> +#define LIRC_MODE_LIRCCODE             0x00000010
>> +
>> +
>> +#define LIRC_CAN_SEND_RAW              LIRC_MODE2SEND(LIRC_MODE_RAW)
>> +#define LIRC_CAN_SEND_PULSE            LIRC_MODE2SEND(LIRC_MODE_PULSE)
>> +#define LIRC_CAN_SEND_MODE2            LIRC_MODE2SEND(LIRC_MODE_MODE2)
>> +#define LIRC_CAN_SEND_LIRCCODE         LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
>> +
>> +#define LIRC_CAN_SEND_MASK             0x0000003f
>> +
>> +#define LIRC_CAN_SET_SEND_CARRIER      0x00000100
>> +#define LIRC_CAN_SET_SEND_DUTY_CYCLE   0x00000200
>> +#define LIRC_CAN_SET_TRANSMITTER_MASK  0x00000400
>> +
>> +#define LIRC_CAN_REC_RAW               LIRC_MODE2REC(LIRC_MODE_RAW)
>> +#define LIRC_CAN_REC_PULSE             LIRC_MODE2REC(LIRC_MODE_PULSE)
>> +#define LIRC_CAN_REC_MODE2             LIRC_MODE2REC(LIRC_MODE_MODE2)
>> +#define LIRC_CAN_REC_SCANCODE          LIRC_MODE2REC(LIRC_MODE_SCANCODE)
>> +#define LIRC_CAN_REC_LIRCCODE          LIRC_MODE2REC(LIRC_MODE_LIRCCODE)
>> +
>> +#define LIRC_CAN_REC_MASK              LIRC_MODE2REC(LIRC_CAN_SEND_MASK)
>> +
>> +#define LIRC_CAN_SET_REC_CARRIER       (LIRC_CAN_SET_SEND_CARRIER << 16)
>> +#define LIRC_CAN_SET_REC_DUTY_CYCLE    (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
>> +
>> +#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000
>> +#define LIRC_CAN_SET_REC_CARRIER_RANGE    0x80000000
>> +#define LIRC_CAN_GET_REC_RESOLUTION       0x20000000
>> +#define LIRC_CAN_SET_REC_TIMEOUT          0x10000000
>> +#define LIRC_CAN_SET_REC_FILTER           0x08000000
>> +
>> +#define LIRC_CAN_MEASURE_CARRIER          0x02000000
>> +#define LIRC_CAN_USE_WIDEBAND_RECEIVER    0x04000000
>> +
>> +#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
>> +#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
>> +
>> +#define LIRC_CAN_NOTIFY_DECODE            0x01000000
>> +
>> +/*** IOCTL commands for lirc driver ***/
>> +
>> +#define LIRC_GET_FEATURES              _IOR('i', 0x00000000, __u32)
>> +
>> +#define LIRC_GET_SEND_MODE             _IOR('i', 0x00000001, __u32)
>> +#define LIRC_GET_REC_MODE              _IOR('i', 0x00000002, __u32)
>> +#define LIRC_GET_REC_RESOLUTION        _IOR('i', 0x00000007, __u32)
>> +
>> +#define LIRC_GET_MIN_TIMEOUT           _IOR('i', 0x00000008, __u32)
>> +#define LIRC_GET_MAX_TIMEOUT           _IOR('i', 0x00000009, __u32)
>> +
>> +/* code length in bits, currently only for LIRC_MODE_LIRCCODE */
>> +#define LIRC_GET_LENGTH                _IOR('i', 0x0000000f, __u32)
>> +
>> +#define LIRC_SET_SEND_MODE             _IOW('i', 0x00000011, __u32)
>> +#define LIRC_SET_REC_MODE              _IOW('i', 0x00000012, __u32)
>> +/* Note: these can reset the according pulse_width */
>> +#define LIRC_SET_SEND_CARRIER          _IOW('i', 0x00000013, __u32)
>> +#define LIRC_SET_REC_CARRIER           _IOW('i', 0x00000014, __u32)
>> +#define LIRC_SET_SEND_DUTY_CYCLE       _IOW('i', 0x00000015, __u32)
>> +#define LIRC_SET_TRANSMITTER_MASK      _IOW('i', 0x00000017, __u32)
>> +
>> +/*
>> + * when a timeout != 0 is set the driver will send a
>> + * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is
>> + * never sent, timeout is disabled by default
>> + */
>> +#define LIRC_SET_REC_TIMEOUT           _IOW('i', 0x00000018, __u32)
>> +
>> +/* 1 enables, 0 disables timeout reports in MODE2 */
>> +#define LIRC_SET_REC_TIMEOUT_REPORTS   _IOW('i', 0x00000019, __u32)
>> +
>> +/*
>> + * if enabled from the next key press on the driver will send
>> + * LIRC_MODE2_FREQUENCY packets
>> + */
>> +#define LIRC_SET_MEASURE_CARRIER_MODE  _IOW('i', 0x0000001d, __u32)
>> +
>> +/*
>> + * to set a range use LIRC_SET_REC_CARRIER_RANGE with the
>> + * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound
>> + */
>> +#define LIRC_SET_REC_CARRIER_RANGE     _IOW('i', 0x0000001f, __u32)
>> +
>> +#define LIRC_SET_WIDEBAND_RECEIVER     _IOW('i', 0x00000023, __u32)
>> +
>> +/*
>> + * struct lirc_scancode - decoded scancode with protocol for use with
>> + *     LIRC_MODE_SCANCODE
>> + *
>> + * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR
>> + *     was decoded.
>> + * @flags: should be 0 for transmit. When receiving scancodes,
>> + *     LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set
>> + *     depending on the protocol
>> + * @rc_proto: see enum rc_proto
>> + * @keycode: the translated keycode. Set to 0 for transmit.
>> + * @scancode: the scancode received or to be sent
>> + */
>> +struct lirc_scancode {
>> +       __u64   timestamp;
>> +       __u16   flags;
>> +       __u16   rc_proto;
>> +       __u32   keycode;
>> +       __u64   scancode;
>> +};
>> +
>> +/* Set if the toggle bit of rc-5 or rc-6 is enabled */
>> +#define LIRC_SCANCODE_FLAG_TOGGLE      1
>> +/* Set if this is a nec or sanyo repeat */
>> +#define LIRC_SCANCODE_FLAG_REPEAT      2
>> +
>> +/**
>> + * enum rc_proto - the Remote Controller protocol
>> + *
>> + * @RC_PROTO_UNKNOWN: Protocol not known
>> + * @RC_PROTO_OTHER: Protocol known but proprietary
>> + * @RC_PROTO_RC5: Philips RC5 protocol
>> + * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol
>> + * @RC_PROTO_RC5_SZ: StreamZap variant of RC5
>> + * @RC_PROTO_JVC: JVC protocol
>> + * @RC_PROTO_SONY12: Sony 12 bit protocol
>> + * @RC_PROTO_SONY15: Sony 15 bit protocol
>> + * @RC_PROTO_SONY20: Sony 20 bit protocol
>> + * @RC_PROTO_NEC: NEC protocol
>> + * @RC_PROTO_NECX: Extended NEC protocol
>> + * @RC_PROTO_NEC32: NEC 32 bit protocol
>> + * @RC_PROTO_SANYO: Sanyo protocol
>> + * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard
>> + * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse
>> + * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol
>> + * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol
>> + * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol
>> + * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol
>> + * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol
>> + * @RC_PROTO_SHARP: Sharp protocol
>> + * @RC_PROTO_XMP: XMP protocol
>> + * @RC_PROTO_CEC: CEC protocol
>> + * @RC_PROTO_IMON: iMon Pad protocol
>> + */
>> +enum rc_proto {
>> +       RC_PROTO_UNKNOWN        = 0,
>> +       RC_PROTO_OTHER          = 1,
>> +       RC_PROTO_RC5            = 2,
>> +       RC_PROTO_RC5X_20        = 3,
>> +       RC_PROTO_RC5_SZ         = 4,
>> +       RC_PROTO_JVC            = 5,
>> +       RC_PROTO_SONY12         = 6,
>> +       RC_PROTO_SONY15         = 7,
>> +       RC_PROTO_SONY20         = 8,
>> +       RC_PROTO_NEC            = 9,
>> +       RC_PROTO_NECX           = 10,
>> +       RC_PROTO_NEC32          = 11,
>> +       RC_PROTO_SANYO          = 12,
>> +       RC_PROTO_MCIR2_KBD      = 13,
>> +       RC_PROTO_MCIR2_MSE      = 14,
>> +       RC_PROTO_RC6_0          = 15,
>> +       RC_PROTO_RC6_6A_20      = 16,
>> +       RC_PROTO_RC6_6A_24      = 17,
>> +       RC_PROTO_RC6_6A_32      = 18,
>> +       RC_PROTO_RC6_MCE        = 19,
>> +       RC_PROTO_SHARP          = 20,
>> +       RC_PROTO_XMP            = 21,
>> +       RC_PROTO_CEC            = 22,
>> +       RC_PROTO_IMON           = 23,
>> +};
>> +
>> +#endif
>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
>> index 3dbe217bf23e..01e514479f6b 100644
>> --- a/tools/lib/bpf/libbpf.c
>> +++ b/tools/lib/bpf/libbpf.c
>> @@ -1461,6 +1461,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
>>         case BPF_PROG_TYPE_CGROUP_DEVICE:
>>         case BPF_PROG_TYPE_SK_MSG:
>>         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
>> +       case BPF_PROG_TYPE_LIRC_MODE2:
>>                 return false;
>>         case BPF_PROG_TYPE_UNSPEC:
>>         case BPF_PROG_TYPE_KPROBE:
>> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
>> index 1eb0fa2aba92..ee6d49f18be5 100644
>> --- a/tools/testing/selftests/bpf/Makefile
>> +++ b/tools/testing/selftests/bpf/Makefile
>> @@ -24,7 +24,7 @@ urandom_read: urandom_read.c
>>  # Order correspond to 'make run_tests' order
>>  TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
>>         test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
>> -       test_sock test_btf test_sockmap
>> +       test_sock test_btf test_sockmap test_lirc_mode2_user
>>
>>  TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
>>         test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
>> @@ -33,7 +33,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
>>         sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
>>         sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
>>         test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
>> -       test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o
>> +       test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
>> +       test_lirc_mode2_kern.o
>>
>>  # Order correspond to 'make run_tests' order
>>  TEST_PROGS := test_kmod.sh \
>> @@ -42,7 +43,8 @@ TEST_PROGS := test_kmod.sh \
>>         test_xdp_meta.sh \
>>         test_offload.py \
>>         test_sock_addr.sh \
>> -       test_tunnel.sh
>> +       test_tunnel.sh \
>> +       test_lirc_mode2.sh
>>
>>  # Compile but not part of 'make run_tests'
>>  TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
>> diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
>> index 8f143dfb3700..a6864827ed34 100644
>> --- a/tools/testing/selftests/bpf/bpf_helpers.h
>> +++ b/tools/testing/selftests/bpf/bpf_helpers.h
>> @@ -114,6 +114,12 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
>>  static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
>>                              int plen, __u32 flags) =
>>         (void *) BPF_FUNC_fib_lookup;
>> +static int (*bpf_rc_repeat)(void *ctx) =
>> +       (void *) BPF_FUNC_rc_repeat;
>> +static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
>> +                            unsigned long long scancode, unsigned int toggle) =
>> +       (void *) BPF_FUNC_rc_keydown;
>> +
>>
>>  /* llvm builtin functions that eBPF C program may use to
>>   * emit BPF_LD_ABS and BPF_LD_IND instructions
>> diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
>> new file mode 100755
>> index 000000000000..ce2e15e4f976
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
>> @@ -0,0 +1,28 @@
>> +#!/bin/bash
>> +# SPDX-License-Identifier: GPL-2.0
>> +
>> +GREEN='\033[0;92m'
>> +RED='\033[0;31m'
>> +NC='\033[0m' # No Color
>> +
>> +modprobe rc-loopback
>> +
>> +for i in /sys/class/rc/rc*
>> +do
>> +       if grep -q DRV_NAME=rc-loopback $i/uevent
>> +       then
>> +               LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q)
>> +       fi
>> +done
>> +
>> +if [ -n $LIRCDEV ];
>> +then
>> +       TYPE=lirc_mode2
>> +       ./test_lirc_mode2_user $LIRCDEV
>> +       ret=$?
>> +       if [ $ret -ne 0 ]; then
>> +               echo -e ${RED}"FAIL: $TYPE"${NC}
>> +       else
>> +               echo -e ${GREEN}"PASS: $TYPE"${NC}
>> +       fi
>> +fi
>> diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
>> new file mode 100644
>> index 000000000000..ba26855563a5
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
>> @@ -0,0 +1,23 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +// test ir decoder
>> +//
>> +// Copyright (C) 2018 Sean Young <sean@mess.org>
>> +
>> +#include <linux/bpf.h>
>> +#include <linux/lirc.h>
>> +#include "bpf_helpers.h"
>> +
>> +SEC("lirc_mode2")
>> +int bpf_decoder(unsigned int *sample)
>> +{
>> +       if (LIRC_IS_PULSE(*sample)) {
>> +               unsigned int duration = LIRC_VALUE(*sample);
>> +
>> +               if (duration & 0x10000)
>> +                       bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0);
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +char _license[] SEC("license") = "GPL";
>> diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
>> new file mode 100644
>> index 000000000000..bd77688c8277
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
>> @@ -0,0 +1,154 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +// test ir decoder
>> +//
>> +// Copyright (C) 2018 Sean Young <sean@mess.org>
>> +
>> +// A lirc chardev is a device representing a consumer IR (cir) device which
>> +// can receive infrared signals from remote control (and/or transmit IR)
>> +//
>> +// IR is sent as a series of pulses and space somewhat like morse code. The
>> +// BPF program can decode this into scancodes so that rc-core can translate
>> +// this into input key codes using the rc keymap
>> +//
>> +// This test works by sending IR over rc-loopback, so the IR is processed by
>> +// BPF and then decoded into scancodes. The lirc chardev must be the one
>> +// associated with rc-loopback; see the output of ir-keytable(1).
>> +//
>> +// The following CONFIG options must be enabled for the test to succeed:
>> +// CONFIG_RC_CORE=y
>> +// CONFIG_BPF_RAWIR_EVENT=y
>> +// CONFIG_RC_LOOPBACK=y
>> +
>> +// Steps:
>> +// 1. Open the /dev/lircN device for rc-loopback (given on command line)
>> +// 2. Attach bpf_lirc_mode2 program which decodes some IR.
>> +// 3. Send some IR to the same IR device; since it is loopback, this will
>> +//    end up in the bpf program
>> +// 4. bpf program should decode IR and report keycode
>> +// 5. We can read keycode from same /dev/lirc device
>> +
>> +#include <linux/bpf.h>
>> +#include <linux/lirc.h>
>> +#include <assert.h>
>> +#include <errno.h>
>> +#include <signal.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <stdbool.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <poll.h>
>> +#include <libgen.h>
>> +#include <sys/resource.h>
>> +#include <sys/types.h>
>> +#include <sys/ioctl.h>
>> +#include <sys/stat.h>
>> +#include <fcntl.h>
>> +
>> +#include "bpf_util.h"
>> +#include <bpf/bpf.h>
>> +#include <bpf/libbpf.h>
>> +
>> +int main(int argc, char **argv)
>> +{
>> +       struct bpf_object *obj;
>> +       int ret, lircfd, progfd, mode;
>> +       int testir = 0x1dead;
>> +       u32 prog_ids[10], prog_flags[10], prog_cnt;
>> +
>> +       if (argc != 2) {
>> +               printf("Usage: %s /dev/lircN\n", argv[0]);
>> +               return 2;
>> +       }
>> +
>> +       ret = bpf_prog_load("test_lirc_mode2_kern.o",
>> +                           BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
>> +       if (ret) {
>> +               printf("Failed to load bpf program\n");
>> +               return 1;
>> +       }
>> +
>> +       lircfd = open(argv[1], O_RDWR | O_NONBLOCK);
>> +       if (lircfd == -1) {
>> +               printf("failed to open lirc device %s: %m\n", argv[1]);
>> +               return 1;
>> +       }
>> +
>> +       /* Let's try detach it before it was ever attached */
>> +       ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
>> +       if (ret != -1 || errno != ENOENT) {
>> +               printf("bpf_prog_detach2 not attached should fail: %m\n");
>> +               return 1;
>> +       }
>> +
>> +       mode = LIRC_MODE_SCANCODE;
>> +       if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) {
>> +               printf("failed to set rec mode: %m\n");
>> +               return 1;
>> +       }
>> +
>> +       prog_cnt = 10;
>> +       ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
>> +                            &prog_cnt);
>> +       if (ret) {
>> +               printf("Failed to query bpf programs on lirc device: %m\n");
>> +               return 1;
>> +       }
>> +
>> +       if (prog_cnt != 0) {
>> +               printf("Expected nothing to be attached\n");
>> +               return 1;
>> +       }
>> +
>> +       ret = bpf_prog_attach(progfd, lircfd, BPF_LIRC_MODE2, 0);
>> +       if (ret) {
>> +               printf("Failed to attach bpf to lirc device: %m\n");
>> +               return 1;
>> +       }
>> +
>> +       /* Write raw IR */
>> +       ret = write(lircfd, &testir, sizeof(testir));
>> +       if (ret != sizeof(testir)) {
>> +               printf("Failed to send test IR message: %m\n");
>> +               return 1;
>> +       }
>> +
>> +       struct pollfd pfd = { .fd = lircfd, .events = POLLIN };
>> +       struct lirc_scancode lsc;
>> +
>> +       poll(&pfd, 1, 100);
>> +
>> +       /* Read decoded IR */
>> +       ret = read(lircfd, &lsc, sizeof(lsc));
>> +       if (ret != sizeof(lsc)) {
>> +               printf("Failed to read decoded IR: %m\n");
>> +               return 1;
>> +       }
>> +
>> +       if (lsc.scancode != 0xdead || lsc.rc_proto != 64) {
>> +               printf("Incorrect scancode decoded\n");
>> +               return 1;
>> +       }
>> +
>> +       prog_cnt = 10;
>> +       ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
>> +                            &prog_cnt);
>> +       if (ret) {
>> +               printf("Failed to query bpf programs on lirc device: %m\n");
>> +               return 1;
>> +       }
>> +
>> +       if (prog_cnt != 1) {
>> +               printf("Expected one program to be attached\n");
>> +               return 1;
>> +       }
>> +
>> +       /* Let's try detaching it now it is actually attached */
>> +       ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
>> +       if (ret) {
>> +               printf("bpf_prog_detach2: returned %m\n");
>> +               return 1;
>> +       }
>> +
>> +       return 0;
>> +}
>> --
>> 2.17.0
>>

^ permalink raw reply

* Re: [bpf-next V4 PATCH 7/8] xdp/trace: extend tracepoint in devmap with an err
From: Jesper Dangaard Brouer @ 2018-05-18 20:49 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, Björn Töpel, Magnus Karlsson,
	makita.toshiaki
In-Reply-To: <152665050718.21055.1600708476059740845.stgit@firesoul>

On Fri, 18 May 2018 15:35:07 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> Extending tracepoint xdp:xdp_devmap_xmit in devmap with an err code
> allow people to easier identify the reason behind the ndo_xdp_xmit
> call to a given driver is failing.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [bpf-next V4 PATCH 8/8] samples/bpf: xdp_monitor use err code from tracepoint xdp:xdp_devmap_xmit
From: Jesper Dangaard Brouer @ 2018-05-18 20:48 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, Björn Töpel, Magnus Karlsson,
	makita.toshiaki
In-Reply-To: <152665051226.21055.12643992796812932945.stgit@firesoul>

On Fri, 18 May 2018 15:35:12 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> Update xdp_monitor to use the recently added err code introduced
> in tracepoint xdp:xdp_devmap_xmit, to show if the drop count is
> caused by some driver general delivery problem.  Other kind of drops
> will likely just be more normal TX space issues.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [bpf-next V4 PATCH 5/8] xdp: introduce xdp_return_frame_rx_napi
From: Jesper Dangaard Brouer @ 2018-05-18 20:46 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, Björn Töpel, Magnus Karlsson,
	makita.toshiaki
In-Reply-To: <152665049699.21055.13036051815851476324.stgit@firesoul>

On Fri, 18 May 2018 15:34:57 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> When sending an xdp_frame through xdp_do_redirect call, then error
> cases can happen where the xdp_frame needs to be dropped, and
> returning an -errno code isn't sufficient/possible any-longer
> (e.g. for cpumap case). This is already fully supported, by simply
> calling xdp_return_frame.
> 
> This patch is an optimization, which provides xdp_return_frame_rx_napi,
> which is a faster variant for these error cases.  It take advantage of
> the protection provided by XDP RX running under NAPI protection.
> 
> This change is mostly relevant for drivers using the page_pool
> allocator as it can take advantage of this. (Tested with mlx5).

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

Oops, forgot my SoB... I hope it's sufficient to add it this way.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: cascaded switch
From: Ran Shalit @ 2018-05-18 20:40 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: netdev
In-Reply-To: <20180518202920.GI23100@lunn.ch>

On Fri, May 18, 2018 at 11:29 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>> Hi,
>>
>> I mean the same terminology used in marvell's switch.(I don't think
>> there is more than one terminology for this, please correct me if
>> wrong).
>> Anyway, I can see examples how it is done, but I don't understand the
>> benefit of this constellation, and why device tree needs to be
>> familiar with it.
>>
>> <   switch 1  >---port10--------port10- <  switch 2 >
>>  | ....|         |                                             | ....|    |
>> port 1-9     |                                      port 1-9     |
>>                  |                                                        |
>>                  |                                                        |
>> <cpu>--mdio----------------------------------------------
>
> Your ASCII art is all messed up, but i get what you mean.
>
> This is the D in DSA. You would use this when a single switch does not
> have enough ports for your use case. So you use two switches.
>
> You need to tell each switch what links are used to get to other
> switches. There is an internal routing table. So you need to describe
> these links in device tree.
>

I understand, thanks.
So it is used so that the two switches behave as if they were one big switch.

Yet, how does it change the way the ports appear in "ifconfig"?
Is it that, if they were separate switches, I wouldn't see incrementing
numbers in "lanX" in ifconfig (as is probably the result with cascaded
switches)?

Regards,
ranran

>       Andrew

^ permalink raw reply

* Re: [pull request][for-next 00/15] Mellanox, mlx5 core and netdev updates 2018-05-17
From: Saeed Mahameed @ 2018-05-18 20:36 UTC (permalink / raw)
  To: Jason Gunthorpe, davem@davemloft.net
  Cc: netdev@vger.kernel.org, Leon Romanovsky,
	linux-rdma@vger.kernel.org, dledford@redhat.com, Or Gerlitz
In-Reply-To: <20180518172125.GA15625@mellanox.com>

On Fri, 2018-05-18 at 11:21 -0600, Jason Gunthorpe wrote:
> On Fri, May 18, 2018 at 01:03:51PM -0400, David Miller wrote:
> > From: Saeed Mahameed <saeedm@mellanox.com>
> > Date: Thu, 17 May 2018 18:22:43 -0700
> > 
> > > Below you can find two pull requests,
> > > 
> > > 1. mlx5 core updates to be shared for both netdev and RDMA,
> > > (patches 1..9)
> > >  which is based on the last mlx5-next pull request
> > >  
> > > The following changes since commit
> > > a8408f4e6db775e245f20edf12b13fd58cc03a1c:
> > > 
> > >   net/mlx5: fix spelling mistake: "modfiy" -> "modify" (2018-05-
> > > 04 12:11:51 -0700)
> > > 
> > > are available in the Git repository at:
> > > 
> > >  
> > > git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux.git
> > > tags/mlx5-updates-2018-05-17
> > > 
> > > for you to fetch changes up to
> > > 10ff5359f883412728ba816046ee3a696625ca02:
> > > 
> > >   net/mlx5e: Explicitly set source e-switch in offloaded TC rules
> > > (2018-05-17 14:17:35 -0700)
> > > 
> > > 2. mlx5e netdev updates only for net-next branch (patches 10..15)
> > > based on net-next
> > > and the above pull request.
> > > 
> > > The following changes since commit
> > > 538e2de104cfb4ef1acb35af42427bff42adbe4d:
> > > 
> > >   Merge branch 'net-Allow-more-drivers-with-COMPILE_TEST' (2018-
> > > 05-17 17:11:07 -0400)
> > > 
> > > are available in the Git repository at:
> > > 
> > >   git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git
> > > tags/mlx5e-updates-2018-05-17
> > > 
> > > for you to fetch changes up to
> > > a228060a7c9ab88597eeac131e4578595d5d46ae:
> > > 
> > >   net/mlx5e: Add HW vport counters to representor ethtool stats
> > > (2018-05-17 17:48:54 -0700)
> > > 
> > > Dave, for your convenience you can either pull 1. and then 2. or
> > > pull 2.
> > > directly.
> > 
> > Looks good.
> > 
> > I pulled 1 then I pulled 2.  That seemed to work
> > well.  Particularly
> > it allowed me to capture the two different merge commit messages
> > one
> > by one.
> 
> Does this double up the merge commit though? I see this in Saeed's
> tags/mlx5e-updates-2018-05-17 ?
> 
> commit 260ab7042e24ccd4407985c6e775e39d064fab2b
> Merge: 538e2de104cfb4 10ff5359f88341
> Author: Saeed Mahameed <saeedm@mellanox.com>
> Date:   Thu May 17 17:47:09 2018 -0700
> 
>     Merge tag 'mlx5-updates-2018-05-17' of
> git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
>     
>     mlx5-updates-2018-05-17
>     
>     mlx5 core dirver updates for both net-next and rdma-next
> branches.
>     
>     From Christophe JAILLET, first three patches to use kvfree where
> needed.
>     
>     From: Or Gerlitz <ogerlitz@mellanox.com>
>     
>     Next six patches from Roi and Co adds support for merged
>     sriov e-switch which comes to serve cases where both PFs, VFs set
>     on them and both uplinks are to be used in single v-switch SW
> model.
>     When merged e-switch is supported, the per-port e-switch is
> logically
>     merged into one e-switch that spans both physical ports and all
> the VFs.
>     
>     This model allows to offload TC eswitch rules between VFs
> belonging
>     to different PFs (and hence have different eswitch affinity), it
> also
>     sets the some of the foundations needed for uplink LAG support.
>     
>     Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> 
> And this in your tree:
> 
> commit 3888ea4e2f1fb2f61e5418adf4b8332107ac0c8f
> Merge: 2c47a65b7009eb 10ff5359f88341
> Author: David S. Miller <davem@davemloft.net>
> Date:   Fri May 18 13:00:08 2018 -0400
> 
>     Merge tag 'mlx5-updates-2018-05-17' of
> git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
>     
>     Saeed Mahameed says:
>     
>     ====================
>     mlx5-updates-2018-05-17
>     
>     mlx5 core dirver updates for both net-next and rdma-next
> branches.
>     
>     From Christophe JAILLET, first three patche to use kvfree where
> needed.
>     
>     From: Or Gerlitz <ogerlitz@mellanox.com>
>     
>     Next six patches from Roi and Co adds support for merged
>     sriov e-switch which comes to serve cases where both PFs, VFs set
>     on them and both uplinks are to be used in single v-switch SW
> model.
>     When merged e-switch is supported, the per-port e-switch is
> logically
>     merged into one e-switch that spans both physical ports and all
> the VFs.
>     
>     This model allows to offload TC eswitch rules between VFs
> belonging
>     to different PFs (and hence have different eswitch affinity), it
> also
>     sets the some of the foundations needed for uplink LAG support.
>     ====================
>     
>     Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> I think the trouble is the Saeed needs to merge the 'core' stuff to
> create the non-core patches for netdev (just like we want to do for
> rdma)
> 
> So maybe netdev should take the #2 pull request and rdma should
> take number #1?
> 

If the concern is the log message split, then yes, pulling #2 alone is
sufficient, since #1
"Merge tag 'mlx5-updates-2018-05-17' of
git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux"
is already part of #2.

That way we can also avoid the duplicate log message for #1.

> This seems to be working OK from RDMA's side, we have much less
> netdev
> stuff in our tree now which seems good!
> 
> Thanks,
> Jason

^ permalink raw reply

* Re: [pull request][for-next 00/15] Mellanox, mlx5 core and netdev updates 2018-05-17
From: Saeed Mahameed @ 2018-05-18 20:33 UTC (permalink / raw)
  To: davem@davemloft.net
  Cc: Jason Gunthorpe, netdev@vger.kernel.org, Leon Romanovsky,
	linux-rdma@vger.kernel.org, dledford@redhat.com, Or Gerlitz
In-Reply-To: <20180518.130351.456975805564832525.davem@davemloft.net>

On Fri, 2018-05-18 at 13:03 -0400, David Miller wrote:
> From: Saeed Mahameed <saeedm@mellanox.com>
> Date: Thu, 17 May 2018 18:22:43 -0700
> 
> > Below you can find two pull requests,
> > 
> > 1. mlx5 core updates to be shared for both netdev and RDMA,
> > (patches 1..9)
> >  which is based on the last mlx5-next pull request
> >  
> > The following changes since commit
> > a8408f4e6db775e245f20edf12b13fd58cc03a1c:
> > 
> >   net/mlx5: fix spelling mistake: "modfiy" -> "modify" (2018-05-04
> > 12:11:51 -0700)
> > 
> > are available in the Git repository at:
> > 
> >   git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux.git
> > tags/mlx5-updates-2018-05-17
> > 
> > for you to fetch changes up to
> > 10ff5359f883412728ba816046ee3a696625ca02:
> > 
> >   net/mlx5e: Explicitly set source e-switch in offloaded TC rules
> > (2018-05-17 14:17:35 -0700)
> > 
> > 2. mlx5e netdev updates only for net-next branch (patches 10..15)
> > based on net-next
> > and the above pull request.
> > 
> > The following changes since commit
> > 538e2de104cfb4ef1acb35af42427bff42adbe4d:
> > 
> >   Merge branch 'net-Allow-more-drivers-with-COMPILE_TEST' (2018-05-
> > 17 17:11:07 -0400)
> > 
> > are available in the Git repository at:
> > 
> >   git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git
> > tags/mlx5e-updates-2018-05-17
> > 
> > for you to fetch changes up to
> > a228060a7c9ab88597eeac131e4578595d5d46ae:
> > 
> >   net/mlx5e: Add HW vport counters to representor ethtool stats
> > (2018-05-17 17:48:54 -0700)
> > 
> > Dave, for your convenience you can either pull 1. and then 2. or
> > pull 2.
> > directly.
> 
> Looks good.
> 
> I pulled 1 then I pulled 2.  That seemed to work well.  Particularly
> it allowed me to capture the two different merge commit messages one
> by one.
> 
> Is this basically how you want to handle things moving forward?
> 

Thanks Dave!
Basically yes: we want to avoid sending netdev-related patches to rdma,
and vice versa.
Unlike the previous "shared code" procedure, this is a more natural way
to do things: since the mlx5 core is a shared arena, we want to
maintain it separately from netdev and rdma.

Before, Leon and I needed to sync before each release and create a
"shared code" pull request that included everything (core/rdma/netdev)
that was conflicting in the core arena.


> Thanks.

^ permalink raw reply

* Re: [PATCH bpf-next 0/3] nfp: bpf: complete shift supports on NFP JIT
From: Daniel Borkmann @ 2018-05-18 20:32 UTC (permalink / raw)
  To: Jakub Kicinski, alexei.starovoitov; +Cc: oss-drivers, netdev
In-Reply-To: <20180518191211.18670-1-jakub.kicinski@netronome.com>

On 05/18/2018 09:12 PM, Jakub Kicinski wrote:
> Jiong says:
> 
> NFP eBPF JIT is missing logic indirect shifts (both left and right) and
> arithmetic right shift (both indirect shift and shift by constant).
> 
> This patch adds support for them.
> 
> For indirect shifts, shift amount is not specified as constant, NFP needs
> to get the shift amount through the low 5 bits of source A operand in
> PREV_ALU, therefore extra instructions are needed compared with shifts by
> constants.
> 
> Because NFP is 32-bit, so we are using register pair for 64-bit shifts and
> therefore would need different instruction sequences depending on whether
> shift amount is less than 32 or not.
> 
> An NFP branch-on-bit-test instruction emitter is added by this patch set
> and is used for an efficient runtime check on the shift amount. We treat
> the shift amount as less than 32 if bit 5 is clear, and as greater than or
> equal to 32 otherwise. A shift amount greater than or equal to 64 results
> in undefined behavior.
> 
> This patch set also uses range info to avoid generating unnecessary runtime
> code when we know for certain whether the shift amount is less than 32.
> 
> Jiong Wang (3):
>   nfp: bpf: support logic indirect shifts (BPF_[L|R]SH | BPF_X)
>   nfp: bpf: support arithmetic right shift by constant (BPF_ARSH |
>     BPF_K)
>   nfp: bpf: support arithmetic indirect right shift (BPF_ARSH | BPF_X)
> 
>  drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 410 ++++++++++++++++--
>  drivers/net/ethernet/netronome/nfp/bpf/main.h |  28 ++
>  .../net/ethernet/netronome/nfp/bpf/offload.c  |   2 +
>  .../net/ethernet/netronome/nfp/bpf/verifier.c |   8 +
>  drivers/net/ethernet/netronome/nfp/nfp_asm.h  |  18 +-
>  5 files changed, 435 insertions(+), 31 deletions(-)

Applied to bpf-next, thanks guys!

^ permalink raw reply

* Re: [PATCH] selftests: bpf: config: enable NET_SCH_INGRESS for xdp_meta.sh
From: Daniel Borkmann @ 2018-05-18 20:32 UTC (permalink / raw)
  To: Anders Roxell, ast, shuah; +Cc: netdev, linux-kernel, linux-kselftest
In-Reply-To: <20180518182324.23827-1-anders.roxell@linaro.org>

On 05/18/2018 08:23 PM, Anders Roxell wrote:
> When running bpf's selftest test_xdp_meta.sh it fails:
> ./test_xdp_meta.sh
> Error: Specified qdisc not found.
> selftests: test_xdp_meta [FAILED]
> 
> Need to enable CONFIG_NET_SCH_INGRESS and CONFIG_NET_CLS_ACT to get the
> test to pass.
> 
> Fixes: 22c8852624fc ("bpf: improve selftests and add tests for meta pointer")
> Signed-off-by: Anders Roxell <anders.roxell@linaro.org>

Applied to bpf tree, thanks Anders!

^ permalink raw reply

* Re: WARNING in __static_key_slow_dec
From: Willem de Bruijn @ 2018-05-18 20:30 UTC (permalink / raw)
  To: DaeRyong Jeong
  Cc: David Miller, Alexey Kuznetsov, Hideaki YOSHIFUJI,
	Network Development, LKML, Byoungyoung Lee, Kyungtae Kim,
	bammanag
In-Reply-To: <20180518080308.GA28587@dragonet.kaist.ac.kr>

On Fri, May 18, 2018 at 4:03 AM, DaeRyong Jeong <threeearcat@gmail.com> wrote:
> We report the crash: WARNING in __static_key_slow_dec
>
> This crash has been found in v4.8 using RaceFuzzer (a modified
> version of Syzkaller), which we describe more at the end of this
> report.
> Even though v4.8 is a relatively old version, we did manual verification
> and we think the bug still exists.
> Our analysis shows that the race occurs when invoking two syscalls
> concurrently, setsockopt() with optname SO_TIMESTAMPING and ioctl() with
> cmd SIOCGSTAMPNS.
>
>
> Diagnosis:
> We think if timestamp was previously enabled with
> SOCK_TIMESTAMPING_RX_SOFTWARE flag, the concurrent execution of
> sock_disable_timestamp() and sock_enable_timestamp() causes the crash.
>
>
> Thread interleaving:
> (Assume sk->flag has the SOCK_TIMESTAMPING_RX_SOFTWARE flag by the
> previous setsockopt() call with SO_TIMESTAMPING)
>
> CPU0 (sock_disable_timestamp())                 CPU1 (sock_enable_timestamp())
> =====                                           =====
> (flag == 1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)  (flag == SOCK_TIMESTAMP)
>
>                                                 if (!sock_flag(sk, flag)) {
>                                                         unsigned long previous_flags = sk->sk_flags;
>
> if (sk->sk_flags & flags) {
>         sk->sk_flags &= ~flags;
>         if (sock_needs_netstamp(sk) &&
>             !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
>                 net_disable_timestamp();
>                                                         sock_set_flag(sk, flag);
>
>                                                         if (sock_needs_netstamp(sk) &&
>                                                             !(previous_flags & SK_FLAGS_TIMESTAMP))
>                                                                 net_enable_timestamp();
>                                                         /* Here, net_enable_timestamp() is not called because
>                                                          * previous_flags has the SOCK_TIMESTAMPING_RX_SOFTWARE
>                                                          * flag
>                                                          */
> /* After the race, sk->sk_flags has the flag SOCK_TIMESTAMP, but
>  * net_enable_timestamp() is not called one more time.
>  * Consequently, when the socket is closed, __sk_destruct()
>  * calls net_disable_timestamp(), which leads to the WARNING.
>  */

Thanks for the detailed analysis.

Indeed the updates to sk->sk_flags and calls to net_(dis|en)able_timestamp
should happen atomically, but this is not the case. The setsockopt
path holds the socket lock, but not all ioctl paths.

Perhaps we can take lock_sock_fast in sock_get_timestamp and
variants.

^ permalink raw reply

* Re: cascaded switch
From: Andrew Lunn @ 2018-05-18 20:29 UTC (permalink / raw)
  To: Ran Shalit; +Cc: netdev
In-Reply-To: <CAJ2oMhLSLXNE4L8UfiG_a+C1n6u_yKvUcAm4NQfrJLuBTkpSxg@mail.gmail.com>

> Hi,
> 
> I mean the same terminology used in marvell's switch. (I don't think
> there is more than one terminology for this, please correct me if
> wrong).
> Anyway, I can see examples how it is done, but I don't understand the
> benefit of this constellation, and why device tree needs to be
> familiar with it.
> 
> <   switch 1  >---port10--------port10- <  switch 2 >
>  | ....|         |                                             | ....|    |
> port 1-9     |                                      port 1-9     |
>                  |                                                        |
>                  |                                                        |
> <cpu>--mdio----------------------------------------------

Your ASCII art is all messed up, but I get what you mean.

This is the D in DSA. You would use this when a single switch does not
have enough ports for your use case. So you use two switches.

You need to tell each switch what links are used to get to other
switches. There is an internal routing table. So you need to describe
these links in device tree.

      Andrew

^ permalink raw reply

* Re: [PATCH bpf-next v6 5/6] ipv6: sr: Add seg6local action End.BPF
From: Daniel Borkmann @ 2018-05-18 20:24 UTC (permalink / raw)
  To: Mathieu Xhonneux, netdev; +Cc: dlebrun, alexei.starovoitov
In-Reply-To: <9e3f898c74cdd45c2d71676a0b60fbe6215b5e3d.1526565671.git.m.xhonneux@gmail.com>

On 05/17/2018 04:28 PM, Mathieu Xhonneux wrote:
> This patch adds the End.BPF action to the LWT seg6local infrastructure.
> This action works like any other seg6local End action, meaning that an IPv6
> header with SRH is needed, whose DA has to be equal to the SID of the
> action. It will also advance the SRH to the next segment, the BPF program
> does not have to take care of this.
> 
> Since the BPF program may not be a source of instability in the kernel, it
> is important to ensure that the integrity of the packet is maintained
> before yielding it back to the IPv6 layer. The hook hence keeps track if
> the SRH has been altered through the helpers, and re-validates its
> content if needed with seg6_validate_srh. The state kept for validation is
> stored in a per-CPU buffer. The BPF program is not allowed to directly
> write into the packet, and only some fields of the SRH can be altered
> through the helper bpf_lwt_seg6_store_bytes.
> 
> Performance profiling has shown that the SRH re-validation does not induce
> a significant overhead. If the altered SRH is deemed as invalid, the packet
> is dropped.
> 
> This validation is also done before executing any action through
> bpf_lwt_seg6_action, and will not be performed again if the SRH is not
> modified after calling the action.
> 
> The BPF program may return 3 types of return codes:
>     - BPF_OK: the End.BPF action will look up the next destination through
>              seg6_lookup_nexthop.
>     - BPF_REDIRECT: if an action has been executed through the
>           bpf_lwt_seg6_action helper, the BPF program should return this
>           value, as the skb's destination is already set and the default
>           lookup should not be performed.
>     - BPF_DROP : the packet will be dropped.
> 
> Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
> Acked-by: David Lebrun <dlebrun@google.com>
[...]
>  static struct seg6_action_desc seg6_action_table[] = {
>  	{
>  		.action		= SEG6_LOCAL_ACTION_END,
> @@ -497,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = {
>  		.attrs		= (1 << SEG6_LOCAL_SRH),
>  		.input		= input_action_end_b6_encap,
>  		.static_headroom	= sizeof(struct ipv6hdr),
> -	}
> +	},
> +	{
> +		.action		= SEG6_LOCAL_ACTION_END_BPF,
> +		.attrs		= (1 << SEG6_LOCAL_BPF),
> +		.input		= input_action_end_bpf,
> +	},
> +
>  };
>  
>  static struct seg6_action_desc *__get_action_desc(int action)
> @@ -542,6 +619,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
>  				    .len = sizeof(struct in6_addr) },
>  	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
>  	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
> +	[SEG6_LOCAL_BPF]	= { .type = NLA_NESTED },
>  };
>  
>  static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
> @@ -719,6 +797,71 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
>  	return 0;
>  }
>  
> +#define MAX_PROG_NAME 256
> +static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = {
> +	[LWT_BPF_PROG_FD]   = { .type = NLA_U32, },

From a UAPI point of view, I wouldn't name it LWT_BPF_PROG_FD but rather something like
LWT_BPF_PROG for example. That way, the setup can contain the fd number, but on the
dump you can put the prog->aux->id in there so that prog lookup can be done again.

> +	[LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
> +				.len = MAX_PROG_NAME },
> +};
> +
> +static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
> +{
> +	struct nlattr *tb[LWT_BPF_PROG_MAX + 1];
> +	struct bpf_prog *p;
> +	int ret;
> +	u32 fd;
> +
> +	ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attrs[SEG6_LOCAL_BPF],
> +			       bpf_prog_policy, NULL);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])
> +		return -EINVAL;
> +
> +	slwt->bpf.name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL);
> +	if (!slwt->bpf.name)
> +		return -ENOMEM;
> +
> +	fd = nla_get_u32(tb[LWT_BPF_PROG_FD]);
> +	p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
> +	if (IS_ERR(p))
> +		return PTR_ERR(p);

The error path above definitely has a bug: you don't free the previously
allocated slwt->bpf.name from nla_memdup().

Also when you destroy the struct seg6_local_lwt object, what I'm not getting
is where you drop the prog reference again and free slwt->bpf.name there?

> +
> +	slwt->bpf.prog = p;
> +
> +	return 0;
> +}
> +
> +static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
> +{
> +	struct nlattr *nest;
> +
> +	if (!slwt->bpf.prog)
> +		return 0;
> +
> +	nest = nla_nest_start(skb, SEG6_LOCAL_BPF);
> +	if (!nest)
> +		return -EMSGSIZE;
> +
> +	if (slwt->bpf.name &&
> +	    nla_put_string(skb, LWT_BPF_PROG_NAME, slwt->bpf.name))
> +		return -EMSGSIZE;
> +
> +	return nla_nest_end(skb, nest);
> +}
> +
> +static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
> +{
> +	if (!a->bpf.name && !b->bpf.name)
> +		return 0;
> +
> +	if (!a->bpf.name || !b->bpf.name)
> +		return 1;
> +
> +	return strcmp(a->bpf.name, b->bpf.name);
> +}
> +
>  struct seg6_action_param {
>  	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
>  	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
> @@ -749,6 +892,11 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
>  	[SEG6_LOCAL_OIF]	= { .parse = parse_nla_oif,
>  				    .put = put_nla_oif,
>  				    .cmp = cmp_nla_oif },
> +
> +	[SEG6_LOCAL_BPF]	= { .parse = parse_nla_bpf,
> +				    .put = put_nla_bpf,
> +				    .cmp = cmp_nla_bpf },
> +
>  };
>  
>  static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
> @@ -797,7 +945,6 @@ static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
>  
>  	err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
>  			       extack);
> -
>  	if (err < 0)
>  		return err;
>  
> @@ -886,6 +1033,11 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
>  	if (attrs & (1 << SEG6_LOCAL_OIF))
>  		nlsize += nla_total_size(4);
>  
> +	if (attrs & (1 << SEG6_LOCAL_BPF))
> +		nlsize += nla_total_size(sizeof(struct nlattr)) +
> +		       nla_total_size(MAX_PROG_NAME) +
> +		       nla_total_size(4);
> +
>  	return nlsize;
>  }
>  
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 3dbe217bf23e..a29fed1dfce2 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -1456,6 +1456,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
>  	case BPF_PROG_TYPE_LWT_IN:
>  	case BPF_PROG_TYPE_LWT_OUT:
>  	case BPF_PROG_TYPE_LWT_XMIT:
> +	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
>  	case BPF_PROG_TYPE_SOCK_OPS:
>  	case BPF_PROG_TYPE_SK_SKB:
>  	case BPF_PROG_TYPE_CGROUP_DEVICE:
> 

^ permalink raw reply

* Re: [PATCH v4 3/3] bpf: add selftest for lirc_mode2 type program
From: Y Song @ 2018-05-18 20:17 UTC (permalink / raw)
  To: Sean Young
  Cc: linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller, Quentin Monnet
In-Reply-To: <7e961c6fcea8130e8cb69849cd64077fed6e835a.1526651592.git.sean@mess.org>

On Fri, May 18, 2018 at 7:07 AM, Sean Young <sean@mess.org> wrote:
> This is a simple test over rc-loopback.
>
> Signed-off-by: Sean Young <sean@mess.org>

Acked-by: Yonghong Song <yhs@fb.com>

> ---
>  tools/bpf/bpftool/prog.c                      |   1 +
>  tools/include/uapi/linux/bpf.h                |  53 ++++-
>  tools/include/uapi/linux/lirc.h               | 217 ++++++++++++++++++
>  tools/lib/bpf/libbpf.c                        |   1 +
>  tools/testing/selftests/bpf/Makefile          |   8 +-
>  tools/testing/selftests/bpf/bpf_helpers.h     |   6 +
>  .../testing/selftests/bpf/test_lirc_mode2.sh  |  28 +++
>  .../selftests/bpf/test_lirc_mode2_kern.c      |  23 ++
>  .../selftests/bpf/test_lirc_mode2_user.c      | 154 +++++++++++++
>  9 files changed, 487 insertions(+), 4 deletions(-)
>  create mode 100644 tools/include/uapi/linux/lirc.h
>  create mode 100755 tools/testing/selftests/bpf/test_lirc_mode2.sh
>  create mode 100644 tools/testing/selftests/bpf/test_lirc_mode2_kern.c
>  create mode 100644 tools/testing/selftests/bpf/test_lirc_mode2_user.c
>
> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
> index 9bdfdf2d3fbe..07f1ace39a46 100644
> --- a/tools/bpf/bpftool/prog.c
> +++ b/tools/bpf/bpftool/prog.c
> @@ -71,6 +71,7 @@ static const char * const prog_type_name[] = {
>         [BPF_PROG_TYPE_SK_MSG]          = "sk_msg",
>         [BPF_PROG_TYPE_RAW_TRACEPOINT]  = "raw_tracepoint",
>         [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
> +       [BPF_PROG_TYPE_LIRC_MODE2]      = "lirc_mode2",
>  };
>
>  static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index d94d333a8225..8227832b713e 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -141,6 +141,7 @@ enum bpf_prog_type {
>         BPF_PROG_TYPE_SK_MSG,
>         BPF_PROG_TYPE_RAW_TRACEPOINT,
>         BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> +       BPF_PROG_TYPE_LIRC_MODE2,
>  };
>
>  enum bpf_attach_type {
> @@ -158,6 +159,7 @@ enum bpf_attach_type {
>         BPF_CGROUP_INET6_CONNECT,
>         BPF_CGROUP_INET4_POST_BIND,
>         BPF_CGROUP_INET6_POST_BIND,
> +       BPF_LIRC_MODE2,
>         __MAX_BPF_ATTACH_TYPE
>  };
>
> @@ -1902,6 +1904,53 @@ union bpf_attr {
>   *             egress otherwise). This is the only flag supported for now.
>   *     Return
>   *             **SK_PASS** on success, or **SK_DROP** on error.
> + *
> + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
> + *     Description
> + *             This helper is used in programs implementing IR decoding, to
> + *             report a successfully decoded key press with *scancode*,
> + *             *toggle* value in the given *protocol*. The scancode will be
> + *             translated to a keycode using the rc keymap, and reported as
> + *             an input key down event. After a period a key up event is
> + *             generated. This period can be extended by calling either
> + *             **bpf_rc_keydown** () with the same values, or calling
> + *             **bpf_rc_repeat** ().
> + *
> + *             Some protocols include a toggle bit, in case the button
> + *             was released and pressed again between consecutive scancodes
> + *
> + *             The *ctx* should point to the lirc sample as passed into
> + *             the program.
> + *
> + *             The *protocol* is the decoded protocol number (see
> + *             **enum rc_proto** for some predefined values).
> + *
> + *             This helper is only available if the kernel was compiled with
> + *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
> + *             "**y**".
> + *
> + *     Return
> + *             0
> + *
> + * int bpf_rc_repeat(void *ctx)
> + *     Description
> + *             This helper is used in programs implementing IR decoding, to
> + *             report a successfully decoded repeat key message. This delays
> + *             the generation of a key up event for previously generated
> + *             key down event.
> + *
> + *             Some IR protocols like NEC have a special IR message for
> + *             repeating last button, for when a button is held down.
> + *
> + *             The *ctx* should point to the lirc sample as passed into
> + *             the program.
> + *
> + *             This helper is only available if the kernel was compiled with
> + *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
> + *             "**y**".
> + *
> + *     Return
> + *             0
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -1976,7 +2025,9 @@ union bpf_attr {
>         FN(fib_lookup),                 \
>         FN(sock_hash_update),           \
>         FN(msg_redirect_hash),          \
> -       FN(sk_redirect_hash),
> +       FN(sk_redirect_hash),           \
> +       FN(rc_repeat),                  \
> +       FN(rc_keydown),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h
> new file mode 100644
> index 000000000000..f189931042a7
> --- /dev/null
> +++ b/tools/include/uapi/linux/lirc.h
> @@ -0,0 +1,217 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * lirc.h - linux infrared remote control header file
> + * last modified 2010/07/13 by Jarod Wilson
> + */
> +
> +#ifndef _LINUX_LIRC_H
> +#define _LINUX_LIRC_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define PULSE_BIT       0x01000000
> +#define PULSE_MASK      0x00FFFFFF
> +
> +#define LIRC_MODE2_SPACE     0x00000000
> +#define LIRC_MODE2_PULSE     0x01000000
> +#define LIRC_MODE2_FREQUENCY 0x02000000
> +#define LIRC_MODE2_TIMEOUT   0x03000000
> +
> +#define LIRC_VALUE_MASK      0x00FFFFFF
> +#define LIRC_MODE2_MASK      0xFF000000
> +
> +#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
> +#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
> +#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
> +#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
> +
> +#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK)
> +#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK)
> +
> +#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE)
> +#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE)
> +#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
> +#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
> +
> +/* used heavily by lirc userspace */
> +#define lirc_t int
> +
> +/*** lirc compatible hardware features ***/
> +
> +#define LIRC_MODE2SEND(x) (x)
> +#define LIRC_SEND2MODE(x) (x)
> +#define LIRC_MODE2REC(x) ((x) << 16)
> +#define LIRC_REC2MODE(x) ((x) >> 16)
> +
> +#define LIRC_MODE_RAW                  0x00000001
> +#define LIRC_MODE_PULSE                0x00000002
> +#define LIRC_MODE_MODE2                0x00000004
> +#define LIRC_MODE_SCANCODE             0x00000008
> +#define LIRC_MODE_LIRCCODE             0x00000010
> +
> +
> +#define LIRC_CAN_SEND_RAW              LIRC_MODE2SEND(LIRC_MODE_RAW)
> +#define LIRC_CAN_SEND_PULSE            LIRC_MODE2SEND(LIRC_MODE_PULSE)
> +#define LIRC_CAN_SEND_MODE2            LIRC_MODE2SEND(LIRC_MODE_MODE2)
> +#define LIRC_CAN_SEND_LIRCCODE         LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
> +
> +#define LIRC_CAN_SEND_MASK             0x0000003f
> +
> +#define LIRC_CAN_SET_SEND_CARRIER      0x00000100
> +#define LIRC_CAN_SET_SEND_DUTY_CYCLE   0x00000200
> +#define LIRC_CAN_SET_TRANSMITTER_MASK  0x00000400
> +
> +#define LIRC_CAN_REC_RAW               LIRC_MODE2REC(LIRC_MODE_RAW)
> +#define LIRC_CAN_REC_PULSE             LIRC_MODE2REC(LIRC_MODE_PULSE)
> +#define LIRC_CAN_REC_MODE2             LIRC_MODE2REC(LIRC_MODE_MODE2)
> +#define LIRC_CAN_REC_SCANCODE          LIRC_MODE2REC(LIRC_MODE_SCANCODE)
> +#define LIRC_CAN_REC_LIRCCODE          LIRC_MODE2REC(LIRC_MODE_LIRCCODE)
> +
> +#define LIRC_CAN_REC_MASK              LIRC_MODE2REC(LIRC_CAN_SEND_MASK)
> +
> +#define LIRC_CAN_SET_REC_CARRIER       (LIRC_CAN_SET_SEND_CARRIER << 16)
> +#define LIRC_CAN_SET_REC_DUTY_CYCLE    (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
> +
> +#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000
> +#define LIRC_CAN_SET_REC_CARRIER_RANGE    0x80000000
> +#define LIRC_CAN_GET_REC_RESOLUTION       0x20000000
> +#define LIRC_CAN_SET_REC_TIMEOUT          0x10000000
> +#define LIRC_CAN_SET_REC_FILTER           0x08000000
> +
> +#define LIRC_CAN_MEASURE_CARRIER          0x02000000
> +#define LIRC_CAN_USE_WIDEBAND_RECEIVER    0x04000000
> +
> +#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
> +#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
> +
> +#define LIRC_CAN_NOTIFY_DECODE            0x01000000
> +
> +/*** IOCTL commands for lirc driver ***/
> +
> +#define LIRC_GET_FEATURES              _IOR('i', 0x00000000, __u32)
> +
> +#define LIRC_GET_SEND_MODE             _IOR('i', 0x00000001, __u32)
> +#define LIRC_GET_REC_MODE              _IOR('i', 0x00000002, __u32)
> +#define LIRC_GET_REC_RESOLUTION        _IOR('i', 0x00000007, __u32)
> +
> +#define LIRC_GET_MIN_TIMEOUT           _IOR('i', 0x00000008, __u32)
> +#define LIRC_GET_MAX_TIMEOUT           _IOR('i', 0x00000009, __u32)
> +
> +/* code length in bits, currently only for LIRC_MODE_LIRCCODE */
> +#define LIRC_GET_LENGTH                _IOR('i', 0x0000000f, __u32)
> +
> +#define LIRC_SET_SEND_MODE             _IOW('i', 0x00000011, __u32)
> +#define LIRC_SET_REC_MODE              _IOW('i', 0x00000012, __u32)
> +/* Note: these can reset the according pulse_width */
> +#define LIRC_SET_SEND_CARRIER          _IOW('i', 0x00000013, __u32)
> +#define LIRC_SET_REC_CARRIER           _IOW('i', 0x00000014, __u32)
> +#define LIRC_SET_SEND_DUTY_CYCLE       _IOW('i', 0x00000015, __u32)
> +#define LIRC_SET_TRANSMITTER_MASK      _IOW('i', 0x00000017, __u32)
> +
> +/*
> + * when a timeout != 0 is set the driver will send a
> + * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is
> + * never sent, timeout is disabled by default
> + */
> +#define LIRC_SET_REC_TIMEOUT           _IOW('i', 0x00000018, __u32)
> +
> +/* 1 enables, 0 disables timeout reports in MODE2 */
> +#define LIRC_SET_REC_TIMEOUT_REPORTS   _IOW('i', 0x00000019, __u32)
> +
> +/*
> + * if enabled from the next key press on the driver will send
> + * LIRC_MODE2_FREQUENCY packets
> + */
> +#define LIRC_SET_MEASURE_CARRIER_MODE  _IOW('i', 0x0000001d, __u32)
> +
> +/*
> + * to set a range use LIRC_SET_REC_CARRIER_RANGE with the
> + * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound
> + */
> +#define LIRC_SET_REC_CARRIER_RANGE     _IOW('i', 0x0000001f, __u32)
> +
> +#define LIRC_SET_WIDEBAND_RECEIVER     _IOW('i', 0x00000023, __u32)
> +
> +/*
> + * struct lirc_scancode - decoded scancode with protocol for use with
> + *     LIRC_MODE_SCANCODE
> + *
> + * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR
> + *     was decoded.
> + * @flags: should be 0 for transmit. When receiving scancodes,
> + *     LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set
> + *     depending on the protocol
> + * @rc_proto: see enum rc_proto
> + * @keycode: the translated keycode. Set to 0 for transmit.
> + * @scancode: the scancode received or to be sent
> + */
> +struct lirc_scancode {
> +       __u64   timestamp;
> +       __u16   flags;
> +       __u16   rc_proto;
> +       __u32   keycode;
> +       __u64   scancode;
> +};
> +
> +/* Set if the toggle bit of rc-5 or rc-6 is enabled */
> +#define LIRC_SCANCODE_FLAG_TOGGLE      1
> +/* Set if this is a nec or sanyo repeat */
> +#define LIRC_SCANCODE_FLAG_REPEAT      2
> +
> +/**
> + * enum rc_proto - the Remote Controller protocol
> + *
> + * @RC_PROTO_UNKNOWN: Protocol not known
> + * @RC_PROTO_OTHER: Protocol known but proprietary
> + * @RC_PROTO_RC5: Philips RC5 protocol
> + * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol
> + * @RC_PROTO_RC5_SZ: StreamZap variant of RC5
> + * @RC_PROTO_JVC: JVC protocol
> + * @RC_PROTO_SONY12: Sony 12 bit protocol
> + * @RC_PROTO_SONY15: Sony 15 bit protocol
> + * @RC_PROTO_SONY20: Sony 20 bit protocol
> + * @RC_PROTO_NEC: NEC protocol
> + * @RC_PROTO_NECX: Extended NEC protocol
> + * @RC_PROTO_NEC32: NEC 32 bit protocol
> + * @RC_PROTO_SANYO: Sanyo protocol
> + * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard
> + * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse
> + * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol
> + * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol
> + * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol
> + * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol
> + * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol
> + * @RC_PROTO_SHARP: Sharp protocol
> + * @RC_PROTO_XMP: XMP protocol
> + * @RC_PROTO_CEC: CEC protocol
> + * @RC_PROTO_IMON: iMon Pad protocol
> + */
> +enum rc_proto {
> +       RC_PROTO_UNKNOWN        = 0,
> +       RC_PROTO_OTHER          = 1,
> +       RC_PROTO_RC5            = 2,
> +       RC_PROTO_RC5X_20        = 3,
> +       RC_PROTO_RC5_SZ         = 4,
> +       RC_PROTO_JVC            = 5,
> +       RC_PROTO_SONY12         = 6,
> +       RC_PROTO_SONY15         = 7,
> +       RC_PROTO_SONY20         = 8,
> +       RC_PROTO_NEC            = 9,
> +       RC_PROTO_NECX           = 10,
> +       RC_PROTO_NEC32          = 11,
> +       RC_PROTO_SANYO          = 12,
> +       RC_PROTO_MCIR2_KBD      = 13,
> +       RC_PROTO_MCIR2_MSE      = 14,
> +       RC_PROTO_RC6_0          = 15,
> +       RC_PROTO_RC6_6A_20      = 16,
> +       RC_PROTO_RC6_6A_24      = 17,
> +       RC_PROTO_RC6_6A_32      = 18,
> +       RC_PROTO_RC6_MCE        = 19,
> +       RC_PROTO_SHARP          = 20,
> +       RC_PROTO_XMP            = 21,
> +       RC_PROTO_CEC            = 22,
> +       RC_PROTO_IMON           = 23,
> +};
> +
> +#endif
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 3dbe217bf23e..01e514479f6b 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -1461,6 +1461,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
>         case BPF_PROG_TYPE_CGROUP_DEVICE:
>         case BPF_PROG_TYPE_SK_MSG:
>         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
> +       case BPF_PROG_TYPE_LIRC_MODE2:
>                 return false;
>         case BPF_PROG_TYPE_UNSPEC:
>         case BPF_PROG_TYPE_KPROBE:
> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> index 1eb0fa2aba92..ee6d49f18be5 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -24,7 +24,7 @@ urandom_read: urandom_read.c
>  # Order correspond to 'make run_tests' order
>  TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
>         test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
> -       test_sock test_btf test_sockmap
> +       test_sock test_btf test_sockmap test_lirc_mode2_user
>
>  TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
>         test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
> @@ -33,7 +33,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
>         sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
>         sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
>         test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
> -       test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o
> +       test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
> +       test_lirc_mode2_kern.o
>
>  # Order correspond to 'make run_tests' order
>  TEST_PROGS := test_kmod.sh \
> @@ -42,7 +43,8 @@ TEST_PROGS := test_kmod.sh \
>         test_xdp_meta.sh \
>         test_offload.py \
>         test_sock_addr.sh \
> -       test_tunnel.sh
> +       test_tunnel.sh \
> +       test_lirc_mode2.sh
>
>  # Compile but not part of 'make run_tests'
>  TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
> diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
> index 8f143dfb3700..a6864827ed34 100644
> --- a/tools/testing/selftests/bpf/bpf_helpers.h
> +++ b/tools/testing/selftests/bpf/bpf_helpers.h
> @@ -114,6 +114,12 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
>  static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
>                              int plen, __u32 flags) =
>         (void *) BPF_FUNC_fib_lookup;
> +static int (*bpf_rc_repeat)(void *ctx) =
> +       (void *) BPF_FUNC_rc_repeat;
> +static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
> +                            unsigned long long scancode, unsigned int toggle) =
> +       (void *) BPF_FUNC_rc_keydown;
> +
>
>  /* llvm builtin functions that eBPF C program may use to
>   * emit BPF_LD_ABS and BPF_LD_IND instructions
> diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
> new file mode 100755
> index 000000000000..ce2e15e4f976
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
> @@ -0,0 +1,28 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +
> +GREEN='\033[0;92m'
> +RED='\033[0;31m'
> +NC='\033[0m' # No Color
> +
> +modprobe rc-loopback
> +
> +for i in /sys/class/rc/rc*
> +do
> +       if grep -q DRV_NAME=rc-loopback $i/uevent
> +       then
> +               LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q)
> +       fi
> +done
> +
> +if [ -n $LIRCDEV ];
> +then
> +       TYPE=lirc_mode2
> +       ./test_lirc_mode2_user $LIRCDEV
> +       ret=$?
> +       if [ $ret -ne 0 ]; then
> +               echo -e ${RED}"FAIL: $TYPE"${NC}
> +       else
> +               echo -e ${GREEN}"PASS: $TYPE"${NC}
> +       fi
> +fi
> diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
> new file mode 100644
> index 000000000000..ba26855563a5
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
> @@ -0,0 +1,23 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// test ir decoder
> +//
> +// Copyright (C) 2018 Sean Young <sean@mess.org>
> +
> +#include <linux/bpf.h>
> +#include <linux/lirc.h>
> +#include "bpf_helpers.h"
> +
> +SEC("lirc_mode2")
> +int bpf_decoder(unsigned int *sample)
> +{
> +       if (LIRC_IS_PULSE(*sample)) {
> +               unsigned int duration = LIRC_VALUE(*sample);
> +
> +               if (duration & 0x10000)
> +                       bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0);
> +       }
> +
> +       return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
> new file mode 100644
> index 000000000000..bd77688c8277
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
> @@ -0,0 +1,154 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// test ir decoder
> +//
> +// Copyright (C) 2018 Sean Young <sean@mess.org>
> +
> +// A lirc chardev is a device representing a consumer IR (cir) device which
> +// can receive infrared signals from remote control (and/or transmit IR)
> +//
> +// IR is sent as a series of pulses and space somewhat like morse code. The
> +// BPF program can decode this into scancodes so that rc-core can translate
> +// this into input key codes using the rc keymap
> +//
> +// This test works by sending IR over rc-loopback, so the IR is processed by
> +// BPF and then decoded into scancodes. The lirc chardev must be the one
> +// associated with rc-loopback, see the output of ir-keytable(1).
> +//
> +// The following CONFIG options must be enabled for the test to succeed:
> +// CONFIG_RC_CORE=y
> +// CONFIG_BPF_RAWIR_EVENT=y
> +// CONFIG_RC_LOOPBACK=y
> +
> +// Steps:
> +// 1. Open the /dev/lircN device for rc-loopback (given on command line)
> +// 2. Attach bpf_lirc_mode2 program which decodes some IR.
> +// 3. Send some IR to the same IR device; since it is loopback, this will
> +//    end up in the bpf program
> +// 4. bpf program should decode IR and report keycode
> +// 5. We can read keycode from same /dev/lirc device
> +
> +#include <linux/bpf.h>
> +#include <linux/lirc.h>
> +#include <assert.h>
> +#include <errno.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <poll.h>
> +#include <libgen.h>
> +#include <sys/resource.h>
> +#include <sys/types.h>
> +#include <sys/ioctl.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +
> +#include "bpf_util.h"
> +#include <bpf/bpf.h>
> +#include <bpf/libbpf.h>
> +
> +int main(int argc, char **argv)
> +{
> +       struct bpf_object *obj;
> +       int ret, lircfd, progfd, mode;
> +       int testir = 0x1dead;
> +       u32 prog_ids[10], prog_flags[10], prog_cnt;
> +
> +       if (argc != 2) {
> +               printf("Usage: %s /dev/lircN\n", argv[0]);
> +               return 2;
> +       }
> +
> +       ret = bpf_prog_load("test_lirc_mode2_kern.o",
> +                           BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
> +       if (ret) {
> +               printf("Failed to load bpf program\n");
> +               return 1;
> +       }
> +
> +       lircfd = open(argv[1], O_RDWR | O_NONBLOCK);
> +       if (lircfd == -1) {
> +               printf("failed to open lirc device %s: %m\n", argv[1]);
> +               return 1;
> +       }
> +
> +       /* Let's try detaching it before it was ever attached */
> +       ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
> +       if (ret != -1 || errno != ENOENT) {
> +               printf("bpf_prog_detach2 not attached should fail: %m\n");
> +               return 1;
> +       }
> +
> +       mode = LIRC_MODE_SCANCODE;
> +       if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) {
> +               printf("failed to set rec mode: %m\n");
> +               return 1;
> +       }
> +
> +       prog_cnt = 10;
> +       ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
> +                            &prog_cnt);
> +       if (ret) {
> +               printf("Failed to query bpf programs on lirc device: %m\n");
> +               return 1;
> +       }
> +
> +       if (prog_cnt != 0) {
> +               printf("Expected nothing to be attached\n");
> +               return 1;
> +       }
> +
> +       ret = bpf_prog_attach(progfd, lircfd, BPF_LIRC_MODE2, 0);
> +       if (ret) {
> +               printf("Failed to attach bpf to lirc device: %m\n");
> +               return 1;
> +       }
> +
> +       /* Write raw IR */
> +       ret = write(lircfd, &testir, sizeof(testir));
> +       if (ret != sizeof(testir)) {
> +               printf("Failed to send test IR message: %m\n");
> +               return 1;
> +       }
> +
> +       struct pollfd pfd = { .fd = lircfd, .events = POLLIN };
> +       struct lirc_scancode lsc;
> +
> +       poll(&pfd, 1, 100);
> +
> +       /* Read decoded IR */
> +       ret = read(lircfd, &lsc, sizeof(lsc));
> +       if (ret != sizeof(lsc)) {
> +               printf("Failed to read decoded IR: %m\n");
> +               return 1;
> +       }
> +
> +       if (lsc.scancode != 0xdead || lsc.rc_proto != 64) {
> +               printf("Incorrect scancode decoded\n");
> +               return 1;
> +       }
> +
> +       prog_cnt = 10;
> +       ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
> +                            &prog_cnt);
> +       if (ret) {
> +               printf("Failed to query bpf programs on lirc device: %m\n");
> +               return 1;
> +       }
> +
> +       if (prog_cnt != 1) {
> +               printf("Expected one program to be attached\n");
> +               return 1;
> +       }
> +
> +       /* Let's try detaching it now it is actually attached */
> +       ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
> +       if (ret) {
> +               printf("bpf_prog_detach2: returned %m\n");
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> --
> 2.17.0
>

^ permalink raw reply

* Re: [PATCH v4 2/3] media: rc: introduce BPF_PROG_LIRC_MODE2
From: Y Song @ 2018-05-18 20:14 UTC (permalink / raw)
  To: Sean Young
  Cc: linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller, Quentin Monnet
In-Reply-To: <cd5140387a0f9c5ffc68d1846774f12fed45f34d.1526651592.git.sean@mess.org>

On Fri, May 18, 2018 at 7:07 AM, Sean Young <sean@mess.org> wrote:
> Add support for BPF_PROG_LIRC_MODE2. This type of BPF program can call
> rc_keydown() to report decoded IR scancodes, or rc_repeat() to report
> that the last key should be repeated.
>
> The bpf program can be attached using the bpf(BPF_PROG_ATTACH) syscall;
> the target_fd must be the /dev/lircN device.
>
> Signed-off-by: Sean Young <sean@mess.org>

Acked-by: Yonghong Song <yhs@fb.com>

> ---
>  drivers/media/rc/Kconfig        |  13 ++
>  drivers/media/rc/Makefile       |   1 +
>  drivers/media/rc/bpf-lirc.c     | 308 ++++++++++++++++++++++++++++++++
>  drivers/media/rc/lirc_dev.c     |  30 ++++
>  drivers/media/rc/rc-core-priv.h |  22 +++
>  drivers/media/rc/rc-ir-raw.c    |  12 +-
>  include/linux/bpf_rcdev.h       |  30 ++++
>  include/linux/bpf_types.h       |   3 +
>  include/uapi/linux/bpf.h        |  53 +++++-
>  kernel/bpf/syscall.c            |   7 +
>  10 files changed, 476 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/media/rc/bpf-lirc.c
>  create mode 100644 include/linux/bpf_rcdev.h
>
> diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig
> index eb2c3b6eca7f..d5b35a6ba899 100644
> --- a/drivers/media/rc/Kconfig
> +++ b/drivers/media/rc/Kconfig
> @@ -25,6 +25,19 @@ config LIRC
>            passes raw IR to and from userspace, which is needed for
>            IR transmitting (aka "blasting") and for the lirc daemon.
>
> +config BPF_LIRC_MODE2
> +       bool "Support for eBPF programs attached to lirc devices"
> +       depends on BPF_SYSCALL
> +       depends on RC_CORE=y
> +       depends on LIRC
> +       help
> +          Allow attaching eBPF programs to a lirc device using the bpf(2)
> +          syscall command BPF_PROG_ATTACH. This is supported for raw IR
> +          receivers.
> +
> +          These eBPF programs can be used to decode IR into scancodes, for
> +          IR protocols not supported by the kernel decoders.
> +
>  menuconfig RC_DECODERS
>         bool "Remote controller decoders"
>         depends on RC_CORE
> diff --git a/drivers/media/rc/Makefile b/drivers/media/rc/Makefile
> index 2e1c87066f6c..e0340d043fe8 100644
> --- a/drivers/media/rc/Makefile
> +++ b/drivers/media/rc/Makefile
> @@ -5,6 +5,7 @@ obj-y += keymaps/
>  obj-$(CONFIG_RC_CORE) += rc-core.o
>  rc-core-y := rc-main.o rc-ir-raw.o
>  rc-core-$(CONFIG_LIRC) += lirc_dev.o
> +rc-core-$(CONFIG_BPF_LIRC_MODE2) += bpf-lirc.o
>  obj-$(CONFIG_IR_NEC_DECODER) += ir-nec-decoder.o
>  obj-$(CONFIG_IR_RC5_DECODER) += ir-rc5-decoder.o
>  obj-$(CONFIG_IR_RC6_DECODER) += ir-rc6-decoder.o
> diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
> new file mode 100644
> index 000000000000..c9673df2d9cd
> --- /dev/null
> +++ b/drivers/media/rc/bpf-lirc.c
> @@ -0,0 +1,308 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// bpf-lirc.c - handles bpf
> +//
> +// Copyright (C) 2018 Sean Young <sean@mess.org>
> +
> +#include <linux/bpf.h>
> +#include <linux/filter.h>
> +#include <linux/bpf_rcdev.h>
> +#include "rc-core-priv.h"
> +
> +/*
> + * BPF interface for raw IR
> + */
> +const struct bpf_prog_ops lirc_mode2_prog_ops = {
> +};
> +
> +BPF_CALL_1(bpf_rc_repeat, u32*, sample)
> +{
> +       struct ir_raw_event_ctrl *ctrl;
> +
> +       ctrl = container_of(sample, struct ir_raw_event_ctrl, bpf_sample);
> +
> +       rc_repeat(ctrl->dev);
> +
> +       return 0;
> +}
> +
> +static const struct bpf_func_proto rc_repeat_proto = {
> +       .func      = bpf_rc_repeat,
> +       .gpl_only  = true, /* rc_repeat is EXPORT_SYMBOL_GPL */
> +       .ret_type  = RET_INTEGER,
> +       .arg1_type = ARG_PTR_TO_CTX,
> +};
> +
> +/*
> + * Currently rc-core does not support 64-bit scancodes, but there are many
> + * known protocols with more than 32 bits. So, define the interface as u64
> + * to be future-proof.
> + */
> +BPF_CALL_4(bpf_rc_keydown, u32*, sample, u32, protocol, u64, scancode,
> +          u32, toggle)
> +{
> +       struct ir_raw_event_ctrl *ctrl;
> +
> +       ctrl = container_of(sample, struct ir_raw_event_ctrl, bpf_sample);
> +
> +       rc_keydown(ctrl->dev, protocol, scancode, toggle != 0);
> +
> +       return 0;
> +}
> +
> +static const struct bpf_func_proto rc_keydown_proto = {
> +       .func      = bpf_rc_keydown,
> +       .gpl_only  = true, /* rc_keydown is EXPORT_SYMBOL_GPL */
> +       .ret_type  = RET_INTEGER,
> +       .arg1_type = ARG_PTR_TO_CTX,
> +       .arg2_type = ARG_ANYTHING,
> +       .arg3_type = ARG_ANYTHING,
> +       .arg4_type = ARG_ANYTHING,
> +};
> +
> +static const struct bpf_func_proto *
> +lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> +       switch (func_id) {
> +       case BPF_FUNC_rc_repeat:
> +               return &rc_repeat_proto;
> +       case BPF_FUNC_rc_keydown:
> +               return &rc_keydown_proto;
> +       case BPF_FUNC_map_lookup_elem:
> +               return &bpf_map_lookup_elem_proto;
> +       case BPF_FUNC_map_update_elem:
> +               return &bpf_map_update_elem_proto;
> +       case BPF_FUNC_map_delete_elem:
> +               return &bpf_map_delete_elem_proto;
> +       case BPF_FUNC_ktime_get_ns:
> +               return &bpf_ktime_get_ns_proto;
> +       case BPF_FUNC_tail_call:
> +               return &bpf_tail_call_proto;
> +       case BPF_FUNC_get_prandom_u32:
> +               return &bpf_get_prandom_u32_proto;
> +       case BPF_FUNC_trace_printk:
> +               if (capable(CAP_SYS_ADMIN))
> +                       return bpf_get_trace_printk_proto();
> +               /* fall through */
> +       default:
> +               return NULL;
> +       }
> +}
> +
> +static bool lirc_mode2_is_valid_access(int off, int size,
> +                                      enum bpf_access_type type,
> +                                      const struct bpf_prog *prog,
> +                                      struct bpf_insn_access_aux *info)
> +{
> +       /* We have one field of u32 */
> +       return type == BPF_READ && off == 0 && size == sizeof(u32);
> +}
> +
> +const struct bpf_verifier_ops lirc_mode2_verifier_ops = {
> +       .get_func_proto  = lirc_mode2_func_proto,
> +       .is_valid_access = lirc_mode2_is_valid_access
> +};
> +
> +#define BPF_MAX_PROGS 64
> +
> +static int rc_dev_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog)
> +{
> +       struct bpf_prog_array __rcu *old_array;
> +       struct bpf_prog_array *new_array;
> +       struct ir_raw_event_ctrl *raw;
> +       int ret;
> +
> +       if (rcdev->driver_type != RC_DRIVER_IR_RAW)
> +               return -EINVAL;
> +
> +       ret = mutex_lock_interruptible(&ir_raw_handler_lock);
> +       if (ret)
> +               return ret;
> +
> +       raw = rcdev->raw;
> +       if (!raw) {
> +               ret = -ENODEV;
> +               goto unlock;
> +       }
> +
> +       if (raw->progs && bpf_prog_array_length(raw->progs) >= BPF_MAX_PROGS) {
> +               ret = -E2BIG;
> +               goto unlock;
> +       }
> +
> +       old_array = raw->progs;
> +       ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
> +       if (ret < 0)
> +               goto unlock;
> +
> +       rcu_assign_pointer(raw->progs, new_array);
> +       bpf_prog_array_free(old_array);
> +
> +unlock:
> +       mutex_unlock(&ir_raw_handler_lock);
> +       return ret;
> +}
> +
> +static int rc_dev_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
> +{
> +       struct bpf_prog_array __rcu *old_array;
> +       struct bpf_prog_array *new_array;
> +       struct ir_raw_event_ctrl *raw;
> +       int ret;
> +
> +       if (rcdev->driver_type != RC_DRIVER_IR_RAW)
> +               return -EINVAL;
> +
> +       ret = mutex_lock_interruptible(&ir_raw_handler_lock);
> +       if (ret)
> +               return ret;
> +
> +       raw = rcdev->raw;
> +       if (!raw) {
> +               ret = -ENODEV;
> +               goto unlock;
> +       }
> +
> +       old_array = raw->progs;
> +       ret = bpf_prog_array_copy(old_array, prog, NULL, &new_array);
> +       /*
> +        * Do not use bpf_prog_array_delete_safe() as we would end up
> +        * with a dummy entry in the array, and then we would free the
> +        * dummy in rc_dev_bpf_free()
> +        */
> +       if (ret == 0) {
> +               rcu_assign_pointer(raw->progs, new_array);
> +               bpf_prog_array_free(old_array);
> +       }
> +unlock:
> +       mutex_unlock(&ir_raw_handler_lock);
> +       return ret;
> +}
> +
> +void rc_dev_bpf_run(struct rc_dev *rcdev, u32 sample)
> +{
> +       struct ir_raw_event_ctrl *raw = rcdev->raw;
> +
> +       raw->bpf_sample = sample;
> +
> +       if (raw->progs)
> +               BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, BPF_PROG_RUN);
> +}
> +
> +/*
> + * This should be called once the rc thread has been stopped, so there can be
> + * no concurrent bpf execution.
> + */
> +void rc_dev_bpf_free(struct rc_dev *rcdev)
> +{
> +       struct bpf_prog **progs;
> +
> +       if (!rcdev->raw->progs)
> +               return;
> +
> +       progs = rcu_dereference(rcdev->raw->progs)->progs;
> +       while (*progs)
> +               bpf_prog_put(*progs++);
> +
> +       bpf_prog_array_free(rcdev->raw->progs);
> +}
> +
> +int rc_dev_prog_attach(const union bpf_attr *attr)
> +{
> +       struct bpf_prog *prog;
> +       struct rc_dev *rcdev;
> +       int ret;
> +
> +       if (attr->attach_flags)
> +               return -EINVAL;
> +
> +       prog = bpf_prog_get_type(attr->attach_bpf_fd,
> +                                BPF_PROG_TYPE_LIRC_MODE2);
> +       if (IS_ERR(prog))
> +               return PTR_ERR(prog);
> +
> +       rcdev = rc_dev_get_from_fd(attr->target_fd);
> +       if (IS_ERR(rcdev)) {
> +               bpf_prog_put(prog);
> +               return PTR_ERR(rcdev);
> +       }
> +
> +       ret = rc_dev_bpf_attach(rcdev, prog);
> +       if (ret)
> +               bpf_prog_put(prog);
> +
> +       put_device(&rcdev->dev);
> +
> +       return ret;
> +}
> +
> +int rc_dev_prog_detach(const union bpf_attr *attr)
> +{
> +       struct bpf_prog *prog;
> +       struct rc_dev *rcdev;
> +       int ret;
> +
> +       if (attr->attach_flags)
> +               return -EINVAL;
> +
> +       prog = bpf_prog_get_type(attr->attach_bpf_fd,
> +                                BPF_PROG_TYPE_LIRC_MODE2);
> +       if (IS_ERR(prog))
> +               return PTR_ERR(prog);
> +
> +       rcdev = rc_dev_get_from_fd(attr->target_fd);
> +       if (IS_ERR(rcdev)) {
> +               bpf_prog_put(prog);
> +               return PTR_ERR(rcdev);
> +       }
> +
> +       ret = rc_dev_bpf_detach(rcdev, prog);
> +
> +       bpf_prog_put(prog);
> +       put_device(&rcdev->dev);
> +
> +       return ret;
> +}
> +
> +int rc_dev_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
> +{
> +       __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> +       struct bpf_prog_array __rcu *progs;
> +       struct rc_dev *rcdev;
> +       u32 cnt, flags = 0;
> +       int ret;
> +
> +       if (attr->query.query_flags)
> +               return -EINVAL;
> +
> +       rcdev = rc_dev_get_from_fd(attr->query.target_fd);
> +       if (IS_ERR(rcdev))
> +               return PTR_ERR(rcdev);
> +
> +       if (rcdev->driver_type != RC_DRIVER_IR_RAW)
> +               return -EINVAL;
> +
> +       ret = mutex_lock_interruptible(&ir_raw_handler_lock);
> +       if (ret)
> +               return ret;
> +
> +       progs = rcdev->raw->progs;
> +       cnt = progs ? bpf_prog_array_length(progs) : 0;
> +
> +       if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) {
> +               ret = -EFAULT;
> +               goto unlock;
> +       }
> +       if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) {
> +               ret = -EFAULT;
> +               goto unlock;
> +       }
> +
> +       if (attr->query.prog_cnt != 0 && prog_ids && cnt)
> +               ret = bpf_prog_array_copy_to_user(progs, prog_ids, cnt);
> +
> +unlock:
> +       mutex_unlock(&ir_raw_handler_lock);
> +       put_device(&rcdev->dev);
> +
> +       return ret;
> +}
> diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
> index 24e9fbb80e81..7e760bf11a51 100644
> --- a/drivers/media/rc/lirc_dev.c
> +++ b/drivers/media/rc/lirc_dev.c
> @@ -20,6 +20,7 @@
>  #include <linux/module.h>
>  #include <linux/mutex.h>
>  #include <linux/device.h>
> +#include <linux/file.h>
>  #include <linux/idr.h>
>  #include <linux/poll.h>
>  #include <linux/sched.h>
> @@ -104,6 +105,12 @@ void ir_lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev)
>                         TO_US(ev.duration), TO_STR(ev.pulse));
>         }
>
> +       /*
> +        * bpf does not care about the gap generated above; that exists
> +        * for backwards compatibility
> +        */
> +       rc_dev_bpf_run(dev, sample);
> +
>         spin_lock_irqsave(&dev->lirc_fh_lock, flags);
>         list_for_each_entry(fh, &dev->lirc_fh, list) {
>                 if (LIRC_IS_TIMEOUT(sample) && !fh->send_timeout_reports)
> @@ -816,4 +823,27 @@ void __exit lirc_dev_exit(void)
>         unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX);
>  }
>
> +struct rc_dev *rc_dev_get_from_fd(int fd)
> +{
> +       struct fd f = fdget(fd);
> +       struct lirc_fh *fh;
> +       struct rc_dev *dev;
> +
> +       if (!f.file)
> +               return ERR_PTR(-EBADF);
> +
> +       if (f.file->f_op != &lirc_fops) {
> +               fdput(f);
> +               return ERR_PTR(-EINVAL);
> +       }
> +
> +       fh = f.file->private_data;
> +       dev = fh->rc;
> +
> +       get_device(&dev->dev);
> +       fdput(f);
> +
> +       return dev;
> +}
> +
>  MODULE_ALIAS("lirc_dev");
> diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h
> index e0e6a17460f6..511e4a2dc2d5 100644
> --- a/drivers/media/rc/rc-core-priv.h
> +++ b/drivers/media/rc/rc-core-priv.h
> @@ -13,6 +13,7 @@
>  #define        MAX_IR_EVENT_SIZE       512
>
>  #include <linux/slab.h>
> +#include <uapi/linux/bpf.h>
>  #include <media/rc-core.h>
>
>  /**
> @@ -57,6 +58,11 @@ struct ir_raw_event_ctrl {
>         /* raw decoder state follows */
>         struct ir_raw_event prev_ev;
>         struct ir_raw_event this_ev;
> +
> +#ifdef CONFIG_BPF_LIRC_MODE2
> +       u32                             bpf_sample;
> +       struct bpf_prog_array __rcu     *progs;
> +#endif
>         struct nec_dec {
>                 int state;
>                 unsigned count;
> @@ -126,6 +132,9 @@ struct ir_raw_event_ctrl {
>         } imon;
>  };
>
> +/* Mutex for locking raw IR processing and handler change */
> +extern struct mutex ir_raw_handler_lock;
> +
>  /* macros for IR decoders */
>  static inline bool geq_margin(unsigned d1, unsigned d2, unsigned margin)
>  {
> @@ -288,6 +297,7 @@ void ir_lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev);
>  void ir_lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc);
>  int ir_lirc_register(struct rc_dev *dev);
>  void ir_lirc_unregister(struct rc_dev *dev);
> +struct rc_dev *rc_dev_get_from_fd(int fd);
>  #else
>  static inline int lirc_dev_init(void) { return 0; }
>  static inline void lirc_dev_exit(void) {}
> @@ -299,4 +309,16 @@ static inline int ir_lirc_register(struct rc_dev *dev) { return 0; }
>  static inline void ir_lirc_unregister(struct rc_dev *dev) { }
>  #endif
>
> +/*
> + * bpf interface
> + */
> +#ifdef CONFIG_BPF_LIRC_MODE2
> +void rc_dev_bpf_free(struct rc_dev *dev);
> +void rc_dev_bpf_run(struct rc_dev *dev, u32 sample);
> +#else
> +static inline void rc_dev_bpf_free(struct rc_dev *dev) { }
> +static inline void rc_dev_bpf_run(struct rc_dev *dev, u32 sample)
> +{ }
> +#endif
> +
>  #endif /* _RC_CORE_PRIV */
> diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c
> index 374f83105a23..a3131d4236b3 100644
> --- a/drivers/media/rc/rc-ir-raw.c
> +++ b/drivers/media/rc/rc-ir-raw.c
> @@ -14,7 +14,7 @@
>  static LIST_HEAD(ir_raw_client_list);
>
>  /* Used to handle IR raw handler extensions */
> -static DEFINE_MUTEX(ir_raw_handler_lock);
> +DEFINE_MUTEX(ir_raw_handler_lock);
>  static LIST_HEAD(ir_raw_handler_list);
>  static atomic64_t available_protocols = ATOMIC64_INIT(0);
>
> @@ -621,9 +621,17 @@ void ir_raw_event_unregister(struct rc_dev *dev)
>         list_for_each_entry(handler, &ir_raw_handler_list, list)
>                 if (handler->raw_unregister)
>                         handler->raw_unregister(dev);
> -       mutex_unlock(&ir_raw_handler_lock);
> +
> +       rc_dev_bpf_free(dev);
>
>         ir_raw_event_free(dev);
> +
> +       /*
> +        * A user can be calling bpf(BPF_PROG_{QUERY|ATTACH|DETACH}), so
> +        * ensure that the raw member is null on unlock; this is how
> +        * "device gone" is checked.
> +        */
> +       mutex_unlock(&ir_raw_handler_lock);
>  }
>
>  /*
> diff --git a/include/linux/bpf_rcdev.h b/include/linux/bpf_rcdev.h
> new file mode 100644
> index 000000000000..570ca0036cf5
> --- /dev/null
> +++ b/include/linux/bpf_rcdev.h
> @@ -0,0 +1,30 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _BPF_RCDEV_H
> +#define _BPF_RCDEV_H
> +
> +#include <linux/bpf.h>
> +#include <uapi/linux/bpf.h>
> +
> +#ifdef CONFIG_BPF_LIRC_MODE2
> +int rc_dev_prog_attach(const union bpf_attr *attr);
> +int rc_dev_prog_detach(const union bpf_attr *attr);
> +int rc_dev_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
> +#else
> +static inline int rc_dev_prog_attach(const union bpf_attr *attr)
> +{
> +       return -EINVAL;
> +}
> +
> +static inline int rc_dev_prog_detach(const union bpf_attr *attr)
> +{
> +       return -EINVAL;
> +}
> +
> +static inline int rc_dev_prog_query(const union bpf_attr *attr,
> +                                   union bpf_attr __user *uattr)
> +{
> +       return -EINVAL;
> +}
> +#endif
> +
> +#endif /* _BPF_RCDEV_H */
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index b67f8793de0d..47b771421d40 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -25,6 +25,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
>  #ifdef CONFIG_CGROUP_BPF
>  BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
>  #endif
> +#ifdef CONFIG_BPF_LIRC_MODE2
> +BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
> +#endif
>
>  BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
>  BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index d94d333a8225..8227832b713e 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -141,6 +141,7 @@ enum bpf_prog_type {
>         BPF_PROG_TYPE_SK_MSG,
>         BPF_PROG_TYPE_RAW_TRACEPOINT,
>         BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> +       BPF_PROG_TYPE_LIRC_MODE2,
>  };
>
>  enum bpf_attach_type {
> @@ -158,6 +159,7 @@ enum bpf_attach_type {
>         BPF_CGROUP_INET6_CONNECT,
>         BPF_CGROUP_INET4_POST_BIND,
>         BPF_CGROUP_INET6_POST_BIND,
> +       BPF_LIRC_MODE2,
>         __MAX_BPF_ATTACH_TYPE
>  };
>
> @@ -1902,6 +1904,53 @@ union bpf_attr {
>   *             egress otherwise). This is the only flag supported for now.
>   *     Return
>   *             **SK_PASS** on success, or **SK_DROP** on error.
> + *
> + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
> + *     Description
> + *             This helper is used in programs implementing IR decoding, to
> + *             report a successfully decoded key press with *scancode*,
> + *             *toggle* value in the given *protocol*. The scancode will be
> + *             translated to a keycode using the rc keymap, and reported as
> + *             an input key down event. After a period a key up event is
> + *             generated. This period can be extended by calling either
> + *             **bpf_rc_keydown** () with the same values, or calling
> + *             **bpf_rc_repeat** ().
> + *
> + *             Some protocols include a toggle bit, in case the button
> + *             was released and pressed again between consecutive scancodes.
> + *
> + *             The *ctx* should point to the lirc sample as passed into
> + *             the program.
> + *
> + *             The *protocol* is the decoded protocol number (see
> + *             **enum rc_proto** for some predefined values).
> + *
> + *             This helper is only available if the kernel was compiled with
> + *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
> + *             "**y**".
> + *
> + *     Return
> + *             0
> + *
> + * int bpf_rc_repeat(void *ctx)
> + *     Description
> + *             This helper is used in programs implementing IR decoding, to
> + *             report a successfully decoded repeat key message. This delays
> + *             the generation of a key up event for the previously generated
> + *             key down event.
> + *
> + *             Some IR protocols like NEC have a special IR message for
> + *             repeating the last button, for when a button is held down.
> + *
> + *             The *ctx* should point to the lirc sample as passed into
> + *             the program.
> + *
> + *             This helper is only available if the kernel was compiled with
> + *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
> + *             "**y**".
> + *
> + *     Return
> + *             0
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -1976,7 +2025,9 @@ union bpf_attr {
>         FN(fib_lookup),                 \
>         FN(sock_hash_update),           \
>         FN(msg_redirect_hash),          \
> -       FN(sk_redirect_hash),
> +       FN(sk_redirect_hash),           \
> +       FN(rc_repeat),                  \
> +       FN(rc_keydown),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index bfcde949c7f8..6e9a4cbc14b9 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -11,6 +11,7 @@
>   */
>  #include <linux/bpf.h>
>  #include <linux/bpf_trace.h>
> +#include <linux/bpf_rcdev.h>
>  #include <linux/btf.h>
>  #include <linux/syscalls.h>
>  #include <linux/slab.h>
> @@ -1576,6 +1577,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
>         case BPF_SK_SKB_STREAM_PARSER:
>         case BPF_SK_SKB_STREAM_VERDICT:
>                 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
> +       case BPF_LIRC_MODE2:
> +               return rc_dev_prog_attach(attr);
>         default:
>                 return -EINVAL;
>         }
> @@ -1646,6 +1649,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
>         case BPF_SK_SKB_STREAM_PARSER:
>         case BPF_SK_SKB_STREAM_VERDICT:
>                 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
> +       case BPF_LIRC_MODE2:
> +               return rc_dev_prog_detach(attr);
>         default:
>                 return -EINVAL;
>         }
> @@ -1693,6 +1698,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
>         case BPF_CGROUP_SOCK_OPS:
>         case BPF_CGROUP_DEVICE:
>                 break;
> +       case BPF_LIRC_MODE2:
> +               return rc_dev_prog_query(attr, uattr);
>         default:
>                 return -EINVAL;
>         }
> --
> 2.17.0
>

^ permalink raw reply

* Re: cascaded switch
From: Ran Shalit @ 2018-05-18 20:10 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: netdev
In-Reply-To: <20180518191357.GH23100@lunn.ch>

On Fri, May 18, 2018 at 10:13 PM, Andrew Lunn <andrew@lunn.ch> wrote:
> On Fri, May 18, 2018 at 09:35:38PM +0300, Ran Shalit wrote:
>> Hello,
>>
>> I am trying to understand the concept of cascaded switch.
>> I haven't found much information on this topic.
>>
>> Can anyone please explain the general concept, when is it used, and
>> why does the device tree need to know about cascaded switch ?
>
> Hi Ran
>
> I think you first need to define what you mean by cascaded switches.
>
Hi,

I mean the same terminology used in Marvell's switches. (I don't think
there is more than one terminology for this; please correct me if I am
wrong.)
Anyway, I can see examples how it is done, but I don't understand the
benefit of this constellation, and why device tree needs to be
familiar with it.

<   switch 1  >---port10--------port10- <  switch 2 >
 | ....|         |                                             | ....|    |
port 1-9     |                                      port 1-9     |
                 |                                                        |
                 |                                                        |
<cpu>--mdio----------------------------------------------

The term "cascaded switches" is also used in dsa documentation in device tree:
https://www.kernel.org/doc/Documentation/networking/dsa/dsa.txt


Regards,
Ranran

>   Andrew

^ permalink raw reply

* Re: [PATCH v4 1/3] bpf: bpf_prog_array_copy() should return -ENOENT if exclude_prog not found
From: Y Song @ 2018-05-18 20:08 UTC (permalink / raw)
  To: Sean Young
  Cc: linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller, Quentin Monnet
In-Reply-To: <36f48c3a4a563bd8cdac18bcf8d48c0d06365863.1526651592.git.sean@mess.org>

On Fri, May 18, 2018 at 7:07 AM, Sean Young <sean@mess.org> wrote:
> This makes it possible for bpf prog detach to return -ENOENT.
>
> Signed-off-by: Sean Young <sean@mess.org>

Acked-by: Yonghong Song <yhs@fb.com>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox