Netdev List
 help / color / mirror / Atom feed
* [PATCH v1] net: mvpp2: divide fifo for dts-active ports only
From: stefanc @ 2020-11-23 14:52 UTC (permalink / raw)
  To: netdev
  Cc: thomas.petazzoni, davem, nadavh, ymarkman, linux-kernel, stefanc,
	kuba, linux, mw, antoine.tenart, andrew, rmk+kernel

From: Stefan Chulski <stefanc@marvell.com>

Tx/Rx FIFO is a HW resource limited by total size, but shared
by all ports of same CP110 and impacting port-performance.
Do not divide the FIFO for ports which are not enabled in DTS,
so active ports could have more FIFO.

The active port mapping should be done in probe before FIFO-init.

Signed-off-by: Stefan Chulski <stefanc@marvell.com>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2.h      |  23 +++--
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 129 +++++++++++++++++-------
 2 files changed, 108 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 8347758..6bd7e40 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -695,6 +695,9 @@
 /* Maximum number of supported ports */
 #define MVPP2_MAX_PORTS			4
 
+/* Loopback port index */
+#define MVPP2_LOOPBACK_PORT_INDEX	3
+
 /* Maximum number of TXQs used by single port */
 #define MVPP2_MAX_TXQ			8
 
@@ -729,22 +732,21 @@
 #define MVPP2_TX_DESC_ALIGN		(MVPP2_DESC_ALIGNED_SIZE - 1)
 
 /* RX FIFO constants */
+#define MVPP2_RX_FIFO_PORT_DATA_SIZE_44KB	0xb000
 #define MVPP2_RX_FIFO_PORT_DATA_SIZE_32KB	0x8000
 #define MVPP2_RX_FIFO_PORT_DATA_SIZE_8KB	0x2000
 #define MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB	0x1000
-#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_32KB	0x200
-#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_8KB	0x80
+#define MVPP2_RX_FIFO_PORT_ATTR_SIZE(data_size)	((data_size) >> 6)
 #define MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB	0x40
 #define MVPP2_RX_FIFO_PORT_MIN_PKT		0x80
 
 /* TX FIFO constants */
-#define MVPP22_TX_FIFO_DATA_SIZE_10KB		0xa
-#define MVPP22_TX_FIFO_DATA_SIZE_3KB		0x3
-#define MVPP2_TX_FIFO_THRESHOLD_MIN		256
-#define MVPP2_TX_FIFO_THRESHOLD_10KB	\
-	(MVPP22_TX_FIFO_DATA_SIZE_10KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
-#define MVPP2_TX_FIFO_THRESHOLD_3KB	\
-	(MVPP22_TX_FIFO_DATA_SIZE_3KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
+#define MVPP22_TX_FIFO_DATA_SIZE_16KB		16
+#define MVPP22_TX_FIFO_DATA_SIZE_10KB		10
+#define MVPP22_TX_FIFO_DATA_SIZE_3KB		3
+#define MVPP2_TX_FIFO_THRESHOLD_MIN		256 /* Bytes */
+#define MVPP2_TX_FIFO_THRESHOLD(kb)	\
+		((kb) * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
 
 /* RX buffer constants */
 #define MVPP2_SKB_SHINFO_SIZE \
@@ -946,6 +948,9 @@ struct mvpp2 {
 	/* List of pointers to port structures */
 	int port_count;
 	struct mvpp2_port *port_list[MVPP2_MAX_PORTS];
+	/* Map of enabled ports */
+	unsigned long port_map;
+
 	struct mvpp2_tai *tai;
 
 	/* Number of Tx threads used */
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index f6616c8..08c237a 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6601,32 +6601,56 @@ static void mvpp2_rx_fifo_init(struct mvpp2 *priv)
 	mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1);
 }
 
-static void mvpp22_rx_fifo_init(struct mvpp2 *priv)
+static void mvpp22_rx_fifo_set_hw(struct mvpp2 *priv, int port, int data_size)
 {
-	int port;
+	int attr_size = MVPP2_RX_FIFO_PORT_ATTR_SIZE(data_size);
 
-	/* The FIFO size parameters are set depending on the maximum speed a
-	 * given port can handle:
-	 * - Port 0: 10Gbps
-	 * - Port 1: 2.5Gbps
-	 * - Ports 2 and 3: 1Gbps
-	 */
+	mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(port), data_size);
+	mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port), attr_size);
+}
 
-	mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(0),
-		    MVPP2_RX_FIFO_PORT_DATA_SIZE_32KB);
-	mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(0),
-		    MVPP2_RX_FIFO_PORT_ATTR_SIZE_32KB);
+/* Initialize TX FIFO's: the total FIFO size is 48kB on PPv2.2.
+ * 4kB fixed space must be assigned for the loopback port.
+ * Redistribute remaining avialable 44kB space among all active ports.
+ * Guarantee minimum 32kB for 10G port and 8kB for port 1, capable of 2.5G
+ * SGMII link.
+ */
+static void mvpp22_rx_fifo_init(struct mvpp2 *priv)
+{
+	int remaining_ports_count;
+	unsigned long port_map;
+	int size_remainder;
+	int port, size;
+
+	/* The loopback requires fixed 4kB of the FIFO space assignment. */
+	mvpp22_rx_fifo_set_hw(priv, MVPP2_LOOPBACK_PORT_INDEX,
+			      MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB);
+	port_map = priv->port_map & ~BIT(MVPP2_LOOPBACK_PORT_INDEX);
+
+	/* Set RX FIFO size to 0 for inactive ports. */
+	for_each_clear_bit(port, &port_map, MVPP2_LOOPBACK_PORT_INDEX)
+		mvpp22_rx_fifo_set_hw(priv, port, 0);
+
+	/* Assign remaining RX FIFO space among all active ports. */
+	size_remainder = MVPP2_RX_FIFO_PORT_DATA_SIZE_44KB;
+	remaining_ports_count = hweight_long(port_map);
+
+	for_each_set_bit(port, &port_map, MVPP2_LOOPBACK_PORT_INDEX) {
+		if (remaining_ports_count == 1)
+			size = size_remainder;
+		else if (port == 0)
+			size = max(size_remainder / remaining_ports_count,
+				   MVPP2_RX_FIFO_PORT_DATA_SIZE_32KB);
+		else if (port == 1)
+			size = max(size_remainder / remaining_ports_count,
+				   MVPP2_RX_FIFO_PORT_DATA_SIZE_8KB);
+		else
+			size = size_remainder / remaining_ports_count;
 
-	mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(1),
-		    MVPP2_RX_FIFO_PORT_DATA_SIZE_8KB);
-	mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(1),
-		    MVPP2_RX_FIFO_PORT_ATTR_SIZE_8KB);
+		size_remainder -= size;
+		remaining_ports_count--;
 
-	for (port = 2; port < MVPP2_MAX_PORTS; port++) {
-		mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(port),
-			    MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB);
-		mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port),
-			    MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB);
+		mvpp22_rx_fifo_set_hw(priv, port, size);
 	}
 
 	mvpp2_write(priv, MVPP2_RX_MIN_PKT_SIZE_REG,
@@ -6634,24 +6658,53 @@ static void mvpp22_rx_fifo_init(struct mvpp2 *priv)
 	mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1);
 }
 
-/* Initialize Tx FIFO's: the total FIFO size is 19kB on PPv2.2 and 10G
- * interfaces must have a Tx FIFO size of 10kB. As only port 0 can do 10G,
- * configure its Tx FIFO size to 10kB and the others ports Tx FIFO size to 3kB.
+static void mvpp22_tx_fifo_set_hw(struct mvpp2 *priv, int port, int size)
+{
+	int threshold = MVPP2_TX_FIFO_THRESHOLD(size);
+
+	mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port), size);
+	mvpp2_write(priv, MVPP22_TX_FIFO_THRESH_REG(port), threshold);
+}
+
+/* Initialize TX FIFO's: the total FIFO size is 19kB on PPv2.2.
+ * 3kB fixed space must be assigned for the loopback port.
+ * Redistribute remaining avialable 16kB space among all active ports.
+ * The 10G interface should use 10kB (which is maximum possible size
+ * per single port).
  */
 static void mvpp22_tx_fifo_init(struct mvpp2 *priv)
 {
-	int port, size, thrs;
-
-	for (port = 0; port < MVPP2_MAX_PORTS; port++) {
-		if (port == 0) {
+	int remaining_ports_count;
+	unsigned long port_map;
+	int size_remainder;
+	int port, size;
+
+	/* The loopback requires fixed 3kB of the FIFO space assignment. */
+	mvpp22_tx_fifo_set_hw(priv, MVPP2_LOOPBACK_PORT_INDEX,
+			      MVPP22_TX_FIFO_DATA_SIZE_3KB);
+	port_map = priv->port_map & ~BIT(MVPP2_LOOPBACK_PORT_INDEX);
+
+	/* Set TX FIFO size to 0 for inactive ports. */
+	for_each_clear_bit(port, &port_map, MVPP2_LOOPBACK_PORT_INDEX)
+		mvpp22_tx_fifo_set_hw(priv, port, 0);
+
+	/* Assign remaining TX FIFO space among all active ports. */
+	size_remainder = MVPP22_TX_FIFO_DATA_SIZE_16KB;
+	remaining_ports_count = hweight_long(port_map);
+
+	for_each_set_bit(port, &port_map, MVPP2_LOOPBACK_PORT_INDEX) {
+		if (remaining_ports_count == 1)
+			size = min(size_remainder,
+				   MVPP22_TX_FIFO_DATA_SIZE_10KB);
+		else if (port == 0)
 			size = MVPP22_TX_FIFO_DATA_SIZE_10KB;
-			thrs = MVPP2_TX_FIFO_THRESHOLD_10KB;
-		} else {
-			size = MVPP22_TX_FIFO_DATA_SIZE_3KB;
-			thrs = MVPP2_TX_FIFO_THRESHOLD_3KB;
-		}
-		mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port), size);
-		mvpp2_write(priv, MVPP22_TX_FIFO_THRESH_REG(port), thrs);
+		else
+			size = size_remainder / remaining_ports_count;
+
+		size_remainder -= size;
+		remaining_ports_count--;
+
+		mvpp22_tx_fifo_set_hw(priv, port, size);
 	}
 }
 
@@ -6952,6 +7005,12 @@ static int mvpp2_probe(struct platform_device *pdev)
 			goto err_axi_clk;
 	}
 
+	/* Map DTS-active ports. Should be done before FIFO mvpp2_init */
+	fwnode_for_each_available_child_node(fwnode, port_fwnode) {
+		if (!fwnode_property_read_u32(port_fwnode, "port-id", &i))
+			priv->port_map |= BIT(i);
+	}
+
 	/* Initialize network controller */
 	err = mvpp2_init(pdev, priv);
 	if (err < 0) {
-- 
1.9.1


^ permalink raw reply related

* Re: [PATCH v3 1/1] xdp: remove the function xsk_map_inc
From: Magnus Karlsson @ 2020-11-23 14:40 UTC (permalink / raw)
  To: Zhu Yanjun
  Cc: Karlsson, Magnus, Björn Töpel, David S. Miller, netdev
In-Reply-To: <CAD=hENe2Ky391gFKSWu0dC9oYZUkYRGr+H2BsoHFemKctH0vKQ@mail.gmail.com>

On Mon, Nov 23, 2020 at 3:30 PM Zhu Yanjun <zyjzyj2000@gmail.com> wrote:
>
> On Mon, Nov 23, 2020 at 10:27 PM <zyjzyj2000@gmail.com> wrote:
> >
> > From: Zhu Yanjun <zyjzyj2000@gmail.com>
> >
> > The function xsk_map_inc is a simple wrapper of bpf_map_inc and
> > always returns zero. As such, replacing this function with bpf_map_inc
> > and removing the test code.
> >
> > Signed-off-by: Zhu Yanjun <zyjzyj2000@gmail.com>
> > ---
> >  net/xdp/xsk.c    |  2 +-
> >  net/xdp/xsk.h    |  1 -
> >  net/xdp/xskmap.c | 13 +------------
> >  3 files changed, 2 insertions(+), 14 deletions(-)
> >
> > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> > index cfbec3989a76..a3c1f07d77d8 100644
> > --- a/net/xdp/xsk.c
> > +++ b/net/xdp/xsk.c
> > @@ -548,7 +548,7 @@ static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
> >         node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
> >                                         node);
> >         if (node) {
> > -               WARN_ON(xsk_map_inc(node->map));
> > +               bpf_map_inc(&node->map->map);
>
> Thanks. This is the latest version.
>
> Zhu Yanjun

Thank you. The code now looks good, but could you please resend this
without any of your replies in it (like the comments above). Something
that can be applied as a patch to source code. Use git format-patch
followed by git send-email (without any editing in between). It will
produce a clean patch.

Have you run "checkpatch.pl -strict" on it?

> >                 map = node->map;
> >                 *map_entry = node->map_entry;
> >         }
> > diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
> > index b9e896cee5bb..0aad25c0e223 100644
> > --- a/net/xdp/xsk.h
> > +++ b/net/xdp/xsk.h
> > @@ -41,7 +41,6 @@ static inline struct xdp_sock *xdp_sk(struct sock *sk)
> >
> >  void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
> >                              struct xdp_sock **map_entry);
> > -int xsk_map_inc(struct xsk_map *map);
> >  void xsk_map_put(struct xsk_map *map);
> >  void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id);
> >  int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
> > diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
> > index 49da2b8ace8b..6b7e9a72b101 100644
> > --- a/net/xdp/xskmap.c
> > +++ b/net/xdp/xskmap.c
> > @@ -11,12 +11,6 @@
> >
> >  #include "xsk.h"
> >
> > -int xsk_map_inc(struct xsk_map *map)
> > -{
> > -       bpf_map_inc(&map->map);
> > -       return 0;
> > -}
> > -
> >  void xsk_map_put(struct xsk_map *map)
> >  {
> >         bpf_map_put(&map->map);
> > @@ -26,17 +20,12 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
> >                                                struct xdp_sock **map_entry)
> >  {
> >         struct xsk_map_node *node;
> > -       int err;
> >
> >         node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
> >         if (!node)
> >                 return ERR_PTR(-ENOMEM);
> >
> > -       err = xsk_map_inc(map);
> > -       if (err) {
> > -               kfree(node);
> > -               return ERR_PTR(err);
> > -       }
> > +       bpf_map_inc(&map->map);
> >
> >         node->map = map;
> >         node->map_entry = map_entry;
> > --
> > 2.25.1
> >

^ permalink raw reply

* Re: netconsole deadlock with virtnet
From: Steven Rostedt @ 2020-11-23 14:31 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Jason Wang, Sergey Senozhatsky, Michael S. Tsirkin, Petr Mladek,
	John Ogness, virtualization, Amit Shah, Itay Aveksis,
	Ran Rozenstein, netdev
In-Reply-To: <20201123110855.GD3159@unreal>

On Mon, 23 Nov 2020 13:08:55 +0200
Leon Romanovsky <leon@kernel.org> wrote:


>  [   10.028024] Chain exists of:
>  [   10.028025]   console_owner --> target_list_lock --> _xmit_ETHER#2

Note, the problem is that we have a location that grabs the xmit_lock while
holding target_list_lock (and possibly console_owner).


>  [   10.028028]
>  [   10.028028]  Possible interrupt unsafe locking scenario:
>  [   10.028029]
>  [   10.028029]        CPU0                    CPU1
>  [   10.028030]        ----                    ----
>  [   10.028030]   lock(_xmit_ETHER#2);
>  [   10.028032]                                local_irq_disable();
>  [   10.028032]                                lock(console_owner);
>  [   10.028034]                                lock(target_list_lock);
>  [   10.028035]   <Interrupt>
>  [   10.028035]     lock(console_owner);
>  [   10.028036]
>  [   10.028037]  *** DEADLOCK ***
>  [   10.028037]



>  [   10.028107] the dependencies between the lock to be acquired
>  [   10.028107]  and HARDIRQ-irq-unsafe lock:
>  [   10.028108] -> (_xmit_ETHER#2){+.-.}-{2:2} ops: 217 {
>  [   10.028110]    HARDIRQ-ON-W at:
>  [   10.028111]                        __lock_acquire+0x8bc/0x1a94
>  [   10.028111]                        lock_acquire.part.0+0x170/0x360
>  [   10.028112]                        lock_acquire+0x68/0x8c
>  [   10.028113]                        _raw_spin_trylock+0x80/0xd0
>  [   10.028113]                        virtnet_poll+0xac/0x360

xmit_lock is taken in virtnet_poll() (via virtnet_poll_cleantx()).

This is called from the softirq, and interrupts are not disabled.

>  [   10.028114]                        net_rx_action+0x1b0/0x4e0
>  [   10.028115]                        __do_softirq+0x1f4/0x638
>  [   10.028115]                        do_softirq+0xb8/0xcc
>  [   10.028116]                        __local_bh_enable_ip+0x18c/0x200
>  [   10.028116]                        virtnet_napi_enable+0xc0/0xd4
>  [   10.028117]                        virtnet_open+0x98/0x1c0
>  [   10.028118]                        __dev_open+0x12c/0x200
>  [   10.028118]                        __dev_change_flags+0x1a0/0x220
>  [   10.028119]                        dev_change_flags+0x2c/0x70
>  [   10.028119]                        do_setlink+0x214/0xe20
>  [   10.028120]                        __rtnl_newlink+0x514/0x820
>  [   10.028120]                        rtnl_newlink+0x58/0x84
>  [   10.028121]                        rtnetlink_rcv_msg+0x184/0x4b4
>  [   10.028122]                        netlink_rcv_skb+0x60/0x124
>  [   10.028122]                        rtnetlink_rcv+0x20/0x30
>  [   10.028123]                        netlink_unicast+0x1b4/0x270
>  [   10.028124]                        netlink_sendmsg+0x1f0/0x400
>  [   10.028124]                        sock_sendmsg+0x5c/0x70
>  [   10.028125]                        ____sys_sendmsg+0x24c/0x280
>  [   10.028125]                        ___sys_sendmsg+0x88/0xd0
>  [   10.028126]                        __sys_sendmsg+0x70/0xd0
>  [   10.028127]                        __arm64_sys_sendmsg+0x2c/0x40
>  [   10.028128]                        el0_svc_common.constprop.0+0x84/0x200
>  [   10.028128]                        do_el0_svc+0x2c/0x90
>  [   10.028129]                        el0_svc+0x18/0x50
>  [   10.028129]                        el0_sync_handler+0xe0/0x350
>  [   10.028130]                        el0_sync+0x158/0x180

[..]

>  [   10.028171]  ... key      at: [<ffff80001312aef8>] netdev_xmit_lock_key+0x10/0x390
>  [   10.028171]  ... acquired at:
>  [   10.028172]    __lock_acquire+0x134c/0x1a94
>  [   10.028172]    lock_acquire.part.0+0x170/0x360
>  [   10.028173]    lock_acquire+0x68/0x8c
>  [   10.028173]    _raw_spin_lock+0x64/0x90
>  [   10.028174]    virtnet_poll_tx+0x84/0x120
>  [   10.028174]    netpoll_poll_dev+0x12c/0x350
>  [   10.028175]    netpoll_send_skb+0x39c/0x400
>  [   10.028175]    netpoll_send_udp+0x2b8/0x440
>  [   10.028176]    write_msg+0xfc/0x120 [netconsole]
>  [   10.028176]    console_unlock+0x3ec/0x6a4

The above shows the problem. We have:

	console_unlock() (which holds the console_owner lock)
	write_msg() (which holds the target_list_lock)

Then we write_msg() calls:

	netpoll_send_udp() {
	  netpoll_send_skb() {
	    netpoll_poll_dev() {
	      virtnet_poll_tx() (which takes the xmit_lock!)

  DEADLOCK!


In netpoll_send_skb() I see this:

			/* tickle device maybe there is some cleanup */
			netpoll_poll_dev(np->dev);

Which looks to me that it will call some code that should only be used in
softirq context. It's called with locks held that are taken in interrupt
context, and any locks that are taken in netpoll_poll_dev() must always be
taken with interrupts disabled. That is, if xmit_lock is taken within
netpoll_poll_dev(), then it must always be taken with interrupts disabled.
Otherwise you can have the deadlock that lockdep reported.

-- Steve




>  [   10.028177]    register_console+0x17c/0x2f4
>  [   10.028178]    init_netconsole+0x20c/0x1000 [netconsole]
>  [   10.028178]    do_one_initcall+0x8c/0x480
>  [   10.028179]    do_init_module+0x60/0x270
>  [   10.028179]    load_module+0x21f8/0x2734
>  [   10.028180]    __do_sys_finit_module+0xbc/0x12c
>  [   10.028180]    __arm64_sys_finit_module+0x28/0x34
>  [   10.028181]    el0_svc_common.constprop.0+0x84/0x200
>  [   10.028181]    do_el0_svc+0x2c/0x90
>  [   10.028182]    el0_svc+0x18/0x50
>  [   10.028182]    el0_sync_handler+0xe0/0x350
>  [   10.028183]    el0_sync+0x158/0x180
>  [   10.028183]
>  [   10.028183]
>  [   10.028184] stack backtrace:
>  [   10.028185] CPU: 14 PID: 638 Comm: modprobe Not tainted 5.10.0-rc4_for_upstream_min_debug_2020_11_22_19_37 #1
>  [   10.028186] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
>  [   10.028186] Call trace:
>  [   10.028186]  dump_backtrace+0x0/0x1d0
>  [   10.028187]  show_stack+0x20/0x3c
>  [   10.028187]  dump_stack+0xec/0x138
>  [   10.028188]  check_irq_usage+0x6b8/0x6cc
>  [   10.028188]  __lock_acquire+0x134c/0x1a94
>  [   10.028189]  lock_acquire.part.0+0x170/0x360
>  [   10.028189]  lock_acquire+0x68/0x8c
>  [   10.028190]  _raw_spin_lock+0x64/0x90
>  [   10.028191]  virtnet_poll_tx+0x84/0x120
>  [   10.028191]  netpoll_poll_dev+0x12c/0x350
>  [   10.028192]  netpoll_send_skb+0x39c/0x400
>  [   10.028192]  netpoll_send_udp+0x2b8/0x440
>  [   10.028193]  write_msg+0xfc/0x120 [netconsole]
>  [   10.028193]  console_unlock+0x3ec/0x6a4
>  [   10.028194]  register_console+0x17c/0x2f4
>  [   10.028194]  init_netconsole+0x20c/0x1000 [netconsole]
>  [   10.028195]  do_one_initcall+0x8c/0x480
>  [   10.028195]  do_init_module+0x60/0x270
>  [   10.028196]  load_module+0x21f8/0x2734
>  [   10.028197]  __do_sys_finit_module+0xbc/0x12c
>  [   10.028197]  __arm64_sys_finit_module+0x28/0x34
>  [   10.028198]  el0_svc_common.constprop.0+0x84/0x200
>  [   10.028198]  do_el0_svc+0x2c/0x90
>  [   10.028199]  el0_svc+0x18/0x50
>  [   10.028199]  el0_sync_handler+0xe0/0x350
>  [   10.028200]  el0_sync+0x158/0x180
>  [   10.073569] random: crng init done
>  [   10.073964] printk: console [netcon0] enabled
>  [   10.074704] random: 7 urandom warning(s) missed due to ratelimiting
>  [   10.075340] netconsole: network logging started
> 

^ permalink raw reply

* Re: [PATCH v3 1/1] xdp: remove the function xsk_map_inc
From: Zhu Yanjun @ 2020-11-23 14:28 UTC (permalink / raw)
  To: magnus.karlsson, bjorn.topel, David S. Miller, netdev
In-Reply-To: <20201123142743.750971-1-zyjzyj2000@gmail.com>

On Mon, Nov 23, 2020 at 10:27 PM <zyjzyj2000@gmail.com> wrote:
>
> From: Zhu Yanjun <zyjzyj2000@gmail.com>
>
> The function xsk_map_inc is a simple wrapper of bpf_map_inc and
> always returns zero. As such, replacing this function with bpf_map_inc
> and removing the test code.
>
> Signed-off-by: Zhu Yanjun <zyjzyj2000@gmail.com>
> ---
>  net/xdp/xsk.c    |  2 +-
>  net/xdp/xsk.h    |  1 -
>  net/xdp/xskmap.c | 13 +------------
>  3 files changed, 2 insertions(+), 14 deletions(-)
>
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index cfbec3989a76..a3c1f07d77d8 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -548,7 +548,7 @@ static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
>         node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
>                                         node);
>         if (node) {
> -               WARN_ON(xsk_map_inc(node->map));
> +               bpf_map_inc(&node->map->map);

Thanks. This is the latest version.

Zhu Yanjun

>                 map = node->map;
>                 *map_entry = node->map_entry;
>         }
> diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
> index b9e896cee5bb..0aad25c0e223 100644
> --- a/net/xdp/xsk.h
> +++ b/net/xdp/xsk.h
> @@ -41,7 +41,6 @@ static inline struct xdp_sock *xdp_sk(struct sock *sk)
>
>  void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
>                              struct xdp_sock **map_entry);
> -int xsk_map_inc(struct xsk_map *map);
>  void xsk_map_put(struct xsk_map *map);
>  void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id);
>  int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
> diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
> index 49da2b8ace8b..6b7e9a72b101 100644
> --- a/net/xdp/xskmap.c
> +++ b/net/xdp/xskmap.c
> @@ -11,12 +11,6 @@
>
>  #include "xsk.h"
>
> -int xsk_map_inc(struct xsk_map *map)
> -{
> -       bpf_map_inc(&map->map);
> -       return 0;
> -}
> -
>  void xsk_map_put(struct xsk_map *map)
>  {
>         bpf_map_put(&map->map);
> @@ -26,17 +20,12 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
>                                                struct xdp_sock **map_entry)
>  {
>         struct xsk_map_node *node;
> -       int err;
>
>         node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
>         if (!node)
>                 return ERR_PTR(-ENOMEM);
>
> -       err = xsk_map_inc(map);
> -       if (err) {
> -               kfree(node);
> -               return ERR_PTR(err);
> -       }
> +       bpf_map_inc(&map->map);
>
>         node->map = map;
>         node->map_entry = map_entry;
> --
> 2.25.1
>

^ permalink raw reply

* Re: [PATCH net-next v2 1/2] lockdep: Introduce in_softirq lockdep assert
From: Peter Zijlstra @ 2020-11-23 14:27 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: mingo, will, viro, kyk.segfault, davem, kuba, linmiaohe,
	martin.varghese, pabeni, pshelar, fw, gnault, steffen.klassert,
	vladimir.oltean, edumazet, saeed, netdev, linux-kernel, linuxarm,
	Thomas Gleixner
In-Reply-To: <1605927976-232804-2-git-send-email-linyunsheng@huawei.com>

On Sat, Nov 21, 2020 at 11:06:15AM +0800, Yunsheng Lin wrote:
> The current semantic for napi_consume_skb() is that caller need
> to provide non-zero budget when calling from NAPI context, and
> breaking this semantic will cause hard to debug problem, because
> _kfree_skb_defer() need to run in atomic context in order to push
> the skb to the particular cpu' napi_alloc_cache atomically.
> 
> So add the lockdep_assert_in_softirq() to assert when the running
> context is not in_softirq, in_softirq means softirq is serving or
> BH is disabled. Because the softirq context can be interrupted by
> hard IRQ or NMI context, so lockdep_assert_in_softirq() need to
> assert about hard IRQ or NMI context too.
> 
> Suggested-by: Jakub Kicinski <kuba@kernel.org>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>  include/linux/lockdep.h | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index f559487..f5e3d81 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -594,6 +594,12 @@ do {									\
>  		      this_cpu_read(hardirqs_enabled)));		\
>  } while (0)

Due to in_softirq() having a deprication notice (due to it being
awefully ambiguous), could we have a nice big comment here that explains
in detail understandable to !network people (me) why this is actually
correct?

I'm not opposed to the thing, if that his what you need, it's fine, but
please put on a comment that explains that in_softirq() is ambiguous and
when you really do need it anyway.

> +#define lockdep_assert_in_softirq()					\
> +do {									\
> +	WARN_ON_ONCE(__lockdep_enabled			&&		\
> +		     (!in_softirq() || in_irq() || in_nmi()));		\
> +} while (0)
> +
>  #else
>  # define might_lock(lock) do { } while (0)
>  # define might_lock_read(lock) do { } while (0)
> @@ -605,6 +611,7 @@ do {									\
>  
>  # define lockdep_assert_preemption_enabled() do { } while (0)
>  # define lockdep_assert_preemption_disabled() do { } while (0)
> +# define lockdep_assert_in_softirq() do { } while (0)
>  #endif
>  
>  #ifdef CONFIG_PROVE_RAW_LOCK_NESTING
> -- 
> 2.8.1
> 

^ permalink raw reply

* Re: [arm64] kernel BUG at kernel/seccomp.c:1309!
From: Gabriel Krisman Bertazi @ 2020-11-23 14:26 UTC (permalink / raw)
  To: Jann Horn
  Cc: Arnd Bergmann, Andy Lutomirski, Thomas Gleixner, Naresh Kamboju,
	open list, Netdev, bpf, lkft-triage, Linux ARM, Daniel Borkmann,
	Kees Cook, Andrii Nakryiko, Song Liu, Yonghong Song,
	Andy Lutomirski, Sumit Semwal, Arnd Bergmann, YiFei Zhu
In-Reply-To: <CAG48ez17CKBMO4193wxuWLRQWQ+q6EV=Qr5oTWiKivMxEi0zQw@mail.gmail.com>

Jann Horn <jannh@google.com> writes:

> On Mon, Nov 23, 2020 at 2:45 PM Arnd Bergmann <arnd@kernel.org> wrote:
>> On Mon, Nov 23, 2020 at 12:15 PM Naresh Kamboju
>> <naresh.kamboju@linaro.org> wrote:
>> >
>> > While booting arm64 kernel the following kernel BUG noticed on several arm64
>> > devices running linux next 20201123 tag kernel.
>> >
>> >
>> > $ git log --oneline next-20201120..next-20201123 -- kernel/seccomp.c
>> > 5c5c5fa055ea Merge remote-tracking branch 'seccomp/for-next/seccomp'
>> > bce6a8cba7bf Merge branch 'linus'
>> > 7ef95e3dbcee Merge branch 'for-linus/seccomp' into for-next/seccomp
>> > fab686eb0307 seccomp: Remove bogus __user annotations
>> > 0d8315dddd28 seccomp/cache: Report cache data through /proc/pid/seccomp_cache
>> > 8e01b51a31a1 seccomp/cache: Add "emulator" to check if filter is constant allow
>> > f9d480b6ffbe seccomp/cache: Lookup syscall allowlist bitmap for fast path
>> > 23d67a54857a seccomp: Migrate to use SYSCALL_WORK flag
>> >
>> >
>> > Please find these easy steps to reproduce the kernel build and boot.
>>
>> Adding Gabriel Krisman Bertazi to Cc, as the last patch (23d67a54857a) here
>> seems suspicious: it changes
>>
>> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
>> index 02aef2844c38..47763f3999f7 100644
>> --- a/include/linux/seccomp.h
>> +++ b/include/linux/seccomp.h
>> @@ -42,7 +42,7 @@ struct seccomp {
>>  extern int __secure_computing(const struct seccomp_data *sd);
>>  static inline int secure_computing(void)
>>  {
>> -       if (unlikely(test_thread_flag(TIF_SECCOMP)))
>> +       if (unlikely(test_syscall_work(SECCOMP)))
>>                 return  __secure_computing(NULL);
>>         return 0;
>>  }
>>
>> which is in the call chain directly before
>>
>> int __secure_computing(const struct seccomp_data *sd)
>> {
>>        int mode = current->seccomp.mode;
>>
>> ...
>>         switch (mode) {
>>         case SECCOMP_MODE_STRICT:
>>                 __secure_computing_strict(this_syscall);  /* may call do_exit */
>>                 return 0;
>>         case SECCOMP_MODE_FILTER:
>>                 return __seccomp_filter(this_syscall, sd, false);
>>         default:
>>                 BUG();
>>         }
>> }
>>
>> Clearly, current->seccomp.mode is set to something other
>> than SECCOMP_MODE_STRICT or SECCOMP_MODE_FILTER
>> while the test_syscall_work(SECCOMP) returns true, and this
>> must have not been the case earlier.
>
> Ah, I think the problem is actually in
> 3136b93c3fb2b7c19e853e049203ff8f2b9dd2cd ("entry: Expose helpers to
> migrate TIF to SYSCALL_WORK flag"). In the !GENERIC_ENTRY case, it
> adds this code:
>
> +#define set_syscall_work(fl)                                           \
> +       set_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl)
> +#define test_syscall_work(fl) \
> +       test_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl)
> +#define clear_syscall_work(fl) \
> +       clear_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl)
> +
> +#define set_task_syscall_work(t, fl) \
> +       set_ti_thread_flag(task_thread_info(t), TIF_##fl)
> +#define test_task_syscall_work(t, fl) \
> +       test_ti_thread_flag(task_thread_info(t), TIF_##fl)
> +#define clear_task_syscall_work(t, fl) \
> +       clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
>
> but the SYSCALL_WORK_FLAGS are not valid on !GENERIC_ENTRY, we'll mix
> up (on arm64) SYSCALL_WORK_BIT_SECCOMP (==0) and TIF_SIGPENDING (==0).
>
> As part of fixing this, it might be a good idea to put "enum
> syscall_work_bit" behind a "#ifdef CONFIG_GENERIC_ENTRY" to avoid
> future accidents like this?

Hi Jan, Arnd,

That is correct.  This is a copy pasta mistake.  My apologies.  I didn't
have a !GENERIC_ENTRY device to test, but just the ifdef would have
caught it.

-- 
Gabriel Krisman Bertazi

^ permalink raw reply

* Re: [PATCH net-next] bridge: mrp: Implement LC mode for MRP
From: Nikolay Aleksandrov @ 2020-11-23 14:25 UTC (permalink / raw)
  To: Horatiu Vultur; +Cc: roopa, davem, kuba, linux-kernel, bridge, netdev
In-Reply-To: <20201123123132.uxvec6uwuegioc25@soft-dev3.localdomain>

On 23/11/2020 14:31, Horatiu Vultur wrote:
> The 11/23/2020 14:13, Nikolay Aleksandrov wrote:
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>
>> On 23/11/2020 13:14, Horatiu Vultur wrote:
>>> Extend MRP to support LC mode(link check) for the interconnect port.
>>> This applies only to the interconnect ring.
>>>
>>> Opposite to RC mode(ring check) the LC mode is using CFM frames to
>>> detect when the link goes up or down and based on that the userspace
>>> will need to react.
>>> One advantage of the LC mode over RC mode is that there will be fewer
>>> frames in the normal rings. Because RC mode generates InTest on all
>>> ports while LC mode sends CFM frame only on the interconnect port.
>>>
>>> All 4 nodes part of the interconnect ring needs to have the same mode.
>>> And it is not possible to have running LC and RC mode at the same time
>>> on a node.
>>>
>>> Whenever the MIM starts it needs to detect the status of the other 3
>>> nodes in the interconnect ring so it would send a frame called
>>> InLinkStatus, on which the clients needs to reply with their link
>>> status.
>>>
>>> This patch adds the frame header for the frame InLinkStatus and
>>> extends existing rules on how to forward this frame.
>>>
>>> Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
>>> ---
>>>  include/uapi/linux/mrp_bridge.h |  7 +++++++
>>>  net/bridge/br_mrp.c             | 18 +++++++++++++++---
>>>  2 files changed, 22 insertions(+), 3 deletions(-)
>>>
>>
>> Hi Horatiu,
>> The patch looks good overall, just one question below.
> 
> Hi Nik,
> 
> Thanks for taking time to review the patch.
> 
>>
>>> diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h
>>> index 6aeb13ef0b1e..450f6941a5a1 100644
>>> --- a/include/uapi/linux/mrp_bridge.h
>>> +++ b/include/uapi/linux/mrp_bridge.h
>>> @@ -61,6 +61,7 @@ enum br_mrp_tlv_header_type {
>>>       BR_MRP_TLV_HEADER_IN_TOPO = 0x7,
>>>       BR_MRP_TLV_HEADER_IN_LINK_DOWN = 0x8,
>>>       BR_MRP_TLV_HEADER_IN_LINK_UP = 0x9,
>>> +     BR_MRP_TLV_HEADER_IN_LINK_STATUS = 0xa,
>>>       BR_MRP_TLV_HEADER_OPTION = 0x7f,
>>>  };
>>>
>>> @@ -156,4 +157,10 @@ struct br_mrp_in_link_hdr {
>>>       __be16 interval;
>>>  };
>>>
>>> +struct br_mrp_in_link_status_hdr {
>>> +     __u8 sa[ETH_ALEN];
>>> +     __be16 port_role;
>>> +     __be16 id;
>>> +};
>>> +
>>
>> I didn't see this struct used anywhere, am I missing anything?
> 
> Yes, you are right, the struct is not used any. But I put it there as I
> put the other frame types for MRP.
> 

I see, we don't usually add unused code. The patch is fine as-is and since
this is already the case for other MRP parts I'm not strictly against it, so:

Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>

If Jakub decides to adhere to that rule you can keep my acked-by and just remove
the struct for v2.

Thanks,
 Nik


^ permalink raw reply

* Hardcoded multicast queue length in macvlan.c driver causes poor multicast receive performance
From: Thomas Karlsson @ 2020-11-23 14:22 UTC (permalink / raw)
  To: davem@davemloft.net, kuba@kernel.org; +Cc: netdev@vger.kernel.org
In-Reply-To: <485531aec7e243659ee4e3bb7fa2186d@paneda.se>

Hello,

There is a special queue handling in macvlan.c for broadcast and multicast packages that was arbitrarily set to 1000 in commit 07d92d5cc977a7fe1e683e1d4a6f723f7f2778cb . While this is probably sufficient for most uses cases it is insufficient to support high packet rates. I currently have a setup with 144 000 multicast packets incoming per second (144 different live audio RTP streams) and suffer very frequent packet loss. With unicast this is not an issue and I can in addition to the 144kpps load the macvlan interface with another 450mbit/s using iperf.

In order to verify that the queue is the problem I edited the define to 100000 and recompiled the kernel module. After replacing it with rmmod/insmod I get 0 packet loss (measured over 2 days where I before had losses every other second or so) and can also load an additional 450 mbit/s multicast traffic using iperf without losses. So basically no change in performance between unicast/multicast when it comes to lost packets on my machine.

I think It would be best if this queue length was configurable somehow. Either an option when creating the macvlan (like how bridge/passthrough/etc are set) or at least when loading the module (for instance by using a config in /etc/modprobe.d). One size does not fit all in this situation.


Link to code in question using the define (on master):
https://github.com/torvalds/linux/blob/27bba9c532a8d21050b94224ffd310ad0058c353/drivers/net/macvlan.c#L357 

(re-sent in text/plain instead of html)

Best regards,
Thomas Karlsson

^ permalink raw reply

* Re: [PATCH net-next 10/10] mptcp: refine MPTCP-level ack scheduling
From: Paolo Abeni @ 2020-11-23 14:21 UTC (permalink / raw)
  To: Eric Dumazet, Mat Martineau, netdev; +Cc: kuba, mptcp
In-Reply-To: <ca0b65f8-7a69-ff4e-9e0d-66a7a923b0c1@gmail.com>

Hi,

On Mon, 2020-11-23 at 12:57 +0100, Eric Dumazet wrote:
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 4ae2c4a30e44..748343f1a968 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -407,16 +407,42 @@ static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
> >  	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
> >  }
> >  
> > -static void mptcp_send_ack(struct mptcp_sock *msk)
> > +static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
> > +{
> > +	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
> > +
> > +	/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
> > +	if (subflow->request_join && !subflow->fully_established)
> > +		return false;
> > +
> > +	/* only send if our side has not closed yet */
> > +	return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
> > +}
> > +
> > +static void mptcp_send_ack(struct mptcp_sock *msk, bool force)
> >  {
> >  	struct mptcp_subflow_context *subflow;
> > +	struct sock *pick = NULL;
> >  
> >  	mptcp_for_each_subflow(msk, subflow) {
> >  		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
> >  
> > -		lock_sock(ssk);
> > -		tcp_send_ack(ssk);
> > -		release_sock(ssk);
> > +		if (force) {
> > +			lock_sock(ssk);
> > +			tcp_send_ack(ssk);
> > +			release_sock(ssk);
> > +			continue;
> > +		}
> > +
> > +		/* if the hintes ssk is still active, use it */
> > +		pick = ssk;
> > +		if (ssk == msk->ack_hint)
> > +			break;
> > +	}
> > +	if (!force && pick) {
> > +		lock_sock(pick);
> > +		tcp_cleanup_rbuf(pick, 1);
> 
> Calling tcp_cleanup_rbuf() on a socket that was never established is going to fail
> with a divide by 0 (mss being 0)
> 
> AFAIK, mptcp_recvmsg() can be called right after a socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP)
> call.
> 
> Probably, after a lock_sock(), you should double check socket state (same above before calling tcp_send_ack())

Thank you for looking into this.

Indeed you are right! I'll try to cook a fix.

Cheers,

Paolo


^ permalink raw reply

* Re: [PATCH 000/141] Fix fall-through warnings for Clang
From: Miguel Ojeda @ 2020-11-23 14:19 UTC (permalink / raw)
  To: James Bottomley
  Cc: Kees Cook, Jakub Kicinski, Gustavo A. R. Silva, linux-kernel,
	alsa-devel, amd-gfx, bridge, ceph-devel, cluster-devel, coreteam,
	devel, dm-devel, drbd-dev, dri-devel, GR-everest-linux-l2,
	GR-Linux-NIC-Dev, intel-gfx, intel-wired-lan, keyrings,
	linux1394-devel, linux-acpi, linux-afs, Linux ARM, linux-arm-msm,
	linux-atm-general, linux-block, linux-can, linux-cifs,
	Linux Crypto Mailing List, linux-decnet-user,
	Ext4 Developers List, linux-fbdev, linux-geode, linux-gpio,
	linux-hams, linux-hwmon, linux-i3c, linux-ide, linux-iio,
	linux-input, linux-integrity, linux-mediatek,
	Linux Media Mailing List, linux-mmc, Linux-MM, linux-mtd,
	linux-nfs, linux-rdma, linux-renesas-soc, linux-scsi, linux-sctp,
	linux-security-module, linux-stm32, linux-usb, linux-watchdog,
	linux-wireless, Network Development, netfilter-devel, nouveau,
	op-tee, oss-drivers, patches, rds-devel, reiserfs-devel,
	samba-technical, selinux, target-devel, tipc-discussion,
	usb-storage, virtualization, wcn36xx,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT), xen-devel,
	linux-hardening, Nick Desaulniers, Nathan Chancellor,
	Miguel Ojeda, Joe Perches
In-Reply-To: <1c7d7fde126bc0acf825766de64bf2f9b888f216.camel@HansenPartnership.com>

On Sun, Nov 22, 2020 at 11:36 PM James Bottomley
<James.Bottomley@hansenpartnership.com> wrote:
>
> Well, it seems to be three years of someone's time plus the maintainer
> review time and series disruption of nearly a thousand patches.  Let's
> be conservative and assume the producer worked about 30% on the series
> and it takes about 5-10 minutes per patch to review, merge and for
> others to rework existing series.  So let's say it's cost a person year
> of a relatively junior engineer producing the patches and say 100h of
> review and application time.  The latter is likely the big ticket item
> because it's what we have in least supply in the kernel (even though
> it's 20x vs the producer time).

How are you arriving at such numbers? It is a total of ~200 trivial lines.

> It's not about the risk of the changes it's about the cost of
> implementing them.  Even if you discount the producer time (which
> someone gets to pay for, and if I were the engineering manager, I'd be
> unhappy about), the review/merge/rework time is pretty significant in
> exchange for six minor bug fixes.  Fine, when a new compiler warning
> comes along it's certainly reasonable to see if we can benefit from it
> and the fact that the compiler people think it's worthwhile is enough
> evidence to assume this initially.  But at some point you have to ask
> whether that assumption is supported by the evidence we've accumulated
> over the time we've been using it.  And if the evidence doesn't support
> it perhaps it is time to stop the experiment.

Maintainers routinely review 1-line trivial patches, not to mention
internal API changes, etc.

If some company does not want to pay for that, that's fine, but they
don't get to be maintainers and claim `Supported`.

Cheers,
Miguel

^ permalink raw reply

* [PATCH net] netdevice.h: Fix unintentional disable of ALL_FOR_ALL features on upper device
From: Tariq Toukan @ 2020-11-23 14:12 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Eric Dumazet, Herbert Xu
  Cc: netdev, Moshe Shemesh, Tariq Toukan, Maxim Mikityanskiy,
	Saeed Mahameed, Tariq Toukan

Calling netdev_increment_features() on upper/master device from
netdev_add_tso_features() implies unintentional clearance of ALL_FOR_ALL
features supported by all slaves.  Fix it by passing ALL_FOR_ALL in
addition to ALL_TSO.

Fixes: b0ce3508b25e ("bonding: allow TSO being set on bonding master")
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Hi,

I know that netdev_increment_features() does not set any feature that's
unmasked in the mask argument.
I wonder why it can clear them though, was it meant to be like this?
If not, then the proper fix should be in netdev_increment_features(), not
in netdev_add_tso_features().


diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 18dec08439f9..a9d5e4bb829b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4748,7 +4748,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all,
 static inline netdev_features_t netdev_add_tso_features(netdev_features_t features,
 							netdev_features_t mask)
 {
-	return netdev_increment_features(features, NETIF_F_ALL_TSO, mask);
+	return netdev_increment_features(features, NETIF_F_ALL_TSO | NETIF_F_ALL_FOR_ALL, mask);
 }
 
 int __netdev_update_features(struct net_device *dev);
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH 1/3] xsk: replace datagram_poll by sock_poll_wait
From: Magnus Karlsson @ 2020-11-23 14:11 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: Björn Töpel, Magnus Karlsson, Jonathan Lemon,
	David S. Miller, Jakub Kicinski, Alexei Starovoitov,
	Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
	Network Development, bpf, linux-kernel
In-Reply-To: <dfa43bcf7083edd0823e276c0cf8e21f3a226da6.1605686678.git.xuanzhuo@linux.alibaba.com>

On Wed, Nov 18, 2020 at 9:26 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> datagram_poll will judge the current socket status (EPOLLIN, EPOLLOUT)
> based on the traditional socket information (eg: sk_wmem_alloc), but
> this does not apply to xsk. So this patch uses sock_poll_wait instead of
> datagram_poll, and the mask is calculated by xsk_poll.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  net/xdp/xsk.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index cfbec39..7f0353e 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -477,11 +477,13 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
>  static __poll_t xsk_poll(struct file *file, struct socket *sock,
>                              struct poll_table_struct *wait)
>  {
> -       __poll_t mask = datagram_poll(file, sock, wait);
> +       __poll_t mask = 0;

It would indeed be nice to not execute a number of tests in
datagram_poll that will never be triggered. It will speed up things
for sure. But we need to make sure that removing those flags that
datagram_poll sets do not have any bad effects in the code above this.
But let us tentatively keep this patch for the next version of the
patch set. Just need to figure out how to solve your problem in a nice
way first. See discussion in patch 0/3.

>         struct sock *sk = sock->sk;
>         struct xdp_sock *xs = xdp_sk(sk);
>         struct xsk_buff_pool *pool;
>
> +       sock_poll_wait(file, sock, wait);
> +
>         if (unlikely(!xsk_is_bound(xs)))
>                 return mask;
>
> --
> 1.8.3.1
>

^ permalink raw reply

* Re: [PATCH bpf] net, xsk: Avoid taking multiple skbuff references
From: Björn Töpel @ 2020-11-23 14:05 UTC (permalink / raw)
  To: Daniel Borkmann, Björn Töpel, ast, netdev, bpf
  Cc: jonathan.lemon, yhs, weqaar.janjua, magnus.karlsson,
	weqaar.a.janjua
In-Reply-To: <12b970c5-6b44-5288-0c79-2df5178d1165@iogearbox.net>

On 2020-11-23 14:53, Daniel Borkmann wrote:
[...]
> 
> Hm, but this way free_on_busy, even though constant, cannot be optimized 
> away?
> Can't you just move the dev_xmit_complete() check out into 
> dev_direct_xmit()
> instead? That way you can just drop the bool, and the below 
> dev_direct_xmit()
> should probably just become an __always_line function in netdevice.h so you
> avoid the double call.
>

Good suggestion! I'll spin a v2.


Björn

^ permalink raw reply

* Re: [PATCH 000/141] Fix fall-through warnings for Clang
From: Miguel Ojeda @ 2020-11-23 14:05 UTC (permalink / raw)
  To: Finn Thain
  Cc: James Bottomley, Kees Cook, Jakub Kicinski, Gustavo A. R. Silva,
	linux-kernel, alsa-devel, amd-gfx, bridge, ceph-devel,
	cluster-devel, coreteam, devel, dm-devel, drbd-dev, dri-devel,
	GR-everest-linux-l2, GR-Linux-NIC-Dev, intel-gfx, intel-wired-lan,
	keyrings, linux1394-devel, linux-acpi, linux-afs, Linux ARM,
	linux-arm-msm, linux-atm-general, linux-block, linux-can,
	linux-cifs, Linux Crypto Mailing List, linux-decnet-user,
	Ext4 Developers List, linux-fbdev, linux-geode, linux-gpio,
	linux-hams, linux-hwmon, linux-i3c, linux-ide, linux-iio,
	linux-input, linux-integrity, linux-mediatek,
	Linux Media Mailing List, linux-mmc, Linux-MM, linux-mtd,
	linux-nfs, linux-rdma, linux-renesas-soc, linux-scsi, linux-sctp,
	linux-security-module, linux-stm32, linux-usb, linux-watchdog,
	linux-wireless, Network Development, netfilter-devel, nouveau,
	op-tee, oss-drivers, patches, rds-devel, reiserfs-devel,
	samba-technical, selinux, target-devel, tipc-discussion,
	usb-storage, virtualization, wcn36xx,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT), xen-devel,
	linux-hardening, Nick Desaulniers, Nathan Chancellor,
	Miguel Ojeda, Joe Perches
In-Reply-To: <alpine.LNX.2.23.453.2011230938390.7@nippy.intranet>

On Sun, Nov 22, 2020 at 11:54 PM Finn Thain <fthain@telegraphics.com.au> wrote:
>
> We should also take into account optimisim about future improvements in
> tooling.

Not sure what you mean here. There is no reliable way to guess what
the intention was with a missing fallthrough, even if you parsed
whitespace and indentation.

> It is if you want to spin it that way.

How is that a "spin"? It is a fact that we won't get *implicit*
fallthrough mistakes anymore (in particular if we make it a hard
error).

> But what we inevitably get is changes like this:
>
>  case 3:
>         this();
> +       break;
>  case 4:
>         hmmm();
>
> Why? Mainly to silence the compiler. Also because the patch author argued
> successfully that they had found a theoretical bug, often in mature code.

If someone changes control flow, that is on them. Every kernel
developer knows what `break` does.

> But is anyone keeping score of the regressions? If unreported bugs count,
> what about unreported regressions?

Introducing `fallthrough` does not change semantics. If you are really
keen, you can always compare the objects because the generated code
shouldn't change.

Cheers,
Miguel

^ permalink raw reply

* Re: [arm64] kernel BUG at kernel/seccomp.c:1309!
From: Jann Horn @ 2020-11-23 14:02 UTC (permalink / raw)
  To: Arnd Bergmann, Gabriel Krisman Bertazi, Andy Lutomirski,
	Thomas Gleixner
  Cc: Naresh Kamboju, open list, Netdev, bpf, lkft-triage, Linux ARM,
	Daniel Borkmann, Kees Cook, Andrii Nakryiko, Song Liu,
	Yonghong Song, Andy Lutomirski, Sumit Semwal, Arnd Bergmann,
	YiFei Zhu
In-Reply-To: <CAK8P3a1Lx1MMQ3s1uWjevsi2wqFo2r=k1hhrxf1spUxEQX_Rag@mail.gmail.com>

On Mon, Nov 23, 2020 at 2:45 PM Arnd Bergmann <arnd@kernel.org> wrote:
> On Mon, Nov 23, 2020 at 12:15 PM Naresh Kamboju
> <naresh.kamboju@linaro.org> wrote:
> >
> > While booting arm64 kernel the following kernel BUG noticed on several arm64
> > devices running linux next 20201123 tag kernel.
> >
> >
> > $ git log --oneline next-20201120..next-20201123 -- kernel/seccomp.c
> > 5c5c5fa055ea Merge remote-tracking branch 'seccomp/for-next/seccomp'
> > bce6a8cba7bf Merge branch 'linus'
> > 7ef95e3dbcee Merge branch 'for-linus/seccomp' into for-next/seccomp
> > fab686eb0307 seccomp: Remove bogus __user annotations
> > 0d8315dddd28 seccomp/cache: Report cache data through /proc/pid/seccomp_cache
> > 8e01b51a31a1 seccomp/cache: Add "emulator" to check if filter is constant allow
> > f9d480b6ffbe seccomp/cache: Lookup syscall allowlist bitmap for fast path
> > 23d67a54857a seccomp: Migrate to use SYSCALL_WORK flag
> >
> >
> > Please find these easy steps to reproduce the kernel build and boot.
>
> Adding Gabriel Krisman Bertazi to Cc, as the last patch (23d67a54857a) here
> seems suspicious: it changes
>
> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
> index 02aef2844c38..47763f3999f7 100644
> --- a/include/linux/seccomp.h
> +++ b/include/linux/seccomp.h
> @@ -42,7 +42,7 @@ struct seccomp {
>  extern int __secure_computing(const struct seccomp_data *sd);
>  static inline int secure_computing(void)
>  {
> -       if (unlikely(test_thread_flag(TIF_SECCOMP)))
> +       if (unlikely(test_syscall_work(SECCOMP)))
>                 return  __secure_computing(NULL);
>         return 0;
>  }
>
> which is in the call chain directly before
>
> int __secure_computing(const struct seccomp_data *sd)
> {
>        int mode = current->seccomp.mode;
>
> ...
>         switch (mode) {
>         case SECCOMP_MODE_STRICT:
>                 __secure_computing_strict(this_syscall);  /* may call do_exit */
>                 return 0;
>         case SECCOMP_MODE_FILTER:
>                 return __seccomp_filter(this_syscall, sd, false);
>         default:
>                 BUG();
>         }
> }
>
> Clearly, current->seccomp.mode is set to something other
> than SECCOMP_MODE_STRICT or SECCOMP_MODE_FILTER
> while the test_syscall_work(SECCOMP) returns true, and this
> must have not been the case earlier.

Ah, I think the problem is actually in
3136b93c3fb2b7c19e853e049203ff8f2b9dd2cd ("entry: Expose helpers to
migrate TIF to SYSCALL_WORK flag"). In the !GENERIC_ENTRY case, it
adds this code:

+#define set_syscall_work(fl)                                           \
+       set_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl)
+#define test_syscall_work(fl) \
+       test_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl)
+#define clear_syscall_work(fl) \
+       clear_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl)
+
+#define set_task_syscall_work(t, fl) \
+       set_ti_thread_flag(task_thread_info(t), TIF_##fl)
+#define test_task_syscall_work(t, fl) \
+       test_ti_thread_flag(task_thread_info(t), TIF_##fl)
+#define clear_task_syscall_work(t, fl) \
+       clear_ti_thread_flag(task_thread_info(t), TIF_##fl)

but the SYSCALL_WORK_FLAGS are not valid on !GENERIC_ENTRY, we'll mix
up (on arm64) SYSCALL_WORK_BIT_SECCOMP (==0) and TIF_SIGPENDING (==0).

As part of fixing this, it might be a good idea to put "enum
syscall_work_bit" behind a "#ifdef CONFIG_GENERIC_ENTRY" to avoid
future accidents like this?

^ permalink raw reply

* Re: [PATCH 0/3] xsk: fix for xsk_poll writeable
From: Magnus Karlsson @ 2020-11-23 14:00 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: Björn Töpel, Magnus Karlsson, Jonathan Lemon,
	David S. Miller, Jakub Kicinski, Alexei Starovoitov,
	Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend,
	Network Development, bpf, linux-kernel
In-Reply-To: <cover.1605686678.git.xuanzhuo@linux.alibaba.com>

On Wed, Nov 18, 2020 at 9:25 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> I tried to combine cq available and tx writeable, but I found it very difficult.
> Sometimes we pay attention to the status of "available" for both, but sometimes,
> we may only pay attention to one, such as tx writeable, because we can use the
> item of fq to write to tx. And this kind of demand may be constantly changing,
> and it may be necessary to set it every time before entering xsk_poll, so
> setsockopt is not very convenient. I feel even more that using a new event may
> be a better solution, such as EPOLLPRI, I think it can be used here, after all,
> xsk should not have OOB data ^_^.
>
> However, two other problems were discovered during the test:
>
> * The mask returned by datagram_poll always contains EPOLLOUT
> * It is not particularly reasonable to return EPOLLOUT based on tx not full
>
> After fixing these two problems, I found that when the process is awakened by
> EPOLLOUT, the process can always get the item from cq.
>
> Because the number of packets that the network card can send at a time is
> actually limited, suppose this value is "nic_num". Once the number of
> consumed items in the tx queue is greater than nic_num, this means that there
> must also be new recycled items in the cq queue from nic.
>
> In this way, as long as the tx configured by the user is larger, we won't have
> the situation that tx is already in the writeable state but cannot get the item
> from cq.

I think the overall approach of tying this into poll() instead of
setsockopt() is the right way to go. But we need a more robust
solution. Your patch #3 also breaks backwards compatibility and that
is not allowed. Could you please post some simple code example of what
it is you would like to do in user space? So you would like to wake up
when there are entries in the cq that can be retrieved and the reason
you would like to do this is that you then know you can put some more
entries into the Tx ring and they will get sent as there now are free
slots in the cq. Correct me if wrong. Would an event that wakes you up
when there is both space in the Tx ring and space in the cq work? Is
there a case in which we would like to be woken up when only the Tx
ring is non-full? Maybe there are as it might be beneficial to fill
the Tx and while doing that some entries in the cq has been completed
and away the packets go. But it would be great if you could post some
simple example code, does not need to compile or anything. Can be
pseudo code.

It would also be good to know if your goal is max throughput, max
burst size, or something else.

Thanks: Magnus


> Xuan Zhuo (3):
>   xsk: replace datagram_poll by sock_poll_wait
>   xsk: change the tx writeable condition
>   xsk: set tx/rx the min entries
>
>  include/uapi/linux/if_xdp.h |  2 ++
>  net/xdp/xsk.c               | 26 ++++++++++++++++++++++----
>  net/xdp/xsk_queue.h         |  6 ++++++
>  3 files changed, 30 insertions(+), 4 deletions(-)
>
> --
> 1.8.3.1
>

^ permalink raw reply

* Re: [PATCH bpf] net, xsk: Avoid taking multiple skbuff references
From: Daniel Borkmann @ 2020-11-23 13:53 UTC (permalink / raw)
  To: Björn Töpel, ast, netdev, bpf
  Cc: Björn Töpel, jonathan.lemon, yhs, weqaar.janjua,
	magnus.karlsson, weqaar.a.janjua
In-Reply-To: <20201123131215.136131-1-bjorn.topel@gmail.com>

On 11/23/20 2:12 PM, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> Commit 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY")
> addressed the problem that packets were discarded from the Tx AF_XDP
> ring, when the driver returned NETDEV_TX_BUSY. Part of the fix was
> bumping the skbuff reference count, so that the buffer would not be
> freed by dev_direct_xmit(). A reference count larger than one means
> that the skbuff is "shared", which is not the case.
> 
> If the "shared" skbuff is sent to the generic XDP receive path,
> netif_receive_generic_xdp(), and pskb_expand_head() is entered the
> BUG_ON(skb_shared(skb)) will trigger.
> 
> This patch adds a variant to dev_direct_xmit(), __dev_direct_xmit(),
> where a user can select the skbuff free policy. This allows AF_XDP to
> avoid bumping the reference count, but still keep the NETDEV_TX_BUSY
> behavior.
> 
> Reported-by: Yonghong Song <yhs@fb.com>
> Fixes: 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY")
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> ---
>   include/linux/netdevice.h | 1 +
>   net/core/dev.c            | 9 +++++++--
>   net/xdp/xsk.c             | 8 +-------
>   3 files changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 964b494b0e8d..e7402fca7752 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2815,6 +2815,7 @@ u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
>   		       struct net_device *sb_dev);
>   int dev_queue_xmit(struct sk_buff *skb);
>   int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
> +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id, bool free_on_busy);
>   int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
>   int register_netdevice(struct net_device *dev);
>   void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 82dc6b48e45f..2af79a4253bb 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4180,7 +4180,7 @@ int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
>   }
>   EXPORT_SYMBOL(dev_queue_xmit_accel);
>   
> -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
> +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id, bool free_on_busy)
>   {
>   	struct net_device *dev = skb->dev;
>   	struct sk_buff *orig_skb = skb;
> @@ -4211,7 +4211,7 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
>   
>   	local_bh_enable();
>   
> -	if (!dev_xmit_complete(ret))
> +	if (free_on_busy && !dev_xmit_complete(ret))
>   		kfree_skb(skb);
>   
>   	return ret;

Hm, but this way free_on_busy, even though constant, cannot be optimized away?
Can't you just move the dev_xmit_complete() check out into dev_direct_xmit()
instead? That way you can just drop the bool, and the below dev_direct_xmit()
should probably just become an __always_line function in netdevice.h so you
avoid the double call.

> @@ -4220,6 +4220,11 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
>   	kfree_skb_list(skb);
>   	return NET_XMIT_DROP;
>   }
> +
> +int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
> +{
> +	return __dev_direct_xmit(skb, queue_id, true);
> +}
>   EXPORT_SYMBOL(dev_direct_xmit);
>   
>   /*************************************************************************
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index 5a6cdf7b320d..c6ad31b374b7 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -411,11 +411,7 @@ static int xsk_generic_xmit(struct sock *sk)
>   		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
>   		skb->destructor = xsk_destruct_skb;
>   
> -		/* Hinder dev_direct_xmit from freeing the packet and
> -		 * therefore completing it in the destructor
> -		 */
> -		refcount_inc(&skb->users);
> -		err = dev_direct_xmit(skb, xs->queue_id);
> +		err = __dev_direct_xmit(skb, xs->queue_id, false);
>   		if  (err == NETDEV_TX_BUSY) {
>   			/* Tell user-space to retry the send */
>   			skb->destructor = sock_wfree;
> @@ -429,12 +425,10 @@ static int xsk_generic_xmit(struct sock *sk)
>   		/* Ignore NET_XMIT_CN as packet might have been sent */
>   		if (err == NET_XMIT_DROP) {
>   			/* SKB completed but not sent */
> -			kfree_skb(skb);
>   			err = -EBUSY;
>   			goto out;
>   		}
>   
> -		consume_skb(skb);
>   		sent_frame = true;
>   	}
>   
> 
> base-commit: 178648916e73e00de83150eb0c90c0d3a977a46a
> 


^ permalink raw reply

* [RFC 18/18] net: iosm: infrastructure
From: M Chetan Kumar @ 2020-11-23 13:51 UTC (permalink / raw)
  To: netdev, linux-wireless; +Cc: johannes, krishna.c.sudi, m.chetan.kumar
In-Reply-To: <20201123135123.48892-1-m.chetan.kumar@intel.com>

1) Kconfig & Makefile changes for IOSM Driver compilation.
2) Modified driver/net Kconfig & Makefile for driver inclusion.
3) Modified MAINTAINER file for IOSM Driver addition.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
---
 MAINTAINERS                    |  7 +++++++
 drivers/net/Kconfig            |  1 +
 drivers/net/Makefile           |  1 +
 drivers/net/wwan/Kconfig       | 13 +++++++++++++
 drivers/net/wwan/Makefile      |  5 +++++
 drivers/net/wwan/iosm/Kconfig  | 10 ++++++++++
 drivers/net/wwan/iosm/Makefile | 27 +++++++++++++++++++++++++++
 7 files changed, 64 insertions(+)
 create mode 100644 drivers/net/wwan/Kconfig
 create mode 100644 drivers/net/wwan/Makefile
 create mode 100644 drivers/net/wwan/iosm/Kconfig
 create mode 100644 drivers/net/wwan/iosm/Makefile

diff --git a/MAINTAINERS b/MAINTAINERS
index a008b70f3c16..cb1fc8fabffd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9099,6 +9099,13 @@ M:	Mario Limonciello <mario.limonciello@dell.com>
 S:	Maintained
 F:	drivers/platform/x86/intel-wmi-thunderbolt.c
 
+INTEL WWAN IOSM DRIVER
+M:      M Chetan Kumar <m.chetan.kumar@intel.com>
+M:      Intel Corporation <linuxwwan@intel.com>
+L:      netdev@vger.kernel.org
+S:      Maintained
+F:      drivers/net/wwan/iosm/
+
 INTEL(R) TRACE HUB
 M:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
 S:	Supported
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c3dbe64e628e..e0f869a2c52f 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -593,4 +593,5 @@ config NET_FAILOVER
 	  a VM with direct attached VF by failing over to the paravirtual
 	  datapath when the VF is unplugged.
 
+source "drivers/net/wwan/Kconfig"
 endif # NETDEVICES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 72e18d505d1a..025fb399d2af 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -84,3 +84,4 @@ thunderbolt-net-y += thunderbolt.o
 obj-$(CONFIG_USB4_NET) += thunderbolt-net.o
 obj-$(CONFIG_NETDEVSIM) += netdevsim/
 obj-$(CONFIG_NET_FAILOVER) += net_failover.o
+obj-$(CONFIG_WWAN)+= wwan/
diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
new file mode 100644
index 000000000000..715dfd0598f9
--- /dev/null
+++ b/drivers/net/wwan/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Wireless WAN device configuration
+#
+
+menuconfig WWAN
+	bool "Wireless WAN"
+	help
+	  This section contains all Wireless WAN driver configurations.
+
+if WWAN
+source "drivers/net/wwan/iosm/Kconfig"
+endif # WWAN
diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
new file mode 100644
index 000000000000..a81ff28e6cd9
--- /dev/null
+++ b/drivers/net/wwan/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Linux WWAN Device Drivers.
+#
+obj-$(CONFIG_IOSM)+= iosm/
diff --git a/drivers/net/wwan/iosm/Kconfig b/drivers/net/wwan/iosm/Kconfig
new file mode 100644
index 000000000000..fed382fc9cd7
--- /dev/null
+++ b/drivers/net/wwan/iosm/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: (GPL-2.0-only)
+#
+# IOSM Driver configuration
+#
+
+config IOSM
+	tristate "IOSM Driver"
+	depends on INTEL_IOMMU
+	help
+	  This driver enables Intel M.2 WWAN Device communication.
diff --git a/drivers/net/wwan/iosm/Makefile b/drivers/net/wwan/iosm/Makefile
new file mode 100644
index 000000000000..153ae0360244
--- /dev/null
+++ b/drivers/net/wwan/iosm/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: (GPL-2.0-only)
+#
+# Copyright (C) 2020 Intel Corporation.
+#
+
+iosm-y = \
+	iosm_ipc_task_queue.o	\
+	iosm_ipc_imem.o			\
+	iosm_ipc_imem_ops.o		\
+	iosm_ipc_mmio.o			\
+	iosm_ipc_sio.o			\
+	iosm_ipc_mbim.o			\
+	iosm_ipc_wwan.o			\
+	iosm_ipc_uevent.o		\
+	iosm_ipc_pm.o			\
+	iosm_ipc_pcie.o			\
+	iosm_ipc_irq.o			\
+	iosm_ipc_chnl_cfg.o		\
+	iosm_ipc_protocol.o		\
+	iosm_ipc_protocol_ops.o	\
+	iosm_ipc_mux.o			\
+	iosm_ipc_mux_codec.o
+
+obj-$(CONFIG_IOSM) := iosm.o
+
+# compilation flags
+#ccflags-y += -DDEBUG
-- 
2.12.3


^ permalink raw reply related

* [RFC 17/18] net: iosm: readme file
From: M Chetan Kumar @ 2020-11-23 13:51 UTC (permalink / raw)
  To: netdev, linux-wireless; +Cc: johannes, krishna.c.sudi, m.chetan.kumar
In-Reply-To: <20201123135123.48892-1-m.chetan.kumar@intel.com>

Documents IOSM Driver interface usage.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
---
 drivers/net/wwan/iosm/README | 126 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/README

diff --git a/drivers/net/wwan/iosm/README b/drivers/net/wwan/iosm/README
new file mode 100644
index 000000000000..4a489177ad96
--- /dev/null
+++ b/drivers/net/wwan/iosm/README
@@ -0,0 +1,126 @@
+IOSM Driver for PCIe based Intel M.2 Modems
+================================================
+The IOSM (IPC over Shared Memory) driver is a PCIe host driver implemented
+for linux or chrome platform for data exchange over PCIe interface between
+Host platform & Intel M.2 Modem. The driver exposes interface conforming to the
+MBIM protocol [1]. Any front end application ( eg: Modem Manager) could easily
+manage the MBIM interface to enable data communication towards WWAN.
+
+Basic usage
+===========
+MBIM functions are inactive when unmanaged. The IOSM driver only
+provides a userspace interface of a character device representing
+MBIM control channel and does not play any role in managing the
+functionality. It is the job of a userspace application to enumerate
+the port appropriately and enable MBIM functionality.
+
+Examples of few such userspace application are:
+ - mbimcli (included with the libmbim [2] library), and
+ - ModemManager [3]
+
+For establishing an MBIM IP session at least these actions are required by the
+management application:
+ - open the control channel
+ - configure network connection settings
+ - connect to network
+ - configure IP interface
+
+Management application development
+----------------------------------
+The driver and userspace interfaces are described below. The MBIM
+control channel protocol is described in [1].
+
+MBIM control channel userspace ABI
+==================================
+
+/dev/wwanctrl character device
+------------------------------
+The driver exposes an interface to the MBIM function control channel using char
+driver as a subdriver. The userspace end of the control channel pipe is a
+/dev/wwanctrl character device.
+
+The /dev/wwanctrl device is created as a subordinate character device under
+IOSM driver. The character device associated with a specific MBIM function
+can be looked up using sysfs with matching the above device name.
+
+Control channel configuration
+-----------------------------
+The wMaxControlMessage field of the MBIM functional descriptor
+limits the maximum control message size. The management application needs to
+negotiate the control message size as per the requirements.
+See also the ioctl documentation below.
+
+Fragmentation
+-------------
+The userspace application is responsible for all control message
+fragmentation and defragmentation as per MBIM.
+
+/dev/wwanctrl write()
+---------------------
+The MBIM control messages from the management application must not
+exceed the negotiated control message size.
+
+/dev/wwanctrl read()
+--------------------
+The management application must accept control messages of up the
+negotiated control message size.
+
+/dev/wwanctrl ioctl()
+--------------------
+IOCTL_WDM_MAX_COMMAND: Get Maximum Command Size
+This IOCTL command could be used by applications to fetch the Maximum Command
+buffer length supported by the driver which is restricted to 4096 bytes.
+
+	#include <stdio.h>
+	#include <fcntl.h>
+	#include <sys/ioctl.h>
+	#include <linux/types.h>
+	int main()
+	{
+		__u16 max;
+		int fd = open("/dev/wwanctrl", O_RDWR);
+		if (!ioctl(fd, IOCTL_WDM_MAX_COMMAND, &max))
+			printf("wMaxControlMessage is %d\n", max);
+	}
+
+MBIM data channel userspace ABI
+===============================
+
+wwanY network device
+--------------------
+The IOSM driver represents the MBIM data channel as a single
+network device of the "wwan0" type. This network device is initially
+mapped to MBIM IP session 0.
+
+Multiplexed IP sessions (IPS)
+-----------------------------
+IOSM driver allows multiplexing of several IP sessions over the single network
+device of type wwan0. IOSM driver models such IP sessions as 802.1q VLAN
+subdevices of the master wwanY device, mapping MBIM IP session M to VLAN ID M
+for all values of M greater than 0.
+
+The userspace management application is responsible for adding new VLAN links
+prior to establishing MBIM IP sessions where the SessionId is greater than 0.
+These links can be added by using the normal VLAN kernel interfaces.
+
+For example, adding a link for a MBIM IP session with SessionId 5:
+
+  ip link add link wwan0 name wwan0.<name> type vlan id 5
+
+The driver will automatically map the "wwan0.<name>" network device to MBIM
+IP session 5.
+
+References
+==========
+
+[1] "MBIM (Mobile Broadband Interface Model) Registry"
+       - http://compliance.usb.org/mbim/
+
+[2] libmbim - "a glib-based library for talking to WWAN modems and
+      devices which speak the Mobile Interface Broadband Model (MBIM)
+      protocol"
+      - http://www.freedesktop.org/wiki/Software/libmbim/
+
+[3] ModemManager - "a DBus-activated daemon which controls mobile
+      broadband (2G/3G/4G) devices and connections"
+      - http://www.freedesktop.org/wiki/Software/ModemManager/
\ No newline at end of file
-- 
2.12.3


^ permalink raw reply related

* [RFC 15/18] net: iosm: uevent support
From: M Chetan Kumar @ 2020-11-23 13:51 UTC (permalink / raw)
  To: netdev, linux-wireless; +Cc: johannes, krishna.c.sudi, m.chetan.kumar
In-Reply-To: <20201123135123.48892-1-m.chetan.kumar@intel.com>

Report modem status via uevent.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
---
 drivers/net/wwan/iosm/iosm_ipc_uevent.c | 47 +++++++++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_uevent.h | 41 ++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_uevent.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_uevent.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_uevent.c b/drivers/net/wwan/iosm/iosm_ipc_uevent.c
new file mode 100644
index 000000000000..27542ca27613
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_uevent.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#include <linux/slab.h>
+
+#include "iosm_ipc_sio.h"
+#include "iosm_ipc_uevent.h"
+
+/* Update the uevent in work queue context */
+static void ipc_uevent_work(struct work_struct *data)
+{
+	struct ipc_uevent_info *info;
+	char *envp[2] = { NULL, NULL };
+
+	info = container_of(data, struct ipc_uevent_info, work);
+
+	envp[0] = info->uevent;
+
+	if (kobject_uevent_env(&info->dev->kobj, KOBJ_CHANGE, envp))
+		pr_err("uevent %s failed to sent", info->uevent);
+
+	kfree(info);
+}
+
+void ipc_uevent_send(struct device *dev, char *uevent)
+{
+	struct ipc_uevent_info *info;
+
+	if (!uevent || !dev)
+		return;
+
+	info = kzalloc(sizeof(*info), GFP_ATOMIC);
+	if (!info)
+		return;
+
+	/* Initialize the kernel work queue */
+	INIT_WORK(&info->work, ipc_uevent_work);
+
+	/* Store the device and event information */
+	info->dev = dev;
+	snprintf(info->uevent, MAX_UEVENT_LEN, "%s: %s", dev_name(dev), uevent);
+
+	/* Schedule uevent in process context using work queue */
+	schedule_work(&info->work);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_uevent.h b/drivers/net/wwan/iosm/iosm_ipc_uevent.h
new file mode 100644
index 000000000000..422f64411c6e
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_uevent.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_UEVENT_H
+#define IOSM_IPC_UEVENT_H
+
+/* Baseband event strings */
+#define UEVENT_MDM_NOT_READY "MDM_NOT_READY"
+#define UEVENT_ROM_READY "ROM_READY"
+#define UEVENT_MDM_READY "MDM_READY"
+#define UEVENT_CRASH "CRASH"
+#define UEVENT_CD_READY "CD_READY"
+#define UEVENT_CD_READY_LINK_DOWN "CD_READY_LINK_DOWN"
+#define UEVENT_MDM_TIMEOUT "MDM_TIMEOUT"
+
+/* Maximum length of user events */
+#define MAX_UEVENT_LEN 64
+
+/**
+ * struct ipc_uevent_info - Uevent information structure.
+ * @dev:	Pointer to device structure
+ * @uevent:	Uevent information
+ * @work:	Uevent work struct
+ */
+struct ipc_uevent_info {
+	struct device *dev;
+	char uevent[MAX_UEVENT_LEN];
+	struct work_struct work;
+};
+
+/**
+ * ipc_uevent_send - Send modem event to user space.
+ * @dev:	Generic device pointer
+ * @uevent:	Uevent information
+ *
+ */
+void ipc_uevent_send(struct device *dev, char *uevent);
+
+#endif
-- 
2.12.3


^ permalink raw reply related

* [RFC 16/18] net: iosm: net driver
From: M Chetan Kumar @ 2020-11-23 13:51 UTC (permalink / raw)
  To: netdev, linux-wireless; +Cc: johannes, krishna.c.sudi, m.chetan.kumar
In-Reply-To: <20201123135123.48892-1-m.chetan.kumar@intel.com>

1) Create net device for data/IP communication.
2) Bind VLAN ID to mux IP session.
3) Implement net device operations.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
---
 drivers/net/wwan/iosm/iosm_ipc_wwan.c | 674 ++++++++++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_wwan.h |  72 ++++
 2 files changed, 746 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_wwan.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_wwan.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
new file mode 100644
index 000000000000..f14a971455bb
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#include <linux/if_vlan.h>
+
+#include "iosm_ipc_chnl_cfg.h"
+#include "iosm_ipc_imem_ops.h"
+
+/* Minimum number of transmit queues per WWAN root device */
+#define WWAN_MIN_TXQ (1)
+/* Minimum number of receive queues per WWAN root device */
+#define WWAN_MAX_RXQ (1)
+/* Default transmit queue for WWAN root device */
+#define WWAN_DEFAULT_TXQ (0)
+/* VLAN tag for WWAN root device */
+#define WWAN_ROOT_VLAN_TAG (0)
+
+#define IPC_MEM_MIN_MTU_SIZE (68)
+#define IPC_MEM_MAX_MTU_SIZE (1024 * 1024)
+
+#define IPC_MEM_VLAN_TO_SESSION (1)
+
+/* Required alignment for TX in bytes (32 bit/4 bytes)*/
+#define IPC_WWAN_ALIGN (4)
+
+/**
+ * struct ipc_vlan_info - This structure includes information about VLAN device.
+ * @vlan_id:	VLAN tag of the VLAN device.
+ * @ch_id:	IPC channel number for which VLAN device is created.
+ * @stats:	Contains statistics of VLAN devices.
+ */
+struct ipc_vlan_info {
+	int vlan_id;
+	int ch_id;
+	struct net_device_stats stats;
+};
+
+/**
+ * struct iosm_wwan - This structure contains information about WWAN root device
+ *		     and interface to the IPC layer.
+ * @vlan_devs:		Contains information about VLAN devices created under
+ *			WWAN root device.
+ * @netdev:		Pointer to network interface device structure.
+ * @ops_instance:	Instance pointer for Callbacks
+ * @dev:		Pointer device structure
+ * @lock:		Spinlock to be used for atomic operations of the
+ *			root device.
+ * @stats:		Contains statistics of WWAN root device
+ * @vlan_devs_nr:	Number of VLAN devices.
+ * @if_mutex:		Mutex used for add and remove vlan-id
+ * @max_devs:		Maximum supported VLAN devs
+ * @max_ip_devs:	Maximum supported IP VLAN devs
+ * @is_registered:	Registration status with netdev
+ */
+struct iosm_wwan {
+	struct ipc_vlan_info *vlan_devs;
+	struct net_device *netdev;
+	void *ops_instance;
+	struct device *dev;
+	spinlock_t lock; /* Used for atomic operations on root device */
+	struct net_device_stats stats;
+	int vlan_devs_nr;
+	struct mutex if_mutex; /* Mutex used for add and remove vlan-id */
+	int max_devs;
+	int max_ip_devs;
+	u8 is_registered : 1;
+};
+
+/* Get the array index of requested tag. */
+static int ipc_wwan_get_vlan_devs_nr(struct iosm_wwan *ipc_wwan, u16 tag)
+{
+	int i = 0;
+
+	if (!ipc_wwan->vlan_devs)
+		return -EINVAL;
+
+	for (i = 0; i < ipc_wwan->vlan_devs_nr; i++)
+		if (ipc_wwan->vlan_devs[i].vlan_id == tag)
+			return i;
+
+	return -EINVAL;
+}
+
+static int ipc_wwan_add_vlan(struct iosm_wwan *ipc_wwan, u16 vid)
+{
+	if (vid >= 512 || !ipc_wwan->vlan_devs)
+		return -EINVAL;
+
+	if (vid == WWAN_ROOT_VLAN_TAG)
+		return 0;
+
+	mutex_lock(&ipc_wwan->if_mutex);
+
+	/* get channel id */
+	ipc_wwan->vlan_devs[ipc_wwan->vlan_devs_nr].ch_id =
+		imem_sys_wwan_open(ipc_wwan->ops_instance, vid);
+
+	if (ipc_wwan->vlan_devs[ipc_wwan->vlan_devs_nr].ch_id < 0) {
+		dev_err(ipc_wwan->dev,
+			"cannot connect wwan0 & id %d to the IPC mem layer",
+			vid);
+		mutex_unlock(&ipc_wwan->if_mutex);
+		return -ENODEV;
+	}
+
+	/* save vlan id */
+	ipc_wwan->vlan_devs[ipc_wwan->vlan_devs_nr].vlan_id = vid;
+
+	dev_dbg(ipc_wwan->dev, "Channel id %d allocated to vlan id %d",
+		ipc_wwan->vlan_devs[ipc_wwan->vlan_devs_nr].ch_id,
+		ipc_wwan->vlan_devs[ipc_wwan->vlan_devs_nr].vlan_id);
+
+	ipc_wwan->vlan_devs_nr++;
+
+	mutex_unlock(&ipc_wwan->if_mutex);
+
+	return 0;
+}
+
+static int ipc_wwan_remove_vlan(struct iosm_wwan *ipc_wwan, u16 vid)
+{
+	int ch_nr = ipc_wwan_get_vlan_devs_nr(ipc_wwan, vid);
+	int i = 0;
+
+	if (ch_nr < 0) {
+		dev_err(ipc_wwan->dev, "vlan dev not found for vid = %d", vid);
+		return ch_nr;
+	}
+
+	if (ipc_wwan->vlan_devs[ch_nr].ch_id < 0) {
+		dev_err(ipc_wwan->dev, "invalid ch nr %d to kill", ch_nr);
+		return -EINVAL;
+	}
+
+	mutex_lock(&ipc_wwan->if_mutex);
+
+	imem_sys_wwan_close(ipc_wwan->ops_instance, vid,
+			    ipc_wwan->vlan_devs[ch_nr].ch_id);
+
+	ipc_wwan->vlan_devs[ch_nr].ch_id = -1;
+
+	/* re-align the vlan information as we removed one tag */
+	for (i = ch_nr; i < ipc_wwan->vlan_devs_nr; i++)
+		memcpy(&ipc_wwan->vlan_devs[i], &ipc_wwan->vlan_devs[i + 1],
+		       sizeof(struct ipc_vlan_info));
+
+	ipc_wwan->vlan_devs_nr--;
+
+	mutex_unlock(&ipc_wwan->if_mutex);
+
+	return 0;
+}
+
+/* Checks the protocol and discards the Ethernet header or VLAN header
+ * accordingly.
+ */
+static int ipc_wwan_pull_header(struct sk_buff *skb, bool *is_ip)
+{
+	unsigned int header_size;
+	__be16 proto;
+
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		proto = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+
+		if (skb->len < VLAN_ETH_HLEN)
+			header_size = 0;
+		else
+			header_size = VLAN_ETH_HLEN;
+	} else {
+		proto = eth_hdr(skb)->h_proto;
+
+		if (skb->len < ETH_HLEN)
+			header_size = 0;
+		else
+			header_size = ETH_HLEN;
+	}
+
+	/* If a valid pointer */
+	if (header_size > 0 && is_ip) {
+		*is_ip = (proto == htons(ETH_P_IP)) ||
+			 (proto == htons(ETH_P_IPV6));
+
+		/* Discard the vlan/ethernet header. */
+		if (unlikely(!skb_pull(skb, header_size)))
+			header_size = 0;
+	}
+
+	return header_size;
+}
+
+/* Get VLAN tag from IPC SESSION ID */
+static inline u16 ipc_wwan_mux_session_to_vlan_tag(int id)
+{
+	return (u16)(id + IPC_MEM_VLAN_TO_SESSION);
+}
+
+/* Get IPC SESSION ID from VLAN tag */
+static inline int ipc_wwan_vlan_to_mux_session_id(u16 tag)
+{
+	return tag - IPC_MEM_VLAN_TO_SESSION;
+}
+
+/* Add new vlan device and open a channel */
+static int ipc_wwan_vlan_rx_add_vid(struct net_device *netdev, __be16 proto,
+				    u16 vid)
+{
+	struct iosm_wwan *ipc_wwan = netdev_priv(netdev);
+
+	if (vid != IPC_WWAN_DSS_ID_4)
+		return ipc_wwan_add_vlan(ipc_wwan, vid);
+
+	return 0;
+}
+
+/* Remove vlan device and de-allocate channel */
+static int ipc_wwan_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto,
+				     u16 vid)
+{
+	struct iosm_wwan *ipc_wwan = netdev_priv(netdev);
+
+	if (vid == WWAN_ROOT_VLAN_TAG)
+		return 0;
+
+	return ipc_wwan_remove_vlan(ipc_wwan, vid);
+}
+
+static int ipc_wwan_open(struct net_device *netdev)
+{
+	/* Octets in one ethernet addr */
+	if (netdev->addr_len < ETH_ALEN) {
+		pr_err("cannot build the Ethernet address for \"%s\"",
+		       netdev->name);
+		return -ENODEV;
+	}
+
+	/* enable tx path, DL data may follow */
+	netif_tx_start_all_queues(netdev);
+
+	return 0;
+}
+
+static int ipc_wwan_stop(struct net_device *netdev)
+{
+	pr_debug("Stop all TX Queues");
+
+	netif_tx_stop_all_queues(netdev);
+	return 0;
+}
+
+int ipc_wwan_receive(struct iosm_wwan *ipc_wwan, struct sk_buff *skb_arg,
+		     bool dss)
+{
+	struct sk_buff *skb;
+	struct ethhdr *eth;
+	u16 tag = 0;
+
+	if (unlikely(!ipc_wwan)) {
+		if (skb_arg)
+			dev_kfree_skb(skb_arg);
+		return -EINVAL;
+	}
+
+	skb = skb_arg;
+
+	eth = (struct ethhdr *)skb->data;
+	if (unlikely(!eth)) {
+		dev_err(ipc_wwan->dev, "ethernet header info error");
+		dev_kfree_skb(skb);
+		return -1;
+	}
+
+	/* Build the ethernet header.
+	 * for kernel version latest than 3.14.0.
+	 */
+	ether_addr_copy(eth->h_dest, ipc_wwan->netdev->dev_addr);
+	ether_addr_copy(eth->h_source, ipc_wwan->netdev->dev_addr);
+	eth->h_source[ETH_ALEN - 1] ^= 0x01; /* src is us xor 1 */
+	/* set the ethernet payload type: ipv4 or ipv6 or Dummy type
+	 * for 802.3 frames
+	 */
+	eth->h_proto = htons(ETH_P_802_3);
+	if (!dss) {
+		if ((skb->data[ETH_HLEN] & 0xF0) == 0x40)
+			eth->h_proto = htons(ETH_P_IP);
+		else if ((skb->data[ETH_HLEN] & 0xF0) == 0x60)
+			eth->h_proto = htons(ETH_P_IPV6);
+	}
+
+	skb->dev = ipc_wwan->netdev;
+	skb->protocol = eth_type_trans(skb, ipc_wwan->netdev);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	vlan_get_tag(skb, &tag);
+	/* TX stats doesn't include ETH_HLEN.
+	 * eth_type_trans() functions pulls the ethernet header.
+	 * so skb->len does not have ethernet header in it.
+	 */
+	ipc_wwan_update_stats(ipc_wwan, ipc_wwan_vlan_to_mux_session_id(tag),
+			      skb->len, false);
+
+	switch (netif_rx_ni(skb)) {
+	case NET_RX_SUCCESS:
+		break;
+	case NET_RX_DROP:
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/* Align SKB to 32bit, if not already aligned */
+static struct sk_buff *ipc_wwan_skb_align(struct iosm_wwan *ipc_wwan,
+					  struct sk_buff *skb)
+{
+	unsigned int offset = (uintptr_t)skb->data & (IPC_WWAN_ALIGN - 1);
+	struct sk_buff *new_skb;
+
+	if (offset == 0)
+		return skb;
+
+	/* Allocate new skb to copy into */
+	new_skb = dev_alloc_skb(skb->len + (IPC_WWAN_ALIGN - 1));
+	if (unlikely(!new_skb)) {
+		dev_err(ipc_wwan->dev, "failed to reallocate skb");
+		goto out;
+	}
+
+	/* Make sure newly allocated skb is aligned */
+	offset = (uintptr_t)new_skb->data & (IPC_WWAN_ALIGN - 1);
+	if (unlikely(offset != 0))
+		skb_reserve(new_skb, IPC_WWAN_ALIGN - offset);
+
+	/* Copy payload */
+	memcpy(new_skb->data, skb->data, skb->len);
+
+	skb_put(new_skb, skb->len);
+out:
+	dev_kfree_skb(skb);
+	return new_skb;
+}
+
+/* Transmit a packet (called by the kernel) */
+static int ipc_wwan_transmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct iosm_wwan *ipc_wwan = netdev_priv(netdev);
+	bool is_ip = false;
+	int ret = -EINVAL;
+	int header_size;
+	int idx = 0;
+	u16 tag = 0;
+
+	vlan_get_tag(skb, &tag);
+
+	/* If the SKB is of WWAN root device then don't send it to device.
+	 * Free the SKB and then return.
+	 */
+	if (unlikely(tag == WWAN_ROOT_VLAN_TAG))
+		goto exit;
+
+	/* Discard the Ethernet header or VLAN Ethernet header depending
+	 * on the protocol.
+	 */
+	header_size = ipc_wwan_pull_header(skb, &is_ip);
+	if (!header_size)
+		goto exit;
+
+	/* Get the channel number corresponding to VLAN ID */
+	idx = ipc_wwan_get_vlan_devs_nr(ipc_wwan, tag);
+	if (unlikely(idx < 0 || idx >= ipc_wwan->max_devs ||
+		     ipc_wwan->vlan_devs[idx].ch_id < 0))
+		goto exit;
+
+	/* VLAN IDs from 1 to 255 are for IP data
+	 * 257 to 512 are for non-IP data
+	 */
+	if ((tag > 0 && tag < 256)) {
+		if (unlikely(!is_ip)) {
+			ret = -EXDEV;
+			goto exit;
+		}
+	} else if (tag > 256 && tag < 512) {
+		if (unlikely(is_ip)) {
+			ret = -EXDEV;
+			goto exit;
+		}
+
+		/* Align the SKB only for control packets if not aligned. */
+		skb = ipc_wwan_skb_align(ipc_wwan, skb);
+		if (!skb)
+			goto exit;
+	} else {
+		/* Unknown VLAN IDs */
+		ret = -EXDEV;
+		goto exit;
+	}
+
+	/* Send the SKB to device for transmission */
+	ret = imem_sys_wwan_transmit(ipc_wwan->ops_instance, tag,
+				     ipc_wwan->vlan_devs[idx].ch_id, skb);
+
+	/* Return code of zero is success */
+	if (ret == 0) {
+		ret = NETDEV_TX_OK;
+	} else if (ret == -2) {
+		/* Return code -2 is to enable re-enqueue of the skb.
+		 * Re-push the stripped header before returning busy.
+		 */
+		if (unlikely(!skb_push(skb, header_size))) {
+			dev_err(ipc_wwan->dev, "unable to push eth hdr");
+			ret = -EIO;
+			goto exit;
+		}
+
+		ret = NETDEV_TX_BUSY;
+	} else {
+		ret = -EIO;
+		goto exit;
+	}
+
+	return ret;
+
+exit:
+	/* Log any skb drop except for WWAN Root device */
+	if (tag != 0)
+		dev_dbg(ipc_wwan->dev, "skb dropped.VLAN ID: %d, ret: %d", tag,
+			ret);
+
+	dev_kfree_skb_any(skb);
+	return ret;
+}
+
+static int ipc_wwan_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct iosm_wwan *ipc_wwan = netdev_priv(dev);
+	unsigned long flags = 0;
+
+	if (unlikely(new_mtu < IPC_MEM_MIN_MTU_SIZE ||
+		     new_mtu > IPC_MEM_MAX_MTU_SIZE)) {
+		dev_err(ipc_wwan->dev, "mtu %d out of range %d..%d", new_mtu,
+			IPC_MEM_MIN_MTU_SIZE, IPC_MEM_MAX_MTU_SIZE);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&ipc_wwan->lock, flags);
+	dev->mtu = new_mtu;
+	spin_unlock_irqrestore(&ipc_wwan->lock, flags);
+	return 0;
+}
+
+static int ipc_wwan_change_mac_addr(struct net_device *dev, void *sock_addr)
+{
+	struct iosm_wwan *ipc_wwan = netdev_priv(dev);
+	struct sockaddr *addr = sock_addr;
+	unsigned long flags = 0;
+	int result = 0;
+	u8 *sock_data;
+
+	sock_data = (u8 *)addr->sa_data;
+
+	spin_lock_irqsave(&ipc_wwan->lock, flags);
+
+	if (is_zero_ether_addr(sock_data)) {
+		dev->addr_len = 1;
+		memset(dev->dev_addr, 0, 6);
+		dev_dbg(ipc_wwan->dev, "mac addr set to zero");
+		goto exit;
+	}
+
+	result = eth_mac_addr(dev, sock_addr);
+exit:
+	spin_unlock_irqrestore(&ipc_wwan->lock, flags);
+	return result;
+}
+
+static int ipc_wwan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	if (cmd != SIOCSIFHWADDR ||
+	    !access_ok((void __user *)ifr, sizeof(struct ifreq)) ||
+	    dev->addr_len > sizeof(struct sockaddr))
+		return -EINVAL;
+
+	return ipc_wwan_change_mac_addr(dev, &ifr->ifr_hwaddr);
+}
+
+static struct net_device_stats *ipc_wwan_get_stats(struct net_device *dev)
+{
+	struct iosm_wwan *ipc_wwan = netdev_priv(dev);
+
+	return &ipc_wwan->stats;
+}
+
+/* validate mac address for wwan devices */
+static int ipc_wwan_eth_validate_addr(struct net_device *netdev)
+{
+	return eth_validate_addr(netdev);
+}
+
+/* return valid TX queue for the mapped VLAN device
+ * for kernel version latest than 4.19.0
+ */
+static u16 ipc_wwan_select_queue(struct net_device *netdev, struct sk_buff *skb,
+				 struct net_device *sb_dev)
+{
+	struct iosm_wwan *ipc_wwan = netdev_priv(netdev);
+	u16 txqn = 0xFFFF;
+	u16 tag = 0;
+
+	/* get VLAN tag for the current skb
+	 * if the packet is untagged, return the default queue.
+	 */
+	if (vlan_get_tag(skb, &tag) < 0)
+		return WWAN_DEFAULT_TXQ;
+
+	/* TX Queues are allocated as following:
+	 *
+	 * if vlan ID == 0 is used for VLAN root device i.e. WWAN0.
+	 * Assign default TX Queue which is 0.
+	 *
+	 * if vlan ID >= IMEM_WWAN_CTRL_VLAN_ID_START
+	 * && <= IMEM_WWAN_CTRL_VLAN_ID_END then we use default
+	 * TX Queue which is 0.
+	 *
+	 * if vlan ID >= IMEM_WWAN_DATA_VLAN_ID_START
+	 * && <= Max IP devices then allocate separate
+	 * TX Queue to each VLAN ID.
+	 *
+	 * For any other vlan ID return invalid Tx Queue
+	 */
+	if (tag >= IMEM_WWAN_DATA_VLAN_ID_START && tag <= ipc_wwan->max_ip_devs)
+		txqn = tag;
+	else if ((tag >= IMEM_WWAN_CTRL_VLAN_ID_START &&
+		  tag <= IMEM_WWAN_CTRL_VLAN_ID_END) ||
+		 tag == WWAN_ROOT_VLAN_TAG)
+		txqn = WWAN_DEFAULT_TXQ;
+
+	dev_dbg(ipc_wwan->dev, "VLAN tag = %u, TX Queue selected %u", tag,
+		txqn);
+	return txqn;
+}
+
+static const struct net_device_ops ipc_wwandev_ops = {
+	.ndo_open = ipc_wwan_open,
+	.ndo_stop = ipc_wwan_stop,
+	.ndo_start_xmit = ipc_wwan_transmit,
+	.ndo_change_mtu = ipc_wwan_change_mtu,
+	.ndo_validate_addr = ipc_wwan_eth_validate_addr,
+	.ndo_do_ioctl = ipc_wwan_ioctl,
+	.ndo_get_stats = ipc_wwan_get_stats,
+	.ndo_vlan_rx_add_vid = ipc_wwan_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = ipc_wwan_vlan_rx_kill_vid,
+	.ndo_set_mac_address = ipc_wwan_change_mac_addr,
+	.ndo_select_queue = ipc_wwan_select_queue,
+};
+
+void ipc_wwan_update_stats(struct iosm_wwan *ipc_wwan, int id, size_t len,
+			   bool tx)
+{
+	int idx =
+		ipc_wwan_get_vlan_devs_nr(ipc_wwan,
+					  ipc_wwan_mux_session_to_vlan_tag(id));
+
+	if (unlikely(idx < 0 || idx >= ipc_wwan->max_devs)) {
+		dev_err(ipc_wwan->dev, "invalid VLAN device");
+		return;
+	}
+
+	if (tx) {
+		/* Update vlan device tx statistics */
+		ipc_wwan->vlan_devs[idx].stats.tx_packets++;
+		ipc_wwan->vlan_devs[idx].stats.tx_bytes += len;
+		/* Update root device tx statistics */
+		ipc_wwan->stats.tx_packets++;
+		ipc_wwan->stats.tx_bytes += len;
+	} else {
+		/* Update vlan device rx statistics */
+		ipc_wwan->vlan_devs[idx].stats.rx_packets++;
+		ipc_wwan->vlan_devs[idx].stats.rx_bytes += len;
+		/* Update root device rx statistics */
+		ipc_wwan->stats.rx_packets++;
+		ipc_wwan->stats.rx_bytes += len;
+	}
+}
+
+void ipc_wwan_tx_flowctrl(struct iosm_wwan *ipc_wwan, int id, bool on)
+{
+	u16 vid = ipc_wwan_mux_session_to_vlan_tag(id);
+
+	dev_dbg(ipc_wwan->dev, "MUX session id[%d]: %s", id,
+		on ? "Enable" : "Disable");
+	if (on)
+		netif_stop_subqueue(ipc_wwan->netdev, vid);
+	else
+		netif_wake_subqueue(ipc_wwan->netdev, vid);
+}
+
+static struct device_type wwan_type = { .name = "wwan" };
+
+struct iosm_wwan *ipc_wwan_init(void *ops_instance, struct device *dev,
+				int max_sessions)
+{
+	int max_tx_q = WWAN_MIN_TXQ + max_sessions;
+	struct iosm_wwan *ipc_wwan;
+	struct net_device *netdev;
+
+	/* allocate ethernet device */
+	netdev = alloc_etherdev_mqs(sizeof(*ipc_wwan), max_tx_q, WWAN_MAX_RXQ);
+
+	if (unlikely(!netdev || !ops_instance))
+		return NULL;
+
+	ipc_wwan = netdev_priv(netdev);
+
+	ipc_wwan->dev = dev;
+	ipc_wwan->netdev = netdev;
+	ipc_wwan->is_registered = false;
+
+	ipc_wwan->vlan_devs_nr = 0;
+	ipc_wwan->ops_instance = ops_instance;
+
+	ipc_wwan->max_devs = max_sessions + IPC_MEM_MAX_CHANNELS;
+	ipc_wwan->max_ip_devs = max_sessions;
+
+	ipc_wwan->vlan_devs = kcalloc(ipc_wwan->max_devs,
+				      sizeof(ipc_wwan->vlan_devs[0]),
+				      GFP_KERNEL);
+
+	spin_lock_init(&ipc_wwan->lock);
+	mutex_init(&ipc_wwan->if_mutex);
+
+	/* allocate random ethernet address */
+	eth_random_addr(netdev->dev_addr);
+	netdev->addr_assign_type = NET_ADDR_RANDOM;
+
+	snprintf(netdev->name, IFNAMSIZ, "%s", "wwan0");
+	netdev->netdev_ops = &ipc_wwandev_ops;
+	netdev->flags |= IFF_NOARP;
+	netdev->features |=
+		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_FILTER;
+	SET_NETDEV_DEVTYPE(netdev, &wwan_type);
+
+	if (register_netdev(netdev)) {
+		dev_err(ipc_wwan->dev, "register_netdev failed");
+		ipc_wwan_deinit(ipc_wwan);
+		return NULL;
+	}
+
+	ipc_wwan->is_registered = true;
+
+	netif_device_attach(netdev);
+
+	/* Set Max MTU for kernel version latest than 4.10.0. */
+	netdev->max_mtu = IPC_MEM_MAX_MTU_SIZE;
+
+	return ipc_wwan;
+}
+
+void ipc_wwan_deinit(struct iosm_wwan *ipc_wwan)
+{
+	if (ipc_wwan->is_registered)
+		unregister_netdev(ipc_wwan->netdev);
+	kfree(ipc_wwan->vlan_devs);
+	ipc_wwan->vlan_devs = NULL;
+	free_netdev(ipc_wwan->netdev);
+}
+
+bool ipc_wwan_is_tx_stopped(struct iosm_wwan *ipc_wwan, int id)
+{
+	u16 vid = ipc_wwan_mux_session_to_vlan_tag(id);
+
+	return __netif_subqueue_stopped(ipc_wwan->netdev, vid);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.h b/drivers/net/wwan/iosm/iosm_ipc_wwan.h
new file mode 100644
index 000000000000..3c3b1fb31ae1
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_WWAN_H
+#define IOSM_IPC_WWAN_H
+
+#define IMEM_WWAN_DATA_VLAN_ID_START 1
+#define IMEM_WWAN_CTRL_VLAN_ID_START 257
+#define IMEM_WWAN_CTRL_VLAN_ID_END 512
+
+/**
+ * ipc_wwan_init - Allocate, Init and register WWAN device
+ * @ops_instance:	Instance pointer for callback
+ * @dev:		Pointer to device structure
+ * @max_sessions:	Maximum number of sessions
+ *
+ * Returns: Pointer to instance on success else NULL
+ */
+struct iosm_wwan *ipc_wwan_init(void *ops_instance, struct device *dev,
+				int max_sessions);
+
+/**
+ * ipc_wwan_deinit - Unregister and free WWAN device, clear pointer
+ * @ipc_wwan:	Pointer to wwan instance data
+ */
+void ipc_wwan_deinit(struct iosm_wwan *ipc_wwan);
+
+/**
+ * ipc_wwan_receive - Receive a downlink packet from CP.
+ * @ipc_wwan:	Pointer to wwan instance
+ * @skb_arg:	Pointer to struct sk_buff
+ * @dss:	Set to true if vlan id is greater than
+ *		IMEM_WWAN_CTRL_VLAN_ID_START else false
+ *
+ * Return: 0 on success else error code
+ */
+int ipc_wwan_receive(struct iosm_wwan *ipc_wwan, struct sk_buff *skb_arg,
+		     bool dss);
+
+/**
+ * ipc_wwan_update_stats - Update device statistics
+ * @ipc_wwan:	Pointer to wwan instance
+ * @id:		Ipc mux channel session id
+ * @len:	Number of bytes to update
+ * @tx:		True if statistics needs to be updated for transmit
+ *		else false
+ *
+ */
+void ipc_wwan_update_stats(struct iosm_wwan *ipc_wwan, int id, size_t len,
+			   bool tx);
+
+/**
+ * ipc_wwan_tx_flowctrl - Enable/Disable TX flow control
+ * @ipc_wwan:	Pointer to wwan instance
+ * @id:		Ipc mux channel session id
+ * @on:		if true then flow ctrl would be enabled else disable
+ *
+ */
+void ipc_wwan_tx_flowctrl(struct iosm_wwan *ipc_wwan, int id, bool on);
+
+/**
+ * ipc_wwan_is_tx_stopped - Checks if Tx stopped for a VLAN id.
+ * @ipc_wwan:	Pointer to wwan instance
+ * @id:		Ipc mux channel session id
+ *
+ * Return: true if stopped, false otherwise
+ */
+bool ipc_wwan_is_tx_stopped(struct iosm_wwan *ipc_wwan, int id);
+
+#endif
-- 
2.12.3


^ permalink raw reply related

* [RFC 14/18] net: iosm: protocol operations
From: M Chetan Kumar @ 2020-11-23 13:51 UTC (permalink / raw)
  To: netdev, linux-wireless; +Cc: johannes, krishna.c.sudi, m.chetan.kumar
In-Reply-To: <20201123135123.48892-1-m.chetan.kumar@intel.com>

1) Update UL/DL transfer descriptors in message ring.
2) Define message set for pipe/sleep protocol.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
---
 drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c | 563 ++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h | 358 ++++++++++++++++
 2 files changed, 921 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
new file mode 100644
index 000000000000..beca5e06203a
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
@@ -0,0 +1,563 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#include "iosm_ipc_protocol.h"
+#include "iosm_ipc_protocol_ops.h"
+
+/* Get the next free message element.*/
+static union ipc_mem_msg_entry *
+ipc_protocol_free_msg_get(struct iosm_protocol *ipc_protocol, int *index)
+{
+	u32 head = ipc_protocol->p_ap_shm->msg_head;
+	u32 new_head = (head + 1) % IPC_MEM_MSG_ENTRIES;
+	union ipc_mem_msg_entry *msg;
+
+	if (new_head == ipc_protocol->p_ap_shm->msg_tail) {
+		dev_err(ipc_protocol->dev, "message ring is full");
+		return NULL;
+	}
+
+	/* Get the pointer to the next free message element,
+	 * reset the fields and mark is as invalid.
+	 */
+	msg = &ipc_protocol->p_ap_shm->msg_ring[head];
+	memset(msg, 0, sizeof(*msg));
+
+	/* return index in message ring */
+	*index = head;
+
+	return msg;
+}
+
+/* Updates the message ring Head pointer */
+void ipc_protocol_msg_hp_update(void *instance)
+{
+	struct iosm_protocol *ipc_protocol = instance;
+	u32 head = ipc_protocol->p_ap_shm->msg_head;
+	u32 new_head = (head + 1) % IPC_MEM_MSG_ENTRIES;
+
+	/* Update head pointer and fire doorbell. */
+	ipc_protocol->p_ap_shm->msg_head = new_head;
+	ipc_protocol->old_msg_tail = ipc_protocol->p_ap_shm->msg_tail;
+
+	/* Host Sleep negotiation happens through Message Ring. So Host Sleep
+	 * check should be avoided by sending false as last argument.
+	 */
+	ipc_pm_signal_hpda_doorbell(ipc_protocol->pm, IPC_HP_MR, false);
+}
+
+/* Allocate and prepare a OPEN_PIPE message.
+ * This also allocates the memory for the new TDR structure and
+ * updates the pipe structure referenced in the preparation arguments.
+ */
+static int ipc_protocol_msg_prepipe_open(struct iosm_protocol *ipc_protocol,
+					 union ipc_msg_prep_args *args)
+{
+	int index = -1;
+	union ipc_mem_msg_entry *msg =
+		ipc_protocol_free_msg_get(ipc_protocol, &index);
+	struct ipc_pipe *pipe = args->pipe_open.pipe;
+	struct ipc_protocol_td *tdr;
+	struct sk_buff **skbr;
+
+	if (!msg) {
+		dev_err(ipc_protocol->dev, "failed to get free message");
+		return -1;
+	}
+
+	/* Allocate the skbuf elements for the skbuf which are on the way.
+	 * SKB ring is internal memory allocation for driver. No need to
+	 * re-calculate the start and end addresses.
+	 */
+	skbr = kcalloc(pipe->nr_of_entries, sizeof(*skbr), GFP_ATOMIC);
+	if (!skbr)
+		return -ENOMEM;
+
+	/* Allocate the transfer descriptors for the pipe. */
+	tdr = pci_alloc_consistent(ipc_protocol->pcie->pci,
+				   pipe->nr_of_entries * sizeof(*tdr),
+				   &pipe->phy_tdr_start);
+	if (!tdr) {
+		kfree(skbr);
+		dev_err(ipc_protocol->dev, "tdr alloc error");
+		return -ENOMEM;
+	}
+
+	pipe->max_nr_of_queued_entries = pipe->nr_of_entries - 1;
+	pipe->nr_of_queued_entries = 0;
+	pipe->tdr_start = tdr;
+	pipe->skbr_start = skbr;
+	pipe->old_tail = 0;
+
+	ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr] = 0;
+
+	msg->open_pipe.type_of_message = IPC_MEM_MSG_OPEN_PIPE;
+	msg->open_pipe.pipe_nr = pipe->pipe_nr;
+	msg->open_pipe.tdr_addr = pipe->phy_tdr_start;
+	msg->open_pipe.tdr_entries = pipe->nr_of_entries;
+	msg->open_pipe.interrupt_moderation = pipe->irq_moderation;
+	msg->open_pipe.accumulation_backoff = pipe->accumulation_backoff;
+	msg->open_pipe.reliable = true;
+	msg->open_pipe.optimized_completion = true;
+	msg->open_pipe.irq_vector = pipe->irq;
+
+	return index;
+}
+
+static int ipc_protocol_msg_prepipe_close(struct iosm_protocol *ipc_protocol,
+					  union ipc_msg_prep_args *args)
+{
+	int index = -1;
+	union ipc_mem_msg_entry *msg =
+		ipc_protocol_free_msg_get(ipc_protocol, &index);
+	struct ipc_pipe *pipe = args->pipe_close.pipe;
+
+	if (!msg)
+		return -1;
+
+	msg->close_pipe.type_of_message = IPC_MEM_MSG_CLOSE_PIPE;
+	msg->close_pipe.pipe_nr = pipe->pipe_nr;
+
+	dev_dbg(ipc_protocol->dev, "IPC_MEM_MSG_CLOSE_PIPE(pipe_nr=%d)",
+		msg->close_pipe.pipe_nr);
+
+	return index;
+}
+
+static int ipc_protocol_msg_prep_sleep(struct iosm_protocol *ipc_protocol,
+				       union ipc_msg_prep_args *args)
+{
+	int index = -1;
+	union ipc_mem_msg_entry *msg =
+		ipc_protocol_free_msg_get(ipc_protocol, &index);
+
+	if (!msg) {
+		dev_err(ipc_protocol->dev, "failed to get free message");
+		return -1;
+	}
+
+	/* Prepare and send the host sleep message to CP to enter or exit D3. */
+	msg->host_sleep.type_of_message = IPC_MEM_MSG_SLEEP;
+	msg->host_sleep.target = args->sleep.target; /* 0=host, 1=device */
+
+	/* state; 0=enter, 1=exit 2=enter w/o protocol */
+	msg->host_sleep.state = args->sleep.state;
+
+	dev_dbg(ipc_protocol->dev, "IPC_MEM_MSG_SLEEP(target=%d; state=%d)",
+		msg->host_sleep.target, msg->host_sleep.state);
+
+	return index;
+}
+
+static int ipc_protocol_msg_prep_feature_set(struct iosm_protocol *ipc_protocol,
+					     union ipc_msg_prep_args *args)
+{
+	int index = -1;
+	union ipc_mem_msg_entry *msg =
+		ipc_protocol_free_msg_get(ipc_protocol, &index);
+
+	if (!msg) {
+		dev_err(ipc_protocol->dev, "failed to get free message");
+		return -1;
+	}
+
+	msg->feature_set.type_of_message = IPC_MEM_MSG_FEATURE_SET;
+	msg->feature_set.reset_enable = args->feature_set.reset_enable;
+
+	dev_dbg(ipc_protocol->dev, "IPC_MEM_MSG_FEATURE_SET(reset_enable=%d)",
+		msg->feature_set.reset_enable);
+
+	return index;
+}
+
+/* Processes the message consumed by CP. */
+bool ipc_protocol_msg_process(void *instance, int irq)
+{
+	struct iosm_protocol *ipc_protocol = instance;
+	struct ipc_rsp **rsp_ring = ipc_protocol->rsp_ring;
+	bool msg_processed = false;
+	int i;
+
+	if (ipc_protocol->p_ap_shm->msg_tail >= IPC_MEM_MSG_ENTRIES) {
+		dev_err(ipc_protocol->dev, "msg_tail out of range: %d",
+			ipc_protocol->p_ap_shm->msg_tail);
+		return msg_processed;
+	}
+
+	if (irq != IMEM_IRQ_DONT_CARE &&
+	    irq != ipc_protocol->p_ap_shm->ci.msg_irq_vector)
+		return msg_processed;
+
+	for (i = ipc_protocol->old_msg_tail;
+	     i != ipc_protocol->p_ap_shm->msg_tail;
+	     i = (i + 1) % IPC_MEM_MSG_ENTRIES) {
+		union ipc_mem_msg_entry *msg =
+			&ipc_protocol->p_ap_shm->msg_ring[i];
+
+		dev_dbg(ipc_protocol->dev, "msg[%d]: type=%u status=%d", i,
+			msg->common.type_of_message,
+			msg->common.completion_status);
+
+		/* Update response with status and wake up waiting requestor */
+		if (rsp_ring[i]) {
+			rsp_ring[i]->status =
+				(enum ipc_mem_msg_cs)
+					msg->common.completion_status;
+			complete(&rsp_ring[i]->completion);
+			rsp_ring[i] = NULL;
+		}
+		msg_processed = true;
+	}
+
+	ipc_protocol->old_msg_tail = i;
+	return msg_processed;
+}
+
+/* Sends data from UL list to CP for the provided pipe by updating the Head
+ * pointer of given pipe.
+ */
+bool ipc_protocol_ul_td_send(void *protocol_inst, struct ipc_pipe *pipe,
+			     struct sk_buff_head *p_ul_list)
+{
+	struct iosm_protocol *ipc_protocol = protocol_inst;
+	struct ipc_protocol_td *td;
+	bool hpda_pending = false;
+	s32 free_elements = 0;
+	struct sk_buff *skb;
+	u32 head;
+	u32 tail;
+
+	if (!ipc_protocol->p_ap_shm) {
+		dev_err(ipc_protocol->dev, "driver is not initialized");
+		return false;
+	}
+
+	/* Get head and tail of the td list and calculate
+	 * the number of free elements.
+	 */
+	head = ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr];
+	tail = pipe->old_tail;
+
+	while (!skb_queue_empty(p_ul_list)) {
+		if (head < tail)
+			free_elements = tail - head - 1;
+		else
+			free_elements =
+				pipe->nr_of_entries - head + ((s32)tail - 1);
+
+		if (free_elements <= 0) {
+			dev_dbg(ipc_protocol->dev,
+				"no free td elements for UL pipe %d",
+				pipe->pipe_nr);
+			break;
+		}
+
+		/* Get the td address. */
+		td = &pipe->tdr_start[head];
+
+		/* Take the first element of the uplink list and add it
+		 * to the td list.
+		 */
+		skb = skb_dequeue(p_ul_list);
+		if (WARN_ON(!skb))
+			break;
+
+		/* Save the reference to the uplink skbuf. */
+		pipe->skbr_start[head] = skb;
+
+		td->buffer.address = IPC_CB(skb)->mapping;
+		td->scs.size = skb->len;
+		td->scs.completion_status = 0;
+		td->next = 0;
+		td->reserved1 = 0;
+
+		pipe->nr_of_queued_entries++;
+
+		/* Calculate the new head and save it. */
+		head++;
+		if (head >= pipe->nr_of_entries)
+			head = 0;
+
+		ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr] = head;
+	}
+
+	if (pipe->old_head != head) {
+		dev_dbg(ipc_protocol->dev, "New UL TDs Pipe:%d", pipe->pipe_nr);
+
+		pipe->old_head = head;
+		/* Trigger doorbell because of pending UL packets. */
+		hpda_pending = true;
+	}
+
+	return hpda_pending;
+}
+
+/* Checks for Tail pointer update from CP and returns the data as SKB. */
+struct sk_buff *ipc_protocol_ul_td_process(void *protocol_inst,
+					   struct ipc_pipe *pipe)
+{
+	struct iosm_protocol *ipc_protocol = protocol_inst;
+	struct ipc_protocol_td *p_td = &pipe->tdr_start[pipe->old_tail];
+	struct sk_buff *skb = pipe->skbr_start[pipe->old_tail];
+
+	pipe->nr_of_queued_entries--;
+	pipe->old_tail++;
+	if (pipe->old_tail >= pipe->nr_of_entries)
+		pipe->old_tail = 0;
+
+	if (!p_td->buffer.address) {
+		dev_err(ipc_protocol->dev, "Td buffer address is NULL");
+		return NULL;
+	}
+
+	if (p_td->buffer.address != IPC_CB(skb)->mapping) {
+		dev_err(ipc_protocol->dev,
+			"pipe(%d): invalid buf_addr=%p or skb->data=%llx",
+			pipe->pipe_nr, (void *)p_td->buffer.address,
+			skb ? IPC_CB(skb)->mapping : 0);
+		return NULL;
+	}
+
+	return skb;
+}
+
+/* Allocates an SKB for CP to send data and updates the Head Pointer
+ * of the given Pipe#.
+ */
+bool ipc_protocol_dl_td_prepare(void *protocol_inst, struct ipc_pipe *pipe)
+{
+	struct iosm_protocol *ipc_protocol = protocol_inst;
+	u32 head, new_head;
+	struct ipc_protocol_td *td;
+	dma_addr_t mapping = 0;
+	struct sk_buff *skb;
+	u32 tail;
+
+	/* Get head and tail of the td list and calculate
+	 * the number of free elements.
+	 */
+	head = ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr];
+	tail = ipc_protocol->p_ap_shm->tail_array[pipe->pipe_nr];
+
+	new_head = head + 1;
+	if (new_head >= pipe->nr_of_entries)
+		new_head = 0;
+
+	if (new_head == tail)
+		return false;
+
+	/* Get the td address. */
+	td = &pipe->tdr_start[head];
+
+	/* Allocate the skbuf for the descriptor. */
+	skb = ipc_pcie_alloc_skb(ipc_protocol->pcie, pipe->buf_size, GFP_ATOMIC,
+				 &mapping, DMA_FROM_DEVICE,
+				 IPC_MEM_DL_ETH_OFFSET);
+	if (!skb)
+		return false;
+
+	td->buffer.address = mapping;
+	td->scs.size = pipe->buf_size;
+	td->scs.completion_status = 0;
+	td->next = 0;
+	td->reserved1 = 0;
+
+	/* store the new head value. */
+	ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr] = new_head;
+
+	/* Save the reference to the skbuf. */
+	pipe->skbr_start[head] = skb;
+
+	pipe->nr_of_queued_entries++;
+
+	return true;
+}
+
+/* Processes the TD processed from CP by checking the Tail Pointer for given
+ * pipe.
+ */
+struct sk_buff *ipc_protocol_dl_td_process(void *protocol_inst,
+					   struct ipc_pipe *pipe)
+{
+	struct iosm_protocol *ipc_protocol = protocol_inst;
+	u32 tail = ipc_protocol->p_ap_shm->tail_array[pipe->pipe_nr];
+	struct ipc_protocol_td *p_td;
+	struct sk_buff *skb;
+
+	if (!pipe->tdr_start)
+		return NULL;
+
+	/* Copy the reference to the downlink buffer. */
+	p_td = &pipe->tdr_start[pipe->old_tail];
+	skb = pipe->skbr_start[pipe->old_tail];
+
+	/* Reset the ring elements. */
+	pipe->skbr_start[pipe->old_tail] = NULL;
+
+	pipe->nr_of_queued_entries--;
+
+	pipe->old_tail++;
+	if (pipe->old_tail >= pipe->nr_of_entries)
+		pipe->old_tail = 0;
+
+	if (!skb->data) {
+		dev_err(ipc_protocol->dev, "skb is null");
+		goto ret;
+	} else if (!p_td->buffer.address) {
+		dev_err(ipc_protocol->dev, "td/buffer address is null");
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	}
+
+	if (!IPC_CB(skb)) {
+		dev_err(ipc_protocol->dev, "pipe# %d, tail: %d skb_cb is NULL",
+			pipe->pipe_nr, tail);
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	}
+
+	if (p_td->buffer.address != IPC_CB(skb)->mapping) {
+		dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p",
+			(void *)p_td->buffer.address, skb->data);
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	} else if (p_td->scs.size > pipe->buf_size) {
+		dev_err(ipc_protocol->dev, "invalid buffer size %d > %d",
+			p_td->scs.size, pipe->buf_size);
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	} else if (p_td->scs.completion_status == IPC_MEM_TD_CS_ABORT) {
+		/* Discard aborted buffers. */
+		dev_dbg(ipc_protocol->dev, "discard 'aborted' buffers");
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	}
+
+	/* Set the length field in skbuf. */
+	skb_put(skb, p_td->scs.size);
+
+ret:
+	return skb;
+}
+
+void ipc_protocol_get_head_tail_index(void *protocol_inst,
+				      struct ipc_pipe *pipe, u32 *head,
+				      u32 *tail)
+{
+	struct iosm_protocol *ipc_protocol = protocol_inst;
+
+	if (head)
+		*head = ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr];
+
+	if (tail)
+		*tail = ipc_protocol->p_ap_shm->tail_array[pipe->pipe_nr];
+}
+
+/* Frees the TDs given to CP.  */
+void ipc_protocol_pipe_cleanup(void *protocol_inst, struct ipc_pipe *pipe)
+{
+	struct iosm_protocol *ipc_protocol = protocol_inst;
+	struct sk_buff *skb;
+	u32 head;
+	u32 tail;
+
+	if (!ipc_protocol->p_ap_shm) {
+		dev_err(ipc_protocol->dev, "p_ap_shm is NULL");
+		return;
+	}
+
+	/* Get the start and the end of the buffer list. */
+	head = ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr];
+	tail = pipe->old_tail;
+
+	/* Reset tail and head to 0. */
+	ipc_protocol->p_ap_shm->tail_array[pipe->pipe_nr] = 0;
+	ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr] = 0;
+
+	/* Free pending uplink and downlink buffers. */
+	if (pipe->skbr_start) {
+		while (head != tail) {
+			/* Get the reference to the skbuf,
+			 * which is on the way and free it.
+			 */
+			skb = pipe->skbr_start[tail];
+			if (skb)
+				ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+
+			tail++;
+			if (tail >= pipe->nr_of_entries)
+				tail = 0;
+		}
+
+		kfree(pipe->skbr_start);
+		pipe->skbr_start = NULL;
+	}
+
+	pipe->old_tail = 0;
+
+	/* Free and reset the td and skbuf circular buffers. kfree is save! */
+	if (pipe->tdr_start) {
+		pci_free_consistent(ipc_protocol->pcie->pci,
+				    sizeof(*pipe->tdr_start) *
+					    pipe->nr_of_entries,
+				    pipe->tdr_start, pipe->phy_tdr_start);
+
+		pipe->tdr_start = NULL;
+	}
+}
+
+enum ipc_mem_device_ipc_state ipc_protocol_get_ipc_status(void *protocol_inst)
+{
+	struct iosm_protocol *ipc_protocol = protocol_inst;
+
+	return (enum ipc_mem_device_ipc_state)
+		ipc_protocol->p_ap_shm->device_info.ipc_status;
+}
+
+enum ipc_mem_exec_stage
+ipc_protocol_get_ap_exec_stage(struct iosm_protocol *ipc_protocol)
+{
+	return ipc_protocol->p_ap_shm->device_info.execution_stage;
+}
+
+int ipc_protocol_msg_prep(void *instance, enum ipc_msg_prep_type msg_type,
+			  union ipc_msg_prep_args *args)
+{
+	struct iosm_protocol *ipc_protocol = instance;
+
+	switch (msg_type) {
+	case IPC_MSG_PREP_SLEEP:
+		return ipc_protocol_msg_prep_sleep(ipc_protocol, args);
+
+	case IPC_MSG_PREP_PIPE_OPEN:
+		return ipc_protocol_msg_prepipe_open(ipc_protocol, args);
+
+	case IPC_MSG_PREP_PIPE_CLOSE:
+		return ipc_protocol_msg_prepipe_close(ipc_protocol, args);
+
+	case IPC_MSG_PREP_FEATURE_SET:
+		return ipc_protocol_msg_prep_feature_set(ipc_protocol, args);
+
+		/* Unsupported messages in protocol */
+	case IPC_MSG_PREP_MAP:
+	case IPC_MSG_PREP_UNMAP:
+	default:
+		dev_err(ipc_protocol->dev,
+			"unsupported message type: %d in protocol", msg_type);
+		return -1;
+	}
+}
+
+u32 ipc_protocol_pm_dev_get_sleep_notification(void *protocol_inst)
+{
+	struct iosm_protocol *ipc_protocol = protocol_inst;
+
+	return ipc_protocol->p_ap_shm->device_info.device_sleep_notification;
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h
new file mode 100644
index 000000000000..d59324faff2b
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_PROTOCOL_OPS_H
+#define IOSM_IPC_PROTOCOL_OPS_H
+
+#include "iosm_ipc_protocol.h"
+
+/**
+ * enum ipc_mem_td_cs - Completion status of a TD
+ * @IPC_MEM_TD_CS_INVALID:	      Initial status - td not yet used.
+ * @IPC_MEM_TD_CS_PARTIAL_TRANSFER:   More data pending -> next TD used for this
+ * @IPC_MEM_TD_CS_END_TRANSFER:	      IO transfer is complete.
+ * @IPC_MEM_TD_CS_OVERFLOW:	      IO transfer to small for the buff to write
+ * @IPC_MEM_TD_CS_ABORT:	      TD marked as abort and shall be discarded
+ *				      by AP.
+ * @IPC_MEM_TD_CS_ERROR:	      General error.
+ */
+enum ipc_mem_td_cs {
+	IPC_MEM_TD_CS_INVALID,
+	IPC_MEM_TD_CS_PARTIAL_TRANSFER,
+	IPC_MEM_TD_CS_END_TRANSFER,
+	IPC_MEM_TD_CS_OVERFLOW,
+	IPC_MEM_TD_CS_ABORT,
+	IPC_MEM_TD_CS_ERROR,
+};
+
+/* Completion status of IPC Message */
+enum ipc_mem_msg_cs {
+	IPC_MEM_MSG_CS_INVALID,
+	IPC_MEM_MSG_CS_SUCCESS,
+	IPC_MEM_MSG_CS_ERROR,
+};
+
+/**
+ * struct ipc_msg_prep_args_pipe - Structures for argument passing towards
+ *				   the actual message preparation
+ * @pipe:	Pipe to open/close
+ */
+struct ipc_msg_prep_args_pipe {
+	struct ipc_pipe *pipe; /* pipe to open/close */
+};
+
+struct ipc_msg_prep_args_sleep {
+	unsigned int target; /* 0=host, 1=device */
+	unsigned int state; /* 0=enter sleep, 1=exit sleep */
+};
+
+struct ipc_msg_prep_feature_set {
+	/* 0 = out-of-band, 1 = in-band-crash notification */
+	unsigned int reset_enable;
+};
+
+struct ipc_msg_prep_map {
+	unsigned int region_id;
+	unsigned long addr;
+	size_t size;
+};
+
+struct ipc_msg_prep_unmap {
+	unsigned int region_id;
+};
+
+/* Union for message to handle the message to CP in the tasklet context. */
+union ipc_msg_prep_args {
+	struct ipc_msg_prep_args_pipe pipe_open;
+	struct ipc_msg_prep_args_pipe pipe_close;
+	struct ipc_msg_prep_args_sleep sleep;
+	struct ipc_msg_prep_feature_set feature_set;
+	struct ipc_msg_prep_map map;
+	struct ipc_msg_prep_unmap unmap;
+};
+
+/**
+ * enum ipc_msg_prep_type - Enum for message prepare actions
+ * @IPC_MSG_PREP_SLEEP:		prepare a sleep message
+ * @IPC_MSG_PREP_PIPE_OPEN:	prepare a pipe open message
+ * @IPC_MSG_PREP_PIPE_CLOSE:	prepare a pipe close message
+ * @IPC_MSG_PREP_FEATURE_SET:	prepare a feature set message
+ * @IPC_MSG_PREP_MAP:		prepare a memory map message
+ * @IPC_MSG_PREP_UNMAP:		prepare a memory unmap message
+ */
+enum ipc_msg_prep_type {
+	IPC_MSG_PREP_SLEEP,
+	IPC_MSG_PREP_PIPE_OPEN,
+	IPC_MSG_PREP_PIPE_CLOSE,
+	IPC_MSG_PREP_FEATURE_SET,
+	IPC_MSG_PREP_MAP,
+	IPC_MSG_PREP_UNMAP,
+};
+
+/**
+ * struct ipc_rsp - Response for message to CP
+ * @completion:	For waking up requestor
+ * @status:	Completion status
+ */
+struct ipc_rsp {
+	struct completion completion;
+	enum ipc_mem_msg_cs status;
+};
+
+/**
+ * enum ipc_mem_msg - Type-definition of the messages.
+ * @IPC_MEM_MSG_OPEN_PIPE:	AP ->CP: Open a pipe
+ * @IPC_MEM_MSG_CLOSE_PIPE:	AP ->CP: Close a pipe
+ * @IPC_MEM_MSG_ABORT_PIPE:	AP ->CP: wait for completion of the
+ *				running transfer and abort all pending
+ *				IO-transfers for the pipe
+ * @IPC_MEM_MSG_SLEEP:		AP ->CP: host enter or exit sleep
+ * @IPC_MEM_MSG_FEATURE_SET:	AP ->CP: Intel feature configuration
+ */
+enum ipc_mem_msg {
+	IPC_MEM_MSG_OPEN_PIPE = 0x01,
+	IPC_MEM_MSG_CLOSE_PIPE = 0x02,
+	IPC_MEM_MSG_ABORT_PIPE = 0x03,
+	IPC_MEM_MSG_SLEEP = 0x04,
+	IPC_MEM_MSG_FEATURE_SET = 0xF0,
+};
+
+struct ipc_mem_msg_open_pipe {
+	u64 tdr_addr;
+	u32 tdr_entries : 16;
+	u32 pipe_nr : 8;
+	u32 type_of_message : 8;
+	u32 irq_vector : 5;
+	u32 optimized_completion : 1;
+	u32 reliable : 1;
+	u32 reserved1 : 1;
+	u32 interrupt_moderation : 24;
+	u32 accumulation_backoff : 24;
+	u32 reserved2 : 8;
+	u32 completion_status;
+};
+
+/* Message structure for close pipe. */
+struct ipc_mem_msg_close_pipe {
+	u32 reserved1[2];
+	u32 reserved2 : 16;
+	u32 pipe_nr : 8;
+	u32 type_of_message : 8;
+	u32 reserved3;
+	u32 reserved4;
+	u32 completion_status;
+};
+
+/* Message structure for abort pipe. */
+struct ipc_mem_msg_abort_pipe {
+	u32 reserved1[2];
+	u32 reserved2 : 16;
+	u32 pipe_nr : 8;
+	u32 type_of_message : 8;
+	u32 reserved3;
+	u32 reserved4;
+	u32 completion_status;
+};
+
+/**
+ * struct ipc_mem_msg_host_sleep - Message structure for sleep message.
+ * @reserved1:		Reserved
+ * @target:		0=host, 1=device, host or EP devie
+ *			is the message target
+ * @state:		0=enter sleep, 1=exit sleep,
+ *			2=enter sleep no protocol
+ * @reserved2:		Reserved
+ * @type_of_message:	Message type
+ * @reserved3:		Reserved
+ * @reserved4:		Reserved
+ * @completion_status:	Message Completion Status
+ */
+struct ipc_mem_msg_host_sleep {
+	u32 reserved1[2];
+	u32 target : 8;
+	u32 state : 8;
+	u32 reserved2 : 8;
+	u32 type_of_message : 8;
+	u32 reserved3;
+	u32 reserved4;
+	u32 completion_status;
+};
+
+/* Message structure for feature_set message */
+struct ipc_mem_msg_feature_set {
+	u32 reserved1[2];
+	u32 reserved2 : 23;
+	u32 reset_enable : 1;
+	u32 type_of_message : 8;
+	u32 reserved3;
+	u32 reserved4;
+	u32 completion_status;
+};
+
+/* Message structure for completion status update. */
+struct ipc_mem_msg_common {
+	u32 reserved1[2];
+	u32 reserved2 : 24;
+	u32 type_of_message : 8;
+	u32 reserved3;
+	u32 reserved4;
+	u32 completion_status;
+};
+
+/* Union with all possible messages. */
+union ipc_mem_msg_entry {
+	struct ipc_mem_msg_open_pipe open_pipe;
+	struct ipc_mem_msg_close_pipe close_pipe;
+	struct ipc_mem_msg_abort_pipe abort_pipe;
+	struct ipc_mem_msg_host_sleep host_sleep;
+	struct ipc_mem_msg_feature_set feature_set;
+	/* Used to access msg_type and to set the completion status. */
+	struct ipc_mem_msg_common common;
+};
+
+/* Transfer descriptor definition. */
+struct ipc_protocol_td {
+	union {
+		/*   0 :  63 - 64-bit address of a buffer in host memory. */
+		dma_addr_t address;
+		struct {
+			/*   0 :  31 - 32 bit address */
+			__le32 address;
+			/*  32 :  63 - corresponding descriptor */
+			__le32 desc;
+		} __attribute__ ((__packed__)) shm;
+	} buffer;
+
+	struct {
+	/*	64 :  87 - Size of the buffer.
+	 *	The host provides the size of the buffer queued.
+	 *	The EP device reads this value and shall update
+	 *	it for downlink transfers to indicate the
+	 *	amount of data written in buffer.
+	 */
+		u32 size : 24;
+	/*	88 :  95 - This field provides the completion status
+	 *	of the TD. When queuing the TD, the host sets
+	 *	the status to 0. The EP device updates this
+	 *	field when completing the TD.
+	 */
+		u32 completion_status : 8;
+	} __attribute__ ((__packed__)) scs;
+
+	/*  96 : 103 - nr of following descriptors */
+	u32 next : 8;
+	/* 104 : 127 - reserved */
+	u32 reserved1 : 24;
+} __attribute__ ((__packed__));
+
+/**
+ * ipc_protocol_msg_prep - Prepare message based upon message type
+ * @ptr:	iosm_protocol instance
+ * @msg_type:	message prepare type
+ * @args:	message arguments
+ *
+ * Return: 0 on success, -1 in case of failure
+ */
+int ipc_protocol_msg_prep(void *ptr, enum ipc_msg_prep_type msg_type,
+			  union ipc_msg_prep_args *args);
+
+/**
+ * ipc_protocol_msg_hp_update - Function for head pointer update
+ *				of message ring
+ * @ptr:	iosm_protocol instance
+ */
+void ipc_protocol_msg_hp_update(void *ptr);
+
+/**
+ * ipc_protocol_msg_process - Function for processing responses
+ *			      to IPC messages
+ * @ptr:	iosm_protocol instance
+ * @irq:	IRQ vector
+ *
+ * Return:	True on success; false if error
+ */
+bool ipc_protocol_msg_process(void *ptr, int irq);
+
+/**
+ * ipc_protocol_ul_td_send - Function for sending the data to CP
+ * @ptr: iosm_protocol instance
+ * @pipe: Pipe instance
+ * @p_ul_list: uplink sk_buff list
+ *
+ * Return: true in success; false in case of error
+ */
+bool ipc_protocol_ul_td_send(void *ptr, struct ipc_pipe *pipe,
+			     struct sk_buff_head *p_ul_list);
+
+/**
+ * ipc_protocol_ul_td_process - Function for processing the sent data
+ * @ptr: iosm_protocol instance
+ * @pipe: Pipe instance
+ *
+ * Return: sk_buff instance
+ */
+struct sk_buff *ipc_protocol_ul_td_process(void *ptr, struct ipc_pipe *pipe);
+
+/**
+ * ipc_protocol_dl_td_prepare - Function for providing DL TDs to CP
+ * @ptr: iosm_protocol instance
+ * @pipe: Pipe instance
+ *
+ * Return: true in success; false in case of error
+ */
+bool ipc_protocol_dl_td_prepare(void *ptr, struct ipc_pipe *pipe);
+
+/**
+ * ipc_protocol_dl_td_process - Function for processing the DL data
+ * @ptr: iosm_protocol instance
+ * @pipe: Pipe instance
+ *
+ * Return: sk_buff instance
+ */
+struct sk_buff *ipc_protocol_dl_td_process(void *ptr, struct ipc_pipe *pipe);
+
+/**
+ * ipc_protocol_get_head_tail_index - Function for getting Head and Tail
+ *				      pointer index of given pipe
+ * @ptr: iosm_protocol instance
+ * @pipe: Pipe Instance
+ * @head: head pointer index of the given pipe
+ * @tail: tail pointer index of the given pipe
+ */
+void ipc_protocol_get_head_tail_index(void *ptr, struct ipc_pipe *pipe,
+				      u32 *head, u32 *tail);
+/**
+ * ipc_protocol_get_ipc_status - Function for getting the IPC Status
+ * @ptr: iosm_protocol instance
+ *
+ * Return: Returns IPC State
+ */
+enum ipc_mem_device_ipc_state ipc_protocol_get_ipc_status(void *ptr);
+
+/**
+ * ipc_protocol_pipe_cleanup - Function to cleanup pipe resources
+ * @ptr: iosm_protocol instance
+ * @pipe: Pipe instance
+ */
+void ipc_protocol_pipe_cleanup(void *ptr, struct ipc_pipe *pipe);
+
+/**
+ * ipc_protocol_get_ap_exec_stage - Function for getting AP Exec Stage
+ * @ipc_protocol: pointer to struct iosm protocol
+ *
+ * Return: returns BOOT Stages
+ */
+enum ipc_mem_exec_stage
+ipc_protocol_get_ap_exec_stage(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_pm_dev_get_sleep_notification - Function for getting Dev Sleep
+ *						notification
+ * @ptr: iosm_protocol instance
+ *
+ * Return: Returns dev PM State
+ */
+u32 ipc_protocol_pm_dev_get_sleep_notification(void *ptr);
+#endif
-- 
2.12.3


^ permalink raw reply related

* [RFC 12/18] net: iosm: power management
From: M Chetan Kumar @ 2020-11-23 13:51 UTC (permalink / raw)
  To: netdev, linux-wireless; +Cc: johannes, krishna.c.sudi, m.chetan.kumar
In-Reply-To: <20201123135123.48892-1-m.chetan.kumar@intel.com>

Implements state machine to handle host & device sleep.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
---
 drivers/net/wwan/iosm/iosm_ipc_pm.c | 334 ++++++++++++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_pm.h | 216 +++++++++++++++++++++++
 2 files changed, 550 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_pm.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_pm.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_pm.c b/drivers/net/wwan/iosm/iosm_ipc_pm.c
new file mode 100644
index 000000000000..662f8f309ec0
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_pm.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#include "iosm_ipc_protocol.h"
+#include "iosm_ipc_task_queue.h"
+
+/* Timeout value in MS for the PM to wait for device to reach active state */
+#define IPC_PM_ACTIVE_TIMEOUT_MS (500)
+
+/* Value definitions for union ipc_pm_cond members.
+ *
+ * Note that here "active" has the value 1, as compared to the enums
+ * ipc_mem_host_pm_state or ipc_mem_dev_pm_state, where "active" is 0
+ */
+#define IPC_PM_SLEEP (0)
+#define IPC_PM_ACTIVE (1)
+
+/* Trigger the doorbell interrupt on cp to change the PM sleep/active status */
+#define ipc_cp_irq_sleep_control(ipc_pcie, data)                               \
+	ipc_doorbell_fire(ipc_pcie, IPC_DOORBELL_IRQ_SLEEP, data)
+
+/* Trigger the doorbell interrupt on CP to do hpda update */
+#define ipc_cp_irq_hpda_update(ipc_pcie, data)                                 \
+	ipc_doorbell_fire(ipc_pcie, IPC_DOORBELL_IRQ_HPDA, 0xFF & (data))
+
+void ipc_pm_signal_hpda_doorbell(struct iosm_pm *ipc_pm, u32 identifier,
+				 bool host_slp_check)
+{
+	if (host_slp_check && ipc_pm->host_pm_state != IPC_MEM_HOST_PM_ACTIVE &&
+	    ipc_pm->host_pm_state != IPC_MEM_HOST_PM_ACTIVE_WAIT) {
+		ipc_pm->pending_hpda_update = true;
+		dev_dbg(ipc_pm->dev,
+			"Pending HPDA update set. Host PM_State: %d identifier:%d",
+			ipc_pm->host_pm_state, identifier);
+		return;
+	}
+
+	if (!ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_IRQ, true)) {
+		ipc_pm->pending_hpda_update = true;
+		dev_dbg(ipc_pm->dev, "Pending HPDA update set. identifier:%d",
+			identifier);
+		return;
+	}
+	ipc_pm->pending_hpda_update = false;
+
+	/* Trigger the irq towards CP */
+	ipc_cp_irq_hpda_update(ipc_pm->pcie, identifier);
+
+	ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_IRQ, false);
+}
+
+/* Wake up the device if it is in low power mode. */
+static bool ipc_pm_link_activate(struct iosm_pm *ipc_pm)
+{
+	if (ipc_pm->cp_state == IPC_MEM_DEV_PM_ACTIVE)
+		return true;
+
+	if (ipc_pm->cp_state == IPC_MEM_DEV_PM_SLEEP) {
+		if (ipc_pm->ap_state == IPC_MEM_DEV_PM_SLEEP) {
+			/* Wake up the device. */
+			ipc_cp_irq_sleep_control(ipc_pm->pcie,
+						 IPC_MEM_DEV_PM_WAKEUP);
+			ipc_pm->ap_state = IPC_MEM_DEV_PM_ACTIVE_WAIT;
+
+			return false;
+		}
+
+		if (ipc_pm->ap_state == IPC_MEM_DEV_PM_ACTIVE_WAIT)
+			return false;
+
+		return true;
+	}
+
+	/* link is not ready */
+	return false;
+}
+
+void ipc_pm_host_slp_reinit_dev_active_completion(struct iosm_pm *ipc_pm)
+{
+	if (!ipc_pm)
+		return;
+
+	atomic_set(&ipc_pm->host_sleep_pend, 1);
+
+	reinit_completion(&ipc_pm->host_sleep_complete);
+}
+
+bool ipc_pm_wait_for_device_active(struct iosm_pm *ipc_pm)
+{
+	bool ret_val = false;
+
+	if (ipc_pm->ap_state != IPC_MEM_DEV_PM_ACTIVE)
+
+		/* Wait for IPC_PM_ACTIVE_TIMEOUT_MS for Device sleep state
+		 * machine to enter ACTIVE state.
+		 */
+		if (!WAIT_FOR_TIMEOUT(&ipc_pm->host_sleep_complete,
+				      IPC_PM_ACTIVE_TIMEOUT_MS)) {
+			dev_err(ipc_pm->dev,
+				"PM timeout. Expected State:%d. Actual: %d",
+				IPC_MEM_DEV_PM_ACTIVE, ipc_pm->ap_state);
+			goto  active_timeout;
+		}
+
+	ret_val = true;
+active_timeout:
+	/* Reset the atomic variable in any case as device sleep
+	 * state machine change is no longer of interest.
+	 */
+	atomic_set(&ipc_pm->host_sleep_pend, 0);
+
+	return ret_val;
+}
+
+static void ipc_pm_on_link_sleep(struct iosm_pm *ipc_pm)
+{
+	/* pending sleep ack and all conditions are cleared
+	 * -> signal SLEEP__ACK to CP
+	 */
+	ipc_pm->cp_state = IPC_MEM_DEV_PM_SLEEP;
+	ipc_pm->ap_state = IPC_MEM_DEV_PM_SLEEP;
+
+	ipc_cp_irq_sleep_control(ipc_pm->pcie, IPC_MEM_DEV_PM_SLEEP);
+}
+
+static void ipc_pm_on_link_wake(struct iosm_pm *ipc_pm, bool ack)
+{
+	ipc_pm->ap_state = IPC_MEM_DEV_PM_ACTIVE;
+
+	if (ack) {
+		ipc_pm->cp_state = IPC_MEM_DEV_PM_ACTIVE;
+
+		ipc_cp_irq_sleep_control(ipc_pm->pcie, IPC_MEM_DEV_PM_ACTIVE);
+
+		/* check the consume state !!! */
+		if (atomic_cmpxchg(&ipc_pm->host_sleep_pend, 1, 0))
+			complete(&ipc_pm->host_sleep_complete);
+	}
+
+	/* Check for pending HPDA update.
+	 * Pending HP update could be because of sending message was
+	 * put on hold due to Device sleep state or due to TD update
+	 * which could be because of Device Sleep and Host Sleep
+	 * states.
+	 */
+	if (ipc_pm->pending_hpda_update &&
+	    ipc_pm->host_pm_state == IPC_MEM_HOST_PM_ACTIVE)
+		ipc_pm_signal_hpda_doorbell(ipc_pm, IPC_HP_PM_TRIGGER, true);
+}
+
+bool ipc_pm_trigger(struct iosm_pm *ipc_pm, enum ipc_pm_unit unit, bool active)
+{
+	union ipc_pm_cond old_cond;
+	union ipc_pm_cond new_cond;
+	bool link_active;
+
+	/* Save the current D3 state. */
+	new_cond = ipc_pm->pm_cond;
+	old_cond = ipc_pm->pm_cond;
+
+	/* Calculate the power state only in the runtime phase. */
+	switch (unit) {
+	case IPC_PM_UNIT_IRQ: /* CP irq */
+		new_cond.irq = active;
+		break;
+
+	case IPC_PM_UNIT_LINK: /* Device link state. */
+		new_cond.link = active;
+		break;
+
+	case IPC_PM_UNIT_HS: /* Host sleep trigger requires Link. */
+		new_cond.hs = active;
+		break;
+
+	default:
+		break;
+	}
+
+	/* Something changed ? */
+	if (old_cond.raw == new_cond.raw) {
+		/* Stay in the current PM state. */
+		link_active = old_cond.link == IPC_PM_ACTIVE;
+		goto ret;
+	}
+
+	ipc_pm->pm_cond = new_cond;
+
+	if (new_cond.link)
+		ipc_pm_on_link_wake(ipc_pm, unit == IPC_PM_UNIT_LINK);
+	else if (unit == IPC_PM_UNIT_LINK)
+		ipc_pm_on_link_sleep(ipc_pm);
+
+	if (old_cond.link == IPC_PM_SLEEP && new_cond.raw != 0) {
+		link_active = ipc_pm_link_activate(ipc_pm);
+		goto ret;
+	}
+
+	link_active = old_cond.link == IPC_PM_ACTIVE;
+
+ret:
+	return link_active;
+}
+
+bool ipc_pm_prepare_host_sleep(struct iosm_pm *ipc_pm)
+{
+	if (!ipc_pm)
+		return false;
+
+	/* suspend not allowed if host_pm_state is not IPC_MEM_HOST_PM_ACTIVE */
+	if (ipc_pm->host_pm_state != IPC_MEM_HOST_PM_ACTIVE) {
+		dev_err(ipc_pm->dev, "host_pm_state=%d\tExpected to be: %d",
+			ipc_pm->host_pm_state, IPC_MEM_HOST_PM_ACTIVE);
+		return false;
+	}
+
+	ipc_pm->host_pm_state = IPC_MEM_HOST_PM_SLEEP_WAIT_D3;
+
+	return true;
+}
+
+bool ipc_pm_prepare_host_active(struct iosm_pm *ipc_pm)
+{
+	if (!ipc_pm)
+		return false;
+
+	if (ipc_pm->host_pm_state != IPC_MEM_HOST_PM_SLEEP) {
+		dev_err(ipc_pm->dev, "host_pm_state=%d\tExpected to be: %d",
+			ipc_pm->host_pm_state, IPC_MEM_HOST_PM_SLEEP);
+		return false;
+	}
+
+	/* Sending Sleep Exit message to CP. Update the state */
+	ipc_pm->host_pm_state = IPC_MEM_HOST_PM_ACTIVE_WAIT;
+
+	return true;
+}
+
+bool ipc_pm_dev_slp_notification(struct iosm_pm *ipc_pm, u32 cp_pm_req)
+{
+	if (!ipc_pm)
+		return false;
+
+	if (cp_pm_req == ipc_pm->device_sleep_notification)
+		return false;
+
+	ipc_pm->device_sleep_notification = cp_pm_req;
+
+	/* Evaluate the PM request. */
+	switch (ipc_pm->cp_state) {
+	case IPC_MEM_DEV_PM_ACTIVE:
+		switch (cp_pm_req) {
+		case IPC_MEM_DEV_PM_ACTIVE:
+			break;
+
+		case IPC_MEM_DEV_PM_SLEEP:
+
+			/* Inform the PM that the device link can go down. */
+			ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_LINK, false);
+
+			return true;
+
+		default:
+			dev_err(ipc_pm->dev,
+				"loc-pm=(%d=active): confused req-pm=%d",
+				ipc_pm->cp_state, cp_pm_req);
+			break;
+		}
+		break;
+
+	case IPC_MEM_DEV_PM_SLEEP:
+		switch (cp_pm_req) {
+		case IPC_MEM_DEV_PM_ACTIVE:
+			/* Inform the PM that the device link is active. */
+			ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_LINK, true);
+			break;
+
+		case IPC_MEM_DEV_PM_SLEEP:
+			break;
+
+		default:
+			dev_err(ipc_pm->dev,
+				"loc-pm=(%d=sleep): confused req-pm=%d",
+				ipc_pm->cp_state, cp_pm_req);
+			break;
+		}
+		break;
+
+	default:
+		dev_err(ipc_pm->dev, "confused loc-pm=%d, req-pm=%d",
+			ipc_pm->cp_state, cp_pm_req);
+		break;
+	}
+
+	return false;
+}
+
+struct iosm_pm *ipc_pm_init(struct iosm_imem *ipc_imem)
+{
+	struct iosm_pm *ipc_pm = kzalloc(sizeof(*ipc_pm), GFP_KERNEL);
+
+	if (!ipc_pm)
+		return NULL;
+
+	ipc_pm->pcie = ipc_imem->pcie;
+	ipc_pm->dev = ipc_imem->dev;
+
+	ipc_pm->pm_cond.irq = IPC_PM_SLEEP;
+	ipc_pm->pm_cond.hs = IPC_PM_SLEEP;
+	ipc_pm->pm_cond.link = IPC_PM_ACTIVE;
+
+	ipc_pm->cp_state = IPC_MEM_DEV_PM_ACTIVE;
+	ipc_pm->ap_state = IPC_MEM_DEV_PM_ACTIVE;
+	ipc_pm->host_pm_state = IPC_MEM_HOST_PM_ACTIVE;
+
+	ipc_pm->ipc_tasklet = ipc_imem->ipc_tasklet;
+	ipc_pm->ipc_task = ipc_imem->ipc_task;
+
+	/* Create generic wait-for-completion handler for Host Sleep
+	 * and device sleep coordination.
+	 */
+	init_completion(&ipc_pm->host_sleep_complete);
+
+	atomic_set(&ipc_pm->host_sleep_pend, 0);
+
+	return ipc_pm;
+}
+
+void ipc_pm_deinit(struct iosm_pm *ipc_pm)
+{
+	complete(&ipc_pm->host_sleep_complete);
+	kfree(ipc_pm);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_pm.h b/drivers/net/wwan/iosm/iosm_ipc_pm.h
new file mode 100644
index 000000000000..f09a90fe43df
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_pm.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_PM_H
+#define IOSM_IPC_PM_H
+
+#include <linux/interrupt.h>
+
+/**
+ * union ipc_pm_cond - Conditions for D3 and the sleep message to CP.
+ * @raw:	raw/combined value for faster check
+ * @irq:	IRQ towards CP
+ * @hs:		Host Sleep
+ * @link:	Device link state.
+ */
+union ipc_pm_cond {
+	unsigned int raw;
+
+	struct {
+		unsigned int irq : 1;
+		unsigned int hs : 1;
+		unsigned int link : 1;
+	};
+};
+
+/**
+ * enum ipc_mem_host_pm_state - Possible states of the SLEEP finite state
+ *				machine.
+ * @IPC_MEM_HOST_PM_ACTIVE:		   Host is active
+ * @IPC_MEM_HOST_PM_ACTIVE_WAIT:	   Intermediate state before going to
+ *					   active
+ * @IPC_MEM_HOST_PM_SLEEP_WAIT_IDLE:	   Intermediate state to wait for idle
+ *					   before going into sleep
+ * @IPC_MEM_HOST_PM_SLEEP_WAIT_D3:	   Intermediate state to wait for D3
+ *					   before going to sleep
+ * @IPC_MEM_HOST_PM_SLEEP:		   after this state the interface is not
+ *					   accessible host is in suspend to RAM
+ * @IPC_MEM_HOST_PM_SLEEP_WAIT_EXIT_SLEEP: Intermediate state before exiting
+ *					   sleep
+ */
+enum ipc_mem_host_pm_state {
+	IPC_MEM_HOST_PM_ACTIVE,
+	IPC_MEM_HOST_PM_ACTIVE_WAIT,
+	IPC_MEM_HOST_PM_SLEEP_WAIT_IDLE,
+	IPC_MEM_HOST_PM_SLEEP_WAIT_D3,
+	IPC_MEM_HOST_PM_SLEEP,
+	IPC_MEM_HOST_PM_SLEEP_WAIT_EXIT_SLEEP,
+};
+
+/**
+ * enum ipc_mem_dev_pm_state - Possible states of the SLEEP finite state
+ *			       machine.
+ * @IPC_MEM_DEV_PM_ACTIVE:	IPC_MEM_DEV_PM_ACTIVE is the initial power
+ *				management state.
+ *				IRQ(struct
+ *				ipc_mem_device_info.device_sleep_notification)
+ *				and DOORBELL-IRQ-HPDA(data) values.
+ * @IPC_MEM_DEV_PM_SLEEP:	IPC_MEM_DEV_PM_SLEEP is PM state for sleep.
+ * @IPC_MEM_DEV_PM_WAKEUP:	DOORBELL-IRQ-DEVICE_WAKE(data).
+ * @IPC_MEM_DEV_PM_HOST_SLEEP:	DOORBELL-IRQ-HOST_SLEEP(data).
+ * @IPC_MEM_DEV_PM_ACTIVE_WAIT:	Local intermediate states.
+ *				Before AP triggers DOORBELL-IRQ-SLEEP(data)
+ *				either the intermediate device link state is
+ *				SYNC_ACTIVE_WAIT i.e. the user is blocked until
+ *				the link interworking was finished about IRQ and
+ *				DOORBELL-IRQ-HPDA or the intermediate device
+ *				link state is ACTIVE_WAIT i.e. the data transfer
+ *				starts after the DOORBELL-IRQ-HPDA
+ *				(IPC_MEM_DEV_PM_ACTIVE).
+ */
+enum ipc_mem_dev_pm_state {
+	IPC_MEM_DEV_PM_ACTIVE,
+	IPC_MEM_DEV_PM_SLEEP,
+	IPC_MEM_DEV_PM_WAKEUP,
+	IPC_MEM_DEV_PM_HOST_SLEEP,
+	IPC_MEM_DEV_PM_ACTIVE_WAIT,
+};
+
+/**
+ * struct iosm_pm - Power management instance data
+ * @pcie:			Pointer to iosm_pcie structure
+ * @dev:			Pointer to device structure
+ * @ipc_tasklet:		Tasklet instance
+ * @ipc_task:			Tasklet for scheduling a wakeup in task context
+ * @host_pm_state:		PM states for host
+ * @host_sleep_pend:		Variable to indicate Host Sleep Pending
+ * @host_sleep_complete:	Generic wait-for-completion used in
+ *				case of Host Sleep
+ * @pm_cond:			Conditions for power management
+ * @ap_state:			Current power management state, the
+ *				initial state is IPC_MEM_DEV_PM_ACTIVE eq. 0.
+ * @cp_state:			PM State of CP
+ * @device_sleep_notification:	last handled device_sleep_notfication
+ * @pending_hpda_update:	is a HPDA update pending?
+ */
+struct iosm_pm {
+	struct iosm_pcie *pcie;
+	struct device *dev;
+	struct tasklet_struct *ipc_tasklet;
+	struct ipc_task_queue *ipc_task;
+	enum ipc_mem_host_pm_state host_pm_state;
+	atomic_t host_sleep_pend;
+	struct completion host_sleep_complete;
+	union ipc_pm_cond pm_cond;
+	enum ipc_mem_dev_pm_state ap_state;
+	enum ipc_mem_dev_pm_state cp_state;
+	u32 device_sleep_notification;
+	u8 pending_hpda_update : 1;
+};
+
+/**
+ * enum ipc_pm_unit - Power management units.
+ * @IPC_PM_UNIT_IRQ:	IRQ towards CP
+ * @IPC_PM_UNIT_HS:	Host Sleep for converged protocol
+ * @IPC_PM_UNIT_LINK:	Link state controlled by CP.
+ */
+enum ipc_pm_unit {
+	IPC_PM_UNIT_IRQ, /* IRQ towards CP */
+	IPC_PM_UNIT_HS, /* Host Sleep for converged protocol */
+	IPC_PM_UNIT_LINK, /* Link state controlled by CP. */
+};
+
+/**
+ * ipc_pm_init - Allocate power management component
+ * @ipc_imem:	Pointer to iosm_imem structure
+ *
+ * Returns: pointer to allocated PM component or NULL on failure.
+ */
+struct iosm_pm *ipc_pm_init(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_pm_deinit - Free power management component, invalidating its pointer.
+ * @ipc_pm:	Pointer to pm component.
+ */
+void ipc_pm_deinit(struct iosm_pm *ipc_pm);
+
+/**
+ * ipc_pm_dev_slp_notification - Handle a sleep notification message from the
+ *				 device. This can be called from interrupt state
+ *				 This function handles Host Sleep requests too
+ *				 if the Host Sleep protocol is register based.
+ * @ipc_pm:			Pointer to power management component
+ * @sleep_notification:		Actual notification from device
+ *
+ * Returns: true if dev sleep state has to be checked, false otherwise.
+ */
+bool ipc_pm_dev_slp_notification(struct iosm_pm *ipc_pm,
+				 u32 sleep_notification);
+
+/**
+ * ipc_pm_prepare_host_sleep - Prepare the PM for sleep by entering
+ *			       IPC_MEM_HOST_PM_SLEEP_WAIT_D3 state.
+ * @ipc_pm:	Pointer to power management component
+ *
+ * Returns: true on success, false if the host was not active.
+ */
+bool ipc_pm_prepare_host_sleep(struct iosm_pm *ipc_pm);
+
+/**
+ * ipc_pm_prepare_host_active - Prepare the PM for wakeup by entering
+ *				IPC_MEM_HOST_PM_ACTIVE_WAIT state.
+ * @ipc_pm:	Pointer to power management component
+ *
+ * Returns: true on success, false if the host was not sleeping.
+ */
+bool ipc_pm_prepare_host_active(struct iosm_pm *ipc_pm);
+
+/**
+ * ipc_pm_wait_for_device_active - Wait for up to IPC_PM_ACTIVE_TIMEOUT_MS ms
+ *				   for the device to reach active state
+ * @ipc_pm:	Pointer to power management component
+ *
+ * Returns: true if device is active
+ */
+bool ipc_pm_wait_for_device_active(struct iosm_pm *ipc_pm);
+
+/**
+ * ipc_pm_signal_hpda_doorbell - Wake up the device if it is in low power mode
+ *				 and trigger a head pointer update interrupt.
+ * @ipc_pm:		Pointer to power management component
+ * @identifier:		specifies what component triggered hpda update irq
+ * @host_slp_check:	if set to true then Host Sleep state machine check will
+ *			be performed. If Host Sleep state machine allows HP
+ *			update then only doorbell is triggered otherwise pending
+ *			flag will be set. If set to false then Host Sleep check
+ *			will not be performed. This is helpful for Host Sleep
+ *			negotiation through message ring.
+ */
+void ipc_pm_signal_hpda_doorbell(struct iosm_pm *ipc_pm, u32 identifier,
+				 bool host_slp_check);
+/**
+ * ipc_pm_host_slp_reinit_dev_active_completion - This function initializes
+ *						  the atomic variable and
+ *						  completion object used to
+ *						  get notification about
+ *						  Device Sleep state machine
+ *						  changed to ACTIVE state
+ *						  so that Sleep negotiation
+ *						  can be started.
+ * @ipc_pm:	Pointer to power management component
+ */
+void ipc_pm_host_slp_reinit_dev_active_completion(struct iosm_pm *ipc_pm);
+
+/**
+ * ipc_pm_trigger - Update power manager and wake up the link if needed
+ * @ipc_pm:	Pointer to power management component
+ * @unit:	Power management units
+ * @active:	Device link state
+ *
+ * Returns: true if link is active.
+ */
+bool ipc_pm_trigger(struct iosm_pm *ipc_pm, enum ipc_pm_unit unit, bool active);
+
+#endif
-- 
2.12.3


^ permalink raw reply related

* [RFC 13/18] net: iosm: shared memory protocol
From: M Chetan Kumar @ 2020-11-23 13:51 UTC (permalink / raw)
  To: netdev, linux-wireless; +Cc: johannes, krishna.c.sudi, m.chetan.kumar
In-Reply-To: <20201123135123.48892-1-m.chetan.kumar@intel.com>

1) Defines messaging protocol for handling Transfer Descriptor
   in both UL/DL direction.
2) Ring buffer management.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
---
 drivers/net/wwan/iosm/iosm_ipc_protocol.c | 287 ++++++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_protocol.h | 219 +++++++++++++++++++++++
 2 files changed, 506 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_protocol.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_protocol.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.c b/drivers/net/wwan/iosm/iosm_ipc_protocol.c
new file mode 100644
index 000000000000..82d75d3d191c
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.c
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#include "iosm_ipc_protocol.h"
+#include "iosm_ipc_task_queue.h"
+
+int ipc_protocol_tq_msg_send(struct iosm_protocol *ipc_protocol,
+			     enum ipc_msg_prep_type msg_type,
+			     union ipc_msg_prep_args *prep_args,
+			     struct ipc_rsp *response)
+{
+	int index = ipc_protocol_msg_prep(ipc_protocol, msg_type, prep_args);
+
+	/* Store reference towards caller specified response in response ring
+	 * and signal CP
+	 */
+	if (index >= 0 && index < IPC_MEM_MSG_ENTRIES) {
+		ipc_protocol->rsp_ring[index] = response;
+		ipc_protocol_msg_hp_update(ipc_protocol);
+	}
+
+	return index;
+}
+
+/* Tasklet message send call back function */
+static int ipc_protocol_tq_msg_send_cb(void *instance, int arg, void *msg,
+				       size_t size)
+{
+	struct ipc_call_msg_send_args *send_args = msg;
+	struct iosm_protocol *ipc_protocol =
+		((struct iosm_imem *)instance)->ipc_protocol;
+
+	return ipc_protocol_tq_msg_send(ipc_protocol, send_args->msg_type,
+					send_args->prep_args,
+					send_args->response);
+}
+
+/* Remove reference to a response. This is typically used when a requestor timed
+ * out and is no longer interested in the response.
+ */
+static int ipc_protocol_tq_msg_remove(void *instance, int arg, void *msg,
+				      size_t size)
+{
+	struct iosm_protocol *ipc_protocol =
+		((struct iosm_imem *)instance)->ipc_protocol;
+
+	ipc_protocol->rsp_ring[arg] = NULL;
+	return 0;
+}
+
+int ipc_protocol_msg_send(struct iosm_protocol *ipc_protocol,
+			  enum ipc_msg_prep_type prep,
+			  union ipc_msg_prep_args *prep_args)
+{
+	struct ipc_call_msg_send_args send_args;
+	unsigned int exec_timeout;
+	struct ipc_rsp response;
+	int result = -1;
+	int index;
+
+	exec_timeout = (ipc_protocol_get_ap_exec_stage(ipc_protocol) ==
+					IPC_MEM_EXEC_STAGE_RUN ?
+				IPC_MSG_COMPLETE_RUN_DEFAULT_TIMEOUT :
+				IPC_MSG_COMPLETE_BOOT_DEFAULT_TIMEOUT);
+
+	/* Trap if called from non-preemptible context */
+	might_sleep();
+
+	response.status = IPC_MEM_MSG_CS_INVALID;
+	init_completion(&response.completion);
+
+	send_args.msg_type = prep;
+	send_args.prep_args = prep_args;
+	send_args.response = &response;
+
+	/* Allocate and prepare message to be sent in tasklet context.
+	 * A positive index returned form tasklet_call references the message
+	 * in case it needs to be cancelled when there is a timeout.
+	 */
+	index = ipc_task_queue_send_task(ipc_protocol->imem,
+					 ipc_protocol_tq_msg_send_cb, 0,
+					 &send_args, 0, true);
+
+	if (index < 0) {
+		dev_err(ipc_protocol->dev, "msg %d failed", prep);
+		return index;
+	}
+
+	/* Wait for the device to respond to the message */
+	switch (wait_for_completion_timeout(&response.completion,
+					    msecs_to_jiffies(exec_timeout))) {
+	case 0:
+		/* Timeout, there was no response from the device.
+		 * Remove the reference to the local response completion
+		 * object as we are no longer interested in the response.
+		 */
+		ipc_task_queue_send_task(ipc_protocol->imem,
+					 ipc_protocol_tq_msg_remove, index,
+					 NULL, 0, true);
+		dev_err(ipc_protocol->dev, "msg timeout");
+		ipc_uevent_send(ipc_protocol->pcie->dev, UEVENT_MDM_TIMEOUT);
+		break;
+	default:
+		/* We got a response in time; check completion status: */
+		if (response.status == IPC_MEM_MSG_CS_SUCCESS)
+			result = 0;
+		else
+			dev_err(ipc_protocol->dev,
+				"msg completion status error %d",
+				response.status);
+		break;
+	}
+
+	return result;
+}
+
+static int ipc_protocol_msg_send_host_sleep(struct iosm_protocol *ipc_protocol,
+					    u32 state)
+{
+	union ipc_msg_prep_args prep_args = {
+		.sleep.target = 0,
+		.sleep.state = state,
+	};
+
+	return ipc_protocol_msg_send(ipc_protocol, IPC_MSG_PREP_SLEEP,
+				     &prep_args);
+}
+
+void ipc_protocol_doorbell_trigger(struct iosm_protocol *ipc_protocol,
+				   u32 identifier)
+{
+	ipc_pm_signal_hpda_doorbell(ipc_protocol->pm, identifier, true);
+}
+
+bool ipc_protocol_pm_dev_sleep_handle(struct iosm_protocol *ipc_protocol)
+{
+	u32 ipc_status = ipc_protocol_get_ipc_status(ipc_protocol);
+	u32 requested;
+
+	if (ipc_status != IPC_MEM_DEVICE_IPC_RUNNING) {
+		dev_err(ipc_protocol->dev,
+			"irq ignored, CP IPC state is %d, should be RUNNING",
+			ipc_status);
+
+		/* Stop further processing. */
+		return false;
+	}
+
+	/* Get a copy of the requested PM state by the device and the local
+	 * device PM state.
+	 */
+	requested = ipc_protocol_pm_dev_get_sleep_notification(ipc_protocol);
+
+	return ipc_pm_dev_slp_notification(ipc_protocol->pm, requested);
+}
+
+static int ipc_protocol_tq_wakeup_dev_slp(void *instance, int arg, void *msg,
+					  size_t size)
+{
+	struct iosm_protocol *ipc_protocol =
+		((struct iosm_imem *)instance)->ipc_protocol;
+
+	/* Wakeup from device sleep if it is not ACTIVE */
+	if (!ipc_pm_trigger(ipc_protocol->pm, IPC_PM_UNIT_HS, true))
+		/* Link was not active. Prepare for notification and waiting */
+		ipc_pm_host_slp_reinit_dev_active_completion(ipc_protocol->pm);
+
+	ipc_pm_trigger(ipc_protocol->pm, IPC_PM_UNIT_HS, false);
+
+	return 0;
+}
+
+bool ipc_protocol_suspend(struct iosm_protocol *ipc_protocol)
+{
+	if (!ipc_pm_prepare_host_sleep(ipc_protocol->pm))
+		return false;
+
+	ipc_task_queue_send_task(ipc_protocol->imem,
+				 ipc_protocol_tq_wakeup_dev_slp, 0, NULL, 0,
+				 true);
+
+	if (!ipc_pm_wait_for_device_active(ipc_protocol->pm)) {
+		ipc_uevent_send(ipc_protocol->pcie->dev, UEVENT_MDM_TIMEOUT);
+		return false;
+	}
+
+	/* Send the sleep message for sync sys calls. */
+	dev_dbg(ipc_protocol->dev, "send (TARGET_HOST, ENTER_SLEEP)");
+	if (ipc_protocol_msg_send_host_sleep(ipc_protocol,
+					     IPC_HOST_SLEEP_ENTER_SLEEP)) {
+		/* Sending ENTER_SLEEP message failed, we are still active */
+		ipc_protocol->pm->host_pm_state = IPC_MEM_HOST_PM_ACTIVE;
+		return false;
+	}
+
+	ipc_protocol->pm->host_pm_state = IPC_MEM_HOST_PM_SLEEP;
+
+	return true;
+}
+
+bool ipc_protocol_resume(struct iosm_protocol *ipc_protocol)
+{
+	if (!ipc_pm_prepare_host_active(ipc_protocol->pm))
+		return false;
+
+	dev_dbg(ipc_protocol->dev, "send (TARGET_HOST, EXIT_SLEEP)");
+	if (ipc_protocol_msg_send_host_sleep(ipc_protocol,
+					     IPC_HOST_SLEEP_EXIT_SLEEP)) {
+		ipc_protocol->pm->host_pm_state = IPC_MEM_HOST_PM_SLEEP;
+		return false;
+	}
+
+	ipc_protocol->pm->host_pm_state = IPC_MEM_HOST_PM_ACTIVE;
+
+	return true;
+}
+
+struct iosm_protocol *ipc_protocol_init(struct iosm_imem *ipc_imem)
+{
+	struct iosm_protocol *ipc_protocol =
+		kzalloc(sizeof(*ipc_protocol), GFP_KERNEL);
+	struct ipc_protocol_context_info *p_ci;
+	u64 addr;
+
+	if (!ipc_protocol)
+		return NULL;
+
+	ipc_protocol->dev = ipc_imem->dev;
+	ipc_protocol->pcie = ipc_imem->pcie;
+	ipc_protocol->imem = ipc_imem;
+	ipc_protocol->p_ap_shm = NULL;
+	ipc_protocol->phy_ap_shm = 0;
+
+	ipc_protocol->old_msg_tail = 0;
+
+	ipc_protocol->p_ap_shm =
+		pci_alloc_consistent(ipc_protocol->pcie->pci,
+				     sizeof(*ipc_protocol->p_ap_shm),
+				     &ipc_protocol->phy_ap_shm);
+
+	if (!ipc_protocol->p_ap_shm) {
+		dev_err(ipc_protocol->dev, "pci shm alloc error");
+		kfree(ipc_protocol);
+		return NULL;
+	}
+
+	/* Prepare the context info for CP. */
+	addr = ipc_protocol->phy_ap_shm;
+	p_ci = &ipc_protocol->p_ap_shm->ci;
+	p_ci->device_info_addr =
+		addr + offsetof(struct ipc_protocol_ap_shm, device_info);
+	p_ci->head_array =
+		addr + offsetof(struct ipc_protocol_ap_shm, head_array);
+	p_ci->tail_array =
+		addr + offsetof(struct ipc_protocol_ap_shm, tail_array);
+	p_ci->msg_head = addr + offsetof(struct ipc_protocol_ap_shm, msg_head);
+	p_ci->msg_tail = addr + offsetof(struct ipc_protocol_ap_shm, msg_tail);
+	p_ci->msg_ring_addr =
+		addr + offsetof(struct ipc_protocol_ap_shm, msg_ring);
+	p_ci->msg_ring_entries = IPC_MEM_MSG_ENTRIES;
+	p_ci->msg_irq_vector = IPC_MSG_IRQ_VECTOR;
+	p_ci->device_info_irq_vector = IPC_DEVICE_IRQ_VECTOR;
+
+	ipc_mmio_set_contex_info_addr(ipc_imem->mmio, addr);
+
+	ipc_protocol->pm = ipc_pm_init(ipc_imem);
+
+	if (!ipc_protocol->pm) {
+		ipc_protocol_deinit(ipc_protocol);
+		return NULL;
+	}
+
+	return ipc_protocol;
+}
+
+void ipc_protocol_deinit(struct iosm_protocol *proto)
+{
+	pci_free_consistent(proto->pcie->pci, sizeof(*proto->p_ap_shm),
+			    proto->p_ap_shm, proto->phy_ap_shm);
+
+	proto->p_ap_shm = NULL;
+	/* Free PM component. Must be freed before pcie, stats, params */
+	ipc_pm_deinit(proto->pm);
+	kfree(proto);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h
new file mode 100644
index 000000000000..e963c1901d23
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_PROTOCOL_H
+#define IOSM_IPC_PROTOCOL_H
+
+#include "iosm_ipc_imem.h"
+#include "iosm_ipc_pm.h"
+#include "iosm_ipc_protocol_ops.h"
+
+/* Trigger the doorbell interrupt on CP. */
+#define IPC_DOORBELL_IRQ_HPDA 0
+#define IPC_DOORBELL_IRQ_IPC 1
+#define IPC_DOORBELL_IRQ_SLEEP 2
+
+/* IRQ vector number. */
+#define IPC_DEVICE_IRQ_VECTOR 0
+#define IPC_MSG_IRQ_VECTOR 0
+#define IPC_UL_PIPE_IRQ_VECTOR 0
+#define IPC_DL_PIPE_IRQ_VECTOR 0
+
+#define IPC_MEM_MSG_ENTRIES 128
+
+/* Default time out for sending IPC messages like open pipe, close pipe etc.
+ * during run mode.
+ *
+ * If the message interface lock to CP times out, the link to CP is broken.
+ * mode : run mode (IPC_MEM_EXEC_STAGE_RUN)
+ * unit : milliseconds
+ */
+#define IPC_MSG_COMPLETE_RUN_DEFAULT_TIMEOUT 500 /* 0.5 seconds */
+
+/* Default time out for sending IPC messages like open pipe, close pipe etc.
+ * during boot mode.
+ *
+ * If the message interface lock to CP times out, the link to CP is broken.
+ * mode : boot mode
+ * (IPC_MEM_EXEC_STAGE_BOOT | IPC_MEM_EXEC_STAGE_PSI | IPC_MEM_EXEC_STAGE_EBL)
+ * unit : milliseconds
+ */
+#define IPC_MSG_COMPLETE_BOOT_DEFAULT_TIMEOUT 500 /* 0.5 seconds */
+
+/**
+ * struct ipc_protocol_context_info - Structure of the context info
+ * @device_info_addr:		64 bit address to device info
+ * @head_array:			64 bit address to head pointer arr for the pipes
+ * @tail_array:			64 bit address to tail pointer arr for the pipes
+ * @msg_head:			64 bit address to message head pointer
+ * @msg_tail:			64 bit address to message tail pointer
+ * @msg_ring_addr:		64 bit pointer to the message ring buffer
+ * @msg_ring_entries:		This field provides the number of entries which
+ *				the MR can hold
+ * @msg_irq_vector:		This field provides the IRQ which shall be
+ *				generated by the EP device when generating
+ *				completion for Messages.
+ * @device_info_irq_vector:	This field provides the IRQ which shall be
+ *				generated by the EP dev after updating Dev. Info
+ * @reserved:			reserved
+ */
+struct ipc_protocol_context_info {
+	phys_addr_t device_info_addr;
+	phys_addr_t head_array;
+	phys_addr_t tail_array;
+	phys_addr_t msg_head;
+	phys_addr_t msg_tail;
+	phys_addr_t msg_ring_addr;
+	u32 msg_ring_entries : 16;
+	u32 msg_irq_vector : 5;
+	u32 device_info_irq_vector : 5;
+	u32 reserved : 6;
+};
+
+/* Structure for the device information. */
+struct ipc_protocol_device_info {
+	u32 execution_stage;
+	u32 ipc_status;
+	u32 device_sleep_notification;
+};
+
+/* Protocol Shared Memory Structure */
+struct ipc_protocol_ap_shm {
+	struct ipc_protocol_context_info ci;
+	struct ipc_protocol_device_info device_info;
+
+	u32 msg_head;
+	u32 head_array[IPC_MEM_MAX_PIPES];
+	u32 msg_tail;
+	u32 tail_array[IPC_MEM_MAX_PIPES];
+
+	/* Circular buffers for the read/tail and write/head indeces. */
+	union ipc_mem_msg_entry msg_ring[IPC_MEM_MSG_ENTRIES];
+};
+
+/**
+ * struct iosm_protocol - Structure for IPC protocol.
+ * @p_ap_shm:		Pointer to Protocol Shared Memory Structure
+ * @pm:			Pointer to struct iosm_pm
+ * @pcie:		Pointer to struct iosm_pcie
+ * @imem:		Pointer to struct iosm_imem
+ * @rsp_ring:		Array of OS completion objects to be triggered once CP
+ *			acknowledges a request in the message ring
+ * @dev:		Pointer to device structure
+ * @phy_ap_shm:		Physical/Mapped representation of the shared memory info
+ * @old_msg_tail:	Old msg tail ptr, until AP has handled ACK's from CP
+ */
+struct iosm_protocol {
+	struct ipc_protocol_ap_shm *p_ap_shm;
+	struct iosm_pm *pm;
+	struct iosm_pcie *pcie;
+	struct iosm_imem *imem;
+	struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES];
+	struct device *dev;
+	phys_addr_t phy_ap_shm;
+	u32 old_msg_tail;
+};
+
+/**
+ * struct ipc_call_msg_send_args - Structure for message argument for
+ *				   tasklet function.
+ * @prep_args:		Arguments for message preparation function
+ * @response:		Can be NULL if result can be ignored
+ * @msg_type:		Message Type
+ */
+struct ipc_call_msg_send_args {
+	union ipc_msg_prep_args *prep_args;
+	struct ipc_rsp *response;
+	enum ipc_msg_prep_type msg_type;
+};
+
+/**
+ * ipc_protocol_tq_msg_send - Call message preparation func. and Send msg to CP
+ * @ipc_protocol:	Pointer to ipc_protocol instance
+ * @msg_type:		Message type
+ * @prep_args:		Message arguments
+ * @response:		Pointer to a response object which has a
+ *			completion object and return code.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+int ipc_protocol_tq_msg_send(struct iosm_protocol *ipc_protocol,
+			     enum ipc_msg_prep_type msg_type,
+			     union ipc_msg_prep_args *prep_args,
+			     struct ipc_rsp *response);
+
+/**
+ * ipc_protocol_msg_send - Send a message to CP and wait for response
+ * @ipc_protocol:	Pointer to ipc_protocol instance
+ * @prep:		Message type
+ * @prep_args:		Message arguments
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+int ipc_protocol_msg_send(struct iosm_protocol *ipc_protocol,
+			  enum ipc_msg_prep_type prep,
+			  union ipc_msg_prep_args *prep_args);
+
+/**
+ * ipc_protocol_suspend - Signal to CP that host wants to go to sleep (suspend).
+ * @ipc_protocol:	Pointer to ipc_protocol instance
+ *
+ * Returns: true if host can suspend, false if suspend must be aborted.
+ */
+bool ipc_protocol_suspend(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_resume - Signal to CP that host wants to resume operation.
+ * @ipc_protocol:	Pointer to ipc_protocol instance
+ *
+ * Returns: true if host can resume, false if there is a problem.
+ */
+bool ipc_protocol_resume(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_pm_dev_sleep_handle - Handles the Device Sleep state change
+ *				      notification.
+ * @ipc_protocol:	Pointer to ipc_protocol instance.
+ *
+ * Returns: True if sleep notification handled, False otherwise.
+ */
+bool ipc_protocol_pm_dev_sleep_handle(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_doorbell_trigger - Wrapper for PM function which wake up the
+ *				   device if it is in low power mode
+ *				   and trigger a head pointer update interrupt.
+ * @ipc_protocol:	Pointer to ipc_protocol instance.
+ * @identifier:		Specifies what component triggered hpda
+ *			update irq
+ */
+void ipc_protocol_doorbell_trigger(struct iosm_protocol *ipc_protocol,
+				   u32 identifier);
+
+/**
+ * ipc_protocol_sleep_notification_string - Returns last Sleep Notification as
+ *					    string.
+ * @ipc_protocol:	Instance pointer of Protocol module.
+ *
+ * Returns: Pointer to string.
+ */
+const char *
+ipc_protocol_sleep_notification_string(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_init - Allocates IPC protocol instance data
+ * @ipc_imem:		Pointer to iosm_imem structure
+ *
+ * Returns: Address of ipc protocol instance data
+ */
+struct iosm_protocol *ipc_protocol_init(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_protocol_deinit - Deallocates IPC protocol instance data
+ * @ipc_protocol:	pointer to the IPC protocol instance data
+ */
+void ipc_protocol_deinit(struct iosm_protocol *ipc_protocol);
+
+#endif
-- 
2.12.3


^ permalink raw reply related

* [RFC 11/18] net: iosm: encode or decode datagram
From: M Chetan Kumar @ 2020-11-23 13:51 UTC (permalink / raw)
  To: netdev, linux-wireless; +Cc: johannes, krishna.c.sudi, m.chetan.kumar
In-Reply-To: <20201123135123.48892-1-m.chetan.kumar@intel.com>

1) Encode UL packet into datagram.
2) Decode DL datagram and route it to network layer.
3) Supports credit based flow control.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
---
 drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 902 +++++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_mux_codec.h | 194 +++++++
 2 files changed, 1096 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_mux_codec.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
new file mode 100644
index 000000000000..54437651704e
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
@@ -0,0 +1,902 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#include <linux/if_vlan.h>
+
+#include "iosm_ipc_imem_ops.h"
+#include "iosm_ipc_mux_codec.h"
+#include "iosm_ipc_task_queue.h"
+
+/* Test the link power state and send a MUX command in blocking mode. */
+static int mux_tq_cmd_send(void *instance, int arg, void *msg, size_t size)
+{
+	struct iosm_mux *ipc_mux = ((struct iosm_imem *)instance)->mux;
+	const struct mux_acb *acb = msg;
+
+	skb_queue_tail(&ipc_mux->channel->ul_list, acb->skb);
+	imem_ul_send(ipc_mux->imem);
+
+	return 0;
+}
+
+static int mux_acb_send(struct iosm_mux *ipc_mux, bool blocking)
+{
+	struct completion *completion = &ipc_mux->channel->ul_sem;
+
+	if (ipc_task_queue_send_task(ipc_mux->imem, mux_tq_cmd_send, 0,
+				     &ipc_mux->acb, sizeof(ipc_mux->acb),
+				     false)) {
+		dev_err(ipc_mux->dev, "unable to send mux command");
+		return -1;
+	}
+
+	/* if blocking, suspend the app and wait for irq in the flash or
+	 * crash phase. return false on timeout to indicate failure.
+	 */
+	if (blocking) {
+		u32 wait_time_milliseconds = IPC_MUX_CMD_RUN_DEFAULT_TIMEOUT;
+
+		reinit_completion(completion);
+
+		if (WAIT_FOR_TIMEOUT(completion, wait_time_milliseconds) == 0) {
+			dev_err(ipc_mux->dev, "ch[%d] timeout",
+				ipc_mux->channel_id);
+			ipc_uevent_send(ipc_mux->imem->dev, UEVENT_MDM_TIMEOUT);
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+/* Prepare mux Command */
+static struct mux_lite_cmdh *mux_lite_add_cmd(struct iosm_mux *ipc_mux, u32 cmd,
+					      struct mux_acb *acb, void *param,
+					      u32 param_size)
+{
+	struct mux_lite_cmdh *cmdh = (struct mux_lite_cmdh *)acb->skb->data;
+
+	cmdh->signature = MUX_SIG_CMDH;
+	cmdh->command_type = cmd;
+	cmdh->if_id = acb->if_id;
+
+	acb->cmd = cmd;
+
+	cmdh->cmd_len = offsetof(struct mux_lite_cmdh, param) + param_size;
+	cmdh->transaction_id = ipc_mux->tx_transaction_id++;
+
+	if (param)
+		memcpy(&cmdh->param, param, param_size);
+
+	skb_put(acb->skb, cmdh->cmd_len);
+
+	return cmdh;
+}
+
+static int mux_acb_alloc(struct iosm_mux *ipc_mux)
+{
+	struct mux_acb *acb = &ipc_mux->acb;
+	struct sk_buff *skb;
+	dma_addr_t mapping;
+
+	/* Allocate skb memory for the uplink buffer. */
+	skb = ipc_pcie_alloc_skb(ipc_mux->pcie, MUX_MAX_UL_ACB_BUF_SIZE,
+				 GFP_ATOMIC, &mapping, DMA_TO_DEVICE, 0);
+	if (!skb)
+		return -ENOMEM;
+
+	/* Save the skb address. */
+	acb->skb = skb;
+
+	memset(skb->data, 0, MUX_MAX_UL_ACB_BUF_SIZE);
+
+	return 0;
+}
+
+int mux_dl_acb_send_cmds(struct iosm_mux *ipc_mux, u32 cmd_type, u8 if_id,
+			 u32 transaction_id, union mux_cmd_param *param,
+			 size_t res_size, bool blocking, bool respond)
+{
+	struct mux_acb *acb = &ipc_mux->acb;
+	struct mux_lite_cmdh *ack_lite;
+	int ret = 0;
+
+	acb->if_id = if_id;
+	ret = mux_acb_alloc(ipc_mux);
+	if (ret)
+		return ret;
+
+	ack_lite = mux_lite_add_cmd(ipc_mux, cmd_type, acb, param, res_size);
+	if (respond)
+		ack_lite->transaction_id = (u32)transaction_id;
+
+	ret = mux_acb_send(ipc_mux, blocking);
+
+	return ret;
+}
+
+void mux_netif_tx_flowctrl(struct mux_session *session, int idx, bool on)
+{
+	/* Inform the network interface to start/stop flow ctrl */
+	if (ipc_wwan_is_tx_stopped(session->wwan, idx) != on)
+		ipc_wwan_tx_flowctrl(session->wwan, idx, on);
+}
+
+static int mux_dl_cmdresps_decode_process(struct iosm_mux *ipc_mux,
+					  struct mux_lite_cmdh *cmdh)
+{
+	struct mux_acb *acb = &ipc_mux->acb;
+
+	switch (cmdh->command_type) {
+	case MUX_CMD_OPEN_SESSION_RESP:
+	case MUX_CMD_CLOSE_SESSION_RESP:
+		/* Resume the control application. */
+		acb->got_param = cmdh->param;
+		break;
+
+	case MUX_LITE_CMD_FLOW_CTL_ACK:
+		/* This command type is not expected as response for
+		 * Aggregation version of the protocol. So return non-zero.
+		 */
+		if (ipc_mux->protocol != MUX_LITE)
+			return -EINVAL;
+
+		dev_dbg(ipc_mux->dev, "if[%u] FLOW_CTL_ACK(%u) received",
+			cmdh->if_id, cmdh->transaction_id);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	acb->wanted_response = MUX_CMD_INVALID;
+	acb->got_response = cmdh->command_type;
+	complete(&ipc_mux->channel->ul_sem);
+
+	return 0;
+}
+
+static int mux_dl_dlcmds_decode_process(struct iosm_mux *ipc_mux,
+					struct mux_lite_cmdh *cmdh)
+{
+	union mux_cmd_param *param = &cmdh->param;
+	struct mux_session *session;
+	int new_size;
+
+	dev_dbg(ipc_mux->dev, "if_id[%d]: dlcmds decode process %d",
+		cmdh->if_id, cmdh->command_type);
+
+	switch (cmdh->command_type) {
+	case MUX_LITE_CMD_FLOW_CTL:
+
+		if (cmdh->if_id >= ipc_mux->nr_sessions) {
+			dev_err(ipc_mux->dev, "if_id [%d] not valid",
+				cmdh->if_id);
+			return -EINVAL; /* No session interface id. */
+		}
+
+		session = &ipc_mux->session[cmdh->if_id];
+
+		new_size = offsetof(struct mux_lite_cmdh, param) +
+			   sizeof(param->flow_ctl);
+		if (param->flow_ctl.mask == 0xFFFFFFFF) {
+			/* Backward Compatibility */
+			if (cmdh->cmd_len == new_size)
+				session->flow_ctl_mask = param->flow_ctl.mask;
+			else
+				session->flow_ctl_mask = ~0;
+			/* if CP asks for FLOW CTRL Enable
+			 * then set our internal flow control Tx flag
+			 * to limit uplink session queueing
+			 */
+			session->net_tx_stop = true;
+			/* Update the stats */
+			session->flow_ctl_en_cnt++;
+		} else if (param->flow_ctl.mask == 0) {
+			/* Just reset the Flow control mask and let
+			 * mux_flow_ctrl_low_thre_b take control on
+			 * our internal Tx flag and enabling kernel
+			 * flow control
+			 */
+			/* Backward Compatibility */
+			if (cmdh->cmd_len == new_size)
+				session->flow_ctl_mask = param->flow_ctl.mask;
+			else
+				session->flow_ctl_mask = 0;
+			/* Update the stats */
+			session->flow_ctl_dis_cnt++;
+		} else {
+			break;
+		}
+
+		dev_dbg(ipc_mux->dev, "if[%u] FLOW CTRL 0x%08X", cmdh->if_id,
+			param->flow_ctl.mask);
+		break;
+
+	case MUX_LITE_CMD_LINK_STATUS_REPORT:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Decode and Send appropriate response to a command block. */
+static void mux_dl_cmd_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb)
+{
+	struct mux_lite_cmdh *cmdh = (struct mux_lite_cmdh *)skb->data;
+
+	if (mux_dl_cmdresps_decode_process(ipc_mux, cmdh)) {
+		/* Unable to decode command response indicates the cmd_type
+		 * may be a command instead of response. So try to decoding it.
+		 */
+		if (!mux_dl_dlcmds_decode_process(ipc_mux, cmdh)) {
+			/* Decoded command may need a response. Give the
+			 * response according to the command type.
+			 */
+			union mux_cmd_param *mux_cmd = NULL;
+			size_t size = 0;
+			u32 cmd = MUX_LITE_CMD_LINK_STATUS_REPORT_RESP;
+
+			if (cmdh->command_type ==
+			    MUX_LITE_CMD_LINK_STATUS_REPORT) {
+				mux_cmd = &cmdh->param;
+				mux_cmd->link_status_resp.response =
+					MUX_CMD_RESP_SUCCESS;
+				/* response field is u32 */
+				size = sizeof(u32);
+			} else if (cmdh->command_type ==
+				   MUX_LITE_CMD_FLOW_CTL) {
+				cmd = MUX_LITE_CMD_FLOW_CTL_ACK;
+			} else {
+				return;
+			}
+
+			if (mux_dl_acb_send_cmds(ipc_mux, cmd, cmdh->if_id,
+						 cmdh->transaction_id, mux_cmd,
+						 size, false, true))
+				dev_err(ipc_mux->dev,
+					"if_id %d: cmd send failed",
+					cmdh->if_id);
+		}
+	}
+}
+
+/* Pass the DL packet to the netif layer. */
+static int mux_net_receive(struct iosm_mux *ipc_mux, int if_id,
+			   struct iosm_wwan *wwan, u32 offset, u8 service_class,
+			   struct sk_buff *skb)
+{
+	/* for "zero copy" use clone */
+	struct sk_buff *dest_skb = skb_clone(skb, GFP_ATOMIC);
+
+	if (!dest_skb)
+		return -1;
+
+	skb_pull(dest_skb, offset);
+
+	skb_set_tail_pointer(dest_skb, dest_skb->len);
+
+	/* Goto the start of the Ethernet header. */
+	skb_push(dest_skb, ETH_HLEN);
+
+	/* map session to vlan */
+	__vlan_hwaccel_put_tag(dest_skb, htons(ETH_P_8021Q), if_id + 1);
+
+	/* Pass the packet to the netif layer. */
+	dest_skb->priority = service_class;
+
+	return ipc_wwan_receive(wwan, dest_skb, false);
+}
+
+/* Decode Flow Credit Table in the block */
+static void mux_dl_fcth_decode(struct iosm_mux *ipc_mux, void *block)
+{
+	struct ipc_mem_lite_gen_tbl *fct = (struct ipc_mem_lite_gen_tbl *)block;
+	struct iosm_wwan *wwan;
+	int ul_credits = 0;
+	int if_id = 0;
+
+	if (fct->vfl_length != sizeof(fct->vfl[0].nr_of_bytes)) {
+		dev_err(ipc_mux->dev, "unexpected FCT length: %d",
+			fct->vfl_length);
+		return;
+	}
+
+	if_id = fct->if_id;
+	if (if_id >= ipc_mux->nr_sessions) {
+		dev_err(ipc_mux->dev, "not supported if_id: %d", if_id);
+		return;
+	}
+
+	/* Is the session active ? */
+	wwan = ipc_mux->session[if_id].wwan;
+	if (!wwan) {
+		dev_err(ipc_mux->dev, "session Net ID is NULL");
+		return;
+	}
+
+	ul_credits = fct->vfl[0].nr_of_bytes;
+
+	dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d",
+		if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits);
+
+	/* Update the Flow Credit information from ADB */
+	ipc_mux->session[if_id].ul_flow_credits += ul_credits;
+
+	/* Check whether the TX can be started */
+	if (ipc_mux->session[if_id].ul_flow_credits > 0) {
+		ipc_mux->session[if_id].net_tx_stop = false;
+		mux_netif_tx_flowctrl(&ipc_mux->session[if_id],
+				      ipc_mux->session[if_id].if_id, false);
+	}
+}
+
+/* Decode non-aggregated datagram */
+static void mux_dl_adgh_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb)
+{
+	u32 pad_len, packet_offset;
+	struct iosm_wwan *wwan;
+	struct mux_adgh *adgh;
+	u8 *block = skb->data;
+	int rc = 0;
+	u8 if_id;
+
+	adgh = (struct mux_adgh *)block;
+
+	if (adgh->signature != MUX_SIG_ADGH) {
+		dev_err(ipc_mux->dev, "invalid ADGH signature received");
+		return;
+	}
+
+	if_id = adgh->if_id;
+	if (if_id >= ipc_mux->nr_sessions) {
+		dev_err(ipc_mux->dev, "invalid if_id while decoding %d", if_id);
+		return;
+	}
+
+	/* Is the session active ? */
+	wwan = ipc_mux->session[if_id].wwan;
+	if (!wwan) {
+		dev_err(ipc_mux->dev, "session Net ID is NULL");
+		return;
+	}
+
+	/* Store the pad len for the corresponding session
+	 * Pad bytes as negotiated in the open session less the header size
+	 * (see session management chapter for details).
+	 * If resulting padding is zero or less, the additional head padding is
+	 * omitted. For e.g., if HEAD_PAD_LEN = 16 or less, this field is
+	 * omitted if HEAD_PAD_LEN = 20, then this field will have 4 bytes
+	 * set to zero
+	 */
+	pad_len =
+		ipc_mux->session[if_id].dl_head_pad_len - IPC_MEM_DL_ETH_OFFSET;
+	packet_offset = sizeof(*adgh) + pad_len;
+
+	if_id += ipc_mux->wwan_q_offset;
+
+	/* Pass the packet to the netif layer */
+	rc = mux_net_receive(ipc_mux, if_id, wwan, packet_offset,
+			     adgh->service_class, skb);
+	if (rc) {
+		dev_err(ipc_mux->dev, "mux adgh decoding error");
+		return;
+	}
+	ipc_mux->session[if_id].flush = 1;
+}
+
+void ipc_mux_dl_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb)
+{
+	u32 signature;
+
+	if (!skb->data || !ipc_mux)
+		return;
+
+	/* Decode the MUX header type. */
+	signature = le32_to_cpup((__le32 *)skb->data);
+
+	switch (signature) {
+	case MUX_SIG_ADGH:
+		mux_dl_adgh_decode(ipc_mux, skb);
+		break;
+
+	case MUX_SIG_FCTH:
+		mux_dl_fcth_decode(ipc_mux, skb->data);
+		break;
+
+	case MUX_SIG_CMDH:
+		mux_dl_cmd_decode(ipc_mux, skb);
+		break;
+
+	default:
+		dev_err(ipc_mux->dev, "invalid ABH signature");
+	}
+
+	ipc_pcie_kfree_skb(ipc_mux->pcie, skb);
+}
+
+static int mux_ul_skb_alloc(struct iosm_mux *ipc_mux, struct mux_adb *ul_adb,
+			    u32 type)
+{
+	/* Take the first element of the free list. */
+	struct sk_buff *skb = skb_dequeue(&ul_adb->free_list);
+	int qlt_size;
+
+	if (!skb)
+		return -1; /* Wait for a free ADB skb. */
+
+	/* Mark it as UL ADB to select the right free operation. */
+	IPC_CB(skb)->op_type = (u8)UL_MUX_OP_ADB;
+
+	switch (type) {
+	case MUX_SIG_ADGH:
+		/* Save the ADB memory settings. */
+		ul_adb->dest_skb = skb;
+		ul_adb->buf = skb->data;
+		ul_adb->size = IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE;
+		/* reset statistic counter */
+		ul_adb->if_cnt = 0;
+		ul_adb->payload_size = 0;
+		ul_adb->dg_cnt_total = 0;
+
+		ul_adb->adgh = (struct mux_adgh *)skb->data;
+		memset(ul_adb->adgh, 0, sizeof(struct mux_adgh));
+		break;
+
+	case MUX_SIG_QLTH:
+		qlt_size = offsetof(struct ipc_mem_lite_gen_tbl, vfl) +
+			   (MUX_QUEUE_LEVEL * sizeof(struct mux_lite_vfl));
+
+		if (qlt_size > IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE) {
+			dev_err(ipc_mux->dev,
+				"can't support. QLT size:%d SKB size: %d",
+				qlt_size, IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE);
+			return -1;
+		}
+
+		ul_adb->qlth_skb = skb;
+		memset((ul_adb->qlth_skb)->data, 0, qlt_size);
+		skb_put(skb, qlt_size);
+		break;
+	}
+
+	return 0;
+}
+
+static void mux_ul_adgh_finish(struct iosm_mux *ipc_mux)
+{
+	struct mux_adb *ul_adb = &ipc_mux->ul_adb;
+	long long bytes;
+	char *str;
+
+	if (!ul_adb || !ul_adb->dest_skb) {
+		dev_err(ipc_mux->dev, "no dest skb");
+		return;
+	}
+	skb_put(ul_adb->dest_skb, ul_adb->adgh->length);
+	skb_queue_tail(&ipc_mux->channel->ul_list, ul_adb->dest_skb);
+	ul_adb->dest_skb = NULL;
+
+	if (ipc_mux->ul_flow == MUX_UL_ON_CREDITS) {
+		struct mux_session *session;
+
+		session = &ipc_mux->session[ul_adb->adgh->if_id];
+		str = "available_credits";
+		bytes = (long long)session->ul_flow_credits;
+
+	} else {
+		str = "pend_bytes";
+		bytes = ipc_mux->ul_data_pend_bytes;
+		ipc_mux->ul_data_pend_bytes += ul_adb->adgh->length;
+	}
+
+	dev_dbg(ipc_mux->dev, "UL ADGH: size=%d, if_id=%d, payload=%d, %s=%lld",
+		ul_adb->adgh->length, ul_adb->adgh->if_id, ul_adb->payload_size,
+		str, bytes);
+}
+
+/* Allocates an ADB from the free list and initializes it with ADBH  */
+static bool mux_ul_adb_allocate(struct iosm_mux *ipc_mux, struct mux_adb *adb,
+				int *size_needed, u32 type)
+{
+	bool ret_val = false;
+	int status;
+
+	if (!adb->dest_skb) {
+		/* Allocate memory for the ADB including of the
+		 * datagram table header.
+		 */
+		status = mux_ul_skb_alloc(ipc_mux, adb, type);
+		if (status != 0)
+			/* Is a pending ADB available ? */
+			ret_val = true; /* None. */
+
+		/* Update size need to zero only for new ADB memory */
+		*size_needed = 0;
+	}
+
+	return ret_val;
+}
+
+/* Informs the network stack to stop sending further packets for all opened
+ * sessions
+ */
+static void mux_stop_tx_for_all_sessions(struct iosm_mux *ipc_mux)
+{
+	struct mux_session *session;
+	int idx;
+
+	for (idx = 0; idx < ipc_mux->nr_sessions; idx++) {
+		session = &ipc_mux->session[idx];
+
+		if (!session->wwan)
+			continue;
+
+		session->net_tx_stop = true;
+	}
+}
+
+/* Sends Queue Level Table of all opened sessions */
+static bool mux_lite_send_qlt(struct iosm_mux *ipc_mux)
+{
+	struct ipc_mem_lite_gen_tbl *qlt;
+	struct mux_session *session;
+	bool qlt_updated = false;
+	int i, ql_idx;
+	int qlt_size;
+
+	if (!ipc_mux->initialized || ipc_mux->state != MUX_S_ACTIVE)
+		return qlt_updated;
+
+	qlt_size = offsetof(struct ipc_mem_lite_gen_tbl, vfl) +
+		   MUX_QUEUE_LEVEL * sizeof(struct mux_lite_vfl);
+
+	for (i = 0; i < ipc_mux->nr_sessions; i++) {
+		session = &ipc_mux->session[i];
+
+		if (!session->wwan || session->flow_ctl_mask != 0)
+			continue;
+
+		if (mux_ul_skb_alloc(ipc_mux, &ipc_mux->ul_adb, MUX_SIG_QLTH)) {
+			dev_err(ipc_mux->dev,
+				"no reserved mem to send QLT of if_id: %d", i);
+			break;
+		}
+
+		/* Prepare QLT */
+		qlt = (struct ipc_mem_lite_gen_tbl *)(ipc_mux->ul_adb.qlth_skb)
+			      ->data;
+		qlt->signature = MUX_SIG_QLTH;
+		qlt->length = qlt_size;
+		qlt->if_id = i;
+		qlt->vfl_length = MUX_QUEUE_LEVEL * sizeof(struct mux_lite_vfl);
+		qlt->reserved[0] = 0;
+		qlt->reserved[1] = 0;
+
+		for (ql_idx = 0; ql_idx < MUX_QUEUE_LEVEL; ql_idx++)
+			qlt->vfl[ql_idx].nr_of_bytes = session->ul_list.qlen;
+
+		/* Add QLT to the transfer list. */
+		skb_queue_tail(&ipc_mux->channel->ul_list,
+			       ipc_mux->ul_adb.qlth_skb);
+
+		qlt_updated = true;
+		ipc_mux->ul_adb.qlth_skb = NULL;
+	}
+
+	if (qlt_updated)
+		/* Updates the TDs with ul_list */
+		(void)imem_ul_write_td(ipc_mux->imem);
+
+	return qlt_updated;
+}
+
+/* Checks the available credits for the specified session and returns
+ * number of packets for which credits are available.
+ */
+static int mux_ul_bytes_credits_check(struct iosm_mux *ipc_mux,
+				      struct mux_session *session,
+				      struct sk_buff_head *ul_list,
+				      int max_nr_of_pkts)
+{
+	int pkts_to_send = 0;
+	struct sk_buff *skb;
+	int credits = 0;
+
+	if (!ipc_mux || !session || !ul_list)
+		return 0;
+
+	if (ipc_mux->ul_flow == MUX_UL_ON_CREDITS) {
+		credits = session->ul_flow_credits;
+		if (credits <= 0) {
+			dev_dbg(ipc_mux->dev,
+				"FC::if_id[%d] Insuff.Credits/Qlen:%d/%u",
+				session->if_id, session->ul_flow_credits,
+				session->ul_list.qlen); /* nr_of_bytes */
+			return 0;
+		}
+	} else {
+		credits = IPC_MEM_MUX_UL_FLOWCTRL_HIGH_B -
+			  ipc_mux->ul_data_pend_bytes;
+		if (credits <= 0) {
+			mux_stop_tx_for_all_sessions(ipc_mux);
+
+			dev_dbg(ipc_mux->dev,
+				"if_id[%d] Stopped encoding.PendBytes: %llu, high_thresh: %d",
+				session->if_id, ipc_mux->ul_data_pend_bytes,
+				IPC_MEM_MUX_UL_FLOWCTRL_HIGH_B);
+			return 0;
+		}
+	}
+
+	/* Check if there are enough credits/bytes available to send the
+	 * requested max_nr_of_pkts. Otherwise restrict the nr_of_pkts
+	 * depending on available credits.
+	 */
+	skb_queue_walk(ul_list, skb)
+	{
+		if (!(credits >= skb->len && pkts_to_send < max_nr_of_pkts))
+			break;
+		credits -= skb->len;
+		pkts_to_send++;
+	}
+
+	return pkts_to_send;
+}
+
+/* Encode the UL IP packet according to Lite spec. */
+static int mux_ul_adgh_encode(struct iosm_mux *ipc_mux, int session_id,
+			      struct mux_session *session,
+			      struct sk_buff_head *ul_list, struct mux_adb *adb,
+			      int nr_of_pkts)
+{
+	int offset = sizeof(struct mux_adgh);
+	int adb_updated = -EINVAL;
+	struct sk_buff *src_skb;
+	int aligned_size = 0;
+	int nr_of_skb = 0;
+	u32 pad_len = 0;
+	int vlan_id;
+
+	/* Re-calculate the number of packets depending on number of bytes to be
+	 * processed/available credits.
+	 */
+	nr_of_pkts = mux_ul_bytes_credits_check(ipc_mux, session, ul_list,
+						nr_of_pkts);
+
+	/* If calculated nr_of_pkts from available credits is <= 0
+	 * then nothing to do.
+	 */
+	if (nr_of_pkts <= 0)
+		return 0;
+
+	/* Read configured UL head_pad_length for session.*/
+	if (session->ul_head_pad_len > IPC_MEM_DL_ETH_OFFSET)
+		pad_len = session->ul_head_pad_len - IPC_MEM_DL_ETH_OFFSET;
+
+	/* Process all pending UL packets for this session
+	 * depending on the allocated datagram table size.
+	 */
+	while (nr_of_pkts > 0) {
+		/* get destination skb allocated */
+		if (mux_ul_adb_allocate(ipc_mux, adb, &ipc_mux->size_needed,
+					MUX_SIG_ADGH)) {
+			dev_err(ipc_mux->dev, "no reserved memory for ADGH");
+			return -ENOMEM;
+		}
+
+		/* Peek at the head of the list. */
+		src_skb = skb_peek(ul_list);
+		if (!src_skb) {
+			dev_err(ipc_mux->dev,
+				"skb peek return NULL with count : %d",
+				nr_of_pkts);
+			break;
+		}
+
+		/* Calculate the memory value. */
+		aligned_size = ALIGN((pad_len + src_skb->len), 4);
+
+		ipc_mux->size_needed = sizeof(struct mux_adgh) + aligned_size;
+
+		if (ipc_mux->size_needed > adb->size) {
+			dev_dbg(ipc_mux->dev, "size needed %d, adgh size %d",
+				ipc_mux->size_needed, adb->size);
+			/* Return 1 if any IP packet is added to the transfer
+			 * list.
+			 */
+			return nr_of_skb ? 1 : 0;
+		}
+
+		vlan_id = session_id + ipc_mux->wwan_q_offset;
+		ipc_wwan_update_stats(session->wwan, vlan_id, src_skb->len,
+				      true);
+
+		/* Add buffer (without head padding to next pending transfer) */
+		memcpy(adb->buf + offset + pad_len, src_skb->data,
+		       src_skb->len);
+
+		adb->adgh->signature = MUX_SIG_ADGH;
+		adb->adgh->if_id = session_id;
+		adb->adgh->length =
+			sizeof(struct mux_adgh) + pad_len + src_skb->len;
+		adb->adgh->service_class = src_skb->priority;
+		adb->adgh->next_count = --nr_of_pkts;
+		adb->dg_cnt_total++;
+		adb->payload_size += src_skb->len;
+
+		if (ipc_mux->ul_flow == MUX_UL_ON_CREDITS)
+			/* Decrement the credit value as we are processing the
+			 * datagram from the UL list.
+			 */
+			session->ul_flow_credits -= src_skb->len;
+
+		/* Remove the processed elements and free it. */
+		src_skb = skb_dequeue(ul_list);
+		dev_kfree_skb(src_skb);
+		nr_of_skb++;
+
+		mux_ul_adgh_finish(ipc_mux);
+	}
+
+	if (nr_of_skb) {
+		/* Send QLT info to modem if pending bytes > high watermark
+		 * in case of mux lite
+		 */
+		if (ipc_mux->ul_flow == MUX_UL_ON_CREDITS ||
+		    ipc_mux->ul_data_pend_bytes >=
+			    IPC_MEM_MUX_UL_FLOWCTRL_LOW_B)
+			adb_updated = mux_lite_send_qlt(ipc_mux);
+		else
+			adb_updated = 1;
+
+		/* Updates the TDs with ul_list */
+		(void)imem_ul_write_td(ipc_mux->imem);
+	}
+
+	return adb_updated;
+}
+
+bool ipc_mux_ul_data_encode(struct iosm_mux *ipc_mux)
+{
+	struct sk_buff_head *ul_list;
+	struct mux_session *session;
+	int updated = 0;
+	int session_id;
+	int dg_n;
+	int i;
+
+	if (!ipc_mux || ipc_mux->state != MUX_S_ACTIVE ||
+	    ipc_mux->adb_prep_ongoing)
+		return false;
+
+	ipc_mux->adb_prep_ongoing = true;
+
+	for (i = 0; i < ipc_mux->nr_sessions; i++) {
+		session_id = ipc_mux->rr_next_session;
+		session = &ipc_mux->session[session_id];
+
+		/* Go to next handle rr_next_session overflow */
+		ipc_mux->rr_next_session++;
+		if (ipc_mux->rr_next_session >= ipc_mux->nr_sessions)
+			ipc_mux->rr_next_session = 0;
+
+		if (!session->wwan || session->flow_ctl_mask ||
+		    session->net_tx_stop)
+			continue;
+
+		ul_list = &session->ul_list;
+
+		/* Is something pending in UL and flow ctrl off */
+		dg_n = skb_queue_len(ul_list);
+		if (dg_n > MUX_MAX_UL_DG_ENTRIES)
+			dg_n = MUX_MAX_UL_DG_ENTRIES;
+
+		if (dg_n == 0)
+			/* Nothing to do for ipc_mux session
+			 * -> try next session id.
+			 */
+			continue;
+
+		updated = mux_ul_adgh_encode(ipc_mux, session_id, session,
+					     ul_list, &ipc_mux->ul_adb, dg_n);
+	}
+
+	ipc_mux->adb_prep_ongoing = false;
+	return updated == 1;
+}
+
+void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb)
+{
+	struct mux_adgh *adgh;
+
+	if (!ipc_mux || !skb || !skb->data)
+		return;
+
+	adgh = (struct mux_adgh *)skb->data;
+
+	if (adgh->signature == MUX_SIG_ADGH && ipc_mux->ul_flow == MUX_UL)
+		ipc_mux->ul_data_pend_bytes -= adgh->length;
+
+	if (ipc_mux->ul_flow == MUX_UL)
+		dev_dbg(ipc_mux->dev, "ul_data_pend_bytes: %lld",
+			ipc_mux->ul_data_pend_bytes);
+
+	/* Reset the skb settings. */
+	skb->tail = 0;
+	skb->len = 0;
+
+	/* Add the consumed ADB to the free list. */
+	skb_queue_tail((&ipc_mux->ul_adb.free_list), skb);
+}
+
+/* Start the NETIF uplink send transfer in MUX mode. */
+static int mux_tq_ul_trigger_encode(void *instance, int arg, void *msg,
+				    size_t size)
+{
+	struct iosm_mux *ipc_mux = ((struct iosm_imem *)instance)->mux;
+	bool ul_data_pend = false;
+
+	/* Add session UL data to a ADB and ADGH */
+	ul_data_pend = ipc_mux_ul_data_encode(ipc_mux);
+	if (ul_data_pend)
+		/* Delay the doorbell irq */
+		imem_td_update_timer_start(ipc_mux->imem);
+
+	/* reset the debounce flag */
+	ipc_mux->ev_mux_net_transmit_pending = false;
+
+	return 0;
+}
+
+int ipc_mux_ul_trigger_encode(struct iosm_mux *ipc_mux, int if_id,
+			      struct sk_buff *skb)
+{
+	struct mux_session *session = &ipc_mux->session[if_id];
+
+	if (ipc_mux->channel &&
+	    ipc_mux->channel->state != IMEM_CHANNEL_ACTIVE) {
+		dev_err(ipc_mux->dev,
+			"channel state is not IMEM_CHANNEL_ACTIVE");
+		return -1;
+	}
+
+	if (!session->wwan) {
+		dev_err(ipc_mux->dev, "session net ID is NULL");
+		return -1;
+	}
+
+	/* Session is under flow control.
+	 * Check if packet can be queued in session list, if not
+	 * suspend net tx
+	 */
+	if (skb_queue_len(&session->ul_list) >=
+	    (session->net_tx_stop ?
+		     IPC_MEM_MUX_UL_SESS_FCON_THRESHOLD :
+		     (IPC_MEM_MUX_UL_SESS_FCON_THRESHOLD *
+		      IPC_MEM_MUX_UL_SESS_FCOFF_THRESHOLD_FACTOR))) {
+		mux_netif_tx_flowctrl(session, session->if_id, true);
+		return -2;
+	}
+
+	/* Add skb to the uplink skb accumulator. */
+	skb_queue_tail(&session->ul_list, skb);
+
+	/* Inform the IPC kthread to pass uplink IP packets to CP. */
+	if (!ipc_mux->ev_mux_net_transmit_pending) {
+		ipc_mux->ev_mux_net_transmit_pending = true;
+		if (ipc_task_queue_send_task(ipc_mux->imem,
+					     mux_tq_ul_trigger_encode, 0, NULL,
+					     0, false))
+			return -1;
+	}
+	dev_dbg(ipc_mux->dev, "mux ul if[%d] qlen=%d/%u, len=%d/%d, prio=%d",
+		if_id, skb_queue_len(&session->ul_list), session->ul_list.qlen,
+		skb->len, skb->truesize, skb->priority);
+
+	return 0;
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
new file mode 100644
index 000000000000..796790113ad5
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_MUX_CODEC_H
+#define IOSM_IPC_MUX_CODEC_H
+
+#include "iosm_ipc_mux.h"
+
+/* Queue level size and reporting
+ * >1 is enable, 0 is disable
+ */
+#define MUX_QUEUE_LEVEL 1
+
+/* Size of the buffer for the IP MUX commands. */
+#define MUX_MAX_UL_ACB_BUF_SIZE 256
+
+/* Maximum number of packets in a go per session */
+#define MUX_MAX_UL_DG_ENTRIES 100
+
+/* ADGH: Signature of the Datagram Header. */
+#define MUX_SIG_ADGH 0x48474441
+
+/* CMDH: Signature of the Command Header. */
+#define MUX_SIG_CMDH 0x48444D43
+
+/* QLTH: Signature of the Queue Level Table */
+#define MUX_SIG_QLTH 0x48544C51
+
+/* FCTH: Signature of the Flow Credit Table */
+#define MUX_SIG_FCTH 0x48544346
+
+/* MUX UL session threshold factor */
+#define IPC_MEM_MUX_UL_SESS_FCOFF_THRESHOLD_FACTOR (4)
+
+/* Size of the buffer for the IP MUX Lite data buffer. */
+#define IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE (2 * 1024)
+
+/* MUX UL session threshold in number of packets */
+#define IPC_MEM_MUX_UL_SESS_FCON_THRESHOLD (64)
+
+/* Default time out for sending IPC session commands like
+ * open session, close session etc
+ * unit : milliseconds
+ */
+#define IPC_MUX_CMD_RUN_DEFAULT_TIMEOUT 1000 /* 1 second */
+
+/* MUX UL flow control lower threshold in bytes */
+#define IPC_MEM_MUX_UL_FLOWCTRL_LOW_B 10240 /* 10KB */
+
+/* MUX UL flow control higher threshold in bytes (5ms worth of data)*/
+#define IPC_MEM_MUX_UL_FLOWCTRL_HIGH_B (110 * 1024)
+
+/**
+ * struct mux_adgh - Aggregated Datagram Header.
+ * @signature:		Signature of the Aggregated Datagram Header(0x48474441)
+ * @length:		Length (in bytes) of the datagram header. This length
+ *			shall include the header size. Min value: 0x10
+ * @if_id:		ID of the interface the datagrams belong to
+ * @opt_ipv4v6:		Indicates IPv4(=0)/IPv6(=1), It is optional if not
+ *			used set it to zero.
+ * @reserved:		Reserved bits. Set to zero.
+ * @service_class:	Service class identifier for the datagram.
+ * @next_count:		Count of the datagrams that shall be following this
+ *			datagrams for this interface. A count of zero means
+ *			the next datagram may not belong to this interface.
+ * @reserved1:		Reserved bytes, Set to zero
+ */
+struct mux_adgh {
+	u32 signature;
+	u16 length;
+	u8 if_id;
+	u8 opt_ipv4v6 : 1;
+	u8 reserved : 7;
+	u8 service_class;
+	u8 next_count;
+	u8 reserved1[6];
+};
+
+/**
+ * struct mux_lite_cmdh - MUX Lite Command Header
+ * @signature:		Signature of the Command Header(0x48444D43)
+ * @cmd_len:		Length (in bytes) of the command. This length shall
+ *			include the header size. Minimum value: 0x10
+ * @if_id:		ID of the interface the commands in the table belong to.
+ * @reserved:		Reserved Set to zero.
+ * @command_type:	Command Enum.
+ * @transaction_id:	4 byte value shall be generated and sent along with a
+ *			command Responses and ACKs shall have the same
+ *			Transaction ID as their commands. It shall be unique to
+ *			the command transaction on the given interface.
+ * @param:		Optional parameters used with the command.
+ */
+struct mux_lite_cmdh {
+	u32 signature;
+	u16 cmd_len;
+	u8 if_id;
+	u8 reserved;
+	u32 command_type;
+	u32 transaction_id;
+	union mux_cmd_param param;
+};
+
+/**
+ * struct mux_lite_vfl - value field in generic table
+ * @nr_of_bytes:	Number of bytes available to transmit in the queue.
+ */
+struct mux_lite_vfl {
+	u32 nr_of_bytes;
+};
+
+/**
+ * struct ipc_mem_lite_gen_tbl - Generic table format for Queue Level
+ *				 and Flow Credit
+ * @signature:	Signature of the table
+ * @length:	Length of the table
+ * @if_id:	ID of the interface the table belongs to
+ * @vfl_length:	Value field length
+ * @reserved:	Reserved
+ * @vfl:	Value field of variable length
+ */
+struct ipc_mem_lite_gen_tbl {
+	u32 signature;
+	u16 length;
+	u8 if_id;
+	u8 vfl_length;
+	u32 reserved[2];
+	struct mux_lite_vfl vfl[1];
+};
+
+/**
+ * ipc_mux_dl_decode -Route the DL packet through the IP MUX layer
+ *		      depending on Header.
+ * @ipc_mux:	Pointer to MUX data-struct
+ * @skb:	Pointer to ipc_skb.
+ */
+void ipc_mux_dl_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb);
+
+/**
+ * mux_dl_acb_send_cmds - Respond to the Command blocks.
+ * @ipc_mux:		Pointer to MUX data-struct
+ * @cmd_type:		Command
+ * @if_id:		Session interface id.
+ * @transaction_id:	Command transaction id.
+ * @param:		Pointer to command params.
+ * @res_size:		Response size
+ * @blocking:		True for blocking send
+ * @respond:		If true return transaction ID
+ *
+ * Returns: 0 in success and -ve for failure
+ */
+int mux_dl_acb_send_cmds(struct iosm_mux *ipc_mux, u32 cmd_type, u8 if_id,
+			 u32 transaction_id, union mux_cmd_param *param,
+			 size_t res_size, bool blocking, bool respond);
+
+/**
+ * mux_netif_tx_flowctrl - Enable/Disable TX flow control on MUX sessions.
+ * @session:	Pointer to mux_session struct
+ * @idx:	Session ID
+ * @on:		true for Enable and false for disable flow control
+ */
+void mux_netif_tx_flowctrl(struct mux_session *session, int idx, bool on);
+
+/**
+ * ipc_mux_ul_trigger_encode - Route the UL packet through the IP MUX layer
+ *			       for encoding.
+ * @ipc_mux:	Pointer to MUX data-struct
+ * @if_id:	Session ID.
+ * @skb:	Pointer to ipc_skb.
+ *
+ * Returns: 0 if successfully encoded
+ *	    -1 on failure
+ *	    -2 if packet has to be retransmitted.
+ */
+int ipc_mux_ul_trigger_encode(struct iosm_mux *ipc_mux, int if_id,
+			      struct sk_buff *skb);
+/**
+ * ipc_mux_ul_data_encode - UL encode function for calling from Tasklet context.
+ * @ipc_mux:	Pointer to MUX data-struct
+ *
+ * Returns: TRUE if any packet of any session is encoded FALSE otherwise.
+ */
+bool ipc_mux_ul_data_encode(struct iosm_mux *ipc_mux);
+
+/**
+ * ipc_mux_ul_encoded_process - Handles the Modem processed UL data by adding
+ *				the SKB to the UL free list.
+ * @ipc_mux:	Pointer to MUX data-struct
+ * @skb:	Pointer to ipc_skb.
+ */
+void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb);
+
+#endif
-- 
2.12.3


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox