Netdev List
 help / color / mirror / Atom feed
* [PATCH v3 5/7] irda: w83977af_ir: Use the common brace style
From: Joe Perches @ 2016-12-06 18:16 UTC (permalink / raw)
  To: Samuel Ortiz; +Cc: Arnd Bergmann, Sergei Shtylyov, netdev, linux-kernel
In-Reply-To: <cover.1481047792.git.joe@perches.com>

Add braces where appropriate and remove an unnecessary else.

Signed-off-by: Joe Perches <joe@perches.com>
---
 drivers/net/irda/w83977af_ir.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 92f1525b0570..5d5ec8938e2b 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -462,8 +462,9 @@ static void w83977af_change_speed(struct w83977af_ir *self, __u32 speed)
 	if (speed > PIO_MAX_SPEED) {
 		outb(ICR_EFSFI, iobase + ICR);
 		w83977af_dma_receive(self);
-	} else
+	} else {
 		outb(ICR_ERBRI, iobase + ICR);
+	}
 
 	/* Restore SSR */
 	outb(set, iobase + SSR);
@@ -502,8 +503,8 @@ static netdev_tx_t w83977af_hard_xmit(struct sk_buff *skb,
 			w83977af_change_speed(self, speed);
 			dev_kfree_skb(skb);
 			return NETDEV_TX_OK;
-		} else
-			self->new_speed = speed;
+		}
+		self->new_speed = speed;
 	}
 
 	/* Save current set */
@@ -649,8 +650,9 @@ static void w83977af_dma_xmit_complete(struct w83977af_ir *self)
 
 		/* Clear bit, by writing 1 to it */
 		outb(AUDR_UNDR, iobase + AUDR);
-	} else
+	} else {
 		self->netdev->stats.tx_packets++;
+	}
 
 	if (self->new_speed) {
 		w83977af_change_speed(self, self->new_speed);
@@ -813,9 +815,8 @@ static int w83977af_dma_receive_complete(struct w83977af_ir *self)
 		} else {
 			/* Check if we have transferred all data to memory */
 			switch_bank(iobase, SET0);
-			if (inb(iobase + USR) & USR_RDR) {
+			if (inb(iobase + USR) & USR_RDR)
 				udelay(80); /* Should be enough!? */
-			}
 
 			skb = dev_alloc_skb(len + 1);
 			if (!skb)  {
@@ -969,7 +970,6 @@ static __u8 w83977af_fir_interrupt(struct w83977af_ir *self, int isr)
 	/* End of frame detected in FIFO */
 	if (isr & (ISR_FEND_I | ISR_FSF_I)) {
 		if (w83977af_dma_receive_complete(self)) {
-
 			/* Wait for next status FIFO interrupt */
 			new_icr |= ICR_EFSFI;
 		} else {
@@ -1094,8 +1094,9 @@ static int w83977af_is_receiving(struct w83977af_ir *self)
 			status =  TRUE;
 		}
 		outb(set, iobase + SSR);
-	} else
+	} else {
 		status = (self->rx_buff.state != OUTSIDE_FRAME);
+	}
 
 	return status;
 }
@@ -1141,8 +1142,9 @@ static int w83977af_net_open(struct net_device *dev)
 	if (self->io.speed > 115200) {
 		outb(ICR_EFSFI, iobase + ICR);
 		w83977af_dma_receive(self);
-	} else
+	} else {
 		outb(ICR_ERBRI, iobase + ICR);
+	}
 
 	/* Restore bank register */
 	outb(set, iobase + SSR);
-- 
2.10.0.rc2.1.g053435c

^ permalink raw reply related

* Re: [RESEND][PATCH v4] cgroup: Use CAP_SYS_RESOURCE to allow a process to migrate other tasks between cgroups
From: Tejun Heo @ 2016-12-06 18:23 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: John Stultz, Alexei Starovoitov, Andy Lutomirski,
	Mickaël Salaün, Daniel Mack, David S. Miller, kafai,
	Florian Westphal, Harald Hoyer, Network Development,
	Sargun Dhillon, Pablo Neira Ayuso, lkml, Li Zefan,
	Jonathan Corbet, open list:CONTROL GROUP (CGROUP),
	Android Kernel Team, Rom Lemarchand, Colin Cross
In-Reply-To: <CALCETrUmwPaWbN_U2XAsg6Z0UGBJ=Ou7BoD09-GZFdRVT5Y-EQ@mail.gmail.com>

Hello,

On Tue, Dec 06, 2016 at 10:13:53AM -0800, Andy Lutomirski wrote:
> > Delegation is an explicit operation and reflected in the ownership of
> > the subdirectories and cgroup interface files in them.  The
> > subhierarchy containment is achieved by requiring the user who's
> > trying to migrate a process to have write perm on cgroup.procs on the
> > common ancestor of the source and target in addition to the target.
> 
> OK, I see what you're doing.  That's interesting.

It's something born out of usages of cgroup v1.  People used it that
way (chowning files and directories) and combined with the uid checks
it yielded something which is useful sometimes, but it always had
issues with hierarchical behaviors, which files to chmod and the weird
combination of uid checks.  cgroup v2 has a clear delegation model but
the uid checks are still left in as not changing was the default.

It's not necessary and I'm thinking about queueing something like the
following in the next cycle.

As for the android CAP discussion, I think it'd be nice to share an
existing CAP but if we can't find a good one to share, let's create a
new one.

Thanks.

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 4cc07ce..34b4b44 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -328,14 +328,12 @@ a process with a non-root euid to migrate a target process into a
 cgroup by writing its PID to the "cgroup.procs" file, the following
 conditions must be met.
 
-- The writer's euid must match either uid or suid of the target process.
-
 - The writer must have write access to the "cgroup.procs" file.
 
 - The writer must have write access to the "cgroup.procs" file of the
   common ancestor of the source and destination cgroups.
 
-The above three constraints ensure that while a delegatee may migrate
+The above two constraints ensure that while a delegatee may migrate
 processes around freely in the delegated sub-hierarchy it can't pull
 in from or push out to outside the sub-hierarchy.
 
@@ -350,10 +348,10 @@ all processes under C0 and C1 belong to U0.
 
 Let's also say U0 wants to write the PID of a process which is
 currently in C10 into "C00/cgroup.procs".  U0 has write access to the
-file and uid match on the process; however, the common ancestor of the
-source cgroup C10 and the destination cgroup C00 is above the points
-of delegation and U0 would not have write access to its "cgroup.procs"
-files and thus the write will be denied with -EACCES.
+file; however, the common ancestor of the source cgroup C10 and the
+destination cgroup C00 is above the points of delegation and U0 would
+not have write access to its "cgroup.procs" files and thus the write
+will be denied with -EACCES.
 
 
 2-6. Guidelines
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 85bc9be..49384ff 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2854,12 +2854,18 @@ static int cgroup_procs_write_permission(struct task_struct *task,
 	 * even if we're attaching all tasks in the thread group, we only
 	 * need to check permissions on one of them.
 	 */
-	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
-	    !uid_eq(cred->euid, tcred->uid) &&
-	    !uid_eq(cred->euid, tcred->suid))
-		ret = -EACCES;
 
-	if (!ret && cgroup_on_dfl(dst_cgrp)) {
+	/* root is allowed to do anything */
+	if (uid_eq(cred->euid, GLOBAL_ROOT_UID))
+		goto out;
+
+	/*
+	 * On v2, follow the delegation model.  Inside a delegated subtree,
+	 * the delegatee can move around the processes however it sees fit.
+	 *
+	 * On v1, a process should match one of the target's uids.
+	 */
+	if (cgroup_on_dfl(dst_cgrp)) {
 		struct super_block *sb = of->file->f_path.dentry->d_sb;
 		struct cgroup *cgrp;
 		struct inode *inode;
@@ -2877,8 +2883,11 @@ static int cgroup_procs_write_permission(struct task_struct *task,
 			ret = inode_permission(inode, MAY_WRITE);
 			iput(inode);
 		}
+	} else if (!uid_eq(cred->euid, tcred->uid) &&
+		   !uid_eq(cred->euid, tcred->suid)) {
+		ret = -EACCES;
 	}
-
+out:
 	put_cred(tcred);
 	return ret;
 }

^ permalink raw reply related

* Re: [PATCH] net/udp: do not touch skb->peeked unless really needed
From: Paolo Abeni @ 2016-12-06 18:31 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, Willem de Bruijn
In-Reply-To: <1481046434.18162.599.camel@edumazet-glaptop3.roam.corp.google.com>

On Tue, 2016-12-06 at 09:47 -0800, Eric Dumazet wrote:
> On Tue, 2016-12-06 at 18:08 +0100, Paolo Abeni wrote:
> > On Tue, 2016-12-06 at 11:34 +0100, Paolo Abeni wrote:
> > > On Mon, 2016-12-05 at 09:57 -0800, Eric Dumazet wrote:
> > > > From: Eric Dumazet <edumazet@google.com>
> > > > 
> > > > In UDP recvmsg() path we currently access 3 cache lines from an skb
> > > > while holding receive queue lock, plus another one if packet is
> > > > dequeued, since we need to change skb->next->prev
> > > > 
> > > > 1st cache line (contains ->next/prev pointers, offsets 0x00 and 0x08)
> > > > 2nd cache line (skb->len & skb->peeked, offsets 0x80 and 0x8e)
> > > > 3rd cache line (skb->truesize/users, offsets 0xe0 and 0xe4)
> > > > 
> > > > skb->peeked is only needed to make sure 0-length packets are properly
> > > > handled while MSG_PEEK is operated.
> > > > 
> > > > I had first the intent to remove skb->peeked but the "MSG_PEEK at
> > > > non-zero offset" support added by Sam Kumar makes this not possible.
> > > > 
> > > > This patch avoids one cache line miss during the locked section, when
> > > > skb->len and skb->peeked do not have to be read.
> > > > 
> > > > It also avoids the skb_set_peeked() cost for non empty UDP datagrams.
> > > > 
> > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > 
> > > Thank you for all the good work.
> > > 
> > > After all your improvement, I see the cacheline miss in inet_recvmsg()
> > > as a major perf offender for the user space process in the udp flood
> > > scenario due to skc_rxhash sharing the same sk_drops cacheline.
> > > 
> > > Using an udp-specific drop counter (and an sk_drops accessor to wrap
> > > sk_drops access where needed), we could avoid such cache miss. With that
> > > - patch for udp.h only below - I get 3% improvement on top of all the
> > > pending udp patches, and the gain should be more relevant after the 2
> > > queues rework. What do you think ?
> > 
> > Here follow what I'm experimenting. 
> 
> Well, new socket layout makes this kind of patches not really needed ?
> 
> 	/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
> 	socket_lock_t              sk_lock;              /*  0x88  0x20 */
> 	atomic_t                   sk_drops;             /*  0xa8   0x4 */
> 	int                        sk_rcvlowat;          /*  0xac   0x4 */
> 	struct sk_buff_head        sk_error_queue;       /*  0xb0  0x18 */
> 	/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */

cacheline 2 boundary (128 bytes) is 8 bytes before sk_lock: cacheline 2
includes also skc_refcnt and skc_rxhash from __sk_common (I use 'pahole
-E ...' to get the full blown output). skc_rxhash is read for each
packet in inet_recvmsg()/sock_rps_record_flow() if CONFIG_RPS is set. I
get a cache miss per packet there and inet_recvmsg() in my test takes
about 8% of the whole u/s processing time.

> I mentioned at some point that we can very easily instruct 
> sock_skb_set_dropcount() to not read sk_drops if application
> does not care about getting sk_drops ;)
> 
> https://patchwork.kernel.org/patch/9405677/
> 
> Now sk_drops was moved, the plan is to submit this patch in an official way.

Looking forward to that!

Thank you,

Paolo

^ permalink raw reply

* Re: "af_unix: conditionally use freezable blocking calls in read" is wrong
From: Colin Cross @ 2016-12-06 18:37 UTC (permalink / raw)
  To: Cong Wang; +Cc: Al Viro, David Miller, Linux Kernel Network Developers
In-Reply-To: <CAM_iQpXn7ssNpxviX+sLf0k-88GoWubcBeoMW=JkZTh_q8iKdg@mail.gmail.com>

On Mon, Dec 5, 2016 at 8:24 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> On Sun, Dec 4, 2016 at 7:52 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>> On Sun, Dec 04, 2016 at 09:42:14PM -0500, David Miller wrote:
>>> >     I've run into that converting AF_UNIX to generic_file_splice_read();
>>> > I can kludge around that ("freezable unless ->msg_iter is ITER_PIPE"), but
>>> > that only delays trouble.
>>> >
>>> >     Note that the only other user of freezable_schedule_timeout() is
>>> > a very different story - it's a kernel thread, which *does* have a guaranteed
>>> > locking environment.  Making such assumptions in unix_stream_recvmsg(),
>>> > OTOH, is insane...
>>>
>>> We have to otherwise Android phones drain their batteries in 10
>>> minutes.
>>>
>>> I'm not going to revert this and be responsible for that.

This is an optimization for going in and out of suspend without
context switching through blocked processes, reverting it will not
cause batteries to drain in 10 minutes.  On my phone, it would cause
~83 context switches on each transition in and out of suspend, which
sometimes happens every 1-5 seconds on noisy networks, but more
normally happens on the order of minutes.

>>>
>>> So you have to find a way to make the freezable calls legitimate.
>>
>> Oh, well...  As I said, I can kludge around that - call from
>> generic_file_splice_read() can be distinguished by looking at the
>> ->msg_iter->type; it still means unpleasantness for kernel_recvmsg()
>> users - in effect, it can only be called with locks held if you know that
>> the socket is not an AF_UNIX one.
>>
>> BTW, how do they deal with plain pipes?
>
> I suppose this question is for Colin. ;)

The original patch set is at https://lkml.org/lkml/2013/4/29/495.  It
was targeted at the sites on which many threads were blocked on an
Android device; pipe_wait didn't show up high on the list (there is
only 1 thread blocked on pipe_wait on my phone right now), so I didn't
look at it.

^ permalink raw reply

* Re: [PATCH v2 net-next 3/4] mlx4: xdp: Reserve headroom for receiving packet when XDP prog is active
From: Martin KaFai Lau @ 2016-12-06 18:27 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: Linux Netdev List, Alexei Starovoitov, Brenden Blanco,
	Daniel Borkmann, David Miller, Jesper Dangaard Brouer,
	Saeed Mahameed, Tariq Toukan, Kernel Team
In-Reply-To: <CALzJLG8-dZAvio7035CM4bFtzGanaLNWA=pExZfBJ6n9-K9ntQ@mail.gmail.com>

On Tue, Dec 06, 2016 at 06:50:47PM +0200, Saeed Mahameed wrote:
> On Mon, Dec 5, 2016 at 9:55 PM, Martin KaFai Lau <kafai@fb.com> wrote:
> > On Mon, Dec 05, 2016 at 02:54:06AM +0200, Saeed Mahameed wrote:
> >> On Sun, Dec 4, 2016 at 5:17 AM, Martin KaFai Lau <kafai@fb.com> wrote:
> >> > Reserve XDP_PACKET_HEADROOM and honor bpf_xdp_adjust_head()
> >> > when XDP prog is active.  This patch only affects the code
> >> > path when XDP is active.
> >> >
> >> > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> >> > ---
> >>
> >> Hi Martin, Sorry for the late review, i have some comments below
> >>
> >> >  drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 17 +++++++++++++++--
> >> >  drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 23 +++++++++++++++++------
> >> >  drivers/net/ethernet/mellanox/mlx4/en_tx.c     |  9 +++++----
> >> >  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  3 ++-
> >> >  4 files changed, 39 insertions(+), 13 deletions(-)
> >> >
> >> > diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> >> > index 311c14153b8b..094a13b52cf6 100644
> >> > --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> >> > +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> >> > @@ -51,7 +51,8 @@
> >> >  #include "mlx4_en.h"
> >> >  #include "en_port.h"
> >> >
> >> > -#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN)))
> >> > +#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \
> >> > +                                  XDP_PACKET_HEADROOM))
> >> >
> >> >  int mlx4_en_setup_tc(struct net_device *dev, u8 up)
> >> >  {
> >> > @@ -1551,6 +1552,7 @@ int mlx4_en_start_port(struct net_device *dev)
> >> >         struct mlx4_en_tx_ring *tx_ring;
> >> >         int rx_index = 0;
> >> >         int err = 0;
> >> > +       int mtu;
> >> >         int i, t;
> >> >         int j;
> >> >         u8 mc_list[16] = {0};
> >> > @@ -1684,8 +1686,12 @@ int mlx4_en_start_port(struct net_device *dev)
> >> >         }
> >> >
> >> >         /* Configure port */
> >> > +       mtu = priv->rx_skb_size + ETH_FCS_LEN;
> >> > +       if (priv->tx_ring_num[TX_XDP])
> >> > +               mtu += XDP_PACKET_HEADROOM;
> >> > +
> >>
> >> Why would the physical MTU care for the headroom you preserve for XDP prog?
> >> This is the wire MTU, it shouldn't be changed, please keep it as
> >> before, any preservation you make in packets buffers are needed only
> >> for FWD case or modify case (HW or wire should not care about them).
> >
> > Thanks for your feedback!
>
> Just doing my job :))
>
> >
> > FWD:
> > packet received from a port
> > => process by a XDP prog
> > => XDP_TX out to the same port.
> >
> > For example, if the received packet has 1500 payload and the XDP prog
> > encapsulates it in an IPv6 header (+40 bytes).  After testing, it cannot
> > be sent out due to the HW/wire MTU is 1500.
> >
> > Even the wire MTU info was passed to the XDP prog, there is not much a
> > XDP prog could do here other than dropping it.
> >
> > Hence, this patch gives guarantee to the XDP prog such that
> > it can always send out what it has received + XDP_PACKET_HEADROOM.
> >
>
> Still i am not convinced ! this is against common sense,
> this means that the XDP prog can send packets larger than the  MTU
> seen on netdev!
>
> anyway if a packet with the size (MTU + XDP_PACKET_HEADROOM) was sent
> from XDP ring and HW allowed it to exit somehow (with the code you
> provided :)), most likely it will be dropped
> at the other end.
The MTU of our receiver side is larger than 1500.

If the other side could not handle >1500, we could lower the MTU of the
box running the XDP prog to 1460.

Just to make sure we are on the same page: the rx MTU stays the same (1500)
because the rx_desc's byte_count is not raised by XDP_PACKET_HEADROOM.

>
> I still think XDP prog should not be allowed to FW packets larger than
> the MTU seen on the netdev and you shouldn't modify the wire MTU just
> for this case.
>
> >>
> >> >         err = mlx4_SET_PORT_general(mdev->dev, priv->port,
> >> > -                                   priv->rx_skb_size + ETH_FCS_LEN,
> >> > +                                   mtu,
> >> >                                     priv->prof->tx_pause,
> >> >                                     priv->prof->tx_ppp,
> >> >                                     priv->prof->rx_pause,
> >> > @@ -2255,6 +2261,13 @@ static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu)
> >> >  {
> >> >         struct mlx4_en_priv *priv = netdev_priv(dev);
> >> >
> >> > +       if (mtu + XDP_PACKET_HEADROOM > priv->max_mtu) {
> >> > +               en_err(priv,
> >> > +                      "Device max mtu:%d does not allow %d bytes reserved headroom for XDP prog\n",
> >> > +                      priv->max_mtu, XDP_PACKET_HEADROOM);
> >> > +               return false;
> >> > +       }
> >> > +
> >> >         if (mtu > MLX4_EN_MAX_XDP_MTU) {
> >> >                 en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n",
> >> >                        mtu, MLX4_EN_MAX_XDP_MTU);
> >> > diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> >> > index 23e9d04d1ef4..324771ac929e 100644
> >> > --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> >> > +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> >> > @@ -96,7 +96,6 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
> >> >         struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
> >> >         const struct mlx4_en_frag_info *frag_info;
> >> >         struct page *page;
> >> > -       dma_addr_t dma;
> >> >         int i;
> >> >
> >> >         for (i = 0; i < priv->num_frags; i++) {
> >> > @@ -115,9 +114,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
> >> >
> >> >         for (i = 0; i < priv->num_frags; i++) {
> >> >                 frags[i] = ring_alloc[i];
> >> > -               dma = ring_alloc[i].dma + ring_alloc[i].page_offset;
> >> > +               frags[i].page_offset += priv->frag_info[i].rx_headroom;
> >>
> >> I don't see any need for headroom on frag_info other that frag0 (which
> >> where the packet starts).
> >> What is the meaning of a headroom of a frag in a middle of a packet ?
> >>
> >> if you agree with me then, you can use XDP_PACKET_HEADROOM as is where
> >> needed (i.e frag0 page offset) and remove
> >> "priv->frag_info[i].rx_headroom"
> >>
> >> ...
> >>
> >> After going through the code a little bit i see that this code is
> >> shared between XDP and common path, and you didn't want to add boolean
> >> conditions.
> >>
> >> Ok i see what you did here.
> >>
> >> Maybe we can pass headroom as a function parameter and split frag0
> >> handling from the rest ?
> >> If it is too much then i am ok with the code as it is,
> > Right, this patch does the boolean check (XDP active or not) early on
> > in mlx4_en_calc_rx_buf() (i.e. out of the fast path) and store
> > the result in priv->frag_info[0].rx_headroom.
> >
> > Just want to ensure I understand your comment correctly.
> > You prefer not to store the boolean test result in frag_info[0].rx_headroom
> > since it is redundant to !!priv->tx_ring_num[TX_XDP] and rx_headroom is also
> > confusing for frag[1-3].
> >
> > Instead, do the XDP [in]active test before calling mlx4_en_alloc_frags()
> > and then only adjust frags[0].page_offset by +XDP_PACKET_HEADROOM if is needed.
> > It could be done either by passing an extra argument to mlx4_en_alloc_frags()
> > or completely separate mlx4_en_alloc_frags().  I am fine with this also.
> >
>
> Correct, but if this change will add extra checks to the data path
> then I am ok with the current code.
Right, the check has to be done somewhere in the data path.
Let's stay with the current approach then.

>
> >
> >>
> >> > +               rx_desc->data[i].addr = cpu_to_be64(frags[i].dma +
> >> > +                                                   frags[i].page_offset);
> >> >                 ring_alloc[i] = page_alloc[i];
> >> > -               rx_desc->data[i].addr = cpu_to_be64(dma);
> >> >         }
> >> >
> >> >         return 0;
> >> > @@ -250,7 +250,8 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
> >> >
> >> >         if (ring->page_cache.index > 0) {
> >> >                 frags[0] = ring->page_cache.buf[--ring->page_cache.index];
> >> > -               rx_desc->data[0].addr = cpu_to_be64(frags[0].dma);
> >> > +               rx_desc->data[0].addr = cpu_to_be64(frags[0].dma +
> >> > +                                                   frags[0].page_offset);
> >> >                 return 0;
> >> >         }
> >> >
> >> > @@ -889,6 +890,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
> >> >                 if (xdp_prog) {
> >> >                         struct xdp_buff xdp;
> >> >                         dma_addr_t dma;
> >> > +                       void *pg_addr, *orig_data;
> >> >                         u32 act;
> >> >
> >> >                         dma = be64_to_cpu(rx_desc->data[0].addr);
> >> > @@ -896,11 +898,18 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
> >> >                                                 priv->frag_info[0].frag_size,
> >> >                                                 DMA_FROM_DEVICE);
> >> >
> >> > -                       xdp.data = page_address(frags[0].page) +
> >> > -                                                       frags[0].page_offset;
> >> > +                       pg_addr = page_address(frags[0].page);
> >> > +                       orig_data = pg_addr + frags[0].page_offset;
> >> > +                       xdp.data = orig_data;
> >> >                         xdp.data_end = xdp.data + length;
> >> >
> >> >                         act = bpf_prog_run_xdp(xdp_prog, &xdp);
> >> > +
> >> > +                       if (xdp.data != orig_data) {
> >> > +                               length = xdp.data_end - xdp.data;
> >> > +                               frags[0].page_offset = xdp.data - pg_addr;
> >> > +                       }
> >> > +
> >> >
> >>
> >> is this needed only for XDP FWD case ?
> > No. It is also for PASS.
> >
>
> I see.
>
> >> is this the only way to detect that the user modified the packet
> >> headers (comparing pointers, before and after) ?
> > Yes
> >
> >>
> >> if the answer is yes, it should be faster to unconditionally reset
> >> packet offset and lenght on XDP_FWD :
> >> case XDP_FWD:
> >>    length = xdp.data_end - xdp.data;
> >>    frags[0].page_offset = xdp.data - pg_addr;
> >>
> >>
> >> >                         switch (act) {
> >> >                         case XDP_PASS:
> >> >                                 break;
> >> > @@ -1180,6 +1189,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
> >> >                  */
> >> >                 priv->frag_info[0].frag_stride = PAGE_SIZE;
> >> >                 priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL;
> >> > +               priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM;
> >> >                 i = 1;
> >> >         } else {
> >> >                 int buf_size = 0;
> >> > @@ -1194,6 +1204,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
> >> >                                 ALIGN(priv->frag_info[i].frag_size,
> >> >                                       SMP_CACHE_BYTES);
> >> >                         priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE;
> >> > +                       priv->frag_info[i].rx_headroom = 0;
> >>
> >> IMHO, redundant. as you see here frag0 and other frags handling are
> >> separated, maybe we can do the same in mlx4_en_alloc_frags.
> >>
> >> >                         buf_size += priv->frag_info[i].frag_size;
> >> >                         i++;
> >> >                 }
> >> > diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> >> > index 4b597dca5c52..9e5f38cefe5f 100644
> >> > --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> >> > +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> >> > @@ -354,7 +354,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
> >> >         struct mlx4_en_rx_alloc frame = {
> >> >                 .page = tx_info->page,
> >> >                 .dma = tx_info->map0_dma,
> >> > -               .page_offset = 0,
> >> > +               .page_offset = XDP_PACKET_HEADROOM,
> >> >                 .page_size = PAGE_SIZE,
> >> >         };
> >> >
> >> > @@ -1132,7 +1132,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
> >> >         tx_info->page = frame->page;
> >> >         frame->page = NULL;
> >> >         tx_info->map0_dma = dma;
> >> > -       tx_info->map0_byte_count = length;
> >> > +       tx_info->map0_byte_count = length + frame->page_offset;
> >>
> >> Didn't you already take care of lenght by the following code:
> >>                        if (xdp.data != orig_data) {
> >>                                length = xdp.data_end - xdp.data;
> >>                                frags[0].page_offset = xdp.data - pg_addr;
> >>                         }
> >>
> > Before this patch, length always assumes the data starts at the beginning
> > of the page and dma is the start of the page.  Hence, adding
> > framg->page_offset back to the length here.
> >
> > However, if I read the codes correctly, I think the map0_byte_count (before or
> > after this patch) does not matter since it is only used in dma_unmap_page() and
> > PAGE_SIZE is always used in dma_unmap_page() for this code patch.  Hence, I think
> > we can just set map0_byte_count to PAGE_SIZE here.
> >
>
> Right, in mlx4_alloc_pages we always map with PAGE_SIZE <<  order
>  dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
>   frag_info->dma_dir);
> for XDP order is always 0, so you can safely set it to PAGE_SIZE.
>
> >> and here  frame->page_offset is not really page offset, it can only be
> >> XDP_PACKET_HEADROOM.
> > Note that the XDP prog can call bpf_xdp_adjust_head() to add a header.
> > The XDP prog can extend up to XDP_PACKET_HEADROOM (256) bytes but it
> > can also (and usually) only add 40 bytes IPv6 header and then XDP_TX it out.
> >
>
> I see.
>
> >>
> >> >         tx_info->nr_txbb = nr_txbb;
> >> >         tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
> >> >         tx_info->data_offset = (void *)data - (void *)tx_desc;
> >> > @@ -1141,9 +1141,10 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
> >> >         tx_info->linear = 1;
> >> >         tx_info->inl = 0;
> >> >
> >> > -       dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE);
> >> > +       dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
> >> > +                                        length, PCI_DMA_TODEVICE);
> >> >
> >> > -       data->addr = cpu_to_be64(dma);
> >> > +       data->addr = cpu_to_be64(dma + frame->page_offset);
> >> >         data->lkey = ring->mr_key;
> >> >         dma_wmb();
> >> >         data->byte_count = cpu_to_be32(length);
> >> > diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> >> > index 20a936428f4a..ba1c6cd0cc79 100644
> >> > --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> >> > +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
> >> > @@ -475,7 +475,8 @@ struct mlx4_en_frag_info {
> >> >         u16 frag_prefix_size;
> >> >         u32 frag_stride;
> >> >         enum dma_data_direction dma_dir;
> >> > -       int order;
> >> > +       u16 order;
> >> > +       u16 rx_headroom;
> >> >  };
> >> >
> >> >  #ifdef CONFIG_MLX4_EN_DCB
> >> > --
> >> > 2.5.1
> >> >

^ permalink raw reply

* Re: [PATCH] drivers: net: cpsw-phy-sel: Clear RGMII_IDMODE on "rgmii" links
From: Alex @ 2016-12-06 18:56 UTC (permalink / raw)
  To: David Miller
  Cc: mugunthanvnm, grygorii.strashko, linux-omap, netdev, linux-kernel,
	gokhan
In-Reply-To: <20161206.113630.1454582039498444818.davem@davemloft.net>



On 12/06/2016 08:36 AM, David Miller wrote:
> From: Alexandru Gagniuc <alex.g@adaptrum.com>
> Date: Mon,  5 Dec 2016 17:33:53 -0800
>
>> Support for setting the RGMII_IDMODE bit was added in commit:
>> "drivers: net: cpsw-phy-sel: add support to configure rgmii internal delay"
>> However, that commit did not add the symmetrical clearing of the bit
>> by way of setting it in "mask". Add it here.
>>
>> Note that the documentation marks clearing this bit as "reserved",
>> however, according to TI, support for delaying the clock does exist in
>> the MAC, although it is not officially supported.
>> We tested this on a board with an RGMII to RGMII link that will not
>> work unless this bit is cleared.
>>
>> Signed-off-by: Alexandru Gagniuc <alex.g@adaptrum.com>
>
> Commits must be referenced by both short-form SHA1-ID as well as
> the commit header text.
>
> And since this change is fixing that commit, you should also provide
> a proper "Fixes: " tag on the line right before your signoff.

Thank you very much for the feedback. I will update accordingly.

Alex

> Thanks.
>

^ permalink raw reply

* [PATCH v2] drivers: net: cpsw-phy-sel: Clear RGMII_IDMODE on "rgmii" links
From: Alexandru Gagniuc @ 2016-12-06 18:56 UTC (permalink / raw)
  To: mugunthanvnm
  Cc: davem, grygorii.strashko, linux-omap, netdev, linux-kernel,
	gokhan, Alexandru Gagniuc
In-Reply-To: <20161206.113630.1454582039498444818.davem@davemloft.net>

Support for setting the RGMII_IDMODE bit was added in the commit
referenced below. However, that commit did not add the symmetrical
clearing of the bit by way of setting it in "mask". Add it here.

Note that the documentation marks clearing this bit as "reserved",
however, according to TI, support for delaying the clock does exist in
the MAC, although it is not officially supported.
We tested this on a board with an RGMII to RGMII link that will not
work unless this bit is cleared.

Fixes: 0fb26c3063ea ("drivers: net: cpsw-phy-sel: add support to configure rgmii internal delay")
Signed-off-by: Alexandru Gagniuc <alex.g@adaptrum.com>
---
 drivers/net/ethernet/ti/cpsw-phy-sel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index ba1e45f..1801364 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -81,6 +81,7 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv,
 	};
 
 	mask = GMII_SEL_MODE_MASK << (slave * 2) | BIT(slave + 6);
+	mask |= BIT(slave + 4);
 	mode <<= slave * 2;
 
 	if (priv->rmii_clock_external) {
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH] net/udp: do not touch skb->peeked unless really needed
From: Eric Dumazet @ 2016-12-06 18:58 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: David Miller, netdev, Willem de Bruijn
In-Reply-To: <1481049061.7129.18.camel@redhat.com>

On Tue, 2016-12-06 at 19:31 +0100, Paolo Abeni wrote:

> cacheline 2 boundary (128 bytes) is 8 bytes before sk_lock: cacheline 2
> includes also skc_refcnt and skc_rxhash from __sk_common (I use 'pahole
> -E ...' to get the full blown output). skc_rxhash is read for each
> packet in inet_recvmsg()/sock_rps_record_flow() if CONFIG_RPS is set. I
> get a cache miss per packet there and inet_recvmsg() in my test takes
> about 8% of the whole u/s processing time.



Wait a minute, this sk->sk_rxhash should only be read on connected
socket. Relying on it being 0 was okay only if we did not care
about false sharing. And UDP sockets used to grab socket refcount, so we
had false sharing a _lot_ in the past.

We must fix this if not already done properly.

Can you take care of this problem ?

Thanks !

^ permalink raw reply

* Re: [PATCH v2 net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
From: Martin KaFai Lau @ 2016-12-06 18:52 UTC (permalink / raw)
  To: John Fastabend
  Cc: netdev, Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Jesper Dangaard Brouer, Saeed Mahameed,
	Tariq Toukan, Kernel Team
In-Reply-To: <5846F6E3.2040808@gmail.com>

On Tue, Dec 06, 2016 at 09:35:31AM -0800, John Fastabend wrote:
> On 16-12-03 07:17 PM, Martin KaFai Lau wrote:
> > This patch allows XDP prog to extend/remove the packet
> > data at the head (like adding or removing header).  It is
> > done by adding a new XDP helper bpf_xdp_adjust_head().
> >
> > It also renames bpf_helper_changes_skb_data() to
> > bpf_helper_changes_pkt_data() to better reflect
> > that XDP prog does not work on skb.
> >
> > Acked-by: Alexei Starovoitov <ast@kernel.org>
> > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> > ---
>
>
> [...]
>
> >
> > -bool bpf_helper_changes_skb_data(void *func)
> > +BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
> > +{
> > +	/* Both mlx4 and mlx5 driver align each packet to PAGE_SIZE when
> > +	 * XDP prog is set.
> > +	 * If the above is not true for the other drivers to support
> > +	 * bpf_xdp_adjust_head, struct xdp_buff can be extended.
> > +	 */
> > +	unsigned long addr = (unsigned long)xdp->data & PAGE_MASK;
> > +	void *data_hard_start = (void *)addr;
> > +	void *data = xdp->data + offset;
> > +
> > +	if (unlikely(data < data_hard_start || data > xdp->data_end - ETH_HLEN))
> > +		return -EINVAL;
> > +
> > +	xdp->data = data;
> > +
> > +	return 0;
> > +}
> > +
>
> Sorry for the delay but I don't like the assumptions being made here
> with regards to page alignment and free space.
>
> Instead of taking the offset from PAGE_MASK how about adding a pointer
> to xdp_buff so that we get,
>
>  struct xdp_buff {
>  	void *data;
> 	void *data_end;
> 	void *data_start;
>  };
>
> This gives the headroom explicitly in the data structure. This way we
> can handle non-page-aligned usages, and also some of the drivers are
> putting metadata (descriptors) at the front of the page. With this
> patch we could stomp on that metadata; with the above we avoid that
> problem altogether.
Extending the xdp_buff like this was my first local attempt.  And
then I realized the assumption that mlx4/5 is making.  Since
there is no immediate need for this patch series and the xdp_buff
can always be extended later if needed without breaking
the xdp prog, I dropped the xdp_buff addition for now in this patch.

Sure, it will be done at the next spin.

^ permalink raw reply

* Re: Avoid deadlock situation due to use of xmit_lock
From: Lino Sanfilippo @ 2016-12-06 19:10 UTC (permalink / raw)
  To: David Miller
  Cc: bh74.an, ks.giri, vipul.pandya, peppe.cavallaro, alexandre.torgue,
	linux-kernel, netdev
In-Reply-To: <20161206.100607.919832811727709338.davem@davemloft.net>

Hi,

On 06.12.2016 16:06, David Miller wrote:
> From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
> Date: Sat,  3 Dec 2016 00:06:04 +0100
> 
>> after stumbling over a potential deadlock situation in the altera driver 
>> (see http://marc.info/?l=linux-netdev&m=148054615230447&w=2), I checked
>> all other ethernet drivers for the same issue and actually found it in 2
>> more, namely stmmac, and sxgbe. Please see the commit messages for a 
>> description of the problem.
>> These 2 patches fix the concerning drivers.
> 
> First of all, I don't want to apply these patches without proper testing
> and ACKs from the individual driver maintainers.
> 
> For both of these drivers, this situation only exists because the TX
> path uses the unnecessary ->tx_lock.  This private lock should be
> removed completely and the driver should use the lock the mid-layer
> already holds in the transmit path and take it in the TX reclaim path
> instead of the private ->tx_lock.
> 

Ok, I will prepare a new set of patches to remove those private locks entirely.

Regards,
Lino

^ permalink raw reply

* Re: Gigabit ethernet driver for Alacritechs SLIC devices (v4)
From: Lino Sanfilippo @ 2016-12-06 19:11 UTC (permalink / raw)
  To: David Miller
  Cc: charrer, liodot, gregkh, andrew, roszenrami, markus.boehme,
	f.fainelli, devel, linux-kernel, netdev
In-Reply-To: <20161206.113004.1297109434722530511.davem@davemloft.net>

On 06.12.2016 17:30, David Miller wrote:
> From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
> Date: Mon,  5 Dec 2016 23:07:15 +0100
> 
>> this is the fourth version of the slicoss gigabit ethernet driver (which is a
>> rework of the driver from Alacritech which can currently be found under
>> drivers/staging/slicoss). The driver is supposed to support Mojave, Oasis and
>> Kalahari cards, for both copper and fiber.
>> 
>> If this code is accepted the staging version can be removed.
>> 
>> The driver has been tested on a SEN2104ET adapter (4 Port PCIe copper).
> 
> I've applied this series, nice work.
> 

Great, thanks!

Regards,
Lino

^ permalink raw reply

* Re: [PATCH] net/udp: do not touch skb->peeked unless really needed
From: Paolo Abeni @ 2016-12-06 19:16 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, Willem de Bruijn
In-Reply-To: <1481050715.18162.604.camel@edumazet-glaptop3.roam.corp.google.com>

On Tue, 2016-12-06 at 10:58 -0800, Eric Dumazet wrote:
> On Tue, 2016-12-06 at 19:31 +0100, Paolo Abeni wrote:
> 
> > cacheline 2 boundary (128 bytes) is 8 bytes before sk_lock: cacheline 2
> > includes also skc_refcnt and skc_rxhash from __sk_common (I use 'pahole
> > -E ...' to get the full blown output). skc_rxhash is read for each
> > packet in inet_recvmsg()/sock_rps_record_flow() if CONFIG_RPS is set. I
> > get a cache miss per packet there and inet_recvmsg() in my test takes
> > about 8% of the whole u/s processing time.
> 
> Wait a minute, this sk->sk_rxhash should only be read on connected
> socket. Relying on it being 0 was okay only if we did not care
> of false sharing. And UDP sockets used to grab socket refcount, so we
> had false sharing a _lot_ in the past.

Thank you for the pointer.

> We must fix this if not already done properly.
> 
> Can you take care of this problem ?

I'll try, but it can't be very soon: I'll have limited time and a bad
internet connection until next week.

Cheers,

Paolo

^ permalink raw reply

* Re: [PATCH] net/udp: do not touch skb->peeked unless really needed
From: Eric Dumazet @ 2016-12-06 19:35 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: David Miller, netdev, Willem de Bruijn
In-Reply-To: <1481051786.7129.22.camel@redhat.com>

On Tue, 2016-12-06 at 20:16 +0100, Paolo Abeni wrote:
> On Tue, 2016-12-06 at 10:58 -0800, Eric Dumazet wrote:
> > On Tue, 2016-12-06 at 19:31 +0100, Paolo Abeni wrote:
> > 
> > > cacheline 2 boundary (128 bytes) is 8 bytes before sk_lock: cacheline 2
> > > includes also skc_refcnt and skc_rxhash from __sk_common (I use 'pahole
> > > -E ...' to get the full blown output). skc_rxhash is read for each
> > > packet in inet_recvmsg()/sock_rps_record_flow() if CONFIG_RPS is set. I
> > > get a cache miss per packet there and inet_recvmsg() in my test takes
> > > about 8% of the whole u/s processing time.
> > 
> > Wait a minute, this sk->sk_rxhash should only be read on connected
> > socket. Relying on it being 0 was okay only if we did not care
> > of false sharing. And UDP sockets used to grab socket refcount, so we
> > had false sharing a _lot_ in the past.
> 
> Thank you for the pointer.
> 
> > We must fix this if not already done properly.
> > 
> > Can you take care of this problem ?
> 
> I'll try, but it can be very soon: I'll have limited time and bad
> internet connection up to next week.

Do not worry, I had a better idea anyway. I am testing it ;)

^ permalink raw reply

* Re: [PATCH 2/6] net: ethernet: ti: cpts: add support for ext rftclk selection
From: Grygorii Strashko @ 2016-12-06 19:39 UTC (permalink / raw)
  To: Richard Cochran, Murali Karicheri
  Cc: David S. Miller, netdev, Mugunthan V N, Sekhar Nori, linux-kernel,
	linux-omap, Rob Herring, devicetree, Wingman Kwok, linux-clk
In-Reply-To: <bf207ac4-0f9f-d4a7-14ac-704d93fb7764@ti.com>



On 11/30/2016 11:35 AM, Grygorii Strashko wrote:
>
>
> On 11/30/2016 03:56 AM, Richard Cochran wrote:
>> On Mon, Nov 28, 2016 at 05:04:24PM -0600, Grygorii Strashko wrote:
>>> Some CPTS instances, which can be found on KeyStone 2 1/10G Ethernet
>>> Switch Subsystems, can control an external multiplexer that selects
>>> one of up to 32 clocks for time sync reference (RFTCLK). This feature
>>> can be configured through CPTS_RFTCLK_SEL register (offset: x08).
>>>
>>> Hence, introduce optional DT cpts_rftclk_sel property which, if present,
>>> will specify CPTS reference clock. The cpts_rftclk_sel should be
>>> omitted in DT if HW doesn't support this feature. The external fixed
>>> rate clocks can be defined in board files as "fixed-clock".
>>
>> Can't you implement this using the clock tree, rather than an ad-hoc
>> DT property?
>>
>
> I've thought about this, but decided to move forward with this impl
>  which is pretty simple. I will try.
>
>

I came up with the RFC patch below. If there are no objections, I'll move forward with it.

According to Keystone 2 66AK2e DM there are 7 possible ref clocks:
0000 = SYSCLK2
0001 = SYSCLK3
0010 = TIMI0
0011 = TIMI1
0100 = TSIPCLKA
1000 = TSREFCLK
1100 = TSIPCLKB
Others = Reserved

Two of the above clocks, SYSCLK2 and SYSCLK3, are internal; the others are
external (board specific). So the default definition of cpts_mux will have only
two parents. If an external clock is going to be used as the cpts rftclk, then
it should be defined in the board file and the cpts_refclk_mux definition
updated to support this external clock:

timi1clk: timi1clk {
	#clock-cells = <0>;
	compatible = "fixed-clock";
	clock-frequency = <xxxxxxxxxx>;
	clock-output-names = "timi1";
};

&cpts_mux {
	clocks = <&chipclk12>, <&chipclk13>, <timi1clk>;
	cpts-mux-tbl = <0>, <1>, <3>;
	assigned-clocks = <&cpts_mux>;
	assigned-clock-parents = <&timi1clk>;
};



>From ec5c7bed0e021c2ca7e9392173bf67bb9a45d0f4 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Mon, 5 Dec 2016 12:34:45 -0600
Subject: [PATCH] cpts refclk sel

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 arch/arm/boot/dts/keystone-k2e-netcp.dtsi | 10 +++++-
 drivers/net/ethernet/ti/cpts.c            | 52 ++++++++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
index 919e655..b27aa22 100644
--- a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
+++ b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
@@ -138,7 +138,7 @@ netcp: netcp@24000000 {
 	/* NetCP address range */
 	ranges = <0 0x24000000 0x1000000>;

-	clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
+	clocks = <&clkpa>, <&clkcpgmac>, <&cpts_mux>;
 	clock-names = "pa_clk", "ethss_clk", "cpts";
 	dma-coherent;

@@ -162,6 +162,14 @@ netcp: netcp@24000000 {
 			cpts-ext-ts-inputs = <6>;
 			cpts-ts-comp-length;

+			cpts_mux: cpts_refclk_mux {
+				#clock-cells = <0>;
+				clocks = <&chipclk12>, <&chipclk13>;
+				cpts-mux-tbl = <0>, <1>;
+				assigned-clocks = <&cpts_mux>;
+				assigned-clock-parents = <&chipclk12>;
+			};
+
 			interfaces {
 				gbe0: interface-0 {
 					slave-port = <0>;
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 938de22..ef94316 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -17,6 +17,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
  */
+#include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/if.h>
 #include <linux/hrtimer.h>
@@ -672,6 +673,7 @@ int cpts_register(struct cpts *cpts)
 	cpts->phc_index = ptp_clock_index(cpts->clock);

 	schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
+
 	return 0;

 err_ptp:
@@ -741,6 +743,54 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
 		 freq, cpts->cc_mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
 }

+static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node)
+{
+	unsigned int num_parents;
+	const char **parent_names;
+	struct device_node *refclk_np;
+	void __iomem *reg;
+	struct clk *clk;
+	u32 *mux_table;
+	int ret;
+
+	refclk_np = of_get_child_by_name(node, "cpts_refclk_mux");
+	if (!refclk_np)
+		return -EINVAL;
+
+	num_parents = of_clk_get_parent_count(refclk_np);
+	if (num_parents < 1) {
+		dev_err(cpts->dev, "mux-clock %s must have parents\n",
+			refclk_np->name);
+		return -EINVAL;
+	}
+	parent_names = devm_kzalloc(cpts->dev, (sizeof(char *) * num_parents),
+				    GFP_KERNEL);
+	if (!parent_names)
+		return -ENOMEM;
+
+	of_clk_parent_fill(refclk_np, parent_names, num_parents);
+
+	mux_table = devm_kzalloc(cpts->dev, sizeof(*mux_table) * (32 + 1),
+				 GFP_KERNEL);
+	if (!mux_table)
+		return -ENOMEM;
+
+	ret = of_property_read_variable_u32_array(refclk_np, "cpts-mux-tbl",
+						  mux_table, 1, 32);
+	if (ret < 0)
+		return ret;
+
+	reg = &cpts->reg->rftclk_sel;
+
+	clk = clk_register_mux_table(cpts->dev, refclk_np->name,
+				     parent_names, num_parents,
+				     0, reg, 0, 0x1F, 0, mux_table, NULL);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	return of_clk_add_provider(refclk_np, of_clk_src_simple_get, clk);
+}
+
 static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
 {
 	int ret = -EINVAL;
@@ -787,7 +837,7 @@ static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
 	if (!of_property_read_u32(node, "cpts-ext-ts-inputs", &prop))
 		cpts->ext_ts_inputs = prop;

-	return 0;
+	return cpts_of_mux_clk_setup(cpts, node);

 of_error:
 	dev_err(cpts->dev, "CPTS: Missing property in the DT.\n");
-- 
2.10.1



-- 
regards,
-grygorii

^ permalink raw reply related

* Re: [PATCH 2/6] net: ethernet: ti: cpts: add support for ext rftclk selection
From: Richard Cochran @ 2016-12-06 20:25 UTC (permalink / raw)
  To: Grygorii Strashko
  Cc: Murali Karicheri, David S. Miller, netdev, Mugunthan V N,
	Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Wingman Kwok, linux-clk
In-Reply-To: <11994fbc-3713-6ef7-8a44-8a2442106dfc@ti.com>

On Tue, Dec 06, 2016 at 01:39:40PM -0600, Grygorii Strashko wrote:
> I come with below RFC patch. if no objection I'll move forward with it.

Thanks for following through with this!

The am335x will also need the MUX in its clock tree, won't it?


Thanks,
Richard

^ permalink raw reply

* Re: [PATCH 2/6] net: ethernet: ti: cpts: add support for ext rftclk selection
From: Grygorii Strashko @ 2016-12-06 20:40 UTC (permalink / raw)
  To: Richard Cochran
  Cc: Murali Karicheri, David S. Miller, netdev-u79uwXL29TY76Z2rM5mHXA,
	Mugunthan V N, Sekhar Nori, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-omap-u79uwXL29TY76Z2rM5mHXA, Rob Herring,
	devicetree-u79uwXL29TY76Z2rM5mHXA, Wingman Kwok,
	linux-clk-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20161206202558.GB23605-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>



On 12/06/2016 02:25 PM, Richard Cochran wrote:
> On Tue, Dec 06, 2016 at 01:39:40PM -0600, Grygorii Strashko wrote:
>> I come with below RFC patch. if no objection I'll move forward with it.
> 
> Thanks for following through with this!
> 
> The am335x will also need the MUX in its clock tree, won't it?
> 

Not exactly. I do not see CPTS_RFTCLK_SEL register in trm, but looks like
it's already implemented in am335 clock tree:
	cpsw_cpts_rft_clk: cpsw_cpts_rft_clk@520 {
		#clock-cells = <0>;
		compatible = "ti,mux-clock";
		clocks = <&dpll_core_m5_ck>, <&dpll_core_m4_ck>;
		reg = <0x0520>;
	};

and assigned-clock-xx can be used to change the parent in the board file:

&cpsw_cpts_rft_clk {
assigned-clocks = <&cpsw_cpts_rft_clk>;
assigned-clock-parents = <&dpll_core_m4_ck>;
};


-- 
regards,
-grygorii
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 4/6] net: ethernet: ti: cpts: add ptp pps support
From: Grygorii Strashko @ 2016-12-06 20:43 UTC (permalink / raw)
  To: Richard Cochran
  Cc: David S. Miller, netdev, Mugunthan V N, Sekhar Nori, linux-kernel,
	linux-omap, Rob Herring, devicetree, Murali Karicheri,
	Wingman Kwok
In-Reply-To: <20161206180857.GA20680@localhost.localdomain>



On 12/06/2016 12:08 PM, Richard Cochran wrote:
> On Wed, Nov 30, 2016 at 11:05:19AM +0100, Richard Cochran wrote:
>> Can you adjust the frequency of the keystone devices in hardware?  If
>> so, then please implement it, and just disable PPS for the CPSW.
>>
>> The only reason I used the timecounter for frequency adjustment was
>> because the am335x HW is broken.  But this shouldn't hold back other
>> newer HW without the same silicon flaws.
>
> I am talking here about the ADPLLLJ units.  Are they usable on the
> keystone?
>
> If so, please implement the frequency adjustment with them.
>

No, I think it's not possible (at least as far as I know now).
I'll drop this patch for now.

By the way, I've tested am335 (BBB) + HW_TS_PUSH + PWM.
It seems to work.

-- 
regards,
-grygorii

^ permalink raw reply

* Re: [PATCH net] net: ep93xx_eth: Do not crash unloading module
From: Florian Fainelli @ 2016-12-06 20:44 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, hsweeten
In-Reply-To: <20161205.153602.1770726597007697695.davem@davemloft.net>

On 12/05/2016 12:36 PM, David Miller wrote:
> From: Florian Fainelli <f.fainelli@gmail.com>
> Date: Sun,  4 Dec 2016 19:22:05 -0800
> 
>> When we unload the ep93xx_eth, whether we have opened the network
>> interface or not, we will either hit a kernel paging request error, or a
>> simple NULL pointer de-reference because:
>>
>> - if ep93xx_open has been called, we have created a valid DMA mapping
>>   for ep->descs, when we call ep93xx_stop, we also call
>>   ep93xx_free_buffers, ep->descs now has a stale value
>>
>> - if ep93xx_open has not been called, we have a NULL pointer for
>>   ep->descs, so performing any operation against that address just won't
>>   work
>>
>> Fix this by adding a NULL pointer check for ep->descs which means that
>> ep93xx_free_buffers() was able to successfully tear down the descriptors
>> and free the DMA cookie as well.
>>
>> Fixes: 1d22e05df818 ("[PATCH] Cirrus Logic ep93xx ethernet driver")
>> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> 
> Applied, thanks Florian.

Thanks! Can you also queue this one for -stable? The original commit
dates back from 2006, but it should apply with minor hunks to all
-stable kernels.

Cheers
-- 
Florian

^ permalink raw reply

* Re: [PATCH net-next 1/3] net/flow_dissector: Enable matching on flags for TC filter consumers
From: Or Gerlitz @ 2016-12-06 20:51 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Or Gerlitz, David S. Miller, Linux Netdev List, Jiri Pirko,
	Roi Dayan, Hadar Har-Zion, Tom Herbert
In-Reply-To: <20161206124358.GM1984@nanopsycho>

On Tue, Dec 6, 2016 at 2:43 PM, Jiri Pirko <jiri@resnulli.us> wrote:
> Tue, Dec 06, 2016 at 01:32:57PM CET, ogerlitz@mellanox.com wrote:

>> struct flow_keys {
>>+#define FLOW_KEYS_HASH_START_FIELD control
>>       struct flow_dissector_key_control control;
>>-#define FLOW_KEYS_HASH_START_FIELD basic

> How is this hashing related to cls_flower dissections? What am I
> missing?

Oops, this is my bad. I convinced myself that flower dissections will
not work for any field which is not accounted for hash start field, I
was wrong, dropping this patch.

^ permalink raw reply

* [PATCH net-next V2 7/7] liquidio VF rx data and ctl path
From: Raghu Vatsavayi @ 2016-12-06 21:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481058367-3937-1-git-send-email-rvatsavayi@caviumnetworks.com>

Adds support for VF receive data control path.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 412 ++++++++++++++++++++-
 .../net/ethernet/cavium/liquidio/octeon_device.c   |   2 +-
 drivers/net/ethernet/cavium/liquidio/octeon_droq.c |  10 +
 .../ethernet/cavium/liquidio/response_manager.c    |   3 +-
 4 files changed, 423 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index ce5cdcd..e3724c6 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -38,6 +38,8 @@
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
+/* Bit mask values for lio->ifstate */
+#define   LIO_IFSTATE_DROQ_OPS             0x01
 #define   LIO_IFSTATE_REGISTERED           0x02
 #define   LIO_IFSTATE_RUNNING              0x04
 
@@ -55,6 +57,14 @@ struct liquidio_if_cfg_resp {
 	u64 status;
 };
 
+struct liquidio_rx_ctl_context {
+	int octeon_id;
+
+	wait_queue_head_t wc;
+
+	int cond;
+};
+
 union tx_info {
 	u64 u64;
 	struct {
@@ -177,6 +187,16 @@ static int wait_for_pending_requests(struct octeon_device *oct)
 };
 
 /**
+ * \brief check interface state
+ * @param lio per-network private data
+ * @param state_flag flag state to check
+ */
+static inline int ifstate_check(struct lio *lio, int state_flag)
+{
+	return atomic_read(&lio->ifstate) & state_flag;
+}
+
+/**
  * \brief set interface state
  * @param lio per-network private data
  * @param state_flag flag state to set
@@ -510,6 +530,31 @@ static inline void update_link_status(struct net_device *netdev,
 	}
 }
 
+static void update_txq_status(struct octeon_device *oct, int iq_num)
+{
+	struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
+	struct net_device *netdev;
+	struct lio *lio;
+
+	netdev = oct->props[iq->ifidx].netdev;
+	lio = GET_LIO(netdev);
+	if (netif_is_multiqueue(netdev)) {
+		if (__netif_subqueue_stopped(netdev, iq->q_index) &&
+		    lio->linfo.link.s.link_up &&
+		    (!octnet_iq_is_full(oct, iq_num))) {
+			netif_wake_subqueue(netdev, iq->q_index);
+			INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
+						  tx_restart, 1);
+		} else {
+			if (!octnet_iq_is_full(oct, lio->txq)) {
+				INCR_INSTRQUEUE_PKT_COUNT(
+				    lio->oct_dev, lio->txq, tx_restart, 1);
+				wake_q(netdev, lio->txq);
+			}
+		}
+	}
+}
+
 static
 int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
 {
@@ -818,6 +863,91 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 }
 
 /**
+ * \brief Callback for rx ctrl
+ * @param status status of request
+ * @param buf pointer to resp structure
+ */
+static void rx_ctl_callback(struct octeon_device *oct,
+			    u32 status, void *buf)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+	struct liquidio_rx_ctl_context *ctx;
+
+	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+	oct = lio_get_device(ctx->octeon_id);
+	if (status)
+		dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n",
+			CVM_CAST64(status));
+	WRITE_ONCE(ctx->cond, 1);
+
+	/* This barrier is required to be sure that the response has been
+	 * written fully before waking up the handler
+	 */
+	wmb();
+
+	wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Send Rx control command
+ * @param lio per-network private data
+ * @param start_stop whether to start or stop
+ */
+static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
+{
+	struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
+	int ctx_size = sizeof(struct liquidio_rx_ctl_context);
+	struct liquidio_rx_ctl_context *ctx;
+	struct octeon_soft_command *sc;
+	union octnet_cmd *ncmd;
+	int retval;
+
+	if (oct->props[lio->ifidx].rx_on == start_stop)
+		return;
+
+	sc = (struct octeon_soft_command *)
+		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+					  16, ctx_size);
+
+	ncmd = (union octnet_cmd *)sc->virtdptr;
+	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+	WRITE_ONCE(ctx->cond, 0);
+	ctx->octeon_id = lio_get_device_id(oct);
+	init_waitqueue_head(&ctx->wc);
+
+	ncmd->u64 = 0;
+	ncmd->s.cmd = OCTNET_CMD_RX_CTL;
+	ncmd->s.param1 = start_stop;
+
+	octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_CMD, 0, 0, 0);
+
+	sc->callback = rx_ctl_callback;
+	sc->callback_arg = sc;
+	sc->wait_time = 5000;
+
+	retval = octeon_send_soft_command(oct, sc);
+	if (retval == IQ_SEND_FAILED) {
+		netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
+	} else {
+		/* Sleep on a wait queue till the cond flag indicates that the
+		 * response arrived or timed-out.
+		 */
+		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR)
+			return;
+		oct->props[lio->ifidx].rx_on = start_stop;
+	}
+
+	octeon_free_soft_command(oct, sc);
+}
+
+/**
  * \brief Destroy NIC device interface
  * @param oct octeon device
  * @param ifidx which interface to destroy
@@ -828,6 +958,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 {
 	struct net_device *netdev = oct->props[ifidx].netdev;
+	struct napi_struct *napi, *n;
 	struct lio *lio;
 
 	if (!netdev) {
@@ -843,6 +974,15 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING)
 		liquidio_stop(netdev);
 
+	if (oct->props[lio->ifidx].napi_enabled == 1) {
+		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+			napi_disable(napi);
+
+		oct->props[lio->ifidx].napi_enabled = 0;
+
+		oct->droq[0]->ops.poll_mode = 0;
+	}
+
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
 		unregister_netdev(netdev);
 
@@ -863,7 +1003,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
  */
 static int liquidio_stop_nic_module(struct octeon_device *oct)
 {
-	int i;
+	struct lio *lio;
+	int i, j;
 
 	dev_dbg(&oct->pci_dev->dev, "Stopping network interfaces\n");
 	if (!oct->ifcount) {
@@ -871,6 +1012,17 @@ static int liquidio_stop_nic_module(struct octeon_device *oct)
 		return 1;
 	}
 
+	spin_lock_bh(&oct->cmd_resp_wqlock);
+	oct->cmd_resp_state = OCT_DRV_OFFLINE;
+	spin_unlock_bh(&oct->cmd_resp_wqlock);
+
+	for (i = 0; i < oct->ifcount; i++) {
+		lio = GET_LIO(oct->props[i].netdev);
+		for (j = 0; j < lio->linfo.num_rxpciq; j++)
+			octeon_unregister_droq_ops(oct,
+						   lio->linfo.rxpciq[j].s.q_no);
+	}
+
 	for (i = 0; i < oct->ifcount; i++)
 		liquidio_destroy_nic_device(oct, i);
 
@@ -1091,6 +1243,41 @@ static void free_netsgbuf_with_resp(void *buf)
 }
 
 /**
+ * \brief Setup output queue
+ * @param oct octeon device
+ * @param q_no which queue
+ * @param num_descs how many descriptors
+ * @param desc_size size of each descriptor
+ * @param app_ctx application context
+ */
+static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs,
+			     int desc_size, void *app_ctx)
+{
+	int ret_val;
+
+	dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
+	/* droq creation and local register settings. */
+	ret_val = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
+	if (ret_val < 0)
+		return ret_val;
+
+	if (ret_val == 1) {
+		dev_dbg(&oct->pci_dev->dev, "Using default droq %d\n", q_no);
+		return 0;
+	}
+
+	/* Enable the droq queues */
+	octeon_set_droq_pkt_op(oct, q_no, 1);
+
+	/* Send Credit for Octeon Output queues. Credits are always
+	 * sent after the output queue is enabled.
+	 */
+	writel(oct->droq[q_no]->max_count, oct->droq[q_no]->pkts_credit_reg);
+
+	return ret_val;
+}
+
+/**
  * \brief Callback for getting interface configuration
  * @param status status of request
  * @param buf pointer to resp structure
@@ -1142,6 +1329,155 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb,
 	return (u16)(qindex % (lio->linfo.num_txpciq));
 }
 
+/** Routine to push packets arriving on Octeon interface upto network layer.
+ * @param oct_id   - octeon device id.
+ * @param skbuff   - skbuff struct to be passed to network layer.
+ * @param len      - size of total data received.
+ * @param rh       - Control header associated with the packet
+ * @param param    - additional control data with the packet
+ * @param arg      - farg registered in droq_ops
+ */
+static void
+liquidio_push_packet(u32 octeon_id __attribute__((unused)),
+		     void *skbuff,
+		     u32 len,
+		     union octeon_rh *rh,
+		     void *param,
+		     void *arg)
+{
+	struct napi_struct *napi = param;
+	struct octeon_droq *droq =
+		container_of(param, struct octeon_droq, napi);
+	struct net_device *netdev = (struct net_device *)arg;
+	struct sk_buff *skb = (struct sk_buff *)skbuff;
+
+	if (netdev) {
+		struct lio *lio = GET_LIO(netdev);
+		int packet_was_received;
+
+		/* Do not proceed if the interface is not in RUNNING state. */
+		if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
+			recv_buffer_free(skb);
+			droq->stats.rx_dropped++;
+			return;
+		}
+
+		skb->dev = netdev;
+
+		skb_record_rx_queue(skb, droq->q_no);
+		if (likely(len > MIN_SKB_SIZE)) {
+			struct octeon_skb_page_info *pg_info;
+			unsigned char *va;
+
+			pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+			if (pg_info->page) {
+				/* For Paged allocation use the frags */
+				va = page_address(pg_info->page) +
+					pg_info->page_offset;
+				memcpy(skb->data, va, MIN_SKB_SIZE);
+				skb_put(skb, MIN_SKB_SIZE);
+				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+						pg_info->page,
+						pg_info->page_offset +
+						MIN_SKB_SIZE,
+						len - MIN_SKB_SIZE,
+						LIO_RXBUFFER_SZ);
+			}
+		} else {
+			struct octeon_skb_page_info *pg_info =
+				((struct octeon_skb_page_info *)(skb->cb));
+			skb_copy_to_linear_data(skb,
+						page_address(pg_info->page) +
+						pg_info->page_offset, len);
+			skb_put(skb, len);
+			put_page(pg_info->page);
+		}
+
+		skb_pull(skb, rh->r_dh.len * 8);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+
+		if ((netdev->features & NETIF_F_RXCSUM) &&
+		    (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED))
+			/* checksum has already been verified */
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		else
+			skb->ip_summed = CHECKSUM_NONE;
+
+		packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP);
+
+		if (packet_was_received) {
+			droq->stats.rx_bytes_received += len;
+			droq->stats.rx_pkts_received++;
+			netdev->last_rx = jiffies;
+		} else {
+			droq->stats.rx_dropped++;
+			netif_info(lio, rx_err, lio->netdev,
+				   "droq:%d  error rx_dropped:%llu\n",
+				   droq->q_no, droq->stats.rx_dropped);
+		}
+
+	} else {
+		recv_buffer_free(skb);
+	}
+}
+
+/**
+ * \brief callback when receive interrupt occurs and we are in NAPI mode
+ * @param arg pointer to octeon output queue
+ */
+static void liquidio_vf_napi_drv_callback(void *arg)
+{
+	struct octeon_droq *droq = arg;
+
+	napi_schedule_irqoff(&droq->napi);
+}
+
+/**
+ * \brief Entry point for NAPI polling
+ * @param napi NAPI structure
+ * @param budget maximum number of items to process
+ */
+static int liquidio_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct octeon_instr_queue *iq;
+	struct octeon_device *oct;
+	struct octeon_droq *droq;
+	int tx_done = 0, iq_no;
+	int work_done;
+
+	droq = container_of(napi, struct octeon_droq, napi);
+	oct = droq->oct_dev;
+	iq_no = droq->q_no;
+
+	/* Handle Droq descriptors */
+	work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
+						 POLL_EVENT_PROCESS_PKTS,
+						 budget);
+
+	/* Flush the instruction queue */
+	iq = oct->instr_queue[iq_no];
+	if (iq) {
+		/* Process iq buffers with in the budget limits */
+		tx_done = octeon_flush_iq(oct, iq, 1, budget);
+		/* Update iq read-index rather than waiting for next interrupt.
+		 * Return back if tx_done is false.
+		 */
+		update_txq_status(oct, iq_no);
+	} else {
+		dev_err(&oct->pci_dev->dev, "%s: iq (%d) num invalid\n",
+			__func__, iq_no);
+	}
+
+	if ((work_done < budget) && (tx_done)) {
+		napi_complete(napi);
+		octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
+					     POLL_EVENT_ENABLE_INTR, 0);
+		return 0;
+	}
+
+	return (!tx_done) ? (budget) : (work_done);
+}
+
 /**
  * \brief Setup input and output queues
  * @param octeon_dev octeon device
@@ -1153,16 +1489,68 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb,
  */
 static inline int setup_io_queues(struct octeon_device *octeon_dev, int ifidx)
 {
+	struct octeon_droq_ops droq_ops;
 	struct net_device *netdev;
+	static int cpu_id_modulus;
+	struct octeon_droq *droq;
+	struct napi_struct *napi;
+	static int cpu_id;
 	int num_tx_descs;
 	struct lio *lio;
 	int retval = 0;
-	int q;
+	int q, q_no;
 
 	netdev = octeon_dev->props[ifidx].netdev;
 
 	lio = GET_LIO(netdev);
 
+	memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
+
+	droq_ops.fptr = liquidio_push_packet;
+	droq_ops.farg = netdev;
+
+	droq_ops.poll_mode = 1;
+	droq_ops.napi_fn = liquidio_vf_napi_drv_callback;
+	cpu_id = 0;
+	cpu_id_modulus = num_present_cpus();
+
+	/* set up DROQs. */
+	for (q = 0; q < lio->linfo.num_rxpciq; q++) {
+		q_no = lio->linfo.rxpciq[q].s.q_no;
+
+		retval = octeon_setup_droq(
+		    octeon_dev, q_no,
+		    CFG_GET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(octeon_dev),
+						lio->ifidx),
+		    CFG_GET_NUM_RX_BUF_SIZE_NIC_IF(octeon_get_conf(octeon_dev),
+						   lio->ifidx),
+		    NULL);
+		if (retval) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"%s : Runtime DROQ(RxQ) creation failed.\n",
+				__func__);
+			return 1;
+		}
+
+		droq = octeon_dev->droq[q_no];
+		napi = &droq->napi;
+		netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
+
+		/* designate a CPU for this droq */
+		droq->cpu_id = cpu_id;
+		cpu_id++;
+		if (cpu_id >= cpu_id_modulus)
+			cpu_id = 0;
+
+		octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
+	}
+
+	/* 23XX VF can send/recv control messages (via the first VF-owned
+	 * droq) from the firmware even if the ethX interface is down,
+	 * so that's why poll_mode must be off for the first droq.
+	 */
+	octeon_dev->droq[0]->ops.poll_mode = 0;
+
 	/* set up IQs. */
 	for (q = 0; q < lio->linfo.num_txpciq; q++) {
 		num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(
@@ -1189,6 +1577,16 @@ static int liquidio_open(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
+	struct napi_struct *napi, *n;
+
+	if (!oct->props[lio->ifidx].napi_enabled) {
+		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+			napi_enable(napi);
+
+		oct->props[lio->ifidx].napi_enabled = 1;
+
+		oct->droq[0]->ops.poll_mode = 1;
+	}
 
 	ifstate_set(lio, LIO_IFSTATE_RUNNING);
 
@@ -1198,6 +1596,9 @@ static int liquidio_open(struct net_device *netdev)
 	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 	start_txq(netdev);
 
+	/* tell Octeon to start forwarding packets to host */
+	send_rx_ctrl_cmd(lio, 1);
+
 	dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name);
 
 	return 0;
@@ -1220,6 +1621,9 @@ static int liquidio_stop(struct net_device *netdev)
 	netif_carrier_off(netdev);
 	lio->link_changes++;
 
+	/* tell Octeon to stop forwarding packets to host */
+	send_rx_ctrl_cmd(lio, 0);
+
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
 	txqs_stop(netdev);
@@ -2016,6 +2420,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 			goto setup_nic_dev_fail;
 		}
 
+		ifstate_set(lio, LIO_IFSTATE_DROQ_OPS);
+
 		/* For VFs, enable Octeon device interrupts here,
 		 * as this is contingent upon IO queue setup
 		 */
@@ -2026,8 +2432,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		 * tx and rx queues
 		 */
 		lio->txq = lio->linfo.txpciq[0].s.q_no;
+		lio->rxq = lio->linfo.rxpciq[0].s.q_no;
 
 		lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
+		lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
 
 		if (setup_glists(lio, num_iqueues)) {
 			dev_err(&octeon_dev->pci_dev->dev,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 583818e..a8df493 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -1374,7 +1374,7 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
 	/*write resend. Writing RESEND in SLI_PKTX_CNTS should be enough
 	 *to trigger tx interrupts as well, if they are pending.
 	 */
-	if (oct && OCTEON_CN23XX_PF(oct)) {
+	if (oct && (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))) {
 		if (droq)
 			writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg);
 		/*we race with firmrware here. read and write the IN_DONE_CNTS*/
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 8bf1ac76..0be87d1 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -28,6 +28,7 @@
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
 #include "cn23xx_pf_device.h"
+#include "cn23xx_vf_device.h"
 
 struct niclist {
 	struct list_head list;
@@ -261,6 +262,11 @@ int octeon_init_droq(struct octeon_device *oct,
 
 		c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
 		c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
+	} else if (OCTEON_CN23XX_VF(oct)) {
+		struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_vf);
+
+		c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
+		c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
 	} else {
 		return 1;
 	}
@@ -889,6 +895,10 @@ static inline void octeon_droq_drop_packets(struct octeon_device *oct,
 			lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
 		}
 		break;
+
+		case OCTEON_CN23XX_VF_VID:
+			lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+		break;
 		}
 		return 0;
 	}
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index fdaf742..2fbaae9 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -84,7 +84,8 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
 
 		sc = (struct octeon_soft_command *)ordered_sc_list->
 		    head.next;
-		if (OCTEON_CN23XX_PF(octeon_dev)) {
+		if (OCTEON_CN23XX_PF(octeon_dev) ||
+		    OCTEON_CN23XX_VF(octeon_dev)) {
 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
 			rptr = sc->cmd.cmd3.rptr;
 		} else {
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next V2 5/7] liquidio CN23XX: VF xmit
From: Raghu Vatsavayi @ 2016-12-06 21:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481058367-3937-1-git-send-email-rvatsavayi@caviumnetworks.com>

Adds support for transmit functionality in VF.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 .../ethernet/cavium/liquidio/cn23xx_vf_device.c    |  21 ++
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 340 +++++++++++++++++++++
 .../net/ethernet/cavium/liquidio/request_manager.c |   6 +-
 3 files changed, 364 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
index 108e487..b6117b6 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
@@ -529,6 +529,26 @@ static u64 cn23xx_vf_msix_interrupt_handler(void *dev)
 	return ret;
 }
 
+static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq)
+{
+	u32 pkt_in_done = readl(iq->inst_cnt_reg); /* current register value */
+	u32 last_done;
+	u32 new_idx;
+
+	last_done = pkt_in_done - iq->pkt_in_done; /* delta since last call */
+	iq->pkt_in_done = pkt_in_done; /* remembered for the next delta */
+
+	/* Advance the read index by the masked delta and wrap it at the
+	 * IQ size.  The iq->reset_instr_cnt is always zero for
+	 * cn23xx, so no extra adjustments are needed.
+	 */
+	new_idx = (iq->octeon_read_index +
+		   (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) %
+		  iq->max_count;
+
+	return new_idx; /* iq->octeon_read_index itself is not modified here */
+}
+
 static void cn23xx_enable_vf_interrupt(struct octeon_device *oct, u8 intr_flag)
 {
 	struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
@@ -660,6 +680,7 @@ int cn23xx_setup_octeon_vf_device(struct octeon_device *oct)
 	oct->fn_list.msix_interrupt_handler = cn23xx_vf_msix_interrupt_handler;
 
 	oct->fn_list.setup_device_regs = cn23xx_setup_vf_device_regs;
+	oct->fn_list.update_iq_read_idx = cn23xx_update_read_index;
 
 	oct->fn_list.enable_interrupt = cn23xx_enable_vf_interrupt;
 	oct->fn_list.disable_interrupt = cn23xx_disable_vf_interrupt;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index e4ee6ec..cf80722 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -55,6 +55,21 @@ struct liquidio_if_cfg_resp {
 	u64 status;
 };
 
+union tx_info { /* GSO info; liquidio_xmit overlays it on cmd3.ossp[0] */
+	u64 u64; /* the same 64 bits viewed as one word */
+	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+		u16 gso_size;
+		u16 gso_segs;
+		u32 reserved;
+#else /* same fields declared in reverse order */
+		u32 reserved;
+		u16 gso_segs;
+		u16 gso_size;
+#endif
+	} s;
+};
+
 #define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
 
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
@@ -255,6 +270,19 @@ static void start_txq(struct net_device *netdev)
 }
 
 /**
+ * \brief Stop one Tx queue (the single device queue when not multiqueue)
+ * @param netdev network device
+ * @param q subqueue index to stop; unused for a single-queue device
+ */
+static inline void stop_q(struct net_device *netdev, int q)
+{
+	if (netif_is_multiqueue(netdev))
+		netif_stop_subqueue(netdev, q);
+	else
+		netif_stop_queue(netdev);
+}
+
+/**
  * Remove the node at the head of the list. The list would be empty at
  * the end of this call if there are no more nodes in the list.
  */
@@ -945,6 +973,45 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb,
 }
 
 /**
+ * \brief Setup input and output queues
+ * @param octeon_dev octeon device
+ * @param ifidx Interface index into octeon_dev->props[]
+ * @returns 0 on success, 1 if creating any input queue failed
+ * Note: Queues are with respect to the octeon device. Thus
+ * an input queue is for egress packets, and output queues
+ * are for ingress packets. Only input queues are created here.
+ */
+static inline int setup_io_queues(struct octeon_device *octeon_dev, int ifidx)
+{
+	struct net_device *netdev;
+	int num_tx_descs;
+	struct lio *lio;
+	int retval = 0;
+	int q;
+
+	netdev = octeon_dev->props[ifidx].netdev;
+
+	lio = GET_LIO(netdev);
+
+	/* set up IQs. */
+	for (q = 0; q < lio->linfo.num_txpciq; q++) {
+		num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(
+		    octeon_get_conf(octeon_dev), lio->ifidx);
+		retval = octeon_setup_iq(octeon_dev, ifidx, q,
+					 lio->linfo.txpciq[q], num_tx_descs,
+					 netdev_get_tx_queue(netdev, q));
+		if (retval) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				" %s : Runtime IQ(TxQ) creation failed.\n",
+				__func__);
+			return 1; /* retval itself is not propagated */
+		}
+	}
+
+	return 0;
+}
+
+/**
  * \brief Net device open for LiquidIO
  * @param netdev network device
  */
@@ -1180,6 +1247,259 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
 	return 0;
 }
 
+/** \brief Transmit networks packets to the Octeon interface
+ * @param skb      socket buffer holding the packet to transmit
+ * @param netdev   pointer to network device
+ * @returns NETDEV_TX_BUSY if the IQ is full or a DMA mapping fails (skb is
+ *          not freed), NETDEV_TX_OK otherwise, even when the skb is dropped
+ */
+static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct octnet_buf_free_info *finfo;
+	union octnic_cmd_setup cmdsetup;
+	struct octnic_data_pkt ndata;
+	struct octeon_instr_irh *irh;
+	struct oct_iq_stats *stats;
+	struct octeon_device *oct;
+	int q_idx = 0, iq_no = 0;
+	union tx_info *tx_info;
+	struct lio *lio;
+	int status = 0;
+	u64 dptr = 0;
+	u32 tag = 0;
+	int j;
+
+	lio = GET_LIO(netdev);
+	oct = lio->oct_dev;
+
+	if (netif_is_multiqueue(netdev)) {
+		q_idx = skb->queue_mapping;
+		q_idx = (q_idx % (lio->linfo.num_txpciq));
+		tag = q_idx;
+		iq_no = lio->linfo.txpciq[q_idx].s.q_no;
+	} else {
+		iq_no = lio->txq;
+	}
+
+	stats = &oct->instr_queue[iq_no]->stats;
+
+	/* Check for all conditions in which the current packet cannot be
+	 * transmitted.
+	 */
+	if (!(atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) ||
+	    (!lio->linfo.link.s.link_up) || (skb->len <= 0)) {
+		netif_info(lio, tx_err, lio->netdev, "Transmit failed link_status : %d\n",
+			   lio->linfo.link.s.link_up);
+		goto lio_xmit_failed;
+	}
+
+	/* Use space in skb->cb to store info used to unmap and
+	 * free the buffers.
+	 */
+	finfo = (struct octnet_buf_free_info *)skb->cb;
+	finfo->lio = lio;
+	finfo->skb = skb;
+	finfo->sc = NULL;
+
+	/* Prepare the attributes for the data to be passed to OSI. */
+	memset(&ndata, 0, sizeof(struct octnic_data_pkt));
+
+	ndata.buf = finfo;
+
+	ndata.q_no = iq_no;
+
+	if (netif_is_multiqueue(netdev)) {
+		if (octnet_iq_is_full(oct, ndata.q_no)) {
+			/* defer sending if queue is full */
+			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+				   ndata.q_no);
+			stats->tx_iq_busy++;
+			return NETDEV_TX_BUSY;
+		}
+	} else {
+		if (octnet_iq_is_full(oct, lio->txq)) {
+			/* defer sending if queue is full */
+			stats->tx_iq_busy++;
+			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+				   ndata.q_no);
+			return NETDEV_TX_BUSY;
+		}
+	}
+
+	ndata.datasize = skb->len;
+
+	cmdsetup.u64 = 0;
+	cmdsetup.s.iq_no = iq_no;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		cmdsetup.s.transport_csum = 1;
+
+	if (!skb_shinfo(skb)->nr_frags) {
+		cmdsetup.s.u.datasize = skb->len;
+		octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
+		/* Linear skb: map the whole packet as a single DMA buffer */
+		dptr = dma_map_single(&oct->pci_dev->dev,
+				      skb->data,
+				      skb->len,
+				      DMA_TO_DEVICE);
+		if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
+			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 1\n",
+				__func__);
+			return NETDEV_TX_BUSY;
+		}
+
+		ndata.cmd.cmd3.dptr = dptr;
+		finfo->dptr = dptr;
+		ndata.reqtype = REQTYPE_NORESP_NET;
+
+	} else {
+		struct skb_frag_struct *frag;
+		struct octnic_gather *g;
+		int i, frags;
+
+		spin_lock(&lio->glist_lock[q_idx]);
+		g = (struct octnic_gather *)list_delete_head(
+		    &lio->glist[q_idx]);
+		spin_unlock(&lio->glist_lock[q_idx]);
+
+		if (!g) {
+			netif_info(lio, tx_err, lio->netdev,
+				   "Transmit scatter gather: glist null!\n");
+			goto lio_xmit_failed;
+		}
+
+		cmdsetup.s.gather = 1;
+		cmdsetup.s.u.gatherptrs = (skb_shinfo(skb)->nr_frags + 1);
+		octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
+
+		memset(g->sg, 0, g->sg_size);
+
+		g->sg[0].ptr[0] = dma_map_single(&oct->pci_dev->dev,
+						 skb->data,
+						 (skb->len - skb->data_len),
+						 DMA_TO_DEVICE);
+		if (dma_mapping_error(&oct->pci_dev->dev, g->sg[0].ptr[0])) {
+			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 2\n",
+				__func__);
+			return NETDEV_TX_BUSY; /* NOTE(review): g not put back on glist; leak? */
+		}
+		add_sg_size(&g->sg[0], (skb->len - skb->data_len), 0);
+
+		frags = skb_shinfo(skb)->nr_frags;
+		i = 1; /* gather slot 0 holds the linear part */
+		while (frags--) { /* leaves frags == -1 on normal exit */
+			frag = &skb_shinfo(skb)->frags[i - 1];
+
+			g->sg[(i >> 2)].ptr[(i & 3)] =
+				dma_map_page(&oct->pci_dev->dev,
+					     frag->page.p,
+					     frag->page_offset,
+					     frag->size,
+					     DMA_TO_DEVICE);
+			if (dma_mapping_error(&oct->pci_dev->dev,
+					      g->sg[i >> 2].ptr[i & 3])) {
+				dma_unmap_single(&oct->pci_dev->dev,
+						 g->sg[0].ptr[0],
+						 skb->len - skb->data_len,
+						 DMA_TO_DEVICE);
+				for (j = 1; j < i; j++) {
+					frag = &skb_shinfo(skb)->frags[j - 1];
+					dma_unmap_page(&oct->pci_dev->dev,
+						       g->sg[j >> 2].ptr[j & 3],
+						       frag->size,
+						       DMA_TO_DEVICE);
+				}
+				dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
+					__func__);
+				return NETDEV_TX_BUSY;
+			}
+
+			add_sg_size(&g->sg[(i >> 2)], frag->size, (i & 3));
+			i++;
+		}
+
+		dptr = dma_map_single(&oct->pci_dev->dev,
+				      g->sg, g->sg_size,
+				      DMA_TO_DEVICE);
+		if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
+			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n",
+				__func__);
+			dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
+					 skb->len - skb->data_len,
+					 DMA_TO_DEVICE);
+			for (j = 1; j < i; j++) { /* slots 1..i-1 are mapped */
+				frag = &skb_shinfo(skb)->frags[j - 1];
+				dma_unmap_page(&oct->pci_dev->dev,
+					       g->sg[j >> 2].ptr[j & 3],
+					       frag->size, DMA_TO_DEVICE);
+			}
+			return NETDEV_TX_BUSY;
+		}
+
+		ndata.cmd.cmd3.dptr = dptr;
+		finfo->dptr = dptr;
+		finfo->g = g;
+
+		ndata.reqtype = REQTYPE_NORESP_NET_SG;
+	}
+
+	irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh;
+	tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0];
+
+	if (skb_shinfo(skb)->gso_size) {
+		tx_info->s.gso_size = skb_shinfo(skb)->gso_size;
+		tx_info->s.gso_segs = skb_shinfo(skb)->gso_segs;
+	}
+
+	status = octnet_send_nic_data_pkt(oct, &ndata);
+	if (status == IQ_SEND_FAILED)
+		goto lio_xmit_failed;
+
+	netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
+
+	if (status == IQ_SEND_STOP) {
+		dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n",
+			iq_no);
+		stop_q(lio->netdev, q_idx);
+	}
+
+	netif_trans_update(netdev);
+
+	if (skb_shinfo(skb)->gso_size)
+		stats->tx_done += skb_shinfo(skb)->gso_segs;
+	else
+		stats->tx_done++;
+	stats->tx_tot_bytes += skb->len;
+
+	return NETDEV_TX_OK;
+
+lio_xmit_failed:
+	stats->tx_dropped++;
+	netif_info(lio, tx_err, lio->netdev, "IQ%d Transmit dropped:%llu\n",
+		   iq_no, stats->tx_dropped);
+	if (dptr) /* NOTE(review): on the gather path dptr maps g->sg - confirm size */
+		dma_unmap_single(&oct->pci_dev->dev, dptr,
+				 ndata.datasize, DMA_TO_DEVICE);
+	tx_buffer_free(skb);
+	return NETDEV_TX_OK; /* dropped packets still report OK */
+}
+
+/** \brief Tx timeout handler: log the event and wake the Tx queues
+ * @param netdev    pointer to network device
+ */
+static void liquidio_tx_timeout(struct net_device *netdev)
+{
+	struct lio *lio;
+
+	lio = GET_LIO(netdev);
+
+	netif_info(lio, tx_err, lio->netdev,
+		   "Transmit timeout tx_dropped:%ld, waking up queues now!!\n",
+		   netdev->stats.tx_dropped);
+	netif_trans_update(netdev); /* refresh the last-transmit timestamp */
+	txqs_wake(netdev);
+}
+
 /** Sending command to enable/disable RX checksum offload
  * @param netdev                pointer to network device
  * @param command               OCTNET_CMD_TNL_RX_CSUM_CTL
@@ -1282,8 +1602,10 @@ static int liquidio_set_features(struct net_device *netdev,
 static const struct net_device_ops lionetdevops = {
 	.ndo_open		= liquidio_open,
 	.ndo_stop		= liquidio_stop,
+	.ndo_start_xmit		= liquidio_xmit,
 	.ndo_set_mac_address	= liquidio_set_mac,
 	.ndo_set_rx_mode	= liquidio_set_mcast_list,
+	.ndo_tx_timeout		= liquidio_tx_timeout,
 	.ndo_change_mtu		= liquidio_change_mtu,
 	.ndo_fix_features	= liquidio_fix_features,
 	.ndo_set_features	= liquidio_set_features,
@@ -1507,6 +1829,24 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		/* Copy MAC Address to OS network device structure */
 		ether_addr_copy(netdev->dev_addr, mac);
 
+		if (setup_io_queues(octeon_dev, i)) {
+			dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
+			goto setup_nic_dev_fail;
+		}
+
+		/* For VFs, enable Octeon device interrupts here,
+		 * as this is contingent upon IO queue setup
+		 */
+		octeon_dev->fn_list.enable_interrupt(octeon_dev,
+						     OCTEON_ALL_INTR);
+
+		/* By default all interfaces on a single Octeon uses the same
+		 * tx and rx queues
+		 */
+		lio->txq = lio->linfo.txpciq[0].s.q_no;
+
+		lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
+
 		if (setup_glists(lio, num_iqueues)) {
 			dev_err(&octeon_dev->pci_dev->dev,
 				"Gather list allocation failed\n");
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index ea2b7e4..3ce6675 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -394,7 +394,7 @@ static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
 		case REQTYPE_SOFT_COMMAND:
 			sc = buf;
 
-			if (OCTEON_CN23XX_PF(oct))
+			if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))
 				irh = (struct octeon_instr_irh *)
 					&sc->cmd.cmd3.irh;
 			else
@@ -607,7 +607,7 @@ static void check_db_timeout(struct work_struct *work)
 
 	oct_cfg = octeon_get_conf(oct);
 
-	if (OCTEON_CN23XX_PF(oct)) {
+	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
 
 		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
@@ -700,7 +700,7 @@ int octeon_send_soft_command(struct octeon_device *oct,
 	struct octeon_instr_irh *irh;
 	u32 len;
 
-	if (OCTEON_CN23XX_PF(oct)) {
+	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
 		ih3 =  (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
 		if (ih3->dlengsz) {
 			WARN_ON(!sc->dmadptr);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next V2 0/7] liquidio VF data path
From: Raghu Vatsavayi @ 2016-12-06 21:06 UTC (permalink / raw)
  To: davem; +Cc: netdev, Raghu Vatsavayi

Dave,

The following patch series adds support for VF data path
related features. I have removed the unnecessary "void *"
casts. Please apply the patches in the following order, as
some of them depend on earlier patches.

Raghu Vatsavayi (7):
  liquidio CN23XX: VF offload features
  liquidio CN23XX: VF link status
  liquidio CN23XX: VF mac address
  liquidio CN23XX: VF scatter gather lists
  liquidio CN23XX: VF xmit
  liquidio CN23XX: VF TX buffers
  liquidio VF rx data and ctl path

 .../ethernet/cavium/liquidio/cn23xx_vf_device.c    |   21 +
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 2094 +++++++++++++++++++-
 .../net/ethernet/cavium/liquidio/liquidio_common.h |    1 +
 .../net/ethernet/cavium/liquidio/octeon_device.c   |    5 +-
 drivers/net/ethernet/cavium/liquidio/octeon_droq.c |   10 +
 .../net/ethernet/cavium/liquidio/octeon_network.h  |    1 +
 .../net/ethernet/cavium/liquidio/request_manager.c |    6 +-
 .../ethernet/cavium/liquidio/response_manager.c    |    3 +-
 8 files changed, 2124 insertions(+), 17 deletions(-)

-- 
1.8.3.1

^ permalink raw reply

* [PATCH net-next V2 3/7] liquidio CN23XX: VF mac address
From: Raghu Vatsavayi @ 2016-12-06 21:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481058367-3937-1-git-send-email-rvatsavayi@caviumnetworks.com>

Adds support for configuring the MTU, multicast list and MAC address.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 195 +++++++++++++++++++++
 .../net/ethernet/cavium/liquidio/liquidio_common.h |   1 +
 .../net/ethernet/cavium/liquidio/octeon_network.h  |   1 +
 3 files changed, 197 insertions(+)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index eddd77f..0e23e2f 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -864,6 +864,194 @@ static int liquidio_stop(struct net_device *netdev)
 	return 0;
 }
 
+/**
+ * \brief Converts a mask based on net device flags
+ * @param netdev network device
+ *
+ * This routine generates an octnet_ifflags mask from the net device flags
+ * received from the OS. OCTNET_IFFLAG_UNICAST is always set in the result.
+ */
+static inline enum octnet_ifflags get_new_flags(struct net_device *netdev)
+{
+	enum octnet_ifflags f = OCTNET_IFFLAG_UNICAST;
+
+	if (netdev->flags & IFF_PROMISC)
+		f |= OCTNET_IFFLAG_PROMISC;
+
+	if (netdev->flags & IFF_ALLMULTI)
+		f |= OCTNET_IFFLAG_ALLMULTI;
+
+	if (netdev->flags & IFF_MULTICAST) {
+		f |= OCTNET_IFFLAG_MULTICAST;
+
+		/* Accept all multicast addresses if there are more than we
+		 * can handle (more than MAX_OCTEON_MULTICAST_ADDR entries)
+		 */
+		if (netdev_mc_count(netdev) > MAX_OCTEON_MULTICAST_ADDR)
+			f |= OCTNET_IFFLAG_ALLMULTI;
+	}
+
+	if (netdev->flags & IFF_BROADCAST)
+		f |= OCTNET_IFFLAG_BROADCAST;
+
+	return f;
+}
+
+static void liquidio_set_uc_list(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	struct netdev_hw_addr *ha;
+	u64 *mac;
+
+	if (lio->netdev_uc_count == netdev_uc_count(netdev))
+		return; /* only the count is compared, not the addresses */
+
+	if (netdev_uc_count(netdev) > MAX_NCTRL_UDD) {
+		dev_err(&oct->pci_dev->dev, "too many MAC addresses in netdev uc list\n");
+		return;
+	}
+
+	lio->netdev_uc_count = netdev_uc_count(netdev); /* cached before the send */
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_UC_LIST; /* carries the netdev uc list */
+	nctrl.ncmd.s.more = lio->netdev_uc_count;
+	nctrl.ncmd.s.param1 = oct->vf_num;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	/* copy each address into bytes 2..7 of its own u64 udd slot */
+	mac = &nctrl.udd[0];
+	netdev_for_each_uc_addr(ha, netdev) {
+		ether_addr_copy(((u8 *)mac) + 2, ha->addr);
+		mac++;
+	}
+
+	octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); /* result is ignored */
+}
+
+/**
+ * \brief ndo_set_rx_mode handler: send RX flags, multicast and unicast lists
+ * @param netdev network device
+ */
+static void liquidio_set_mcast_list(struct net_device *netdev)
+{
+	int mc_count = min(netdev_mc_count(netdev), MAX_OCTEON_MULTICAST_ADDR);
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	struct netdev_hw_addr *ha;
+	u64 *mc;
+	int ret;
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	/* Create a ctrl pkt command to be sent to core app. */
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_MULTI_LIST;
+	nctrl.ncmd.s.param1 = get_new_flags(netdev);
+	nctrl.ncmd.s.param2 = mc_count;
+	nctrl.ncmd.s.more = mc_count;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	/* copy at most mc_count addresses, one per u64 udd slot */
+	mc = &nctrl.udd[0];
+	netdev_for_each_mc_addr(ha, netdev) {
+		*mc = 0;
+		ether_addr_copy(((u8 *)mc) + 2, ha->addr);
+		/* no byte swap needed; stop once mc_count slots are filled */
+		if (++mc >= &nctrl.udd[mc_count])
+			break;
+	}
+
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	/* Apparently, any activity in this call from the kernel has to
+	 * be atomic. So we won't wait for response.
+	 */
+	nctrl.wait_time = 0;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "DEVFLAGS change failed in core (ret: 0x%x)\n",
+			ret);
+	}
+
+	liquidio_set_uc_list(netdev); /* sent even if the request above failed */
+}
+
+/**
+ * \brief ndo_set_mac_address handler: send the new MAC, then record it locally
+ * @param netdev network device (p points to a struct sockaddr with the new MAC)
+ */
+static int liquidio_set_mac(struct net_device *netdev, void *p)
+{
+	struct sockaddr *addr = (struct sockaddr *)p;
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
+		return 0; /* address unchanged: nothing to send */
+
+	if (lio->linfo.macaddr_is_admin_asgnd)
+		return -EPERM; /* MAC flagged as admin-assigned: refuse */
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MACADDR;
+	nctrl.ncmd.s.param1 = 0;
+	nctrl.ncmd.s.more = 1;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+	nctrl.wait_time = 100;
+
+	nctrl.udd[0] = 0;
+	/* The MAC Address is presented in network byte order. */
+	ether_addr_copy((u8 *)&nctrl.udd[0] + 2, addr->sa_data);
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "MAC Address change failed\n");
+		return -ENOMEM; /* every send failure is reported as -ENOMEM */
+	}
+	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+	ether_addr_copy(((u8 *)&lio->linfo.hw_addr) + 2, addr->sa_data);
+
+	return 0;
+}
+
+/**
+ * \brief ndo_change_mtu handler: record and log the new MTU; always returns 0
+ * @param netdev network device (new_mtu is stored without a range check here)
+ */
+static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	lio->mtu = new_mtu;
+
+	netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
+		   netdev->mtu, new_mtu);
+	dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
+		 netdev->name, netdev->mtu, new_mtu);
+
+	netdev->mtu = new_mtu; /* updated last so the logs show the old value */
+
+	return 0;
+}
+
 /** Sending command to enable/disable RX checksum offload
  * @param netdev                pointer to network device
  * @param command               OCTNET_CMD_TNL_RX_CSUM_CTL
@@ -966,6 +1154,9 @@ static int liquidio_set_features(struct net_device *netdev,
 static const struct net_device_ops lionetdevops = {
 	.ndo_open		= liquidio_open,
 	.ndo_stop		= liquidio_stop,
+	.ndo_set_mac_address	= liquidio_set_mac,
+	.ndo_set_rx_mode	= liquidio_set_mcast_list,
+	.ndo_change_mtu		= liquidio_change_mtu,
 	.ndo_fix_features	= liquidio_fix_features,
 	.ndo_set_features	= liquidio_set_features,
 	.ndo_select_queue	= select_q,
@@ -1165,6 +1356,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
 		netdev->hw_features = lio->dev_capability;
 
+		/* MTU range: 68 - 16000 */
+		netdev->min_mtu = LIO_MIN_MTU_SIZE;
+		netdev->max_mtu = LIO_MAX_MTU_SIZE;
+
 		/* Point to the  properties for octeon device to which this
 		 * interface belongs.
 		 */
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index f308ee4..ba329f6 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -212,6 +212,7 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 
 #define   OCTNET_CMD_ID_ACTIVE         0x1a
 
+#define   OCTNET_CMD_SET_UC_LIST       0x1b
 #define   OCTNET_CMD_SET_VF_LINKSTATE  0x1c
 #define   OCTNET_CMD_VXLAN_PORT_ADD    0x0
 #define   OCTNET_CMD_VXLAN_PORT_DEL    0x1
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index e94edc8..6bb8941 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -123,6 +123,7 @@ struct lio {
 	/* work queue for  link status */
 	struct cavium_wq	link_status_wq;
 
+	int netdev_uc_count;
 };
 
 #define LIO_SIZE         (sizeof(struct lio))
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next V2 2/7] liquidio CN23XX: VF link status
From: Raghu Vatsavayi @ 2016-12-06 21:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481058367-3937-1-git-send-email-rvatsavayi@caviumnetworks.com>

Adds support for VF link status related changes.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 249 +++++++++++++++++++++
 1 file changed, 249 insertions(+)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 81a578f..eddd77f 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -163,6 +163,186 @@ static inline void ifstate_reset(struct lio *lio, int state_flag)
 	atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) & ~(state_flag)));
 }
 
+/**
+ * \brief Stop all Tx queues: each subqueue if multiqueue, else the one queue
+ * @param netdev network device whose Tx queues are stopped
+ */
+static inline void txqs_stop(struct net_device *netdev)
+{
+	if (netif_is_multiqueue(netdev)) {
+		int i;
+
+		for (i = 0; i < netdev->num_tx_queues; i++)
+			netif_stop_subqueue(netdev, i);
+	} else {
+		netif_stop_queue(netdev);
+	}
+}
+
+/**
+ * \brief Start all Tx queues: each subqueue if multiqueue, else the one queue
+ * @param netdev network device whose Tx queues are started
+ */
+static inline void txqs_start(struct net_device *netdev)
+{
+	if (netif_is_multiqueue(netdev)) {
+		int i;
+
+		for (i = 0; i < netdev->num_tx_queues; i++)
+			netif_start_subqueue(netdev, i);
+	} else {
+		netif_start_queue(netdev);
+	}
+}
+
+/**
+ * \brief Wake Tx queues and count a tx_restart on the matching instr queue
+ * @param netdev network device whose Tx queues are woken
+ */
+static inline void txqs_wake(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+
+	if (netif_is_multiqueue(netdev)) {
+		int i;
+
+		for (i = 0; i < netdev->num_tx_queues; i++) {
+			int qno = lio->linfo.txpciq[i % (lio->linfo.num_txpciq)]
+				      .s.q_no;	/* subqueue i -> txpciq[i mod num_txpciq] */
+			if (__netif_subqueue_stopped(netdev, i)) {	/* only stopped ones */
+				INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
+							  tx_restart, 1);
+				netif_wake_subqueue(netdev, i);
+			}
+		}
+	} else {	/* single queue: counted and woken unconditionally */
+		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
+					  tx_restart, 1);
+		netif_wake_queue(netdev);
+	}
+}
+
+/**
+ * \brief Start the Tx queues, but only if the link is currently up
+ * @param netdev network device
+ */
+static void start_txq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+
+	if (lio->linfo.link.s.link_up) {
+		txqs_start(netdev);
+		return;
+	}
+}
+
+/**
+ * \brief Log link speed/duplex or "Link Down" if LIO_IFSTATE_REGISTERED is set
+ * @param netdev network device
+ */
+static void print_link_info(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+
+	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) {
+		struct oct_link_info *linfo = &lio->linfo;
+
+		if (linfo->link.s.link_up) {
+			netif_info(lio, link, lio->netdev, "%d Mbps %s Duplex UP\n",
+				   linfo->link.s.speed,
+				   (linfo->link.s.duplex) ? "Full" : "Half");
+		} else {
+			netif_info(lio, link, lio->netdev, "Link Down\n");
+		}
+	}
+}
+
+/**
+ * \brief Work handler: raise NETDEV_CHANGEMTU for lio->netdev under rtnl_lock
+ * @param work work item; cast to struct cavium_wk to reach the lio in ctxptr
+ */
+static void octnet_link_status_change(struct work_struct *work)
+{
+	struct cavium_wk *wk = (struct cavium_wk *)work;	/* NOTE(review): presumes offset 0 - confirm */
+	struct lio *lio = (struct lio *)wk->ctxptr;
+
+	rtnl_lock();
+	call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+	rtnl_unlock();
+}
+
+/**
+ * \brief Create the "link-status" workqueue and init the MTU-notify work
+ * @param netdev network device; returns 0, or -1 if the workqueue alloc fails
+ */
+static inline int setup_link_status_change_wq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	lio->link_status_wq.wq = alloc_workqueue("link-status",
+						 WQ_MEM_RECLAIM, 0);
+	if (!lio->link_status_wq.wq) {
+		dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n");
+		return -1;
+	}
+	INIT_DELAYED_WORK(&lio->link_status_wq.wk.work,
+			  octnet_link_status_change);
+	lio->link_status_wq.wk.ctxptr = lio;	/* handler reads lio back from ctxptr */
+
+	return 0;
+}
+
+static inline void cleanup_link_status_change_wq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+
+	if (lio->link_status_wq.wq) {	/* nothing to do when wq is NULL */
+		cancel_delayed_work_sync(&lio->link_status_wq.wk.work);
+		destroy_workqueue(lio->link_status_wq.wq);	/* wq pointer is not cleared */
+	}
+}
+
+/**
+ * \brief Apply a new link status: carrier, Tx queues and (lowered) MTU
+ * @param netdev network device
+ * @param ls link status to apply; ignored unless open and different
+ *
+ * Called from lio_nic_info() for the interface whose gmxport matches. If the
+ * reported MTU is below netdev->mtu, shrink it and queue the notify work.
+ */
+static inline void update_link_status(struct net_device *netdev,
+				      union oct_link_status *ls)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	if ((lio->intf_open) && (lio->linfo.link.u64 != ls->u64)) {
+		lio->linfo.link.u64 = ls->u64;
+
+		print_link_info(netdev);
+		lio->link_changes++;
+
+		if (lio->linfo.link.s.link_up) {
+			netif_carrier_on(netdev);
+			txqs_wake(netdev);
+		} else {
+			netif_carrier_off(netdev);
+			txqs_stop(netdev);
+		}
+
+		if (lio->linfo.link.s.mtu < netdev->mtu) {
+			dev_warn(&oct->pci_dev->dev,
+				 "PF has changed the MTU for gmx port. Reducing the mtu from %d to %d\n",
+				 netdev->mtu, lio->linfo.link.s.mtu);
+			lio->mtu = lio->linfo.link.s.mtu;
+			netdev->mtu = lio->linfo.link.s.mtu;
+			queue_delayed_work(lio->link_status_wq.wq,
+					   &lio->link_status_wq.wk.work, 0);
+		}
+	}
+}
+
 static
 int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
 {
@@ -499,6 +679,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
 		unregister_netdev(netdev);
 
+	cleanup_link_status_change_wq(netdev);
+
 	free_netdev(netdev);
 
 	oct->props[ifidx].gmxport = -1;
@@ -635,6 +817,28 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb,
 }
 
 /**
+ * \brief ndo_open: mark interface running, accept link updates, start Tx if up
+ * @param netdev network device; always returns 0
+ */
+static int liquidio_open(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	ifstate_set(lio, LIO_IFSTATE_RUNNING);
+
+	/* update_link_status() ignores updates until intf_open is set */
+	lio->intf_open = 1;
+
+	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
+	start_txq(netdev);
+
+	dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name);
+
+	return 0;
+}
+
+/**
  * \brief Net device stop for LiquidIO
  * @param netdev network device
  */
@@ -653,6 +857,8 @@ static int liquidio_stop(struct net_device *netdev)
 
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
+	txqs_stop(netdev);
+
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
 	return 0;
@@ -758,11 +964,47 @@ static int liquidio_set_features(struct net_device *netdev,
 }
 
 static const struct net_device_ops lionetdevops = {
+	.ndo_open		= liquidio_open,
+	.ndo_stop		= liquidio_stop,
 	.ndo_fix_features	= liquidio_fix_features,
 	.ndo_set_features	= liquidio_set_features,
 	.ndo_select_queue	= select_q,
 };
 
+static int lio_nic_info(struct octeon_recv_info *recv_info, void *buf)
+{
+	struct octeon_device *oct = (struct octeon_device *)buf;
+	struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt;
+	union oct_link_status *ls;
+	int gmxport = 0;
+	int i;
+
+	if (recv_pkt->buffer_size[0] != sizeof(*ls)) {	/* must be exactly one link status */
+		dev_err(&oct->pci_dev->dev, "Malformed NIC_INFO, len=%d, ifidx=%d\n",
+			recv_pkt->buffer_size[0],
+			recv_pkt->rh.r_nic_info.gmxport);	/* NOTE(review): label says ifidx, value is gmxport */
+		goto nic_info_err;
+	}
+
+	gmxport = recv_pkt->rh.r_nic_info.gmxport;
+	ls = (union oct_link_status *)get_rbd(recv_pkt->buffer_ptr[0]);
+
+	octeon_swap_8B_data((u64 *)ls, (sizeof(union oct_link_status)) >> 3);	/* count is in 8-byte words */
+
+	for (i = 0; i < oct->ifcount; i++) {
+		if (oct->props[i].gmxport == gmxport) {	/* first match only */
+			update_link_status(oct->props[i].netdev, ls);
+			break;
+		}
+	}
+
+nic_info_err:	/* also reached on success: buffers are always released */
+	for (i = 0; i < recv_pkt->buffer_count; i++)
+		recv_buffer_free(recv_pkt->buffer_ptr[i]);
+	octeon_free_recv_info(recv_info);
+	return 0;	/* 0 even when the packet was malformed */
+}
+
 /**
  * \brief Setup network interfaces
  * @param octeon_dev  octeon device
@@ -788,6 +1030,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
 	ifidx_or_pfnum = octeon_dev->pf_num;
 
+	/* This is to handle link status changes */
+	octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC, OPCODE_NIC_INFO,
+				    lio_nic_info, octeon_dev);
+
 	for (i = 0; i < octeon_dev->ifcount; i++) {
 		resp_size = sizeof(struct liquidio_if_cfg_resp);
 		ctx_size = sizeof(struct liquidio_if_cfg_context);
@@ -946,6 +1192,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 			liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE,
 					     0);
 
+		if (setup_link_status_change_wq(netdev))
+			goto setup_nic_dev_fail;
+
 		/* Register the network device with the OS */
 		if (register_netdev(netdev)) {
 			dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next V2 4/7] liquidio CN23XX: VF scatter gather lists
From: Raghu Vatsavayi @ 2016-12-06 21:06 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481058367-3937-1-git-send-email-rvatsavayi@caviumnetworks.com>

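Add per-Tx-queue scatter-gather lists to the VF driver: setup_glists()
allocates them from setup_nic_devices(), and delete_glists() frees them from
liquidio_destroy_nic_device().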

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 134 +++++++++++++++++++++
 1 file changed, 134 insertions(+)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 0e23e2f..e4ee6ec 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -55,10 +55,28 @@ struct liquidio_if_cfg_resp {
 	u64 status;
 };
 
+#define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
+
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
 #define OCTNIC_GSO_MAX_SIZE \
 		(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
 
+struct octnic_gather {
+	/* Must stay first: delete_glists() casts the list_head * to this struct. */
+	struct list_head list;
+
+	/* Size of the gather component at sg in bytes. */
+	int sg_size;
+
+	/* Bytes sg was advanced past the kmalloc'd pointer for 8B alignment. */
+	int adjust;
+
+	/* 8B-aligned gather component, sized for a max sized fragment list
+	 * from the IP layer. Free (sg - adjust), not sg itself.
+	 */
+	struct octeon_sg_entry *sg;
+};
+
 struct octeon_device_priv {
 	/* Tasklet structures for this device. */
 	struct tasklet_struct droq_tasklet;
@@ -237,6 +255,114 @@ static void start_txq(struct net_device *netdev)
 }
 
 /**
+ * Unlink and return the first node of the list headed by root, or return
+ * NULL when root is empty (both prev and next point back at root).
+ */
+static inline struct list_head *list_delete_head(struct list_head *root)
+{
+	struct list_head *node;
+
+	if ((root->prev == root) && (root->next == root))
+		node = NULL;
+	else
+		node = root->next;
+
+	if (node)
+		list_del(node);
+
+	return node;
+}
+
+/**
+ * \brief Free every gather entry on each Tx queue list, then both arrays
+ * @param lio per-network private data; no-op when lio->glist is NULL
+ */
+static void delete_glists(struct lio *lio)
+{
+	struct octnic_gather *g;
+	int i;
+
+	if (!lio->glist)
+		return;
+
+	for (i = 0; i < lio->linfo.num_txpciq; i++) {
+		do {
+			g = (struct octnic_gather *)
+			    list_delete_head(&lio->glist[i]);
+			if (g) {
+				if (g->sg)	/* undo adjust to free the kmalloc'd pointer */
+					kfree((void *)((unsigned long)g->sg -
+							g->adjust));
+				kfree(g);
+			}
+		} while (g);
+	}
+
+	kfree(lio->glist);	/* NOTE(review): neither pointer is reset to NULL */
+	kfree(lio->glist_lock);
+}
+
+/**
+ * \brief Allocate num_iqs gather lists of lio->tx_qsize entries each
+ * @param lio per-network private data; returns 0 on success, 1 on failure
+ */
+static int setup_glists(struct lio *lio, int num_iqs)
+{
+	struct octnic_gather *g;
+	int i, j;
+
+	lio->glist_lock =
+	    kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
+	if (!lio->glist_lock)
+		return 1;
+
+	lio->glist =
+	    kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
+	if (!lio->glist) {
+		kfree(lio->glist_lock);
+		return 1;
+	}
+
+	for (i = 0; i < num_iqs; i++) {
+		spin_lock_init(&lio->glist_lock[i]);
+
+		INIT_LIST_HEAD(&lio->glist[i]);
+
+		for (j = 0; j < lio->tx_qsize; j++) {
+			g = kzalloc(sizeof(*g), GFP_KERNEL);
+			if (!g)
+				break;
+
+			g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
+				      OCT_SG_ENTRY_SIZE);
+
+			g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);	/* +8: slack for the bump below */
+			if (!g->sg) {
+				kfree(g);
+				break;
+			}
+
+			/* Bump sg to the next 64-bit boundary and remember the
+			 * offset in adjust so the original pointer can be freed
+			 */
+			if (((unsigned long)g->sg) & 7) {
+				g->adjust = 8 - (((unsigned long)g->sg) & 7);
+				g->sg = (struct octeon_sg_entry *)
+					((unsigned long)g->sg + g->adjust);
+			}
+			list_add_tail(&g->list, &lio->glist[i]);
+		}
+
+		if (j != lio->tx_qsize) {	/* an allocation above failed */
+			delete_glists(lio);	/* walks linfo.num_txpciq lists, not num_iqs */
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
  * \brief Print link information
  * @param netdev network device
  */
@@ -681,6 +807,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 
 	cleanup_link_status_change_wq(netdev);
 
+	delete_glists(lio);
+
 	free_netdev(netdev);
 
 	oct->props[ifidx].gmxport = -1;
@@ -1379,6 +1507,12 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		/* Copy MAC Address to OS network device structure */
 		ether_addr_copy(netdev->dev_addr, mac);
 
+		if (setup_glists(lio, num_iqueues)) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"Gather list allocation failed\n");
+			goto setup_nic_dev_fail;
+		}
+
 		if (netdev->features & NETIF_F_LRO)
 			liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
 					     OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
-- 
1.8.3.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox