Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH RFC net-next 3/7] sctp: add dst_pending_confirm flag
From: Neil Horman @ 2016-12-21 14:11 UTC (permalink / raw)
  To: Julian Anastasov; +Cc: netdev, linux-sctp, YueHaibing
In-Reply-To: <1482094596-26046-4-git-send-email-ja@ssi.bg>

On Sun, Dec 18, 2016 at 10:56:32PM +0200, Julian Anastasov wrote:
> Add new transport flag to allow sockets to confirm neighbour.
> When same struct dst_entry can be used for many different
> neighbours we can not use it for pending confirmations.
> The flag is propagated from transport to every packet.
> It is reset when cached dst is reset.
> 
> Reported-by: YueHaibing <yuehaibing@huawei.com>
> Fixes: 5110effee8fd ("net: Do delayed neigh confirmation.")
> Fixes: f2bb4bedf35d ("ipv4: Cache output routes in fib_info nexthops.")
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> ---
>  include/net/sctp/sctp.h    |  6 ++----
>  include/net/sctp/structs.h |  4 ++++
>  net/sctp/associola.c       |  3 +--
>  net/sctp/output.c          | 10 +++++++++-
>  net/sctp/outqueue.c        |  2 +-
>  net/sctp/sm_make_chunk.c   |  6 ++----
>  net/sctp/sm_sideeffect.c   |  2 +-
>  net/sctp/socket.c          |  4 ++--
>  net/sctp/transport.c       | 18 +++++++++++++++++-
>  9 files changed, 39 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
> index f0dcaeb..85d9083 100644
> --- a/include/net/sctp/sctp.h
> +++ b/include/net/sctp/sctp.h
> @@ -586,10 +586,8 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr)
>   */
>  static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t)
>  {
> -	if (t->dst && !dst_check(t->dst, t->dst_cookie)) {
> -		dst_release(t->dst);
> -		t->dst = NULL;
> -	}
> +	if (t->dst && !dst_check(t->dst, t->dst_cookie))
> +		sctp_transport_dst_release(t);
>  
>  	return t->dst;
>  }
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index 92daabd..e842e84 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -838,6 +838,8 @@ struct sctp_transport {
>  
>  	__u32 burst_limited;	/* Holds old cwnd when max.burst is applied */
>  
> +	__u32 dst_pending_confirm;	/* need to confirm neighbour */
> +
>  	/* Destination */
>  	struct dst_entry *dst;
>  	/* Source address. */
> @@ -980,6 +982,8 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
>  void sctp_transport_reset(struct sctp_transport *);
>  void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32);
>  void sctp_transport_immediate_rtx(struct sctp_transport *);
> +void sctp_transport_dst_release(struct sctp_transport *t);
> +void sctp_transport_dst_confirm(struct sctp_transport *t);
>  
>  
>  /* This is the structure we use to queue packets as they come into
> diff --git a/net/sctp/associola.c b/net/sctp/associola.c
> index 68428e1..7bd26e0 100644
> --- a/net/sctp/associola.c
> +++ b/net/sctp/associola.c
> @@ -820,8 +820,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
>  		if (transport->state != SCTP_UNCONFIRMED)
>  			transport->state = SCTP_INACTIVE;
>  		else {
> -			dst_release(transport->dst);
> -			transport->dst = NULL;
> +			sctp_transport_dst_release(transport);
>  			ulp_notify = false;
>  		}
>  
> diff --git a/net/sctp/output.c b/net/sctp/output.c
> index f5320a8..4684a00 100644
> --- a/net/sctp/output.c
> +++ b/net/sctp/output.c
> @@ -550,6 +550,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
>  	struct sctp_association *asoc = tp->asoc;
>  	struct sctp_chunk *chunk, *tmp;
>  	int pkt_count, gso = 0;
> +	int confirm;
>  	struct dst_entry *dst;
>  	struct sk_buff *head;
>  	struct sctphdr *sh;
> @@ -628,7 +629,14 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
>  			asoc->peer.last_sent_to = tp;
>  	}
>  	head->ignore_df = packet->ipfragok;
> -	tp->af_specific->sctp_xmit(head, tp);
> +	confirm = tp->dst_pending_confirm;
> +	if (confirm)
> +		head->dst_pending_confirm = 1;
Why not use your confirm helper function here?

> +	/* neighbour should be confirmed on successful transmission or
> +	 * positive error
> +	 */
> +	if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm)
> +		tp->dst_pending_confirm = 0;
>  
>  out:
>  	list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
> diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
> index e540826..8234799 100644
> --- a/net/sctp/outqueue.c
> +++ b/net/sctp/outqueue.c
> @@ -1641,7 +1641,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
>  
>  		if (forward_progress) {
>  			if (transport->dst)
> -				dst_confirm(transport->dst);
> +				sctp_transport_dst_confirm(transport);
>  		}
>  	}
>  
> diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
> index 9e9690b..6fb15bf 100644
> --- a/net/sctp/sm_make_chunk.c
> +++ b/net/sctp/sm_make_chunk.c
> @@ -3317,8 +3317,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
>  		local_bh_enable();
>  		list_for_each_entry(transport, &asoc->peer.transport_addr_list,
>  				transports) {
> -			dst_release(transport->dst);
> -			transport->dst = NULL;
> +			sctp_transport_dst_release(transport);
>  		}
>  		break;
>  	case SCTP_PARAM_DEL_IP:
> @@ -3332,8 +3331,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
>  		local_bh_enable();
>  		list_for_each_entry(transport, &asoc->peer.transport_addr_list,
>  				transports) {
> -			dst_release(transport->dst);
> -			transport->dst = NULL;
> +			sctp_transport_dst_release(transport);
>  		}
>  		break;
>  	default:
> diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
> index c345bf1..9255b29 100644
> --- a/net/sctp/sm_sideeffect.c
> +++ b/net/sctp/sm_sideeffect.c
> @@ -723,7 +723,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
>  	 * forward progress.
>  	 */
>  	if (t->dst)
> -		dst_confirm(t->dst);
> +		sctp_transport_dst_confirm(t);
>  
>  	/* The receiver of the HEARTBEAT ACK should also perform an
>  	 * RTT measurement for that destination transport address
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 318c678..9ee676a 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -588,7 +588,7 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
>  			list_for_each_entry(trans,
>  			    &asoc->peer.transport_addr_list, transports) {
>  				/* Clear the source and route cache */
> -				dst_release(trans->dst);
> +				sctp_transport_dst_release(trans);
>  				trans->cwnd = min(4*asoc->pathmtu, max_t(__u32,
>  				    2*asoc->pathmtu, 4380));
>  				trans->ssthresh = asoc->peer.i.a_rwnd;
> @@ -839,7 +839,7 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
>  		 */
>  		list_for_each_entry(transport, &asoc->peer.transport_addr_list,
>  					transports) {
> -			dst_release(transport->dst);
> +			sctp_transport_dst_release(transport);
>  			sctp_transport_route(transport, NULL,
>  					     sctp_sk(asoc->base.sk));
>  		}
> diff --git a/net/sctp/transport.c b/net/sctp/transport.c
> index ce54dce..db66514 100644
> --- a/net/sctp/transport.c
> +++ b/net/sctp/transport.c
> @@ -227,7 +227,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
>  {
>  	/* If we don't have a fresh route, look one up */
>  	if (!transport->dst || transport->dst->obsolete) {
> -		dst_release(transport->dst);
> +		sctp_transport_dst_release(transport);
>  		transport->af_specific->get_dst(transport, &transport->saddr,
>  						&transport->fl, sk);
>  	}
> @@ -659,3 +659,19 @@ void sctp_transport_immediate_rtx(struct sctp_transport *t)
>  			sctp_transport_hold(t);
>  	}
>  }
> +
> +/* Drop dst */
> +void sctp_transport_dst_release(struct sctp_transport *t)
> +{
> +	dst_release(t->dst);
> +	t->dst = NULL;
> +	t->dst_pending_confirm = 0;
> +}
> +
> +/* Schedule neighbour confirm */
> +void sctp_transport_dst_confirm(struct sctp_transport *t)
> +{
> +	if (!t->dst_pending_confirm)
> +		t->dst_pending_confirm = 1;
I'm not sure why you check it for being zero before setting it here, just set it
unilaterally instead, or are you trying to avoid dirtying a cacheline?

Neil

> +}
> +
> -- 
> 1.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* [PATCH net] tcp: add a missing barrier in tcp_tasklet_func()
From: Eric Dumazet @ 2016-12-21 13:42 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Eric Dumazet, Madalin Bucur

From: Eric Dumazet <edumazet@google.com>

Madalin reported crashes happening in tcp_tasklet_func() on powerpc64

Before TSQ_QUEUED bit is cleared, we must ensure the changes done
by list_del(&tp->tsq_node); are committed to memory, otherwise
corruption might happen, as an other cpu could catch TSQ_QUEUED
clearance too soon.

We can notice that old kernels were immune to this bug, because
TSQ_QUEUED was cleared after a bh_lock_sock(sk)/bh_unlock_sock(sk)
section, but they could have missed a kick to write additional bytes,
when NIC interrupts for a given flow are spread to multiple cpus.

Affected TCP flows would need an incoming ACK or RTO timer to add more
packets to the pipe. So overall situation should be better now.

Fixes: b223feb9de2a ("tcp: tsq: add shortcut in tcp_tasklet_func()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Madalin Bucur <madalin.bucur@nxp.com>
Tested-by: Madalin Bucur <madalin.bucur@nxp.com>
---
 net/ipv4/tcp_output.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b45101f3d2bd2e0f0077305a061add4f7ea0de27..31a255b555ad86a3537c077862e3ea38f9b44284 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -769,6 +769,7 @@ static void tcp_tasklet_func(unsigned long data)
 		list_del(&tp->tsq_node);

 		sk = (struct sock *)tp;
+		smp_mb__before_atomic();
 		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);

 		if (!sk->sk_lock.owned &&

^ permalink raw reply related

* RE: [PATCH v3] net: macb: Added PCI wrapper for Platform Driver.
From: Bartosz Folta @ 2016-12-21 13:42 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Nicolas Ferre, David S. Miller, Niklas Cassel, Alexandre Torgue,
	Satanand Burla, Raghu Vatsavayi, Simon Horman,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Rafal Ozieblo
In-Reply-To: <CAHp75Vf7VBWVWE-jcM0bv9t7QSnpAG46izpkDpqx7VaSdPgUCw@mail.gmail.com>

> And one more
>
> +       plat_info.dma_mask = DMA_BIT_MASK(32);
>
> Perhaps
>  plat_info.dma_mask = pdev->dma_mask;
> ?

Thank you.

I tested your suggestion. I have sent second version of patch.
https://patchwork.ozlabs.org/patch/707800/

Regards,
Bartosz Folta


^ permalink raw reply

* Re: Soft lockup in tc_classify
From: Shahar Klein @ 2016-12-21 12:58 UTC (permalink / raw)
  To: Daniel Borkmann, Cong Wang
  Cc: shahark, Or Gerlitz, Linux Netdev List, Roi Dayan, David Miller,
	Jiri Pirko, John Fastabend, Hadar Hen Zion
In-Reply-To: <585A564D.4030702@iogearbox.net>



On 12/21/2016 12:15 PM, Daniel Borkmann wrote:
> On 12/21/2016 08:03 AM, Cong Wang wrote:
>> On Tue, Dec 20, 2016 at 10:44 PM, Shahar Klein <shahark@mellanox.com>
>> wrote:
> [...]
>> Looks like you added a debug printk inside tcf_destroy() too,
>> which seems racy with filter creation, it should not happen since
>> in both cases we take RTNL lock.
>>
>> Don't know if changing all RCU_INIT_POINTER in that file to
>> rcu_assign_pointer could help anything or not. Mind to try?
>
> I don't think at this point that it's RCU related at all.
>
> I have a theory on what is happening. Quoting the piece in question from
> Shahar's log:
>
>  1: thread-2845[cpu-1] setting tp_created to 1 tp=ffff94b5b0280780
> back=ffff94b9ea932060
>  2: thread-2856[cpu-1] setting tp_created to 1 tp=ffff94b9ea9322a0
> back=ffff94b9ea932060
>  3: thread-2843[cpu-1] setting tp_created to 1 tp=ffff94b5b402c960
> back=ffff94b9ea932060
>  4: destroy ffff94b5b669fea0 tcf_destroy:1905
>  5: thread-2853[cpu-1] setting tp_created to 1 tp=ffff94b5b02805a0
> back=ffff94b9ea932060
>  6: thread-2853[cpu-1] add/change filter by: fl_get [cls_flower]
> tp=ffff94b5b02805a0 tp->next=ffff94b9ea932060
>  7: destroy ffff94b5b0280780 tcf_destroy:1905
>  8: thread-2845[cpu-1] add/change filter by: fl_get [cls_flower]
> tp=ffff94b5b02805a0 tp->next=ffff94b5b02805a0
>
> The interesting thing is that all this happens on CPU1, so as you say
> we're under rtnl.
> In 1), thread-2845 creates tp=ffff94b5b0280780, which is destroyed in
> 7), presumably also
> by thread-2845, and the weird part is why suddenly in 8) thread-2845
> adds a created filter
> without actually creating it. Plus, thread-2845 got interrupted, which
> means it must have
> dropped rntl in the middle. We drop it in tc_ctl_tfilter() when we do
> tcf_proto_lookup_ops()
> and need to pull in a module, but here this doesn't make sense at all
> since i) at this
> point we haven't created the tp yet and 2) flower was already there.
> Thus the only explanation
> where this must have happened is where we called tp->ops->change(). So
> here the return
> code must have been -EAGAIN, which makes sense because in 7) we
> destroyed that specific
> tp instance. Which means we goto replay but *do not* clear tp_created. I
> think that is
> the bug in question. So, while we dropped rtnl in the meantime, some
> other tp instance
> was added (tp=ffff94b5b02805a0) that we had a match on in round 2, but
> we still think it
> was newly created which wasn't the actual case. So we'd need to deal
> with the fact that
> ->change() callback could return -EAGAIN as well. Now looking at flower,
> I think the call
> chain must have been fl_change() -> fl_set_parms() ->
> tcf_exts_validate() -> tcf_action_init()
> -> tcf_action_init_1(). And here one possibility I see is that
> tc_lookup_action_n()
> failed, therefore we shortly dropped rtnl for the request_module() where
> the module
> got loaded successfully and thus error code from there is -EAGAIN that
> got propagated
> all the way through ->change() from tc_ctl_tfilter(). So it looks like a
> generic issue
> not specifically tied to flower.
>
> Shahar, can you test the following? Thanks!
>
>  net/sched/cls_api.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index 3fbba79..1ecdf80 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb,
> struct nlmsghdr *n)
>      unsigned long cl;
>      unsigned long fh;
>      int err;
> -    int tp_created = 0;
> +    int tp_created;
>
>      if ((n->nlmsg_type != RTM_GETTFILTER) &&
>          !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
>          return -EPERM;
>
>  replay:
> +    tp_created = 0;
> +
>      err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
>      if (err < 0)
>          return err;

Test run successfully!
I'll remove all other debug "fixes" and run again later

Thanks Daniel!

^ permalink raw reply

* Re: Soft lockup in tc_classify
From: Daniel Borkmann @ 2016-12-21 13:18 UTC (permalink / raw)
  To: Shahar Klein, Cong Wang
  Cc: Or Gerlitz, Linux Netdev List, Roi Dayan, David Miller,
	Jiri Pirko, John Fastabend, Hadar Hen Zion
In-Reply-To: <cb55ede9-3e6e-8e68-3005-8bdb2105122e@mellanox.com>

On 12/21/2016 01:58 PM, Shahar Klein wrote:
> On 12/21/2016 12:15 PM, Daniel Borkmann wrote:
>> On 12/21/2016 08:03 AM, Cong Wang wrote:
>>> On Tue, Dec 20, 2016 at 10:44 PM, Shahar Klein <shahark@mellanox.com>
>>> wrote:
>> [...]
>>> Looks like you added a debug printk inside tcf_destroy() too,
>>> which seems racy with filter creation, it should not happen since
>>> in both cases we take RTNL lock.
>>>
>>> Don't know if changing all RCU_INIT_POINTER in that file to
>>> rcu_assign_pointer could help anything or not. Mind to try?
>>
>> I don't think at this point that it's RCU related at all.
>>
>> I have a theory on what is happening. Quoting the piece in question from
>> Shahar's log:
>>
>>  1: thread-2845[cpu-1] setting tp_created to 1 tp=ffff94b5b0280780
>> back=ffff94b9ea932060
>>  2: thread-2856[cpu-1] setting tp_created to 1 tp=ffff94b9ea9322a0
>> back=ffff94b9ea932060
>>  3: thread-2843[cpu-1] setting tp_created to 1 tp=ffff94b5b402c960
>> back=ffff94b9ea932060
>>  4: destroy ffff94b5b669fea0 tcf_destroy:1905
>>  5: thread-2853[cpu-1] setting tp_created to 1 tp=ffff94b5b02805a0
>> back=ffff94b9ea932060
>>  6: thread-2853[cpu-1] add/change filter by: fl_get [cls_flower]
>> tp=ffff94b5b02805a0 tp->next=ffff94b9ea932060
>>  7: destroy ffff94b5b0280780 tcf_destroy:1905
>>  8: thread-2845[cpu-1] add/change filter by: fl_get [cls_flower]
>> tp=ffff94b5b02805a0 tp->next=ffff94b5b02805a0
>>
>> The interesting thing is that all this happens on CPU1, so as you say
>> we're under rtnl.
>> In 1), thread-2845 creates tp=ffff94b5b0280780, which is destroyed in
>> 7), presumably also
>> by thread-2845, and the weird part is why suddenly in 8) thread-2845
>> adds a created filter
>> without actually creating it. Plus, thread-2845 got interrupted, which
>> means it must have
>> dropped rntl in the middle. We drop it in tc_ctl_tfilter() when we do
>> tcf_proto_lookup_ops()
>> and need to pull in a module, but here this doesn't make sense at all
>> since i) at this
>> point we haven't created the tp yet and 2) flower was already there.
>> Thus the only explanation
>> where this must have happened is where we called tp->ops->change(). So
>> here the return
>> code must have been -EAGAIN, which makes sense because in 7) we
>> destroyed that specific
>> tp instance. Which means we goto replay but *do not* clear tp_created. I
>> think that is
>> the bug in question. So, while we dropped rtnl in the meantime, some
>> other tp instance
>> was added (tp=ffff94b5b02805a0) that we had a match on in round 2, but
>> we still think it
>> was newly created which wasn't the actual case. So we'd need to deal
>> with the fact that
>> ->change() callback could return -EAGAIN as well. Now looking at flower,
>> I think the call
>> chain must have been fl_change() -> fl_set_parms() ->
>> tcf_exts_validate() -> tcf_action_init()
>> -> tcf_action_init_1(). And here one possibility I see is that
>> tc_lookup_action_n()
>> failed, therefore we shortly dropped rtnl for the request_module() where
>> the module
>> got loaded successfully and thus error code from there is -EAGAIN that
>> got propagated
>> all the way through ->change() from tc_ctl_tfilter(). So it looks like a
>> generic issue
>> not specifically tied to flower.
>>
>> Shahar, can you test the following? Thanks!
>>
>>  net/sched/cls_api.c | 4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
>> index 3fbba79..1ecdf80 100644
>> --- a/net/sched/cls_api.c
>> +++ b/net/sched/cls_api.c
>> @@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb,
>> struct nlmsghdr *n)
>>      unsigned long cl;
>>      unsigned long fh;
>>      int err;
>> -    int tp_created = 0;
>> +    int tp_created;
>>
>>      if ((n->nlmsg_type != RTM_GETTFILTER) &&
>>          !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
>>          return -EPERM;
>>
>>  replay:
>> +    tp_created = 0;
>> +
>>      err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
>>      if (err < 0)
>>          return err;
>
> Test run successfully!
> I'll remove all other debug "fixes" and run again later
>
> Thanks Daniel!

Great, thanks for confirming so far Shahar!
I'll cook an official patch in the meantime.

^ permalink raw reply

* Re: [PATCH] at86rf230: Allow slow GPIO pins for "rstn"
From: Stefan Schmidt @ 2016-12-21 13:11 UTC (permalink / raw)
  To: Andrey Smirnov, linux-wpan
  Cc: Alexander Aring, netdev, linux-kernel, Chris Healy
In-Reply-To: <1482103533-13187-1-git-send-email-andrew.smirnov@gmail.com>

Hello.

On 19/12/16 00:25, Andrey Smirnov wrote:
> Driver code never touches "rstn" signal in atomic context, so there's
> no need to implicitly put such restriction on it by using gpio_set_value
> to manipulate it. Replace gpio_set_value to gpio_set_value_cansleep to
> fix that.

We need to make sure we are not assuming it can be called  in such a 
context in the future now. But that is something we can worry about if 
it comes up.

> As a an example of where such restriction might be inconvenient,
> consider a hardware design where "rstn" is connected to a pin of I2C/SPI
> GPIO expander chip.

Is this a real life issue you run into?

> Cc: Chris Healy <cphealy@gmail.com>
> Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
> ---
>  drivers/net/ieee802154/at86rf230.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
> index 9f10da6..7700690 100644
> --- a/drivers/net/ieee802154/at86rf230.c
> +++ b/drivers/net/ieee802154/at86rf230.c
> @@ -1710,9 +1710,9 @@ static int at86rf230_probe(struct spi_device *spi)
>  	/* Reset */
>  	if (gpio_is_valid(rstn)) {
>  		udelay(1);
> -		gpio_set_value(rstn, 0);
> +		gpio_set_value_cansleep(rstn, 0);
>  		udelay(1);
> -		gpio_set_value(rstn, 1);
> +		gpio_set_value_cansleep(rstn, 1);
>  		usleep_range(120, 240);
>  	}
>
>


Acked-by: Stefan Schmidt <stefan@osg.samsung.com>

regards
Stefan Schmidt

^ permalink raw reply

* Re: [RFC PATCH 1/7] PF driver modified to enable HW filter support, changes works in backward compatibility mode Enable required things in Makefile Enable LZ4 dependecy inside config file
From: Sunil Kovvuri @ 2016-12-21 13:05 UTC (permalink / raw)
  To: Satha Koteswara Rao
  Cc: LKML, Sunil Goutham, Robert Richter, David S. Miller, David Daney,
	rvatsavayi, derek.chickles, philip.romanov, Linux Netdev List,
	LAKML
In-Reply-To: <1482310011-1862-2-git-send-email-satha.rao@caviumnetworks.com>

>
>  #define NIC_MAX_RSS_HASH_BITS          8
>  #define NIC_MAX_RSS_IDR_TBL_SIZE       (1 << NIC_MAX_RSS_HASH_BITS)
> +#define NIC_TNS_RSS_IDR_TBL_SIZE       5

So you want to use only 5 queues per VF when TNS is enabled, is it ??
There are 4096 RSS indices in total, for each VF you can use max 32.
I guess you wanted to set no of hash bits to 5 instead of table size.

>  #define RSS_HASH_KEY_SIZE              5 /* 320 bit key */
>
>  struct nicvf_rss_info {
> @@ -255,74 +258,6 @@ struct nicvf_drv_stats {
>         struct u64_stats_sync   syncp;
>  };
>
> -struct nicvf {
> -       struct nicvf            *pnicvf;
> -       struct net_device       *netdev;
> -       struct pci_dev          *pdev;
> -       void __iomem            *reg_base;

Didn't get why you moved this structure to the end of file.
Looks like an unnecessary modification.


> +static unsigned int num_vfs;
> +module_param(num_vfs, uint, 0644);
> +MODULE_PARM_DESC(num_vfs, "Non zero positive value, specifies number of VF's per physical port");

So what if driver is built-in instead of module, I can't use TNS is it ?

>
> +/* Set RBDR Backpressure (RBDR_BP) and CQ backpressure (CQ_BP) of vnic queues
> + * to 129 each

Why 129 ??
RBDR minimum size is 8K buffers, why you want to assert BP when still
~4K buffers
are available. Isn't 4K a huge number to start asserting backpressure ?

^ permalink raw reply

* [PATCH v2] net: macb: Applied changes according to review made by Andy Shevchenko.
From: Bartosz Folta @ 2016-12-21 13:04 UTC (permalink / raw)
  To: Nicolas Ferre, David S. Miller, Niklas Cassel, Alexandre Torgue,
	Satanand Burla, Raghu Vatsavayi, Simon Horman,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org
  Cc: Bartosz Folta, Rafal Ozieblo
In-Reply-To: <1482325169-22771-1-git-send-email-bfolta@cadence.com>

This patch is sent in regard to recently applied patch
Commit 83a77e9ec4150ee4acc635638f7dedd9da523a26
net: macb: Added PCI wrapper for Platform Driver.

Signed-off-by: Bartosz Folta <bfolta@cadence.com>
---
Changed in v2:
Size of DMA mask is set based on value held by PCI device.
---
 drivers/net/ethernet/cadence/macb_pci.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index 92be2cd..9906fda 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -1,5 +1,5 @@
 /**
- * macb_pci.c - Cadence GEM PCI wrapper.
+ * Cadence GEM PCI wrapper.
  *
  * Copyright (C) 2016 Cadence Design Systems - http://www.cadence.com
  *
@@ -45,32 +45,27 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	struct macb_platform_data plat_data;
 	struct resource res[2];
 
-	/* sanity check */
-	if (!id)
-		return -EINVAL;
-
 	/* enable pci device */
-	err = pci_enable_device(pdev);
+	err = pcim_enable_device(pdev);
 	if (err < 0) {
-		dev_err(&pdev->dev, "Enabling PCI device has failed: 0x%04X",
-			err);
-		return -EACCES;
+		dev_err(&pdev->dev, "Enabling PCI device has failed: %d", err);
+		return err;
 	}
 
 	pci_set_master(pdev);
 
 	/* set up resources */
 	memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res));
-	res[0].start = pdev->resource[0].start;
-	res[0].end = pdev->resource[0].end;
+	res[0].start = pci_resource_start(pdev, 0);
+	res[0].end = pci_resource_end(pdev, 0);
 	res[0].name = PCI_DRIVER_NAME;
 	res[0].flags = IORESOURCE_MEM;
-	res[1].start = pdev->irq;
+	res[1].start = pci_irq_vector(pdev, 0);
 	res[1].name = PCI_DRIVER_NAME;
 	res[1].flags = IORESOURCE_IRQ;
 
-	dev_info(&pdev->dev, "EMAC physical base addr = 0x%p\n",
-		 (void *)(uintptr_t)pci_resource_start(pdev, 0));
+	dev_info(&pdev->dev, "EMAC physical base addr: %pa\n",
+		 &res[0].start);
 
 	/* set up macb platform data */
 	memset(&plat_data, 0, sizeof(plat_data));
@@ -100,7 +95,7 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	plat_info.num_res = ARRAY_SIZE(res);
 	plat_info.data = &plat_data;
 	plat_info.size_data = sizeof(plat_data);
-	plat_info.dma_mask = DMA_BIT_MASK(32);
+	plat_info.dma_mask = pdev->dma_mask;
 
 	/* register platform device */
 	plat_dev = platform_device_register_full(&plat_info);
@@ -120,7 +115,6 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	clk_unregister(plat_data.pclk);
 
 err_pclk_register:
-	pci_disable_device(pdev);
 	return err;
 }
 
@@ -130,7 +124,6 @@ static void macb_remove(struct pci_dev *pdev)
 	struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev);
 
 	platform_device_unregister(plat_dev);
-	pci_disable_device(pdev);
 	clk_unregister(plat_data->pclk);
 	clk_unregister(plat_data->hclk);
 }
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v4 2/2] net: dsa: mv88e6xxx: Add support for ethernet switch 88E6341/88E6141
From: Romain Perier @ 2016-12-21 12:57 UTC (permalink / raw)
  To: Andrew Lunn, Vivien Didelot, Florian Fainelli, Jason Cooper,
	Sebastian Hesselbarth, Gregory Clement
  Cc: netdev, devicetree, Rob Herring, Ian Campbell, Pawel Moll,
	Mark Rutland, Kumar Gala, linux-arm-kernel, Thomas Petazzoni,
	Nadav Haklai, Romain Perier
In-Reply-To: <20161221125734.1034-1-romain.perier@free-electrons.com>

The Marvell 88E6341 device is single-chip, 6-port ethernet switch with
four integrated 10/100/1000Mbps ethernet transceivers and one high speed
SerDes interfaces. It is compatible with switches of family 88E6352.

This commit adds basic support for this switch by describing its
capabilities to the driver.

Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---

Changes in v3:
 - Added tag "Reviewed-by" by Andrew
 
Changes in v2:
 - Add a dedicated data structure for the operations of the 88E6341
 - Re-ordered PORT_SWITCH_ID_PROD_NUM_6341 in alphabetic order with other
   macros

 drivers/net/dsa/mv88e6xxx/chip.c      | 42 +++++++++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/mv88e6xxx.h |  4 +++-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 76d944e..5e97dc4 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3625,6 +3625,34 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
 	.reset = mv88e6352_g1_reset,
 };
 
+static const struct mv88e6xxx_ops mv88e6341_ops = {
+	/* MV88E6XXX_FAMILY_6352 */
+	.get_eeprom = mv88e6xxx_g2_get_eeprom16,
+	.set_eeprom = mv88e6xxx_g2_set_eeprom16,
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+	.port_set_link = mv88e6xxx_port_set_link,
+	.port_set_duplex = mv88e6xxx_port_set_duplex,
+	.port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
+	.port_set_speed = mv88e6352_port_set_speed,
+	.port_tag_remap = mv88e6095_port_tag_remap,
+	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
+	.port_set_egress_unknowns = mv88e6351_port_set_egress_unknowns,
+	.port_set_ether_type = mv88e6351_port_set_ether_type,
+	.port_jumbo_config = mv88e6165_port_jumbo_config,
+	.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
+	.port_pause_config = mv88e6097_port_pause_config,
+	.stats_snapshot = mv88e6320_g1_stats_snapshot,
+	.stats_get_sset_count = mv88e6095_stats_get_sset_count,
+	.stats_get_strings = mv88e6095_stats_get_strings,
+	.stats_get_stats = mv88e6095_stats_get_stats,
+	.g1_set_cpu_port = mv88e6095_g1_set_cpu_port,
+	.g1_set_egress_port = mv88e6095_g1_set_egress_port,
+	.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+	.reset = mv88e6352_g1_reset,
+};
+
 static const struct mv88e6xxx_ops mv88e6350_ops = {
 	/* MV88E6XXX_FAMILY_6351 */
 	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
@@ -4086,6 +4114,20 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.ops = &mv88e6321_ops,
 	},
 
+	[MV88E6341] = {
+		.prod_num = PORT_SWITCH_ID_PROD_NUM_6341,
+		.family = MV88E6XXX_FAMILY_6352,
+		.name = "Marvell 88E6341",
+		.num_databases = 4096,
+		.num_ports = 6,
+		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
+		.age_time_coeff = 15000,
+		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
+		.ops = &mv88e6341_ops,
+	},
+
 	[MV88E6350] = {
 		.prod_num = PORT_SWITCH_ID_PROD_NUM_6350,
 		.family = MV88E6XXX_FAMILY_6351,
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index af54bae..cb55fdb 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -100,6 +100,7 @@
 #define PORT_SWITCH_ID_PROD_NUM_6240	0x240
 #define PORT_SWITCH_ID_PROD_NUM_6290	0x290
 #define PORT_SWITCH_ID_PROD_NUM_6321	0x310
+#define PORT_SWITCH_ID_PROD_NUM_6341	0x340
 #define PORT_SWITCH_ID_PROD_NUM_6352	0x352
 #define PORT_SWITCH_ID_PROD_NUM_6350	0x371
 #define PORT_SWITCH_ID_PROD_NUM_6351	0x375
@@ -432,6 +433,7 @@ enum mv88e6xxx_model {
 	MV88E6290,
 	MV88E6320,
 	MV88E6321,
+	MV88E6341,
 	MV88E6350,
 	MV88E6351,
 	MV88E6352,
@@ -448,7 +450,7 @@ enum mv88e6xxx_family {
 	MV88E6XXX_FAMILY_6185,	/* 6108 6121 6122 6131 6152 6155 6182 6185 */
 	MV88E6XXX_FAMILY_6320,	/* 6320 6321 */
 	MV88E6XXX_FAMILY_6351,	/* 6171 6175 6350 6351 */
-	MV88E6XXX_FAMILY_6352,	/* 6172 6176 6240 6352 */
+	MV88E6XXX_FAMILY_6352,	/* 6172 6176 6240 6341 6352 */
 	MV88E6XXX_FAMILY_6390,  /* 6190 6190X 6191 6290 6390 6390X */
 };
 
-- 
2.9.3

^ permalink raw reply related

* [PATCH net-next v4 1/2] net: dsa: mv88e6xxx: Don't forbid MDIO I/Os for PHY addr >= num_of_ports
From: Romain Perier @ 2016-12-21 12:57 UTC (permalink / raw)
  To: Andrew Lunn, Vivien Didelot, Florian Fainelli, Jason Cooper,
	Sebastian Hesselbarth, Gregory Clement
  Cc: netdev, devicetree, Rob Herring, Ian Campbell, Pawel Moll,
	Mark Rutland, Kumar Gala, linux-arm-kernel, Thomas Petazzoni,
	Nadav Haklai, Romain Perier
In-Reply-To: <20161221125734.1034-1-romain.perier@free-electrons.com>

Some Marvell ethernet switches have internal ethernet transceivers with
hardcoded phy addresses. These addresses can be greater than the number
of ports or its value might be different than the associated port number.
This is for example the case for MV88E6341 that has 6 ports and internal
Port 1 to Port4 PHYs mapped at SMI addresses from 0x11 to 0x14.

This commits fixes the issue by removing the condition in MDIO callbacks.

Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---

Changes in v4:
 - Fixed typo in the commit log

Changes in v2:
 - Added tag "Reviewed-by" by Andrew
 - Fixed typo in the commit log

 drivers/net/dsa/mv88e6xxx/chip.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index b5f0e1e..76d944e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2881,9 +2881,6 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
 	u16 val;
 	int err;
 
-	if (phy >= mv88e6xxx_num_ports(chip))
-		return 0xffff;
-
 	mutex_lock(&chip->reg_lock);
 	err = mv88e6xxx_phy_read(chip, phy, reg, &val);
 	mutex_unlock(&chip->reg_lock);
@@ -2896,9 +2893,6 @@ static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
 	struct mv88e6xxx_chip *chip = bus->priv;
 	int err;
 
-	if (phy >= mv88e6xxx_num_ports(chip))
-		return 0xffff;
-
 	mutex_lock(&chip->reg_lock);
 	err = mv88e6xxx_phy_write(chip, phy, reg, val);
 	mutex_unlock(&chip->reg_lock);
-- 
2.9.3

^ permalink raw reply related

* [PATCH net-next v4 0/2] Add support for the ethernet switch on the ESPRESSObin
From: Romain Perier @ 2016-12-21 12:57 UTC (permalink / raw)
  To: Andrew Lunn, Vivien Didelot, Florian Fainelli, Jason Cooper,
	Sebastian Hesselbarth, Gregory Clement
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, devicetree-u79uwXL29TY76Z2rM5mHXA,
	Rob Herring, Ian Campbell, Pawel Moll, Mark Rutland, Kumar Gala,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	Thomas Petazzoni, Nadav Haklai, Romain Perier

This set of patches adds support for the Marvell ethernet switch 88E6341.
It also add the devicetree definition of this switch to the DT board.

Romain Perier (2):
  net: dsa: mv88e6xxx: Don't forbid MDIO I/Os for PHY addr >=
    num_of_ports
  net: dsa: mv88e6xxx: Add support for ethernet switch 88E6341/88E6141

 drivers/net/dsa/mv88e6xxx/chip.c      | 48 ++++++++++++++++++++++++++++++-----
 drivers/net/dsa/mv88e6xxx/mv88e6xxx.h |  4 ++-
 2 files changed, 45 insertions(+), 7 deletions(-)

-- 

Note: As requested by Gregory, I have removed the patch for the DT (already merged).

2.9.3

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH 5/7] Multiple VF's grouped together under single physical port called PF group PF Group maintainance API's
From: Sunil Kovvuri @ 2016-12-21 12:43 UTC (permalink / raw)
  To: Satha Koteswara Rao
  Cc: LKML, Sunil Goutham, Robert Richter, David S. Miller, David Daney,
	rvatsavayi, derek.chickles, philip.romanov, Linux Netdev List,
	LAKML
In-Reply-To: <1482310011-1862-6-git-send-email-satha.rao@caviumnetworks.com>

On Wed, Dec 21, 2016 at 2:16 PM, Satha Koteswara Rao
<satha.rao@caviumnetworks.com> wrote:
> +struct tns_global_st {
> +       u64 magic;
> +       char     version[16];
> +       u64 reg_cnt;
> +       struct table_static_s tbl_info[TNS_MAX_TABLE];
> +};
> +
> +#define PF_COUNT 3
> +#define PF_1   0
> +#define PF_2   64
> +#define PF_3   96
> +#define PF_END 128

Some comments please ... what is 0, 64, 96 ??
You can read PCI_SRIOV_TOTAL_VF from PCI config space instead of
defining PF_END with 128.

^ permalink raw reply

* Re: ipv6: handle -EFAULT from skb_copy_bits
From: Hannes Frederic Sowa @ 2016-12-21 12:41 UTC (permalink / raw)
  To: Cong Wang, Dave Jones; +Cc: David Miller, Linux Kernel Network Developers
In-Reply-To: <1482323232.2260.2.camel@stressinduktion.org>

On Wed, 2016-12-21 at 13:27 +0100, Hannes Frederic Sowa wrote: 
> @@ -555,8 +566,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
>                 goto out;
>  
>         offset = rp->offset;
> -       total_len = inet_sk(sk)->cork.base.length;
> -       if (offset >= total_len - 1) {
> +       transport_len = raw6_corked_transport_len(sk);
> +       if (offset >= transport_len - 1) {
>                 err = -EINVAL;
>                 ip6_flush_pending_frames(sk);
>                 goto out;
> @@ -598,7 +609,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
>                 tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
>  
>         csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
> -                              total_len, fl6->flowi6_proto, tmp_csum);
> +                              transport_len, fl6->flowi6_proto, tmp_csum);
>  
> 

Ops, here we need actually the total_len plus the opt->opt_nflen to
always calculate the correct checksum.

^ permalink raw reply

* Re: [RFC PATCH 4/7] HW Filter Initialization code and register access APIs
From: Sunil Kovvuri @ 2016-12-21 12:36 UTC (permalink / raw)
  To: Satha Koteswara Rao
  Cc: LKML, Sunil Goutham, Robert Richter, David S. Miller, David Daney,
	rvatsavayi, derek.chickles, philip.romanov, Linux Netdev List,
	LAKML
In-Reply-To: <1482310011-1862-5-git-send-email-satha.rao@caviumnetworks.com>

On Wed, Dec 21, 2016 at 2:16 PM, Satha Koteswara Rao
<satha.rao@caviumnetworks.com> wrote:
> ---
>  drivers/net/ethernet/cavium/thunder/pf_reg.c | 660 +++++++++++++++++++++++++++
>  1 file changed, 660 insertions(+)
>  create mode 100644 drivers/net/ethernet/cavium/thunder/pf_reg.c
>
> diff --git a/drivers/net/ethernet/cavium/thunder/pf_reg.c b/drivers/net/ethernet/cavium/thunder/pf_reg.c

Sunil>>
>From the file name 'pf_reg.c', what is PF here ?
TNS is not a SRIOV device right ?

> new file mode 100644
> index 0000000..1f95c7f
> --- /dev/null
> +++ b/drivers/net/ethernet/cavium/thunder/pf_reg.c
> @@ -0,0 +1,660 @@
> +/*
> + * Copyright (C) 2015 Cavium, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of version 2 of the GNU General Public License
> + * as published by the Free Software Foundation.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <linux/fs.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/device.h>
> +#include <linux/version.h>
> +#include <linux/proc_fs.h>
> +#include <linux/device.h>
> +#include <linux/mman.h>
> +#include <linux/uaccess.h>
> +#include <linux/delay.h>
> +#include <linux/cdev.h>
> +#include <linux/err.h>
> +#include <linux/device.h>
> +#include <linux/io.h>
> +#include <linux/firmware.h>
> +#include "pf_globals.h"
> +#include "pf_locals.h"
> +#include "tbl_access.h"
> +#include "linux/lz4.h"
> +
> +struct tns_table_s tbl_info[TNS_MAX_TABLE];
> +
> +#define TNS_TDMA_SST_ACC_CMD_ADDR      0x0000842000000270ull
> +
> +#define BAR0_START 0x842000000000
> +#define BAR0_END   0x84200000FFFF
> +#define BAR0_SIZE  (64 * 1024)
> +#define BAR2_START 0x842040000000
> +#define BAR2_END   0x84207FFFFFFF
> +#define BAR2_SIZE  (1024 * 1024 * 1024)
> +
> +#define NODE1_BAR0_START 0x942000000000
> +#define NODE1_BAR0_END   0x94200000FFFF
> +#define NODE1_BAR0_SIZE  (64 * 1024)
> +#define NODE1_BAR2_START 0x942040000000
> +#define NODE1_BAR2_END   0x94207FFFFFFF
> +#define NODE1_BAR2_SIZE  (1024 * 1024 * 1024)

Sunil>> This is absurd, why are you using hardcoded HW addresses,
why not use TNS device's PCI BARs.

> +/* Allow a max of 4 chunks for the Indirect Read/Write */
> +#define MAX_SIZE (64 * 4)
> +#define CHUNK_SIZE (64)
> +/* To protect register access */
> +spinlock_t pf_reg_lock;
> +
> +u64 iomem0;
> +u64 iomem2;
> +u8 tns_enabled;
> +u64 node1_iomem0;
> +u64 node1_iomem2;
> +u8 node1_tns;
> +int n1_tns;

Sunil>> A simple structure would have nice instead of so many global variables.

> +
> +int tns_write_register_indirect(int node_id, u64 address, u8 size,
> +                               u8 *kern_buffer)
> +{
> +       union tns_tdma_sst_acc_cmd acccmd;
> +       union tns_tdma_sst_acc_stat_t accstat;
> +       union tns_acc_data data;
> +       int i, j, w = 0;
> +       int cnt = 0;
> +       u32 *dataw = NULL;
> +       int temp = 0;
> +       int k = 0;
> +       int chunks = 0;
> +       u64 acccmd_address;
> +       u64 lmem2 = 0, lmem0 = 0;
> +
> +       if (size == 0 || !kern_buffer) {
> +               filter_dbg(FERR, "%s data size cannot be zero\n", __func__);
> +               return TNS_ERROR_INVALID_ARG;
> +       }
> +       if (size > MAX_SIZE) {
> +               filter_dbg(FERR, "%s Max allowed size exceeded\n", __func__);
> +               return TNS_ERROR_DATA_TOO_LARGE;
> +       }
> +       if (node_id) {
> +               lmem0 = node1_iomem0;
> +               lmem2 = node1_iomem2;
> +       } else {
> +               lmem0 = iomem0;
> +               lmem2 = iomem2;
> +       }
> +
> +       chunks = ((size + (CHUNK_SIZE - 1)) / CHUNK_SIZE);
> +       acccmd_address = (address & 0x00000000ffffffff);
> +       spin_lock_bh(&pf_reg_lock);
> +
> +       for (k = 0; k < chunks; k++) {

Sunil>> Why not use some proper variable names, instead of i,j,k,w,
temp e.t.c e.t.c


> +               /* Should never happen */
> +               if (size < 0) {
> +                       filter_dbg(FERR, "%s size mismatch [CHUNK %d]\n",
> +                                  __func__, k);
> +                       break;
> +               }
> +               temp = (size > CHUNK_SIZE) ? CHUNK_SIZE : size;
> +               dataw = (u32 *)(kern_buffer + (k * CHUNK_SIZE));
> +               cnt = ((temp + 3) / 4);
> +               data.u = 0ULL;
> +               for (j = 0, i = 0; i < cnt; i++) {
> +                       /* Odd words go in the upper 32 bits of the data
> +                        * register
> +                        */
> +                       if (i & 1) {
> +                               data.s.upper32 = dataw[i];
> +                               writeq_relaxed(data.u, (void *)(lmem0 +
> +                                              TNS_TDMA_SST_ACC_WDATX(j)));
> +                               data.u = 0ULL;
> +                               j++; /* Advance to the next data word */
> +                               w = 0;
> +                       } else {
> +                               /* Lower 32 bits contain words 0, 2, 4, etc. */
> +                               data.s.lower32 = dataw[i];
> +                               w = 1;
> +                       }
> +               }
> +
> +               /* If the last word was a partial (< 64 bits) then
> +                * see if we need to write it.
> +                */
> +               if (w)
> +                       writeq_relaxed(data.u, (void *)(lmem0 +
> +                                      TNS_TDMA_SST_ACC_WDATX(j)));
> +
> +               acccmd.u = 0ULL;
> +               acccmd.s.go = 1; /* Cleared once the request is serviced */
> +               acccmd.s.size = cnt;
> +               acccmd.s.addr = (acccmd_address >> 2);
> +               writeq_relaxed(acccmd.u, (void *)(lmem0 +
> +                              TDMA_SST_ACC_CMD));
> +               accstat.u = 0ULL;
> +
> +               while (!accstat.s.cmd_done && !accstat.s.error)
> +                       accstat.u = readq_relaxed((void *)(lmem0 +
> +                                         TDMA_SST_ACC_STAT));
> +
> +               if (accstat.s.error) {
> +                       data.u = readq_relaxed((void *)(lmem2 +
> +                                              TDMA_NB_INT_STAT));
> +                       filter_dbg(FERR, "%s Reading data from ", __func__);
> +                       filter_dbg(FERR, "0x%0lx chunk %d failed 0x%0lx",
> +                                  (unsigned long)address, k,
> +                                  (unsigned long)data.u);
> +                       spin_unlock_bh(&pf_reg_lock);
> +                       kfree(kern_buffer);
> +                       return TNS_ERROR_INDIRECT_WRITE;
> +               }
> +               /* Calculate the next offset to write */
> +               acccmd_address = acccmd_address + CHUNK_SIZE;
> +               size -= CHUNK_SIZE;
> +       }
> +       spin_unlock_bh(&pf_reg_lock);
> +
> +       return 0;
> +}
> +
> +int tns_read_register_indirect(int node_id, u64 address, u8 size,
> +                              u8 *kern_buffer)
> +{
> +       union tns_tdma_sst_acc_cmd acccmd;
> +       union tns_tdma_sst_acc_stat_t accstat;
> +       union tns_acc_data data;
> +       int i, j, dcnt;
> +       int cnt = 0;
> +       u32 *dataw = NULL;
> +       int temp = 0;
> +       int k = 0;
> +       int chunks = 0;
> +       u64 acccmd_address;
> +       u64 lmem2 = 0, lmem0 = 0;
> +
> +       if (size == 0 || !kern_buffer) {
> +               filter_dbg(FERR, "%s data size cannot be zero\n", __func__);
> +               return TNS_ERROR_INVALID_ARG;
> +       }
> +       if (size > MAX_SIZE) {
> +               filter_dbg(FERR, "%s Max allowed size exceeded\n", __func__);
> +               return TNS_ERROR_DATA_TOO_LARGE;
> +       }
> +       if (node_id) {
> +               lmem0 = node1_iomem0;
> +               lmem2 = node1_iomem2;
> +       } else {
> +               lmem0 = iomem0;
> +               lmem2 = iomem2;
> +       }
> +
> +       chunks = ((size + (CHUNK_SIZE - 1)) / CHUNK_SIZE);
> +       acccmd_address = (address & 0x00000000ffffffff);
> +       spin_lock_bh(&pf_reg_lock);
> +       for (k = 0; k < chunks; k++) {
> +               /* This should never happen */
> +               if (size < 0) {
> +                       filter_dbg(FERR, "%s size mismatch [CHUNK:%d]\n",
> +                                  __func__, k);
> +                       break;
> +               }
> +               temp = (size > CHUNK_SIZE) ? CHUNK_SIZE : size;
> +               dataw = (u32 *)(kern_buffer + (k * CHUNK_SIZE));
> +               cnt = ((temp + 3) / 4);
> +               acccmd.u = 0ULL;
> +               acccmd.s.op = 1; /* Read operation */
> +               acccmd.s.size = cnt;
> +               acccmd.s.addr = (acccmd_address >> 2);
> +               acccmd.s.go = 1; /* Execute */
> +               writeq_relaxed(acccmd.u, (void *)(lmem0 +
> +                              TDMA_SST_ACC_CMD));
> +               accstat.u = 0ULL;
> +
> +               while (!accstat.s.cmd_done && !accstat.s.error)
> +                       accstat.u = readq_relaxed((void *)(lmem0 +
> +                                                 TDMA_SST_ACC_STAT));
> +
> +               if (accstat.s.error) {
> +                       data.u = readq_relaxed((void *)(lmem2 +
> +                                              TDMA_NB_INT_STAT));
> +                       filter_dbg(FERR, "%s Reading data from", __func__);
> +                       filter_dbg(FERR, "0x%0lx chunk %d failed 0x%0lx",
> +                                  (unsigned long)address, k,
> +                                  (unsigned long)data.u);
> +                       spin_unlock_bh(&pf_reg_lock);
> +                       kfree(kern_buffer);
> +                       return TNS_ERROR_INDIRECT_READ;
> +               }
> +
> +               dcnt = cnt / 2;
> +               if (cnt & 1)
> +                       dcnt++;
> +               for (i = 0, j = 0; (j < dcnt) && (i < cnt); j++) {
> +                       data.u = readq_relaxed((void *)(lmem0 +
> +                                              TNS_TDMA_SST_ACC_RDATX(j)));
> +                       dataw[i++] = data.s.lower32;
> +                       if (i < cnt)
> +                               dataw[i++] = data.s.upper32;
> +               }
> +               /* Calculate the next offset to read */
> +               acccmd_address = acccmd_address + CHUNK_SIZE;
> +               size -= CHUNK_SIZE;
> +       }
> +       spin_unlock_bh(&pf_reg_lock);
> +       return 0;
> +}
> +
> +u64 tns_read_register(u64 start, u64 offset)
> +{
> +       return readq_relaxed((void *)(start + offset));
> +}
> +
> +void tns_write_register(u64 start, u64 offset, u64 data)
> +{
> +       writeq_relaxed(data, (void *)(start + offset));
> +}
> +
> +/* Check if TNS is available. If yes return 0 else 1 */
> +int is_tns_available(void)
> +{
> +       union tns_tdma_cap tdma_cap;
> +
> +       tdma_cap.u = tns_read_register(iomem0, TNS_TDMA_CAP_OFFSET);
> +       tns_enabled = tdma_cap.s.switch_capable;
> +       /* In multi-node systems, make sure TNS should be there in both nodes */

Can't node-0 TNS work with node-0 interfaces if node-1 TNS is not detected ?

> +       if (nr_node_ids > 1) {
> +               tdma_cap.u = tns_read_register(node1_iomem0,
> +                                              TNS_TDMA_CAP_OFFSET);
> +               if (tdma_cap.s.switch_capable)
> +                       n1_tns = 1;
> +       }
> +       tns_enabled &= tdma_cap.s.switch_capable;
> +       return (!tns_enabled);
> +}
> +
> +int bist_error_check(void)
> +{
> +       int fail = 0, i;
> +       u64 bist_stat = 0;
> +
> +       for (i = 0; i < 12; i++) {
> +               bist_stat = tns_read_register(iomem0, (i * 16));
> +               if (bist_stat) {
> +                       filter_dbg(FERR, "TNS BIST%d fail 0x%llx\n",
> +                                  i, bist_stat);
> +                       fail = 1;
> +               }
> +               if (!n1_tns)
> +                       continue;
> +               bist_stat = tns_read_register(node1_iomem0, (i * 16));
> +               if (bist_stat) {
> +                       filter_dbg(FERR, "TNS(N1) BIST%d fail 0x%llx\n",
> +                                  i, bist_stat);
> +                       fail = 1;
> +               }
> +       }
> +
> +       return fail;
> +}
> +
> +int replay_indirect_trace(int node, u64 *buf_ptr, int idx)
> +{
> +       union _tns_sst_config cmd = (union _tns_sst_config)(buf_ptr[idx]);
> +       int remaining = cmd.cmd.run;
> +       u64 io_addr;
> +       int word_cnt = cmd.cmd.word_cnt;
> +       int size = (word_cnt + 1) / 2;
> +       u64 stride = word_cnt;
> +       u64 acc_cmd = cmd.copy.do_copy;
> +       u64 lmem2 = 0, lmem0 = 0;
> +       union tns_tdma_sst_acc_stat_t accstat;
> +       union tns_acc_data data;
> +
> +       if (node) {
> +               lmem0 = node1_iomem0;
> +               lmem2 = node1_iomem2;
> +       } else {
> +               lmem0 = iomem0;
> +               lmem2 = iomem2;
> +       }
> +
> +       if (word_cnt == 0) {
> +               word_cnt = 16;
> +               stride = 16;
> +               size = 8;
> +       } else {
> +               // make stride next power of 2

Please use proper commenting, have you ran checkpatch ?

> +               if (cmd.cmd.powerof2stride)
> +                       while ((stride & (stride - 1)) != 0)
> +                               stride++;
> +       }
> +       stride *= 4; //convert stride from 32-bit words to bytes
> +
> +       do {
> +               int addr_p = 1;
> +               /* extract (big endian) data from the config
> +                * into the data array
> +                */
> +               while (size > 0) {
> +                       io_addr = lmem0 + TDMA_SST_ACC_CMD + addr_p * 16;
> +                       tns_write_register(io_addr, 0, buf_ptr[idx + size]);
> +                       addr_p += 1;
> +                       size--;
> +               }
> +               tns_write_register((lmem0 + TDMA_SST_ACC_CMD), 0, acc_cmd);
> +               /* TNS Block access registers indirectly, ran memory barrier
> +                * between two writes
> +                */
> +               wmb();
> +               /* Check for completion */
> +               accstat.u = 0ULL;
> +               while (!accstat.s.cmd_done && !accstat.s.error)
> +                       accstat.u = readq_relaxed((void *)(lmem0 +
> +                                                          TDMA_SST_ACC_STAT));
> +
> +               /* Check for error, and report it */
> +               if (accstat.s.error) {
> +                       filter_dbg(FERR, "%s data from 0x%0llx failed 0x%llx\n",
> +                                  __func__, acc_cmd, accstat.u);
> +                       data.u = readq_relaxed((void *)(lmem2 +
> +                                                       TDMA_NB_INT_STAT));
> +                       filter_dbg(FERR, "Status 0x%llx\n", data.u);
> +               }
> +               /* update the address */
> +               acc_cmd += stride;
> +               size = (word_cnt + 1) / 2;
> +               usleep_range(20, 30);
> +       } while (remaining-- > 0);
> +
> +       return size;
> +}
> +
> +void replay_tns_node(int node, u64 *buf_ptr, int reg_cnt)
> +{
> +       int counter = 0;
> +       u64 offset = 0;
> +       u64 io_address;
> +       int datapathmode = 1;
> +       u64 lmem2 = 0, lmem0 = 0;
> +
> +       if (node) {
> +               lmem0 = node1_iomem0;
> +               lmem2 = node1_iomem2;
> +       } else {
> +               lmem0 = iomem0;
> +               lmem2 = iomem2;
> +       }
> +       for (counter = 0; counter < reg_cnt; counter++) {
> +               if (buf_ptr[counter] == 0xDADADADADADADADAull) {
> +                       datapathmode = 1;
> +                       continue;
> +               } else if (buf_ptr[counter] == 0xDEDEDEDEDEDEDEDEull) {
> +                       datapathmode = 0;
> +                       continue;
> +               }
> +               if (datapathmode == 1) {
> +                       if (buf_ptr[counter] >= BAR0_START &&
> +                           buf_ptr[counter] <= BAR0_END) {
> +                               offset = buf_ptr[counter] - BAR0_START;
> +                               io_address = lmem0 + offset;
> +                       } else if (buf_ptr[counter] >= BAR2_START &&
> +                                  buf_ptr[counter] <= BAR2_END) {
> +                               offset = buf_ptr[counter] - BAR2_START;
> +                               io_address = lmem2 + offset;
> +                       } else {
> +                               filter_dbg(FERR, "%s Address 0x%llx invalid\n",
> +                                          __func__, buf_ptr[counter]);
> +                               return;
> +                       }
> +
> +                       tns_write_register(io_address, 0, buf_ptr[counter + 1]);
> +                       /* TNS Block access registers indirectly, ran memory
> +                        * barrier between two writes
> +                        */
> +                       wmb();
> +                       counter += 1;
> +                       usleep_range(20, 30);
> +               } else if (datapathmode == 0) {
> +                       int sz = replay_indirect_trace(node, buf_ptr, counter);
> +
> +                       counter += sz;
> +               }
> +       }
> +}
> +
> +int alloc_table_info(int i, struct table_static_s tbl_sdata[])
> +{
> +       tbl_info[i].ddata[0].bitmap = kcalloc(BITS_TO_LONGS(tbl_sdata[i].depth),
> +                                             sizeof(uintptr_t), GFP_KERNEL);
> +       if (!tbl_info[i].ddata[0].bitmap)
> +               return 1;
> +
> +       if (!n1_tns)
> +               return 0;
> +
> +       tbl_info[i].ddata[1].bitmap = kcalloc(BITS_TO_LONGS(tbl_sdata[i].depth),
> +                                             sizeof(uintptr_t), GFP_KERNEL);
> +       if (!tbl_info[i].ddata[1].bitmap) {
> +               kfree(tbl_info[i].ddata[0].bitmap);
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> +
> +void tns_replay_register_trace(const struct firmware *fw, struct device *dev)
> +{
> +       int i;
> +       int node = 0;
> +       u8 *buffer = NULL;
> +       u64 *buf_ptr = NULL;
> +       struct tns_global_st *fw_header = NULL;
> +       struct table_static_s tbl_sdata[TNS_MAX_TABLE];
> +       size_t src_len;
> +       size_t dest_len = TNS_FW_MAX_SIZE;
> +       int rc;
> +       u8 *fw2_buf = NULL;
> +       unsigned char *decomp_dest = NULL;
> +
> +       fw2_buf = (u8 *)fw->data;
> +       src_len = fw->size - 8;
> +
> +       decomp_dest = kcalloc((dest_len * 2), sizeof(char), GFP_KERNEL);
> +       if (!decomp_dest)
> +               return;
> +
> +       memset(decomp_dest, 0, (dest_len * 2));
> +       rc = lz4_decompress_unknownoutputsize(&fw2_buf[8], src_len, decomp_dest,
> +                                             &dest_len);
> +       if (rc) {
> +               filter_dbg(FERR, "Decompress Error %d\n", rc);
> +               pr_info("Uncompressed destination length %ld\n", dest_len);
> +               kfree(decomp_dest);
> +               return;
> +       }
> +       fw_header = (struct tns_global_st *)decomp_dest;
> +       buffer = (u8 *)decomp_dest;
> +
> +       filter_dbg(FINFO, "TNS Firmware version: %s Loading...\n",
> +                  fw_header->version);
> +
> +       memset(tbl_info, 0x0, sizeof(tbl_info));
> +       buf_ptr = (u64 *)(buffer + sizeof(struct tns_global_st));
> +       memcpy(tbl_sdata, fw_header->tbl_info, sizeof(fw_header->tbl_info));
> +
> +       for (i = 0; i < TNS_MAX_TABLE; i++) {
> +               if (!tbl_sdata[i].valid)
> +                       continue;
> +               memcpy(&tbl_info[i].sdata, &tbl_sdata[i],
> +                      sizeof(struct table_static_s));
> +               if (alloc_table_info(i, tbl_sdata)) {
> +                       kfree(decomp_dest);
> +                       return;
> +               }
> +       }
> +
> +       for (node = 0; node < nr_node_ids; node++)
> +               replay_tns_node(node, buf_ptr, fw_header->reg_cnt);
> +
> +       kfree(decomp_dest);
> +       release_firmware(fw);
> +}
> +
> +int tns_init(const struct firmware *fw, struct device *dev)
> +{
> +       int result = 0;
> +       int i = 0;
> +       int temp;
> +       union tns_tdma_config tdma_config;
> +       union tns_tdma_lmacx_config tdma_lmac_cfg;
> +       u64 reg_init_val;
> +
> +       spin_lock_init(&pf_reg_lock);
> +
> +       /* use two regions insted of a single big mapping to save
> +        * the kernel virtual space
> +        */
> +       iomem0 = (u64)ioremap(BAR0_START, BAR0_SIZE);
> +       if (iomem0 == 0ULL) {
> +               filter_dbg(FERR, "Node0 ioremap failed for BAR0\n");
> +               result = -EAGAIN;
> +               goto error;
> +       } else {
> +               filter_dbg(FINFO, "ioremap success for BAR0\n");
> +       }
> +
> +       if (nr_node_ids > 1) {
> +               node1_iomem0 = (u64)ioremap(NODE1_BAR0_START, NODE1_BAR0_SIZE);
> +               if (node1_iomem0 == 0ULL) {
> +                       filter_dbg(FERR, "Node1 ioremap failed for BAR0\n");
> +                       result = -EAGAIN;
> +                       goto error;
> +               } else {
> +                       filter_dbg(FINFO, "ioremap success for BAR0\n");
> +               }
> +       }
> +
> +       if (is_tns_available()) {
> +               filter_dbg(FERR, "TNS NOT AVAILABLE\n");
> +               goto error;
> +       }
> +
> +       if (bist_error_check()) {
> +               filter_dbg(FERR, "BIST ERROR CHECK FAILED");
> +               goto error;
> +       }
> +
> +       /* NIC0-BGX0 is TNS, NIC1-BGX1 is TNS, DISABLE BACKPRESSURE */

Sunil>> Why disable backpressure, if it's in TNS mode ?

> +       reg_init_val = 0ULL;
> +       pr_info("NIC Block configured in TNS/TNS mode");
> +       tns_write_register(iomem0, TNS_RDMA_CONFIG_OFFSET, reg_init_val);
> +       usleep_range(10, 20);

Sunil>> Why sleep after every register write ?

> +       if (n1_tns) {
> +               tns_write_register(node1_iomem0, TNS_RDMA_CONFIG_OFFSET,
> +                                  reg_init_val);
> +               usleep_range(10, 20);
> +       }
> +
> +       // Configure each LMAC with 512 credits in BYPASS mode
> +       for (i = TNS_MIN_LMAC; i < (TNS_MIN_LMAC + TNS_MAX_LMAC); i++) {
> +               tdma_lmac_cfg.u = 0ULL;
> +               tdma_lmac_cfg.s.fifo_cdts = 0x200;
> +               tns_write_register(iomem0, TNS_TDMA_LMACX_CONFIG_OFFSET(i),
> +                                  tdma_lmac_cfg.u);
> +               usleep_range(10, 20);
> +               if (n1_tns) {
> +                       tns_write_register(node1_iomem0,
> +                                          TNS_TDMA_LMACX_CONFIG_OFFSET(i),
> +                                          tdma_lmac_cfg.u);
> +                       usleep_range(10, 20);
> +               }
> +       }
> +
> +       //ENABLE TNS CLOCK AND CSR READS
> +       temp = tns_read_register(iomem0, TNS_TDMA_CONFIG_OFFSET);
> +       tdma_config.u = temp;
> +       tdma_config.s.clk_2x_ena = 1;
> +       tdma_config.s.clk_ena = 1;
> +       tns_write_register(iomem0, TNS_TDMA_CONFIG_OFFSET, tdma_config.u);
> +       if (n1_tns)
> +               tns_write_register(node1_iomem0, TNS_TDMA_CONFIG_OFFSET,
> +                                  tdma_config.u);
> +
> +       temp = tns_read_register(iomem0, TNS_TDMA_CONFIG_OFFSET);
> +       tdma_config.u = temp;
> +       tdma_config.s.csr_access_ena = 1;
> +       tns_write_register(iomem0, TNS_TDMA_CONFIG_OFFSET, tdma_config.u);
> +       if (n1_tns)
> +               tns_write_register(node1_iomem0, TNS_TDMA_CONFIG_OFFSET,
> +                                  tdma_config.u);
> +
> +       reg_init_val = 0ULL;
> +       tns_write_register(iomem0, TNS_TDMA_RESET_CTL_OFFSET, reg_init_val);
> +       if (n1_tns)
> +               tns_write_register(node1_iomem0, TNS_TDMA_RESET_CTL_OFFSET,
> +                                  reg_init_val);
> +
> +       iomem2 = (u64)ioremap(BAR2_START, BAR2_SIZE);
> +       if (iomem2 == 0ULL) {
> +               filter_dbg(FERR, "ioremap failed for BAR2\n");
> +               result = -EAGAIN;
> +               goto error;
> +       } else {
> +               filter_dbg(FINFO, "ioremap success for BAR2\n");
> +       }
> +
> +       if (n1_tns) {
> +               node1_iomem2 = (u64)ioremap(NODE1_BAR2_START, NODE1_BAR2_SIZE);
> +               if (node1_iomem2 == 0ULL) {
> +                       filter_dbg(FERR, "Node1 ioremap failed for BAR2\n");
> +                       result = -EAGAIN;
> +                       goto error;
> +               } else {
> +                       filter_dbg(FINFO, "Node1 ioremap success for BAR2\n");
> +               }
> +       }
> +       msleep(1000);
> +       //We will replay register trace to initialize TNS block
> +       tns_replay_register_trace(fw, dev);
> +
> +       return 0;
> +error:
> +       if (iomem0 != 0)
> +               iounmap((void *)iomem0);
> +       if (iomem2 != 0)
> +               iounmap((void *)iomem2);
> +
> +       if (node1_iomem0 != 0)
> +               iounmap((void *)node1_iomem0);
> +       if (node1_iomem2 != 0)
> +               iounmap((void *)node1_iomem2);
> +
> +       return result;
> +}
> +
> +void tns_exit(void)
> +{
> +       int i;
> +
> +       if (iomem0 != 0)
> +               iounmap((void *)iomem0);
> +       if (iomem2 != 0)
> +               iounmap((void *)iomem2);
> +
> +       if (node1_iomem0 != 0)
> +               iounmap((void *)node1_iomem0);
> +       if (node1_iomem2 != 0)
> +               iounmap((void *)node1_iomem2);
> +
> +       for (i = 0; i < TNS_MAX_TABLE; i++) {
> +               if (!tbl_info[i].sdata.valid)
> +                       continue;
> +               kfree(tbl_info[i].ddata[0].bitmap);
> +               kfree(tbl_info[i].ddata[n1_tns].bitmap);
> +       }
> +}
> --
> 1.8.3.1
>

^ permalink raw reply

* Re: at86rf230: Allow slow GPIO pins for "rstn"
From: Nikita Yushchenko @ 2016-12-21 12:30 UTC (permalink / raw)
  To: linux-wpan
  Cc: Andrey Smirnov, Alexander Aring, netdev, linux-kernel,
	Chris Healy, Nikita Yushchenko
In-Reply-To: <1482103533-13187-1-git-send-email-andrew.smirnov@gmail.com>

> Driver code never touches "rstn" signal in atomic context, so there's
> no need to implicitly put such restriction on it by using gpio_set_value
> to manipulate it. Replace gpio_set_value to gpio_set_value_cansleep to
> fix that.
> 
> As a an example of where such restriction might be inconvenient,
> consider a hardware design where "rstn" is connected to a pin of I2C/SPI
> GPIO expander chip.
> 
> Cc: Chris Healy <cphealy@gmail.com>
> Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>

Reviewed-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>

^ permalink raw reply

* Re: ipv6: handle -EFAULT from skb_copy_bits
From: Hannes Frederic Sowa @ 2016-12-21 12:27 UTC (permalink / raw)
  To: Cong Wang, Dave Jones; +Cc: David Miller, Linux Kernel Network Developers
In-Reply-To: <CAM_iQpXWCWMKDjLVTLuHNSKKGq5MWf5ZRLpyE=PDD3EgQ7PS4Q@mail.gmail.com>

On Tue, 2016-12-20 at 22:09 -0800, Cong Wang wrote:
> On Tue, Dec 20, 2016 at 2:12 PM, Dave Jones <davej@codemonkey.org.uk> wrote:
> >         fd = socket(AF_INET6, SOCK_RAW, 7);
> > 
> >         setsockopt(fd, SOL_IPV6, IPV6_CHECKSUM, &zero, 4);
> >         setsockopt(fd, SOL_IPV6, IPV6_DSTOPTS, &buf, LEN);
> > 
> 
> Interesting, you set the checksum offset to be 0, but the packet size
> is actually 49, transport header is located at offset 48, so apparently
> the packet doesn't have room for a 16bit checksum after network header.
> 
> Your original patch seems reasonable to me, unless there is some
> check in __ip6_append_data() which is supposed to catch this, but
> CHECKSUM is specific to raw socket only.

The calculation of total_len is wrong here:

	total_len = inet_sk(sk)->cork.base.length;
	if (offset >= total_len - 1) {
		err = -EINVAL;
		ip6_flush_pending_frames(sk);
		goto out;
	}


At least for this bug to fix we need to subtract the extension header
length after the fragmentation header, so:

--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -536,6 +536,17 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
        goto out;
 }
 
+static unsigned int raw6_corked_transport_len(const struct sock *sk)
+{
+       unsigned int len = inet_sk(sk)->cork.base.length;
+       struct ipv6_txoptions *opt = inet6_sk(sk)->cork.opt;
+
+       if (likely(!opt))
+               return len;
+
+       return len - opt->opt_flen;
+}
+
 static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
                                     struct raw6_sock *rp)
 {
@@ -543,7 +554,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
        int err = 0;
        int offset;
        int len;
-       int total_len;
+       int transport_len;
        __wsum tmp_csum;
        __sum16 csum;
 
@@ -555,8 +566,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
                goto out;
 
        offset = rp->offset;
-       total_len = inet_sk(sk)->cork.base.length;
-       if (offset >= total_len - 1) {
+       transport_len = raw6_corked_transport_len(sk);
+       if (offset >= transport_len - 1) {
                err = -EINVAL;
                ip6_flush_pending_frames(sk);
                goto out;
@@ -598,7 +609,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
                tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
 
        csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
-                              total_len, fl6->flowi6_proto, tmp_csum);
+                              transport_len, fl6->flowi6_proto, tmp_csum);
 
        if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP)
                csum = CSUM_MANGLED_0;

^ permalink raw reply

* Re: [PATCH v3] net: macb: Added PCI wrapper for Platform Driver.
From: Andy Shevchenko @ 2016-12-21 12:22 UTC (permalink / raw)
  To: Bartosz Folta
  Cc: Nicolas Ferre, David S. Miller, Niklas Cassel, Alexandre Torgue,
	Satanand Burla, Raghu Vatsavayi, Simon Horman,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Rafal Ozieblo
In-Reply-To: <CAHp75VcD8uCoiXiMV2f7OrPUDysGQX_VFZM+9+1pzRSNkT883Q@mail.gmail.com>

On Wed, Dec 21, 2016 at 2:16 PM, Andy Shevchenko
<andy.shevchenko@gmail.com> wrote:
> On Wed, Dec 21, 2016 at 1:53 PM, Bartosz Folta <bfolta@cadence.com> wrote:

>> I think that there is a delay on some mailing lists (marc.info and
>> mail-archive.com). Latest messages I found there are from December
>> 16th 2016.
>
> You may refer to
> https://www.spinics.net/lists/netdev/msg410864.html

And one more

+       plat_info.dma_mask = DMA_BIT_MASK(32);

Perhaps
 plat_info.dma_mask = pdev->dma_mask;
?

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH v3] net: macb: Added PCI wrapper for Platform Driver.
From: Andy Shevchenko @ 2016-12-21 12:16 UTC (permalink / raw)
  To: Bartosz Folta
  Cc: Nicolas Ferre, David S. Miller, Niklas Cassel, Alexandre Torgue,
	Satanand Burla, Raghu Vatsavayi, Simon Horman,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Rafal Ozieblo
In-Reply-To: <SN1PR0701MB19517F15DBF69C2666FD67FACC930@SN1PR0701MB1951.namprd07.prod.outlook.com>

On Wed, Dec 21, 2016 at 1:53 PM, Bartosz Folta <bfolta@cadence.com> wrote:
> Please find link to patch holding changes that you suggested:
> https://lkml.kernel.org/r/SN1PR0701MB1951B994F86160A24C436F30CC930@SN1PR0701MB1951.namprd07.prod.outlook.com
>
> Here is alternative link to mentioned patch.
> https://patchwork.ozlabs.org/patch/707720/

Thanks!

>
> I think that there is a delay on some mailing lists (marc.info and
> mail-archive.com). Latest messages I found there are from December
> 16th 2016.
>

You may refer to
https://www.spinics.net/lists/netdev/msg410864.html

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [RFC PATCH 0/7] ThunderX Embedded switch support
From: Sunil Kovvuri @ 2016-12-21 12:03 UTC (permalink / raw)
  To: Satha Koteswara Rao
  Cc: LKML, Sunil Goutham, Robert Richter, David S. Miller, David Daney,
	rvatsavayi, derek.chickles, philip.romanov, Linux Netdev List,
	LAKML
In-Reply-To: <1482310011-1862-1-git-send-email-satha.rao@caviumnetworks.com>

It would be easier for anyone to review if you prepare patches based on
features rather than based on modifications to files.

Thanks,
Sunil.

^ permalink raw reply

* RE: [PATCH v3] net: macb: Added PCI wrapper for Platform Driver.
From: Bartosz Folta @ 2016-12-21 11:53 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Nicolas Ferre, David S. Miller, Niklas Cassel, Alexandre Torgue,
	Satanand Burla, Raghu Vatsavayi, Simon Horman,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Rafal Ozieblo
In-Reply-To: <CAHp75VeOcAsTDsYDuQ_2nG1RUWn-txuHBJqyJnY-ywvPfeiK6g@mail.gmail.com>

Please find link to patch holding changes that you suggested:
https://lkml.kernel.org/r/SN1PR0701MB1951B994F86160A24C436F30CC930@SN1PR0701MB1951.namprd07.prod.outlook.com

Here is alternative link to mentioned patch.
https://patchwork.ozlabs.org/patch/707720/

I think that there is a delay on some mailing lists (marc.info and
mail-archive.com). Latest messages I found there are from December
16th 2016.

Regards,
Bartosz Folta

^ permalink raw reply

* Re: [PATCH 2/3] NFC: trf7970a: Add device tree option of 1.8 Volt IO voltage
From: Geoff Lansberry @ 2016-12-21 11:47 UTC (permalink / raw)
  To: Mark Greer
  Cc: linux-wireless, lauro.venancio, aloisio.almeida, sameo, robh+dt,
	mark.rutland, netdev, devicetree, linux-kernel, justin
In-Reply-To: <20161221022347.GB5444@animalcreek.com>

Thanks Mark.   Should I resubmit patches with the requested edits today, or wait a bit for more comments?  What is the desired etiquette?

> On Dec 20, 2016, at 9:23 PM, Mark Greer <mgreer@animalcreek.com> wrote:
> 
>> On Tue, Dec 20, 2016 at 11:16:31AM -0500, Geoff Lansberry wrote:
>> From: Geoff Lansberry <geoff@kuvee.com>
>> 
>> The TRF7970A has configuration options for supporting hardware designs
>> with 1.8 Volt or 3.3 Volt IO.   This commit adds a device tree option,
>> using a fixed regulator binding, for setting the io voltage to match
>> the hardware configuration. If no option is supplied it defaults to
>> 3.3 volt configuration.
> 
> Sign-off ??  Same comment for you other patches.
> 
> <time passes>
> 
> Okay I see you have it at the end of the patch.  It should be here.
> 'git commit -s' is your friend.
> 
>> ---
>> .../devicetree/bindings/net/nfc/trf7970a.txt       |  4 ++--
>> drivers/nfc/trf7970a.c                             | 28 +++++++++++++++++++++-
>> 2 files changed, 29 insertions(+), 3 deletions(-)
>> 
>> diff --git a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
>> index e262ac1..b5777d8 100644
>> --- a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
>> +++ b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
>> @@ -21,9 +21,9 @@ Optional SoC Specific Properties:
>> - t5t-rmb-extra-byte-quirk: Specify that the trf7970a has the erratum
>>   where an extra byte is returned by Read Multiple Block commands issued
>>   to Type 5 tags.
>> +- vdd-io-supply: Regulator specifying voltage for vdd-io
>> - clock-frequency: Set to specify that the input frequency to the trf7970a is 13560000Hz or 27120000Hz
>> 
>> -
>> Example (for ARM-based BeagleBone with TRF7970A on SPI1):
>> 
>> &spi1 {
>> @@ -41,11 +41,11 @@ Example (for ARM-based BeagleBone with TRF7970A on SPI1):
>>                  <&gpio2 5 GPIO_ACTIVE_LOW>;
>>        vin-supply = <&ldo3_reg>;
>>        vin-voltage-override = <5000000>;
>> +        vdd-io-supply = <&ldo2_reg>;
>>        autosuspend-delay = <30000>;
>>        irq-status-read-quirk;
>>        en2-rf-quirk;
>>        t5t-rmb-extra-byte-quirk;
>> -        vdd_io_1v8;
> 
> It was already mentioned but this shouldn't have been added in the
> previous patch so it shouldn't be here now.
> 
>>        clock-frequency = <27120000>;
>>        status = "okay";
>>    };
>> diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
>> index 4e051e9..8a88195 100644
>> --- a/drivers/nfc/trf7970a.c
>> +++ b/drivers/nfc/trf7970a.c
> 
>> @@ -2062,6 +2068,7 @@ static int trf7970a_probe(struct spi_device *spi)
>>        return ret;
>>    }
>> 
>> +
> 
> Please don't add an extra blank line.
> 
>>    of_property_read_u32(np, "clock-frequency", &clk_freq);
>>    if ((clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY) ||
>>        (clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY)) {
>> @@ -2105,6 +2112,25 @@ static int trf7970a_probe(struct spi_device *spi)
>>    if (uvolts > 4000000)
>>        trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3;
>> 
>> +    trf->regulator = devm_regulator_get(&spi->dev, "vdd-io");
>> +    if (IS_ERR(trf->regulator)) {
>> +        ret = PTR_ERR(trf->regulator);
>> +        dev_err(trf->dev, "Can't get VDD_IO regulator: %d\n", ret);
>> +        goto err_destroy_lock;
>> +    }
>> +
>> +    ret = regulator_enable(trf->regulator);
>> +    if (ret) {
>> +        dev_err(trf->dev, "Can't enable VDD_IO: %d\n", ret);
>> +        goto err_destroy_lock;
>> +    }
>> +
>> +
> 
> Please don't add an extra blank line.
> 
>> +    if (regulator_get_voltage(trf->regulator) == 1800000) {
>> +        trf->io_ctrl = TRF7970A_REG_IO_CTRL_IO_LOW;
>> +        dev_dbg(trf->dev, "trf7970a config vdd_io to 1.8V\n");
>> +    }
>> +
>>    trf->ddev = nfc_digital_allocate_device(&trf7970a_nfc_ops,
>>            TRF7970A_SUPPORTED_PROTOCOLS,
>>            NFC_DIGITAL_DRV_CAPS_IN_CRC |
>> -- 
>> Signed-off-by: Geoff Lansberry <geoff@kuvee.com>
> 
> Your 'Signed-off-by:' goes at the end of the commit description not here.
> 
> Overall, I think you did the right thing (unless someone disagrees).
> Just some minor issues.
> 
> Mark
> --

^ permalink raw reply

* [PATCH for bnxt_re V4 20/21] RDMA/bnxt_re: Add QP event handling
From: Selvin Xavier @ 2016-12-21 11:42 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482320530-5344-1-git-send-email-selvin.xavier@broadcom.com>

Implements callback handler for processing Async events related to a QP.
This patch also implements the control path command completion handling.

v3: Removes unwanted braces

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 47 ++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 9144b5a..a000397 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -257,6 +257,44 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
 	return 0;
 }
 
+static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
+				       struct creq_qp_event *qp_event)
+{
+	struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
+	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+	struct bnxt_qplib_crsqe *crsqe;
+	u16 cbit, cookie, blocked = 0;
+	unsigned long flags;
+	u32 sw_cons;
+
+	switch (qp_event->event) {
+	case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+		break;
+	default:
+		/* Command Response */
+		spin_lock_irqsave(&cmdq->lock, flags);
+		sw_cons = HWQ_CMP(crsq->cons, crsq);
+		crsqe = &crsq->crsq[sw_cons];
+		crsq->cons++;
+		memcpy(&crsqe->qp_event, qp_event, sizeof(crsqe->qp_event));
+
+		cookie = le16_to_cpu(crsqe->qp_event.cookie);
+		blocked = cookie & RCFW_CMD_IS_BLOCKING;
+		cookie &= RCFW_MAX_COOKIE_VALUE;
+		cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+		if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
+			dev_warn(&rcfw->pdev->dev,
+				 "QPLIB: CMD bit %d was not requested", cbit);
+
+		cmdq->cons += crsqe->req_size;
+		spin_unlock_irqrestore(&cmdq->lock, flags);
+		if (!blocked)
+			wake_up(&rcfw->waitq);
+		break;
+	}
+	return 0;
+}
+
 /* SP - CREQ Completion handlers */
 static void bnxt_qplib_service_creq(unsigned long data)
 {
@@ -280,6 +318,15 @@ static void bnxt_qplib_service_creq(unsigned long data)
 		type = creqe->type & CREQ_BASE_TYPE_MASK;
 		switch (type) {
 		case CREQ_BASE_TYPE_QP_EVENT:
+			if (!bnxt_qplib_process_qp_event
+			    (rcfw, (struct creq_qp_event *)creqe))
+				rcfw->creq_qp_event_processed++;
+			else {
+				dev_warn(&rcfw->pdev->dev, "QPLIB: crsqe with");
+				dev_warn(&rcfw->pdev->dev,
+					 "QPLIB: type = 0x%x not handled",
+					 type);
+			}
 			break;
 		case CREQ_BASE_TYPE_FUNC_EVENT:
 			if (!bnxt_qplib_process_func_event
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V4 19/21] RDMA/bnxt_re: Set uverbs command mask
From: Selvin Xavier @ 2016-12-21 11:42 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482320530-5344-1-git-send-email-selvin.xavier@broadcom.com>

This patch exports available uverbs command mask to the IB stack.
Also, populates some of the missing parameters in the ibdev structure
used for registration.

v4: Changes the include file names

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/main.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 7e78da6..7bc41da 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -61,6 +61,7 @@
 #include "qplib_rcfw.h"
 #include "bnxt_re.h"
 #include "ib_verbs.h"
+#include <rdma/bnxt_re-abi.h>
 #include "bnxt.h"
 static char version[] =
 		BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
@@ -435,8 +436,42 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 		strlen(BNXT_RE_DESC) + 5);
 	ibdev->phys_port_cnt = 1;
 
+	bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
+
 	ibdev->num_comp_vectors	= 1;
 	ibdev->dma_device = &rdev->en_dev->pdev->dev;
+	ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
+
+	/* User space */
+	ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
+	ibdev->uverbs_cmd_mask =
+			(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+			(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+			(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+			(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+			(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+			(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+			(1ull << IB_USER_VERBS_CMD_REREG_MR)		|
+			(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+			(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+			(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+			(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
+			(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+			(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+			(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+			(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
+			(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+			(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_CREATE_AH)		|
+			(1ull << IB_USER_VERBS_CMD_MODIFY_AH)		|
+			(1ull << IB_USER_VERBS_CMD_QUERY_AH)		|
+			(1ull << IB_USER_VERBS_CMD_DESTROY_AH);
+	/* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */
+
+	/* Kernel verbs */
 	ibdev->query_device		= bnxt_re_query_device;
 	ibdev->modify_device		= bnxt_re_modify_device;
 
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V4 13/21] RDMA/bnxt_re: Support QP verbs
From: Selvin Xavier @ 2016-12-21 11:42 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482320530-5344-1-git-send-email-selvin.xavier@broadcom.com>

This patch implements create_qp, destroy_qp, query_qp and modify_qp verbs.

v2: Fixed sparse warnings
v3: Splits __filter_modify_flags function to avoid nested switch cases.
    Removes unwanted macros. Also, fix the endianness related issues
    reported by sparse.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h  |  14 +
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 764 +++++++++++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/ib_verbs.h |  21 +
 drivers/infiniband/hw/bnxt_re/main.c     |   6 +
 drivers/infiniband/hw/bnxt_re/qplib_fp.c | 862 +++++++++++++++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/qplib_fp.h | 272 ++++++++++
 include/uapi/rdma/bnxt_re-abi.h          |  11 +
 7 files changed, 1950 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index cac1173..f53ebd9 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -64,6 +64,14 @@ struct bnxt_re_work {
 	struct net_device	*vlan_dev;
 };
 
+struct bnxt_re_sqp_entries {
+	struct bnxt_qplib_sge sge;
+	u64 wrid;
+	/* For storing the actual qp1 cqe */
+	struct bnxt_qplib_cqe cqe;
+	struct bnxt_re_qp *qp1_qp;
+};
+
 #define BNXT_RE_MIN_MSIX		2
 #define BNXT_RE_MAX_MSIX		16
 #define BNXT_RE_AEQ_IDX			0
@@ -112,6 +120,12 @@ struct bnxt_re_dev {
 	atomic_t			mw_count;
 	/* Max of 2 lossless traffic class supported per port */
 	u16				cosq[2];
+
+	/* QP for for handling QP1 packets */
+	u32				sqp_id;
+	struct bnxt_re_qp		*qp1_sqp;
+	struct bnxt_re_ah		*sqp_ah;
+	struct bnxt_re_sqp_entries sqp_tbl[1024];
 };
 
 #define to_bnxt_re_dev(ptr, member)	\
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 8af6436..2189cad 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -639,6 +639,484 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr)
 	return 0;
 }
 
+/* Queue Pairs */
+int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
+{
+	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+	struct bnxt_re_dev *rdev = qp->rdev;
+	int rc;
+
+	rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
+		return rc;
+	}
+	if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
+		rc = bnxt_qplib_destroy_ah(&rdev->qplib_res,
+					   &rdev->sqp_ah->qplib_ah);
+		if (rc) {
+			dev_err(rdev_to_dev(rdev),
+				"Failed to destroy HW AH for shadow QP");
+			return rc;
+		}
+
+		rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
+					   &rdev->qp1_sqp->qplib_qp);
+		if (rc) {
+			dev_err(rdev_to_dev(rdev),
+				"Failed to destroy Shadow QP");
+			return rc;
+		}
+		mutex_lock(&rdev->qp_lock);
+		list_del(&rdev->qp1_sqp->list);
+		atomic_dec(&rdev->qp_count);
+		mutex_unlock(&rdev->qp_lock);
+
+		kfree(rdev->sqp_ah);
+		kfree(rdev->qp1_sqp);
+	}
+
+	if (qp->rumem && !IS_ERR(qp->rumem))
+		ib_umem_release(qp->rumem);
+	if (qp->sumem && !IS_ERR(qp->sumem))
+		ib_umem_release(qp->sumem);
+
+	mutex_lock(&rdev->qp_lock);
+	list_del(&qp->list);
+	atomic_dec(&rdev->qp_count);
+	mutex_unlock(&rdev->qp_lock);
+	kfree(qp);
+	return 0;
+}
+
+static u8 __from_ib_qp_type(enum ib_qp_type type)
+{
+	switch (type) {
+	case IB_QPT_GSI:
+		return CMDQ_CREATE_QP1_TYPE_GSI;
+	case IB_QPT_RC:
+		return CMDQ_CREATE_QP_TYPE_RC;
+	case IB_QPT_UD:
+		return CMDQ_CREATE_QP_TYPE_UD;
+	case IB_QPT_RAW_ETHERTYPE:
+		return CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE;
+	default:
+		return IB_QPT_MAX;
+	}
+}
+
+static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
+				struct bnxt_re_qp *qp, struct ib_udata *udata)
+{
+	struct bnxt_re_qp_req ureq;
+	struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
+	struct ib_umem *umem;
+	int bytes = 0;
+	struct ib_ucontext *context = pd->ib_pd.uobject->context;
+	struct bnxt_re_ucontext *cntx = container_of(context,
+						     struct bnxt_re_ucontext,
+						     ib_uctx);
+	if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+		return -EFAULT;
+
+	bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
+	/* Consider mapping PSN search memory only for RC QPs. */
+	if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC)
+		bytes += (qplib_qp->sq.max_wqe * sizeof(struct sq_psn_search));
+	bytes = PAGE_ALIGN(bytes);
+	umem = ib_umem_get(context, ureq.qpsva, bytes,
+			   IB_ACCESS_LOCAL_WRITE, 1);
+	if (IS_ERR(umem))
+		return PTR_ERR(umem);
+
+	qp->sumem = umem;
+	qplib_qp->sq.sglist = umem->sg_head.sgl;
+	qplib_qp->sq.nmap = umem->nmap;
+	qplib_qp->qp_handle = ureq.qp_handle;
+
+	if (!qp->qplib_qp.srq) {
+		bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+		bytes = PAGE_ALIGN(bytes);
+		umem = ib_umem_get(context, ureq.qprva, bytes,
+				   IB_ACCESS_LOCAL_WRITE, 1);
+		if (IS_ERR(umem))
+			goto rqfail;
+		qp->rumem = umem;
+		qplib_qp->rq.sglist = umem->sg_head.sgl;
+		qplib_qp->rq.nmap = umem->nmap;
+	}
+
+	qplib_qp->dpi = cntx->dpi;
+	return 0;
+rqfail:
+	ib_umem_release(qp->sumem);
+	qp->sumem = NULL;
+	qplib_qp->sq.sglist = NULL;
+	qplib_qp->sq.nmap = 0;
+
+	return PTR_ERR(umem);
+}
+
+static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
+				(struct bnxt_re_pd *pd,
+				 struct bnxt_qplib_res *qp1_res,
+				 struct bnxt_qplib_qp *qp1_qp)
+{
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_ah *ah;
+	union ib_gid sgid;
+	int rc;
+
+	ah = kzalloc(sizeof(*ah), GFP_KERNEL);
+	if (!ah)
+		return NULL;
+
+	memset(ah, 0, sizeof(*ah));
+	ah->rdev = rdev;
+	ah->qplib_ah.pd = &pd->qplib_pd;
+
+	rc = bnxt_re_query_gid(&rdev->ibdev, 1, 0, &sgid);
+	if (rc)
+		goto fail;
+
+	/* supply the dgid data same as sgid */
+	memcpy(ah->qplib_ah.dgid.data, &sgid.raw,
+	       sizeof(union ib_gid));
+	ah->qplib_ah.sgid_index = 0;
+
+	ah->qplib_ah.traffic_class = 0;
+	ah->qplib_ah.flow_label = 0;
+	ah->qplib_ah.hop_limit = 1;
+	ah->qplib_ah.sl = 0;
+	/* Have DMAC same as SMAC */
+	ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr);
+
+	rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev),
+			"Failed to allocate HW AH for Shadow QP");
+		goto fail;
+	}
+
+	return ah;
+
+fail:
+	kfree(ah);
+	return NULL;
+}
+
+static struct bnxt_re_qp *bnxt_re_create_shadow_qp
+				(struct bnxt_re_pd *pd,
+				 struct bnxt_qplib_res *qp1_res,
+				 struct bnxt_qplib_qp *qp1_qp)
+{
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_qp *qp;
+	int rc;
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return NULL;
+
+	memset(qp, 0, sizeof(*qp));
+	qp->rdev = rdev;
+
+	/* Initialize the shadow QP structure from the QP1 values */
+	ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr);
+
+	qp->qplib_qp.pd = &pd->qplib_pd;
+	qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
+	qp->qplib_qp.type = IB_QPT_UD;
+
+	qp->qplib_qp.max_inline_data = 0;
+	qp->qplib_qp.sig_type = true;
+
+	/* Shadow QP SQ depth should be same as QP1 RQ depth */
+	qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
+	qp->qplib_qp.sq.max_sge = 2;
+
+	qp->qplib_qp.scq = qp1_qp->scq;
+	qp->qplib_qp.rcq = qp1_qp->rcq;
+
+	qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
+	qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
+
+	qp->qplib_qp.mtu = qp1_qp->mtu;
+
+	qp->qplib_qp.sq_hdr_buf_size = 0;
+	qp->qplib_qp.rq_hdr_buf_size = BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6;
+	qp->qplib_qp.dpi = &rdev->dpi_privileged;
+
+	rc = bnxt_qplib_create_qp(qp1_res, &qp->qplib_qp);
+	if (rc)
+		goto fail;
+
+	rdev->sqp_id = qp->qplib_qp.id;
+
+	spin_lock_init(&qp->sq_lock);
+	INIT_LIST_HEAD(&qp->list);
+	mutex_lock(&rdev->qp_lock);
+	list_add_tail(&qp->list, &rdev->qp_list);
+	atomic_inc(&rdev->qp_count);
+	mutex_unlock(&rdev->qp_lock);
+	return qp;
+fail:
+	kfree(qp);
+	return NULL;
+}
+
+struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
+				struct ib_qp_init_attr *qp_init_attr,
+				struct ib_udata *udata)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+	struct bnxt_re_qp *qp;
+	struct bnxt_re_srq *srq;
+	struct bnxt_re_cq *cq;
+	int rc, entries;
+
+	if ((qp_init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) ||
+	    (qp_init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes) ||
+	    (qp_init_attr->cap.max_send_sge > dev_attr->max_qp_sges) ||
+	    (qp_init_attr->cap.max_recv_sge > dev_attr->max_qp_sges) ||
+	    (qp_init_attr->cap.max_inline_data > dev_attr->max_inline_data))
+		return ERR_PTR(-EINVAL);
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return ERR_PTR(-ENOMEM);
+
+	qp->rdev = rdev;
+	ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr);
+	qp->qplib_qp.pd = &pd->qplib_pd;
+	qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
+	qp->qplib_qp.type = __from_ib_qp_type(qp_init_attr->qp_type);
+	if (qp->qplib_qp.type == IB_QPT_MAX) {
+		dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported",
+			qp->qplib_qp.type);
+		rc = -EINVAL;
+		goto fail;
+	}
+	qp->qplib_qp.max_inline_data = qp_init_attr->cap.max_inline_data;
+	qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
+				  IB_SIGNAL_ALL_WR) ? true : false);
+
+	entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
+	if (entries > dev_attr->max_qp_wqes + 1)
+		entries = dev_attr->max_qp_wqes + 1;
+	qp->qplib_qp.sq.max_wqe = entries;
+
+	qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
+	if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
+		qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
+
+	if (qp_init_attr->send_cq) {
+		cq = container_of(qp_init_attr->send_cq, struct bnxt_re_cq,
+				  ib_cq);
+		if (!cq) {
+			dev_err(rdev_to_dev(rdev), "Send CQ not found");
+			rc = -EINVAL;
+			goto fail;
+		}
+		qp->qplib_qp.scq = &cq->qplib_cq;
+	}
+
+	if (qp_init_attr->recv_cq) {
+		cq = container_of(qp_init_attr->recv_cq, struct bnxt_re_cq,
+				  ib_cq);
+		if (!cq) {
+			dev_err(rdev_to_dev(rdev), "Receive CQ not found");
+			rc = -EINVAL;
+			goto fail;
+		}
+		qp->qplib_qp.rcq = &cq->qplib_cq;
+	}
+
+	if (qp_init_attr->srq) {
+		dev_err(rdev_to_dev(rdev), "SRQ not supported");
+		rc = -ENOTSUPP;
+		goto fail;
+	} else {
+		/* Allocate 1 more than what's provided so posting max doesn't
+		 * mean empty
+		 */
+		entries = roundup_pow_of_two(qp_init_attr->cap.max_recv_wr + 1);
+		if (entries > dev_attr->max_qp_wqes + 1)
+			entries = dev_attr->max_qp_wqes + 1;
+		qp->qplib_qp.rq.max_wqe = entries;
+
+		qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
+		if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
+			qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+	}
+
+	qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
+
+	if (qp_init_attr->qp_type == IB_QPT_GSI) {
+		qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+		if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
+			qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+		qp->qplib_qp.sq.max_sge++;
+		if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
+			qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
+
+		qp->qplib_qp.rq_hdr_buf_size =
+					BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+
+		qp->qplib_qp.sq_hdr_buf_size =
+					BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2;
+		qp->qplib_qp.dpi = &rdev->dpi_privileged;
+		rc = bnxt_qplib_create_qp1(&rdev->qplib_res, &qp->qplib_qp);
+		if (rc) {
+			dev_err(rdev_to_dev(rdev), "Failed to create HW QP1");
+			goto fail;
+		}
+		/* Create a shadow QP to handle the QP1 traffic */
+		rdev->qp1_sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res,
+							 &qp->qplib_qp);
+		if (!rdev->qp1_sqp) {
+			rc = -EINVAL;
+			dev_err(rdev_to_dev(rdev),
+				"Failed to create Shadow QP for QP1");
+			goto qp_destroy;
+		}
+		rdev->sqp_ah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res,
+							   &qp->qplib_qp);
+		if (!rdev->sqp_ah) {
+			bnxt_qplib_destroy_qp(&rdev->qplib_res,
+					      &rdev->qp1_sqp->qplib_qp);
+			rc = -EINVAL;
+			dev_err(rdev_to_dev(rdev),
+				"Failed to create AH entry for ShadowQP");
+			goto qp_destroy;
+		}
+
+	} else {
+		qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
+		qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
+		if (udata) {
+			rc = bnxt_re_init_user_qp(rdev, pd, qp, udata);
+			if (rc)
+				goto fail;
+		} else {
+			qp->qplib_qp.dpi = &rdev->dpi_privileged;
+		}
+
+		rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp);
+		if (rc) {
+			dev_err(rdev_to_dev(rdev), "Failed to create HW QP");
+			goto fail;
+		}
+	}
+
+	qp->ib_qp.qp_num = qp->qplib_qp.id;
+	spin_lock_init(&qp->sq_lock);
+
+	if (udata) {
+		struct bnxt_re_qp_resp resp;
+
+		resp.qpid = qp->ib_qp.qp_num;
+		resp.rsvd = 0;
+		rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+		if (rc) {
+			dev_err(rdev_to_dev(rdev), "Failed to copy QP udata");
+			goto qp_destroy;
+		}
+	}
+	INIT_LIST_HEAD(&qp->list);
+	mutex_lock(&rdev->qp_lock);
+	list_add_tail(&qp->list, &rdev->qp_list);
+	atomic_inc(&rdev->qp_count);
+	mutex_unlock(&rdev->qp_lock);
+
+	return &qp->ib_qp;
+qp_destroy:
+	bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+fail:
+	kfree(qp);
+	return ERR_PTR(rc);
+}
+
+static u8 __from_ib_qp_state(enum ib_qp_state state)
+{
+	switch (state) {
+	case IB_QPS_RESET:
+		return CMDQ_MODIFY_QP_NEW_STATE_RESET;
+	case IB_QPS_INIT:
+		return CMDQ_MODIFY_QP_NEW_STATE_INIT;
+	case IB_QPS_RTR:
+		return CMDQ_MODIFY_QP_NEW_STATE_RTR;
+	case IB_QPS_RTS:
+		return CMDQ_MODIFY_QP_NEW_STATE_RTS;
+	case IB_QPS_SQD:
+		return CMDQ_MODIFY_QP_NEW_STATE_SQD;
+	case IB_QPS_SQE:
+		return CMDQ_MODIFY_QP_NEW_STATE_SQE;
+	case IB_QPS_ERR:
+	default:
+		return CMDQ_MODIFY_QP_NEW_STATE_ERR;
+	}
+}
+
+static enum ib_qp_state __to_ib_qp_state(u8 state)
+{
+	switch (state) {
+	case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+		return IB_QPS_RESET;
+	case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+		return IB_QPS_INIT;
+	case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+		return IB_QPS_RTR;
+	case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+		return IB_QPS_RTS;
+	case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+		return IB_QPS_SQD;
+	case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+		return IB_QPS_SQE;
+	case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+	default:
+		return IB_QPS_ERR;
+	}
+}
+
+static u32 __from_ib_mtu(enum ib_mtu mtu)
+{
+	switch (mtu) {
+	case IB_MTU_256:
+		return CMDQ_MODIFY_QP_PATH_MTU_MTU_256;
+	case IB_MTU_512:
+		return CMDQ_MODIFY_QP_PATH_MTU_MTU_512;
+	case IB_MTU_1024:
+		return CMDQ_MODIFY_QP_PATH_MTU_MTU_1024;
+	case IB_MTU_2048:
+		return CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+	case IB_MTU_4096:
+		return CMDQ_MODIFY_QP_PATH_MTU_MTU_4096;
+	default:
+		return CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+	}
+}
+
+static enum ib_mtu __to_ib_mtu(u32 mtu)
+{
+	switch (mtu & CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK) {
+	case CMDQ_MODIFY_QP_PATH_MTU_MTU_256:
+		return IB_MTU_256;
+	case CMDQ_MODIFY_QP_PATH_MTU_MTU_512:
+		return IB_MTU_512;
+	case CMDQ_MODIFY_QP_PATH_MTU_MTU_1024:
+		return IB_MTU_1024;
+	case CMDQ_MODIFY_QP_PATH_MTU_MTU_2048:
+		return IB_MTU_2048;
+	case CMDQ_MODIFY_QP_PATH_MTU_MTU_4096:
+		return IB_MTU_4096;
+	default:
+		return IB_MTU_2048;
+	}
+}
+
 static int __from_ib_access_flags(int iflags)
 {
 	int qflags = 0;
@@ -681,6 +1159,292 @@ static enum ib_access_flags __to_ib_access_flags(int qflags)
 	return iflags;
 };
 
+static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
+				    struct bnxt_re_qp *qp1_qp,
+				    int qp_attr_mask)
+{
+	struct bnxt_re_qp *qp = rdev->qp1_sqp;
+	int rc = 0;
+
+	if (qp_attr_mask & IB_QP_STATE) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
+		qp->qplib_qp.state = qp1_qp->qplib_qp.state;
+	}
+	if (qp_attr_mask & IB_QP_PKEY_INDEX) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+		qp->qplib_qp.pkey_index = qp1_qp->qplib_qp.pkey_index;
+	}
+
+	if (qp_attr_mask & IB_QP_QKEY) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+		/* Using a Random  QKEY */
+		qp->qplib_qp.qkey = 0x81818181;
+	}
+	if (qp_attr_mask & IB_QP_SQ_PSN) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN;
+		qp->qplib_qp.sq.psn = qp1_qp->qplib_qp.sq.psn;
+	}
+
+	rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+	if (rc)
+		dev_err(rdev_to_dev(rdev),
+			"Failed to modify Shadow QP for QP1");
+	return rc;
+}
+
+int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
+		      int qp_attr_mask, struct ib_udata *udata)
+{
+	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+	struct bnxt_re_dev *rdev = qp->rdev;
+	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+	enum ib_qp_state curr_qp_state, new_qp_state;
+	int rc, entries;
+	int status;
+	union ib_gid sgid;
+	struct ib_gid_attr sgid_attr;
+	u8 nw_type;
+
+	qp->qplib_qp.modify_flags = 0;
+	if (qp_attr_mask & IB_QP_STATE) {
+		curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
+		new_qp_state = qp_attr->qp_state;
+		if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
+					ib_qp->qp_type, qp_attr_mask,
+					IB_LINK_LAYER_ETHERNET)) {
+			dev_err(rdev_to_dev(rdev),
+				"Invalid attribute mask: %#x specified ",
+				qp_attr_mask);
+			dev_err(rdev_to_dev(rdev),
+				"for qpn: %#x type: %#x",
+				ib_qp->qp_num, ib_qp->qp_type);
+			dev_err(rdev_to_dev(rdev),
+				"curr_qp_state=0x%x, new_qp_state=0x%x\n",
+				curr_qp_state, new_qp_state);
+			return -EINVAL;
+		}
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
+		qp->qplib_qp.state = __from_ib_qp_state(qp_attr->qp_state);
+	}
+	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
+		qp->qplib_qp.modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY;
+		qp->qplib_qp.en_sqd_async_notify = true;
+	}
+	if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
+		qp->qplib_qp.access =
+			__from_ib_access_flags(qp_attr->qp_access_flags);
+		/* LOCAL_WRITE access must be set to allow RC receive */
+		qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+	}
+	if (qp_attr_mask & IB_QP_PKEY_INDEX) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+		qp->qplib_qp.pkey_index = qp_attr->pkey_index;
+	}
+	if (qp_attr_mask & IB_QP_QKEY) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+		qp->qplib_qp.qkey = qp_attr->qkey;
+	}
+	if (qp_attr_mask & IB_QP_AV) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
+				     CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
+				     CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX |
+				     CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT |
+				     CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS |
+				     CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC |
+				     CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
+		memcpy(qp->qplib_qp.ah.dgid.data, qp_attr->ah_attr.grh.dgid.raw,
+		       sizeof(qp->qplib_qp.ah.dgid.data));
+		qp->qplib_qp.ah.flow_label = qp_attr->ah_attr.grh.flow_label;
+		/* If RoCE V2 is enabled, stack will have two entries for
+		 * each GID entry. Avoiding this duplicte entry in HW. Dividing
+		 * the GID index by 2 for RoCE V2
+		 */
+		qp->qplib_qp.ah.sgid_index =
+					qp_attr->ah_attr.grh.sgid_index / 2;
+		qp->qplib_qp.ah.host_sgid_index =
+					qp_attr->ah_attr.grh.sgid_index;
+		qp->qplib_qp.ah.hop_limit = qp_attr->ah_attr.grh.hop_limit;
+		qp->qplib_qp.ah.traffic_class =
+					qp_attr->ah_attr.grh.traffic_class;
+		qp->qplib_qp.ah.sl = qp_attr->ah_attr.sl;
+		ether_addr_copy(qp->qplib_qp.ah.dmac, qp_attr->ah_attr.dmac);
+
+		status = ib_get_cached_gid(&rdev->ibdev, 1,
+					   qp_attr->ah_attr.grh.sgid_index,
+					   &sgid, &sgid_attr);
+		if (!status && sgid_attr.ndev) {
+			memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr,
+			       ETH_ALEN);
+			dev_put(sgid_attr.ndev);
+			nw_type = ib_gid_to_network_type(sgid_attr.gid_type,
+							 &sgid);
+			switch (nw_type) {
+			case RDMA_NETWORK_IPV4:
+				qp->qplib_qp.nw_type =
+					CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
+				break;
+			case RDMA_NETWORK_IPV6:
+				qp->qplib_qp.nw_type =
+					CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
+				break;
+			default:
+				qp->qplib_qp.nw_type =
+					CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
+				break;
+			}
+		}
+	}
+
+	if (qp_attr_mask & IB_QP_PATH_MTU) {
+		qp->qplib_qp.modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+		qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu);
+	} else if (qp_attr->qp_state == IB_QPS_RTR) {
+		qp->qplib_qp.modify_flags |=
+			CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+		qp->qplib_qp.path_mtu =
+			__from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu));
+	}
+
+	if (qp_attr_mask & IB_QP_TIMEOUT) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT;
+		qp->qplib_qp.timeout = qp_attr->timeout;
+	}
+	if (qp_attr_mask & IB_QP_RETRY_CNT) {
+		qp->qplib_qp.modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT;
+		qp->qplib_qp.retry_cnt = qp_attr->retry_cnt;
+	}
+	if (qp_attr_mask & IB_QP_RNR_RETRY) {
+		qp->qplib_qp.modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY;
+		qp->qplib_qp.rnr_retry = qp_attr->rnr_retry;
+	}
+	if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) {
+		qp->qplib_qp.modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER;
+		qp->qplib_qp.min_rnr_timer = qp_attr->min_rnr_timer;
+	}
+	if (qp_attr_mask & IB_QP_RQ_PSN) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN;
+		qp->qplib_qp.rq.psn = qp_attr->rq_psn;
+	}
+	if (qp_attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+		qp->qplib_qp.modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC;
+		qp->qplib_qp.max_rd_atomic = qp_attr->max_rd_atomic;
+	}
+	if (qp_attr_mask & IB_QP_SQ_PSN) {
+		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN;
+		qp->qplib_qp.sq.psn = qp_attr->sq_psn;
+	}
+	if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+		qp->qplib_qp.modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC;
+		qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic;
+	}
+	if (qp_attr_mask & IB_QP_CAP) {
+		qp->qplib_qp.modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE |
+				CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE |
+				CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE |
+				CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE |
+				CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA;
+		if ((qp_attr->cap.max_send_wr >= dev_attr->max_qp_wqes) ||
+		    (qp_attr->cap.max_recv_wr >= dev_attr->max_qp_wqes) ||
+		    (qp_attr->cap.max_send_sge >= dev_attr->max_qp_sges) ||
+		    (qp_attr->cap.max_recv_sge >= dev_attr->max_qp_sges) ||
+		    (qp_attr->cap.max_inline_data >=
+						dev_attr->max_inline_data)) {
+			dev_err(rdev_to_dev(rdev),
+				"Create QP failed - max exceeded");
+			return -EINVAL;
+		}
+		entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
+		if (entries > dev_attr->max_qp_wqes)
+			entries = dev_attr->max_qp_wqes;
+		qp->qplib_qp.sq.max_wqe = entries;
+		qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
+		if (qp->qplib_qp.rq.max_wqe) {
+			entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
+			if (entries > dev_attr->max_qp_wqes)
+				entries = dev_attr->max_qp_wqes;
+			qp->qplib_qp.rq.max_wqe = entries;
+			qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
+		} else {
+			/* SRQ was used prior, just ignore the RQ caps */
+		}
+	}
+	if (qp_attr_mask & IB_QP_DEST_QPN) {
+		qp->qplib_qp.modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID;
+		qp->qplib_qp.dest_qpn = qp_attr->dest_qp_num;
+	}
+	rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to modify HW QP");
+		return rc;
+	}
+	if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp)
+		rc = bnxt_re_modify_shadow_qp(rdev, qp, qp_attr_mask);
+	return rc;
+}
+
+int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
+		     int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+	struct bnxt_re_dev *rdev = qp->rdev;
+	struct bnxt_qplib_qp qplib_qp;
+	int rc;
+
+	memset(&qplib_qp, 0, sizeof(struct bnxt_qplib_qp));
+	qplib_qp.id = qp->qplib_qp.id;
+	qplib_qp.ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
+
+	rc = bnxt_qplib_query_qp(&rdev->qplib_res, &qplib_qp);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to query HW QP");
+		return rc;
+	}
+	qp_attr->qp_state = __to_ib_qp_state(qplib_qp.state);
+	qp_attr->en_sqd_async_notify = qplib_qp.en_sqd_async_notify ? 1 : 0;
+	qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp.access);
+	qp_attr->pkey_index = qplib_qp.pkey_index;
+	qp_attr->qkey = qplib_qp.qkey;
+	memcpy(qp_attr->ah_attr.grh.dgid.raw, qplib_qp.ah.dgid.data,
+	       sizeof(qplib_qp.ah.dgid.data));
+	qp_attr->ah_attr.grh.flow_label = qplib_qp.ah.flow_label;
+	qp_attr->ah_attr.grh.sgid_index = qplib_qp.ah.host_sgid_index;
+	qp_attr->ah_attr.grh.hop_limit = qplib_qp.ah.hop_limit;
+	qp_attr->ah_attr.grh.traffic_class = qplib_qp.ah.traffic_class;
+	qp_attr->ah_attr.sl = qplib_qp.ah.sl;
+	ether_addr_copy(qp_attr->ah_attr.dmac, qplib_qp.ah.dmac);
+	qp_attr->path_mtu = __to_ib_mtu(qplib_qp.path_mtu);
+	qp_attr->timeout = qplib_qp.timeout;
+	qp_attr->retry_cnt = qplib_qp.retry_cnt;
+	qp_attr->rnr_retry = qplib_qp.rnr_retry;
+	qp_attr->min_rnr_timer = qplib_qp.min_rnr_timer;
+	qp_attr->rq_psn = qplib_qp.rq.psn;
+	qp_attr->max_rd_atomic = qplib_qp.max_rd_atomic;
+	qp_attr->sq_psn = qplib_qp.sq.psn;
+	qp_attr->max_dest_rd_atomic = qplib_qp.max_dest_rd_atomic;
+	qp_init_attr->sq_sig_type = qplib_qp.sig_type ? IB_SIGNAL_ALL_WR :
+							IB_SIGNAL_REQ_WR;
+	qp_attr->dest_qp_num = qplib_qp.dest_qpn;
+
+	qp_attr->cap.max_send_wr = qp->qplib_qp.sq.max_wqe;
+	qp_attr->cap.max_send_sge = qp->qplib_qp.sq.max_sge;
+	qp_attr->cap.max_recv_wr = qp->qplib_qp.rq.max_wqe;
+	qp_attr->cap.max_recv_sge = qp->qplib_qp.rq.max_sge;
+	qp_attr->cap.max_inline_data = qp->qplib_qp.max_inline_data;
+	qp_init_attr->cap = qp_attr->cap;
+
+	return 0;
+}
+
 /* Completion Queues */
 int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
 {
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index ba9a4c9..75ee88a 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -57,6 +57,19 @@ struct bnxt_re_ah {
 	struct bnxt_qplib_ah	qplib_ah;
 };
 
+struct bnxt_re_qp {
+	struct list_head	list;
+	struct bnxt_re_dev	*rdev;
+	struct ib_qp		ib_qp;
+	spinlock_t		sq_lock;	/* protect sq */
+	struct bnxt_qplib_qp	qplib_qp;
+	struct ib_umem		*sumem;
+	struct ib_umem		*rumem;
+	/* QP1 */
+	u32			send_psn;
+	struct ib_ud_header	qp1_hdr;
+};
+
 struct bnxt_re_cq {
 	struct bnxt_re_dev	*rdev;
 	spinlock_t              cq_lock;	/* protect cq */
@@ -141,6 +154,14 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
 int bnxt_re_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
 int bnxt_re_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
 int bnxt_re_destroy_ah(struct ib_ah *ah);
+struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
+				struct ib_qp_init_attr *qp_init_attr,
+				struct ib_udata *udata);
+int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+		      int qp_attr_mask, struct ib_udata *udata);
+int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+		     int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+int bnxt_re_destroy_qp(struct ib_qp *qp);
 struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 				const struct ib_cq_init_attr *attr,
 				struct ib_ucontext *context,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 70b07ac..d68f08d 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -456,6 +456,12 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->modify_ah		= bnxt_re_modify_ah;
 	ibdev->query_ah			= bnxt_re_query_ah;
 	ibdev->destroy_ah		= bnxt_re_destroy_ah;
+
+	ibdev->create_qp		= bnxt_re_create_qp;
+	ibdev->modify_qp		= bnxt_re_modify_qp;
+	ibdev->query_qp			= bnxt_re_query_qp;
+	ibdev->destroy_qp		= bnxt_re_destroy_qp;
+
 	ibdev->create_cq		= bnxt_re_create_cq;
 	ibdev->destroy_cq		= bnxt_re_destroy_cq;
 	ibdev->req_notify_cq		= bnxt_re_req_notify_cq;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index b61b0db..f1f9095 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -51,6 +51,69 @@
 #include "qplib_fp.h"
 
 static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq);
+
+static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res,
+				       struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_q *rq = &qp->rq;
+	struct bnxt_qplib_q *sq = &qp->sq;
+
+	if (qp->rq_hdr_buf)
+		dma_free_coherent(&res->pdev->dev,
+				  rq->hwq.max_elements * qp->rq_hdr_buf_size,
+				  qp->rq_hdr_buf, qp->rq_hdr_buf_map);
+	if (qp->sq_hdr_buf)
+		dma_free_coherent(&res->pdev->dev,
+				  sq->hwq.max_elements * qp->sq_hdr_buf_size,
+				  qp->sq_hdr_buf, qp->sq_hdr_buf_map);
+	qp->rq_hdr_buf = NULL;
+	qp->sq_hdr_buf = NULL;
+	qp->rq_hdr_buf_map = 0;
+	qp->sq_hdr_buf_map = 0;
+	qp->sq_hdr_buf_size = 0;
+	qp->rq_hdr_buf_size = 0;
+}
+
+static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
+				       struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_q *rq = &qp->rq;
+	struct bnxt_qplib_q *sq = &qp->rq;
+	int rc = 0;
+
+	if (qp->sq_hdr_buf_size && sq->hwq.max_elements) {
+		qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
+					sq->hwq.max_elements *
+					qp->sq_hdr_buf_size,
+					&qp->sq_hdr_buf_map, GFP_KERNEL);
+		if (!qp->sq_hdr_buf) {
+			rc = -ENOMEM;
+			dev_err(&res->pdev->dev,
+				"QPLIB: Failed to create sq_hdr_buf");
+			goto fail;
+		}
+	}
+
+	if (qp->rq_hdr_buf_size && rq->hwq.max_elements) {
+		qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
+						    rq->hwq.max_elements *
+						    qp->rq_hdr_buf_size,
+						    &qp->rq_hdr_buf_map,
+						    GFP_KERNEL);
+		if (!qp->rq_hdr_buf) {
+			rc = -ENOMEM;
+			dev_err(&res->pdev->dev,
+				"QPLIB: Failed to create rq_hdr_buf");
+			goto fail;
+		}
+	}
+	return 0;
+
+fail:
+	bnxt_qplib_free_qp_hdr_buf(res, qp);
+	return rc;
+}
+
 static void bnxt_qplib_service_nq(unsigned long data)
 {
 	struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
@@ -216,6 +279,805 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
 	return 0;
 }
 
+/* QP */
+int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_create_qp1 req;
+	struct creq_create_qp1_resp *resp;
+	struct bnxt_qplib_pbl *pbl;
+	struct bnxt_qplib_q *sq = &qp->sq;
+	struct bnxt_qplib_q *rq = &qp->rq;
+	int rc;
+	u16 cmd_flags = 0;
+	u32 qp_flags = 0;
+
+	RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags);
+
+	/* General */
+	req.type = qp->type;
+	req.dpi = cpu_to_le32(qp->dpi->dpi);
+	req.qp_handle = cpu_to_le64(qp->qp_handle);
+
+	/* SQ */
+	sq->hwq.max_elements = sq->max_wqe;
+	rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL, 0,
+				       &sq->hwq.max_elements,
+				       BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, 0,
+				       PAGE_SIZE, HWQ_TYPE_QUEUE);
+	if (rc)
+		goto exit;
+
+	sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
+	if (!sq->swq) {
+		rc = -ENOMEM;
+		goto fail_sq;
+	}
+	pbl = &sq->hwq.pbl[PBL_LVL_0];
+	req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+	req.sq_pg_size_sq_lvl =
+		((sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK)
+				<<  CMDQ_CREATE_QP1_SQ_LVL_SFT) |
+		(pbl->pg_size == ROCE_PG_SIZE_4K ?
+				CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K :
+		 pbl->pg_size == ROCE_PG_SIZE_8K ?
+				CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K :
+		 pbl->pg_size == ROCE_PG_SIZE_64K ?
+				CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K :
+		 pbl->pg_size == ROCE_PG_SIZE_2M ?
+				CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M :
+		 pbl->pg_size == ROCE_PG_SIZE_8M ?
+				CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M :
+		 pbl->pg_size == ROCE_PG_SIZE_1G ?
+				CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G :
+		 CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K);
+
+	if (qp->scq)
+		req.scq_cid = cpu_to_le32(qp->scq->id);
+
+	qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
+
+	/* RQ */
+	if (rq->max_wqe) {
+		rq->hwq.max_elements = qp->rq.max_wqe;
+		rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL, 0,
+					       &rq->hwq.max_elements,
+					       BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
+					       PAGE_SIZE, HWQ_TYPE_QUEUE);
+		if (rc)
+			goto fail_sq;
+
+		rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
+				  GFP_KERNEL);
+		if (!rq->swq) {
+			rc = -ENOMEM;
+			goto fail_rq;
+		}
+		pbl = &rq->hwq.pbl[PBL_LVL_0];
+		req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+		req.rq_pg_size_rq_lvl =
+			((rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK) <<
+			 CMDQ_CREATE_QP1_RQ_LVL_SFT) |
+				(pbl->pg_size == ROCE_PG_SIZE_4K ?
+					CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K :
+				 pbl->pg_size == ROCE_PG_SIZE_8K ?
+					CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K :
+				 pbl->pg_size == ROCE_PG_SIZE_64K ?
+					CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K :
+				 pbl->pg_size == ROCE_PG_SIZE_2M ?
+					CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M :
+				 pbl->pg_size == ROCE_PG_SIZE_8M ?
+					CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M :
+				 pbl->pg_size == ROCE_PG_SIZE_1G ?
+					CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G :
+				 CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K);
+		if (qp->rcq)
+			req.rcq_cid = cpu_to_le32(qp->rcq->id);
+	}
+
+	/* Header buffer - allow hdr_buf pass in */
+	rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
+	if (rc) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	req.qp_flags = cpu_to_le32(qp_flags);
+	req.sq_size = cpu_to_le32(sq->hwq.max_elements);
+	req.rq_size = cpu_to_le32(rq->hwq.max_elements);
+
+	req.sq_fwo_sq_sge =
+		cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
+			    CMDQ_CREATE_QP1_SQ_SGE_SFT);
+	req.rq_fwo_rq_sge =
+		cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
+			    CMDQ_CREATE_QP1_RQ_SGE_SFT);
+
+	req.pd_id = cpu_to_le32(qp->pd->id);
+
+	resp = (struct creq_create_qp1_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp) {
+		dev_err(&res->pdev->dev, "QPLIB: FP: CREATE_QP1 send failed");
+		rc = -EINVAL;
+		goto fail;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 timed out");
+		rc = -ETIMEDOUT;
+		goto fail;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		rc = -EINVAL;
+		goto fail;
+	}
+	qp->id = le32_to_cpu(resp->xid);
+	qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
+	sq->flush_in_progress = false;
+	rq->flush_in_progress = false;
+
+	return 0;
+
+fail:
+	bnxt_qplib_free_qp_hdr_buf(res, qp);
+fail_rq:
+	bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+	kfree(rq->swq);
+fail_sq:
+	bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+	kfree(sq->swq);
+exit:
+	return rc;
+}
+
+int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
+	struct cmdq_create_qp req;
+	struct creq_create_qp_resp *resp;
+	struct bnxt_qplib_pbl *pbl;
+	struct sq_psn_search **psn_search_ptr;
+	unsigned long int psn_search, poff = 0;
+	struct bnxt_qplib_q *sq = &qp->sq;
+	struct bnxt_qplib_q *rq = &qp->rq;
+	struct bnxt_qplib_hwq *xrrq;
+	int i, rc, req_size, psn_sz;
+	u16 cmd_flags = 0, max_ssge;
+	u32 sw_prod, qp_flags = 0;
+
+	RCFW_CMD_PREP(req, CREATE_QP, cmd_flags);
+
+	/* General */
+	req.type = qp->type;
+	req.dpi = cpu_to_le32(qp->dpi->dpi);
+	req.qp_handle = cpu_to_le64(qp->qp_handle);
+
+	/* SQ */
+	psn_sz = (qp->type == CMDQ_CREATE_QP_TYPE_RC) ?
+		 sizeof(struct sq_psn_search) : 0;
+	sq->hwq.max_elements = sq->max_wqe;
+	rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, sq->sglist,
+				       sq->nmap, &sq->hwq.max_elements,
+				       BNXT_QPLIB_MAX_SQE_ENTRY_SIZE,
+				       psn_sz,
+				       PAGE_SIZE, HWQ_TYPE_QUEUE);
+	if (rc)
+		goto exit;
+
+	sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
+	if (!sq->swq) {
+		rc = -ENOMEM;
+		goto fail_sq;
+	}
+	hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
+	if (psn_sz) {
+		psn_search_ptr = (struct sq_psn_search **)
+				  &hw_sq_send_ptr[get_sqe_pg
+					(sq->hwq.max_elements)];
+		psn_search = (unsigned long int)
+			      &hw_sq_send_ptr[get_sqe_pg(sq->hwq.max_elements)]
+			      [get_sqe_idx(sq->hwq.max_elements)];
+		if (psn_search & ~PAGE_MASK) {
+			/* If the psn_search does not start on a page boundary,
+			 * then calculate the offset
+			 */
+			poff = (psn_search & ~PAGE_MASK) /
+				BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE;
+		}
+		for (i = 0; i < sq->hwq.max_elements; i++)
+			sq->swq[i].psn_search =
+				&psn_search_ptr[get_psne_pg(i + poff)]
+					       [get_psne_idx(i + poff)];
+	}
+	pbl = &sq->hwq.pbl[PBL_LVL_0];
+	req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+	req.sq_pg_size_sq_lvl =
+		((sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK)
+				 <<  CMDQ_CREATE_QP_SQ_LVL_SFT) |
+		(pbl->pg_size == ROCE_PG_SIZE_4K ?
+				CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K :
+		 pbl->pg_size == ROCE_PG_SIZE_8K ?
+				CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K :
+		 pbl->pg_size == ROCE_PG_SIZE_64K ?
+				CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K :
+		 pbl->pg_size == ROCE_PG_SIZE_2M ?
+				CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M :
+		 pbl->pg_size == ROCE_PG_SIZE_8M ?
+				CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M :
+		 pbl->pg_size == ROCE_PG_SIZE_1G ?
+				CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G :
+		 CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K);
+
+	/* initialize all SQ WQEs to LOCAL_INVALID (sq prep for hw fetch) */
+	hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
+	for (sw_prod = 0; sw_prod < sq->hwq.max_elements; sw_prod++) {
+		hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
+						[get_sqe_idx(sw_prod)];
+		hw_sq_send_hdr->wqe_type = SQ_BASE_WQE_TYPE_LOCAL_INVALID;
+	}
+
+	if (qp->scq)
+		req.scq_cid = cpu_to_le32(qp->scq->id);
+
+	qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
+	qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
+	if (qp->sig_type)
+		qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
+
+	/* RQ */
+	if (rq->max_wqe) {
+		rq->hwq.max_elements = rq->max_wqe;
+		rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, rq->sglist,
+					       rq->nmap, &rq->hwq.max_elements,
+					       BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
+					       PAGE_SIZE, HWQ_TYPE_QUEUE);
+		if (rc)
+			goto fail_sq;
+
+		rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
+				  GFP_KERNEL);
+		if (!rq->swq) {
+			rc = -ENOMEM;
+			goto fail_rq;
+		}
+		pbl = &rq->hwq.pbl[PBL_LVL_0];
+		req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+		req.rq_pg_size_rq_lvl =
+			((rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK) <<
+			 CMDQ_CREATE_QP_RQ_LVL_SFT) |
+				(pbl->pg_size == ROCE_PG_SIZE_4K ?
+					CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K :
+				 pbl->pg_size == ROCE_PG_SIZE_8K ?
+					CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K :
+				 pbl->pg_size == ROCE_PG_SIZE_64K ?
+					CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K :
+				 pbl->pg_size == ROCE_PG_SIZE_2M ?
+					CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M :
+				 pbl->pg_size == ROCE_PG_SIZE_8M ?
+					CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M :
+				 pbl->pg_size == ROCE_PG_SIZE_1G ?
+					CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G :
+				 CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K);
+	}
+
+	if (qp->rcq)
+		req.rcq_cid = cpu_to_le32(qp->rcq->id);
+	req.qp_flags = cpu_to_le32(qp_flags);
+	req.sq_size = cpu_to_le32(sq->hwq.max_elements);
+	req.rq_size = cpu_to_le32(rq->hwq.max_elements);
+	qp->sq_hdr_buf = NULL;
+	qp->rq_hdr_buf = NULL;
+
+	rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
+	if (rc)
+		goto fail_rq;
+
+	/* CTRL-22434: Irrespective of the requested SGE count on the SQ
+	 * always create the QP with max send sges possible if the requested
+	 * inline size is greater than 0.
+	 */
+	max_ssge = qp->max_inline_data ? 6 : sq->max_sge;
+	req.sq_fwo_sq_sge = cpu_to_le16(
+				((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK)
+				 << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
+	req.rq_fwo_rq_sge = cpu_to_le16(
+				((rq->max_sge & CMDQ_CREATE_QP_RQ_SGE_MASK)
+				 << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
+	/* ORRQ and IRRQ */
+	if (psn_sz) {
+		xrrq = &qp->orrq;
+		xrrq->max_elements =
+			ORD_LIMIT_TO_ORRQ_SLOTS(qp->max_rd_atomic);
+		req_size = xrrq->max_elements *
+			   BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1;
+		req_size &= ~(PAGE_SIZE - 1);
+		rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0,
+					       &xrrq->max_elements,
+					       BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE,
+					       0, req_size, HWQ_TYPE_CTX);
+		if (rc)
+			goto fail_buf_free;
+		pbl = &xrrq->pbl[PBL_LVL_0];
+		req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
+
+		xrrq = &qp->irrq;
+		xrrq->max_elements = IRD_LIMIT_TO_IRRQ_SLOTS(
+						qp->max_dest_rd_atomic);
+		req_size = xrrq->max_elements *
+			   BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1;
+		req_size &= ~(PAGE_SIZE - 1);
+
+		rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0,
+					       &xrrq->max_elements,
+					       BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE,
+					       0, req_size, HWQ_TYPE_CTX);
+		if (rc)
+			goto fail_orrq;
+
+		pbl = &xrrq->pbl[PBL_LVL_0];
+		req.irrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
+	}
+	req.pd_id = cpu_to_le32(qp->pd->id);
+
+	resp = (struct creq_create_qp_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP send failed");
+		rc = -EINVAL;
+		goto fail;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP timed out");
+		rc = -ETIMEDOUT;
+		goto fail;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		rc = -EINVAL;
+		goto fail;
+	}
+	qp->id = le32_to_cpu(resp->xid);
+	qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
+	sq->flush_in_progress = false;
+	rq->flush_in_progress = false;
+
+	return 0;
+
+fail:
+	if (qp->irrq.max_elements)
+		bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+fail_orrq:
+	if (qp->orrq.max_elements)
+		bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
+fail_buf_free:
+	bnxt_qplib_free_qp_hdr_buf(res, qp);
+fail_rq:
+	bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+	kfree(rq->swq);
+fail_sq:
+	bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+	kfree(sq->swq);
+exit:
+	return rc;
+}
+
+static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp)
+{
+	switch (qp->state) {
+	case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+		/* INIT->RTR, configure the path_mtu to the default
+		 * 2048 if not being requested
+		 */
+		if (!(qp->modify_flags &
+		    CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU)) {
+			qp->modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+			qp->path_mtu =
+				CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+		}
+		qp->modify_flags &=
+			~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
+		/* Bono FW require the max_dest_rd_atomic to be >= 1 */
+		if (qp->max_dest_rd_atomic < 1)
+			qp->max_dest_rd_atomic = 1;
+		qp->modify_flags &= ~CMDQ_MODIFY_QP_MODIFY_MASK_SRC_MAC;
+		/* Bono FW 20.6.5 requires SGID_INDEX configuration */
+		if (!(qp->modify_flags &
+		    CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX)) {
+			qp->modify_flags |=
+				CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX;
+			qp->ah.sgid_index = 0;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static void __modify_flags_from_rtr_state(struct bnxt_qplib_qp *qp)
+{
+	switch (qp->state) {
+	case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+		/* Bono FW requires the max_rd_atomic to be >= 1 */
+		if (qp->max_rd_atomic < 1)
+			qp->max_rd_atomic = 1;
+		/* Bono FW does not allow PKEY_INDEX,
+		 * DGID, FLOW_LABEL, SGID_INDEX, HOP_LIMIT,
+		 * TRAFFIC_CLASS, DEST_MAC, PATH_MTU, RQ_PSN,
+		 * MIN_RNR_TIMER, MAX_DEST_RD_ATOMIC, DEST_QP_ID
+		 * modification
+		 */
+		qp->modify_flags &=
+			~(CMDQ_MODIFY_QP_MODIFY_MASK_PKEY |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC |
+			  CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID);
+		break;
+	default:
+		break;
+	}
+}
+
+static void __filter_modify_flags(struct bnxt_qplib_qp *qp)
+{
+	switch (qp->cur_qp_state) {
+	case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+		break;
+	case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+		__modify_flags_from_init_state(qp);
+		break;
+	case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+		__modify_flags_from_rtr_state(qp);
+		break;
+	case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+		break;
+	case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+		break;
+	case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+		break;
+	case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+		break;
+	default:
+		break;
+	}
+}
+
+int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_modify_qp req;
+	struct creq_modify_qp_resp *resp;
+	u16 cmd_flags = 0, pkey;
+	u32 temp32[4];
+	u32 bmask;
+
+	RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags);
+
+	/* Filter out the qp_attr_mask based on the state->new transition */
+	__filter_modify_flags(qp);
+	bmask = qp->modify_flags;
+	req.modify_mask = cpu_to_le32(qp->modify_flags);
+	req.qp_cid = cpu_to_le32(qp->id);
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
+		req.network_type_en_sqd_async_notify_new_state =
+				(qp->state & CMDQ_MODIFY_QP_NEW_STATE_MASK) |
+				(qp->en_sqd_async_notify ?
+					CMDQ_MODIFY_QP_EN_SQD_ASYNC_NOTIFY : 0);
+	}
+	req.network_type_en_sqd_async_notify_new_state |= qp->nw_type;
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS)
+		req.access = qp->access;
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY) {
+		if (!bnxt_qplib_get_pkey(res, &res->pkey_tbl,
+					 qp->pkey_index, &pkey))
+			req.pkey = cpu_to_le16(pkey);
+	}
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_QKEY)
+		req.qkey = cpu_to_le32(qp->qkey);
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DGID) {
+		memcpy(temp32, qp->ah.dgid.data, sizeof(struct bnxt_qplib_gid));
+		req.dgid[0] = cpu_to_le32(temp32[0]);
+		req.dgid[1] = cpu_to_le32(temp32[1]);
+		req.dgid[2] = cpu_to_le32(temp32[2]);
+		req.dgid[3] = cpu_to_le32(temp32[3]);
+	}
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL)
+		req.flow_label = cpu_to_le32(qp->ah.flow_label);
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX)
+		req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id
+					     [qp->ah.sgid_index]);
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT)
+		req.hop_limit = qp->ah.hop_limit;
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS)
+		req.traffic_class = qp->ah.traffic_class;
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC)
+		memcpy(req.dest_mac, qp->ah.dmac, 6);
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU)
+		req.path_mtu = qp->path_mtu;
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT)
+		req.timeout = qp->timeout;
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT)
+		req.retry_cnt = qp->retry_cnt;
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY)
+		req.rnr_retry = qp->rnr_retry;
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER)
+		req.min_rnr_timer = qp->min_rnr_timer;
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN)
+		req.rq_psn = cpu_to_le32(qp->rq.psn);
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN)
+		req.sq_psn = cpu_to_le32(qp->sq.psn);
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC)
+		req.max_rd_atomic =
+			ORD_LIMIT_TO_ORRQ_SLOTS(qp->max_rd_atomic);
+
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC)
+		req.max_dest_rd_atomic =
+			IRD_LIMIT_TO_IRRQ_SLOTS(qp->max_dest_rd_atomic);
+
+	req.sq_size = cpu_to_le32(qp->sq.hwq.max_elements);
+	req.rq_size = cpu_to_le32(qp->rq.hwq.max_elements);
+	req.sq_sge = cpu_to_le16(qp->sq.max_sge);
+	req.rq_sge = cpu_to_le16(qp->rq.max_sge);
+	req.max_inline_data = cpu_to_le32(qp->max_inline_data);
+	if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID)
+		req.dest_qp_id = cpu_to_le32(qp->dest_qpn);
+
+	req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
+
+	resp = (struct creq_modify_qp_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP send failed");
+		return -EINVAL;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+	qp->cur_qp_state = qp->state;
+	return 0;
+}
+
+int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_query_qp req;
+	struct creq_query_qp_resp *resp;
+	struct creq_query_qp_resp_sb *sb;
+	u16 cmd_flags = 0;
+	u32 temp32[4];
+	int i;
+
+	RCFW_CMD_PREP(req, QUERY_QP, cmd_flags);
+
+	req.qp_cid = cpu_to_le32(qp->id);
+	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+	resp = (struct creq_query_qp_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     (void **)&sb, 0);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP send failed");
+		return -EINVAL;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+	/* Extract the context from the side buffer */
+	qp->state = sb->en_sqd_async_notify_state &
+			CREQ_QUERY_QP_RESP_SB_STATE_MASK;
+	qp->en_sqd_async_notify = sb->en_sqd_async_notify_state &
+				  CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY ?
+				  true : false;
+	qp->access = sb->access;
+	qp->pkey_index = le16_to_cpu(sb->pkey);
+	qp->qkey = le32_to_cpu(sb->qkey);
+
+	temp32[0] = le32_to_cpu(sb->dgid[0]);
+	temp32[1] = le32_to_cpu(sb->dgid[1]);
+	temp32[2] = le32_to_cpu(sb->dgid[2]);
+	temp32[3] = le32_to_cpu(sb->dgid[3]);
+	memcpy(qp->ah.dgid.data, temp32, sizeof(qp->ah.dgid.data));
+
+	qp->ah.flow_label = le32_to_cpu(sb->flow_label);
+
+	qp->ah.sgid_index = 0;
+	for (i = 0; i < res->sgid_tbl.max; i++) {
+		if (res->sgid_tbl.hw_id[i] == le16_to_cpu(sb->sgid_index)) {
+			qp->ah.sgid_index = i;
+			break;
+		}
+	}
+	if (i == res->sgid_tbl.max)
+		dev_warn(&res->pdev->dev, "QPLIB: SGID not found??");
+
+	qp->ah.hop_limit = sb->hop_limit;
+	qp->ah.traffic_class = sb->traffic_class;
+	memcpy(qp->ah.dmac, sb->dest_mac, 6);
+	qp->ah.vlan_id = (le16_to_cpu(sb->path_mtu_dest_vlan_id) &
+				CREQ_QUERY_QP_RESP_SB_VLAN_ID_MASK) >>
+				CREQ_QUERY_QP_RESP_SB_VLAN_ID_SFT;
+	qp->path_mtu = (le16_to_cpu(sb->path_mtu_dest_vlan_id) &
+				    CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK) >>
+				    CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT;
+	qp->timeout = sb->timeout;
+	qp->retry_cnt = sb->retry_cnt;
+	qp->rnr_retry = sb->rnr_retry;
+	qp->min_rnr_timer = sb->min_rnr_timer;
+	qp->rq.psn = le32_to_cpu(sb->rq_psn);
+	qp->max_rd_atomic = ORRQ_SLOTS_TO_ORD_LIMIT(sb->max_rd_atomic);
+	qp->sq.psn = le32_to_cpu(sb->sq_psn);
+	qp->max_dest_rd_atomic =
+			IRRQ_SLOTS_TO_IRD_LIMIT(sb->max_dest_rd_atomic);
+	qp->sq.max_wqe = qp->sq.hwq.max_elements;
+	qp->rq.max_wqe = qp->rq.hwq.max_elements;
+	qp->sq.max_sge = le16_to_cpu(sb->sq_sge);
+	qp->rq.max_sge = le16_to_cpu(sb->rq_sge);
+	qp->max_inline_data = le32_to_cpu(sb->max_inline_data);
+	qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
+	memcpy(qp->smac, sb->src_mac, 6);
+	qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
+	return 0;
+}
+
+static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
+{
+	struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+	struct cq_base *hw_cqe, **hw_cqe_ptr;
+	int i;
+
+	for (i = 0; i < cq_hwq->max_elements; i++) {
+		hw_cqe_ptr = (struct cq_base **)cq_hwq->pbl_ptr;
+		hw_cqe = &hw_cqe_ptr[CQE_PG(i)][CQE_IDX(i)];
+		if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements))
+			continue;
+		switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) {
+		case CQ_BASE_CQE_TYPE_REQ:
+		case CQ_BASE_CQE_TYPE_TERMINAL:
+		{
+			struct cq_req *cqe = (struct cq_req *)hw_cqe;
+
+			if (qp == le64_to_cpu(cqe->qp_handle))
+				cqe->qp_handle = 0;
+			break;
+		}
+		case CQ_BASE_CQE_TYPE_RES_RC:
+		case CQ_BASE_CQE_TYPE_RES_UD:
+		case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+		{
+			struct cq_res_rc *cqe = (struct cq_res_rc *)hw_cqe;
+
+			if (qp == le64_to_cpu(cqe->qp_handle))
+				cqe->qp_handle = 0;
+			break;
+		}
+		default:
+			break;
+		}
+	}
+}
+
+int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
+			  struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_destroy_qp req;
+	struct creq_destroy_qp_resp *resp;
+	unsigned long flags;
+	u16 cmd_flags = 0;
+
+	RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
+
+	req.qp_cid = cpu_to_le32(qp->id);
+	resp = (struct creq_destroy_qp_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP send failed");
+		return -EINVAL;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+
+	/* Must walk the associated CQs to nullified the QP ptr */
+	spin_lock_irqsave(&qp->scq->hwq.lock, flags);
+
+	__clean_cq(qp->scq, (u64)(unsigned long)qp);
+
+	if (qp->rcq && qp->rcq != qp->scq) {
+		spin_lock(&qp->rcq->hwq.lock);
+		__clean_cq(qp->rcq, (u64)(unsigned long)qp);
+		spin_unlock(&qp->rcq->hwq.lock);
+	}
+
+	spin_unlock_irqrestore(&qp->scq->hwq.lock, flags);
+
+	bnxt_qplib_free_qp_hdr_buf(res, qp);
+	bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq);
+	kfree(qp->sq.swq);
+
+	bnxt_qplib_free_hwq(res->pdev, &qp->rq.hwq);
+	kfree(qp->rq.swq);
+
+	if (qp->irrq.max_elements)
+		bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+	if (qp->orrq.max_elements)
+		bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
+
+	return 0;
+}
+
 /* CQ */
 
 /* Spinlock must be held */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 9797138..e62c750 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -38,8 +38,268 @@
 
 #ifndef __BNXT_QPLIB_FP_H__
 #define __BNXT_QPLIB_FP_H__
+struct bnxt_qplib_sge {
+	u64				addr;
+	u32				lkey;
+	u32				size;
+};
+
+#define BNXT_QPLIB_MAX_SQE_ENTRY_SIZE	sizeof(struct sq_send)
+
+#define SQE_CNT_PER_PG		(PAGE_SIZE / BNXT_QPLIB_MAX_SQE_ENTRY_SIZE)
+#define SQE_MAX_IDX_PER_PG	(SQE_CNT_PER_PG - 1)
+
+static inline u32 get_sqe_pg(u32 val)
+{
+	return ((val & ~SQE_MAX_IDX_PER_PG) / SQE_CNT_PER_PG);
+}
+
+static inline u32 get_sqe_idx(u32 val)
+{
+	return (val & SQE_MAX_IDX_PER_PG);
+}
+
+#define BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE	sizeof(struct sq_psn_search)
+
+#define PSNE_CNT_PER_PG		(PAGE_SIZE / BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE)
+#define PSNE_MAX_IDX_PER_PG	(PSNE_CNT_PER_PG - 1)
+
+static inline u32 get_psne_pg(u32 val)
+{
+	return ((val & ~PSNE_MAX_IDX_PER_PG) / PSNE_CNT_PER_PG);
+}
+
+static inline u32 get_psne_idx(u32 val)
+{
+	return (val & PSNE_MAX_IDX_PER_PG);
+}
+
+#define BNXT_QPLIB_QP_MAX_SGL	6
+
+struct bnxt_qplib_swq {
+	u64				wr_id;
+	u8				type;
+	u8				flags;
+	u32				start_psn;
+	u32				next_psn;
+	struct sq_psn_search		*psn_search;
+};
+
+struct bnxt_qplib_swqe {
+	/* General */
+	u64				wr_id;
+	u8				reqs_type;
+	u8				type;
+#define BNXT_QPLIB_SWQE_TYPE_SEND			0
+#define BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM		1
+#define BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV		2
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE			4
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM	5
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_READ			6
+#define BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP		8
+#define BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD	11
+#define BNXT_QPLIB_SWQE_TYPE_LOCAL_INV			12
+#define BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR		13
+#define BNXT_QPLIB_SWQE_TYPE_REG_MR			13
+#define BNXT_QPLIB_SWQE_TYPE_BIND_MW			14
+#define BNXT_QPLIB_SWQE_TYPE_RECV			128
+#define BNXT_QPLIB_SWQE_TYPE_RECV_RDMA_IMM		129
+	u8				flags;
+#define BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP		BIT(0)
+#define BNXT_QPLIB_SWQE_FLAGS_RD_ATOMIC_FENCE		BIT(1)
+#define BNXT_QPLIB_SWQE_FLAGS_UC_FENCE			BIT(2)
+#define BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT		BIT(3)
+#define BNXT_QPLIB_SWQE_FLAGS_INLINE			BIT(4)
+	struct bnxt_qplib_sge		sg_list[BNXT_QPLIB_QP_MAX_SGL];
+	int				num_sge;
+	/* Max inline data is 96 bytes */
+	u32				inline_len;
+#define BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH		96
+	u8		inline_data[BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH];
+
+	union {
+		/* Send, with imm, inval key */
+		struct {
+			union {
+				__be32	imm_data;
+				u32	inv_key;
+			};
+			u32		q_key;
+			u32		dst_qp;
+			u16		avid;
+		} send;
+
+		/* Send Raw Ethernet and QP1 */
+		struct {
+			u16		lflags;
+			u16		cfa_action;
+			u32		cfa_meta;
+		} rawqp1;
+
+		/* RDMA write, with imm, read */
+		struct {
+			union {
+				__be32	imm_data;
+				u32	inv_key;
+			};
+			u64		remote_va;
+			u32		r_key;
+		} rdma;
+
+		/* Atomic cmp/swap, fetch/add */
+		struct {
+			u64		remote_va;
+			u32		r_key;
+			u64		swap_data;
+			u64		cmp_data;
+		} atomic;
+
+		/* Local Invalidate */
+		struct {
+			u32		inv_l_key;
+		} local_inv;
+
+		/* FR-PMR */
+		struct {
+			u8		access_cntl;
+			u8		pg_sz_log;
+			bool		zero_based;
+			u32		l_key;
+			u32		length;
+			u8		pbl_pg_sz_log;
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_4K			0
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_8K			1
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_64K			4
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_256K			6
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_1M			8
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_2M			9
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_4M			10
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_1G			18
+			u8		levels;
+#define PAGE_SHIFT_4K	12
+			__le64		*pbl_ptr;
+			dma_addr_t	pbl_dma_ptr;
+			u64		*page_list;
+			u16		page_list_len;
+			u64		va;
+		} frmr;
+
+		/* Bind */
+		struct {
+			u8		access_cntl;
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_LOCAL_WRITE		BIT(0)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_READ		BIT(1)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_WRITE	BIT(2)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_ATOMIC	BIT(3)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_WINDOW_BIND		BIT(4)
+			bool		zero_based;
+			u8		mw_type;
+			u32		parent_l_key;
+			u32		r_key;
+			u64		va;
+			u32		length;
+		} bind;
+	};
+};
+
+#define BNXT_QPLIB_MAX_RQE_ENTRY_SIZE	sizeof(struct rq_wqe)
+
+#define RQE_CNT_PER_PG		(PAGE_SIZE / BNXT_QPLIB_MAX_RQE_ENTRY_SIZE)
+#define RQE_MAX_IDX_PER_PG	(RQE_CNT_PER_PG - 1)
+#define RQE_PG(x)		(((x) & ~RQE_MAX_IDX_PER_PG) / RQE_CNT_PER_PG)
+#define RQE_IDX(x)		((x) & RQE_MAX_IDX_PER_PG)
+
+struct bnxt_qplib_q {
+	struct bnxt_qplib_hwq		hwq;
+	struct bnxt_qplib_swq		*swq;
+	struct scatterlist		*sglist;
+	u32				nmap;
+	u32				max_wqe;
+	u16				max_sge;
+	u32				psn;
+	bool				flush_in_progress;
+};
+
+struct bnxt_qplib_qp {
+	struct bnxt_qplib_pd		*pd;
+	struct bnxt_qplib_dpi		*dpi;
+	u64				qp_handle;
+	u32				id;
+	u8				type;
+	u8				sig_type;
+	u32				modify_flags;
+	u8				state;
+	u8				cur_qp_state;
+	u32				max_inline_data;
+	u32				mtu;
+	u8				path_mtu;
+	bool				en_sqd_async_notify;
+	u16				pkey_index;
+	u32				qkey;
+	u32				dest_qp_id;
+	u8				access;
+	u8				timeout;
+	u8				retry_cnt;
+	u8				rnr_retry;
+	u32				min_rnr_timer;
+	u32				max_rd_atomic;
+	u32				max_dest_rd_atomic;
+	u32				dest_qpn;
+	u8				smac[6];
+	u16				vlan_id;
+	u8				nw_type;
+	struct bnxt_qplib_ah		ah;
+
+#define BTH_PSN_MASK			((1 << 24) - 1)
+	/* SQ */
+	struct bnxt_qplib_q		sq;
+	/* RQ */
+	struct bnxt_qplib_q		rq;
+	/* SRQ */
+	struct bnxt_qplib_srq		*srq;
+	/* CQ */
+	struct bnxt_qplib_cq		*scq;
+	struct bnxt_qplib_cq		*rcq;
+	/* IRRQ and ORRQ */
+	struct bnxt_qplib_hwq		irrq;
+	struct bnxt_qplib_hwq		orrq;
+	/* Header buffer for QP1 */
+	int				sq_hdr_buf_size;
+	int				rq_hdr_buf_size;
+/*
+ * Buffer space for ETH(14), IP or GRH(40), UDP header(8)
+ * and ib_bth + ib_deth (20).
+ * Max required is 82 when RoCE V2 is enabled
+ */
+#define BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2	86
+	/* Ethernet header	=  14 */
+	/* ib_grh		=  40 (provided by MAD) */
+	/* ib_bth + ib_deth	=  20 */
+	/* MAD			= 256 (provided by MAD) */
+	/* iCRC			=   4 */
+#define BNXT_QPLIB_MAX_QP1_RQ_ETH_HDR_SIZE	14
+#define BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2	512
+#define BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV4	20
+#define BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6	40
+#define BNXT_QPLIB_MAX_QP1_RQ_BDETH_HDR_SIZE	20
+	void				*sq_hdr_buf;
+	dma_addr_t			sq_hdr_buf_map;
+	void				*rq_hdr_buf;
+	dma_addr_t			rq_hdr_buf_map;
+};
+
 #define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE	sizeof(struct cq_base)
 
+#define CQE_CNT_PER_PG		(PAGE_SIZE / BNXT_QPLIB_MAX_CQE_ENTRY_SIZE)
+#define CQE_MAX_IDX_PER_PG	(CQE_CNT_PER_PG - 1)
+#define CQE_PG(x)		(((x) & ~CQE_MAX_IDX_PER_PG) / CQE_CNT_PER_PG)
+#define CQE_IDX(x)		((x) & CQE_MAX_IDX_PER_PG)
+
+#define ROCE_CQE_CMP_V			0
+#define CQE_CMP_VALID(hdr, raw_cons, cp_bit)			\
+	(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) ==		\
+	   !((raw_cons) & (cp_bit)))
+
 struct bnxt_qplib_cqe {
 	u8				status;
 	u8				type;
@@ -85,6 +345,13 @@ struct bnxt_qplib_cq {
 	wait_queue_head_t		waitq;
 };
 
+#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE	sizeof(struct xrrq_irrq)
+#define BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE	sizeof(struct xrrq_orrq)
+#define IRD_LIMIT_TO_IRRQ_SLOTS(x)	(2 * (x) + 2)
+#define IRRQ_SLOTS_TO_IRD_LIMIT(s)	(((s) >> 1) - 1)
+#define ORD_LIMIT_TO_ORRQ_SLOTS(x)	((x) + 1)
+#define ORRQ_SLOTS_TO_ORD_LIMIT(s)	((s) - 1)
+
 #define BNXT_QPLIB_MAX_NQE_ENTRY_SIZE	sizeof(struct nq_base)
 
 #define NQE_CNT_PER_PG		(PAGE_SIZE / BNXT_QPLIB_MAX_NQE_ENTRY_SIZE)
@@ -143,6 +410,11 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
 			 int (*srqn_handler)(struct bnxt_qplib_nq *nq,
 					     void *srq,
 					     u8 event));
+int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index 024579b..ad8c916 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -68,6 +68,17 @@ struct bnxt_re_cq_resp {
 	__u32 rsvd;
 };
 
+struct bnxt_re_qp_req {
+	__u64 qpsva;
+	__u64 qprva;
+	__u64 qp_handle;
+};
+
+struct bnxt_re_qp_resp {
+	__u32 qpid;
+	__u32 rsvd;
+};
+
 enum bnxt_re_shpg_offt {
 	BNXT_RE_BEG_RESV_OFFT	= 0x00,
 	BNXT_RE_AVID_OFFT	= 0x10,
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V4 12/21] RDMA/bnxt_re: Support memory registration verbs
From: Selvin Xavier @ 2016-12-21 11:42 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482320530-5344-1-git-send-email-selvin.xavier@broadcom.com>

This patch implements the kernel and user memory region registration
supported by the bnxt_re driver.
This includes the user MR, FRMR, FMR and DMA MR support.

v3: Moves bnxt_qplib_map_tc2cos to DCB patch as the functionality is used
    by DCB patch. Also, removed some unwanted macros.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 376 +++++++++++++++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/ib_verbs.h |  44 ++++
 drivers/infiniband/hw/bnxt_re/main.c     |  11 +
 drivers/infiniband/hw/bnxt_re/qplib_sp.c | 289 ++++++++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/qplib_sp.h |  41 ++++
 5 files changed, 761 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 4f14cd6..8af6436 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -639,6 +639,48 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr)
 	return 0;
 }
 
+static int __from_ib_access_flags(int iflags)
+{
+	int qflags = 0;
+
+	if (iflags & IB_ACCESS_LOCAL_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_READ)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+	if (iflags & IB_ACCESS_REMOTE_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+	if (iflags & IB_ACCESS_MW_BIND)
+		qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+	if (iflags & IB_ZERO_BASED)
+		qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+	if (iflags & IB_ACCESS_ON_DEMAND)
+		qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+	return qflags;
+};
+
+static enum ib_access_flags __to_ib_access_flags(int qflags)
+{
+	enum ib_access_flags iflags = 0;
+
+	if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+		iflags |= IB_ACCESS_LOCAL_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+		iflags |= IB_ACCESS_REMOTE_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+		iflags |= IB_ACCESS_REMOTE_READ;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+		iflags |= IB_ACCESS_REMOTE_ATOMIC;
+	if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+		iflags |= IB_ACCESS_MW_BIND;
+	if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+		iflags |= IB_ZERO_BASED;
+	if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+		iflags |= IB_ACCESS_ON_DEMAND;
+	return iflags;
+};
+
 /* Completion Queues */
 int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
 {
@@ -785,6 +827,340 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq,
 	return 0;
 }
 
+/* Memory Regions */
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_mr *mr;
+	u64 pbl = 0;
+	int rc;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->rdev = rdev;
+	mr->qplib_mr.pd = &pd->qplib_pd;
+	mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+	mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+	/* Allocate and register 0 as the address */
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc)
+		goto fail;
+
+	mr->qplib_mr.hwq.level = PBL_LVL_MAX;
+	mr->qplib_mr.total_size = -1; /* Infinte length */
+	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false);
+	if (rc)
+		goto fail_mr;
+
+	mr->ib_mr.lkey = mr->qplib_mr.lkey;
+	if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |
+			       IB_ACCESS_REMOTE_ATOMIC))
+		mr->ib_mr.rkey = mr->ib_mr.lkey;
+	atomic_inc(&rdev->mr_count);
+
+	return &mr->ib_mr;
+
+fail_mr:
+	bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+fail:
+	kfree(mr);
+	return ERR_PTR(rc);
+}
+
+int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+	struct bnxt_re_dev *rdev = mr->rdev;
+	int rc = 0;
+
+	if (mr->npages && mr->pages) {
+		rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
+							&mr->qplib_frpl);
+		kfree(mr->pages);
+		mr->npages = 0;
+		mr->pages = NULL;
+	}
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+
+	if (!IS_ERR(mr->ib_umem) && mr->ib_umem)
+		ib_umem_release(mr->ib_umem);
+
+	kfree(mr);
+	atomic_dec(&rdev->mr_count);
+	return rc;
+}
+
+static int bnxt_re_set_page(struct ib_mr *ib_mr, u64 addr)
+{
+	struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+	if (unlikely(mr->npages == mr->qplib_frpl.max_pg_ptrs))
+		return -ENOMEM;
+
+	mr->pages[mr->npages++] = addr;
+	return 0;
+}
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+		      unsigned int *sg_offset)
+{
+	struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+	mr->npages = 0;
+	return ib_sg_to_pages(ib_mr, sg, sg_nents, sg_offset, bnxt_re_set_page);
+}
+
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
+			       u32 max_num_sg)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_mr *mr = NULL;
+	int rc;
+
+	if (type != IB_MR_TYPE_MEM_REG) {
+		dev_dbg(rdev_to_dev(rdev), "MR type 0x%x not supported", type);
+		return ERR_PTR(-EINVAL);
+	}
+	if (max_num_sg > MAX_PBL_LVL_1_PGS)
+		return ERR_PTR(-EINVAL);
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->rdev = rdev;
+	mr->qplib_mr.pd = &pd->qplib_pd;
+	mr->qplib_mr.flags = BNXT_QPLIB_FR_PMR;
+	mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc)
+		goto fail;
+
+	mr->ib_mr.lkey = mr->qplib_mr.lkey;
+	mr->ib_mr.rkey = mr->ib_mr.lkey;
+
+	mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+	if (!mr->pages) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	rc = bnxt_qplib_alloc_fast_reg_page_list(&rdev->qplib_res,
+						 &mr->qplib_frpl, max_num_sg);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev),
+			"Failed to allocate HW FR page list");
+		goto fail_mr;
+	}
+
+	atomic_inc(&rdev->mr_count);
+	return &mr->ib_mr;
+
+fail_mr:
+	bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+fail:
+	kfree(mr->pages);
+	kfree(mr);
+	return ERR_PTR(rc);
+}
+
+/* Fast Memory Regions */
+struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *ib_pd, int mr_access_flags,
+				 struct ib_fmr_attr *fmr_attr)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_fmr *fmr;
+	int rc;
+
+	if (fmr_attr->max_pages > MAX_PBL_LVL_2_PGS ||
+	    fmr_attr->max_maps > rdev->dev_attr.max_map_per_fmr) {
+		dev_err(rdev_to_dev(rdev), "Allocate FMR exceeded Max limit");
+		return ERR_PTR(-ENOMEM);
+	}
+	fmr = kzalloc(sizeof(*fmr), GFP_KERNEL);
+	if (!fmr)
+		return ERR_PTR(-ENOMEM);
+
+	fmr->rdev = rdev;
+	fmr->qplib_fmr.pd = &pd->qplib_pd;
+	fmr->qplib_fmr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
+	if (rc)
+		goto fail;
+
+	fmr->qplib_fmr.flags = __from_ib_access_flags(mr_access_flags);
+	fmr->ib_fmr.lkey = fmr->qplib_fmr.lkey;
+	fmr->ib_fmr.rkey = fmr->ib_fmr.lkey;
+
+	atomic_inc(&rdev->mr_count);
+	return &fmr->ib_fmr;
+fail:
+	kfree(fmr);
+	return ERR_PTR(rc);
+}
+
+int bnxt_re_map_phys_fmr(struct ib_fmr *ib_fmr, u64 *page_list, int list_len,
+			 u64 iova)
+{
+	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
+					     ib_fmr);
+	struct bnxt_re_dev *rdev = fmr->rdev;
+	int rc;
+
+	fmr->qplib_fmr.va = iova;
+	fmr->qplib_fmr.total_size = list_len * PAGE_SIZE;
+
+	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &fmr->qplib_fmr, page_list,
+			       list_len, true);
+	if (rc)
+		dev_err(rdev_to_dev(rdev), "Failed to map FMR for lkey = 0x%x!",
+			fmr->ib_fmr.lkey);
+	return rc;
+}
+
+int bnxt_re_unmap_fmr(struct list_head *fmr_list)
+{
+	struct bnxt_re_dev *rdev;
+	struct bnxt_re_fmr *fmr;
+	struct ib_fmr *ib_fmr;
+	int rc = 0;
+
+	/* Validate each FMRs inside the fmr_list */
+	list_for_each_entry(ib_fmr, fmr_list, list) {
+		fmr = container_of(ib_fmr, struct bnxt_re_fmr, ib_fmr);
+		rdev = fmr->rdev;
+
+		if (rdev) {
+			rc = bnxt_qplib_dereg_mrw(&rdev->qplib_res,
+						  &fmr->qplib_fmr, true);
+			if (rc)
+				break;
+		}
+	}
+	return rc;
+}
+
+int bnxt_re_dealloc_fmr(struct ib_fmr *ib_fmr)
+{
+	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
+					       ib_fmr);
+	struct bnxt_re_dev *rdev = fmr->rdev;
+	int rc;
+
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
+	if (rc)
+		dev_err(rdev_to_dev(rdev), "Failed to free FMR");
+
+	kfree(fmr);
+	atomic_dec(&rdev->mr_count);
+	return rc;
+}
+
+/* uverbs */
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
+				  u64 virt_addr, int mr_access_flags,
+				  struct ib_udata *udata)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_mr *mr;
+	struct ib_umem *umem;
+	u64 *pbl_tbl, *pbl_tbl_orig;
+	int i, umem_pgs, pages, page_shift, rc;
+	struct scatterlist *sg;
+	int entry;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->rdev = rdev;
+	mr->qplib_mr.pd = &pd->qplib_pd;
+	mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+	mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
+
+	umem = ib_umem_get(ib_pd->uobject->context, start, length,
+			   mr_access_flags, 0);
+	if (IS_ERR(umem)) {
+		dev_err(rdev_to_dev(rdev), "Failed to get umem");
+		rc = -EFAULT;
+		goto free_mr;
+	}
+	mr->ib_umem = umem;
+
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
+		goto release_umem;
+	}
+	/* The fixed portion of the rkey is the same as the lkey */
+	mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+	mr->qplib_mr.va = virt_addr;
+	umem_pgs = ib_umem_page_count(umem);
+	if (!umem_pgs) {
+		dev_err(rdev_to_dev(rdev), "umem is invalid!");
+		rc = -EINVAL;
+		goto free_mrw;
+	}
+	mr->qplib_mr.total_size = length;
+
+	pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
+	if (!pbl_tbl) {
+		rc = -EINVAL;
+		goto free_mrw;
+	}
+	pbl_tbl_orig = pbl_tbl;
+
+	page_shift = ilog2(umem->page_size);
+	if (umem->hugetlb) {
+		dev_err(rdev_to_dev(rdev), "umem hugetlb not supported!");
+		rc = -EFAULT;
+		goto fail;
+	}
+	if (umem->page_size != PAGE_SIZE) {
+		dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
+		rc = -EFAULT;
+		goto fail;
+	}
+	/* Map umem buf ptrs to the PBL */
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+		pages = sg_dma_len(sg) >> page_shift;
+		for (i = 0; i < pages; i++, pbl_tbl++)
+			*pbl_tbl = sg_dma_address(sg) + (i << page_shift);
+	}
+	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl_orig,
+			       umem_pgs, false);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to register user MR");
+		goto fail;
+	}
+
+	kfree(pbl_tbl_orig);
+
+	mr->ib_mr.lkey = mr->qplib_mr.lkey;
+	mr->ib_mr.rkey = mr->qplib_mr.lkey;
+	atomic_inc(&rdev->mr_count);
+
+	return &mr->ib_mr;
+fail:
+	kfree(pbl_tbl_orig);
+free_mrw:
+	bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+release_umem:
+	ib_umem_release(umem);
+free_mr:
+	kfree(mr);
+	return ERR_PTR(rc);
+}
+
 struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
 					   struct ib_udata *udata)
 {
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 14c9e02..ba9a4c9 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -70,6 +70,34 @@ struct bnxt_re_cq {
 	struct ib_umem		*umem;
 };
 
+struct bnxt_re_mr {
+	struct bnxt_re_dev	*rdev;
+	struct ib_mr		ib_mr;
+	struct ib_umem		*ib_umem;
+	struct bnxt_qplib_mrw	qplib_mr;
+	u32			npages;
+	u64			*pages;
+	struct bnxt_qplib_frpl	qplib_frpl;
+};
+
+struct bnxt_re_frpl {
+	struct bnxt_re_dev		*rdev;
+	struct bnxt_qplib_frpl		qplib_frpl;
+	u64				*page_list;
+};
+
+struct bnxt_re_fmr {
+	struct bnxt_re_dev	*rdev;
+	struct ib_fmr		ib_fmr;
+	struct bnxt_qplib_mrw	qplib_fmr;
+};
+
+struct bnxt_re_mw {
+	struct bnxt_re_dev	*rdev;
+	struct ib_mw		ib_mw;
+	struct bnxt_qplib_mrw	qplib_mw;
+};
+
 struct bnxt_re_ucontext {
 	struct bnxt_re_dev	*rdev;
 	struct ib_ucontext	ib_uctx;
@@ -119,6 +147,22 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 				struct ib_udata *udata);
 int bnxt_re_destroy_cq(struct ib_cq *cq);
 int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+		      unsigned int *sg_offset);
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
+			       u32 max_num_sg);
+int bnxt_re_dereg_mr(struct ib_mr *mr);
+struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+				 struct ib_fmr_attr *fmr_attr);
+int bnxt_re_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
+			 u64 iova);
+int bnxt_re_unmap_fmr(struct list_head *fmr_list);
+int bnxt_re_dealloc_fmr(struct ib_fmr *fmr);
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				  u64 virt_addr, int mr_access_flags,
+				  struct ib_udata *udata);
 struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
 					   struct ib_udata *udata);
 int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 31dc3f1..70b07ac 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -459,6 +459,17 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->create_cq		= bnxt_re_create_cq;
 	ibdev->destroy_cq		= bnxt_re_destroy_cq;
 	ibdev->req_notify_cq		= bnxt_re_req_notify_cq;
+
+	ibdev->get_dma_mr		= bnxt_re_get_dma_mr;
+	ibdev->dereg_mr			= bnxt_re_dereg_mr;
+	ibdev->alloc_mr			= bnxt_re_alloc_mr;
+	ibdev->map_mr_sg		= bnxt_re_map_mr_sg;
+	ibdev->alloc_fmr		= bnxt_re_alloc_fmr;
+	ibdev->map_phys_fmr		= bnxt_re_map_phys_fmr;
+	ibdev->unmap_fmr		= bnxt_re_unmap_fmr;
+	ibdev->dealloc_fmr		= bnxt_re_dealloc_fmr;
+
+	ibdev->reg_user_mr		= bnxt_re_reg_user_mr;
 	ibdev->alloc_ucontext		= bnxt_re_alloc_ucontext;
 	ibdev->dealloc_ucontext		= bnxt_re_dealloc_ucontext;
 	ibdev->mmap			= bnxt_re_mmap;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 2fb43c1..8a6f43f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -510,3 +510,292 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 	}
 	return 0;
 }
+
+/* MRW */
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_deallocate_key req;
+	struct creq_deallocate_key_resp *resp;
+	u16 cmd_flags = 0;
+
+	if (mrw->lkey == 0xFFFFFFFF) {
+		dev_info(&res->pdev->dev,
+			 "QPLIB: SP: Free a reserved lkey MRW");
+		return 0;
+	}
+
+	RCFW_CMD_PREP(req, DEALLOCATE_KEY, cmd_flags);
+
+	req.mrw_flags = mrw->type;
+
+	if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1)  ||
+	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+		req.key = cpu_to_le32(mrw->rkey);
+	else
+		req.key = cpu_to_le32(mrw->lkey);
+
+	resp = (struct creq_deallocate_key_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp) {
+		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR send failed");
+		return -EINVAL;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR failed ");
+		dev_err(&res->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+	/* Free the qplib's MRW memory */
+	if (mrw->hwq.max_elements)
+		bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+
+	return 0;
+}
+
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_allocate_mrw req;
+	struct creq_allocate_mrw_resp *resp;
+	u16 cmd_flags = 0;
+	unsigned long tmp;
+
+	RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
+
+	req.pd_id = cpu_to_le32(mrw->pd->id);
+	req.mrw_flags = mrw->type;
+	if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR &&
+	     mrw->flags & BNXT_QPLIB_FR_PMR) ||
+	    mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A ||
+	    mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B)
+		req.access = CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY;
+	tmp = (unsigned long)mrw;
+	req.mrw_handle = cpu_to_le64(tmp);
+
+	resp = (struct creq_allocate_mrw_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW send failed");
+		return -EINVAL;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+	if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1)  ||
+	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+		mrw->rkey = le32_to_cpu(resp->xid);
+	else
+		mrw->lkey = le32_to_cpu(resp->xid);
+	return 0;
+}
+
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+			 bool block)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_deregister_mr req;
+	struct creq_deregister_mr_resp *resp;
+	u16 cmd_flags = 0;
+	int rc;
+
+	RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
+
+	req.lkey = cpu_to_le32(mrw->lkey);
+	resp = (struct creq_deregister_mr_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, block);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR send failed");
+		return -EINVAL;
+	}
+	if (block)
+		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
+						    le16_to_cpu(req.cookie));
+	else
+		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
+						   le16_to_cpu(req.cookie));
+	if (!rc) {
+		/* Cmd timed out */
+		dev_err(&res->pdev->dev, "QPLIB: SP: DEREG_MR timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+
+	/* Free the qplib's MR memory */
+	if (mrw->hwq.max_elements) {
+		mrw->va = 0;
+		mrw->total_size = 0;
+		bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+	}
+
+	return 0;
+}
+
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+		      u64 *pbl_tbl, int num_pbls, bool block)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_register_mr req;
+	struct creq_register_mr_resp *resp;
+	u16 cmd_flags = 0, level;
+	int pg_ptrs, pages, i, rc;
+	dma_addr_t **pbl_ptr;
+	u32 pg_size;
+
+	if (num_pbls) {
+		pg_ptrs = roundup_pow_of_two(num_pbls);
+		pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
+		if (!pages)
+			pages++;
+
+		if (pages > MAX_PBL_LVL_1_PGS) {
+			dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
+			dev_err(&res->pdev->dev,
+				"requested (0x%x) exceeded max (0x%x)",
+				pages, MAX_PBL_LVL_1_PGS);
+			return -ENOMEM;
+		}
+		/* Free the hwq if it already exist, must be a rereg */
+		if (mr->hwq.max_elements)
+			bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+
+		mr->hwq.max_elements = pages;
+		rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0,
+					       &mr->hwq.max_elements,
+					       PAGE_SIZE, 0, PAGE_SIZE,
+					       HWQ_TYPE_CTX);
+		if (rc) {
+			dev_err(&res->pdev->dev,
+				"SP: Reg MR memory allocation failed");
+			return -ENOMEM;
+		}
+		/* Write to the hwq */
+		pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr;
+		for (i = 0; i < num_pbls; i++)
+			pbl_ptr[PTR_PG(i)][PTR_IDX(i)] =
+				(pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID;
+	}
+
+	RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags);
+
+	/* Configure the request */
+	if (mr->hwq.level == PBL_LVL_MAX) {
+		level = 0;
+		req.pbl = 0;
+		pg_size = PAGE_SIZE;
+	} else {
+		level = mr->hwq.level + 1;
+		req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
+		pg_size = mr->hwq.pbl[PBL_LVL_0].pg_size;
+	}
+	req.log2_pg_size_lvl = (level << CMDQ_REGISTER_MR_LVL_SFT) |
+			       ((ilog2(pg_size) <<
+				 CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT) &
+				CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK);
+	req.access = (mr->flags & 0xFFFF);
+	req.va = cpu_to_le64(mr->va);
+	req.key = cpu_to_le32(mr->lkey);
+	req.mr_size = cpu_to_le64(mr->total_size);
+
+	resp = (struct creq_register_mr_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, block);
+	if (!resp) {
+		dev_err(&res->pdev->dev, "SP: REG_MR send failed");
+		rc = -EINVAL;
+		goto fail;
+	}
+	if (block)
+		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
+						    le16_to_cpu(req.cookie));
+	else
+		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
+						   le16_to_cpu(req.cookie));
+	if (!rc) {
+		/* Cmd timed out */
+		dev_err(&res->pdev->dev, "SP: REG_MR timed out");
+		rc = -ETIMEDOUT;
+		goto fail;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&res->pdev->dev, "QPLIB: SP: REG_MR failed ");
+		dev_err(&res->pdev->dev,
+			"QPLIB: SP: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		rc = -EINVAL;
+		goto fail;
+	}
+	return 0;
+
+fail:
+	if (mr->hwq.max_elements)
+		bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+	return rc;
+}
+
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+					struct bnxt_qplib_frpl *frpl,
+					int max_pg_ptrs)
+{
+	int pg_ptrs, pages, rc;
+
+	/* Re-calculate the max to fit the HWQ allocation model */
+	pg_ptrs = roundup_pow_of_two(max_pg_ptrs);
+	pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
+	if (!pages)
+		pages++;
+
+	if (pages > MAX_PBL_LVL_1_PGS)
+		return -ENOMEM;
+
+	frpl->hwq.max_elements = pages;
+	rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL, 0,
+				       &frpl->hwq.max_elements, PAGE_SIZE, 0,
+				       PAGE_SIZE, HWQ_TYPE_CTX);
+	if (!rc)
+		frpl->max_pg_ptrs = pg_ptrs;
+
+	return rc;
+}
+
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+				       struct bnxt_qplib_frpl *frpl)
+{
+	bnxt_qplib_free_hwq(res->pdev, &frpl->hwq);
+	return 0;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 26eac17..3358f6d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -94,6 +94,34 @@ struct bnxt_qplib_ah {
 	u8				nw_type;
 };
 
+struct bnxt_qplib_mrw {
+	struct bnxt_qplib_pd		*pd;
+	int				type;
+	u32				flags;
+#define BNXT_QPLIB_FR_PMR		0x80000000
+	u32				lkey;
+	u32				rkey;
+#define BNXT_QPLIB_RSVD_LKEY		0xFFFFFFFF
+	u64				va;
+	u64				total_size;
+	u32				npages;
+	u64				mr_handle;
+	struct bnxt_qplib_hwq		hwq;
+};
+
+struct bnxt_qplib_frpl {
+	int				max_pg_ptrs;
+	struct bnxt_qplib_hwq		hwq;
+};
+
+#define BNXT_QPLIB_ACCESS_LOCAL_WRITE	BIT(0)
+#define BNXT_QPLIB_ACCESS_REMOTE_READ	BIT(1)
+#define BNXT_QPLIB_ACCESS_REMOTE_WRITE	BIT(2)
+#define BNXT_QPLIB_ACCESS_REMOTE_ATOMIC	BIT(3)
+#define BNXT_QPLIB_ACCESS_MW_BIND	BIT(4)
+#define BNXT_QPLIB_ACCESS_ZERO_BASED	BIT(5)
+#define BNXT_QPLIB_ACCESS_ON_DEMAND	BIT(6)
+
 int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
 			struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
 			struct bnxt_qplib_gid *gid);
@@ -115,4 +143,17 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 			    struct bnxt_qplib_dev_attr *attr);
 int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
 int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
+			 struct bnxt_qplib_mrw *mrw);
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+			 bool block);
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+		      u64 *pbl_tbl, int num_pbls, bool block);
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
+int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
+				 struct bnxt_qplib_mrw *mr, int max);
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+					struct bnxt_qplib_frpl *frpl, int max);
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+				       struct bnxt_qplib_frpl *frpl);
 #endif /* __BNXT_QPLIB_SP_H__*/
-- 
2.5.5

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox