From mboxrd@z Thu Jan 1 00:00:00 1970 From: Shannon Nelson Subject: Re: [Intel-wired-lan] [next-queue 08/10] ixgbe: process the Tx ipsec offload Date: Thu, 7 Dec 2017 10:50:24 -0800 Message-ID: <4c81b86d-44ed-b58d-9ff5-a6ebd9eee2ab@oracle.com> References: <1512452116-14795-1-git-send-email-shannon.nelson@oracle.com> <1512452116-14795-9-git-send-email-shannon.nelson@oracle.com> <1c5fddf2-4d93-d1cf-2ac0-d3b2bcb0c682@oracle.com> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Cc: intel-wired-lan , Jeff Kirsher , Steffen Klassert , Sowmini Varadhan , Netdev To: Alexander Duyck Return-path: Received: from aserp2120.oracle.com ([141.146.126.78]:60808 "EHLO aserp2120.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750996AbdLGSvT (ORCPT ); Thu, 7 Dec 2017 13:51:19 -0500 In-Reply-To: Content-Language: en-US Sender: netdev-owner@vger.kernel.org List-ID: On 12/7/2017 9:56 AM, Alexander Duyck wrote: You've suggested several things here, all good things to look into, which I will do, most now, some in the near future. Thanks! sln > On Wed, Dec 6, 2017 at 9:43 PM, Shannon Nelson > wrote: >> On 12/5/2017 10:13 AM, Alexander Duyck wrote: >>> >>> On Mon, Dec 4, 2017 at 9:35 PM, Shannon Nelson >>> wrote: >>>> >>>> If the skb has a security association referenced in the skb, then >>>> set up the Tx descriptor with the ipsec offload bits. While we're >>>> here, we fix an oddly named field in the context descriptor struct. >>>> >>>> Signed-off-by: Shannon Nelson >>>> --- >>>> drivers/net/ethernet/intel/ixgbe/ixgbe.h | 10 +++- >>>> drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 77 >>>> ++++++++++++++++++++++++++ >>>> drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 4 +- >>>> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 38 ++++++++++--- >>>> drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 2 +- >>>> 5 files changed, 118 insertions(+), 13 deletions(-) >>>> >>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h >>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe.h >>>> index 77f07dc..68097fe 100644 >>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h >>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h >>>> @@ -171,10 +171,11 @@ enum ixgbe_tx_flags { >>>> IXGBE_TX_FLAGS_CC = 0x08, >>>> IXGBE_TX_FLAGS_IPV4 = 0x10, >>>> IXGBE_TX_FLAGS_CSUM = 0x20, >>>> + IXGBE_TX_FLAGS_IPSEC = 0x40, >>>> >>>> /* software defined flags */ >>>> - IXGBE_TX_FLAGS_SW_VLAN = 0x40, >>>> - IXGBE_TX_FLAGS_FCOE = 0x80, >>>> + IXGBE_TX_FLAGS_SW_VLAN = 0x80, >>>> + IXGBE_TX_FLAGS_FCOE = 0x100, >>>> }; >>>> >>>> /* VLAN info */ >>>> @@ -1012,12 +1013,17 @@ void ixgbe_init_ipsec_offload(struct >>>> ixgbe_adapter *adapter); >>>> void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring, >>>> union ixgbe_adv_rx_desc *rx_desc, >>>> struct sk_buff *skb); >>>> +int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct sk_buff *skb, >>>> + __be16 protocol, struct ixgbe_ipsec_tx_data *itd); >>>> void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter); >>>> #else >>>> static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter >>>> *adapter) { }; >>>> static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring, >>>> union ixgbe_adv_rx_desc *rx_desc, >>>> struct sk_buff *skb) { }; >>>> +static inline int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, >>>> + struct sk_buff *skb, __be16 protocol, >>>> + struct ixgbe_ipsec_tx_data *itd) { >>>> return 0; }; >>>> static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { >>>> }; >>>> #endif /* CONFIG_XFRM_OFFLOAD */ >>>> #endif /* _IXGBE_H_ */ >>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c >>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c >>>> index fd06d9b..2a0dd7a 100644 >>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c >>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c >>>> @@ -703,12 +703,89 @@ static void ixgbe_ipsec_del_sa(struct xfrm_state >>>> *xs) >>>> } >>>> } >>>> >>>> +/** >>>> + * ixgbe_ipsec_offload_ok - can this packet use the xfrm hw offload >>>> + * @skb: current data packet >>>> + * @xs: pointer to transformer state struct >>>> + **/ >>>> +static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct >>>> xfrm_state *xs) >>>> +{ >>>> + if (xs->props.family == AF_INET) { >>>> + /* Offload with IPv4 options is not supported yet */ >>>> + if (ip_hdr(skb)->ihl > 5) >>> >>> >>> I would make this ihl != 5 instead of "> 5" since smaller values would >>> be invalid as well. >> >> >> Sure >> >> >>> >>>> + return false; >>>> + } else { >>>> + /* Offload with IPv6 extension headers is not support yet >>>> */ >>>> + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) >>>> + return false; >>>> + } >>>> + >>>> + return true; >>>> +} >>>> + >>>> static const struct xfrmdev_ops ixgbe_xfrmdev_ops = { >>>> .xdo_dev_state_add = ixgbe_ipsec_add_sa, >>>> .xdo_dev_state_delete = ixgbe_ipsec_del_sa, >>>> + .xdo_dev_offload_ok = ixgbe_ipsec_offload_ok, >>>> }; >>>> >>>> /** >>>> + * ixgbe_ipsec_tx - setup Tx flags for ipsec offload >>>> + * @tx_ring: outgoing context >>>> + * @skb: current data packet >>>> + * @protocol: network protocol >>>> + * @itd: ipsec Tx data for later use in building context descriptor >>>> + **/ >>>> +int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct sk_buff *skb, >>>> + __be16 protocol, struct ixgbe_ipsec_tx_data *itd) >>>> +{ >>>> + struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev); >>>> + struct ixgbe_ipsec *ipsec = adapter->ipsec; >>>> + struct xfrm_state *xs; >>>> + struct tx_sa *tsa; >>>> + >>>> + if (!skb->sp->len) { >>>> + netdev_err(tx_ring->netdev, "%s: no xfrm state len = >>>> %d\n", >>>> + __func__, skb->sp->len); >>>> + return 0; >>>> + } >>>> + >>>> + xs = xfrm_input_state(skb); >>>> + if (!xs) { >>>> + netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs >>>> = %p\n", >>>> + __func__, xs); >>>> + return 0; >>>> + } >>>> + >>>> + itd->sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX; >>>> + if (itd->sa_idx > IXGBE_IPSEC_MAX_SA_COUNT) { >>>> + netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d >>>> handle=%lu\n", >>>> + __func__, itd->sa_idx, >>>> xs->xso.offload_handle); >>>> + return 0; >>>> + } >>>> + >>>> + tsa = &ipsec->tx_tbl[itd->sa_idx]; >>>> + if (!tsa->used) { >>>> + netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n", >>>> + __func__, itd->sa_idx); >>>> + return 0; >>>> + } >>>> + >>>> + itd->flags = 0; >>>> + if (xs->id.proto == IPPROTO_ESP) { >>>> + itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP | >>>> + IXGBE_ADVTXD_TUCMD_L4T_TCP; >>> >>> >>> Why is the TCP value being set here? This doesn't seem correct either. >>> This implies TCP a TCP offload. It seems like this should only be >>> setting ESP. >> >> >> Honestly? Because when I was testing that, it didn't work without it. This >> was one of the things I was going to come back to when I started working on >> the csum and tso support. > > We might want to try testing with that dropped to see if we need it or > not. I would suspect not since I would imagine this would cause bad > things for non-TCP traffic. Also the inner L4 header shouldn't matter > unless you are trying to offload it. > >>> >>>> + if (protocol == htons(ETH_P_IP)) >>>> + itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4; >>> >>> >>> Does the IPsec offload need to know if the frame is v4 or v6? I'm just >>> wondering if it does or not. >> >> >> Yes, I believe this is how it knows how much header to skip to find the ESP >> header. However, I'll test that and see if it can come out. > > Like I mentioned last time it might be better to have this handled in > ixgbe_tx_csum. If it is harmless we can probably just include it > there. We should be able to do it in the block after the no_csum > label. I'd be curious if not doing this up until now might have other > effects such as impacting RSS since I know the whole reason for us > having to do the CC stuff anyway was to actually get header split to > work correctly with PF/VF loopback packets. It wouldn't surprise me if > setting these fields defines the packet type received on the other > end. > >>> If not then this probably isn't needed. >>> One thought on this line is you might look at moving it into >>> ixgbe_tx_csum. If setting the bit is harmless without setting IXSM we >>> might look at moving it into the end of ixgbe_tx_csum and just make it >>> compare against first->protocol there. >> >> >>> >>>> + itd->trailer_len = xs->props.trailer_len; >>>> + } >>>> + if (tsa->encrypt) >>>> + itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN; >>>> + >>>> + return 1; >>>> +} >>>> + >>>> +/** >>>> * ixgbe_ipsec_rx - decode ipsec bits from Rx descriptor >>>> * @rx_ring: receiving ring >>>> * @rx_desc: receive data descriptor >>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c >>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c >>>> index f1bfae0..d7875b3 100644 >>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c >>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c >>>> @@ -1261,7 +1261,7 @@ void ixgbe_clear_interrupt_scheme(struct >>>> ixgbe_adapter *adapter) >>>> } >>>> >>>> void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, >>>> - u32 fcoe_sof_eof, u32 type_tucmd, u32 >>>> mss_l4len_idx) >>>> + u32 fceof_saidx, u32 type_tucmd, u32 >>>> mss_l4len_idx) >>>> { >>>> struct ixgbe_adv_tx_context_desc *context_desc; >>>> u16 i = tx_ring->next_to_use; >>>> @@ -1275,7 +1275,7 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, >>>> u32 vlan_macip_lens, >>>> type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; >>>> >>>> context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); >>>> - context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof); >>>> + context_desc->fceof_saidx = cpu_to_le32(fceof_saidx); >>>> context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); >>>> context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); >>>> } >>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c >>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c >>>> index 60f9f2d..c857594 100644 >>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c >>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c >>>> @@ -7659,9 +7659,10 @@ static void ixgbe_service_task(struct work_struct >>>> *work) >>>> >>>> static int ixgbe_tso(struct ixgbe_ring *tx_ring, >>>> struct ixgbe_tx_buffer *first, >>>> - u8 *hdr_len) >>>> + u8 *hdr_len, >>>> + struct ixgbe_ipsec_tx_data *itd) >>>> { >>>> - u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; >>>> + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx, fceof_saidx = 0; >>>> struct sk_buff *skb = first->skb; >>>> union { >>>> struct iphdr *v4; >>>> @@ -7740,7 +7741,12 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, >>>> vlan_macip_lens |= (ip.hdr - skb->data) << >>>> IXGBE_ADVTXD_MACLEN_SHIFT; >>>> vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; >>>> >>>> - ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, >>>> + if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) { >>>> + fceof_saidx |= itd->sa_idx; >>>> + type_tucmd |= itd->flags | itd->trailer_len; >>>> + } > > So just a thought. Why bother with the TX_FLAGS_CHECK at all? It seems > like in the case that the flag isn't set you would have itd->sa_idx > equal to 0 anyway so it would still be the same result wouldn't it? It > would save you from having to zero both fceof_saidx and itd->sa_idx > since you could just pass itd->sa_idx and save yourself the extra > variable. > > Also if flags and trailer_len are both being written to the same > location why not combine them in your structure into one single 32 bit > entry? It would allow you to essentially reduce this to one OR and you > could just pass itd->sa_idx directly which should be a pretty > significant savings in terms of instructions and cycles. Also you > might want to consider bumping itd->sa_idx up to a 32b value. It will > possibly cost you a cycle or so to convert the 16b value to a 32b > value before writing it. If you merge the flags and trailer length you > should have the space to spare to bump up the size. > >>>> + >>>> + ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, >>>> type_tucmd, >>>> mss_l4len_idx); >>>> >>>> return 1; >>>> @@ -7756,10 +7762,12 @@ static inline bool ixgbe_ipv6_csum_is_sctp(struct >>>> sk_buff *skb) >>>> } >>>> >>>> static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, >>>> - struct ixgbe_tx_buffer *first) >>>> + struct ixgbe_tx_buffer *first, >>>> + struct ixgbe_ipsec_tx_data *itd) >>>> { >>>> struct sk_buff *skb = first->skb; >>>> u32 vlan_macip_lens = 0; >>>> + u32 fceof_saidx = 0; >>>> u32 type_tucmd = 0; >>>> >>>> if (skb->ip_summed != CHECKSUM_PARTIAL) { >>>> @@ -7800,7 +7808,12 @@ static void ixgbe_tx_csum(struct ixgbe_ring >>>> *tx_ring, >>>> vlan_macip_lens |= skb_network_offset(skb) << >>>> IXGBE_ADVTXD_MACLEN_SHIFT; >>>> vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; >>>> >>>> - ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, 0); >>>> + if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) { >>>> + fceof_saidx |= itd->sa_idx; >>>> + type_tucmd |= itd->flags | itd->trailer_len; >>>> + } >>>> + >>>> + ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, >>>> type_tucmd, 0); >>>> } >>>> >>>> #define IXGBE_SET_FLAG(_input, _flag, _result) \ >>>> @@ -7843,11 +7856,16 @@ static void ixgbe_tx_olinfo_status(union >>>> ixgbe_adv_tx_desc *tx_desc, >>>> IXGBE_TX_FLAGS_CSUM, >>>> IXGBE_ADVTXD_POPTS_TXSM); >>>> >>>> - /* enble IPv4 checksum for TSO */ >>>> + /* enable IPv4 checksum for TSO */ >>>> olinfo_status |= IXGBE_SET_FLAG(tx_flags, >>>> IXGBE_TX_FLAGS_IPV4, >>>> IXGBE_ADVTXD_POPTS_IXSM); >>>> >>>> + /* enable IPsec */ >>>> + olinfo_status |= IXGBE_SET_FLAG(tx_flags, >>>> + IXGBE_TX_FLAGS_IPSEC, >>>> + IXGBE_ADVTXD_POPTS_IPSEC); >>>> + >>>> /* >>>> * Check Context must be set if Tx switch is enabled, which it >>>> * always is for case where virtual functions are running >>>> @@ -8306,6 +8324,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff >>>> *skb, >>>> u32 tx_flags = 0; >>>> unsigned short f; >>>> u16 count = TXD_USE_COUNT(skb_headlen(skb)); >>>> + struct ixgbe_ipsec_tx_data ipsec_tx = { 0 }; >>>> __be16 protocol = skb->protocol; >>>> u8 hdr_len = 0; >>>> >>>> @@ -8394,6 +8413,9 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff >>>> *skb, >>>> } >>>> } >>>> >>>> + if (skb->sp && ixgbe_ipsec_tx(tx_ring, skb, protocol, &ipsec_tx)) >>>> + tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC; >>> >>> >>> You might just want to pull the skb->sp check into ixgbe_ipsec_tx and >>> could pass tx_flags as a part of the first buffer. It doesn't really >>> matter anyway as most of this will just be inlined so it will all end >>> up a part of the same function anyway. >> >> >> Since the function is defined in a different .o file, are you sure it will >> get inlined? I put the skb->sp check here to make sure we don't do an >> unnecessary jump. > > You're right. I forgot you are defining this in a different file. > > Still I would like to see this moved down though. Where it is at > doesn't really flow with everything else since FCoE and this aren't > likely to ever interact so I would rather us check for FCoE and then > get into the IPsec logic. > >>> >>> Also I would move this down so that it is handled after the fields in >>> the first buffer_info structure are set. Then this can ll just fall >>> inline with the TSO block and get handled there. >>> >>>> + >>>> /* record initial flags and protocol */ >>>> first->tx_flags = tx_flags; >>>> first->protocol = protocol; >>>> @@ -8410,11 +8432,11 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff >>>> *skb, >>>> } >>>> >>>> #endif /* IXGBE_FCOE */ >>> >>> >>> So if you move the function down here it will help to avoid any other >>> complication. In addition you could follow the same logic that we do >>> for ixgbe_tso/fso so you could drop the frame instead of transmitting >>> it if it is requesting a bad offload. >> >> >> Sure >> >> sln >> >> >>> >>>> - tso = ixgbe_tso(tx_ring, first, &hdr_len); >>>> + tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx); >>>> if (tso < 0) >>>> goto out_drop; >>>> else if (!tso) >>>> - ixgbe_tx_csum(tx_ring, first); >>>> + ixgbe_tx_csum(tx_ring, first, &ipsec_tx); >>>> >>>> /* add the ATR filter if ATR is on */ >>>> if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state)) >>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h >>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h >>>> index 3df0763..0ac725fa 100644 >>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h >>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h >>>> @@ -2856,7 +2856,7 @@ union ixgbe_adv_rx_desc { >>>> /* Context descriptors */ >>>> struct ixgbe_adv_tx_context_desc { >>>> __le32 vlan_macip_lens; >>>> - __le32 seqnum_seed; >>>> + __le32 fceof_saidx; >>>> __le32 type_tucmd_mlhl; >>>> __le32 mss_l4len_idx; >>>> }; >>>> -- >>>> 2.7.4 >>>> >>>> _______________________________________________ >>>> Intel-wired-lan mailing list >>>> Intel-wired-lan@osuosl.org >>>> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan