Re: [RFC PATCH bpf-next v4 2/2] net: Add additional bit to support clockid_t timestamp type

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Martin KaFai Lau <martin.lau@linux.dev>
To: "Abhishek Chauhan (ABC)" <quic_abchauha@quicinc.com>,
	Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Andrew Halaney <ahalaney@redhat.com>,
	Martin KaFai Lau <martin.lau@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>, bpf <bpf@vger.kernel.org>,
	kernel@quicinc.com
Subject: Re: [RFC PATCH bpf-next v4 2/2] net: Add additional bit to support clockid_t timestamp type
Date: Thu, 18 Apr 2024 14:57:12 -0700	[thread overview]
Message-ID: <c6f33a36-1fac-4738-8a4f-c930b544ba62@linux.dev> (raw)
In-Reply-To: <cb922600-783e-4741-be85-260d1ded5bdb@quicinc.com>

On 4/18/24 1:10 PM, Abhishek Chauhan (ABC) wrote:
>>>   #ifdef CONFIG_NET_XGRESS
>>>   	__u8			tc_at_ingress:1;	/* See TC_AT_INGRESS_MASK */
>>>   	__u8			tc_skip_classify:1;
>>> @@ -1096,10 +1100,12 @@ struct sk_buff {
>>>    */
>>>   #ifdef __BIG_ENDIAN_BITFIELD
>>>   #define SKB_MONO_DELIVERY_TIME_MASK	(1 << 7)
>>> -#define TC_AT_INGRESS_MASK		(1 << 6)
>>> +#define SKB_TAI_DELIVERY_TIME_MASK	(1 << 6)
>>
>> SKB_TSTAMP_TYPE_BIT2_MASK?

nit. Shorten it to just SKB_TSTAMP_TYPE_MASK?

#ifdef __BIG_ENDIAN_BITFIELD
#define SKB_TSTAMP_TYPE_MASK	(3 << 6)
#define SKB_TSTAMP_TYPE_RSH	(6)	/* more on this later */
#else
#define SKB_TSTAMP_TYPE_MASK	(3)
#endif

>>
> I was thinking to keep it as TAI because it will confuse developers. I hope thats okay.

I don't think it is very useful to distinguish each bit, since the field is an
enum value now. It becomes more like "pkt_type:3" and its PKT_TYPE_MAX.

>>> +#define TC_AT_INGRESS_MASK		(1 << 5)
>>>   #else
>>>   #define SKB_MONO_DELIVERY_TIME_MASK	(1 << 0)
>>> -#define TC_AT_INGRESS_MASK		(1 << 1)
>>> +#define SKB_TAI_DELIVERY_TIME_MASK	(1 << 1)
>>> +#define TC_AT_INGRESS_MASK		(1 << 2)
>>>   #endif
>>>   #define SKB_BF_MONO_TC_OFFSET		offsetof(struct sk_buff, __mono_tc_offset)
>>>   
>>> @@ -4206,6 +4212,11 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
>>>   	case CLOCK_MONOTONIC:
>>>   		skb->tstamp_type = SKB_CLOCK_MONO;
>>>   		break;
>>> +	case CLOCK_TAI:
>>> +		skb->tstamp_type = SKB_CLOCK_TAI;
>>> +		break;
>>> +	default:
>>> +		WARN_ONCE(true, "clockid %d not supported", tstamp_type);
>>
>> and set to 0 and default tstamp_type?
>> Actually thinking about it. I feel if its unsupported just fall back to default is the correct thing. I will take care of this.
>>>   	}
>>>   }
>>
>>>   >
>>   @@ -9372,10 +9378,16 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
>>>   	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
>>>   			      SKB_BF_MONO_TC_OFFSET);
>>>   	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
>>> -				SKB_MONO_DELIVERY_TIME_MASK, 2);
>>> +				SKB_MONO_DELIVERY_TIME_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
>>> +	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
>>> +				SKB_MONO_DELIVERY_TIME_MASK, 3);
>>> +	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
>>> +				SKB_TAI_DELIVERY_TIME_MASK, 4);
>>>   	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
>>>   	*insn++ = BPF_JMP_A(1);
>>>   	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
>>> +	*insn++ = BPF_JMP_A(1);
>>> +	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_TAI);

With SKB_TSTAMP_TYPE_MASK defined like above, this could be simplified like this 
(untested):

static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
                                                      struct bpf_insn *insn)
{
	__u8 value_reg = si->dst_reg;
	__u8 skb_reg = si->src_reg;

	BUILD_BUG_ON(__SKB_CLOCK_MAX != BPF_SKB_TSTAMP_DELIVERY_TAI);
	*insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
	*insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK);
#ifdef __BIG_ENDIAN_BITFIELD
	*insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSH);
#else
	BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1));
#endif

	return insn;
}

>>>   
>>>   	return insn;
>>>   }
>>> @@ -9418,10 +9430,26 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
>>>   		__u8 tmp_reg = BPF_REG_AX;
>>>   
>>>   		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
>>> +		/*check if all three bits are set*/
>>>   		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
>>> -					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
>>> -		*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
>>> -					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
>>> +					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
>>> +					SKB_TAI_DELIVERY_TIME_MASK);
>>> +		/*if all 3 bits are set jump 3 instructions and clear the register */
>>> +		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
>>> +					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
>>> +					SKB_TAI_DELIVERY_TIME_MASK, 4);
>>> +		/*Now check Mono is set with ingress mask if so clear */
>>> +		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
>>> +					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 3);
>>> +		/*Now Check tai is set with ingress mask if so clear */
>>> +		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
>>> +					TC_AT_INGRESS_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
>>> +		/*Now Check tai and mono are set if so clear */
>>> +		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
>>> +					SKB_MONO_DELIVERY_TIME_MASK |
>>> +					SKB_TAI_DELIVERY_TIME_MASK, 1);

Same as the bpf_convert_tstamp_type_read, this could be simplified with 
SKB_TSTAMP_TYPE_MASK.

>>
>> This looks as if all JEQ result in "if so clear"?
>>
>> Is the goal to only do something different for the two bits being 0x1,
>> can we have a single test with a two-bit mask, rather than four tests?
>>
> I think Martin wanted to take care of TAI as well. I will wait for his comment here
> 
> My Goal was to take care of invalid combos which does not hold valid
> 1. If all 3 bits are set => invalid combo (Test case written is Insane)
> 2. If 2 bits are set (tai+mono)(Test case written is Insane) => this cannot happen (because clock base can only be one in skb)
> 3. If 2 bit are set (ingress + tai/mono) => This is existing logic + tai being added (clear tstamp in ingress)
> 4. For all other cases go ahead and fill in the tstamp in the dest register.

If the goal is to ensure that no new type is added without also adding a
matching BPF_SKB_TSTAMP_DELIVERY_XYZ, I would simplify the runtime bpf insns
here and use a BUILD_BUG_ON to catch it at compile time. Something like:

enum skb_tstamp_type {
         SKB_CLOCK_REAL, /* Time base in skb is REALTIME */
         SKB_CLOCK_MONO, /* Time base in skb is MONOTONIC */
  	SKB_CLOCK_TAI,  /* Time base in skb is TAI */
         __SKB_CLOCK_MAX = SKB_CLOCK_TAI,
};

/* Same one used in the bpf_convert_tstamp_type_read() above */
BUILD_BUG_ON(__SKB_CLOCK_MAX != BPF_SKB_TSTAMP_DELIVERY_TAI);

Another thing: the UDP test in test_tc_dtime.c probably needs to be adjusted,
because userspace is using CLOCK_TAI in SO_TXTIME and it is getting forwarded now.

next prev parent reply	other threads:[~2024-04-18 21:57 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-18  0:43 [RFC PATCH bpf-next v4 0/2] Replace mono_delivery_time with tstamp_type Abhishek Chauhan
2024-04-18  0:43 ` [RFC PATCH bpf-next v4 1/2] net: Rename mono_delivery_time to tstamp_type for scalabilty Abhishek Chauhan
2024-04-18 18:47   ` Willem de Bruijn
2024-04-18 19:58     ` Abhishek Chauhan (ABC)
2024-04-18 20:11       ` Willem de Bruijn
2024-04-18 20:38         ` Abhishek Chauhan (ABC)
2024-04-18 20:49           ` Willem de Bruijn
2024-04-18 20:51             ` Abhishek Chauhan (ABC)
2024-04-18  0:43 ` [RFC PATCH bpf-next v4 2/2] net: Add additional bit to support clockid_t timestamp type Abhishek Chauhan
2024-04-18 19:06   ` Willem de Bruijn
2024-04-18 20:10     ` Abhishek Chauhan (ABC)
2024-04-18 21:57       ` Martin KaFai Lau [this message]
2024-04-19  0:30         ` Abhishek Chauhan (ABC)
2024-04-20  1:13           ` Abhishek Chauhan (ABC)
2024-04-22 18:46             ` Martin KaFai Lau
2024-04-24 18:03               ` Abhishek Chauhan (ABC)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c6f33a36-1fac-4738-8a4f-c930b544ba62@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=ahalaney@redhat.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kernel@quicinc.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=martin.lau@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=quic_abchauha@quicinc.com \
    --cc=willemdebruijn.kernel@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.