Linux DTrace development list
 help / color / mirror / Atom feed
* [PATCH] cg: ensure string results in a ternary are padded to strsize
@ 2025-02-07  6:11 Kris Van Hees
  2025-02-07 19:17 ` Eugene Loh
  0 siblings, 1 reply; 4+ messages in thread
From: Kris Van Hees @ 2025-02-07  6:11 UTC (permalink / raw)
  To: dtrace, dtrace-devel

The result of a string ternary was copied from either left or right
value using a memcpy() of strsize, but that could result in garbage
being included in the result beyond the terminating 0-byte of the
string value.  If the result was e.g. assigned to a char-array
translator member, that garbage could affect consumer output.

The ternary will now copy the string using dt_cg_strcpy() which ensures
that any string < strsize will be padded with 0-bytes up to strsize.

Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
---
 libdtrace/dt_cg.c | 68 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 16 deletions(-)

diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
index a5c9aa09..b48e27f0 100644
--- a/libdtrace/dt_cg.c
+++ b/libdtrace/dt_cg.c
@@ -1371,6 +1371,19 @@ dt_cg_fill_gap(dt_pcb_t *pcb, int gap)
 		emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, off, 0));
 }
 
+/*
+ * Store a pointer to the 'memory block of zeros' in reg.
+ */
+static void
+dt_cg_zerosptr(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
+{
+	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
+	dt_ident_t	*zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
+
+	dt_cg_access_dctx(reg, dlp, drp, DCTX_STRTAB);
+	emite(dlp, BPF_ALU64_IMM(BPF_ADD, reg, -1), zero_off);
+}
+
 static void
 dt_cg_memcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src, size_t size)
 {
@@ -1394,6 +1407,43 @@ dt_cg_memcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src, size_t size)
 	dt_regset_free(drp, BPF_REG_0);
 }
 
+/*
+ * Copy a string from the pointer stored in the src register to the pointer
+ * stored in the dst register.  At most strsize characters will be copied, and
+ * if the source string is less than strsize characters, the remainder will be
+ * filled with 0s.
+ */
+static void
+dt_cg_strcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src)
+{
+	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
+	uint_t		lbl_ok = dt_irlist_label(dlp);
+	size_t		size = dtp->dt_options[DTRACEOPT_STRSIZE];
+
+	if (dt_regset_xalloc_args(drp) == -1)
+		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
+
+	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dst));
+	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, size));
+	emit(dlp,  BPF_MOV_REG(BPF_REG_3, src));
+	dt_regset_xalloc(drp, BPF_REG_0);
+	emit(dlp,  BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel_str]));
+
+	emit(dlp,  BPF_BRANCH_IMM(BPF_JSGE, BPF_REG_0, 0, lbl_ok));
+	dt_cg_probe_error(yypcb, DTRACEFLT_BADADDR, DT_ISREG, src);
+
+	emitl(dlp, lbl_ok,
+		   BPF_NOP());
+	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dst));
+	emit(dlp,  BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0));
+	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, size));
+	emit(dlp,  BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0));
+	dt_cg_zerosptr(BPF_REG_3, dlp, drp);
+	emit(dlp,  BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel]));
+	dt_regset_free(drp, BPF_REG_0);
+	dt_regset_free_args(drp);
+}
+
 static void
 dt_cg_spill_store(int reg)
 {
@@ -3040,19 +3090,6 @@ dt_cg_pop_stack(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
 	dt_regset_free(drp, treg);
 }
 
-/*
- * Store a pointer to the 'memory block of zeros' in reg.
- */
-static void
-dt_cg_zerosptr(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
-{
-	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
-	dt_ident_t	*zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
-
-	dt_cg_access_dctx(reg, dlp, drp, DCTX_STRTAB);
-	emite(dlp, BPF_ALU64_IMM(BPF_ADD, reg, -1), zero_off);
-}
-
 /*
  * Generate code to promote signed scalars (size < 64 bits) to native register
  * size (64 bits).
@@ -4603,7 +4640,7 @@ dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 	 * dn_right).
 	 */
 	if (dt_node_is_string(dnp)) {
-		uint_t lbl_null = dt_irlist_label(dlp);
+		uint_t	lbl_null = dt_irlist_label(dlp);
 
 		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, dnp->dn_reg, 0, lbl_null));
 
@@ -4619,8 +4656,7 @@ dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
 		dt_cg_access_dctx(dnp->dn_reg, dlp, drp, DCTX_MEM);
 		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
 
-		dt_cg_memcpy(dlp, drp, dnp->dn_reg, BPF_REG_0,
-			     yypcb->pcb_hdl->dt_options[DTRACEOPT_STRSIZE]);
+		dt_cg_strcpy(dlp, drp, dnp->dn_reg, BPF_REG_0);
 
 		emitl(dlp, lbl_null,
 			   BPF_NOP());
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] cg: ensure string results in a ternary are padded to strsize
  2025-02-07  6:11 [PATCH] cg: ensure string results in a ternary are padded to strsize Kris Van Hees
@ 2025-02-07 19:17 ` Eugene Loh
  2025-02-07 20:22   ` Kris Van Hees
  0 siblings, 1 reply; 4+ messages in thread
From: Eugene Loh @ 2025-02-07 19:17 UTC (permalink / raw)
  To: Kris Van Hees, dtrace, dtrace-devel

Reviewed-by: Eugene Loh <eugene.loh@oracle.com>

That said:

*)  Is it possible to construct a test?

*)  Has there been an audit to ferret out other instances of this 
problem?  Also, I'm confused what position we're taking here. We're 
saying that it's possible for a string (which in D has strsize) to have 
garbage after the NUL byte, but we'll deal with this situation only in 
the case that it's an argument of a ternary operation?  Why do we not 
say instead that a strsize string will not have garbage after the NUL 
byte, so that a strsize copy of a string will be safe (regardless of 
whether it's a ternary op)?

*)  What position are we taking here on BPF register pressure? E.g., 
what if dst were %r0 (okay, let's hope not) or even just %r1-%r5 (even 
just %r4 or %r5)?  One position is that we don't do a robust job of BPF 
reg management anyhow and someday we're going to have to clean that up 
and so being sloppy now is okay or even necessary.  Another position is 
that we can add a little code to improve things (even if other areas of 
dt_cg.c vary considerably in quality).  E.g., one can fill-spill regs 
between the two function calls, costing extra BPF instructions only if 
needed. (That's already a pretty common practice in dt_cg.c.)  For extra 
robustness (beyond what we currently do in dt_cg.c), one can move 
%r1=dst and %r3=src in whatever order makes sense (more complicated and 
less likely to be useful).  Anyhow, fill-spill regs between function 
calls is a pretty common practice in dt_cg.c and I would think would 
make to do here.

On 2/7/25 01:11, Kris Van Hees wrote:
> The result of a string ternary was copied from either left or right
> value using a memcpy() of strsize, but that could result in garbage
> being included in the result beyond the terminating 0-byte of the
> string value.  If the result was e.g. assigned to a char-array
> translator member, that garbage could affect consumer output.
>
> The ternary will now copy the string using dt_cg_strcpy() which ensures
> that any string < strsize will be padded with 0-bytes up to strsize.
>
> Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
> ---
>   libdtrace/dt_cg.c | 68 ++++++++++++++++++++++++++++++++++++-----------
>   1 file changed, 52 insertions(+), 16 deletions(-)
>
> diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> index a5c9aa09..b48e27f0 100644
> --- a/libdtrace/dt_cg.c
> +++ b/libdtrace/dt_cg.c
> @@ -1371,6 +1371,19 @@ dt_cg_fill_gap(dt_pcb_t *pcb, int gap)
>   		emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, off, 0));
>   }
>   
> +/*
> + * Store a pointer to the 'memory block of zeros' in reg.
> + */
> +static void
> +dt_cg_zerosptr(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
> +{
> +	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> +	dt_ident_t	*zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
> +
> +	dt_cg_access_dctx(reg, dlp, drp, DCTX_STRTAB);
> +	emite(dlp, BPF_ALU64_IMM(BPF_ADD, reg, -1), zero_off);
> +}
> +
>   static void
>   dt_cg_memcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src, size_t size)
>   {
> @@ -1394,6 +1407,43 @@ dt_cg_memcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src, size_t size)
>   	dt_regset_free(drp, BPF_REG_0);
>   }
>   
> +/*
> + * Copy a string from the pointer stored in the src register to the pointer
> + * stored in the dst register.  At most strsize characters will be copied, and
> + * if the source string is less than strsize characters, the remainder will be
> + * filled with 0s.
> + */
> +static void
> +dt_cg_strcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src)
> +{
> +	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> +	uint_t		lbl_ok = dt_irlist_label(dlp);
> +	size_t		size = dtp->dt_options[DTRACEOPT_STRSIZE];
> +
> +	if (dt_regset_xalloc_args(drp) == -1)
> +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> +
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dst));
> +	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, size));
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_3, src));
> +	dt_regset_xalloc(drp, BPF_REG_0);
> +	emit(dlp,  BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel_str]));
> +
> +	emit(dlp,  BPF_BRANCH_IMM(BPF_JSGE, BPF_REG_0, 0, lbl_ok));
> +	dt_cg_probe_error(yypcb, DTRACEFLT_BADADDR, DT_ISREG, src);
> +
> +	emitl(dlp, lbl_ok,
> +		   BPF_NOP());
> +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dst));
> +	emit(dlp,  BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0));
> +	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, size));
> +	emit(dlp,  BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0));
> +	dt_cg_zerosptr(BPF_REG_3, dlp, drp);
> +	emit(dlp,  BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel]));
> +	dt_regset_free(drp, BPF_REG_0);
> +	dt_regset_free_args(drp);
> +}
> +
>   static void
>   dt_cg_spill_store(int reg)
>   {
> @@ -3040,19 +3090,6 @@ dt_cg_pop_stack(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
>   	dt_regset_free(drp, treg);
>   }
>   
> -/*
> - * Store a pointer to the 'memory block of zeros' in reg.
> - */
> -static void
> -dt_cg_zerosptr(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
> -{
> -	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> -	dt_ident_t	*zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
> -
> -	dt_cg_access_dctx(reg, dlp, drp, DCTX_STRTAB);
> -	emite(dlp, BPF_ALU64_IMM(BPF_ADD, reg, -1), zero_off);
> -}
> -
>   /*
>    * Generate code to promote signed scalars (size < 64 bits) to native register
>    * size (64 bits).
> @@ -4603,7 +4640,7 @@ dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>   	 * dn_right).
>   	 */
>   	if (dt_node_is_string(dnp)) {
> -		uint_t lbl_null = dt_irlist_label(dlp);
> +		uint_t	lbl_null = dt_irlist_label(dlp);
>   
>   		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, dnp->dn_reg, 0, lbl_null));
>   
> @@ -4619,8 +4656,7 @@ dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
>   		dt_cg_access_dctx(dnp->dn_reg, dlp, drp, DCTX_MEM);
>   		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
>   
> -		dt_cg_memcpy(dlp, drp, dnp->dn_reg, BPF_REG_0,
> -			     yypcb->pcb_hdl->dt_options[DTRACEOPT_STRSIZE]);
> +		dt_cg_strcpy(dlp, drp, dnp->dn_reg, BPF_REG_0);
>   
>   		emitl(dlp, lbl_null,
>   			   BPF_NOP());

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] cg: ensure string results in a ternary are padded to strsize
  2025-02-07 19:17 ` Eugene Loh
@ 2025-02-07 20:22   ` Kris Van Hees
  2025-02-07 21:00     ` Kris Van Hees
  0 siblings, 1 reply; 4+ messages in thread
From: Kris Van Hees @ 2025-02-07 20:22 UTC (permalink / raw)
  To: Eugene Loh; +Cc: Kris Van Hees, dtrace, dtrace-devel

On Fri, Feb 07, 2025 at 02:17:44PM -0500, Eugene Loh wrote:
> Reviewed-by: Eugene Loh <eugene.loh@oracle.com>
> 
> That said:
> 
> *)  Is it possible to construct a test?

The execargs patch provides a test for this.  The situation here is quite
unusual because we end up assigning a DTrace string to a char array, which
is an operation that you generally cannot do in D.

> *)  Has there been an audit to ferret out other instances of this problem? 
> Also, I'm confused what position we're taking here. We're saying that it's
> possible for a string (which in D has strsize) to have garbage after the NUL
> byte, but we'll deal with this situation only in the case that it's an
> argument of a ternary operation?  Why do we not say instead that a strsize
> string will not have garbage after the NUL byte, so that a strsize copy of a
> string will be safe (regardless of whether it's a ternary op)?

I am not sure what you mean with 'taking a position'.  This is a fix for a
real problem, that is unusual (see above) yet occurs and results in trace()
output to be wrong.  It may be possible that there are other cases where a
similar problem may occur, but well, none have popped up as far as I know
(partly witnessed that this is the first time it is noticed AFAIK and that
is the reason I am fixing it).

> *)  What position are we taking here on BPF register pressure? E.g., what if
> dst were %r0 (okay, let's hope not) or even just %r1-%r5 (even just %r4 or
> %r5)?  One position is that we don't do a robust job of BPF reg management
> anyhow and someday we're going to have to clean that up and so being sloppy
> now is okay or even necessary.  Another position is that we can add a little
> code to improve things (even if other areas of dt_cg.c vary considerably in
> quality).  E.g., one can fill-spill regs between the two function calls,
> costing extra BPF instructions only if needed. (That's already a pretty
> common practice in dt_cg.c.)  For extra robustness (beyond what we currently
> do in dt_cg.c), one can move %r1=dst and %r3=src in whatever order makes
> sense (more complicated and less likely to be useful).  Anyhow, fill-spill
> regs between function calls is a pretty common practice in dt_cg.c and I
> would think would make to do here.

I'll look closer at the register use here but I don't think there is a
problem actually.  dt_cg_strcpy() is not meant to be a general use function
at this point (perhaps some day it will be).  I can also move this into
the dt_op_ternary() funnction, but I felt that mighht make it at bit too
complex.

> On 2/7/25 01:11, Kris Van Hees wrote:
> > The result of a string ternary was copied from either left or right
> > value using a memcpy() of strsize, but that could result in garbage
> > being included in the result beyond the terminating 0-byte of the
> > string value.  If the result was e.g. assigned to a char-array
> > translator member, that garbage could affect consumer output.
> > 
> > The ternary will now copy the string using dt_cg_strcpy() which ensures
> > that any string < strsize will be padded with 0-bytes up to strsize.
> > 
> > Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
> > ---
> >   libdtrace/dt_cg.c | 68 ++++++++++++++++++++++++++++++++++++-----------
> >   1 file changed, 52 insertions(+), 16 deletions(-)
> > 
> > diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> > index a5c9aa09..b48e27f0 100644
> > --- a/libdtrace/dt_cg.c
> > +++ b/libdtrace/dt_cg.c
> > @@ -1371,6 +1371,19 @@ dt_cg_fill_gap(dt_pcb_t *pcb, int gap)
> >   		emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, off, 0));
> >   }
> > +/*
> > + * Store a pointer to the 'memory block of zeros' in reg.
> > + */
> > +static void
> > +dt_cg_zerosptr(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
> > +{
> > +	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> > +	dt_ident_t	*zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
> > +
> > +	dt_cg_access_dctx(reg, dlp, drp, DCTX_STRTAB);
> > +	emite(dlp, BPF_ALU64_IMM(BPF_ADD, reg, -1), zero_off);
> > +}
> > +
> >   static void
> >   dt_cg_memcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src, size_t size)
> >   {
> > @@ -1394,6 +1407,43 @@ dt_cg_memcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src, size_t size)
> >   	dt_regset_free(drp, BPF_REG_0);
> >   }
> > +/*
> > + * Copy a string from the pointer stored in the src register to the pointer
> > + * stored in the dst register.  At most strsize characters will be copied, and
> > + * if the source string is less than strsize characters, the remainder will be
> > + * filled with 0s.
> > + */
> > +static void
> > +dt_cg_strcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src)
> > +{
> > +	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> > +	uint_t		lbl_ok = dt_irlist_label(dlp);
> > +	size_t		size = dtp->dt_options[DTRACEOPT_STRSIZE];
> > +
> > +	if (dt_regset_xalloc_args(drp) == -1)
> > +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > +
> > +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dst));
> > +	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, size));
> > +	emit(dlp,  BPF_MOV_REG(BPF_REG_3, src));
> > +	dt_regset_xalloc(drp, BPF_REG_0);
> > +	emit(dlp,  BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel_str]));
> > +
> > +	emit(dlp,  BPF_BRANCH_IMM(BPF_JSGE, BPF_REG_0, 0, lbl_ok));
> > +	dt_cg_probe_error(yypcb, DTRACEFLT_BADADDR, DT_ISREG, src);
> > +
> > +	emitl(dlp, lbl_ok,
> > +		   BPF_NOP());
> > +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dst));
> > +	emit(dlp,  BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0));
> > +	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, size));
> > +	emit(dlp,  BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0));
> > +	dt_cg_zerosptr(BPF_REG_3, dlp, drp);
> > +	emit(dlp,  BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel]));
> > +	dt_regset_free(drp, BPF_REG_0);
> > +	dt_regset_free_args(drp);
> > +}
> > +
> >   static void
> >   dt_cg_spill_store(int reg)
> >   {
> > @@ -3040,19 +3090,6 @@ dt_cg_pop_stack(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
> >   	dt_regset_free(drp, treg);
> >   }
> > -/*
> > - * Store a pointer to the 'memory block of zeros' in reg.
> > - */
> > -static void
> > -dt_cg_zerosptr(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
> > -{
> > -	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> > -	dt_ident_t	*zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
> > -
> > -	dt_cg_access_dctx(reg, dlp, drp, DCTX_STRTAB);
> > -	emite(dlp, BPF_ALU64_IMM(BPF_ADD, reg, -1), zero_off);
> > -}
> > -
> >   /*
> >    * Generate code to promote signed scalars (size < 64 bits) to native register
> >    * size (64 bits).
> > @@ -4603,7 +4640,7 @@ dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> >   	 * dn_right).
> >   	 */
> >   	if (dt_node_is_string(dnp)) {
> > -		uint_t lbl_null = dt_irlist_label(dlp);
> > +		uint_t	lbl_null = dt_irlist_label(dlp);
> >   		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, dnp->dn_reg, 0, lbl_null));
> > @@ -4619,8 +4656,7 @@ dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> >   		dt_cg_access_dctx(dnp->dn_reg, dlp, drp, DCTX_MEM);
> >   		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
> > -		dt_cg_memcpy(dlp, drp, dnp->dn_reg, BPF_REG_0,
> > -			     yypcb->pcb_hdl->dt_options[DTRACEOPT_STRSIZE]);
> > +		dt_cg_strcpy(dlp, drp, dnp->dn_reg, BPF_REG_0);
> >   		emitl(dlp, lbl_null,
> >   			   BPF_NOP());

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] cg: ensure string results in a ternary are padded to strsize
  2025-02-07 20:22   ` Kris Van Hees
@ 2025-02-07 21:00     ` Kris Van Hees
  0 siblings, 0 replies; 4+ messages in thread
From: Kris Van Hees @ 2025-02-07 21:00 UTC (permalink / raw)
  To: Kris Van Hees; +Cc: Eugene Loh, dtrace, dtrace-devel

Actually, I am going to replace this patch with something totally different.
While it is essentially correct, it is just not worth it given that assignment
from string to char[] is not allowed in D, and somehow translators do it
anyway (in a roundabout way).  I prefer to just not cause this complication,
and instead will be changing the member types in psinfo and lwpsinfo to not
use char-arrays and instead use string (as is done in the io provider 
translated structs).  That avoids this complication in a much more natural
way.

I will also introduce a patch to change the ternary for string types to use
a string copy rather than using memcpy, because we really should be using the
string primitives (probe_read_kerneL_str instead of probe_read_kernel, or
even probe_read).  That should do the correct thing anyway, and when we are
using string types, we do not need to worry about training garbage because it
is ignored correctly in those cases.

The patch I sent to fix the consume code is still valid though because people
ca nstill use char arrays for other things.

On Fri, Feb 07, 2025 at 03:22:32PM -0500, Kris Van Hees wrote:
> On Fri, Feb 07, 2025 at 02:17:44PM -0500, Eugene Loh wrote:
> > Reviewed-by: Eugene Loh <eugene.loh@oracle.com>
> > 
> > That said:
> > 
> > *)  Is it possible to construct a test?
> 
> The execargs patch provides a test for this.  The situation here is quite
> unusual because we end up assigning a DTrace string to a char array, which
> is an operation that you generally cannot do in D.
> 
> > *)  Has there been an audit to ferret out other instances of this problem? 
> > Also, I'm confused what position we're taking here. We're saying that it's
> > possible for a string (which in D has strsize) to have garbage after the NUL
> > byte, but we'll deal with this situation only in the case that it's an
> > argument of a ternary operation?  Why do we not say instead that a strsize
> > string will not have garbage after the NUL byte, so that a strsize copy of a
> > string will be safe (regardless of whether it's a ternary op)?
> 
> I am not sure what you mean with 'taking a position'.  This is a fix for a
> real problem, that is unusual (see above) yet occurs and results in trace()
> output to be wrong.  It may be possible that there are other cases where a
> similar problem may occur, but well, none have popped up as far as I know
> (partly witnessed that this is the first time it is noticed AFAIK and that
> is the reason I am fixing it).
> 
> > *)  What position are we taking here on BPF register pressure? E.g., what if
> > dst were %r0 (okay, let's hope not) or even just %r1-%r5 (even just %r4 or
> > %r5)?  One position is that we don't do a robust job of BPF reg management
> > anyhow and someday we're going to have to clean that up and so being sloppy
> > now is okay or even necessary.  Another position is that we can add a little
> > code to improve things (even if other areas of dt_cg.c vary considerably in
> > quality).  E.g., one can fill-spill regs between the two function calls,
> > costing extra BPF instructions only if needed. (That's already a pretty
> > common practice in dt_cg.c.)  For extra robustness (beyond what we currently
> > do in dt_cg.c), one can move %r1=dst and %r3=src in whatever order makes
> > sense (more complicated and less likely to be useful).  Anyhow, fill-spill
> > regs between function calls is a pretty common practice in dt_cg.c and I
> > would think would make to do here.
> 
> I'll look closer at the register use here but I don't think there is a
> problem actually.  dt_cg_strcpy() is not meant to be a general use function
> at this point (perhaps some day it will be).  I can also move this into
> the dt_op_ternary() funnction, but I felt that mighht make it at bit too
> complex.
> 
> > On 2/7/25 01:11, Kris Van Hees wrote:
> > > The result of a string ternary was copied from either left or right
> > > value using a memcpy() of strsize, but that could result in garbage
> > > being included in the result beyond the terminating 0-byte of the
> > > string value.  If the result was e.g. assigned to a char-array
> > > translator member, that garbage could affect consumer output.
> > > 
> > > The ternary will now copy the string using dt_cg_strcpy() which ensures
> > > that any string < strsize will be padded with 0-bytes up to strsize.
> > > 
> > > Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
> > > ---
> > >   libdtrace/dt_cg.c | 68 ++++++++++++++++++++++++++++++++++++-----------
> > >   1 file changed, 52 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/libdtrace/dt_cg.c b/libdtrace/dt_cg.c
> > > index a5c9aa09..b48e27f0 100644
> > > --- a/libdtrace/dt_cg.c
> > > +++ b/libdtrace/dt_cg.c
> > > @@ -1371,6 +1371,19 @@ dt_cg_fill_gap(dt_pcb_t *pcb, int gap)
> > >   		emit(dlp, BPF_STORE_IMM(BPF_W, BPF_REG_9, off, 0));
> > >   }
> > > +/*
> > > + * Store a pointer to the 'memory block of zeros' in reg.
> > > + */
> > > +static void
> > > +dt_cg_zerosptr(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
> > > +{
> > > +	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> > > +	dt_ident_t	*zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
> > > +
> > > +	dt_cg_access_dctx(reg, dlp, drp, DCTX_STRTAB);
> > > +	emite(dlp, BPF_ALU64_IMM(BPF_ADD, reg, -1), zero_off);
> > > +}
> > > +
> > >   static void
> > >   dt_cg_memcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src, size_t size)
> > >   {
> > > @@ -1394,6 +1407,43 @@ dt_cg_memcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src, size_t size)
> > >   	dt_regset_free(drp, BPF_REG_0);
> > >   }
> > > +/*
> > > + * Copy a string from the pointer stored in the src register to the pointer
> > > + * stored in the dst register.  At most strsize characters will be copied, and
> > > + * if the source string is less than strsize characters, the remainder will be
> > > + * filled with 0s.
> > > + */
> > > +static void
> > > +dt_cg_strcpy(dt_irlist_t *dlp, dt_regset_t *drp, int dst, int src)
> > > +{
> > > +	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> > > +	uint_t		lbl_ok = dt_irlist_label(dlp);
> > > +	size_t		size = dtp->dt_options[DTRACEOPT_STRSIZE];
> > > +
> > > +	if (dt_regset_xalloc_args(drp) == -1)
> > > +		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
> > > +
> > > +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dst));
> > > +	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, size));
> > > +	emit(dlp,  BPF_MOV_REG(BPF_REG_3, src));
> > > +	dt_regset_xalloc(drp, BPF_REG_0);
> > > +	emit(dlp,  BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel_str]));
> > > +
> > > +	emit(dlp,  BPF_BRANCH_IMM(BPF_JSGE, BPF_REG_0, 0, lbl_ok));
> > > +	dt_cg_probe_error(yypcb, DTRACEFLT_BADADDR, DT_ISREG, src);
> > > +
> > > +	emitl(dlp, lbl_ok,
> > > +		   BPF_NOP());
> > > +	emit(dlp,  BPF_MOV_REG(BPF_REG_1, dst));
> > > +	emit(dlp,  BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0));
> > > +	emit(dlp,  BPF_MOV_IMM(BPF_REG_2, size));
> > > +	emit(dlp,  BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0));
> > > +	dt_cg_zerosptr(BPF_REG_3, dlp, drp);
> > > +	emit(dlp,  BPF_CALL_HELPER(dtp->dt_bpfhelper[BPF_FUNC_probe_read_kernel]));
> > > +	dt_regset_free(drp, BPF_REG_0);
> > > +	dt_regset_free_args(drp);
> > > +}
> > > +
> > >   static void
> > >   dt_cg_spill_store(int reg)
> > >   {
> > > @@ -3040,19 +3090,6 @@ dt_cg_pop_stack(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
> > >   	dt_regset_free(drp, treg);
> > >   }
> > > -/*
> > > - * Store a pointer to the 'memory block of zeros' in reg.
> > > - */
> > > -static void
> > > -dt_cg_zerosptr(int reg, dt_irlist_t *dlp, dt_regset_t *drp)
> > > -{
> > > -	dtrace_hdl_t	*dtp = yypcb->pcb_hdl;
> > > -	dt_ident_t	*zero_off = dt_dlib_get_var(dtp, "ZERO_OFF");
> > > -
> > > -	dt_cg_access_dctx(reg, dlp, drp, DCTX_STRTAB);
> > > -	emite(dlp, BPF_ALU64_IMM(BPF_ADD, reg, -1), zero_off);
> > > -}
> > > -
> > >   /*
> > >    * Generate code to promote signed scalars (size < 64 bits) to native register
> > >    * size (64 bits).
> > > @@ -4603,7 +4640,7 @@ dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> > >   	 * dn_right).
> > >   	 */
> > >   	if (dt_node_is_string(dnp)) {
> > > -		uint_t lbl_null = dt_irlist_label(dlp);
> > > +		uint_t	lbl_null = dt_irlist_label(dlp);
> > >   		emit(dlp,  BPF_BRANCH_IMM(BPF_JEQ, dnp->dn_reg, 0, lbl_null));
> > > @@ -4619,8 +4656,7 @@ dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
> > >   		dt_cg_access_dctx(dnp->dn_reg, dlp, drp, DCTX_MEM);
> > >   		emit(dlp,  BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, dnp->dn_tstring->dn_value));
> > > -		dt_cg_memcpy(dlp, drp, dnp->dn_reg, BPF_REG_0,
> > > -			     yypcb->pcb_hdl->dt_options[DTRACEOPT_STRSIZE]);
> > > +		dt_cg_strcpy(dlp, drp, dnp->dn_reg, BPF_REG_0);
> > >   		emitl(dlp, lbl_null,
> > >   			   BPF_NOP());

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-02-07 21:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-07  6:11 [PATCH] cg: ensure string results in a ternary are padded to strsize Kris Van Hees
2025-02-07 19:17 ` Eugene Loh
2025-02-07 20:22   ` Kris Van Hees
2025-02-07 21:00     ` Kris Van Hees

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox