[PATCH 2/3] target/s390x: Implement DIVIDE TO INTEGER

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Ilya Leoshkevich <iii@linux.ibm.com>
To: Thomas Huth <thuth@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>
Cc: David Hildenbrand <david@kernel.org>,
	qemu-s390x@nongnu.org, qemu-devel@nongnu.org,
	Ilya Leoshkevich <iii@linux.ibm.com>
Subject: [PATCH 2/3] target/s390x: Implement DIVIDE TO INTEGER
Date: Wed, 21 Jan 2026 23:12:32 +0100	[thread overview]
Message-ID: <20260121222116.713325-3-iii@linux.ibm.com> (raw)
In-Reply-To: <20260121222116.713325-1-iii@linux.ibm.com>

DIVIDE TO INTEGER computes a floating-point remainder and is used by
LuaJIT, so add it to QEMU.

The instruction comes in two flavors: for floats and doubles, which are
very similar. Since it's also quite complex, copy-pasting the
implementation would result in barely maintainable code. Mitigate that
using macros. An alternative would be an .inc file, but this looks like
overkill.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 target/s390x/helper.h            |   2 +
 target/s390x/tcg/fpu_helper.c    | 199 +++++++++++++++++++++++++++++++
 target/s390x/tcg/insn-data.h.inc |   5 +-
 target/s390x/tcg/translate.c     |  26 ++++
 4 files changed, 231 insertions(+), 1 deletion(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 1a8a76abb98..f2b24c65a88 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -46,6 +46,8 @@ DEF_HELPER_FLAGS_3(sxb, TCG_CALL_NO_WG, i128, env, i128, i128)
 DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(dxb, TCG_CALL_NO_WG, i128, env, i128, i128)
+DEF_HELPER_5(didb, void, env, i32, i32, i32, i32) /* r1, r2, r3, m4 */
+DEF_HELPER_5(dieb, void, env, i32, i32, i32, i32) /* r1, r2, r3, m4 */
 DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64)
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
index 1ba43715ac1..f524c4257fb 100644
--- a/target/s390x/tcg/fpu_helper.c
+++ b/target/s390x/tcg/fpu_helper.c
@@ -286,6 +286,205 @@ Int128 HELPER(dxb)(CPUS390XState *env, Int128 a, Int128 b)
     return RET128(ret);
 }
 
+static float128 float128_precision_round_to_float32(float128 x)
+{   /* Truncate (not round) the fraction to its 23 most significant bits. */
+    x.low = 0;                            /* drop the whole low word */
+    x.high = deposit64(x.high, 0, 25, 0); /* clear bits 0..24 of high */
+    return x;
+}
+
+static float128 float128_precision_round_to_float64(float128 x)
+{   /* Truncate (not round) the fraction to its 52 most significant bits. */
+    x.low = deposit64(x.low, 0, 60, 0);   /* clear bits 0..59 of low */
+    return x;
+}
+
+static int float128_get_exp(float128 x)
+{   /* Unbiased exponent: the 15-bit field at high[62:48] minus 16383. */
+    return extract64(x.high, 48, 15) - 16383;
+}
+
+static float128 float128_set_exp(float128 x, int exp)
+{   /* Store exp + 16383 in high[62:48]; no explicit range check here. */
+    x.high = deposit64(x.high, 48, 15, exp + 16383);
+    return x;
+}
+
+static float128 float128_adjust_exp(float128 x, int delta)
+{   /* Add delta to the exponent field; sign and fraction are untouched. */
+    return float128_set_exp(x, float128_get_exp(x) + delta);
+}
+
+static bool float128_is_int(float128 x)
+{   /* True iff every fraction bit (high[47:0] and all of low) is zero. */
+    return extract64(x.high, 0, 48) == 0 && x.low == 0;
+}   /* NOTE(review): seems to accept only powers of two - confirm intent. */
+
+static float32 extract_float32(CPUS390XState *env, uint32_t r)
+{   /* Read the float32 held in the upper 32 bits of vregs[r][0]. */
+    return env->vregs[r][0] >> 32;
+}
+
+static void deposit_float32(CPUS390XState *env, uint32_t r, float32 x)
+{   /* Replace bits 32..63 of vregs[r][0] with x; bits 0..31 are kept. */
+    env->vregs[r][0] = deposit64(env->vregs[r][0], 32, 32, x);
+}
+
+static float64 extract_float64(CPUS390XState *env, uint32_t r)
+{   /* Read the float64 held in vregs[r][0]. */
+    return env->vregs[r][0];
+}
+
+static void deposit_float64(CPUS390XState *env, uint32_t r, float64 x)
+{   /* Overwrite vregs[r][0] with x. */
+    env->vregs[r][0] = x;
+}
+
+#define DIVIDE_TO_INTEGER(name, floatN, p, exp_max, exp_bias)                  \
+void HELPER(name)(CPUS390XState *env, uint32_t r1, uint32_t r2,                \
+                  uint32_t r3, uint32_t m4)                                    \
+{   /* Helper body for DIVIDE TO INTEGER on floatN operands. */                \
+    int float_exception_flags = 0; /* IEEE flags to raise at the end */        \
+    floatN a, b, n, r; /* n: quotient (-> r3), r: remainder (-> r1) */         \
+    int dxc = -1; /* -1: no explicit data exception */                         \
+    uint32_t cc;                                                               \
+                                                                               \
+    a = extract_ ## floatN(env, r1); /* dividend */                            \
+    b = extract_ ## floatN(env, r2); /* divisor */                             \
+                                                                               \
+    /* POp table "Results: DIVIDE TO INTEGER (Part 1 of 2)" */                 \
+    if (floatN ## _is_signaling_nan(a, &env->fpu_status)) {                    \
+        r = n = floatN ## _silence_nan(a, &env->fpu_status);                   \
+        cc = 1;                                                                \
+        float_exception_flags |= float_flag_invalid;                           \
+    } else if (floatN ## _is_signaling_nan(b, &env->fpu_status)) {             \
+        r = n = floatN ## _silence_nan(b, &env->fpu_status);                   \
+        cc = 1;                                                                \
+        float_exception_flags |= float_flag_invalid;                           \
+    } else if (floatN ## _is_quiet_nan(a, &env->fpu_status)) {                 \
+        r = n = a;                                                             \
+        cc = 1;                                                                \
+    } else if (floatN ## _is_quiet_nan(b, &env->fpu_status)) {                 \
+        r = n = b;                                                             \
+        cc = 1;                                                                \
+    } else if (floatN ## _is_infinity(a) || floatN ## _is_zero(b)) {           \
+        r = n = floatN ## _default_nan(&env->fpu_status);                      \
+        cc = 1;                                                                \
+        float_exception_flags |= float_flag_invalid;                           \
+    } else if (floatN ## _is_infinity(b))  {                                   \
+        r = a;                                                                 \
+        n = floatN ## _set_sign(floatN ## _zero,                               \
+                                floatN ## _is_neg(a) != floatN ## _is_neg(b)); \
+        cc = 0;                                                                \
+    } else {                                                                   \
+        float128 a128, b128, m128, n128, q128, r128;                           \
+        bool is_final, is_q128_smallish;                                       \
+        int old_mode, r128_exp;                                                \
+        uint32_t r_flags;                                                      \
+                                                                               \
+        /* Compute precise quotient */                                         \
+        a128 = floatN ## _to_float128(a, &env->fpu_status);                    \
+        b128 = floatN ## _to_float128(b, &env->fpu_status);                    \
+        q128 = float128_div(a128, b128, &env->fpu_status);                     \
+                                                                               \
+        /* Final or partial case? */                                           \
+        is_q128_smallish = float128_get_exp(q128) < p; /* |q| < 2^p */         \
+        is_final = is_q128_smallish || float128_is_int(q128);                  \
+                                                                               \
+        /*                                                                     \
+         * Final quotient is rounded using M4,                                 \
+         * partial quotient is rounded toward zero.                            \
+         */                                                                    \
+        old_mode = s390_swap_bfp_rounding_mode(env, is_final ? m4 : 5);        \
+        n128 = float128_round_to_int(q128, &env->fpu_status);                  \
+        s390_restore_bfp_rounding_mode(env, old_mode);                         \
+                                                                               \
+        /*                                                                     \
+         * Intermediate values are precision-rounded,                          \
+         * see "Intermediate Values" in POp.                                   \
+         */                                                                    \
+        n128 = float128_precision_round_to_ ## floatN(n128);                   \
+                                                                               \
+        /* Compute remainder */                                                \
+        m128 = float128_mul(b128, n128, &env->fpu_status);                     \
+        env->fpu_status.float_exception_flags = 0; /* track sub + narrowing */ \
+        r128 = float128_sub(a128, m128, &env->fpu_status);                     \
+        r128_exp = float128_get_exp(r128);                                     \
+        r = float128_to_## floatN(r128, &env->fpu_status);                     \
+        r_flags = env->fpu_status.float_exception_flags;                       \
+                                                                               \
+        /* POp table "Results: DIVIDE TO INTEGER (Part 2 of 2)" */             \
+        if (is_q128_smallish) {                                                \
+            cc = 0;                                                            \
+            if (!floatN ## _is_zero(r)) {                                      \
+                if (r128_exp < -(exp_max - 1)) {                               \
+                    if ((env->fpc >> 24) & S390_IEEE_MASK_UNDERFLOW) {         \
+                        float_exception_flags |= float_flag_underflow;         \
+                        dxc = 0x10;                                            \
+                        r128 = float128_adjust_exp(r128, exp_bias);            \
+                        r = float128_to_## floatN(r128, &env->fpu_status);     \
+                    }                                                          \
+                } else if (r_flags & float_flag_inexact) {                     \
+                    float_exception_flags |= float_flag_inexact;               \
+                    if ((env->fpc >> 24) & S390_IEEE_MASK_INEXACT) {           \
+                        /*                                                     \
+                         * Check whether remainder was truncated (rounded      \
+                         * toward zero) or incremented.                        \
+                         */                                                    \
+                        if (float128_lt(                                       \
+                                floatN ## _to_float128(floatN ## _abs(r),      \
+                                                       &env->fpu_status),      \
+                                float128_abs(r128), &env->fpu_status)) {       \
+                           dxc = 0x8;                                          \
+                        } else {                                               \
+                           dxc = 0xc;                                          \
+                        }                                                      \
+                    }                                                          \
+                }                                                              \
+            }                                                                  \
+        } else if (float128_get_exp(n128) > exp_max) { /* n exceeds floatN */  \
+            n128 = float128_adjust_exp(n128, -exp_bias);                       \
+            cc = floatN ## _is_zero(r) ? 1 : 3;                                \
+        } else {                                                               \
+            cc = floatN ## _is_zero(r) ? 0 : 2;                                \
+        }                                                                      \
+                                                                               \
+        /* Adjust sign of zero */                                              \
+        if (floatN ## _is_zero(r)) {                                           \
+            r = floatN ## _set_sign(r, float128_is_neg(a128));                 \
+        }                                                                      \
+        n = float128_to_ ## floatN(n128, &env->fpu_status);                    \
+        if (floatN ## _is_zero(n)) {                                           \
+            n = floatN ## _set_sign(n,                                         \
+                                    float128_is_neg(a128) !=                   \
+                                        float128_is_neg(b128));                \
+        }                                                                      \
+    }                                                                          \
+                                                                               \
+    /* Write back the results and CC unless suppressed */                      \
+    if ((float_exception_flags & float_flag_invalid) &&                        \
+        ((env->fpc >> 24) & S390_IEEE_MASK_INVALID)) {                         \
+        /* The action for invalid operation is "Suppress" */                   \
+    } else {                                                                   \
+        /* The action for other exceptions is "Complete" */                    \
+        deposit_ ## floatN(env, r1, r);                                        \
+        deposit_ ## floatN(env, r3, n);                                        \
+        env->cc_op = cc;                                                       \
+    }                                                                          \
+                                                                               \
+    /* Raise an exception if needed */                                         \
+    if (dxc == -1) {                                                           \
+        env->fpu_status.float_exception_flags = float_exception_flags;         \
+        handle_exceptions(env, false, GETPC());                                \
+    } else {                                                                   \
+        env->fpu_status.float_exception_flags = 0;                             \
+        tcg_s390_data_exception(env, dxc, GETPC());                            \
+    }                                                                          \
+}
+
+DIVIDE_TO_INTEGER(dieb, float32, 24, 127, 192) /* p, exp_max, exp_bias */
+DIVIDE_TO_INTEGER(didb, float64, 53, 1023, 1536) /* p, exp_max, exp_bias */
+
 /* 32-bit FP multiplication */
 uint64_t HELPER(meeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index baaafe922e9..0d5392eac54 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -9,7 +9,7 @@
  *  OPC  = (op << 8) | op2 where op is the major, op2 the minor opcode
  *  NAME = name of the opcode, used internally
  *  FMT  = format of the opcode (defined in insn-format.h.inc)
- *  FAC  = facility the opcode is available in (defined in DisasFacility)
+ *  FAC  = facility the opcode is available in (defined in translate.c)
  *  I1   = func in1_xx fills o->in1
  *  I2   = func in2_xx fills o->in2
  *  P    = func prep_xx initializes o->*out*
@@ -361,6 +361,9 @@
     C(0xb91d, DSGFR,   RRE,   Z,   r1p1, r2_32s, r1_P, 0, divs64, 0)
     C(0xe30d, DSG,     RXY_a, Z,   r1p1, m2_64, r1_P, 0, divs64, 0)
     C(0xe31d, DSGF,    RXY_a, Z,   r1p1, m2_32s, r1_P, 0, divs64, 0)
+/* DIVIDE TO INTEGER (last field is the data value op_dib tests for 32) */
+    D(0xb35b, DIDBR,   RRF_b, Z,   0, 0, 0, 0, dib, 0, 64)
+    D(0xb353, DIEBR,   RRF_b, Z,   0, 0, 0, 0, dib, 0, 32)
 
 /* EXCLUSIVE OR */
     C(0x1700, XR,      RR_a,  Z,   r1, r2, new, r1_32, xor, nz32)
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 540c5a569c0..a3b753bc829 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -2283,6 +2283,32 @@ static DisasJumpType op_dxb(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
+static DisasJumpType op_dib(DisasContext *s, DisasOps *o)
+{   /* Translate DIDBR/DIEBR: check the operands, then call the helper. */
+    const bool fpe = s390_has_feat(S390_FEAT_FLOATING_POINT_EXT);
+    uint8_t m4 = get_field(s, m4);
+
+    if (get_field(s, r1) == get_field(s, r2) || /* r1, r2, r3 must differ */
+        get_field(s, r1) == get_field(s, r3) ||
+        get_field(s, r2) == get_field(s, r3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (m4 == 2 || (!fpe && m4 == 3) || m4 > 7) { /* rejected M4 values */
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    (s->insn->data == 32 ? gen_helper_dieb : gen_helper_didb)(
+        tcg_env, tcg_constant_i32(get_field(s, r1)),
+        tcg_constant_i32(get_field(s, r2)),
+        tcg_constant_i32(get_field(s, r3)), tcg_constant_i32(m4));
+    set_cc_static(s);
+
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_ear(DisasContext *s, DisasOps *o)
 {
     int r2 = get_field(s, r2);
-- 
2.52.0

next prev parent reply	other threads:[~2026-01-21 22:22 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-21 22:12 [PATCH 0/3] target/s390x: Implement DIVIDE TO INTEGER Ilya Leoshkevich
2026-01-21 22:12 ` [PATCH 1/3] target/s390x: Dump Floating-Point-Control Register Ilya Leoshkevich
2026-01-22 16:40   ` Alex Bennée
2026-01-21 22:12 ` Ilya Leoshkevich [this message]
2026-01-22  1:04   ` [PATCH 2/3] target/s390x: Implement DIVIDE TO INTEGER Richard Henderson
2026-01-22 13:14     ` Ilya Leoshkevich
2026-01-21 22:12 ` [PATCH 3/3] tests/tcg/s390x: Test " Ilya Leoshkevich
2026-01-22 16:43   ` Alex Bennée
2026-01-22 16:59     ` Ilya Leoshkevich
2026-01-22 18:09       ` Alex Bennée

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:1a8a76abb9 dfblob:f2b24c65a8 dfblob:1ba43715ac
dfblob:f524c4257f dfblob:baaafe922e dfblob:0d5392eac5 dfblob:540c5a569c
dfblob:a3b753bc82 )
 OR (
bs:"[PATCH 2/3] target/s390x: Implement DIVIDE TO INTEGER" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260121222116.713325-3-iii@linux.ibm.com \
    --to=iii@linux.ibm.com \
    --cc=david@kernel.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.