All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] nvc0/ir: avoid jumping to a sched instruction
@ 2015-05-09  5:35 Ilia Mirkin
       [not found] ` <1431149707-7159-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
  2015-05-09  5:35 ` [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg Ilia Mirkin
  0 siblings, 2 replies; 9+ messages in thread
From: Ilia Mirkin @ 2015-05-09  5:35 UTC (permalink / raw)
  To: mesa-dev; +Cc: nouveau

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
Pretty sure there's nothing wrong with it, but it looks odd in the code.

 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 2 ++
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 7 +++++--
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 2 ++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 6bb9620..28081fa 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1316,6 +1316,8 @@ CodeEmitterGK110::emitFlow(const Instruction *i)
    } else
    if (mask & 2) {
       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
+      if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
+         pcRel += 8;
       // currently we don't want absolute branches
       assert(!f->absolute);
       code[0] |= (pcRel & 0x1ff) << 23;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 22db368..442cedf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -509,10 +509,13 @@ CodeEmitterGM107::emitBRA()
    emitCond5(0x00, CC_TR);
 
    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
+      int32_t pos = insn->target.bb->binPos;
+      if (writeIssueDelays && !(pos & 0x1f))
+         pos += 8;
       if (!insn->absolute)
-         emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
+         emitField(0x14, 24, pos - (codeSize + 8));
       else
-         emitField(0x14, 32, insn->target.bb->binPos);
+         emitField(0x14, 32, pos);
    } else {
       emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
       emitField(0x05, 1, 1);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index d9aed34..c241973 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1406,6 +1406,8 @@ CodeEmitterNVC0::emitFlow(const Instruction *i)
    } else
    if (mask & 2) {
       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
+      if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
+         pcRel += 8;
       // currently we don't want absolute branches
       assert(!f->absolute);
       code[0] |= (pcRel & 0x3f) << 26;
-- 
2.3.6

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/4] nvc0/ir: allow iset to produce a boolean float
       [not found] ` <1431149707-7159-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
@ 2015-05-09  5:35   ` Ilia Mirkin
  2015-05-09  5:35   ` [PATCH 3/4] nvc0/ir: optimize set & 1.0 to produce boolean-float sets Ilia Mirkin
  1 sibling, 0 replies; 9+ messages in thread
From: Ilia Mirkin @ 2015-05-09  5:35 UTC (permalink / raw)
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 12 ++++++++----
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp |  1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  |  8 +++++++-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 28081fa..ab8bf2e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -967,8 +967,8 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i)
       code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
       if (i->defExists(1))
          defId(i->def(1), 2);
-   else
-      code[0] |= 0x1c;
+      else
+         code[0] |= 0x1c;
    } else {
       switch (i->sType) {
       case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
@@ -990,8 +990,12 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i)
       }
       FTZ_(3a);
 
-      if (i->dType == TYPE_F32)
-         code[1] |= 1 << 23;
+      if (i->dType == TYPE_F32) {
+         if (isFloatType(i->sType))
+            code[1] |= 1 << 23;
+         else
+            code[1] |= 1 << 15;
+      }
    }
    if (i->sType == TYPE_S32)
       code[1] |= 1 << 19;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 442cedf..399a6f1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -1830,6 +1830,7 @@ CodeEmitterGM107::emitISET()
    emitCond3(0x31, insn->setCond);
    emitField(0x30, 1, isSignedType(insn->sType));
    emitCC   (0x2f);
+   emitField(0x2c, 1, insn->dType == TYPE_F32);
    emitX    (0x2b);
    emitGPR  (0x08, insn->src(0));
    emitGPR  (0x00, insn->def(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index c241973..f5992bc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1078,8 +1078,14 @@ CodeEmitterNVC0::emitSET(const CmpInstruction *i)
    if (!isFloatType(i->sType))
       lo = 0x3;
 
-   if (isFloatType(i->dType) || isSignedIntType(i->sType))
+   if (isSignedIntType(i->sType))
       lo |= 0x20;
+   if (isFloatType(i->dType)) {
+      if (isFloatType(i->sType))
+         lo |= 0x20;
+      else
+         lo |= 0x80;
+   }
 
    switch (i->op) {
    case OP_SET_AND: hi = 0x10000000; break;
-- 
2.3.6

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/4] nvc0/ir: optimize set & 1.0 to produce boolean-float sets
       [not found] ` <1431149707-7159-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
  2015-05-09  5:35   ` [PATCH 2/4] nvc0/ir: allow iset to produce a boolean float Ilia Mirkin
@ 2015-05-09  5:35   ` Ilia Mirkin
       [not found]     ` <1431149707-7159-3-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
  1 sibling, 1 reply; 9+ messages in thread
From: Ilia Mirkin @ 2015-05-09  5:35 UTC (permalink / raw)
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This has started to happen more now that the backend is producing
KILL_IF more often.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 29 ++++++++++++++++++++++
 .../nouveau/codegen/nv50_ir_target_nv50.cpp        |  2 ++
 2 files changed, 31 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 14446b6..d8af19a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -973,6 +973,35 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
    }
       break;
 
+   case OP_AND:
+   {
+      CmpInstruction *cmp = i->getSrc(t)->getInsn()->asCmp();
+      if (!cmp || cmp->op == OP_SLCT)
+         return;
+      if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
+         return;
+      if (imm0.reg.data.f32 != 1.0)
+         return;
+      if (cmp == NULL)
+         return;
+      if (i->getSrc(t)->getInsn()->dType != TYPE_U32)
+         return;
+
+      i->getSrc(t)->getInsn()->dType = TYPE_F32;
+      if (i->src(t).mod != Modifier(0)) {
+         assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT));
+         i->src(t).mod = Modifier(0);
+         cmp->setCond = reverseCondCode(cmp->setCond);
+      }
+      i->op = OP_MOV;
+      i->setSrc(s, NULL);
+      if (t) {
+         i->setSrc(0, i->getSrc(t));
+         i->setSrc(t, NULL);
+      }
+   }
+      break;
+
    case OP_SHL:
    {
       if (s != 1 || i->src(0).mod != Modifier(0))
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index 178a167..70180eb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -413,6 +413,8 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
       return false;
    case OP_SAD:
       return ty == TYPE_S32;
+   case OP_SET:
+      return !isFloatType(ty);
    default:
       return true;
    }
-- 
2.3.6

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg
  2015-05-09  5:35 [PATCH 1/4] nvc0/ir: avoid jumping to a sched instruction Ilia Mirkin
       [not found] ` <1431149707-7159-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
@ 2015-05-09  5:35 ` Ilia Mirkin
       [not found]   ` <1431149707-7159-4-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
  1 sibling, 1 reply; 9+ messages in thread
From: Ilia Mirkin @ 2015-05-09  5:35 UTC (permalink / raw)
  To: mesa-dev; +Cc: nouveau

This covers the pattern where a KILL_IF is used, which triggers a
comparison of -x to 0. This can usually be folded into the comparison whose
result is being compared to 0, however it may, itself, have already been
combined with another comparison. That shouldn't impact the logic of
this pass however. With this and the & 1.0 change, code like

00000020: 001c0001 80081df4     set b32 $r0 lt f32 $r0 0x3e800000
00000028: 001c0000 201fc000     and b32 $r0 $r0 0x3f800000
00000030: 7f9c001e dd885c00     set $p0 0x1 lt f32 neg $r0 0x0
00000038: 0000003c 19800000     $p0 discard

becomes

00000020: 001c001d b5881df4     set $p0 0x1 lt f32 $r0 0x3e800000
00000028: 0000003c 19800000     $p0 discard

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 51 ++++++++++++++--------
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index d8af19a..43a2fe9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -278,7 +278,6 @@ private:
 
    void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
 
-   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
    CmpInstruction *findOriginForTestWithZero(Value *);
 
    unsigned int foldCount;
@@ -337,16 +336,10 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
       return NULL;
    Instruction *insn = value->getInsn();
 
-   while (insn && insn->op != OP_SET) {
+   while (insn && insn->op != OP_SET && insn->op != OP_SET_AND &&
+          insn->op != OP_SET_OR && insn->op != OP_SET_XOR) {
       Instruction *next = NULL;
       switch (insn->op) {
-      case OP_NEG:
-      case OP_ABS:
-      case OP_CVT:
-         next = insn->getSrc(0)->getInsn();
-         if (insn->sType != next->dType)
-            return NULL;
-         break;
       case OP_MOV:
          next = insn->getSrc(0)->getInsn();
          break;
@@ -946,29 +939,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
 
    case OP_SET: // TODO: SET_AND,OR,XOR
    {
+      /* This optimizes the case where the output of a set is being compared
+       * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
+       * can be a lot cleverer in our comparison.
+       */
       CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
       CondCode cc, ccZ;
-      if (i->src(t).mod != Modifier(0))
-         return;
-      if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
+      if (imm0.reg.data.u32 != 0 || !si)
          return;
       cc = si->setCond;
       ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
+      // We do everything assuming var (cmp) 0, reverse the condition if 0 is
+      // first.
       if (s == 0)
          ccZ = reverseCondCode(ccZ);
+      // If there is a negative modifier, we need to undo that, by flipping
+      // the comparison to zero.
+      if (i->src(t).mod.neg())
+         ccZ = reverseCondCode(ccZ);
+      // If this is a signed comparison, we expect the input to be a regular
+      // boolean, i.e. 0/-1. However the rest of the logic assumes that true
+      // is positive, so just flip the sign.
+      if (i->sType == TYPE_S32) {
+         assert(!isFloatType(si->dType));
+         ccZ = reverseCondCode(ccZ);
+      }
       switch (ccZ) {
-      case CC_LT: cc = CC_FL; break;
-      case CC_GE: cc = CC_TR; break;
-      case CC_EQ: cc = inverseCondCode(cc); break;
-      case CC_LE: cc = inverseCondCode(cc); break;
-      case CC_GT: break;
-      case CC_NE: break;
+      case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
+      case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
+      case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
+      case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
+      case CC_GT: break; // bool > 0 -- bool
+      case CC_NE: break; // bool != 0 -- bool
       default:
          return;
       }
+
+      // Update the condition of this SET to be identical to the origin set,
+      // but with the updated condition code. The original SET should get
+      // DCE'd, ideally.
+      i->op = si->op;
       i->asCmp()->setCond = cc;
       i->setSrc(0, si->src(0));
       i->setSrc(1, si->src(1));
+      if (si->srcExists(2))
+         i->setSrc(2, si->src(2));
       i->sType = si->sType;
    }
       break;
-- 
2.3.6

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/4] nvc0/ir: optimize set & 1.0 to produce boolean-float sets
       [not found]     ` <1431149707-7159-3-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
@ 2015-05-09 15:27       ` Tobias Klausmann
       [not found]         ` <554E2754.5080904-AqjdNwhu20eELgA04lAiVw@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Tobias Klausmann @ 2015-05-09 15:27 UTC (permalink / raw)
  To: Ilia Mirkin; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 09.05.2015 07:35, Ilia Mirkin wrote:
> This has started to happen more now that the backend is producing
> KILL_IF more often.
>
> Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
> ---
>   .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 29 ++++++++++++++++++++++
>   .../nouveau/codegen/nv50_ir_target_nv50.cpp        |  2 ++
>   2 files changed, 31 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 14446b6..d8af19a 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -973,6 +973,35 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
>      }
>         break;
>   
> +   case OP_AND:
> +   {
> +      CmpInstruction *cmp = i->getSrc(t)->getInsn()->asCmp();
> +      if (!cmp || cmp->op == OP_SLCT)

how about if (cmp == NULL || ...) and kill the same condition later?

> +         return;
> +      if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
> +         return;
> +      if (imm0.reg.data.f32 != 1.0)
> +         return;
> +      if (cmp == NULL)
> +         return;
> +      if (i->getSrc(t)->getInsn()->dType != TYPE_U32)
> +         return;
> +
> +      i->getSrc(t)->getInsn()->dType = TYPE_F32;
> +      if (i->src(t).mod != Modifier(0)) {
> +         assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT));
> +         i->src(t).mod = Modifier(0);
> +         cmp->setCond = reverseCondCode(cmp->setCond);
> +      }
> +      i->op = OP_MOV;
> +      i->setSrc(s, NULL);
> +      if (t) {
> +         i->setSrc(0, i->getSrc(t));
> +         i->setSrc(t, NULL);
> +      }
> +   }
> +      break;
> +
>      case OP_SHL:
>      {
>         if (s != 1 || i->src(0).mod != Modifier(0))
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> index 178a167..70180eb 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> @@ -413,6 +413,8 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
>         return false;
>      case OP_SAD:
>         return ty == TYPE_S32;
> +   case OP_SET:
> +      return !isFloatType(ty);
>      default:
>         return true;
>      }

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/4] nvc0/ir: optimize set & 1.0 to produce boolean-float sets
       [not found]         ` <554E2754.5080904-AqjdNwhu20eELgA04lAiVw@public.gmane.org>
@ 2015-05-09 17:53           ` Ilia Mirkin
       [not found]             ` <CAKb7Uvg-2tUxSpkXVh9M+a6G2bTO97MzowN=r0jRbYh5DiFmYw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Ilia Mirkin @ 2015-05-09 17:53 UTC (permalink / raw)
  To: Tobias Klausmann
  Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

On Sat, May 9, 2015 at 11:27 AM, Tobias Klausmann
<tobias.johannes.klausmann@mni.thm.de> wrote:
>
>
> On 09.05.2015 07:35, Ilia Mirkin wrote:
>>
>> This has started to happen more now that the backend is producing
>> KILL_IF more often.
>>
>> Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
>> ---
>>   .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 29
>> ++++++++++++++++++++++
>>   .../nouveau/codegen/nv50_ir_target_nv50.cpp        |  2 ++
>>   2 files changed, 31 insertions(+)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> index 14446b6..d8af19a 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> @@ -973,6 +973,35 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue
>> &imm0, int s)
>>      }
>>         break;
>>   +   case OP_AND:
>> +   {
>> +      CmpInstruction *cmp = i->getSrc(t)->getInsn()->asCmp();
>> +      if (!cmp || cmp->op == OP_SLCT)
>
>
> how about if (cmp == NULL || ...) and kill the same condition later?

I just killed the other one. I think the usual style tends to be if
(!ptr) rather than if (ptr == NULL) in codegen. Both are acceptable
though.

>
>
>> +         return;
>> +      if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
>> +         return;
>> +      if (imm0.reg.data.f32 != 1.0)
>> +         return;
>> +      if (cmp == NULL)
>> +         return;
>> +      if (i->getSrc(t)->getInsn()->dType != TYPE_U32)
>> +         return;
>> +
>> +      i->getSrc(t)->getInsn()->dType = TYPE_F32;
>> +      if (i->src(t).mod != Modifier(0)) {
>> +         assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT));
>> +         i->src(t).mod = Modifier(0);
>> +         cmp->setCond = reverseCondCode(cmp->setCond);
>> +      }
>> +      i->op = OP_MOV;
>> +      i->setSrc(s, NULL);
>> +      if (t) {
>> +         i->setSrc(0, i->getSrc(t));
>> +         i->setSrc(t, NULL);
>> +      }
>> +   }
>> +      break;
>> +
>>      case OP_SHL:
>>      {
>>         if (s != 1 || i->src(0).mod != Modifier(0))
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>> index 178a167..70180eb 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>> @@ -413,6 +413,8 @@ TargetNV50::isOpSupported(operation op, DataType ty)
>> const
>>         return false;
>>      case OP_SAD:
>>         return ty == TYPE_S32;
>> +   case OP_SET:
>> +      return !isFloatType(ty);
>>      default:
>>         return true;
>>      }
>
>
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/4] nvc0/ir: optimize set & 1.0 to produce boolean-float sets
       [not found]             ` <CAKb7Uvg-2tUxSpkXVh9M+a6G2bTO97MzowN=r0jRbYh5DiFmYw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2015-05-09 19:47               ` Tobias Klausmann
  0 siblings, 0 replies; 9+ messages in thread
From: Tobias Klausmann @ 2015-05-09 19:47 UTC (permalink / raw)
  To: Ilia Mirkin; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org



On 09.05.2015 19:53, Ilia Mirkin wrote:
> On Sat, May 9, 2015 at 11:27 AM, Tobias Klausmann
> <tobias.johannes.klausmann@mni.thm.de> wrote:
>>
>> On 09.05.2015 07:35, Ilia Mirkin wrote:
>>> This has started to happen more now that the backend is producing
>>> KILL_IF more often.
>>>
>>> Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
>>> ---
>>>    .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 29
>>> ++++++++++++++++++++++
>>>    .../nouveau/codegen/nv50_ir_target_nv50.cpp        |  2 ++
>>>    2 files changed, 31 insertions(+)
>>>
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>>> index 14446b6..d8af19a 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>>> @@ -973,6 +973,35 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue
>>> &imm0, int s)
>>>       }
>>>          break;
>>>    +   case OP_AND:
>>> +   {
>>> +      CmpInstruction *cmp = i->getSrc(t)->getInsn()->asCmp();
>>> +      if (!cmp || cmp->op == OP_SLCT)
>>
>> how about if (cmp == NULL || ...) and kill the same condition later?
> I just killed the other one. I think the usual style tends to be if
> (!ptr) rather than if (ptr == NULL) in codegen. Both are acceptable
> though.

it was mainly about the dead code you killed now. With this it looks 
fine to me, so feel free to add

Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>


>>
>>> +         return;
>>> +      if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
>>> +         return;
>>> +      if (imm0.reg.data.f32 != 1.0)
>>> +         return;
>>> +      if (cmp == NULL)
>>> +         return;
>>> +      if (i->getSrc(t)->getInsn()->dType != TYPE_U32)
>>> +         return;
>>> +
>>> +      i->getSrc(t)->getInsn()->dType = TYPE_F32;
>>> +      if (i->src(t).mod != Modifier(0)) {
>>> +         assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT));
>>> +         i->src(t).mod = Modifier(0);
>>> +         cmp->setCond = reverseCondCode(cmp->setCond);
>>> +      }
>>> +      i->op = OP_MOV;
>>> +      i->setSrc(s, NULL);
>>> +      if (t) {
>>> +         i->setSrc(0, i->getSrc(t));
>>> +         i->setSrc(t, NULL);
>>> +      }
>>> +   }
>>> +      break;
>>> +
>>>       case OP_SHL:
>>>       {
>>>          if (s != 1 || i->src(0).mod != Modifier(0))
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> index 178a167..70180eb 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> @@ -413,6 +413,8 @@ TargetNV50::isOpSupported(operation op, DataType ty)
>>> const
>>>          return false;
>>>       case OP_SAD:
>>>          return ty == TYPE_S32;
>>> +   case OP_SET:
>>> +      return !isFloatType(ty);
>>>       default:
>>>          return true;
>>>       }
>>

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg
       [not found]   ` <1431149707-7159-4-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
@ 2015-05-09 20:04     ` Tobias Klausmann
       [not found]       ` <554E685B.1040602-AqjdNwhu20eELgA04lAiVw@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Tobias Klausmann @ 2015-05-09 20:04 UTC (permalink / raw)
  To: Ilia Mirkin; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 09.05.2015 07:35, Ilia Mirkin wrote:
> This covers the pattern where a KILL_IF is used, which triggers a
> comparison of -x to 0. This can usually be folded into the comparison whose
> result is being compared to 0, however it may, itself, have already been
> combined with another comparison. That shouldn't impact the logic of
> this pass however. With this and the & 1.0 change, code like
>
> 00000020: 001c0001 80081df4     set b32 $r0 lt f32 $r0 0x3e800000
> 00000028: 001c0000 201fc000     and b32 $r0 $r0 0x3f800000
> 00000030: 7f9c001e dd885c00     set $p0 0x1 lt f32 neg $r0 0x0
> 00000038: 0000003c 19800000     $p0 discard
>
> becomes
>
> 00000020: 001c001d b5881df4     set $p0 0x1 lt f32 $r0 0x3e800000
> 00000028: 0000003c 19800000     $p0 discard
>
> Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
> ---
>   .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 51 ++++++++++++++--------
>   1 file changed, 33 insertions(+), 18 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index d8af19a..43a2fe9 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -278,7 +278,6 @@ private:
>   
>      void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
>   
> -   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
>      CmpInstruction *findOriginForTestWithZero(Value *);
>   
>      unsigned int foldCount;
> @@ -337,16 +336,10 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
>         return NULL;
>      Instruction *insn = value->getInsn();
>   
> -   while (insn && insn->op != OP_SET) {
> +   while (insn && insn->op != OP_SET && insn->op != OP_SET_AND &&
> +          insn->op != OP_SET_OR && insn->op != OP_SET_XOR) {
>         Instruction *next = NULL;
>         switch (insn->op) {
> -      case OP_NEG:
> -      case OP_ABS:
> -      case OP_CVT:
> -         next = insn->getSrc(0)->getInsn();
> -         if (insn->sType != next->dType)
> -            return NULL;
> -         break;
>         case OP_MOV:
>            next = insn->getSrc(0)->getInsn();
>            break;
> @@ -946,29 +939,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
>   
>      case OP_SET: // TODO: SET_AND,OR,XOR

delete this comment?!

>      {
> +      /* This optimizes the case where the output of a set is being compared
> +       * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
> +       * can be a lot cleverer in our comparison.
> +       */
>         CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
>         CondCode cc, ccZ;
> -      if (i->src(t).mod != Modifier(0))
> -         return;
> -      if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
> +      if (imm0.reg.data.u32 != 0 || !si)
>            return;
>         cc = si->setCond;
>         ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
> +      // We do everything assuming var (cmp) 0, reverse the condition if 0 is
> +      // first.
>         if (s == 0)
>            ccZ = reverseCondCode(ccZ);
> +      // If there is a negative modifier, we need to undo that, by flipping
> +      // the comparison to zero.
> +      if (i->src(t).mod.neg())
> +         ccZ = reverseCondCode(ccZ);
> +      // If this is a signed comparison, we expect the input to be a regular
> +      // boolean, i.e. 0/-1. However the rest of the logic assumes that true
> +      // is positive, so just flip the sign.
> +      if (i->sType == TYPE_S32) {
> +         assert(!isFloatType(si->dType));
> +         ccZ = reverseCondCode(ccZ);
> +      }

can both this and the previous condition evaluate to true? if yes, this 
double-flips ccZ...

>         switch (ccZ) {
> -      case CC_LT: cc = CC_FL; break;
> -      case CC_GE: cc = CC_TR; break;
> -      case CC_EQ: cc = inverseCondCode(cc); break;
> -      case CC_LE: cc = inverseCondCode(cc); break;
> -      case CC_GT: break;
> -      case CC_NE: break;
> +      case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
> +      case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
> +      case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
> +      case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
> +      case CC_GT: break; // bool > 0 -- bool
> +      case CC_NE: break; // bool != 0 -- bool
>         default:
>            return;
>         }
> +
> +      // Update the condition of this SET to be identical to the origin set,
> +      // but with the updated condition code. The original SET should get
> +      // DCE'd, ideally.
> +      i->op = si->op;
>         i->asCmp()->setCond = cc;
>         i->setSrc(0, si->src(0));
>         i->setSrc(1, si->src(1));
> +      if (si->srcExists(2))
> +         i->setSrc(2, si->src(2));
>         i->sType = si->sType;
>      }
>         break;

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg
       [not found]       ` <554E685B.1040602-AqjdNwhu20eELgA04lAiVw@public.gmane.org>
@ 2015-05-09 20:07         ` Ilia Mirkin
  0 siblings, 0 replies; 9+ messages in thread
From: Ilia Mirkin @ 2015-05-09 20:07 UTC (permalink / raw)
  To: Tobias Klausmann
  Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

On Sat, May 9, 2015 at 4:04 PM, Tobias Klausmann
<tobias.johannes.klausmann@mni.thm.de> wrote:
>
>
> On 09.05.2015 07:35, Ilia Mirkin wrote:
>>
>> This covers the pattern where a KILL_IF is used, which triggers a
>> comparison of -x to 0. This can usually be folded into the comparison
>> whose
>> result is being compared to 0, however it may, itself, have already been
>> combined with another comparison. That shouldn't impact the logic of
>> this pass however. With this and the & 1.0 change, code like
>>
>> 00000020: 001c0001 80081df4     set b32 $r0 lt f32 $r0 0x3e800000
>> 00000028: 001c0000 201fc000     and b32 $r0 $r0 0x3f800000
>> 00000030: 7f9c001e dd885c00     set $p0 0x1 lt f32 neg $r0 0x0
>> 00000038: 0000003c 19800000     $p0 discard
>>
>> becomes
>>
>> 00000020: 001c001d b5881df4     set $p0 0x1 lt f32 $r0 0x3e800000
>> 00000028: 0000003c 19800000     $p0 discard
>>
>> Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
>> ---
>>   .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 51
>> ++++++++++++++--------
>>   1 file changed, 33 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> index d8af19a..43a2fe9 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> @@ -278,7 +278,6 @@ private:
>>        void tryCollapseChainedMULs(Instruction *, const int s,
>> ImmediateValue&);
>>   -   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to
>> SET
>>      CmpInstruction *findOriginForTestWithZero(Value *);
>>        unsigned int foldCount;
>> @@ -337,16 +336,10 @@ ConstantFolding::findOriginForTestWithZero(Value
>> *value)
>>         return NULL;
>>      Instruction *insn = value->getInsn();
>>   -   while (insn && insn->op != OP_SET) {
>> +   while (insn && insn->op != OP_SET && insn->op != OP_SET_AND &&
>> +          insn->op != OP_SET_OR && insn->op != OP_SET_XOR) {
>>         Instruction *next = NULL;
>>         switch (insn->op) {
>> -      case OP_NEG:
>> -      case OP_ABS:
>> -      case OP_CVT:
>> -         next = insn->getSrc(0)->getInsn();
>> -         if (insn->sType != next->dType)
>> -            return NULL;
>> -         break;
>>         case OP_MOV:
>>            next = insn->getSrc(0)->getInsn();
>>            break;
>> @@ -946,29 +939,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue
>> &imm0, int s)
>>        case OP_SET: // TODO: SET_AND,OR,XOR
>
>
> delete this comment?!

No... this still only handles OP_SET.

>
>
>>      {
>> +      /* This optimizes the case where the output of a set is being
>> compared
>> +       * to zero. Since the set can only produce 0/-1 (int) or 0/1
>> (float), we
>> +       * can be a lot cleverer in our comparison.
>> +       */
>>         CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
>>         CondCode cc, ccZ;
>> -      if (i->src(t).mod != Modifier(0))
>> -         return;
>> -      if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
>> +      if (imm0.reg.data.u32 != 0 || !si)
>>            return;
>>         cc = si->setCond;
>>         ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
>> +      // We do everything assuming var (cmp) 0, reverse the condition if
>> 0 is
>> +      // first.
>>         if (s == 0)
>>            ccZ = reverseCondCode(ccZ);
>> +      // If there is a negative modifier, we need to undo that, by
>> flipping
>> +      // the comparison to zero.
>> +      if (i->src(t).mod.neg())
>> +         ccZ = reverseCondCode(ccZ);
>> +      // If this is a signed comparison, we expect the input to be a
>> regular
>> +      // boolean, i.e. 0/-1. However the rest of the logic assumes that
>> true
>> +      // is positive, so just flip the sign.
>> +      if (i->sType == TYPE_S32) {
>> +         assert(!isFloatType(si->dType));
>> +         ccZ = reverseCondCode(ccZ);
>> +      }
>
>
> can both this and the previous condition evaluate to true? if yes, this
> double-flips ccZ...

Triple, in fact :) Each thing causes a direction flip... I guess I
should just sum them up, and flip it if it's odd, but... wtvr. This
seems more straightforward.

Actually both this and the previous commit required a bit more work,
I'll send v2's, but the latest versions are at:

https://github.com/imirkin/mesa/commits/tmp4

>
>
>>         switch (ccZ) {
>> -      case CC_LT: cc = CC_FL; break;
>> -      case CC_GE: cc = CC_TR; break;
>> -      case CC_EQ: cc = inverseCondCode(cc); break;
>> -      case CC_LE: cc = inverseCondCode(cc); break;
>> -      case CC_GT: break;
>> -      case CC_NE: break;
>> +      case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
>> +      case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
>> +      case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
>> +      case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
>> +      case CC_GT: break; // bool > 0 -- bool
>> +      case CC_NE: break; // bool != 0 -- bool
>>         default:
>>            return;
>>         }
>> +
>> +      // Update the condition of this SET to be identical to the origin
>> set,
>> +      // but with the updated condition code. The original SET should get
>> +      // DCE'd, ideally.
>> +      i->op = si->op;
>>         i->asCmp()->setCond = cc;
>>         i->setSrc(0, si->src(0));
>>         i->setSrc(1, si->src(1));
>> +      if (si->srcExists(2))
>> +         i->setSrc(2, si->src(2));
>>         i->sType = si->sType;
>>      }
>>         break;
>
>
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-05-09 20:07 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-05-09  5:35 [PATCH 1/4] nvc0/ir: avoid jumping to a sched instruction Ilia Mirkin
     [not found] ` <1431149707-7159-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
2015-05-09  5:35   ` [PATCH 2/4] nvc0/ir: allow iset to produce a boolean float Ilia Mirkin
2015-05-09  5:35   ` [PATCH 3/4] nvc0/ir: optimize set & 1.0 to produce boolean-float sets Ilia Mirkin
     [not found]     ` <1431149707-7159-3-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
2015-05-09 15:27       ` Tobias Klausmann
     [not found]         ` <554E2754.5080904-AqjdNwhu20eELgA04lAiVw@public.gmane.org>
2015-05-09 17:53           ` Ilia Mirkin
     [not found]             ` <CAKb7Uvg-2tUxSpkXVh9M+a6G2bTO97MzowN=r0jRbYh5DiFmYw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-05-09 19:47               ` Tobias Klausmann
2015-05-09  5:35 ` [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg Ilia Mirkin
     [not found]   ` <1431149707-7159-4-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
2015-05-09 20:04     ` Tobias Klausmann
     [not found]       ` <554E685B.1040602-AqjdNwhu20eELgA04lAiVw@public.gmane.org>
2015-05-09 20:07         ` Ilia Mirkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.