* Forest Bond <forest@alittletooquiet.net>,Greg Kroah-Hartman <gregkh@linuxfoundation.org>,devel@drive
From: Rodolfo C. Villordo @ 2020-06-07 22:41 UTC (permalink / raw)
To: Forest Bond, Greg Kroah-Hartman, kernel-janitors, linux-kernel
Cc: rodolfovillordo
Fix multiple lines over 80 characters by splitting them into multiple lines.
Warning found by checkpatch.pl
Signed-off-by: Rodolfo C. Villordo <rodolfovillordo@gmail.com>
---
drivers/staging/vt6655/rxtx.c | 225 ++++++++++++++++++++++++----------
1 file changed, 162 insertions(+), 63 deletions(-)
diff --git a/drivers/staging/vt6655/rxtx.c b/drivers/staging/vt6655/rxtx.c
index cfab64d2b312..30ea29ea70cf 100644
--- a/drivers/staging/vt6655/rxtx.c
+++ b/drivers/staging/vt6655/rxtx.c
@@ -165,7 +165,8 @@ s_uGetTxRsvTime(
{
unsigned int uDataTime, uAckTime;
- uDataTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, cbFrameLength, wRate);
+ uDataTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ cbFrameLength, wRate);
if (!bNeedAck)
return uDataTime;
@@ -206,28 +207,39 @@ s_uGetRTSCTSRsvTime(
unsigned int uAckTime = 0;
unsigned int uDataTime = 0;
- uDataTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, cbFrameLength, wCurrentRate);
+ uDataTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ cbFrameLength, wCurrentRate);
if (byRTSRsvType == 0) { /* RTSTxRrvTime_bb */
- uRTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 20, pDevice->byTopCCKBasicRate);
- uAckTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopCCKBasicRate);
+ uRTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 20, pDevice->byTopCCKBasicRate);
+ uAckTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopCCKBasicRate);
uCTSTime = uAckTime;
} else if (byRTSRsvType == 1) { /* RTSTxRrvTime_ba, only in 2.4GHZ */
- uRTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 20, pDevice->byTopCCKBasicRate);
- uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopCCKBasicRate);
- uAckTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopOFDMBasicRate);
+ uRTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 20, pDevice->byTopCCKBasicRate);
+ uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopCCKBasicRate);
+ uAckTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopOFDMBasicRate);
} else if (byRTSRsvType == 2) { /* RTSTxRrvTime_aa */
- uRTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 20, pDevice->byTopOFDMBasicRate);
- uAckTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopOFDMBasicRate);
+ uRTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 20, pDevice->byTopOFDMBasicRate);
+ uAckTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopOFDMBasicRate);
uCTSTime = uAckTime;
} else if (byRTSRsvType == 3) { /* CTSTxRrvTime_ba, only in 2.4GHZ */
- uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopCCKBasicRate);
- uAckTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopOFDMBasicRate);
+ uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopCCKBasicRate);
+ uAckTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopOFDMBasicRate);
uRrvTime = uCTSTime + uAckTime + uDataTime + 2 * pDevice->uSIFS;
return cpu_to_le16((u16)uRrvTime);
}
/* RTSRrvTime */
- uRrvTime = uRTSTime + uCTSTime + uAckTime + uDataTime + 3 * pDevice->uSIFS;
+ uRrvTime = uRTSTime + uCTSTime + uAckTime + uDataTime
+ + 3 * pDevice->uSIFS;
return cpu_to_le16((u16)uRrvTime);
}
@@ -350,72 +362,102 @@ s_uGetRTSCTSDuration(
switch (byDurType) {
case RTSDUR_BB: /* RTSDuration_bb */
- uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopCCKBasicRate);
- uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wRate, bNeedAck);
+ uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopCCKBasicRate);
+ uDurTime = uCTSTime + 2 * pDevice->uSIFS
+ + s_uGetTxRsvTime(pDevice, byPktType,
+ cbFrameLength, wRate,
+ bNeedAck);
break;
case RTSDUR_BA: /* RTSDuration_ba */
- uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopCCKBasicRate);
- uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wRate, bNeedAck);
+ uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopCCKBasicRate);
+ uDurTime = uCTSTime + 2 * pDevice->uSIFS
+ + s_uGetTxRsvTime(pDevice, byPktType,
+ cbFrameLength, wRate,
+ bNeedAck);
break;
case RTSDUR_AA: /* RTSDuration_aa */
- uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopOFDMBasicRate);
- uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wRate, bNeedAck);
+ uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopOFDMBasicRate);
+ uDurTime = uCTSTime + 2 * pDevice->uSIFS
+ + s_uGetTxRsvTime(pDevice, byPktType,
+ cbFrameLength, wRate,
+ bNeedAck);
break;
case CTSDUR_BA: /* CTSDuration_ba */
- uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wRate, bNeedAck);
+ uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType,
+ cbFrameLength,
+ wRate, bNeedAck);
break;
case RTSDUR_BA_F0: /* RTSDuration_ba_f0 */
- uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopCCKBasicRate);
- if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopCCKBasicRate);
+ if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE0][wRate-RATE_18M], bNeedAck);
- else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE0][wRate-RATE_18M], bNeedAck);
break;
case RTSDUR_AA_F0: /* RTSDuration_aa_f0 */
- uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopOFDMBasicRate);
- if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopOFDMBasicRate);
+ if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE0][wRate-RATE_18M], bNeedAck);
- else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE0][wRate-RATE_18M], bNeedAck);
break;
case RTSDUR_BA_F1: /* RTSDuration_ba_f1 */
- uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopCCKBasicRate);
- if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopCCKBasicRate);
+ if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = uCTSTime + 2*pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE1][wRate-RATE_18M], bNeedAck);
- else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE1][wRate-RATE_18M], bNeedAck);
break;
case RTSDUR_AA_F1: /* RTSDuration_aa_f1 */
- uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType, 14, pDevice->byTopOFDMBasicRate);
- if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ uCTSTime = bb_get_frame_time(pDevice->byPreambleType, byPktType,
+ 14, pDevice->byTopOFDMBasicRate);
+ if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE1][wRate-RATE_18M], bNeedAck);
- else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE1][wRate-RATE_18M], bNeedAck);
break;
case CTSDUR_BA_F0: /* CTSDuration_ba_f0 */
- if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE0][wRate-RATE_18M], bNeedAck);
- else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE0][wRate-RATE_18M], bNeedAck);
break;
case CTSDUR_BA_F1: /* CTSDuration_ba_f1 */
- if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE1][wRate-RATE_18M], bNeedAck);
- else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
+ else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) &&
+ (wRate <= RATE_54M))
uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE1][wRate-RATE_18M], bNeedAck);
break;
@@ -459,7 +501,8 @@ s_uFillDataHead(
PK_TYPE_11B, &buf->b);
if (is_pspoll) {
- __le16 dur = cpu_to_le16(pDevice->current_aid | BIT(14) | BIT(15));
+ __le16 dur = cpu_to_le16(pDevice->current_aid |
+ BIT(14) | BIT(15));
buf->duration_a = dur;
buf->duration_b = dur;
@@ -477,8 +520,11 @@ s_uFillDataHead(
uMACfragNum, byFBOption));
}
- buf->time_stamp_off_a = vnt_time_stamp_off(pDevice, wCurrentRate);
- buf->time_stamp_off_b = vnt_time_stamp_off(pDevice, pDevice->byTopCCKBasicRate);
+ buf->time_stamp_off_a =
+ vnt_time_stamp_off(pDevice, wCurrentRate);
+ buf->time_stamp_off_b =
+ vnt_time_stamp_off(pDevice,
+ pDevice->byTopCCKBasicRate);
return buf->duration_a;
} else {
@@ -501,8 +547,11 @@ s_uFillDataHead(
buf->duration_a_f1 = cpu_to_le16((u16)s_uGetDataDuration(pDevice, DATADUR_A_F1, cbFrameLength, byPktType,
wCurrentRate, bNeedAck, uFragIdx, cbLastFragmentSize, uMACfragNum, byFBOption));
- buf->time_stamp_off_a = vnt_time_stamp_off(pDevice, wCurrentRate);
- buf->time_stamp_off_b = vnt_time_stamp_off(pDevice, pDevice->byTopCCKBasicRate);
+ buf->time_stamp_off_a =
+ vnt_time_stamp_off(pDevice, wCurrentRate);
+ buf->time_stamp_off_b =
+ vnt_time_stamp_off(pDevice,
+ pDevice->byTopCCKBasicRate);
return buf->duration_a;
} /* if (byFBOption == AUTO_FB_NONE) */
@@ -530,7 +579,8 @@ s_uFillDataHead(
byPktType, &buf->ab);
if (is_pspoll) {
- __le16 dur = cpu_to_le16(pDevice->current_aid | BIT(14) | BIT(15));
+ __le16 dur = cpu_to_le16(pDevice->current_aid |
+ BIT(14) | BIT(15));
buf->duration = dur;
} else {
@@ -542,7 +592,8 @@ s_uFillDataHead(
byFBOption));
}
- buf->time_stamp_off = vnt_time_stamp_off(pDevice, wCurrentRate);
+ buf->time_stamp_off =
+ vnt_time_stamp_off(pDevice, wCurrentRate);
return buf->duration;
}
} else {
@@ -552,7 +603,8 @@ s_uFillDataHead(
byPktType, &buf->ab);
if (is_pspoll) {
- __le16 dur = cpu_to_le16(pDevice->current_aid | BIT(14) | BIT(15));
+ __le16 dur = cpu_to_le16(pDevice->current_aid |
+ BIT(14) | BIT(15));
buf->duration = dur;
} else {
@@ -792,7 +844,8 @@ s_vFillCTSHead(
}
if (byPktType == PK_TYPE_11GB || byPktType == PK_TYPE_11GA) {
- if (byFBOption != AUTO_FB_NONE && uDMAIdx != TYPE_ATIMDMA && uDMAIdx != TYPE_BEACONDMA) {
+ if (byFBOption != AUTO_FB_NONE && uDMAIdx != TYPE_ATIMDMA &&
+ uDMAIdx != TYPE_BEACONDMA) {
/* Auto Fall back */
struct vnt_cts_fb *buf = pvCTS;
/* Get SignalField, ServiceField & Length */
@@ -921,50 +974,96 @@ s_vGenerateTxParameter(
/* Fill RsvTime */
struct vnt_rrv_time_rts *buf = pvRrvTime;
- buf->rts_rrv_time_aa = s_uGetRTSCTSRsvTime(pDevice, 2, byPktType, cbFrameSize, wCurrentRate);
- buf->rts_rrv_time_ba = s_uGetRTSCTSRsvTime(pDevice, 1, byPktType, cbFrameSize, wCurrentRate);
- buf->rts_rrv_time_bb = s_uGetRTSCTSRsvTime(pDevice, 0, byPktType, cbFrameSize, wCurrentRate);
- buf->rrv_time_a = vnt_rxtx_rsvtime_le16(pDevice, byPktType, cbFrameSize, wCurrentRate, bNeedACK);
- buf->rrv_time_b = vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11B, cbFrameSize, pDevice->byTopCCKBasicRate, bNeedACK);
-
- s_vFillRTSHead(pDevice, byPktType, pvRTS, cbFrameSize, bNeedACK, bDisCRC, psEthHeader, wCurrentRate, byFBOption);
+ buf->rts_rrv_time_aa =
+ s_uGetRTSCTSRsvTime(pDevice, 2, byPktType,
+ cbFrameSize, wCurrentRate);
+ buf->rts_rrv_time_ba =
+ s_uGetRTSCTSRsvTime(pDevice, 1, byPktType,
+ cbFrameSize, wCurrentRate);
+ buf->rts_rrv_time_bb =
+ s_uGetRTSCTSRsvTime(pDevice, 0, byPktType,
+ cbFrameSize, wCurrentRate);
+ buf->rrv_time_a =
+ vnt_rxtx_rsvtime_le16(pDevice, byPktType,
+ cbFrameSize, wCurrentRate,
+ bNeedACK);
+ buf->rrv_time_b =
+ vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11B,
+ cbFrameSize,
+ pDevice->byTopCCKBasicRate,
+ bNeedACK);
+
+ s_vFillRTSHead(pDevice, byPktType, pvRTS, cbFrameSize,
+ bNeedACK, bDisCRC, psEthHeader,
+ wCurrentRate, byFBOption);
} else {/* RTS_needless, PCF mode */
struct vnt_rrv_time_cts *buf = pvRrvTime;
- buf->rrv_time_a = vnt_rxtx_rsvtime_le16(pDevice, byPktType, cbFrameSize, wCurrentRate, bNeedACK);
- buf->rrv_time_b = vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11B, cbFrameSize, pDevice->byTopCCKBasicRate, bNeedACK);
- buf->cts_rrv_time_ba = s_uGetRTSCTSRsvTime(pDevice, 3, byPktType, cbFrameSize, wCurrentRate);
+ buf->rrv_time_a =
+ vnt_rxtx_rsvtime_le16(pDevice, byPktType,
+ cbFrameSize, wCurrentRate,
+ bNeedACK);
+ buf->rrv_time_b =
+ vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11B,
+ cbFrameSize,
+ pDevice->byTopCCKBasicRate,
+ bNeedACK);
+ buf->cts_rrv_time_ba =
+ s_uGetRTSCTSRsvTime(pDevice, 3, byPktType,
+ cbFrameSize, wCurrentRate);
/* Fill CTS */
- s_vFillCTSHead(pDevice, uDMAIdx, byPktType, pvCTS, cbFrameSize, bNeedACK, bDisCRC, wCurrentRate, byFBOption);
+ s_vFillCTSHead(pDevice, uDMAIdx, byPktType, pvCTS,
+ cbFrameSize, bNeedACK, bDisCRC,
+ wCurrentRate, byFBOption);
}
} else if (byPktType == PK_TYPE_11A) {
if (pvRTS) {/* RTS_need, non PCF mode */
struct vnt_rrv_time_ab *buf = pvRrvTime;
- buf->rts_rrv_time = s_uGetRTSCTSRsvTime(pDevice, 2, byPktType, cbFrameSize, wCurrentRate);
- buf->rrv_time = vnt_rxtx_rsvtime_le16(pDevice, byPktType, cbFrameSize, wCurrentRate, bNeedACK);
+ buf->rts_rrv_time =
+ s_uGetRTSCTSRsvTime(pDevice, 2, byPktType,
+ cbFrameSize, wCurrentRate);
+ buf->rrv_time =
+ vnt_rxtx_rsvtime_le16(pDevice, byPktType,
+ cbFrameSize, wCurrentRate,
+ bNeedACK);
/* Fill RTS */
- s_vFillRTSHead(pDevice, byPktType, pvRTS, cbFrameSize, bNeedACK, bDisCRC, psEthHeader, wCurrentRate, byFBOption);
+ s_vFillRTSHead(pDevice, byPktType, pvRTS, cbFrameSize,
+ bNeedACK, bDisCRC, psEthHeader,
+ wCurrentRate, byFBOption);
} else if (!pvRTS) {/* RTS_needless, non PCF mode */
struct vnt_rrv_time_ab *buf = pvRrvTime;
- buf->rrv_time = vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11A, cbFrameSize, wCurrentRate, bNeedACK);
+ buf->rrv_time =
+ vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11A,
+ cbFrameSize, wCurrentRate,
+ bNeedACK);
}
} else if (byPktType == PK_TYPE_11B) {
if (pvRTS) {/* RTS_need, non PCF mode */
struct vnt_rrv_time_ab *buf = pvRrvTime;
- buf->rts_rrv_time = s_uGetRTSCTSRsvTime(pDevice, 0, byPktType, cbFrameSize, wCurrentRate);
- buf->rrv_time = vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11B, cbFrameSize, wCurrentRate, bNeedACK);
+ buf->rts_rrv_time =
+ s_uGetRTSCTSRsvTime(pDevice, 0, byPktType,
+ cbFrameSize, wCurrentRate);
+ buf->rrv_time =
+ vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11B,
+ cbFrameSize, wCurrentRate,
+ bNeedACK);
/* Fill RTS */
- s_vFillRTSHead(pDevice, byPktType, pvRTS, cbFrameSize, bNeedACK, bDisCRC, psEthHeader, wCurrentRate, byFBOption);
+ s_vFillRTSHead(pDevice, byPktType, pvRTS, cbFrameSize,
+ bNeedACK, bDisCRC, psEthHeader,
+ wCurrentRate, byFBOption);
} else { /* RTS_needless, non PCF mode */
struct vnt_rrv_time_ab *buf = pvRrvTime;
- buf->rrv_time = vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11B, cbFrameSize, wCurrentRate, bNeedACK);
+ buf->rrv_time =
+ vnt_rxtx_rsvtime_le16(pDevice, PK_TYPE_11B,
+ cbFrameSize, wCurrentRate,
+ bNeedACK);
}
}
}
--
2.17.1
^ permalink raw reply related
* Re: [PATCH 19/32] iio:imu:st_lsm6dsx Fix alignment and data leak issues
From: Lorenzo Bianconi @ 2020-06-07 22:33 UTC (permalink / raw)
To: Jonathan Cameron
Cc: linux-iio, Andy Shevchenko, Jonathan Cameron, Lars-Peter Clausen,
Lorenzo Bianconi
In-Reply-To: <20200607155408.958437-20-jic23@kernel.org>
[-- Attachment #1: Type: text/plain, Size: 5807 bytes --]
> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>
> One of a class of bugs pointed out by Lars in a recent review.
> iio_push_to_buffers_with_timestamp assumes the buffer used is aligned
> to the size of the timestamp (8 bytes). This is not guaranteed in
> this driver which uses an array of smaller elements on the stack.
> As Lars also noted this anti pattern can involve a leak of data to
> userspace and that indeed can happen here. We close both issues by
> moving to a set of suitable structures in the iio_priv() data.
>
> This data is allocated with kzalloc so no data can leak apart from
> previous readings.
>
> For the tagged path the data is aligned by using __aligned(8) for
> the buffer on the stack.
>
> There has been a lot of churn in this driver, so likely backports
> may be needed for stable.
Hi Jonathan,
I added just some nitpicks inline, but it seems to me the patch is fine.
I guess we can address them with a followup patch if you agree, no need to
resend this huge series :)
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
>
> Fixes: 290a6ce11d93 ("iio: imu: add support to lsm6dsx driver")
> Reported-by: Lars-Peter Clausen <lars@metafoo.de>
> Cc: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
> drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 5 +++
> .../iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 36 ++++++++++---------
> 2 files changed, 25 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
> index b56df409ed0f..5f821ef467da 100644
> --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
> +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
> @@ -411,6 +411,11 @@ struct st_lsm6dsx_hw {
> const struct st_lsm6dsx_settings *settings;
>
> struct iio_mount_matrix orientation;
> + /* Ensure natural alignment of buffer elements */
> + struct {
> + __le16 channels[3];
> + s64 ts __aligned(8);
> + } gyro_scan, acc_scan, ext_scan;
> };
it seems to me doing something like:
struct {
__le16 channels[3];
s64 ts __aligned(8);
} scan[3];
would be better if for example we want to add support for more external devices
for untagged FIFO devices
>
> static __maybe_unused const struct iio_event_spec st_lsm6dsx_event = {
> diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
> index afd00daeefb2..bebbc2bb37f7 100644
> --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
> +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
> @@ -341,9 +341,6 @@ int st_lsm6dsx_read_fifo(struct st_lsm6dsx_hw *hw)
> int err, sip, acc_sip, gyro_sip, ts_sip, ext_sip, read_len, offset;
> u16 fifo_len, pattern_len = hw->sip * ST_LSM6DSX_SAMPLE_SIZE;
> u16 fifo_diff_mask = hw->settings->fifo_ops.fifo_diff.mask;
> - u8 gyro_buff[ST_LSM6DSX_IIO_BUFF_SIZE];
> - u8 acc_buff[ST_LSM6DSX_IIO_BUFF_SIZE];
> - u8 ext_buff[ST_LSM6DSX_IIO_BUFF_SIZE];
> bool reset_ts = false;
> __le16 fifo_status;
> s64 ts = 0;
> @@ -404,19 +401,22 @@ int st_lsm6dsx_read_fifo(struct st_lsm6dsx_hw *hw)
>
> while (acc_sip > 0 || gyro_sip > 0 || ext_sip > 0) {
> if (gyro_sip > 0 && !(sip % gyro_sensor->decimator)) {
> - memcpy(gyro_buff, &hw->buff[offset],
> - ST_LSM6DSX_SAMPLE_SIZE);
> - offset += ST_LSM6DSX_SAMPLE_SIZE;
> + memcpy(hw->gyro_scan.channels,
> + &hw->buff[offset],
> + sizeof(hw->gyro_scan.channels));
> + offset += sizeof(hw->gyro_scan.channels);
> }
> if (acc_sip > 0 && !(sip % acc_sensor->decimator)) {
> - memcpy(acc_buff, &hw->buff[offset],
> - ST_LSM6DSX_SAMPLE_SIZE);
> - offset += ST_LSM6DSX_SAMPLE_SIZE;
> + memcpy(hw->acc_scan.channels,
> + &hw->buff[offset],
> + sizeof(hw->acc_scan.channels));
> + offset += sizeof(hw->acc_scan.channels);
> }
> if (ext_sip > 0 && !(sip % ext_sensor->decimator)) {
> - memcpy(ext_buff, &hw->buff[offset],
> - ST_LSM6DSX_SAMPLE_SIZE);
> - offset += ST_LSM6DSX_SAMPLE_SIZE;
> + memcpy(hw->ext_scan.channels,
> + &hw->buff[offset],
> + sizeof(hw->ext_scan.channels));
> + offset += sizeof(hw->ext_scan.channels);
> }
>
> if (ts_sip-- > 0) {
> @@ -446,19 +446,22 @@ int st_lsm6dsx_read_fifo(struct st_lsm6dsx_hw *hw)
> if (gyro_sip > 0 && !(sip % gyro_sensor->decimator)) {
> iio_push_to_buffers_with_timestamp(
> hw->iio_devs[ST_LSM6DSX_ID_GYRO],
> - gyro_buff, gyro_sensor->ts_ref + ts);
> + &hw->gyro_scan,
> + gyro_sensor->ts_ref + ts);
> gyro_sip--;
> }
> if (acc_sip > 0 && !(sip % acc_sensor->decimator)) {
> iio_push_to_buffers_with_timestamp(
> hw->iio_devs[ST_LSM6DSX_ID_ACC],
> - acc_buff, acc_sensor->ts_ref + ts);
> + &hw->acc_scan,
> + acc_sensor->ts_ref + ts);
> acc_sip--;
> }
> if (ext_sip > 0 && !(sip % ext_sensor->decimator)) {
> iio_push_to_buffers_with_timestamp(
> hw->iio_devs[ST_LSM6DSX_ID_EXT0],
> - ext_buff, ext_sensor->ts_ref + ts);
> + &hw->ext_scan,
> + ext_sensor->ts_ref + ts);
> ext_sip--;
> }
> sip++;
> @@ -543,7 +546,8 @@ int st_lsm6dsx_read_tagged_fifo(struct st_lsm6dsx_hw *hw)
> {
> u16 pattern_len = hw->sip * ST_LSM6DSX_TAGGED_SAMPLE_SIZE;
> u16 fifo_len, fifo_diff_mask;
> - u8 iio_buff[ST_LSM6DSX_IIO_BUFF_SIZE], tag;
> + u8 iio_buff[ST_LSM6DSX_IIO_BUFF_SIZE] __aligned(8);
here we can use hw->scan[0] and drop the array on the stack
> + u8 tag;
> bool reset_ts = false;
> int i, err, read_len;
> __le16 fifo_status;
> --
> 2.26.2
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply
* Re: [PATCH] build: resolve iptables-apply not getting installed
From: Pablo Neira Ayuso @ 2020-06-07 22:30 UTC (permalink / raw)
To: Jan Engelhardt; +Cc: netfilter-devel
In-Reply-To: <20200603133848.13672-1-jengelh@inai.de>
On Wed, Jun 03, 2020 at 03:38:48PM +0200, Jan Engelhardt wrote:
> ip6tables-apply gets installed but iptables-apply does not.
> That is wrong.
Also applied, thanks.
^ permalink raw reply
* Re: [PATCH] doc: document danger of applying REJECT to INVALID CTs
From: Pablo Neira Ayuso @ 2020-06-07 22:30 UTC (permalink / raw)
To: Jan Engelhardt; +Cc: netfilter-devel
In-Reply-To: <20200603133604.7169-1-jengelh@inai.de>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH ethtool v1] netlink: add master/slave configuration support
From: Stephen Hemminger @ 2020-06-07 22:30 UTC (permalink / raw)
To: Oleksij Rempel
Cc: Andrew Lunn, David S. Miller, Florian Fainelli, Heiner Kallweit,
Jakub Kicinski, Jonathan Corbet, Michal Kubecek, John W. Linville,
David Jander, kernel, linux-kernel, netdev, Russell King, mkl,
Marek Vasut, Christian Herber, Amit Cohen, Petr Machata
In-Reply-To: <20200526091025.25243-1-o.rempel@pengutronix.de>
On Tue, 26 May 2020 11:10:25 +0200
Oleksij Rempel <o.rempel@pengutronix.de> wrote:
> This UAPI is needed for BroadR-Reach 100BASE-T1 devices. Due to lack of
> auto-negotiation support, we needed to be able to configure the
> MASTER-SLAVE role of the port manually or from an application in user
> space.
>
> The same UAPI can be used for 1000BASE-T or MultiGBASE-T devices to
> force MASTER or SLAVE role. See IEEE 802.3-2018:
> 22.2.4.3.7 MASTER-SLAVE control register (Register 9)
> 22.2.4.3.8 MASTER-SLAVE status register (Register 10)
> 40.5.2 MASTER-SLAVE configuration resolution
> 45.2.1.185.1 MASTER-SLAVE config value (1.2100.14)
> 45.2.7.10 MultiGBASE-T AN control 1 register (Register 7.32)
>
> The MASTER-SLAVE role affects the clock configuration:
>
> -------------------------------------------------------------------------------
> When the PHY is configured as MASTER, the PMA Transmit function shall
> source TX_TCLK from a local clock source. When configured as SLAVE, the
> PMA Transmit function shall source TX_TCLK from the clock recovered from
> data stream provided by MASTER.
>
> iMX6Q KSZ9031 XXX
> ------\ /-----------\ /------------\
> | | | | |
> MAC |<----RGMII----->| PHY Slave |<------>| PHY Master |
> |<--- 125 MHz ---+-<------/ | | \ |
> ------/ \-----------/ \------------/
> ^
> \-TX_TCLK
>
> -------------------------------------------------------------------------------
>
> Since some clock or link related issues are only reproducible in a
> specific MASTER-SLAVE-role, MAC and PHY configuration, it is beneficial
> to provide generic (not 100BASE-T1 specific) interface to the user space
> for configuration flexibility and trouble shooting.
>
> Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
NAK
Open source projects have been working hard to remove the terms master and slave
in APIs and documentation. Apparently, Linux hasn't gotten the message.
It would make sense not to introduce new instances.
^ permalink raw reply
* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/3] drm/dp_mst: Fix the DDC I2C device unregistration of an MST port
From: Patchwork @ 2020-06-07 22:22 UTC (permalink / raw)
To: Imre Deak; +Cc: intel-gfx
In-Reply-To: <20200607212522.16935-1-imre.deak@intel.com>
== Series Details ==
Series: series starting with [1/3] drm/dp_mst: Fix the DDC I2C device unregistration of an MST port
URL : https://patchwork.freedesktop.org/series/78100/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_8597 -> Patchwork_17901
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/index.html
Known issues
------------
Here are the changes found in Patchwork_17901 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
- fi-bsw-n3050: [PASS][1] -> [DMESG-WARN][2] ([i915#1982])
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8597/fi-bsw-n3050/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/fi-bsw-n3050/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
- fi-byt-j1900: [PASS][3] -> [DMESG-WARN][4] ([i915#1982])
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8597/fi-byt-j1900/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/fi-byt-j1900/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
* igt@kms_cursor_legacy@basic-flip-after-cursor-legacy:
- fi-icl-guc: [PASS][5] -> [DMESG-WARN][6] ([i915#1982])
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8597/fi-icl-guc/igt@kms_cursor_legacy@basic-flip-after-cursor-legacy.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/fi-icl-guc/igt@kms_cursor_legacy@basic-flip-after-cursor-legacy.html
#### Possible fixes ####
* igt@i915_module_load@reload:
- {fi-tgl-dsi}: [DMESG-WARN][7] ([i915#1982]) -> [PASS][8] +1 similar issue
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8597/fi-tgl-dsi/igt@i915_module_load@reload.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/fi-tgl-dsi/igt@i915_module_load@reload.html
- fi-icl-y: [DMESG-WARN][9] ([i915#1982]) -> [PASS][10]
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8597/fi-icl-y/igt@i915_module_load@reload.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/fi-icl-y/igt@i915_module_load@reload.html
* igt@i915_pm_rpm@module-reload:
- fi-icl-guc: [DMESG-WARN][11] ([i915#1982]) -> [PASS][12]
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8597/fi-icl-guc/igt@i915_pm_rpm@module-reload.html
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/fi-icl-guc/igt@i915_pm_rpm@module-reload.html
#### Warnings ####
* igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
- fi-kbl-x1275: [DMESG-WARN][13] ([i915#62] / [i915#92]) -> [DMESG-WARN][14] ([i915#62] / [i915#92] / [i915#95]) +2 similar issues
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8597/fi-kbl-x1275/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/fi-kbl-x1275/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
* igt@kms_force_connector_basic@force-edid:
- fi-kbl-x1275: [DMESG-WARN][15] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][16] ([i915#62] / [i915#92]) +3 similar issues
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8597/fi-kbl-x1275/igt@kms_force_connector_basic@force-edid.html
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/fi-kbl-x1275/igt@kms_force_connector_basic@force-edid.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
[i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
[i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
[i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95
Participating hosts (48 -> 42)
------------------------------
Missing (6): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus
Build changes
-------------
* Linux: CI_DRM_8597 -> Patchwork_17901
CI-20190529: 20190529
CI_DRM_8597: aadd3cf12a7c515bca8752da797ded56a003617b @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_5696: 8d1744239f4300eb12d5bab14a30b79d9c8dd364 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_17901: c3019da98129f263800477ae0cd767c2179fc034 @ git://anongit.freedesktop.org/gfx-ci/linux
== Linux commits ==
c3019da98129 drm/dp_mst: Fix flushing the delayed port/mstb destroy work
f183ac475a8b drm/dp_mst: Fix the DDC I2C device registration of an MST port
be4cb6213842 drm/dp_mst: Fix the DDC I2C device unregistration of an MST port
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17901/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply
* [Intel-gfx] [PATCH 08/28] drm/i915/gt: Resubmit the virtual engine on schedule-out
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Having recognised that we do not change the sibling until we schedule
out, we can then defer the decision to resubmit the virtual engine from
the unwind of the active queue to scheduling out of the virtual context.
By keeping the unwind order intact on the local engine, we can preserve
data dependency ordering while doing a preempt-to-busy pass until we
have determined the new ELSP. This means that if we try to timeslice
between a virtual engine and a data-dependent ordinary request, the pair
will maintain their relative ordering and we will avoid the
resubmission, cancelling the timeslicing until further change.
The dilemma though is that we then may end up in a situation where the
'demotion' of the virtual request to an ordinary request in the engine
queue results in filling the ELSP[] with virtual requests instead of
spreading the load across the engines. To compensate for this, we mark
each virtual request and refuse to resubmit a virtual request in the
secondary ELSP slots, thus forcing subsequent virtual requests to be
scheduled out after timeslicing. By delaying the decision until we
schedule out, we will avoid unnecessary resubmission.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 133 ++++++++++++++++---------
drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +-
2 files changed, 89 insertions(+), 46 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d98e37900171..cbcbe694f931 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1117,46 +1117,17 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
__i915_request_unsubmit(rq);
- /*
- * Push the request back into the queue for later resubmission.
- * If this request is not native to this physical engine (i.e.
- * it came from a virtual source), push it back onto the virtual
- * engine so that it can be moved across onto another physical
- * engine as load dictates.
- */
- if (likely(rq->execution_mask == engine->mask)) {
- GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
- if (rq_prio(rq) != prio) {
- prio = rq_prio(rq);
- pl = i915_sched_lookup_priolist(engine, prio);
- }
- GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-
- list_move(&rq->sched.link, pl);
- set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+ if (rq_prio(rq) != prio) {
+ prio = rq_prio(rq);
+ pl = i915_sched_lookup_priolist(engine, prio);
+ }
+ GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
- active = rq;
- } else {
- struct intel_engine_cs *owner = rq->context->engine;
+ list_move(&rq->sched.link, pl);
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
- /*
- * Decouple the virtual breadcrumb before moving it
- * back to the virtual engine -- we don't want the
- * request to complete in the background and try
- * and cancel the breadcrumb on the virtual engine
- * (instead of the old engine where it is linked)!
- */
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
- &rq->fence.flags)) {
- spin_lock_nested(&rq->lock,
- SINGLE_DEPTH_NESTING);
- i915_request_cancel_breadcrumb(rq);
- spin_unlock(&rq->lock);
- }
- WRITE_ONCE(rq->engine, owner);
- owner->submit_request(rq);
- active = NULL;
- }
+ active = rq;
}
return active;
@@ -1400,12 +1371,54 @@ execlists_schedule_in(struct i915_request *rq, int idx)
return i915_request_get(rq);
}
+static void
+resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve)
+{
+ struct intel_engine_cs *engine = rq->engine;
+
+ /*
+ * Note that although __execlists_schedule_out() may be called from
+ * inside execlists_dequeue (under the spinlock), it can only do so
+ * as a result of request completion, and a completed request is
+ * not resubmitted.
+ */
+ spin_lock_irq(&engine->active.lock);
+
+ /*
+ * Decouple the virtual breadcrumb before moving it back to the virtual
+ * engine -- we don't want the request to complete in the background
+ * and then try and cancel the breadcrumb on the virtual engine
+ * (instead of the old engine where it is linked)!
+ */
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) {
+ spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
+ i915_request_cancel_breadcrumb(rq);
+ spin_unlock(&rq->lock);
+ }
+
+ clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ WRITE_ONCE(rq->engine, &ve->base);
+ ve->base.submit_request(rq);
+
+ spin_unlock_irq(&engine->active.lock);
+}
+
static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
if (READ_ONCE(ve->request))
tasklet_hi_schedule(&ve->base.execlists.tasklet);
+
+ /*
+ * This engine is now too busy to run this virtual request, so
+ * see if we can find an alternative engine for it to execute on.
+ * Once a request has become bonded to this engine, we treat it the
+ * same as other native request.
+ */
+ if (i915_request_in_priority_queue(rq) &&
+ rq->execution_mask != rq->engine->mask)
+ resubmit_virtual_request(rq, ve);
}
static inline void
@@ -1645,6 +1658,20 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
sentinel = i915_request_has_sentinel(rq);
+ /*
+ * We want virtual requests to only be in the first slot so
+ * that they are never stuck behind a hog and can be immediately
+ * transferred onto the next idle engine.
+ */
+ if (rq->execution_mask != engine->mask &&
+ port != execlists->pending) {
+ GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n",
+ engine->name,
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+
/* Hold tightly onto the lock to prevent concurrent retires! */
if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
@@ -2343,6 +2370,15 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (i915_request_has_sentinel(last))
goto done;
+ /*
+ * We avoid submitting virtual requests into
+ * the secondary ports so that we can migrate
+ * the request immediately to another engine
+ * rather than wait for the primary request.
+ */
+ if (rq->execution_mask != engine->mask)
+ goto done;
+
/*
* If GVT overrides us we only ever submit
* port[0], leaving port[1] empty. Note that we
@@ -3148,13 +3184,6 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
if (reset_in_progress(execlists))
return; /* defer until we restart the engine following reset */
- /* Hopefully we clear execlists->pending[] to let us through */
- if (READ_ONCE(execlists->pending[0]) &&
- tasklet_trylock(&execlists->tasklet)) {
- process_csb(engine);
- tasklet_unlock(&execlists->tasklet);
- }
-
__execlists_submission_tasklet(engine);
}
@@ -3177,11 +3206,25 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine,
return !list_empty(&engine->active.hold) && hold_request(rq);
}
+static void flush_csb(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *el = &engine->execlists;
+
+ if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
+ if (!reset_in_progress(el))
+ process_csb(engine);
+ tasklet_unlock(&el->tasklet);
+ }
+}
+
static void execlists_submit_request(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
unsigned long flags;
+ /* Hopefully we clear execlists->pending[] to let us through */
+ flush_csb(engine);
+
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->active.lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index f651bdf7f191..a8bcea8aa1b4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -4289,7 +4289,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
spin_lock_irq(&engine->active.lock);
__unwind_incomplete_requests(engine);
spin_unlock_irq(&engine->active.lock);
- GEM_BUG_ON(rq->engine != ve->engine);
+ GEM_BUG_ON(rq->engine != engine);
/* Reset the engine while keeping our active request on hold */
execlists_hold(engine, rq);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 20/28] drm/i915: Replace engine->schedule() with a known request operation
From: Chris Wilson @ 2020-06-07 22:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Looking to the future, we want to set the scheduling attributes
explicitly and so replace the generic engine->schedule() with the more
direct i915_request_set_priority().
What it loses in removing the 'schedule' name from the function, it
gains in having an explicit entry point with a stated goal.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/display/intel_display.c | 9 +----
drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 +-
drivers/gpu/drm/i915/gem/i915_gem_wait.c | 27 +++++----------
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 --
.../gpu/drm/i915/gt/intel_engine_heartbeat.c | 4 +--
drivers/gpu/drm/i915/gt/intel_engine_types.h | 29 ++++++++--------
drivers/gpu/drm/i915/gt/intel_engine_user.c | 2 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +-
drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 11 +++----
drivers/gpu/drm/i915/gt/selftest_lrc.c | 33 +++++--------------
drivers/gpu/drm/i915/i915_request.c | 11 ++++---
drivers/gpu/drm/i915/i915_scheduler.c | 15 +++++----
drivers/gpu/drm/i915/i915_scheduler.h | 3 +-
13 files changed, 57 insertions(+), 95 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 511555d444e5..797e3573d392 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -15888,13 +15888,6 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
intel_unpin_fb_vma(vma, old_plane_state->flags);
}
-static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
-{
- struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY };
-
- i915_gem_object_wait_priority(obj, 0, &attr);
-}
-
/**
* intel_prepare_plane_fb - Prepare fb for usage on plane
* @_plane: drm plane to prepare for
@@ -15971,7 +15964,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
if (ret)
return ret;
- fb_obj_bump_render_priority(obj);
+ i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
if (!new_plane_state->uapi.fence) { /* implicit fencing */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 2faa481cc18f..876c34982555 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -476,7 +476,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
long timeout);
int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
unsigned int flags,
- const struct i915_sched_attr *attr);
+ int prio);
void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
enum fb_op_origin origin);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index 8af55cd3e690..cefbbb3d9b52 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -93,28 +93,17 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
return timeout;
}
-static void __fence_set_priority(struct dma_fence *fence,
- const struct i915_sched_attr *attr)
+static void __fence_set_priority(struct dma_fence *fence, int prio)
{
- struct i915_request *rq;
- struct intel_engine_cs *engine;
-
if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
return;
- rq = to_request(fence);
- engine = rq->engine;
-
local_bh_disable();
- rcu_read_lock(); /* RCU serialisation for set-wedged protection */
- if (engine->schedule)
- engine->schedule(rq, attr);
- rcu_read_unlock();
+ i915_request_set_priority(to_request(fence), prio);
local_bh_enable(); /* kick the tasklets if queues were reprioritised */
}
-static void fence_set_priority(struct dma_fence *fence,
- const struct i915_sched_attr *attr)
+static void fence_set_priority(struct dma_fence *fence, int prio)
{
/* Recurse once into a fence-array */
if (dma_fence_is_array(fence)) {
@@ -122,16 +111,16 @@ static void fence_set_priority(struct dma_fence *fence,
int i;
for (i = 0; i < array->num_fences; i++)
- __fence_set_priority(array->fences[i], attr);
+ __fence_set_priority(array->fences[i], prio);
} else {
- __fence_set_priority(fence, attr);
+ __fence_set_priority(fence, prio);
}
}
int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
unsigned int flags,
- const struct i915_sched_attr *attr)
+ int prio)
{
struct dma_fence *excl;
@@ -146,7 +135,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
return ret;
for (i = 0; i < count; i++) {
- fence_set_priority(shared[i], attr);
+ fence_set_priority(shared[i], prio);
dma_fence_put(shared[i]);
}
@@ -156,7 +145,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
}
if (excl) {
- fence_set_priority(excl, attr);
+ fence_set_priority(excl, prio);
dma_fence_put(excl);
}
return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index e5141a897786..d79307d790da 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -334,9 +334,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
if (engine->context_size)
DRIVER_CAPS(i915)->has_logical_contexts = true;
- /* Nothing to do here, execute in order of dependencies */
- engine->schedule = NULL;
-
ewma__engine_latency_init(&engine->latency);
seqlock_init(&engine->stats.lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index ee002eb796cb..5251860e952d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -87,7 +87,7 @@ static void heartbeat(struct work_struct *wrk)
* but all other contexts, including the kernel
* context are stuck waiting for the signal.
*/
- } else if (engine->schedule &&
+ } else if (intel_engine_has_scheduler(engine) &&
rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
/*
* Gradually raise the priority of the heartbeat to
@@ -102,7 +102,7 @@ static void heartbeat(struct work_struct *wrk)
attr.priority = I915_PRIORITY_BARRIER;
local_bh_disable();
- engine->schedule(rq, &attr);
+ i915_request_set_priority(rq, attr.priority);
local_bh_enable();
} else {
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 073c3769e8cc..48e111f16dc5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -482,14 +482,6 @@ struct intel_engine_cs {
void (*bond_execute)(struct i915_request *rq,
struct dma_fence *signal);
- /*
- * Call when the priority on a request has changed and it and its
- * dependencies may need rescheduling. Note the request itself may
- * not be ready to run!
- */
- void (*schedule)(struct i915_request *request,
- const struct i915_sched_attr *attr);
-
void (*release)(struct intel_engine_cs *engine);
struct intel_engine_execlists execlists;
@@ -507,13 +499,14 @@ struct intel_engine_cs {
#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
-#define I915_ENGINE_HAS_PREEMPTION BIT(2)
-#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
-#define I915_ENGINE_HAS_TIMESLICES BIT(4)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
-#define I915_ENGINE_IS_VIRTUAL BIT(6)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
+#define I915_ENGINE_HAS_SCHEDULER BIT(2)
+#define I915_ENGINE_HAS_PREEMPTION BIT(3)
+#define I915_ENGINE_HAS_SEMAPHORES BIT(4)
+#define I915_ENGINE_HAS_TIMESLICES BIT(5)
+#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(6)
+#define I915_ENGINE_IS_VIRTUAL BIT(7)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(8)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(9)
unsigned int flags;
/*
@@ -599,6 +592,12 @@ intel_engine_supports_stats(const struct intel_engine_cs *engine)
return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}
+static inline bool
+intel_engine_has_scheduler(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_HAS_SCHEDULER;
+}
+
static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 848decee9066..1c0a7f3ec0bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -108,7 +108,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
for_each_uabi_engine(engine, i915) { /* all engines must agree! */
int i;
- if (engine->schedule)
+ if (intel_engine_has_scheduler(engine))
enabled |= (I915_SCHEDULER_CAP_ENABLED |
I915_SCHEDULER_CAP_PRIORITY);
else
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0ca3604ab846..3199c65fa7e8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -4938,7 +4938,6 @@ static void execlists_park(struct intel_engine_cs *engine)
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = execlists_submit_request;
- engine->schedule = i915_schedule;
engine->execlists.tasklet.func = execlists_submission_tasklet;
engine->reset.prepare = execlists_reset_prepare;
@@ -4949,6 +4948,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->park = execlists_park;
engine->unpark = NULL;
+ engine->flags |= I915_ENGINE_HAS_SCHEDULER;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
if (!intel_vgpu_active(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
@@ -5682,7 +5682,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
ve->base.cops = &virtual_context_ops;
ve->base.request_alloc = execlists_request_alloc;
- ve->base.schedule = i915_schedule;
ve->base.submit_request = virtual_submit_request;
ve->base.bond_execute = virtual_bond_execute;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 2af66f8ffbd2..afa4f88035ac 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -727,12 +727,11 @@ static int active_engine(void *data)
rq[idx] = i915_request_get(new);
i915_request_add(new);
- if (engine->schedule && arg->flags & TEST_PRIORITY) {
- struct i915_sched_attr attr = {
- .priority =
- i915_prandom_u32_max_state(512, &prng),
- };
- engine->schedule(rq[idx], &attr);
+ if (intel_engine_has_scheduler(engine) &&
+ arg->flags & TEST_PRIORITY) {
+ int prio = i915_prandom_u32_max_state(512, &prng);
+
+ i915_request_set_priority(rq[idx], prio);
}
err = active_request_put(old);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 15aaa1bf8943..052dcc59fcc5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -308,12 +308,8 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
i915_request_put(rq[0]);
if (prio) {
- struct i915_sched_attr attr = {
- .priority = prio,
- };
-
/* Alternatively preempt the spinner with ce[1] */
- engine->schedule(rq[1], &attr);
+ i915_request_set_priority(rq[1], prio);
}
/* And switch back to ce[0] for good measure */
@@ -760,9 +756,6 @@ release_queue(struct intel_engine_cs *engine,
struct i915_vma *vma,
int idx, int prio)
{
- struct i915_sched_attr attr = {
- .priority = prio,
- };
struct i915_request *rq;
u32 *cs;
@@ -787,7 +780,7 @@ release_queue(struct intel_engine_cs *engine,
i915_request_add(rq);
local_bh_disable();
- engine->schedule(rq, &attr);
+ i915_request_set_priority(rq, prio);
local_bh_enable(); /* kick tasklet */
i915_request_put(rq);
@@ -1193,7 +1186,6 @@ static int live_timeslice_queue(void *arg)
goto err_pin;
for_each_engine(engine, gt, id) {
- struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
struct i915_request *rq, *nop;
if (!intel_engine_has_preemption(engine))
@@ -1208,7 +1200,7 @@ static int live_timeslice_queue(void *arg)
err = PTR_ERR(rq);
goto err_heartbeat;
}
- engine->schedule(rq, &attr);
+ i915_request_set_priority(rq, I915_PRIORITY_MAX);
err = wait_for_submit(engine, rq, HZ / 2);
if (err) {
pr_err("%s: Timed out trying to submit semaphores\n",
@@ -1695,7 +1687,6 @@ static int live_late_preempt(void *arg)
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
- struct i915_sched_attr attr = {};
enum intel_engine_id id;
int err = -ENOMEM;
@@ -1758,8 +1749,7 @@ static int live_late_preempt(void *arg)
goto err_wedged;
}
- attr.priority = I915_PRIORITY_MAX;
- engine->schedule(rq, &attr);
+ i915_request_set_priority(rq, I915_PRIORITY_MAX);
if (!igt_wait_for_spinner(&spin_hi, rq)) {
pr_err("High priority context failed to preempt the low priority context\n");
@@ -2235,7 +2225,6 @@ static int live_preempt_cancel(void *arg)
static int live_suppress_self_preempt(void *arg)
{
- struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct preempt_client a, b;
@@ -2306,7 +2295,7 @@ static int live_suppress_self_preempt(void *arg)
i915_request_add(rq_b);
GEM_BUG_ON(i915_request_completed(rq_a));
- engine->schedule(rq_a, &attr);
+ i915_request_set_priority(rq_a, I915_PRIORITY_MAX);
igt_spinner_end(&a.spin);
if (!igt_wait_for_spinner(&b.spin, rq_b)) {
@@ -2374,7 +2363,6 @@ static int live_chain_preempt(void *arg)
goto err_client_hi;
for_each_engine(engine, gt, id) {
- struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
struct igt_live_test t;
struct i915_request *rq;
int ring_size, count, i;
@@ -2441,7 +2429,7 @@ static int live_chain_preempt(void *arg)
i915_request_get(rq);
i915_request_add(rq);
- engine->schedule(rq, &attr);
+ i915_request_set_priority(rq, I915_PRIORITY_MAX);
igt_spinner_end(&hi.spin);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
@@ -2630,14 +2618,12 @@ static int live_preempt_gang(void *arg)
return -EIO;
do {
- struct i915_sched_attr attr = { .priority = prio++ };
-
err = create_gang(engine, &rq);
if (err)
break;
/* Submit each spinner at increasing priority */
- engine->schedule(rq, &attr);
+ i915_request_set_priority(rq, prio++);
} while (prio <= I915_PRIORITY_MAX &&
!__igt_timeout(end_time, NULL));
pr_debug("%s: Preempt chain of %d requests\n",
@@ -2859,9 +2845,6 @@ static int preempt_user(struct intel_engine_cs *engine,
struct i915_vma *global,
int id)
{
- struct i915_sched_attr attr = {
- .priority = I915_PRIORITY_MAX
- };
struct i915_request *rq;
int err = 0;
u32 *cs;
@@ -2886,7 +2869,7 @@ static int preempt_user(struct intel_engine_cs *engine,
i915_request_get(rq);
i915_request_add(rq);
- engine->schedule(rq, &attr);
+ i915_request_set_priority(rq, I915_PRIORITY_MAX);
if (i915_request_wait(rq, 0, HZ / 2) < 0)
err = -ETIME;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f04f91b4d879..6c602b29026d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1182,7 +1182,7 @@ __i915_request_await_execution(struct i915_request *to,
}
/* Couple the dependency tree for PI on this exposed to->fence */
- if (to->engine->schedule) {
+ if (intel_engine_has_scheduler(to->engine)) {
err = i915_sched_node_add_dependency(&to->sched,
&from->sched,
I915_DEPENDENCY_WEAK);
@@ -1453,7 +1453,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
return 0;
}
- if (to->engine->schedule) {
+ if (intel_engine_has_scheduler(to->engine)) {
ret = i915_sched_node_add_dependency(&to->sched,
&from->sched,
I915_DEPENDENCY_EXTERNAL);
@@ -1663,7 +1663,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
__i915_sw_fence_await_dma_fence(&rq->submit,
&prev->fence,
&rq->dmaq);
- if (rq->engine->schedule)
+ if (intel_engine_has_scheduler(rq->engine))
__i915_sched_node_add_dependency(&rq->sched,
&prev->sched,
&rq->dep,
@@ -1729,8 +1729,9 @@ void __i915_request_queue(struct i915_request *rq,
* decide whether to preempt the entire chain so that it is ready to
* run at the earliest possible convenience.
*/
- if (attr && rq->engine->schedule)
- rq->engine->schedule(rq, attr);
+ if (attr)
+ i915_request_set_priority(rq, attr->priority);
+
i915_sw_fence_commit(&rq->semaphore);
i915_sw_fence_commit(&rq->submit);
}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 7246ffbb3e33..9437e9d1d445 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -216,10 +216,8 @@ static void kick_submission(struct intel_engine_cs *engine,
rcu_read_unlock();
}
-static void __i915_schedule(struct i915_sched_node *node,
- const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_sched_node *node, int prio)
{
- const int prio = max(attr->priority, node->attr.priority);
struct intel_engine_cs *engine;
struct i915_dependency *dep, *p;
struct i915_dependency stack;
@@ -233,6 +231,8 @@ static void __i915_schedule(struct i915_sched_node *node,
if (node_signaled(node))
return;
+ prio = max(prio, node->attr.priority);
+
stack.signaler = node;
list_add(&stack.dfs_link, &dfs);
@@ -286,7 +286,7 @@ static void __i915_schedule(struct i915_sched_node *node,
*/
if (node->attr.priority == I915_PRIORITY_INVALID) {
GEM_BUG_ON(!list_empty(&node->link));
- node->attr = *attr;
+ node->attr.priority = prio;
if (stack.dfs_link.next == stack.dfs_link.prev)
return;
@@ -341,10 +341,13 @@ static void __i915_schedule(struct i915_sched_node *node,
spin_unlock(&engine->active.lock);
}
-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+void i915_request_set_priority(struct i915_request *rq, int prio)
{
+ if (!intel_engine_has_scheduler(rq->engine))
+ return;
+
spin_lock_irq(&schedule_lock);
- __i915_schedule(&rq->sched, attr);
+ __i915_schedule(&rq->sched, prio);
spin_unlock_irq(&schedule_lock);
}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 1b3c1e1a6ec5..b8696edef446 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -36,8 +36,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
void i915_sched_node_fini(struct i915_sched_node *node);
-void i915_schedule(struct i915_request *request,
- const struct i915_sched_attr *attr);
+void i915_request_set_priority(struct i915_request *request, int prio);
struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 01/28] drm/i915: Adjust the sentinel assert to match implementation
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Sentinels are supposed to be the last request in the ELSP queue, not the
only one, so adjust the assert accordingly.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 14 +++-----------
1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d55a5e0466e5..db8a170b0e5c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1635,9 +1635,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
ccid = ce->lrc.ccid;
/*
- * Sentinels are supposed to be lonely so they flush the
- * current exection off the HW. Check that they are the
- * only request in the pending submission.
+ * Sentinels are supposed to be the last request so they flush
+ * the current exection off the HW. Check that they are the only
+ * request in the pending submission.
*/
if (sentinel) {
GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
@@ -1646,15 +1646,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
port - execlists->pending);
return false;
}
-
sentinel = i915_request_has_sentinel(rq);
- if (sentinel && port != execlists->pending) {
- GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n",
- engine->name,
- ce->timeline->fence_context,
- port - execlists->pending);
- return false;
- }
/* Hold tightly onto the lock to prevent concurrent retires! */
if (!spin_trylock_irqsave(&rq->lock, flags))
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 15/28] drm/i915: Lift waiter/signaler iterators
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Lift the list iteration defines for traversing the signaler/waiter lists
into i915_scheduler.h for reuse.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 10 ----------
drivers/gpu/drm/i915/i915_scheduler_types.h | 10 ++++++++++
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index cbcbe694f931..5f5ac05ccbe4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1872,16 +1872,6 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
}
-#define for_each_waiter(p__, rq__) \
- list_for_each_entry_lockless(p__, \
- &(rq__)->sched.waiters_list, \
- wait_link)
-
-#define for_each_signaler(p__, rq__) \
- list_for_each_entry_rcu(p__, \
- &(rq__)->sched.signalers_list, \
- signal_link)
-
static void defer_request(struct i915_request *rq, struct list_head * const pl)
{
LIST_HEAD(list);
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index f72e6c397b08..343ed44d5ed4 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -81,4 +81,14 @@ struct i915_dependency {
#define I915_DEPENDENCY_WEAK BIT(2)
};
+#define for_each_waiter(p__, rq__) \
+ list_for_each_entry_lockless(p__, \
+ &(rq__)->sched.waiters_list, \
+ wait_link)
+
+#define for_each_signaler(p__, rq__) \
+ list_for_each_entry_rcu(p__, \
+ &(rq__)->sched.signalers_list, \
+ signal_link)
+
#endif /* _I915_SCHEDULER_TYPES_H_ */
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 10/28] drm/i915/gem: Separate reloc validation into an earlier step
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Over the next couple of patches, we will want to lock all the modified
vma for relocation processing under a single ww_mutex. We neither want
to have to include the vma that are skipped (due to no modifications
required) nor do we want those to be marked as written too. So separate
out the reloc validation into an early step, which we can use both to
reject the execbuf before committing to making our changes, and to
filter out the unmodified vma.
This does introduce a second pass through the reloc[], but only if we
need to emit relocations.
v2: reuse the outer loop, not cut'n'paste.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 145 +++++++++++-------
1 file changed, 86 insertions(+), 59 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 23db79b806db..01ab1e15a142 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -911,9 +911,9 @@ static void eb_destroy(const struct i915_execbuffer *eb)
static inline u64
relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
- const struct i915_vma *target)
+ u64 target)
{
- return gen8_canonical_addr((int)reloc->delta + target->node.start);
+ return gen8_canonical_addr((int)reloc->delta + target);
}
static void reloc_cache_init(struct reloc_cache *cache,
@@ -1292,26 +1292,11 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
return 0;
}
-static u64
-relocate_entry(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- const struct drm_i915_gem_relocation_entry *reloc,
- const struct i915_vma *target)
-{
- u64 target_addr = relocation_target(reloc, target);
- int err;
-
- err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr);
- if (err)
- return err;
-
- return target->node.start | UPDATE;
-}
-
-static u64
-eb_relocate_entry(struct i915_execbuffer *eb,
- struct eb_vma *ev,
- const struct drm_i915_gem_relocation_entry *reloc)
+static int
+eb_reloc_prepare(struct i915_execbuffer *eb,
+ struct eb_vma *ev,
+ const struct drm_i915_gem_relocation_entry *reloc,
+ struct drm_i915_gem_relocation_entry __user *user)
{
struct drm_i915_private *i915 = eb->i915;
struct eb_vma *target;
@@ -1389,6 +1374,32 @@ eb_relocate_entry(struct i915_execbuffer *eb,
return -EINVAL;
}
+ return 1;
+}
+
+static int
+eb_reloc_entry(struct i915_execbuffer *eb,
+ struct eb_vma *ev,
+ const struct drm_i915_gem_relocation_entry *reloc,
+ struct drm_i915_gem_relocation_entry __user *user)
+{
+ struct eb_vma *target;
+ u64 offset;
+ int err;
+
+ /* we've already hold a reference to all valid objects */
+ target = eb_get_vma(eb, reloc->target_handle);
+ if (unlikely(!target))
+ return -ENOENT;
+
+ /*
+ * If the relocation already has the right value in it, no
+ * more work needs to be done.
+ */
+ offset = gen8_canonical_addr(target->vma->node.start);
+ if (offset == reloc->presumed_offset)
+ return 0;
+
/*
* If we write into the object, we need to force the synchronisation
* barrier, either with an asynchronous clflush or if we executed the
@@ -1399,11 +1410,41 @@ eb_relocate_entry(struct i915_execbuffer *eb,
*/
ev->flags &= ~EXEC_OBJECT_ASYNC;
- /* and update the user's relocation entry */
- return relocate_entry(eb, ev->vma, reloc, target->vma);
+ err = __reloc_entry_gpu(eb, ev->vma, reloc->offset,
+ relocation_target(reloc, offset));
+ if (err)
+ return err;
+
+ /*
+ * Note that reporting an error now
+ * leaves everything in an inconsistent
+ * state as we have *already* changed
+ * the relocation value inside the
+ * object. As we have not changed the
+ * reloc.presumed_offset or will not
+ * change the execobject.offset, on the
+ * call we may not rewrite the value
+ * inside the object, leaving it
+ * dangling and causing a GPU hang. Unless
+ * userspace dynamically rebuilds the
+ * relocations on each execbuf rather than
+ * presume a static tree.
+ *
+ * We did previously check if the relocations
+ * were writable (access_ok), an error now
+ * would be a strange race with mprotect,
+ * having already demonstrated that we
+ * can read from this userspace address.
+ */
+ __put_user(offset, &user->presumed_offset);
+ return 0;
}
-static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
+static long eb_reloc_vma(struct i915_execbuffer *eb, struct eb_vma *ev,
+ int (*fn)(struct i915_execbuffer *eb,
+ struct eb_vma *ev,
+ const struct drm_i915_gem_relocation_entry *reloc,
+ struct drm_i915_gem_relocation_entry __user *user))
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
@@ -1411,6 +1452,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
struct drm_i915_gem_relocation_entry __user *urelocs =
u64_to_user_ptr(entry->relocs_ptr);
unsigned long remain = entry->relocation_count;
+ int required = 0;
if (unlikely(remain > N_RELOC(ULONG_MAX)))
return -EINVAL;
@@ -1443,42 +1485,18 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
remain -= count;
do {
- u64 offset = eb_relocate_entry(eb, ev, r);
+ int ret;
- if (likely(offset == 0)) {
- } else if ((s64)offset < 0) {
- return (int)offset;
- } else {
- /*
- * Note that reporting an error now
- * leaves everything in an inconsistent
- * state as we have *already* changed
- * the relocation value inside the
- * object. As we have not changed the
- * reloc.presumed_offset or will not
- * change the execobject.offset, on the
- * call we may not rewrite the value
- * inside the object, leaving it
- * dangling and causing a GPU hang. Unless
- * userspace dynamically rebuilds the
- * relocations on each execbuf rather than
- * presume a static tree.
- *
- * We did previously check if the relocations
- * were writable (access_ok), an error now
- * would be a strange race with mprotect,
- * having already demonstrated that we
- * can read from this userspace address.
- */
- offset = gen8_canonical_addr(offset & ~UPDATE);
- __put_user(offset,
- &urelocs[r - stack].presumed_offset);
- }
+ ret = fn(eb, ev, r, &urelocs[r - stack]);
+ if (ret < 0)
+ return ret;
+
+ required |= ret;
} while (r++, --count);
urelocs += ARRAY_SIZE(stack);
} while (remain);
- return 0;
+ return required;
}
static int eb_relocate(struct i915_execbuffer *eb)
@@ -1497,12 +1515,21 @@ static int eb_relocate(struct i915_execbuffer *eb)
/* The objects are in their final locations, apply the relocations. */
if (eb->args->flags & __EXEC_HAS_RELOC) {
- struct eb_vma *ev;
+ struct eb_vma *ev, *en;
int flush;
+ list_for_each_entry_safe(ev, en, &eb->relocs, reloc_link) {
+ err = eb_reloc_vma(eb, ev, eb_reloc_prepare);
+ if (err < 0)
+ return err;
+
+ if (err == 0)
+ list_del_init(&ev->reloc_link);
+ }
+
list_for_each_entry(ev, &eb->relocs, reloc_link) {
- err = eb_relocate_vma(eb, ev);
- if (err)
+ err = eb_reloc_vma(eb, ev, eb_reloc_entry);
+ if (err < 0)
break;
}
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 19/28] drm/i915: Remove I915_USER_PRIORITY_SHIFT
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
As we do not have any internal priority levels, the priority can be set
directly from the user values.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/display/intel_display.c | 4 +-
drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 +--
.../i915/gem/selftests/i915_gem_object_blt.c | 4 +-
.../gpu/drm/i915/gt/intel_engine_heartbeat.c | 6 +--
drivers/gpu/drm/i915/gt/selftest_lrc.c | 44 +++++++------------
drivers/gpu/drm/i915/i915_priolist_types.h | 3 --
drivers/gpu/drm/i915/i915_scheduler.c | 1 -
7 files changed, 23 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index b16aca0fe5f0..511555d444e5 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -15890,9 +15890,7 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
{
- struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY),
- };
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY };
i915_gem_object_wait_priority(obj, 0, &attr);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index f5d59d18cd5b..ef76dff0e255 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -712,7 +712,7 @@ __create_context(struct drm_i915_private *i915)
kref_init(&ctx->ref);
ctx->i915 = i915;
- ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
+ ctx->sched.priority = I915_PRIORITY_NORMAL;
mutex_init(&ctx->mutex);
spin_lock_init(&ctx->stale.lock);
@@ -1999,7 +1999,7 @@ static int set_priority(struct i915_gem_context *ctx,
!capable(CAP_SYS_NICE))
return -EPERM;
- ctx->sched.priority = I915_USER_PRIORITY(priority);
+ ctx->sched.priority = priority;
context_apply_all(ctx, __apply_priority, ctx);
return 0;
@@ -2502,7 +2502,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->size = 0;
- args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
+ args->value = ctx->sched.priority;
break;
case I915_CONTEXT_PARAM_SSEU:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index 23b6e11bbc3e..c4c04fb97d14 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -220,7 +220,7 @@ static int igt_fill_blt_thread(void *arg)
return PTR_ERR(ctx);
prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
- ctx->sched.priority = I915_USER_PRIORITY(prio);
+ ctx->sched.priority = prio;
}
ce = i915_gem_context_get_engine(ctx, 0);
@@ -338,7 +338,7 @@ static int igt_copy_blt_thread(void *arg)
return PTR_ERR(ctx);
prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
- ctx->sched.priority = I915_USER_PRIORITY(prio);
+ ctx->sched.priority = prio;
}
ce = i915_gem_context_get_engine(ctx, 0);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index eecf666c772d..ee002eb796cb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -57,9 +57,7 @@ static void show_heartbeat(const struct i915_request *rq,
static void heartbeat(struct work_struct *wrk)
{
- struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
- };
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
struct intel_engine_cs *engine =
container_of(wrk, typeof(*engine), heartbeat.work.work);
struct intel_context *ce = engine->kernel_context;
@@ -99,7 +97,7 @@ static void heartbeat(struct work_struct *wrk)
*/
attr.priority = 0;
if (rq->sched.attr.priority >= attr.priority)
- attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
+ attr.priority = I915_PRIORITY_HEARTBEAT;
if (rq->sched.attr.priority >= attr.priority)
attr.priority = I915_PRIORITY_BARRIER;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index a0248c47d7bd..15aaa1bf8943 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -360,7 +360,7 @@ static int live_unlite_switch(void *arg)
static int live_unlite_preempt(void *arg)
{
- return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
+ return live_unlite_restore(arg, I915_PRIORITY_MAX);
}
static int live_pin_rewind(void *arg)
@@ -1193,9 +1193,7 @@ static int live_timeslice_queue(void *arg)
goto err_pin;
for_each_engine(engine, gt, id) {
- struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
- };
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
struct i915_request *rq, *nop;
if (!intel_engine_has_preemption(engine))
@@ -1410,14 +1408,12 @@ static int live_busywait_preempt(void *arg)
ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
return -ENOMEM;
- ctx_hi->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+ ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
- ctx_lo->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+ ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
@@ -1620,14 +1616,12 @@ static int live_preempt(void *arg)
ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
- ctx_hi->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+ ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
- ctx_lo->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+ ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
for_each_engine(engine, gt, id) {
struct igt_live_test t;
@@ -1723,7 +1717,7 @@ static int live_late_preempt(void *arg)
goto err_ctx_hi;
/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
- ctx_lo->sched.priority = I915_USER_PRIORITY(1);
+ ctx_lo->sched.priority = 1;
for_each_engine(engine, gt, id) {
struct igt_live_test t;
@@ -1764,7 +1758,7 @@ static int live_late_preempt(void *arg)
goto err_wedged;
}
- attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
+ attr.priority = I915_PRIORITY_MAX;
engine->schedule(rq, &attr);
if (!igt_wait_for_spinner(&spin_hi, rq)) {
@@ -1848,7 +1842,7 @@ static int live_nopreempt(void *arg)
return -ENOMEM;
if (preempt_client_init(gt, &b))
goto err_client_a;
- b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
+ b.ctx->sched.priority = I915_PRIORITY_MAX;
for_each_engine(engine, gt, id) {
struct i915_request *rq_a, *rq_b;
@@ -2241,11 +2235,9 @@ static int live_preempt_cancel(void *arg)
static int live_suppress_self_preempt(void *arg)
{
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
- };
struct preempt_client a, b;
enum intel_engine_id id;
int err = -ENOMEM;
@@ -2382,9 +2374,7 @@ static int live_chain_preempt(void *arg)
goto err_client_hi;
for_each_engine(engine, gt, id) {
- struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
- };
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
struct igt_live_test t;
struct i915_request *rq;
int ring_size, count, i;
@@ -2640,9 +2630,7 @@ static int live_preempt_gang(void *arg)
return -EIO;
do {
- struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(prio++),
- };
+ struct i915_sched_attr attr = { .priority = prio++ };
err = create_gang(engine, &rq);
if (err)
@@ -2679,7 +2667,7 @@ static int live_preempt_gang(void *arg)
drm_info_printer(engine->i915->drm.dev);
pr_err("Failed to flush chain of %d requests, at %d\n",
- prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
+ prio, rq_prio(rq));
intel_engine_dump(engine, &p,
"%s\n", engine->name);
@@ -3053,14 +3041,12 @@ static int live_preempt_timeout(void *arg)
ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
- ctx_hi->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+ ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
- ctx_lo->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+ ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
for_each_engine(engine, gt, id) {
unsigned long saved_timeout;
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 9a7657bb002e..bc2fa84f98a8 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -24,9 +24,6 @@ enum {
I915_PRIORITY_DISPLAY,
};
-#define I915_USER_PRIORITY_SHIFT 0
-#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
-
/* Smallest priority value that cannot be bumped. */
#define I915_PRIORITY_INVALID (INT_MIN)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 7945cc161a12..7246ffbb3e33 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -71,7 +71,6 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
lockdep_assert_held(&engine->active.lock);
assert_priolists(execlists);
- prio >>= I915_USER_PRIORITY_SHIFT;
if (unlikely(execlists->no_priolist))
prio = I915_PRIORITY_NORMAL;
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 18/28] drm/i915: Strip out internal priorities
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Since we are not using any internal priority levels, and in the next few
patches will introduce a new index for which the optimisation is not so
clear cut, discard the small table within the priolist.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
.../gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 22 ++------
drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 -
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 +--
drivers/gpu/drm/i915/i915_priolist_types.h | 8 +--
drivers/gpu/drm/i915/i915_scheduler.c | 51 +++----------------
drivers/gpu/drm/i915/i915_scheduler.h | 18 ++-----
7 files changed, 21 insertions(+), 88 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index f67ad937eefb..eecf666c772d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -97,7 +97,7 @@ static void heartbeat(struct work_struct *wrk)
* low latency and no jitter] the chance to naturally
* complete before being preempted.
*/
- attr.priority = I915_PRIORITY_MASK;
+ attr.priority = 0;
if (rq->sched.attr.priority >= attr.priority)
attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
if (rq->sched.attr.priority >= attr.priority)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 5f5ac05ccbe4..0ca3604ab846 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -434,22 +434,13 @@ static int effective_prio(const struct i915_request *rq)
static int queue_prio(const struct intel_engine_execlists *execlists)
{
- struct i915_priolist *p;
struct rb_node *rb;
rb = rb_first_cached(&execlists->queue);
if (!rb)
return INT_MIN;
- /*
- * As the priolist[] are inverted, with the highest priority in [0],
- * we have to flip the index value to become priority.
- */
- p = to_priolist(rb);
- if (!I915_USER_PRIORITY_SHIFT)
- return p->priority;
-
- return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
+ return to_priolist(rb)->priority;
}
static inline bool need_preempt(const struct intel_engine_cs *engine,
@@ -2324,9 +2315,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
- int i;
- priolist_for_each_request_consume(rq, rn, p, i) {
+ priolist_for_each_request_consume(rq, rn, p) {
bool merge = true;
/*
@@ -4323,9 +4313,8 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
- int i;
- priolist_for_each_request_consume(rq, rn, p, i) {
+ priolist_for_each_request_consume(rq, rn, p) {
mark_eio(rq);
__i915_request_submit(rq);
}
@@ -5337,7 +5326,7 @@ static int __execlists_context_alloc(struct intel_context *ce,
static struct list_head *virtual_queue(struct virtual_engine *ve)
{
- return &ve->base.execlists.default_priolist.requests[0];
+ return &ve->base.execlists.default_priolist.requests;
}
static void virtual_context_destroy(struct kref *kref)
@@ -5896,9 +5885,8 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
count = 0;
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
- int i;
- priolist_for_each_request(rq, p, i) {
+ priolist_for_each_request(rq, p) {
if (count++ < max - 1)
show_request(m, rq, "\t\tQ ");
else
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index a8bcea8aa1b4..a0248c47d7bd 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -964,7 +964,6 @@ create_rewinder(struct intel_context *ce,
intel_ring_advance(rq, cs);
- rq->sched.attr.priority = I915_PRIORITY_MASK;
err = 0;
err:
i915_request_get(rq);
@@ -5059,7 +5058,6 @@ create_timestamp(struct intel_context *ce, void *slot, int idx)
intel_ring_advance(rq, cs);
- rq->sched.attr.priority = I915_PRIORITY_MASK;
err = 0;
err:
i915_request_get(rq);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 94eb63f309ce..0c42e8b0c211 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -312,9 +312,8 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
- int i;
- priolist_for_each_request_consume(rq, rn, p, i) {
+ priolist_for_each_request_consume(rq, rn, p) {
if (last && rq->context != last->context) {
if (port == last_port)
goto done;
@@ -463,9 +462,8 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
- int i;
- priolist_for_each_request_consume(rq, rn, p, i) {
+ priolist_for_each_request_consume(rq, rn, p) {
list_del_init(&rq->sched.link);
__i915_request_submit(rq);
dma_fence_set_error(&rq->fence, -EIO);
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 8aa7866ec6b6..9a7657bb002e 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -27,11 +27,8 @@ enum {
#define I915_USER_PRIORITY_SHIFT 0
#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
-#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
-#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
-
/* Smallest priority value that cannot be bumped. */
-#define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK)
+#define I915_PRIORITY_INVALID (INT_MIN)
/*
* Requests containing performance queries must not be preempted by
@@ -45,9 +42,8 @@ enum {
#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
struct i915_priolist {
- struct list_head requests[I915_PRIORITY_COUNT];
+ struct list_head requests;
struct rb_node node;
- unsigned long used;
int priority;
};
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 250832768279..7945cc161a12 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -43,7 +43,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
static void assert_priolists(struct intel_engine_execlists * const execlists)
{
struct rb_node *rb;
- long last_prio, i;
+ long last_prio;
if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
return;
@@ -57,14 +57,6 @@ static void assert_priolists(struct intel_engine_execlists * const execlists)
GEM_BUG_ON(p->priority > last_prio);
last_prio = p->priority;
-
- GEM_BUG_ON(!p->used);
- for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
- if (list_empty(&p->requests[i]))
- continue;
-
- GEM_BUG_ON(!(p->used & BIT(i)));
- }
}
}
@@ -75,13 +67,10 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
struct i915_priolist *p;
struct rb_node **parent, *rb;
bool first = true;
- int idx, i;
lockdep_assert_held(&engine->active.lock);
assert_priolists(execlists);
- /* buckets sorted from highest [in slot 0] to lowest priority */
- idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
prio >>= I915_USER_PRIORITY_SHIFT;
if (unlikely(execlists->no_priolist))
prio = I915_PRIORITY_NORMAL;
@@ -99,7 +88,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
parent = &rb->rb_right;
first = false;
} else {
- goto out;
+ return &p->requests;
}
}
@@ -125,15 +114,12 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
}
p->priority = prio;
- for (i = 0; i < ARRAY_SIZE(p->requests); i++)
- INIT_LIST_HEAD(&p->requests[i]);
+ INIT_LIST_HEAD(&p->requests);
+
rb_link_node(&p->node, rb, parent);
rb_insert_color_cached(&p->node, &execlists->queue, first);
- p->used = 0;
-out:
- p->used |= BIT(idx);
- return &p->requests[idx];
+ return &p->requests;
}
void __i915_priolist_free(struct i915_priolist *p)
@@ -363,30 +349,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
spin_unlock_irq(&schedule_lock);
}
-static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
-{
- struct i915_sched_attr attr = node->attr;
-
- if (attr.priority & bump)
- return;
-
- attr.priority |= bump;
- __i915_schedule(node, &attr);
-}
-
-void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
-{
- unsigned long flags;
-
- GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
- if (READ_ONCE(rq->sched.attr.priority) & bump)
- return;
-
- spin_lock_irqsave(&schedule_lock, flags);
- __bump_priority(&rq->sched, bump);
- spin_unlock_irqrestore(&schedule_lock, flags);
-}
-
void i915_sched_node_init(struct i915_sched_node *node)
{
INIT_LIST_HEAD(&node->signalers_list);
@@ -570,8 +532,7 @@ int __init i915_global_scheduler_init(void)
if (!global.slab_dependencies)
return -ENOMEM;
- global.slab_priorities = KMEM_CACHE(i915_priolist,
- SLAB_HWCACHE_ALIGN);
+ global.slab_priorities = KMEM_CACHE(i915_priolist, 0);
if (!global.slab_priorities)
goto err_priorities;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 13432add8929..1b3c1e1a6ec5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -13,17 +13,11 @@
#include "i915_scheduler_types.h"
-#define priolist_for_each_request(it, plist, idx) \
- for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
- list_for_each_entry(it, &(plist)->requests[idx], sched.link)
-
-#define priolist_for_each_request_consume(it, n, plist, idx) \
- for (; \
- (plist)->used ? (idx = __ffs((plist)->used)), 1 : 0; \
- (plist)->used &= ~BIT(idx)) \
- list_for_each_entry_safe(it, n, \
- &(plist)->requests[idx], \
- sched.link)
+#define priolist_for_each_request(it, plist) \
+ list_for_each_entry(it, &(plist)->requests, sched.link)
+
+#define priolist_for_each_request_consume(it, n, plist) \
+ list_for_each_entry_safe(it, n, &(plist)->requests, sched.link)
void i915_sched_node_init(struct i915_sched_node *node);
void i915_sched_node_reinit(struct i915_sched_node *node);
@@ -45,8 +39,6 @@ void i915_sched_node_fini(struct i915_sched_node *node);
void i915_schedule(struct i915_request *request,
const struct i915_sched_attr *attr);
-void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
-
struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 22/28] drm/i915: Teach the i915_dependency to use a double-lock
From: Chris Wilson @ 2020-06-07 22:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Currently, we construct and tear down the i915_dependency chains using a
global spinlock. As the lists are entirely local, it should be possible
to use a double-lock with an explicit nesting [signaler -> waiter,
always] and so avoid the costly convenience of a global spinlock.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +--
drivers/gpu/drm/i915/i915_request.c | 2 +-
drivers/gpu/drm/i915/i915_scheduler.c | 44 +++++++++++++--------
drivers/gpu/drm/i915/i915_scheduler.h | 2 +-
drivers/gpu/drm/i915/i915_scheduler_types.h | 1 +
5 files changed, 34 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index af6f78eca9ad..3fb1b4c67adb 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1884,7 +1884,7 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
- if (p->flags & I915_DEPENDENCY_WEAK)
+ if (!p->waiter || p->flags & I915_DEPENDENCY_WEAK)
continue;
/* Leave semaphores spinning on the other engines */
@@ -2726,7 +2726,7 @@ static void __execlists_hold(struct i915_request *rq)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
- if (p->flags & I915_DEPENDENCY_WEAK)
+ if (!p->waiter || p->flags & I915_DEPENDENCY_WEAK)
continue;
/* Leave semaphores spinning on the other engines */
@@ -2853,7 +2853,7 @@ static void __execlists_unhold(struct i915_request *rq)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
- if (p->flags & I915_DEPENDENCY_WEAK)
+ if (!p->waiter || p->flags & I915_DEPENDENCY_WEAK)
continue;
/* Propagate any change in error status */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6c602b29026d..a09fe74bb818 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -338,7 +338,7 @@ bool i915_request_retire(struct i915_request *rq)
intel_context_unpin(rq->context);
free_capture_list(rq);
- i915_sched_node_fini(&rq->sched);
+ i915_sched_node_retire(&rq->sched);
i915_request_put(rq);
return true;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 9437e9d1d445..f9cd8baaefcd 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -353,6 +353,8 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
void i915_sched_node_init(struct i915_sched_node *node)
{
+ spin_lock_init(&node->lock);
+
INIT_LIST_HEAD(&node->signalers_list);
INIT_LIST_HEAD(&node->waiters_list);
INIT_LIST_HEAD(&node->link);
@@ -390,7 +392,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
{
bool ret = false;
- spin_lock_irq(&schedule_lock);
+ /* The signal->lock is always the outer lock in this double-lock. */
+ spin_lock_irq(&signal->lock);
if (!node_signaled(signal)) {
INIT_LIST_HEAD(&dep->dfs_link);
@@ -399,15 +402,17 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
dep->flags = flags;
/* All set, now publish. Beware the lockless walkers. */
+ spin_lock_nested(&node->lock, SINGLE_DEPTH_NESTING);
list_add_rcu(&dep->signal_link, &node->signalers_list);
list_add_rcu(&dep->wait_link, &signal->waiters_list);
+ spin_unlock(&node->lock);
/* Propagate the chains */
node->flags |= signal->flags;
ret = true;
}
- spin_unlock_irq(&schedule_lock);
+ spin_unlock_irq(&signal->lock);
return ret;
}
@@ -474,39 +479,46 @@ bool i915_sched_node_verify_dag(struct i915_sched_node *waiter,
return result;
}
-void i915_sched_node_fini(struct i915_sched_node *node)
+void i915_sched_node_retire(struct i915_sched_node *node)
{
struct i915_dependency *dep, *tmp;
- spin_lock_irq(&schedule_lock);
+ spin_lock_irq(&node->lock);
/*
* Everyone we depended upon (the fences we wait to be signaled)
* should retire before us and remove themselves from our list.
* However, retirement is run independently on each timeline and
- * so we may be called out-of-order.
+ * so we may be called out-of-order. As we need to avoid taking
+ * the signaler's lock, just mark up our completion and be wary
+ * in traversing the signalers->waiters_list.
*/
- list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
- GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
- list_del_rcu(&dep->wait_link);
- if (dep->flags & I915_DEPENDENCY_ALLOC)
- i915_dependency_free(dep);
+ list_for_each_entry(dep, &node->signalers_list, signal_link) {
+ GEM_BUG_ON(dep->waiter != node);
+ WRITE_ONCE(dep->waiter, NULL);
}
- INIT_LIST_HEAD(&node->signalers_list);
+ INIT_LIST_HEAD_RCU(&node->signalers_list);
/* Remove ourselves from everyone who depends upon us */
list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
+ struct i915_sched_node *w;
+
GEM_BUG_ON(dep->signaler != node);
- GEM_BUG_ON(!list_empty(&dep->dfs_link));
- list_del_rcu(&dep->signal_link);
+ w = READ_ONCE(dep->waiter);
+ if (w) {
+ spin_lock_nested(&w->lock, SINGLE_DEPTH_NESTING);
+ if (READ_ONCE(dep->waiter))
+ list_del_rcu(&dep->signal_link);
+ spin_unlock(&w->lock);
+ }
+
if (dep->flags & I915_DEPENDENCY_ALLOC)
i915_dependency_free(dep);
}
- INIT_LIST_HEAD(&node->waiters_list);
+ INIT_LIST_HEAD_RCU(&node->waiters_list);
- spin_unlock_irq(&schedule_lock);
+ spin_unlock_irq(&node->lock);
}
static void i915_global_scheduler_shrink(void)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index b8696edef446..b26a13ef6feb 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -34,7 +34,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
struct i915_sched_node *signal,
unsigned long flags);
-void i915_sched_node_fini(struct i915_sched_node *node);
+void i915_sched_node_retire(struct i915_sched_node *node);
void i915_request_set_priority(struct i915_request *request, int prio);
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 343ed44d5ed4..3246430eb1c1 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -60,6 +60,7 @@ struct i915_sched_attr {
* others.
*/
struct i915_sched_node {
+ spinlock_t lock; /* protect the lists */
struct list_head signalers_list; /* those before us, we depend upon */
struct list_head waiters_list; /* those after us, they depend upon us */
struct list_head link;
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 13/28] drm/i915/gem: Add all GPU reloc awaits/signals en masse
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Asynchronous waits and signaling form a traditional semaphore with all
the usual ordering problems with taking multiple locks. If we want to
add more than one wait on a shared resource by the GPU, we must ensure
that all the associated timelines are advanced atomically, ergo we must
lock all the timelines en masse.
Testcase: igt/gem_exec_reloc/basic-concurrent16
Fixes: 0e97fbb08055 ("drm/i915/gem: Use a single chained reloc batches for a single execbuf")
References: https://gitlab.freedesktop.org/drm/intel/-/issues/1889
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 114 ++++++++++++------
.../i915/gem/selftests/i915_gem_execbuffer.c | 24 ++--
2 files changed, 93 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 83cea2ea7c61..8f3c1cf5af31 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -259,7 +259,6 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
- struct i915_vma *target;
struct i915_request *rq;
struct i915_vma *rq_vma;
u32 *rq_cmd;
@@ -924,7 +923,6 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
- cache->target = NULL;
}
#define RELOC_TAIL 4
@@ -1038,26 +1036,6 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
i915_request_add(rq);
}
-static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- int err;
-
- i915_vma_lock(vma);
-
- if (obj->cache_dirty & ~obj->cache_coherent)
- i915_gem_clflush_object(obj, 0);
- obj->write_domain = 0;
-
- err = i915_request_await_object(rq, vma->obj, true);
- if (err == 0)
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-
- i915_vma_unlock(vma);
-
- return err;
-}
-
static int
__reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
{
@@ -1147,24 +1125,12 @@ __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
return err;
}
-static u32 *reloc_batch_grow(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- unsigned int len)
+static u32 *reloc_batch_grow(struct i915_execbuffer *eb, unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
u32 *cmd;
int err;
- if (vma != cache->target) {
- err = reloc_move_to_gpu(cache->rq, vma);
- if (unlikely(err)) {
- i915_request_set_error_once(cache->rq, err);
- return ERR_PTR(err);
- }
-
- cache->target = vma;
- }
-
if (unlikely(cache->rq_size + len >
PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
err = reloc_gpu_chain(cache);
@@ -1210,7 +1176,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
else
len = 3;
- batch = reloc_batch_grow(eb, vma, len);
+ batch = reloc_batch_grow(eb, len);
if (IS_ERR(batch))
return PTR_ERR(batch);
@@ -1471,6 +1437,78 @@ static long eb_reloc_vma(struct i915_execbuffer *eb, struct eb_vma *ev,
return required;
}
+static int reloc_move_to_gpu(struct reloc_cache *cache, struct eb_vma *ev)
+{
+ struct i915_request *rq = cache->rq;
+ struct i915_vma *vma = ev->vma;
+ struct drm_i915_gem_object *obj = vma->obj;
+ int err;
+
+ if (obj->cache_dirty & ~obj->cache_coherent)
+ i915_gem_clflush_object(obj, 0);
+
+ obj->write_domain = I915_GEM_DOMAIN_RENDER;
+ obj->read_domains = I915_GEM_DOMAIN_RENDER;
+
+ err = i915_request_await_object(rq, obj, true);
+ if (err)
+ return err;
+
+ err = __i915_vma_move_to_active(vma, rq);
+ if (err)
+ return err;
+
+ dma_resv_add_excl_fence(vma->resv, &rq->fence);
+
+ return 0;
+}
+
+static int
+lock_relocs(struct i915_execbuffer *eb)
+{
+ struct ww_acquire_ctx acquire;
+ struct eb_vma *ev;
+ int err = 0;
+
+ ww_acquire_init(&acquire, &reservation_ww_class);
+
+ list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ struct i915_vma *vma = ev->vma;
+
+ err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
+ if (err == -EDEADLK) {
+ struct eb_vma *unlock = ev, *en;
+
+ list_for_each_entry_safe_continue_reverse(unlock, en,
+ &eb->relocs,
+ reloc_link) {
+ ww_mutex_unlock(&unlock->vma->resv->lock);
+ list_move_tail(&unlock->reloc_link,
+ &eb->relocs);
+ }
+
+ GEM_BUG_ON(!list_is_first(&ev->reloc_link,
+ &eb->relocs));
+ err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+ &acquire);
+ }
+ if (err)
+ break;
+ }
+
+ ww_acquire_done(&acquire);
+
+ list_for_each_entry_continue_reverse(ev, &eb->relocs, reloc_link) {
+ if (err == 0)
+ err = reloc_move_to_gpu(&eb->reloc_cache, ev);
+ ww_mutex_unlock(&ev->vma->resv->lock);
+ }
+
+ ww_acquire_fini(&acquire);
+
+ return err;
+}
+
static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
{
return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
@@ -1499,6 +1537,10 @@ static int reloc_gpu(struct i915_execbuffer *eb)
return err;
GEM_BUG_ON(!eb->reloc_cache.rq);
+ err = lock_relocs(eb);
+ if (err)
+ goto out;
+
err = reloc_gpu_emit(&eb->reloc_cache);
if (err)
goto out;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index faed6480a792..4f10b51f9a7e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -24,15 +24,15 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0);
const u32 *map = page_mask_bits(obj->mm.mapping);
struct i915_request *rq;
- struct i915_vma *vma;
+ struct eb_vma ev;
int err;
int i;
- vma = i915_vma_instance(obj, eb->context->vm, NULL);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ ev.vma = i915_vma_instance(obj, eb->context->vm, NULL);
+ if (IS_ERR(ev.vma))
+ return PTR_ERR(ev.vma);
- err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ err = i915_vma_pin(ev.vma, 0, 0, PIN_USER | PIN_HIGH);
if (err)
return err;
@@ -40,17 +40,22 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
if (err)
goto unpin_vma;
+ list_add(&ev.reloc_link, &eb->relocs);
+ err = lock_relocs(eb);
+ if (err)
+ goto unpin_vma;
+
err = reloc_gpu_emit(&eb->reloc_cache);
if (err)
goto unpin_vma;
/* 8-Byte aligned */
- err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
+ err = __reloc_entry_gpu(eb, ev.vma, offsets[0] * sizeof(u32), 0);
if (err)
goto unpin_vma;
/* !8-Byte aligned */
- err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
+ err = __reloc_entry_gpu(eb, ev.vma, offsets[1] * sizeof(u32), 1);
if (err)
goto unpin_vma;
@@ -62,7 +67,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
eb->reloc_cache.rq_size += i;
/* Force batch chaining */
- err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
+ err = __reloc_entry_gpu(eb, ev.vma, offsets[2] * sizeof(u32), 2);
if (err)
goto unpin_vma;
@@ -97,7 +102,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
put_rq:
i915_request_put(rq);
unpin_vma:
- i915_vma_unpin(vma);
+ i915_vma_unpin(ev.vma);
return err;
}
@@ -121,6 +126,7 @@ static int igt_gpu_reloc(void *arg)
}
for_each_uabi_engine(eb.engine, eb.i915) {
+ INIT_LIST_HEAD(&eb.relocs);
reloc_cache_init(&eb.reloc_cache, eb.i915);
memset(map, POISON_INUSE, 4096);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 25/28] drm/i915/gt: Check for a completed last request once
From: Chris Wilson @ 2020-06-07 22:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Pull the repeated check for the last active request being completed to a
single spot, when deciding whether or not execlist preemption is
required.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3fb1b4c67adb..f9c095c79874 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2132,12 +2132,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* completed and barf.
*/
if ((last = *active)) {
- if (need_preempt(engine, last, ve)) {
- if (i915_request_completed(last)) {
- tasklet_hi_schedule(&execlists->tasklet);
- return;
- }
+ if (i915_request_completed(last) &&
+ !list_is_last(&last->sched.link, &engine->active.requests))
+ return;
+ if (need_preempt(engine, last, ve)) {
ENGINE_TRACE(engine,
"preempting last=%llx:%lld, prio=%d, hint=%d\n",
last->fence.context,
@@ -2165,11 +2164,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = NULL;
} else if (need_timeslice(engine, last, ve) &&
timeslice_expired(execlists, last)) {
- if (i915_request_completed(last)) {
- tasklet_hi_schedule(&execlists->tasklet);
- return;
- }
-
ENGINE_TRACE(engine,
"expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
last->fence.context,
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 11/28] drm/i915/gem: Lift GPU relocation allocation
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Since we have reduced the relocation paths to just use the async GPU,
we can lift the request allocation to the start of the relocations.
Knowing that we use one request for all relocations will simplify
tracking the relocation fence.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 100 ++++++++++--------
.../i915/gem/selftests/i915_gem_execbuffer.c | 5 +-
2 files changed, 57 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 01ab1e15a142..e012857be129 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -900,8 +900,6 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
static void eb_destroy(const struct i915_execbuffer *eb)
{
- GEM_BUG_ON(eb->reloc_cache.rq);
-
if (eb->array)
eb_vma_array_put(eb->array);
@@ -926,7 +924,6 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
- cache->rq = NULL;
cache->target = NULL;
}
@@ -1007,13 +1004,9 @@ static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
static int reloc_gpu_flush(struct reloc_cache *cache)
{
- struct i915_request *rq;
+ struct i915_request *rq = cache->rq;
int err;
- rq = fetch_and_zero(&cache->rq);
- if (!rq)
- return 0;
-
if (cache->rq_vma) {
struct drm_i915_gem_object *obj = cache->rq_vma->obj;
@@ -1062,9 +1055,8 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
return err;
}
-static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
- struct intel_engine_cs *engine,
- unsigned int len)
+static int
+__reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
{
struct reloc_cache *cache = &eb->reloc_cache;
struct intel_gt_buffer_pool_node *pool;
@@ -1154,33 +1146,14 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
return err;
}
-static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
-{
- return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
-}
-
-static u32 *reloc_gpu(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- unsigned int len)
+static u32 *reloc_batch_grow(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
u32 *cmd;
int err;
- if (unlikely(!cache->rq)) {
- struct intel_engine_cs *engine = eb->engine;
-
- if (!reloc_can_use_engine(engine)) {
- engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
- if (!engine)
- return ERR_PTR(-ENODEV);
- }
-
- err = __reloc_gpu_alloc(eb, engine, len);
- if (unlikely(err))
- return ERR_PTR(err);
- }
-
if (vma != cache->target) {
err = reloc_move_to_gpu(cache->rq, vma);
if (unlikely(err)) {
@@ -1238,7 +1211,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
else
len = 3;
- batch = reloc_gpu(eb, vma, len);
+ batch = reloc_batch_grow(eb, vma, len);
if (IS_ERR(batch))
return PTR_ERR(batch);
@@ -1499,6 +1472,47 @@ static long eb_reloc_vma(struct i915_execbuffer *eb, struct eb_vma *ev,
return required;
}
+static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
+{
+ return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
+}
+
+static int reloc_gpu_alloc(struct i915_execbuffer *eb)
+{
+ struct intel_engine_cs *engine = eb->engine;
+
+ if (!reloc_can_use_engine(engine)) {
+ engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
+ if (!engine)
+ return -ENODEV;
+ }
+
+ return __reloc_gpu_alloc(eb, engine);
+}
+
+static int reloc_gpu(struct i915_execbuffer *eb)
+{
+ struct eb_vma *ev;
+ int flush, err;
+
+ err = reloc_gpu_alloc(eb);
+ if (err)
+ return err;
+ GEM_BUG_ON(!eb->reloc_cache.rq);
+
+ list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ err = eb_reloc_vma(eb, ev, eb_reloc_entry);
+ if (err < 0)
+ goto out;
+ }
+
+out:
+ flush = reloc_gpu_flush(&eb->reloc_cache);
+ if (!err)
+ err = flush;
+ return err;
+}
+
static int eb_relocate(struct i915_execbuffer *eb)
{
int err;
@@ -1516,7 +1530,6 @@ static int eb_relocate(struct i915_execbuffer *eb)
/* The objects are in their final locations, apply the relocations. */
if (eb->args->flags & __EXEC_HAS_RELOC) {
struct eb_vma *ev, *en;
- int flush;
list_for_each_entry_safe(ev, en, &eb->relocs, reloc_link) {
err = eb_reloc_vma(eb, ev, eb_reloc_prepare);
@@ -1527,18 +1540,14 @@ static int eb_relocate(struct i915_execbuffer *eb)
list_del_init(&ev->reloc_link);
}
- list_for_each_entry(ev, &eb->relocs, reloc_link) {
- err = eb_reloc_vma(eb, ev, eb_reloc_entry);
- if (err < 0)
- break;
+ if (!list_empty(&eb->relocs)) {
+ err = reloc_gpu(eb);
+ if (err)
+ return err;
}
-
- flush = reloc_gpu_flush(&eb->reloc_cache);
- if (!err)
- err = flush;
}
- return err;
+ return 0;
}
static int eb_move_to_gpu(struct i915_execbuffer *eb)
@@ -2538,9 +2547,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
batch = vma;
}
- /* All GPU relocation batches must be submitted prior to the user rq */
- GEM_BUG_ON(eb.reloc_cache.rq);
-
/* Allocate a request for this batch buffer nice and early. */
eb.request = i915_request_create(eb.context);
if (IS_ERR(eb.request)) {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 57c14d3340cd..50fe22d87ae1 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -36,6 +36,10 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
if (err)
return err;
+ err = reloc_gpu_alloc(eb);
+ if (err)
+ goto unpin_vma;
+
/* 8-Byte aligned */
err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
if (err)
@@ -63,7 +67,6 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
err = reloc_gpu_flush(&eb->reloc_cache);
if (err)
goto put_rq;
- GEM_BUG_ON(eb->reloc_cache.rq);
err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
if (err) {
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 21/28] drm/i915/gt: Do not suspend bonded requests if one hangs
From: Chris Wilson @ 2020-06-07 22:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
Treat the dependency between bonded requests as weak and leave the
remainder of the pair on the GPU if one hangs.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3199c65fa7e8..af6f78eca9ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2726,6 +2726,9 @@ static void __execlists_hold(struct i915_request *rq)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
/* Leave semaphores spinning on the other engines */
if (w->engine != rq->engine)
continue;
@@ -2850,6 +2853,9 @@ static void __execlists_unhold(struct i915_request *rq)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
/* Propagate any change in error status */
if (rq->fence.error)
i915_request_set_error_once(w, rq->fence.error);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 09/28] drm/i915: Add list_for_each_entry_safe_continue_reverse
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
One more list iterator variant, for when we want to unwind from inside
one list iterator with the intention of restarting from the current
entry as the new head of the list.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_utils.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 03a73d2bd50d..6ebccdd12d4c 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -266,6 +266,12 @@ static inline int list_is_last_rcu(const struct list_head *list,
return READ_ONCE(list->next) == head;
}
+#define list_for_each_entry_safe_continue_reverse(pos, n, head, member) \
+ for (pos = list_prev_entry(pos, member), \
+ n = list_prev_entry(pos, member); \
+ &pos->member != (head); \
+ pos = n, n = list_prev_entry(n, member))
+
/*
* Wait until the work is finally complete, even if it tries to postpone
* by requeueing itself. Note, that if the worker never cancels itself,
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 24/28] ipi-dag
From: Chris Wilson @ 2020-06-07 22:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_scheduler.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 320d3720ba34..4c189b81cc62 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -436,17 +436,17 @@ bool i915_sched_node_verify_dag(struct i915_sched_node *waiter,
struct i915_dependency *dep, *p;
struct i915_dependency stack;
bool result = false;
- LIST_HEAD(dfs);
+ LIST_HEAD(ipi);
if (list_empty(&waiter->waiters_list))
return true;
- spin_lock_irq(&schedule_lock);
+ spin_lock_irq(&ipi_lock);
stack.signaler = signaler;
- list_add(&stack.dfs_link, &dfs);
+ list_add(&stack.ipi_link, &ipi);
- list_for_each_entry(dep, &dfs, dfs_link) {
+ list_for_each_entry(dep, &ipi, ipi_link) {
struct i915_sched_node *node = dep->signaler;
if (node_signaled(node))
@@ -456,17 +456,17 @@ bool i915_sched_node_verify_dag(struct i915_sched_node *waiter,
if (p->signaler == waiter)
goto out;
- if (list_empty(&p->dfs_link))
- list_add_tail(&p->dfs_link, &dfs);
+ if (list_empty(&p->ipi_link))
+ list_add_tail(&p->ipi_link, &ipi);
}
}
result = true;
out:
- list_for_each_entry_safe(dep, p, &dfs, dfs_link)
- INIT_LIST_HEAD(&dep->dfs_link);
+ list_for_each_entry_safe(dep, p, &ipi, ipi_link)
+ INIT_LIST_HEAD(&dep->ipi_link);
- spin_unlock_irq(&schedule_lock);
+ spin_unlock_irq(&ipi_lock);
return result;
}
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 03/28] drm/i915/selftests: Teach hang-self to target only itself
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
We have a test case to exercise resetting an engine while the other
engines are busy; all that TEST_SELF adds on top is that the target
engine also has background activity. In this case it is useful to first
test resetting the engine while there is background activity, as a
separate flag from exercising all others.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 035f363fb0f8..2af66f8ffbd2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -805,10 +805,10 @@ static int __igt_reset_engines(struct intel_gt *gt,
threads[tmp].resets =
i915_reset_engine_count(global, other);
- if (!(flags & TEST_OTHERS))
+ if (other == engine && !(flags & TEST_SELF))
continue;
- if (other == engine && !(flags & TEST_SELF))
+ if (other != engine && !(flags & TEST_OTHERS))
continue;
threads[tmp].engine = other;
@@ -999,7 +999,7 @@ static int igt_reset_engines(void *arg)
},
{
"self-priority",
- TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
+ TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
},
{ }
};
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 04/28] drm/i915/selftests: Remove live_suppress_wait_preempt
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
With the removal of the internal wait-priority boosting, we can also
remove the selftest to ensure that those waits were being suppressed
from causing preemptions.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/selftest_lrc.c | 178 -------------------------
1 file changed, 178 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 67d74e6432a8..e838e38a262c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2379,183 +2379,6 @@ static int live_suppress_self_preempt(void *arg)
goto err_client_b;
}
-static int __i915_sw_fence_call
-dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
-{
- return NOTIFY_DONE;
-}
-
-static struct i915_request *dummy_request(struct intel_engine_cs *engine)
-{
- struct i915_request *rq;
-
- rq = kzalloc(sizeof(*rq), GFP_KERNEL);
- if (!rq)
- return NULL;
-
- rq->engine = engine;
-
- spin_lock_init(&rq->lock);
- INIT_LIST_HEAD(&rq->fence.cb_list);
- rq->fence.lock = &rq->lock;
- rq->fence.ops = &i915_fence_ops;
-
- i915_sched_node_init(&rq->sched);
-
- /* mark this request as permanently incomplete */
- rq->fence.seqno = 1;
- BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
- rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
- GEM_BUG_ON(i915_request_completed(rq));
-
- i915_sw_fence_init(&rq->submit, dummy_notify);
- set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
-
- spin_lock_init(&rq->lock);
- rq->fence.lock = &rq->lock;
- INIT_LIST_HEAD(&rq->fence.cb_list);
-
- return rq;
-}
-
-static void dummy_request_free(struct i915_request *dummy)
-{
- /* We have to fake the CS interrupt to kick the next request */
- i915_sw_fence_commit(&dummy->submit);
-
- i915_request_mark_complete(dummy);
- dma_fence_signal(&dummy->fence);
-
- i915_sched_node_fini(&dummy->sched);
- i915_sw_fence_fini(&dummy->submit);
-
- dma_fence_free(&dummy->fence);
-}
-
-static int live_suppress_wait_preempt(void *arg)
-{
- struct intel_gt *gt = arg;
- struct preempt_client client[4];
- struct i915_request *rq[ARRAY_SIZE(client)] = {};
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err = -ENOMEM;
- int i;
-
- /*
- * Waiters are given a little priority nudge, but not enough
- * to actually cause any preemption. Double check that we do
- * not needlessly generate preempt-to-idle cycles.
- */
-
- if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
- return 0;
-
- if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
- return -ENOMEM;
- if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
- goto err_client_0;
- if (preempt_client_init(gt, &client[2])) /* head of queue */
- goto err_client_1;
- if (preempt_client_init(gt, &client[3])) /* bystander */
- goto err_client_2;
-
- for_each_engine(engine, gt, id) {
- int depth;
-
- if (!intel_engine_has_preemption(engine))
- continue;
-
- if (!engine->emit_init_breadcrumb)
- continue;
-
- for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
- struct i915_request *dummy;
-
- engine->execlists.preempt_hang.count = 0;
-
- dummy = dummy_request(engine);
- if (!dummy)
- goto err_client_3;
-
- for (i = 0; i < ARRAY_SIZE(client); i++) {
- struct i915_request *this;
-
- this = spinner_create_request(&client[i].spin,
- client[i].ctx, engine,
- MI_NOOP);
- if (IS_ERR(this)) {
- err = PTR_ERR(this);
- goto err_wedged;
- }
-
- /* Disable NEWCLIENT promotion */
- __i915_active_fence_set(&i915_request_timeline(this)->last_request,
- &dummy->fence);
-
- rq[i] = i915_request_get(this);
- i915_request_add(this);
- }
-
- dummy_request_free(dummy);
-
- GEM_BUG_ON(i915_request_completed(rq[0]));
- if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
- pr_err("%s: First client failed to start\n",
- engine->name);
- goto err_wedged;
- }
- GEM_BUG_ON(!i915_request_started(rq[0]));
-
- if (i915_request_wait(rq[depth],
- I915_WAIT_PRIORITY,
- 1) != -ETIME) {
- pr_err("%s: Waiter depth:%d completed!\n",
- engine->name, depth);
- goto err_wedged;
- }
-
- for (i = 0; i < ARRAY_SIZE(client); i++) {
- igt_spinner_end(&client[i].spin);
- i915_request_put(rq[i]);
- rq[i] = NULL;
- }
-
- if (igt_flush_test(gt->i915))
- goto err_wedged;
-
- if (engine->execlists.preempt_hang.count) {
- pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
- engine->name,
- engine->execlists.preempt_hang.count,
- depth);
- err = -EINVAL;
- goto err_client_3;
- }
- }
- }
-
- err = 0;
-err_client_3:
- preempt_client_fini(&client[3]);
-err_client_2:
- preempt_client_fini(&client[2]);
-err_client_1:
- preempt_client_fini(&client[1]);
-err_client_0:
- preempt_client_fini(&client[0]);
- return err;
-
-err_wedged:
- for (i = 0; i < ARRAY_SIZE(client); i++) {
- igt_spinner_end(&client[i].spin);
- i915_request_put(rq[i]);
- }
- intel_gt_set_wedged(gt);
- err = -EIO;
- goto err_client_3;
-}
-
static int live_chain_preempt(void *arg)
{
struct intel_gt *gt = arg;
@@ -4592,7 +4415,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_nopreempt),
SUBTEST(live_preempt_cancel),
SUBTEST(live_suppress_self_preempt),
- SUBTEST(live_suppress_wait_preempt),
SUBTEST(live_chain_preempt),
SUBTEST(live_preempt_gang),
SUBTEST(live_preempt_timeout),
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 27/28] drm/i915/gt: Specify a deadline for the heartbeat
From: Chris Wilson @ 2020-06-07 22:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
As we know when we expect the heartbeat to be checked for completion,
pass this information along as its deadline. We still do not complain if
the deadline is missed, at least until we have tried a few times, but it
will allow for quicker hang detection on systems where deadlines are
adhered to.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index ba778c7b5d2b..4e5b8146bee0 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -42,6 +42,16 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
i915_request_add_active_barriers(rq);
}
+static void set_heartbeat_deadline(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ unsigned long interval;
+
+ interval = READ_ONCE(engine->props.heartbeat_interval_ms);
+ if (interval)
+ i915_request_set_deadline(rq, ktime_get() + (interval << 20));
+}
+
static void show_heartbeat(const struct i915_request *rq,
struct intel_engine_cs *engine)
{
@@ -103,6 +113,8 @@ static void heartbeat(struct work_struct *wrk)
local_bh_disable();
i915_request_set_priority(rq, attr.priority);
+ if (attr.priority == I915_PRIORITY_BARRIER)
+ i915_request_set_deadline(rq, 0);
local_bh_enable();
} else {
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -133,6 +145,7 @@ static void heartbeat(struct work_struct *wrk)
__i915_request_commit(rq);
__i915_request_queue(rq, &attr);
+ set_heartbeat_deadline(engine, rq);
unlock:
mutex_unlock(&ce->timeline->mutex);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 26/28] drm/i915: Fair low-latency scheduling
From: Chris Wilson @ 2020-06-07 22:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
The first "scheduler" was a topological sorting of requests into
priority order. The execution order was deterministic: the earliest
submitted, highest priority request would be executed first. Priority
inheritance ensured that inversions were kept at bay, and allowed us to
dynamically boost priorities (e.g. for interactive pageflips).
The minimalistic timeslicing scheme was an attempt to introduce fairness
between long running requests, by evicting the active request at the end
of a timeslice and moving it to the back of its priority queue (while
ensuring that dependencies were kept in order). For short running
requests from many clients of equal priority, the scheme is still very
much FIFO submission ordering, and as unfair as before.
To impose fairness, we need an external metric that ensures that clients
are interspersed, so that we don't execute one long chain from client A
before executing any of client B. This could be imposed by the clients by
using fences based on an external clock, that is, they only submit work for
a "frame" at frame-interval, instead of submitting as much work as they
are able to. The standard SwapBuffers approach is akin to double
buffering, where, as one frame is being executed, the next is being
submitted, such that there is always a maximum of two frames per client
in the pipeline. Even this scheme exhibits unfairness under load as a
single client will execute two frames back to back before the next, and
with enough clients, deadlines will be missed.
The idea introduced by BFS/MuQSS is that fairness is introduced by
metering with an external clock. Every request, when it becomes ready to
execute is assigned a virtual deadline, and execution order is then
determined by earliest deadline. Priority is used as a hint, rather than
strict ordering, where high priority requests have earlier deadlines,
but not necessarily earlier than outstanding work. Thus work is executed
in order of 'readiness', with timeslicing to demote long running work.
The Achilles' heel of this scheduler is its strong preference for
low-latency and favouring of new queues. Whereas it was easy to dominate
the old scheduler by flooding it with many requests over a short period
of time, the new scheduler can be dominated by a 'synchronous' client
that waits for each of its requests to complete before submitting the
next. As such a client has no history, it is always considered
ready-to-run and receives an earlier deadline than the long running
requests.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 12 +-
.../gpu/drm/i915/gt/intel_engine_heartbeat.c | 1 +
drivers/gpu/drm/i915/gt/intel_engine_pm.c | 4 +-
drivers/gpu/drm/i915/gt/intel_engine_types.h | 24 --
drivers/gpu/drm/i915/gt/intel_lrc.c | 328 +++++++-----------
drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 5 +-
drivers/gpu/drm/i915/gt/selftest_lrc.c | 43 ++-
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 +-
drivers/gpu/drm/i915/i915_priolist_types.h | 7 +-
drivers/gpu/drm/i915/i915_request.h | 4 +-
drivers/gpu/drm/i915/i915_scheduler.c | 322 ++++++++++++-----
drivers/gpu/drm/i915/i915_scheduler.h | 22 +-
drivers/gpu/drm/i915/i915_scheduler_types.h | 17 +
.../drm/i915/selftests/i915_mock_selftests.h | 1 +
drivers/gpu/drm/i915/selftests/i915_request.c | 1 +
.../gpu/drm/i915/selftests/i915_scheduler.c | 49 +++
16 files changed, 484 insertions(+), 362 deletions(-)
create mode 100644 drivers/gpu/drm/i915/selftests/i915_scheduler.c
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d79307d790da..b99b3332467d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -513,7 +513,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
execlists->active =
memset(execlists->inflight, 0, sizeof(execlists->inflight));
- execlists->queue_priority_hint = INT_MIN;
execlists->queue = RB_ROOT_CACHED;
}
@@ -1188,14 +1187,15 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
}
}
-static int print_sched_attr(const struct i915_sched_attr *attr,
- char *buf, int x, int len)
+static int print_sched(const struct i915_sched_node *node,
+ char *buf, int x, int len)
{
- if (attr->priority == I915_PRIORITY_INVALID)
+ if (node->attr.priority == I915_PRIORITY_INVALID)
return x;
x += snprintf(buf + x, len - x,
- " prio=%d", attr->priority);
+ " prio=%d, dl=%llu",
+ node->attr.priority, node->deadline);
return x;
}
@@ -1208,7 +1208,7 @@ static void print_request(struct drm_printer *m,
char buf[80] = "";
int x = 0;
- x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
+ x = print_sched(&rq->sched, buf, x, sizeof(buf));
drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
prefix,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5251860e952d..ba778c7b5d2b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -214,6 +214,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
idle_pulse(engine, rq);
+ rq->sched.deadline = 0;
__i915_request_commit(rq);
__i915_request_queue(rq, &attr);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index d0a1078ef632..ac9c777a6592 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -188,6 +188,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
i915_request_add_active_barriers(rq);
/* Install ourselves as a preemption barrier */
+ rq->sched.deadline = 0;
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
/*
@@ -248,9 +249,6 @@ static int __engine_park(struct intel_wakeref *wf)
intel_engine_park_heartbeat(engine);
intel_engine_disarm_breadcrumbs(engine);
- /* Must be reset upon idling, or we may miss the busy wakeup. */
- GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
-
if (engine->park)
engine->park(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 48e111f16dc5..a3c60038244c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -231,30 +231,6 @@ struct intel_engine_execlists {
*/
unsigned int port_mask;
- /**
- * @switch_priority_hint: Second context priority.
- *
- * We submit multiple contexts to the HW simultaneously and would
- * like to occasionally switch between them to emulate timeslicing.
- * To know when timeslicing is suitable, we track the priority of
- * the context submitted second.
- */
- int switch_priority_hint;
-
- /**
- * @queue_priority_hint: Highest pending priority.
- *
- * When we add requests into the queue, or adjust the priority of
- * executing requests, we compute the maximum priority of those
- * pending requests. We can then use this value to determine if
- * we need to preempt the executing requests to service the queue.
- * However, since the we may have recorded the priority of an inflight
- * request we wanted to preempt but since completed, at the time of
- * dequeuing the priority hint may no longer may match the highest
- * available request priority.
- */
- int queue_priority_hint;
-
/**
* @queue: queue of requests, in priority lists
*/
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f9c095c79874..0678dbb9b9fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -200,7 +200,7 @@ struct virtual_engine {
*/
struct ve_node {
struct rb_node rb;
- int prio;
+ u64 deadline;
} nodes[I915_NUM_ENGINES];
/*
@@ -411,12 +411,17 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
static inline int rq_prio(const struct i915_request *rq)
{
- return READ_ONCE(rq->sched.attr.priority);
+ return rq->sched.attr.priority;
}
-static int effective_prio(const struct i915_request *rq)
+static inline u64 rq_deadline(const struct i915_request *rq)
{
- int prio = rq_prio(rq);
+ return rq->sched.deadline;
+}
+
+static u64 effective_deadline(const struct i915_request *rq)
+{
+ u64 deadline = rq_deadline(rq);
/*
* If this request is special and must not be interrupted at any
@@ -427,27 +432,27 @@ static int effective_prio(const struct i915_request *rq)
* nopreempt for as long as desired).
*/
if (i915_request_has_nopreempt(rq))
- prio = I915_PRIORITY_UNPREEMPTABLE;
+ deadline = 0;
- return prio;
+ return deadline;
}
-static int queue_prio(const struct intel_engine_execlists *execlists)
+static u64 queue_deadline(const struct intel_engine_execlists *execlists)
{
struct rb_node *rb;
rb = rb_first_cached(&execlists->queue);
if (!rb)
- return INT_MIN;
+ return I915_DEADLINE_NEVER;
- return to_priolist(rb)->priority;
+ return to_priolist(rb)->deadline;
}
static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *rq,
struct virtual_engine *ve)
{
- int last_prio;
+ u64 last_deadline;
if (!intel_engine_has_semaphores(engine))
return false;
@@ -470,16 +475,14 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
* priority level: the task that is running should remain running
* to preserve FIFO ordering of dependencies.
*/
- last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
- if (engine->execlists.queue_priority_hint <= last_prio)
- return false;
+ last_deadline = effective_deadline(rq);
/*
* Check against the first request in ELSP[1], it will, thanks to the
* power of PI, be the highest priority of that context.
*/
if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
- rq_prio(list_next_entry(rq, sched.link)) > last_prio)
+ rq_deadline(list_next_entry(rq, sched.link)) < last_deadline)
return true;
if (ve) {
@@ -491,7 +494,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
rcu_read_lock();
next = READ_ONCE(ve->request);
if (next)
- preempt = rq_prio(next) > last_prio;
+ preempt = rq_deadline(next) < last_deadline;
rcu_read_unlock();
}
@@ -509,7 +512,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
* ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
* context, it's priority would not exceed ELSP[0] aka last_prio.
*/
- return queue_prio(&engine->execlists) > last_prio;
+ return queue_deadline(&engine->execlists) < last_deadline;
}
__maybe_unused static inline bool
@@ -526,7 +529,7 @@ assert_priority_queue(const struct i915_request *prev,
if (i915_request_is_active(prev))
return true;
- return rq_prio(prev) >= rq_prio(next);
+ return rq_deadline(prev) <= rq_deadline(next);
}
/*
@@ -1096,22 +1099,30 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
{
struct i915_request *rq, *rn, *active = NULL;
struct list_head *uninitialized_var(pl);
- int prio = I915_PRIORITY_INVALID;
+ u64 deadline = I915_DEADLINE_NEVER;
lockdep_assert_held(&engine->active.lock);
list_for_each_entry_safe_reverse(rq, rn,
&engine->active.requests,
sched.link) {
- if (i915_request_completed(rq))
+ if (i915_request_completed(rq)) {
+ list_del_init(&rq->sched.link);
continue; /* XXX */
+ }
__i915_request_unsubmit(rq);
- GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
- if (rq_prio(rq) != prio) {
- prio = rq_prio(rq);
- pl = i915_sched_lookup_priolist(engine, prio);
+ if (i915_request_started(rq)) {
+ u64 deadline =
+ i915_scheduler_next_virtual_deadline(rq_prio(rq));
+ rq->sched.deadline = min(rq_deadline(rq), deadline);
+ }
+
+ if (rq_deadline(rq) != deadline) {
+ deadline = rq_deadline(rq);
+ pl = i915_sched_lookup_priolist(engine, deadline);
+
}
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
@@ -1546,14 +1557,14 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
if (!rq)
return "";
- snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
+ snprintf(buf, buflen, "%sccid:%x %llx:%lld%s dl %llu",
prefix,
rq->context->lrc.ccid,
rq->fence.context, rq->fence.seqno,
i915_request_completed(rq) ? "!" :
i915_request_started(rq) ? "*" :
"",
- rq_prio(rq));
+ rq_deadline(rq));
return buf;
}
@@ -1863,7 +1874,9 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
}
-static void defer_request(struct i915_request *rq, struct list_head * const pl)
+static void defer_request(struct i915_request *rq,
+ struct list_head * const pl,
+ u64 deadline)
{
LIST_HEAD(list);
@@ -1878,6 +1891,7 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
struct i915_dependency *p;
GEM_BUG_ON(i915_request_is_active(rq));
+ rq->sched.deadline = deadline;
list_move_tail(&rq->sched.link, pl);
for_each_waiter(p, rq) {
@@ -1900,10 +1914,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
if (!i915_request_is_ready(w))
continue;
- if (rq_prio(w) < rq_prio(rq))
+ if (rq_deadline(w) > deadline)
continue;
- GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
list_move_tail(&w->sched.link, &list);
}
@@ -1914,46 +1927,21 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
static void defer_active(struct intel_engine_cs *engine)
{
struct i915_request *rq;
+ u64 deadline;
rq = __unwind_incomplete_requests(engine);
if (!rq)
return;
- defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
-}
-
-static bool
-need_timeslice(const struct intel_engine_cs *engine,
- const struct i915_request *rq,
- struct virtual_engine *ve)
-{
- int hint;
-
- if (!intel_engine_has_timeslices(engine))
- return false;
-
- hint = engine->execlists.queue_priority_hint;
-
- if (ve) {
- const struct intel_engine_cs *inflight =
- intel_context_inflight(&ve->context);
-
- if (!inflight || inflight == engine) {
- struct i915_request *next;
-
- rcu_read_lock();
- next = READ_ONCE(ve->request);
- if (next)
- hint = max(hint, rq_prio(next));
- rcu_read_unlock();
- }
- }
-
- if (!list_is_last(&rq->sched.link, &engine->active.requests))
- hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
+ deadline = max(rq_deadline(rq),
+ i915_scheduler_next_virtual_deadline(rq_prio(rq)));
+ ENGINE_TRACE(engine, "defer %llx:%lld, dl:%llu -> %llu\n",
+ rq->fence.context, rq->fence.seqno,
+ rq_deadline(rq), deadline);
- GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
- return hint >= effective_prio(rq);
+ defer_request(rq,
+ i915_sched_lookup_priolist(engine, deadline),
+ deadline);
}
static bool
@@ -1976,42 +1964,56 @@ timeslice_yield(const struct intel_engine_execlists *el,
}
static bool
-timeslice_expired(const struct intel_engine_execlists *el,
- const struct i915_request *rq)
+timeslice_expired(struct intel_engine_cs *engine, const struct i915_request *rq)
{
- return timer_expired(&el->timer) || timeslice_yield(el, rq);
-}
+ const struct intel_engine_execlists *el = &engine->execlists;
-static int
-switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
-{
- if (list_is_last(&rq->sched.link, &engine->active.requests))
- return engine->execlists.queue_priority_hint;
+ if (!intel_engine_has_timeslices(engine))
+ return false;
+
+ if (i915_request_has_nopreempt(rq) && i915_request_started(rq))
+ return false;
- return rq_prio(list_next_entry(rq, sched.link));
+ return timer_expired(&el->timer) || timeslice_yield(el, rq);
}
-static inline unsigned long
-timeslice(const struct intel_engine_cs *engine)
+static unsigned long timeslice(const struct intel_engine_cs *engine)
{
return READ_ONCE(engine->props.timeslice_duration_ms);
}
-static unsigned long active_timeslice(const struct intel_engine_cs *engine)
+static bool needs_timeslice(const struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
- const struct intel_engine_execlists *execlists = &engine->execlists;
- const struct i915_request *rq = *execlists->active;
-
+ /* If not currently active, or about to switch, wait for next event */
if (!rq || i915_request_completed(rq))
- return 0;
+ return false;
+
+ /* We do not need to start the timeslice until after the ACK */
+ if (READ_ONCE(engine->execlists.pending[0]))
+ return false;
+
+ /* If ELSP[1] is occupied, always check to see if worth slicing */
+ if (!list_is_last(&rq->sched.link, &engine->active.requests))
+ return true;
+
+ /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */
+ if (rb_first_cached(&engine->execlists.queue))
+ return true;
- if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
+ return rb_first_cached(&engine->execlists.virtual);
+}
+
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
+{
+ /* Disable the timer if there is nothing to switch to */
+ if (!needs_timeslice(engine, execlists_active(&engine->execlists)))
return 0;
return timeslice(engine);
}
-static void set_timeslice(struct intel_engine_cs *engine)
+static void start_timeslice(struct intel_engine_cs *engine)
{
unsigned long duration;
@@ -2024,29 +2026,6 @@ static void set_timeslice(struct intel_engine_cs *engine)
set_timer_ms(&engine->execlists.timer, duration);
}
-static void start_timeslice(struct intel_engine_cs *engine, int prio)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- unsigned long duration;
-
- if (!intel_engine_has_timeslices(engine))
- return;
-
- WRITE_ONCE(execlists->switch_priority_hint, prio);
- if (prio == INT_MIN)
- return;
-
- if (timer_pending(&execlists->timer))
- return;
-
- duration = timeslice(engine);
- ENGINE_TRACE(engine,
- "start timeslicing, prio:%d, interval:%lu",
- prio, duration);
-
- set_timer_ms(&execlists->timer, duration);
-}
-
static void record_preemption(struct intel_engine_execlists *execlists)
{
(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
@@ -2138,11 +2117,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (need_preempt(engine, last, ve)) {
ENGINE_TRACE(engine,
- "preempting last=%llx:%lld, prio=%d, hint=%d\n",
+ "preempting last=%llx:%llu, dl=%llu\n",
last->fence.context,
last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint);
+ rq_deadline(last));
record_preemption(execlists);
/*
@@ -2162,14 +2140,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
__unwind_incomplete_requests(engine);
last = NULL;
- } else if (need_timeslice(engine, last, ve) &&
- timeslice_expired(execlists, last)) {
+ } else if (timeslice_expired(engine, last)) {
ENGINE_TRACE(engine,
- "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
- last->fence.context,
- last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint,
+ "expired:%s last=%llx:%llu, deadline=%llu, now=%llu, yield?=%s\n",
+ yesno(timer_expired(&execlists->timer)),
+ last->fence.context, last->fence.seqno,
+ rq_deadline(last),
+ i915_sched_to_ticks(ktime_get()),
yesno(timeslice_yield(execlists, last)));
ring_set_paused(engine, 1);
@@ -2205,7 +2182,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- start_timeslice(engine, queue_prio(execlists));
return;
}
}
@@ -2224,7 +2200,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(rq->engine != &ve->base);
GEM_BUG_ON(rq->context != &ve->context);
- if (unlikely(rq_prio(rq) < queue_prio(execlists))) {
+ if (unlikely(rq_deadline(rq) > queue_deadline(execlists))) {
spin_unlock(&ve->base.active.lock);
break;
}
@@ -2233,7 +2209,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
- start_timeslice(engine, rq_prio(rq));
return; /* leave this for another sibling */
}
@@ -2245,10 +2220,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
i915_request_started(rq) ? "*" :
"",
yesno(engine != ve->siblings[0]));
-
WRITE_ONCE(ve->request, NULL);
- WRITE_ONCE(ve->base.execlists.queue_priority_hint,
- INT_MIN);
rb = &ve->nodes[engine->id].rb;
rb_erase_cached(rb, &execlists->virtual);
@@ -2391,28 +2363,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
done:
- /*
- * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
- *
- * We choose the priority hint such that if we add a request of greater
- * priority than this, we kick the submission tasklet to decide on
- * the right order of submitting the requests to hardware. We must
- * also be prepared to reorder requests as they are in-flight on the
- * HW. We derive the priority hint then as the first "hole" in
- * the HW submission ports and if there are no available slots,
- * the priority of the lowest executing request, i.e. last.
- *
- * When we do receive a higher priority request ready to run from the
- * user, see queue_request(), the priority hint is bumped to that
- * request triggering preemption on the next dequeue (or subsequent
- * interrupt for secondary ports).
- */
- execlists->queue_priority_hint = queue_prio(execlists);
-
if (submit) {
*port = execlists_schedule_in(last, port - execlists->pending);
- execlists->switch_priority_hint =
- switch_prio(engine, *execlists->pending);
/*
* Skip if we ended up with exactly the same set of requests,
@@ -2432,7 +2384,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
set_preempt_timeout(engine, *active);
execlists_submit_ports(engine);
} else {
- start_timeslice(engine, execlists->queue_priority_hint);
skip_submit:
ring_set_paused(engine, 0);
}
@@ -2675,7 +2626,6 @@ static void process_csb(struct intel_engine_cs *engine)
} while (head != tail);
execlists->csb_head = head;
- set_timeslice(engine);
/*
* Gen11 has proven to fail wrt global observation point between
@@ -2824,9 +2774,10 @@ static bool hold_request(const struct i915_request *rq)
return result;
}
-static void __execlists_unhold(struct i915_request *rq)
+static bool __execlists_unhold(struct i915_request *rq)
{
LIST_HEAD(list);
+ bool submit = false;
do {
struct i915_dependency *p;
@@ -2837,10 +2788,7 @@ static void __execlists_unhold(struct i915_request *rq)
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
i915_request_clear_hold(rq);
- list_move_tail(&rq->sched.link,
- i915_sched_lookup_priolist(rq->engine,
- rq_prio(rq)));
- set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ submit |= intel_engine_queue_request(rq->engine, rq);
/* Also release any children on this engine that are ready */
for_each_waiter(p, rq) {
@@ -2869,6 +2817,8 @@ static void __execlists_unhold(struct i915_request *rq)
rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
} while (rq);
+
+ return submit;
}
static void execlists_unhold(struct intel_engine_cs *engine,
@@ -2880,12 +2830,8 @@ static void execlists_unhold(struct intel_engine_cs *engine,
* Move this request back to the priority queue, and all of its
* children and grandchildren that were suspended along with it.
*/
- __execlists_unhold(rq);
-
- if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
- engine->execlists.queue_priority_hint = rq_prio(rq);
+ if (__execlists_unhold(rq))
tasklet_hi_schedule(&engine->execlists.tasklet);
- }
spin_unlock_irq(&engine->active.lock);
}
@@ -3127,6 +3073,8 @@ static void execlists_submission_tasklet(unsigned long data)
if (unlikely(timeout && preempt_timeout(engine)))
execlists_reset(engine, "preemption time out");
}
+
+ start_timeslice(engine);
}
static void __execlists_kick(struct intel_engine_execlists *execlists)
@@ -3148,15 +3096,6 @@ static void execlists_preempt(struct timer_list *timer)
execlists_kick(timer, preempt);
}
-static void queue_request(struct intel_engine_cs *engine,
- struct i915_request *rq)
-{
- GEM_BUG_ON(!list_empty(&rq->sched.link));
- list_add_tail(&rq->sched.link,
- i915_sched_lookup_priolist(engine, rq_prio(rq)));
- set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-}
-
static void __submit_queue_imm(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -3167,18 +3106,6 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
__execlists_submission_tasklet(engine);
}
-static void submit_queue(struct intel_engine_cs *engine,
- const struct i915_request *rq)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
-
- if (rq_prio(rq) <= execlists->queue_priority_hint)
- return;
-
- execlists->queue_priority_hint = rq_prio(rq);
- __submit_queue_imm(engine);
-}
-
static bool ancestor_on_hold(const struct intel_engine_cs *engine,
const struct i915_request *rq)
{
@@ -3213,12 +3140,9 @@ static void execlists_submit_request(struct i915_request *request)
list_add_tail(&request->sched.link, &engine->active.hold);
i915_request_set_hold(request);
} else {
- queue_request(engine, request);
-
- GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
- GEM_BUG_ON(list_empty(&request->sched.link));
-
- submit_queue(engine, request);
+ if (intel_engine_queue_request(engine, request))
+ __submit_queue_imm(engine);
+ start_timeslice(engine);
}
spin_unlock_irqrestore(&engine->active.lock, flags);
@@ -4273,10 +4197,6 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
static void nop_submission_tasklet(unsigned long data)
{
- struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
-
- /* The driver is wedged; don't process any more events. */
- WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
}
static void execlists_reset_cancel(struct intel_engine_cs *engine)
@@ -4322,6 +4242,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
rb_erase_cached(&p->node, &execlists->queue);
i915_priolist_free(p);
}
+ GEM_BUG_ON(!RB_EMPTY_ROOT(&execlists->queue.rb_root));
/* On-hold requests will be flushed to timeline upon their release */
list_for_each_entry(rq, &engine->active.hold, sched.link)
@@ -4343,17 +4264,12 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
rq->engine = engine;
__i915_request_submit(rq);
i915_request_put(rq);
-
- ve->base.execlists.queue_priority_hint = INT_MIN;
}
spin_unlock(&ve->base.active.lock);
}
/* Remaining _unready_ requests will be nop'ed when submitted */
- execlists->queue_priority_hint = INT_MIN;
- execlists->queue = RB_ROOT_CACHED;
-
GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
execlists->tasklet.func = nop_submission_tasklet;
@@ -5449,7 +5365,8 @@ static const struct intel_context_ops virtual_context_ops = {
.destroy = virtual_context_destroy,
};
-static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
+static intel_engine_mask_t
+virtual_submission_mask(struct virtual_engine *ve, u64 *deadline)
{
struct i915_request *rq;
intel_engine_mask_t mask;
@@ -5466,9 +5383,11 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
mask = ve->siblings[0]->mask;
}
- ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
+ *deadline = rq_deadline(rq);
+
+ ENGINE_TRACE(&ve->base, "rq=%llx:%llu, mask=%x, dl=%llu\n",
rq->fence.context, rq->fence.seqno,
- mask, ve->base.execlists.queue_priority_hint);
+ mask, *deadline);
return mask;
}
@@ -5476,12 +5395,12 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
static void virtual_submission_tasklet(unsigned long data)
{
struct virtual_engine * const ve = (struct virtual_engine *)data;
- const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
intel_engine_mask_t mask;
+ u64 deadline;
unsigned int n;
rcu_read_lock();
- mask = virtual_submission_mask(ve);
+ mask = virtual_submission_mask(ve, &deadline);
rcu_read_unlock();
if (unlikely(!mask))
return;
@@ -5514,7 +5433,8 @@ static void virtual_submission_tasklet(unsigned long data)
*/
first = rb_first_cached(&sibling->execlists.virtual) ==
&node->rb;
- if (prio == node->prio || (prio > node->prio && first))
+ if (deadline == node->deadline ||
+ (deadline < node->deadline && first))
goto submit_engine;
rb_erase_cached(&node->rb, &sibling->execlists.virtual);
@@ -5528,7 +5448,7 @@ static void virtual_submission_tasklet(unsigned long data)
rb = *parent;
other = rb_entry(rb, typeof(*other), rb);
- if (prio > other->prio) {
+ if (deadline < other->deadline) {
parent = &rb->rb_left;
} else {
parent = &rb->rb_right;
@@ -5543,8 +5463,8 @@ static void virtual_submission_tasklet(unsigned long data)
submit_engine:
GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
- node->prio = prio;
- if (first && prio > sibling->execlists.queue_priority_hint)
+ node->deadline = deadline;
+ if (first)
tasklet_hi_schedule(&sibling->execlists.tasklet);
unlock_engine:
@@ -5578,11 +5498,11 @@ static void virtual_submit_request(struct i915_request *rq)
if (i915_request_completed(rq)) {
__i915_request_submit(rq);
-
- ve->base.execlists.queue_priority_hint = INT_MIN;
ve->request = NULL;
} else {
- ve->base.execlists.queue_priority_hint = rq_prio(rq);
+ rq->sched.deadline =
+ min(rq->sched.deadline,
+ i915_scheduler_next_virtual_deadline(rq_prio(rq)));
ve->request = i915_request_get(rq);
GEM_BUG_ON(!list_empty(virtual_queue(ve)));
@@ -5686,7 +5606,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
ve->base.bond_execute = virtual_bond_execute;
INIT_LIST_HEAD(virtual_queue(ve));
- ve->base.execlists.queue_priority_hint = INT_MIN;
tasklet_init(&ve->base.execlists.tasklet,
virtual_submission_tasklet,
(unsigned long)ve);
@@ -5873,13 +5792,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
show_request(m, last, "\t\tE ");
}
- if (execlists->switch_priority_hint != INT_MIN)
- drm_printf(m, "\t\tSwitch priority hint: %d\n",
- READ_ONCE(execlists->switch_priority_hint));
- if (execlists->queue_priority_hint != INT_MIN)
- drm_printf(m, "\t\tQueue priority hint: %d\n",
- READ_ONCE(execlists->queue_priority_hint));
-
last = NULL;
count = 0;
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index afa4f88035ac..01fca8acd4c4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -879,7 +879,10 @@ static int __igt_reset_engines(struct intel_gt *gt,
break;
}
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ /* With deadlines, no strict priority */
+ i915_request_set_deadline(rq, 0);
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 052dcc59fcc5..b18276cf30ed 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -85,6 +85,9 @@ static int wait_for_submit(struct intel_engine_cs *engine,
struct i915_request *rq,
unsigned long timeout)
{
+ /* Ignore our own attempts to suppress excess tasklets */
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+
timeout += jiffies;
do {
bool done = time_after(jiffies, timeout);
@@ -754,7 +757,7 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
static int
release_queue(struct intel_engine_cs *engine,
struct i915_vma *vma,
- int idx, int prio)
+ int idx, u64 deadline)
{
struct i915_request *rq;
u32 *cs;
@@ -779,10 +782,7 @@ release_queue(struct intel_engine_cs *engine,
i915_request_get(rq);
i915_request_add(rq);
- local_bh_disable();
- i915_request_set_priority(rq, prio);
- local_bh_enable(); /* kick tasklet */
-
+ i915_request_set_deadline(rq, deadline);
i915_request_put(rq);
return 0;
@@ -796,6 +796,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
struct intel_engine_cs *engine;
struct i915_request *head;
enum intel_engine_id id;
+ long timeout;
int err, i, n = 0;
head = semaphore_queue(outer, vma, n++);
@@ -816,12 +817,16 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
}
}
- err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
+ err = release_queue(outer, vma, n, 0);
if (err)
goto out;
- if (i915_request_wait(head, 0,
- 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
+ /* Expected number of pessimal slices required */
+ timeout = RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3);
+ timeout *= 4; /* safety factor, including bucketing */
+ timeout += HZ / 2; /* and include the request completion */
+
+ if (i915_request_wait(head, 0, timeout) < 0) {
pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
count, n);
GEM_TRACE_DUMP();
@@ -926,6 +931,8 @@ create_rewinder(struct intel_context *ce,
err = i915_request_await_dma_fence(rq, &wait->fence);
if (err)
goto err;
+
+ i915_request_set_deadline(rq, rq_deadline(wait));
}
cs = intel_ring_begin(rq, 14);
@@ -1200,7 +1207,7 @@ static int live_timeslice_queue(void *arg)
err = PTR_ERR(rq);
goto err_heartbeat;
}
- i915_request_set_priority(rq, I915_PRIORITY_MAX);
+ i915_request_set_deadline(rq, 0);
err = wait_for_submit(engine, rq, HZ / 2);
if (err) {
pr_err("%s: Timed out trying to submit semaphores\n",
@@ -1223,10 +1230,9 @@ static int live_timeslice_queue(void *arg)
}
GEM_BUG_ON(i915_request_completed(rq));
- GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
/* Queue: semaphore signal, matching priority as semaphore */
- err = release_queue(engine, vma, 1, effective_prio(rq));
+ err = release_queue(engine, vma, 1, effective_deadline(rq));
if (err)
goto err_rq;
@@ -1326,7 +1332,7 @@ static int live_timeslice_nopreempt(void *arg)
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
- err = PTR_ERR(rq);
+ err = PTR_ERR(ce);
goto out_spin;
}
@@ -1337,6 +1343,7 @@ static int live_timeslice_nopreempt(void *arg)
goto out_spin;
}
+ rq->sched.deadline = 0;
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
i915_request_get(rq);
i915_request_add(rq);
@@ -1709,6 +1716,7 @@ static int live_late_preempt(void *arg)
/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
ctx_lo->sched.priority = 1;
+ ctx_hi->sched.priority = I915_PRIORITY_MIN;
for_each_engine(engine, gt, id) {
struct igt_live_test t;
@@ -2648,6 +2656,9 @@ static int live_preempt_gang(void *arg)
struct i915_request *n =
list_next_entry(rq, client_link);
+ /* With deadlines, no strict priority ordering */
+ i915_request_set_deadline(rq, 0);
+
if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
drm_info_printer(engine->i915->drm.dev);
@@ -2869,7 +2880,7 @@ static int preempt_user(struct intel_engine_cs *engine,
i915_request_get(rq);
i915_request_add(rq);
- i915_request_set_priority(rq, I915_PRIORITY_MAX);
+ i915_request_set_deadline(rq, 0);
if (i915_request_wait(rq, 0, HZ / 2) < 0)
err = -ETIME;
@@ -4402,6 +4413,7 @@ static int emit_semaphore_signal(struct intel_context *ce, void *slot)
intel_ring_advance(rq, cs);
+ rq->sched.deadline = 0;
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
i915_request_add(rq);
return 0;
@@ -4911,6 +4923,10 @@ static int __live_lrc_gpr(struct intel_engine_cs *engine,
err = emit_semaphore_signal(engine->kernel_context, slot);
if (err)
goto err_rq;
+
+ err = wait_for_submit(engine, rq, HZ / 2);
+ if (err)
+ goto err_rq;
} else {
slot[0] = 1;
wmb();
@@ -5468,6 +5484,7 @@ static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
intel_ring_advance(rq, cs);
+ rq->sched.deadline = 0;
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0c42e8b0c211..6da465c7c4f5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -333,8 +333,6 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
i915_priolist_free(p);
}
done:
- execlists->queue_priority_hint =
- rb ? to_priolist(rb)->priority : INT_MIN;
if (submit) {
*port = schedule_in(last, port - execlists->inflight);
*++port = NULL;
@@ -473,12 +471,10 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
rb_erase_cached(&p->node, &execlists->queue);
i915_priolist_free(p);
}
+ GEM_BUG_ON(!RB_EMPTY_ROOT(&execlists->queue.rb_root));
/* Remaining _unready_ requests will be nop'ed when submitted */
- execlists->queue_priority_hint = INT_MIN;
- execlists->queue = RB_ROOT_CACHED;
-
spin_unlock_irqrestore(&engine->active.lock, flags);
}
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index bc2fa84f98a8..43a0ac45295f 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -22,6 +22,8 @@ enum {
/* Interactive workload, scheduled for immediate pageflipping */
I915_PRIORITY_DISPLAY,
+
+ __I915_PRIORITY_KERNEL__
};
/* Smallest priority value that cannot be bumped. */
@@ -35,13 +37,12 @@ enum {
* i.e. nothing can have higher priority and force us to usurp the
* active request.
*/
-#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
-#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
+#define I915_PRIORITY_BARRIER INT_MAX
struct i915_priolist {
struct list_head requests;
struct rb_node node;
- int priority;
+ u64 deadline;
};
#endif /* _I915_PRIOLIST_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 118ab6650d1f..23594e712292 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -561,7 +561,7 @@ static inline void i915_request_clear_hold(struct i915_request *rq)
}
static inline struct intel_timeline *
-i915_request_timeline(struct i915_request *rq)
+i915_request_timeline(const struct i915_request *rq)
{
/* Valid only while the request is being constructed (or retired). */
return rcu_dereference_protected(rq->timeline,
@@ -576,7 +576,7 @@ i915_request_gem_context(struct i915_request *rq)
}
static inline struct intel_timeline *
-i915_request_active_timeline(struct i915_request *rq)
+i915_request_active_timeline(const struct i915_request *rq)
{
/*
* When in use during submission, we are protected by a guarantee that
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 4c189b81cc62..30bcb6f9d99f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -20,6 +20,11 @@ static struct i915_global_scheduler {
static DEFINE_SPINLOCK(ipi_lock);
static LIST_HEAD(ipi_list);
+static inline u64 rq_deadline(const struct i915_request *rq)
+{
+ return READ_ONCE(rq->sched.deadline);
+}
+
static inline int rq_prio(const struct i915_request *rq)
{
return READ_ONCE(rq->sched.attr.priority);
@@ -32,6 +37,7 @@ static void ipi_schedule(struct irq_work *wrk)
struct i915_dependency *p;
struct i915_request *rq;
unsigned long flags;
+ u64 deadline;
int prio;
spin_lock_irqsave(&ipi_lock, flags);
@@ -40,7 +46,10 @@ static void ipi_schedule(struct irq_work *wrk)
rq = container_of(p->signaler, typeof(*rq), sched);
list_del_init(&p->ipi_link);
+ deadline = p->ipi_deadline;
prio = p->ipi_priority;
+
+ p->ipi_deadline = I915_DEADLINE_NEVER;
p->ipi_priority = I915_PRIORITY_INVALID;
}
spin_unlock_irqrestore(&ipi_lock, flags);
@@ -52,6 +61,8 @@ static void ipi_schedule(struct irq_work *wrk)
if (prio > rq_prio(rq))
i915_request_set_priority(rq, prio);
+ if (deadline < rq_deadline(rq))
+ i915_request_set_deadline(rq, deadline);
} while (1);
rcu_read_unlock();
}
@@ -79,28 +90,8 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
}
-static void assert_priolists(struct intel_engine_execlists * const execlists)
-{
- struct rb_node *rb;
- long last_prio;
-
- if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- return;
-
- GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
- rb_first(&execlists->queue.rb_root));
-
- last_prio = INT_MAX;
- for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
- const struct i915_priolist *p = to_priolist(rb);
-
- GEM_BUG_ON(p->priority > last_prio);
- last_prio = p->priority;
- }
-}
-
struct list_head *
-i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
+i915_sched_lookup_priolist(struct intel_engine_cs *engine, u64 deadline)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_priolist *p;
@@ -108,10 +99,9 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
bool first = true;
lockdep_assert_held(&engine->active.lock);
- assert_priolists(execlists);
if (unlikely(execlists->no_priolist))
- prio = I915_PRIORITY_NORMAL;
+ deadline = 0;
find_priolist:
/* most positive priority is scheduled first, equal priorities fifo */
@@ -120,9 +110,9 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
while (*parent) {
rb = *parent;
p = to_priolist(rb);
- if (prio > p->priority) {
+ if (deadline < p->deadline) {
parent = &rb->rb_left;
- } else if (prio < p->priority) {
+ } else if (deadline > p->deadline) {
parent = &rb->rb_right;
first = false;
} else {
@@ -130,13 +120,13 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
}
}
- if (prio == I915_PRIORITY_NORMAL) {
+ if (!deadline) {
p = &execlists->default_priolist;
} else {
p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
/* Convert an allocation failure to a priority bump */
if (unlikely(!p)) {
- prio = I915_PRIORITY_NORMAL; /* recurses just once */
+ deadline = 0; /* recurses just once */
/* To maintain ordering with all rendering, after an
* allocation failure we have to disable all scheduling.
@@ -151,7 +141,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
}
}
- p->priority = prio;
+ p->deadline = deadline;
INIT_LIST_HEAD(&p->requests);
rb_link_node(&p->node, rb, parent);
@@ -160,70 +150,221 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
return &p->requests;
}
-void __i915_priolist_free(struct i915_priolist *p)
+void i915_priolist_free(struct i915_priolist *p)
+{
+ if (p->deadline)
+ kmem_cache_free(global.slab_priorities, p);
+}
+
+static bool kick_submission(const struct intel_engine_cs *engine, u64 deadline)
{
- kmem_cache_free(global.slab_priorities, p);
+ const struct i915_request *inflight;
+ bool kick = true;
+
+ rcu_read_lock();
+ inflight = execlists_active(&engine->execlists);
+ if (inflight)
+ kick = deadline < rq_deadline(inflight);
+ rcu_read_unlock();
+
+ return kick;
+}
+
+static bool __i915_request_set_deadline(struct i915_request *rq, u64 deadline)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ struct i915_request *rn;
+ struct list_head *plist;
+ LIST_HEAD(dfs);
+
+ lockdep_assert_held(&engine->active.lock);
+ list_add(&rq->sched.dfs, &dfs);
+
+ list_for_each_entry(rq, &dfs, sched.dfs) {
+ struct i915_dependency *p;
+
+ GEM_BUG_ON(rq->engine != engine);
+
+ for_each_signaler(p, rq) {
+ struct i915_request *s =
+ container_of(p->signaler, typeof(*s), sched);
+
+ GEM_BUG_ON(s == rq);
+
+ if (rq_deadline(s) <= deadline)
+ continue;
+
+ if (i915_request_completed(s))
+ continue;
+
+ if (s->engine != rq->engine) {
+ spin_lock(&ipi_lock);
+ if (deadline < p->ipi_deadline) {
+ p->ipi_deadline = deadline;
+ list_move(&p->ipi_link, &ipi_list);
+ irq_work_queue(&ipi_work);
+ }
+ spin_unlock(&ipi_lock);
+ continue;
+ }
+
+ list_move_tail(&s->sched.dfs, &dfs);
+ }
+ }
+
+ plist = i915_sched_lookup_priolist(engine, deadline);
+
+ /* Fifo and depth-first replacement ensure our deps execute first */
+ list_for_each_entry_safe_reverse(rq, rn, &dfs, sched.dfs) {
+ GEM_BUG_ON(rq->engine != engine);
+ GEM_BUG_ON(deadline > rq_deadline(rq));
+
+ INIT_LIST_HEAD(&rq->sched.dfs);
+ WRITE_ONCE(rq->sched.deadline, deadline);
+ RQ_TRACE(rq, "set-deadline:%llu\n", deadline);
+
+ /*
+ * Once the request is ready, it will be placed into the
+ * priority lists and then onto the HW runlist. Before the
+ * request is ready, it does not contribute to our preemption
+ * decisions and we can safely ignore it, as it will, and
+ * any preemption required, be dealt with upon submission.
+ * See engine->submit_request()
+ */
+
+ if (i915_request_in_priority_queue(rq))
+ list_move_tail(&rq->sched.link, plist);
+ }
+
+ return kick_submission(engine, deadline);
}
-static inline bool need_preempt(int prio, int active)
+void i915_request_set_deadline(struct i915_request *rq, u64 deadline)
{
+ struct intel_engine_cs *engine = READ_ONCE(rq->engine);
+ unsigned long flags;
+
+ if (!intel_engine_has_scheduler(engine))
+ return;
+
/*
- * Allow preemption of low -> normal -> high, but we do
- * not allow low priority tasks to preempt other low priority
- * tasks under the impression that latency for low priority
- * tasks does not matter (as much as background throughput),
- * so kiss.
+ * Virtual engines complicate acquiring the engine timeline lock,
+ * as their rq->engine pointer is not stable until under that
+ * engine lock. The simple ploy we use is to take the lock then
+ * check that the rq still belongs to the newly locked engine.
*/
- return prio >= max(I915_PRIORITY_NORMAL, active);
+ spin_lock_irqsave(&engine->active.lock, flags);
+ while (engine != READ_ONCE(rq->engine)) {
+ spin_unlock(&engine->active.lock);
+ engine = READ_ONCE(rq->engine);
+ spin_lock(&engine->active.lock);
+ }
+
+ if (i915_request_completed(rq))
+ goto unlock;
+
+ if (deadline >= rq_deadline(rq))
+ goto unlock;
+
+ if (__i915_request_set_deadline(rq, deadline))
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+
+unlock:
+ spin_unlock_irqrestore(&engine->active.lock, flags);
}
-static void kick_submission(struct intel_engine_cs *engine,
- const struct i915_request *rq,
- int prio)
+static u64 prio_slice(int prio)
{
- const struct i915_request *inflight;
+ u64 slice;
+ int sf;
/*
- * We only need to kick the tasklet once for the high priority
- * new context we add into the queue.
+ * With a 1ms scheduling quantum:
+ *
+ * MAX USER: ~32us deadline
+ * 0: ~16ms deadline
+ * MIN_USER: 1000ms deadline
*/
- if (prio <= engine->execlists.queue_priority_hint)
- return;
- rcu_read_lock();
+ if (prio >= __I915_PRIORITY_KERNEL__)
+ return INT_MAX - prio;
- /* Nothing currently active? We're overdue for a submission! */
- inflight = execlists_active(&engine->execlists);
- if (!inflight)
- goto unlock;
+ slice = __I915_PRIORITY_KERNEL__ - prio;
+ if (prio >= 0)
+ sf = 20 - 6;
+ else
+ sf = 20 - 1;
+
+ return slice << sf;
+}
+
+u64 i915_scheduler_virtual_deadline(u64 kt, int priority)
+{
+ return i915_sched_to_ticks(kt + prio_slice(priority));
+}
+
+u64 i915_scheduler_next_virtual_deadline(int priority)
+{
+ return i915_scheduler_virtual_deadline(ktime_get(), priority);
+}
+
+static u64 signal_deadline(const struct i915_request *rq)
+{
+ u64 last = ktime_to_ns(ktime_get());
+ const struct i915_dependency *p;
/*
- * If we are already the currently executing context, don't
- * bother evaluating if we should preempt ourselves.
+ * Find the earliest point at which we will become 'ready',
+ * which we infer from the deadline of all active signalers.
+ * We will position ourselves at the end of that chain of work.
*/
- if (inflight->context == rq->context)
- goto unlock;
- ENGINE_TRACE(engine,
- "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
- prio,
- rq->fence.context, rq->fence.seqno,
- inflight->fence.context, inflight->fence.seqno,
- inflight->sched.attr.priority);
+ rcu_read_lock();
+ for_each_signaler(p, rq) {
+ const struct i915_request *s =
+ container_of(p->signaler, typeof(*s), sched);
+ u64 deadline;
- engine->execlists.queue_priority_hint = prio;
- if (need_preempt(prio, rq_prio(inflight)))
- tasklet_hi_schedule(&engine->execlists.tasklet);
+ if (i915_request_completed(s))
+ continue;
-unlock:
+ if (rq_prio(s) < rq_prio(rq))
+ continue;
+
+ deadline = i915_sched_to_ns(rq_deadline(s));
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ deadline -= prio_slice(rq_prio(s));
+
+ last = max(last, deadline);
+ }
rcu_read_unlock();
+
+ return last;
}
-static void __i915_request_set_priority(struct i915_request *rq, int prio)
+static u64 earliest_deadline(const struct i915_request *rq)
+{
+ return i915_scheduler_virtual_deadline(signal_deadline(rq),
+ rq_prio(rq));
+}
+
+static bool set_earliest_deadline(struct i915_request *rq, u64 old)
+{
+ u64 dl;
+
+ /* Recompute our deadlines and promote after a priority change */
+ dl = min(earliest_deadline(rq), rq_deadline(rq));
+ if (dl >= old)
+ return false;
+
+ return __i915_request_set_deadline(rq, dl);
+}
+
+static bool __i915_request_set_priority(struct i915_request *rq, int prio)
{
struct intel_engine_cs *engine = rq->engine;
struct i915_request *rn;
- struct list_head *plist;
+ bool kick = false;
LIST_HEAD(dfs);
lockdep_assert_held(&engine->active.lock);
@@ -280,32 +421,20 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio)
}
}
- plist = i915_sched_lookup_priolist(engine, prio);
-
- /* Fifo and depth-first replacement ensure our deps execute first */
list_for_each_entry_safe_reverse(rq, rn, &dfs, sched.dfs) {
GEM_BUG_ON(rq->engine != engine);
+ GEM_BUG_ON(prio < rq_prio(rq));
INIT_LIST_HEAD(&rq->sched.dfs);
WRITE_ONCE(rq->sched.attr.priority, prio);
+ RQ_TRACE(rq, "set-priority:%d\n", prio);
- /*
- * Once the request is ready, it will be placed into the
- * priority lists and then onto the HW runlist. Before the
- * request is ready, it does not contribute to our preemption
- * decisions and we can safely ignore it, as it will, and
- * any preemption required, be dealt with upon submission.
- * See engine->submit_request()
- */
- if (!i915_request_is_ready(rq))
- continue;
-
- if (i915_request_in_priority_queue(rq))
- list_move_tail(&rq->sched.link, plist);
-
- /* Defer (tasklet) submission until after all updates. */
- kick_submission(engine, rq, prio);
+ if (i915_request_is_ready(rq) &&
+ set_earliest_deadline(rq, rq_deadline(rq)))
+ kick = true;
}
+
+ return kick;
}
void i915_request_set_priority(struct i915_request *rq, int prio)
@@ -316,12 +445,6 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
if (!intel_engine_has_scheduler(engine))
return;
- /*
- * Virtual engines complicate acquiring the engine timeline lock,
- * as their rq->engine pointer is not stable until under that
- * engine lock. The simple ploy we use is to take the lock then
- * check that the rq still belongs to the newly locked engine.
- */
spin_lock_irqsave(&engine->active.lock, flags);
while (engine != READ_ONCE(rq->engine)) {
spin_unlock(&engine->active.lock);
@@ -335,12 +458,21 @@ void i915_request_set_priority(struct i915_request *rq, int prio)
if (prio <= rq_prio(rq))
goto unlock;
- __i915_request_set_priority(rq, prio);
+ if (__i915_request_set_priority(rq, prio))
+ tasklet_hi_schedule(&engine->execlists.tasklet);
unlock:
spin_unlock_irqrestore(&engine->active.lock, flags);
}
+bool intel_engine_queue_request(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ lockdep_assert_held(&engine->active.lock);
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ return set_earliest_deadline(rq, I915_DEADLINE_NEVER);
+}
+
void i915_sched_node_init(struct i915_sched_node *node)
{
spin_lock_init(&node->lock);
@@ -356,6 +488,7 @@ void i915_sched_node_init(struct i915_sched_node *node)
void i915_sched_node_reinit(struct i915_sched_node *node)
{
node->attr.priority = I915_PRIORITY_INVALID;
+ node->deadline = I915_DEADLINE_NEVER;
node->semaphores = 0;
node->flags = 0;
@@ -388,6 +521,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
if (!node_signaled(signal)) {
INIT_LIST_HEAD(&dep->ipi_link);
+ dep->ipi_deadline = I915_DEADLINE_NEVER;
dep->ipi_priority = I915_PRIORITY_INVALID;
dep->signaler = signal;
dep->waiter = node;
@@ -519,6 +653,10 @@ void i915_sched_node_retire(struct i915_sched_node *node)
spin_unlock_irq(&node->lock);
}
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_scheduler.c"
+#endif
+
static void i915_global_scheduler_shrink(void)
{
kmem_cache_shrink(global.slab_dependencies);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index b26a13ef6feb..62265108230f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -37,15 +37,27 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
void i915_sched_node_retire(struct i915_sched_node *node);
void i915_request_set_priority(struct i915_request *request, int prio);
+void i915_request_set_deadline(struct i915_request *request, u64 deadline);
+
+u64 i915_scheduler_virtual_deadline(u64 kt, int priority);
+u64 i915_scheduler_next_virtual_deadline(int priority);
+
+bool intel_engine_queue_request(struct intel_engine_cs *engine,
+ struct i915_request *rq);
struct list_head *
-i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
+i915_sched_lookup_priolist(struct intel_engine_cs *engine, u64 deadline);
+
+void i915_priolist_free(struct i915_priolist *p);
+
+static inline u64 i915_sched_to_ticks(ktime_t kt)
+{
+ return ktime_to_ns(kt) >> I915_SCHED_DEADLINE_SHIFT;
+}
-void __i915_priolist_free(struct i915_priolist *p);
-static inline void i915_priolist_free(struct i915_priolist *p)
+static inline u64 i915_sched_to_ns(u64 deadline)
{
- if (p->priority != I915_PRIORITY_NORMAL)
- __i915_priolist_free(p);
+ return deadline << I915_SCHED_DEADLINE_SHIFT;
}
#endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index ce60577df2bf..ae7ca78a88c8 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -69,6 +69,22 @@ struct i915_sched_node {
unsigned int flags;
#define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0)
intel_engine_mask_t semaphores;
+
+ /**
+ * @deadline: [virtual] deadline
+ *
+ * When the request is ready for execution, it is given a quota
+ * (the engine's timeslice) and a virtual deadline. The virtual
+ * deadline is derived from the current time:
+ * ktime_get() + (prio_ratio * timeslice)
+ *
+ * Requests are then executed in order of deadline completion.
+ * Requests with earlier deadlines than currently executing on
+ * the engine will preempt the active requests.
+ */
+ u64 deadline;
+#define I915_SCHED_DEADLINE_SHIFT 19 /* i.e. roughly 500us buckets */
+#define I915_DEADLINE_NEVER U64_MAX
};
struct i915_dependency {
@@ -81,6 +97,7 @@ struct i915_dependency {
#define I915_DEPENDENCY_ALLOC BIT(0)
#define I915_DEPENDENCY_EXTERNAL BIT(1)
#define I915_DEPENDENCY_WEAK BIT(2)
+ u64 ipi_deadline;
int ipi_priority;
};
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 1929feba4e8e..29ff6b669cc2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -24,6 +24,7 @@ selftest(uncore, intel_uncore_mock_selftests)
selftest(engine, intel_engine_cs_mock_selftests)
selftest(timelines, intel_timeline_mock_selftests)
selftest(requests, i915_request_mock_selftests)
+selftest(scheduler, i915_scheduler_mock_selftests)
selftest(objects, i915_gem_object_mock_selftests)
selftest(phys, i915_gem_phys_mock_selftests)
selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 92c628f18c60..db91e639918e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -2124,6 +2124,7 @@ static int measure_preemption(struct intel_context *ce)
intel_ring_advance(rq, cs);
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ rq->sched.deadline = 0;
elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
new file mode 100644
index 000000000000..9ca50db81034
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+static int mock_scheduler_slices(void *dummy)
+{
+ u64 min, max, normal, kernel;
+
+ min = prio_slice(I915_PRIORITY_MIN);
+ pr_info("%8s slice: %lluus\n", "min", min >> 10);
+
+ normal = prio_slice(0);
+ pr_info("%8s slice: %lluus\n", "normal", normal >> 10);
+
+ max = prio_slice(I915_PRIORITY_MAX);
+ pr_info("%8s slice: %lluus\n", "max", max >> 10);
+
+ kernel = prio_slice(I915_PRIORITY_BARRIER);
+ pr_info("%8s slice: %lluus\n", "kernel", kernel >> 10);
+
+ if (kernel != 0) {
+ pr_err("kernel prio slice should be 0\n");
+ return -EINVAL;
+ }
+
+ if (max >= normal) {
+ pr_err("maximum prio slice should be shorter than normal\n");
+ return -EINVAL;
+ }
+
+ if (min <= normal) {
+ pr_err("minimum prio slice should be longer than normal\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int i915_scheduler_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(mock_scheduler_slices),
+ };
+
+ return i915_subtests(tests, NULL);
+}
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
* [Intel-gfx] [PATCH 12/28] drm/i915/gem: Build the reloc request first
From: Chris Wilson @ 2020-06-07 22:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
In-Reply-To: <20200607222108.14401-1-chris@chris-wilson.co.uk>
If we get interrupted in the middle of chaining up the relocation
entries, we will fail to submit the relocation batch. However, we will
report having already completed some of the relocations, and so the
reloc.presumed_offset will no longer match the batch contents, causing
confusion and invalid future batches. If we build the relocation request
packet first, we can always submit as much of the relocation chain as we
managed to build.
Fixes: 0e97fbb08055 ("drm/i915/gem: Use a single chained reloc batches for a single execbuf")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 49 ++++++++++---------
.../i915/gem/selftests/i915_gem_execbuffer.c | 8 +--
2 files changed, 30 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index e012857be129..83cea2ea7c61 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1002,11 +1002,27 @@ static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
}
-static int reloc_gpu_flush(struct reloc_cache *cache)
+static int reloc_gpu_emit(struct reloc_cache *cache)
{
struct i915_request *rq = cache->rq;
int err;
+ err = 0;
+ if (rq->engine->emit_init_breadcrumb)
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ rq->batch->node.start,
+ PAGE_SIZE,
+ reloc_bb_flags(cache));
+
+ return err;
+}
+
+static void reloc_gpu_flush(struct reloc_cache *cache)
+{
+ struct i915_request *rq = cache->rq;
+
if (cache->rq_vma) {
struct drm_i915_gem_object *obj = cache->rq_vma->obj;
@@ -1018,21 +1034,8 @@ static int reloc_gpu_flush(struct reloc_cache *cache)
i915_gem_object_unpin_map(obj);
}
- err = 0;
- if (rq->engine->emit_init_breadcrumb)
- err = rq->engine->emit_init_breadcrumb(rq);
- if (!err)
- err = rq->engine->emit_bb_start(rq,
- rq->batch->node.start,
- PAGE_SIZE,
- reloc_bb_flags(cache));
- if (err)
- i915_request_set_error_once(rq, err);
-
intel_gt_chipset_flush(rq->engine->gt);
i915_request_add(rq);
-
- return err;
}
static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
@@ -1120,7 +1123,7 @@ __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
err = i915_vma_move_to_active(batch, rq, 0);
i915_vma_unlock(batch);
if (err)
- goto skip_request;
+ goto err_request;
rq->batch = batch;
i915_vma_unpin(batch);
@@ -1133,8 +1136,6 @@ __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
/* Return with batch mapping (cmd) still pinned */
goto out_pool;
-skip_request:
- i915_request_set_error_once(rq, err);
err_request:
i915_request_add(rq);
err_unpin:
@@ -1167,10 +1168,8 @@ static u32 *reloc_batch_grow(struct i915_execbuffer *eb,
if (unlikely(cache->rq_size + len >
PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
err = reloc_gpu_chain(cache);
- if (unlikely(err)) {
- i915_request_set_error_once(cache->rq, err);
+ if (unlikely(err))
return ERR_PTR(err);
- }
}
GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32));
@@ -1493,13 +1492,17 @@ static int reloc_gpu_alloc(struct i915_execbuffer *eb)
static int reloc_gpu(struct i915_execbuffer *eb)
{
struct eb_vma *ev;
- int flush, err;
+ int err;
err = reloc_gpu_alloc(eb);
if (err)
return err;
GEM_BUG_ON(!eb->reloc_cache.rq);
+ err = reloc_gpu_emit(&eb->reloc_cache);
+ if (err)
+ goto out;
+
list_for_each_entry(ev, &eb->relocs, reloc_link) {
err = eb_reloc_vma(eb, ev, eb_reloc_entry);
if (err < 0)
@@ -1507,9 +1510,7 @@ static int reloc_gpu(struct i915_execbuffer *eb)
}
out:
- flush = reloc_gpu_flush(&eb->reloc_cache);
- if (!err)
- err = flush;
+ reloc_gpu_flush(&eb->reloc_cache);
return err;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 50fe22d87ae1..faed6480a792 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -40,6 +40,10 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
if (err)
goto unpin_vma;
+ err = reloc_gpu_emit(&eb->reloc_cache);
+ if (err)
+ goto unpin_vma;
+
/* 8-Byte aligned */
err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
if (err)
@@ -64,9 +68,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
GEM_BUG_ON(!eb->reloc_cache.rq);
rq = i915_request_get(eb->reloc_cache.rq);
- err = reloc_gpu_flush(&eb->reloc_cache);
- if (err)
- goto put_rq;
+ reloc_gpu_flush(&eb->reloc_cache);
err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
if (err) {
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.