All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] reclocking stability improvements
@ 2011-04-27 23:58 Martin Peres
       [not found] ` <1303948692-22455-1-git-send-email-martin.peres-GANU6spQydw@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Peres @ 2011-04-27 23:58 UTC (permalink / raw)
  To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi everyone,

I would like everyone to test this set of patches, as we'll need them quite soon for timing management on nv50.

Please report success or failure by replying to this email.

Thanks in advance,
Martin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/2] drm/nouveau/pm: Add pm.(un)pause functions
       [not found] ` <1303948692-22455-1-git-send-email-martin.peres-GANU6spQydw@public.gmane.org>
@ 2011-04-27 23:58   ` Martin Peres
  2011-04-27 23:58   ` [PATCH 2/2] drm/nouveau/nv50: reclock memory using PMS on nv50 Martin Peres
  2011-04-28 10:32   ` [PATCH 0/2] reclocking stability improvements Maxim Levitsky
  2 siblings, 0 replies; 18+ messages in thread
From: Martin Peres @ 2011-04-27 23:58 UTC (permalink / raw)
  To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Martin Peres

From: Martin Peres <martin.peres-Iz16wY1oaNPLSKGbIzaifA@public.gmane.org>

With this patch, cards without internal memory (IONs and other IGPs)
and cards with no memory reclock (a lot of nv40) should support
safe reclocking while gaming.

This should work on all hardware (< nva3); please report bugs if it doesn't.

v2: Fix missing symbol at compilation on x86_32 systems

Signed-off-by: Martin Peres <martin.peres-Iz16wY1oaNPLSKGbIzaifA@public.gmane.org>
---
 drivers/gpu/drm/nouveau/nouveau_drv.h   |    9 ++
 drivers/gpu/drm/nouveau/nouveau_pm.c    |   24 ++++-
 drivers/gpu/drm/nouveau/nouveau_pm.h    |    4 +
 drivers/gpu/drm/nouveau/nouveau_reg.h   |    3 +
 drivers/gpu/drm/nouveau/nouveau_state.c |   13 ++-
 drivers/gpu/drm/nouveau/nv04_pm.c       |  126 ++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nv50_pm.c       |  172 +++++++++++++++++++++++++++++++
 7 files changed, 348 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 444a943..960e27d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -462,6 +462,10 @@ struct nouveau_pm_memtimings {
 	int nr_timing;
 };
 
+struct nouveau_pm_pause_card_state {
+	u32 reg_c040;
+};
+
 struct nouveau_pm_engine {
 	struct nouveau_pm_voltage voltage;
 	struct nouveau_pm_level perflvl[NOUVEAU_PM_MAX_LEVEL];
@@ -476,6 +480,11 @@ struct nouveau_pm_engine {
 	struct device *hwmon;
 	struct notifier_block acpi_nb;
 
+	struct nouveau_pm_pause_card_state pause_state;
+
+	int (*pause)(struct drm_device *);
+	void (*unpause)(struct drm_device *);
+
 	int (*clock_get)(struct drm_device *, u32 id);
 	void *(*clock_pre)(struct drm_device *, struct nouveau_pm_level *,
 			   u32 id, int khz);
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c
index da8d994..3bffe60 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.c
@@ -45,6 +45,10 @@ nouveau_pm_clock_set(struct drm_device *dev, struct nouveau_pm_level *perflvl,
 	if (khz == 0)
 		return 0;
 
+	/* Do no reclock the memory if the frequencies didn't change */
+	if (id == PLL_MEMORY && pm->cur->memory == khz)
+		return 0;
+
 	pre_state = pm->clock_pre(dev, perflvl, id, khz);
 	if (IS_ERR(pre_state))
 		return PTR_ERR(pre_state);
@@ -60,10 +64,13 @@ nouveau_pm_perflvl_set(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	int ret;
+	uint64_t start = nv04_timer_read(dev);
 
 	if (perflvl == pm->cur)
 		return 0;
 
+	NV_INFO(dev, "setting performance level: %s\n", perflvl->name);
+
 	if (pm->voltage.supported && pm->voltage_set && perflvl->voltage) {
 		ret = pm->voltage_set(dev, perflvl->voltage);
 		if (ret) {
@@ -72,13 +79,27 @@ nouveau_pm_perflvl_set(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 		}
 	}
 
+	ret = pm->pause(dev);
+	if (ret)
+		return ret;
+
 	nouveau_pm_clock_set(dev, perflvl, PLL_CORE, perflvl->core);
 	nouveau_pm_clock_set(dev, perflvl, PLL_SHADER, perflvl->shader);
 	nouveau_pm_clock_set(dev, perflvl, PLL_MEMORY, perflvl->memory);
 	nouveau_pm_clock_set(dev, perflvl, PLL_UNK05, perflvl->unk05);
 
+	/* Wait for PLLs to stabilize */
+	udelay(100);
+
 	pm->cur = perflvl;
-	return 0;
+	ret = 0;
+
+	pm->unpause(dev);
+
+	NV_DEBUG(dev, "Reclocking took %lluns\n",
+		 (nv04_timer_read(dev) - start));
+
+	return ret;
 }
 
 static int
@@ -112,7 +133,6 @@ nouveau_pm_profile_set(struct drm_device *dev, const char *profile)
 			return -EINVAL;
 	}
 
-	NV_INFO(dev, "setting performance level: %s\n", profile);
 	return nouveau_pm_perflvl_set(dev, perflvl);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.h b/drivers/gpu/drm/nouveau/nouveau_pm.h
index 4a9838dd..566f72d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.h
@@ -51,12 +51,16 @@ int nv04_pm_clock_get(struct drm_device *, u32 id);
 void *nv04_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *,
 			u32 id, int khz);
 void nv04_pm_clock_set(struct drm_device *, void *);
+int nv04_pm_pause(struct drm_device *dev);
+void nv04_pm_unpause(struct drm_device *dev);
 
 /* nv50_pm.c */
 int nv50_pm_clock_get(struct drm_device *, u32 id);
 void *nv50_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *,
 			u32 id, int khz);
 void nv50_pm_clock_set(struct drm_device *, void *);
+int nv50_pm_pause(struct drm_device *dev);
+void nv50_pm_unpause(struct drm_device *dev);
 
 /* nva3_pm.c */
 int nva3_pm_clock_get(struct drm_device *, u32 id);
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index f18cdfc..485d7d0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -695,8 +695,11 @@
 #define NV50_PROM__ESIZE                                       0x10000
 
 #define NV50_PGRAPH                                         0x00400000
+#define NV50_PGRAPH_CONTROL                                 0x00400500
+#define NV50_PGRAPH_FIFO_STATUS                             0x00400504
 #define NV50_PGRAPH__LEN                                           0x1
 #define NV50_PGRAPH__ESIZE                                     0x10000
+#define NV50_PFIFO_FREEZE                                       0x2504
 
 #define NV50_PDISPLAY                                                0x00610000
 #define NV50_PDISPLAY_OBJECTS                                        0x00610010
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 38ea662..3fc8455 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -90,6 +90,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.clock_get		= nv04_pm_clock_get;
 		engine->pm.clock_pre		= nv04_pm_clock_pre;
 		engine->pm.clock_set		= nv04_pm_clock_set;
+		engine->pm.pause		= nv04_pm_pause;
+		engine->pm.unpause		= nv04_pm_unpause;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -138,6 +140,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.clock_get		= nv04_pm_clock_get;
 		engine->pm.clock_pre		= nv04_pm_clock_pre;
 		engine->pm.clock_set		= nv04_pm_clock_set;
+		engine->pm.pause		= nv04_pm_pause;
+		engine->pm.unpause		= nv04_pm_unpause;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -186,6 +190,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.clock_get		= nv04_pm_clock_get;
 		engine->pm.clock_pre		= nv04_pm_clock_pre;
 		engine->pm.clock_set		= nv04_pm_clock_set;
+		engine->pm.pause		= nv04_pm_pause;
+		engine->pm.unpause		= nv04_pm_unpause;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -236,6 +242,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.clock_set		= nv04_pm_clock_set;
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
 		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
+		engine->pm.pause		= nv04_pm_pause;
+		engine->pm.unpause		= nv04_pm_unpause;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -288,6 +296,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
 		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
 		engine->pm.temp_get		= nv40_temp_get;
+		engine->pm.pause		= nv04_pm_pause;
+		engine->pm.unpause		= nv04_pm_unpause;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -361,6 +371,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		}
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
 		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
+		engine->pm.pause		= nv50_pm_pause;
+		engine->pm.unpause		= nv50_pm_unpause;
 		if (dev_priv->chipset >= 0x84)
 			engine->pm.temp_get	= nv84_temp_get;
 		else
@@ -1137,4 +1149,3 @@ bool nouveau_wait_for_idle(struct drm_device *dev)
 
 	return true;
 }
-
diff --git a/drivers/gpu/drm/nouveau/nv04_pm.c b/drivers/gpu/drm/nouveau/nv04_pm.c
index eb1c70d..6f5ad051 100644
--- a/drivers/gpu/drm/nouveau/nv04_pm.c
+++ b/drivers/gpu/drm/nouveau/nv04_pm.c
@@ -88,3 +88,129 @@ nv04_pm_clock_set(struct drm_device *dev, void *pre_state)
 	kfree(state);
 }
 
+int
+nv04_pm_pause(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
+	unsigned long irq_flags, hold_spin = 0;
+	/* initial guess... */
+	uint32_t mask300 = 0xffffffff;
+	uint32_t mask700 = 0xffffbfff;
+	uint64_t start = nv04_timer_read(dev);
+
+	/* Do not allow the card to allocate/destroy a
+	 * new channel while reclocking.
+	 *
+	 * We try to hold it for the shortest period of time possible
+	 */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, irq_flags);
+	hold_spin = 1;
+
+	/* Don't context switch */
+	nv04_fifo_reassign(dev, false);
+
+	/* PDISPLAY magic */
+	nv_mask(dev, NV50_PDISPLAY_PIO_CTRL, 0x0, 0x1);
+
+	/* Pause PFIFO's puller */
+	nv04_fifo_cache_pull(dev, false);
+
+	/* Wait for PFIFO's DMA_PUSH to deplete (Not busy) */
+	if (!nouveau_wait_eq(dev, 100000, NV04_PFIFO_CACHE1_DMA_PUSH,
+				0x100, 0x100)) {
+		NV_ERROR(dev, "PFIFO DMA_PUSH never depletes (0x%x)\n",
+			nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUSH));
+		goto err_pfifo_freeze;
+	}
+	nv_mask(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0x1, 0);
+
+	/* Pause PGRAPH's FIFO */
+	nv_wr32(dev, NV04_PGRAPH_FIFO, 0);
+
+	/* Now that the card is paused,
+	 * there is no problem with channel creation
+	 */
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, irq_flags);
+	hold_spin = 0;
+
+	/* Wait for PGRAPH to be really stopped */
+	if (!nouveau_wait_eq(dev, 1000000, 0x400300, mask300, 0x4) ||
+		!nouveau_wait_eq(dev, 8000000, NV04_PGRAPH_STATUS,
+				mask700, 0x0)) {
+		/* if you see this message,
+		* mask* above probably need to be adjusted
+		* to not contain the bits you see failing */
+		NV_ERROR(dev,
+		    "PGRAPH: wait for idle fail: %08x %08x!\n",
+		    nv_rd32(dev, NV04_PGRAPH_STATUS),
+		    nv_rd32(dev, 0x400300));
+
+		goto err_pgraph;
+	}
+
+	if (dev_priv->card_type == NV_40)
+		pm->pause_state.reg_c040 = nv_mask(dev, 0xc040, 0x333, 0);
+
+	NV_DEBUG(dev, "PM.pause took %lluns\n",
+		(nv04_timer_read(dev) - start));
+
+	return 0;
+
+err_pgraph:
+	nv_wr32(dev, NV04_PGRAPH_FIFO, 1);
+
+	nv_mask(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0, 0x1);
+
+err_pfifo_freeze:
+	nv04_fifo_cache_pull(dev, true);
+	nv04_fifo_reassign(dev, true);
+
+	/* PDISPLAY magic */
+	nv_mask(dev, NV50_PDISPLAY_PIO_CTRL, 0x1, 0x0);
+
+	if (hold_spin)
+		spin_unlock_irqrestore(&dev_priv->context_switch_lock,
+					irq_flags);
+
+	return -EAGAIN;
+}
+
+void
+nv04_pm_unpause(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
+	unsigned long irq_flags;
+
+	/* Do not allow the card to allocate/destroy a
+	 * new channel while unpausing.
+	 */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, irq_flags);
+
+	if (dev_priv->card_type == NV_40) {
+		nv_wr32(dev, 0xc040, pm->pause_state.reg_c040);
+		nv_wr32(dev, 0xc04c, nv_rd32(dev, 0xc04c));
+	}
+
+	/* Unpause PGRAPH */
+	nv_wr32(dev, NV04_PGRAPH_FIFO, 1);
+
+	/* Unpause pfifo caches */
+	nv_mask(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0, 0x1);
+	nv04_fifo_cache_pull(dev, true);
+	nv04_fifo_reassign(dev, true);
+
+	/* PDISPLAY magic */
+	nv_mask(dev, NV50_PDISPLAY_PIO_CTRL, 0x1, 0x0);
+
+	/* TODO: De-activated for the moment, it makes things unstable */
+#if 0
+	if (dev_priv->card_type == NV_40) {
+		nv_wr32(dev, 0x1580, nv_rd32(dev, 0x1580));
+		nv_wr32(dev, 0xc044, nv_rd32(dev, 0xc44));
+	}
+#endif
+
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, irq_flags);
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_pm.c b/drivers/gpu/drm/nouveau/nv50_pm.c
index 8a28100..4dd2d76 100644
--- a/drivers/gpu/drm/nouveau/nv50_pm.c
+++ b/drivers/gpu/drm/nouveau/nv50_pm.c
@@ -130,6 +130,7 @@ nv50_pm_clock_set(struct drm_device *dev, void *pre_state)
 		nv_wr32(dev, 0x100210, 0);
 		nv_wr32(dev, 0x1002dc, 1);
 	}
+	/* TODO: Tweek 0x4700 before reclocking UNK05 */
 
 	tmp  = nv_rd32(dev, reg + 0) & 0xfff8ffff;
 	tmp |= 0x80000000 | (P << 16);
@@ -144,3 +145,174 @@ nv50_pm_clock_set(struct drm_device *dev, void *pre_state)
 	kfree(state);
 }
 
+int
+nv50_pm_pause(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
+	unsigned long irq_flags, hold_spin = 0;
+	/* initial guess... */
+	uint32_t mask380 = 0xffffffff;
+	uint32_t mask384 = 0xffffffff;
+	uint32_t mask388 = 0xffffffff;
+	uint32_t mask504 = 0x00000001;
+	uint32_t mask700 = 0x00000001;
+	int i = 0;
+	uint64_t start = nv04_timer_read(dev);
+
+	/* Do not allow the card to allocate/destroy a
+	 * new channel while reclocking.
+	 *
+	 * We try to hold it for the shortest period of time possible
+	 */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, irq_flags);
+	hold_spin = 1;
+
+	/* Don't context switch */
+	nv04_fifo_reassign(dev, false);
+
+	/* PDISPLAY magic */
+	nv_mask(dev, NV50_PDISPLAY_PIO_CTRL, 0x0, 0x1);
+
+	nv_wr32(dev, NV50_PFIFO_FREEZE, 1);
+	if (!nouveau_wait_eq(dev, 100000, NV50_PFIFO_FREEZE, 0x10, 0x10)) {
+		NV_ERROR(dev, "PFIFO freeze failed\n");
+		goto err_pfifo_freeze;
+	}
+
+	/* Wait for PFIFO's DMA_PUSH to deplete */
+	if (!nouveau_wait_eq(dev, 100000, NV04_PFIFO_CACHE1_DMA_PUSH,
+				0x100, 0x100)) {
+		NV_ERROR(dev, "PFIFO DMA_PUSH never depleted (0x%x)\n",
+			nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUSH));
+		goto err_pfifo_freeze;
+	}
+
+	/* Pause PFIFO's caches */
+	nv04_fifo_cache_pull(dev, false);
+	nv_mask(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0x1, 0);
+
+	/* Empty PGRAPH's FIFO */
+	do {
+		/* Un-pause PGRAPH's FIFO (in case it was) */
+		nv_mask(dev, NV50_PGRAPH_CONTROL, 0, 0x1);
+
+		/* Wait for PGRAPH's FIFO to deplete */
+		if (!nouveau_wait_eq(dev, 100000, NV50_PGRAPH_FIFO_STATUS,
+					mask504, 0x1)) {
+			if (nv_rd32(dev, NV04_PGRAPH_STATUS) & 0x100) {
+				NV_ERROR(dev,
+				"PGRAPH: PGRAPH paused while running a ctxprog,"
+				" NV40_PGRAPH_CTXCTL_0310 = 0x%x\n",
+				nv_rd32(dev, NV40_PGRAPH_CTXCTL_0310));
+			}
+
+			goto err_ctx_prog_playing;
+		}
+
+		/* Pause PGRAPH's FIFO */
+		nv_mask(dev, NV50_PGRAPH_CONTROL, 0x1, 0);
+
+		/* Limit the number of loops to 2 */
+		i++;
+		if (i > 1)
+			goto err_pgraph_stop;
+	} while ((nv_rd32(dev, NV50_PGRAPH_FIFO_STATUS) & mask504) == 0);
+
+	/* Now that the PGRAPH's FIFO is paused,
+	 * there is no problem with channel creation.
+	 */
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, irq_flags);
+	hold_spin = 0;
+
+	/* Wait for PGRAPH engines to stop */
+	if (!nouveau_wait_eq(dev, 100000, 0x400380, mask380, 0x0) ||
+	    !nouveau_wait_eq(dev, 100000, 0x400384, mask384, 0x0) ||
+	    !nouveau_wait_eq(dev, 100000, 0x400388, mask388, 0x0) ||
+	    !nouveau_wait_eq(dev, 500000, NV04_PGRAPH_STATUS, mask700, 0x0)) {
+		/* if you see this message,
+		* mask* above probably need to be adjusted
+		* to not contain the bits you see failing */
+		NV_ERROR(dev,
+		    "PGRAPH: wait for idle fail: %08x %08x %08x %08x %08x!\n",
+		    nv_rd32(dev, 0x400380),
+		    nv_rd32(dev, 0x400384),
+		    nv_rd32(dev, 0x400388),
+		    nv_rd32(dev, NV50_PGRAPH_FIFO_STATUS),
+		    nv_rd32(dev, NV04_PGRAPH_STATUS));
+
+		goto err_pgraph_stop;
+	}
+
+	/* De-activate the PLLs */
+	pm->pause_state.reg_c040 = nv_mask(dev, 0xc040, 0x30, 0x100000);
+
+	NV_DEBUG(dev, "PM.pause took %lluns\n",
+		   (nv04_timer_read(dev) - start));
+
+	return 0;
+
+err_pgraph_stop:
+	nv_mask(dev, NV50_PGRAPH_CONTROL, 0, 0x1);
+
+err_ctx_prog_playing:
+	nv_mask(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0, 0x1);
+	nv04_fifo_cache_pull(dev, true);
+
+err_pfifo_freeze:
+	nv_wr32(dev, NV50_PFIFO_FREEZE, 0);
+
+	nv_mask(dev, NV50_PDISPLAY_PIO_CTRL, 0x1, 0x0);
+
+	nv04_fifo_reassign(dev, true);
+
+	if (hold_spin)
+		spin_unlock_irqrestore(&dev_priv->context_switch_lock,
+					irq_flags);
+	return -EAGAIN;
+}
+
+void
+nv50_pm_unpause(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
+	unsigned long irq_flags;
+
+	/* Do not allow the card to allocate/destroy a
+	 * new channel while unpausing.
+	 */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, irq_flags);
+
+	/* Restore the PLL supervisor state */
+	nv_wr32(dev, 0xc040, pm->pause_state.reg_c040);
+	nv_wr32(dev, 0xc04c, 0x10);
+	nv_wr32(dev, 0xc040, pm->pause_state.reg_c040);
+
+	/* Unpause pfifo caches */
+	nv_mask(dev, NV04_PFIFO_CACHE1_DMA_PUSH, 0, 0x1);
+	nv04_fifo_cache_pull(dev, true);
+
+	/* Unpause PGRAPH */
+	nv_mask(dev, NV50_PGRAPH_CONTROL, 0, 0x1);
+
+	/* Un-pause PFIFO */
+	nv_wr32(dev, NV50_PFIFO_FREEZE, 0);
+
+	/* PDISPLAY magic */
+	nv_wr32(dev, 0x616308, 0x10);
+	nv_wr32(dev, 0x616b08, 0x10);
+	nv_mask(dev, NV50_PDISPLAY_PIO_CTRL, 0x1, 0x0);
+
+	/* Re-allow context switch */
+	nv04_fifo_reassign(dev, true);
+
+	/* the blob also clear c040's bit 26 using PMS when the
+	 * performance level is set to 0.
+	 * I haven't seen difference in power consumption, so,
+	 * I leave it for later.
+	 */
+	nv_wr32(dev, 0xc040, pm->pause_state.reg_c040);
+
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, irq_flags);
+}
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/2] drm/nouveau/nv50: reclock memory using PMS on nv50
       [not found] ` <1303948692-22455-1-git-send-email-martin.peres-GANU6spQydw@public.gmane.org>
  2011-04-27 23:58   ` [PATCH 1/2] drm/nouveau/pm: Add pm.(un)pause functions Martin Peres
@ 2011-04-27 23:58   ` Martin Peres
  2011-04-28 10:32   ` [PATCH 0/2] reclocking stability improvements Maxim Levitsky
  2 siblings, 0 replies; 18+ messages in thread
From: Martin Peres @ 2011-04-27 23:58 UTC (permalink / raw)
  To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Martin Peres

From: Martin Peres <martin.peres-Iz16wY1oaNPLSKGbIzaifA@public.gmane.org>

Signed-off-by: Martin Peres <martin.peres-Iz16wY1oaNPLSKGbIzaifA@public.gmane.org>
---
 drivers/gpu/drm/nouveau/nouveau_pm.c  |    7 +-
 drivers/gpu/drm/nouveau/nouveau_pms.h |   98 +++++++++++++++++++++
 drivers/gpu/drm/nouveau/nv50_pm.c     |  153 ++++++++++++++++++++++++++++++---
 3 files changed, 243 insertions(+), 15 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_pms.h

diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c
index 3bffe60..abc0ef9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.c
@@ -79,13 +79,16 @@ nouveau_pm_perflvl_set(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 		}
 	}
 
+	nouveau_pm_clock_set(dev, perflvl, PLL_MEMORY, perflvl->memory);
+
 	ret = pm->pause(dev);
-	if (ret)
+	if (ret) {
+		nouveau_pm_clock_set(dev, pm->cur, PLL_MEMORY, perflvl->memory);
 		return ret;
+	}
 
 	nouveau_pm_clock_set(dev, perflvl, PLL_CORE, perflvl->core);
 	nouveau_pm_clock_set(dev, perflvl, PLL_SHADER, perflvl->shader);
-	nouveau_pm_clock_set(dev, perflvl, PLL_MEMORY, perflvl->memory);
 	nouveau_pm_clock_set(dev, perflvl, PLL_UNK05, perflvl->unk05);
 
 	/* Wait for PLLs to stabilize */
diff --git a/drivers/gpu/drm/nouveau/nouveau_pms.h b/drivers/gpu/drm/nouveau/nouveau_pms.h
new file mode 100644
index 0000000..d7a445b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_pms.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#ifndef __NOUVEAU_PMS_H__
+#define __NOUVEAU_PMS_H__
+
+struct pms_ucode {
+	u8 data[256];
+	union {
+		u8  *u08;
+		u16 *u16;
+		u32 *u32;
+	} ptr;
+	u16 len;
+
+	u32 reg;
+	u32 val;
+};
+
+static inline void
+pms_init(struct pms_ucode *pms)
+{
+	pms->ptr.u08 = pms->data;
+	pms->reg = 0xffffffff;
+	pms->val = 0xffffffff;
+}
+
+static inline void
+pms_fini(struct pms_ucode *pms)
+{
+	do {
+		*pms->ptr.u08++ = 0x7f;
+		pms->len = pms->ptr.u08 - pms->data;
+	} while (pms->len & 3);
+	pms->ptr.u08 = pms->data;
+}
+
+static inline void
+pms_unkn(struct pms_ucode *pms, u8 v0)
+{
+	*pms->ptr.u08++ = v0;
+}
+
+static inline void
+pms_op5f(struct pms_ucode *pms, u8 v0, u8 v1)
+{
+	*pms->ptr.u08++ = 0x5f;
+	*pms->ptr.u08++ = v0;
+	*pms->ptr.u08++ = v1;
+}
+
+static inline void
+pms_wr32(struct pms_ucode *pms, u32 reg, u32 val)
+{
+	if (val != pms->val) {
+		if ((val & 0xffff0000) == (pms->val & 0xffff0000)) {
+			*pms->ptr.u08++ = 0x42;
+			*pms->ptr.u16++ = (val & 0x0000ffff);
+		} else {
+			*pms->ptr.u08++ = 0xe2;
+			*pms->ptr.u32++ = val;
+		}
+
+		pms->val = val;
+	}
+
+	if ((reg & 0xffff0000) == (pms->reg & 0xffff0000)) {
+		*pms->ptr.u08++ = 0x40;
+		*pms->ptr.u16++ = (reg & 0x0000ffff);
+	} else {
+		*pms->ptr.u08++ = 0xe0;
+		*pms->ptr.u32++ = reg;
+	}
+	pms->reg = reg;
+}
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nv50_pm.c b/drivers/gpu/drm/nouveau/nv50_pm.c
index 4dd2d76..9b81f03 100644
--- a/drivers/gpu/drm/nouveau/nv50_pm.c
+++ b/drivers/gpu/drm/nouveau/nv50_pm.c
@@ -26,9 +26,11 @@
 #include "nouveau_drv.h"
 #include "nouveau_bios.h"
 #include "nouveau_pm.h"
+#include "nouveau_pms.h"
 
 struct nv50_pm_state {
 	struct nouveau_pm_level *perflvl;
+	struct pms_ucode ucode;
 	struct pll_lims pll;
 	enum pll_types type;
 	int N, M, P;
@@ -73,14 +75,20 @@ void *
 nv50_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
 		  u32 id, int khz)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nv50_pm_state *state;
-	int dummy, ret;
+	struct pms_ucode *pms;
+	u32 reg0_old, reg0_new;
+	u32 crtc_mask;
+	u32 reg_c040;
+	int ret, dummy, i;
 
 	state = kzalloc(sizeof(*state), GFP_KERNEL);
 	if (!state)
 		return ERR_PTR(-ENOMEM);
 	state->type = id;
 	state->perflvl = perflvl;
+	pms = &state->ucode;
 
 	ret = get_pll_limits(dev, id, &state->pll);
 	if (ret < 0) {
@@ -95,20 +103,88 @@ nv50_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
 		return ERR_PTR(ret);
 	}
 
+	reg0_old = nv_rd32(dev, state->pll.reg + 0);
+	reg0_new = 0x80000000 | (state->P << 16) | (reg0_old & 0xfff8ffff);
+
+	reg_c040 = nv_rd32(dev, 0xc040);
+
+	crtc_mask = 0;
+	for (i = 0; i < 2; i++) {
+		if (nv_rd32(dev, NV50_PDISPLAY_CRTC_C(i, CLOCK)))
+			crtc_mask |= (1 << i);
+	}
+
+	pms_init(pms);
+
+	switch (state->type) {
+	case PLL_MEMORY:
+		/* Wait for vblank on all the CRTCs */
+		if (crtc_mask) {
+			pms_op5f(pms, crtc_mask, 0x00);
+			pms_op5f(pms, crtc_mask, 0x01);
+		}
+
+		pms_wr32(pms, 0x002504, 0x00000001);
+		pms_unkn(pms, 0x06); /* unknown */
+		pms_unkn(pms, 0xb0); /* Disable bus access */
+		pms_op5f(pms, 0x00, 0x01);
+
+		pms_wr32(pms, 0x1002d4, 0x00000001);
+		pms_wr32(pms, 0x1002d0, 0x00000001);
+
+		pms_wr32(pms, 0x100210, 0x00000000);
+		pms_wr32(pms, 0x1002dc, 0x00000001);
+		pms_wr32(pms, state->pll.reg + 0, reg0_old);
+		pms_wr32(pms, state->pll.reg + 4, (state->N << 8) | state->M);
+
+		pms_wr32(pms, state->pll.reg + 0, reg0_new);
+		pms_wr32(pms, 0x1002dc, 0x00000000);
+		pms_wr32(pms, 0x100210, 0x80000000);
+		pms_unkn(pms, 0x07); /* unknown */
+
+		pms_unkn(pms, 0x0b);
+		pms_unkn(pms, 0xd0); /* Enable bus access again */
+		pms_op5f(pms, 0x00, 0x01);
+		pms_wr32(pms, 0x002504, 0x00000000);
+		break;
+	default:
+		pms_unkn(pms, 0xb0); /* Disable bus access */
+
+		pms_wr32(pms, 0xc040,
+			(reg_c040 & ~(1 << 5 | 1 << 4)) | (1 << 20));
+		pms_wr32(pms, state->pll.reg + 0, reg0_new);
+		pms_wr32(pms, state->pll.reg + 4, (state->N << 8) | state->M);
+		pms_unkn(pms, 0x0e);
+
+		pms_wr32(pms, 0xc040, reg_c040);
+		pms_wr32(pms, 0xc040, 0x10);
+
+		pms_wr32(pms, 0xc040, reg_c040);
+
+		pms_unkn(pms, 0xd0); /* Enable bus access again */
+		break;
+	}
+	pms_fini(pms);
+
 	return state;
 }
 
 void
 nv50_pm_clock_set(struct drm_device *dev, void *pre_state)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nv50_pm_state *state = pre_state;
 	struct nouveau_pm_level *perflvl = state->perflvl;
-	u32 reg = state->pll.reg, tmp;
+	struct pms_ucode *pms = &state->ucode;
 	struct bit_entry BIT_M;
+	u32 pbus1098, r100b0c, r619f00;
+	u32 pms_data, pms_kick;
 	u16 script;
+	u32 reg = state->pll.reg, tmp;
 	int N = state->N;
 	int M = state->M;
 	int P = state->P;
+	int i;
 
 	if (state->type == PLL_MEMORY && perflvl->memscript &&
 	    bit_table(dev, 'M', &BIT_M) == 0 &&
@@ -126,20 +202,71 @@ nv50_pm_clock_set(struct drm_device *dev, void *pre_state)
 		nouveau_bios_run_init_table(dev, perflvl->memscript, NULL);
 	}
 
+	/* only use PMS for changing the memory clocks */
 	if (state->type == PLL_MEMORY) {
-		nv_wr32(dev, 0x100210, 0);
-		nv_wr32(dev, 0x1002dc, 1);
-	}
-	/* TODO: Tweek 0x4700 before reclocking UNK05 */
-
-	tmp  = nv_rd32(dev, reg + 0) & 0xfff8ffff;
-	tmp |= 0x80000000 | (P << 16);
-	nv_wr32(dev, reg + 0, tmp);
-	nv_wr32(dev, reg + 4, (N << 8) | M);
+		if (dev_priv->chipset < 0x90) {
+			pms_data = 0x001400;
+			pms_kick = 0x00000003;
+		} else {
+			pms_data = 0x080000;
+			pms_kick = 0x00000001;
+		}
 
-	if (state->type == PLL_MEMORY) {
-		nv_wr32(dev, 0x1002dc, 0);
-		nv_wr32(dev, 0x100210, 0x80000000);
+		/* upload ucode */
+		pbus1098 = nv_mask(dev, 0x001098, 0x00000008, 0x00000000);
+		nv_wr32(dev, 0x001304, 0x00000000);
+		for (i = 0; i < pms->len / 4; i++)
+			nv_wr32(dev, pms_data + (i * 4), pms->ptr.u32[i]);
+		nv_wr32(dev, 0x001098, pbus1098 | 0x18);
+
+		nv_mask(dev, 0x616308, 0x00000000, 0x00000010);
+		nv_mask(dev, 0x616b08, 0x00000000, 0x00000010);
+
+		/* and run it! there's some pre and post script operations that
+		* nvidia do too, need to figure those out
+		*/
+		nv_mask(dev, 0x100200, 0x00000800, 0x00000000);
+		r100b0c = nv_mask(dev, 0x100b0c, 0x000000ff, 0x00000012);
+		r619f00 = nv_mask(dev, 0x619f00, 0x00000008, 0x00000000);
+		nv_wr32(dev, 0x00130c, pms_kick);
+		if (!nv_wait(dev, 0x001308, 0x00000100, 0x00000000)) {
+			NV_ERROR(dev, "pms ucode exec timed out\n");
+			NV_ERROR(dev, "0x001308: 0x%08x\n",
+				nv_rd32(dev, 0x001308));
+			for (i = 0; i < pms->len / 4; i++) {
+				NV_ERROR(dev, "0x%06x: 0x%08x\n",
+					0x1400 + (i * 4),
+					nv_rd32(dev, 0x001400 + (i * 4)));
+			}
+		}
+		nv_wr32(dev, 0x619f00, r619f00);
+		nv_wr32(dev, 0x100b0c, r100b0c);
+		nv_mask(dev, 0x616308, 0x00000000, 0x00000010);
+		nv_mask(dev, 0x616b08, 0x00000000, 0x00000010);
+
+		/*if (perflvl->id == 0) {
+			nv_wr32(dev, 0x100228, 0x00020102);
+			nv_wr32(dev, 0x100230, 0x28000808);
+			nv_wr32(dev, 0x100234, 0x06020702);
+		} else if (perflvl->id == 1) {
+			nv_wr32(dev, 0x100228, 0x00040305);
+			nv_wr32(dev, 0x100230, 0x28000808);
+			nv_wr32(dev, 0x100234, 0x11050905);
+		}else if (perflvl->id == 2) {
+			nv_wr32(dev, 0x100228, 0x0008080c);
+			nv_wr32(dev, 0x100230, 0x28000808);
+			nv_wr32(dev, 0x100234, 0x270c0c09);
+		}*/
+
+		nv_mask(dev, 0x100200, 0x00000000, 0x00000800);
+
+	} else {
+		/* TODO: Tweek 0x4700 before reclocking UNK05 */
+
+		tmp  = nv_rd32(dev, reg + 0) & 0xfff8ffff;
+		tmp |= 0x80000000 | (P << 16);
+		nv_wr32(dev, reg + 0, tmp);
+		nv_wr32(dev, reg + 4, (N << 8) | M);
 	}
 
 	kfree(state);
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
       [not found] ` <1303948692-22455-1-git-send-email-martin.peres-GANU6spQydw@public.gmane.org>
  2011-04-27 23:58   ` [PATCH 1/2] drm/nouveau/pm: Add pm.(un)pause functions Martin Peres
  2011-04-27 23:58   ` [PATCH 2/2] drm/nouveau/nv50: reclock memory using PMS on nv50 Martin Peres
@ 2011-04-28 10:32   ` Maxim Levitsky
  2011-04-28 11:15     ` Martin Peres
  2 siblings, 1 reply; 18+ messages in thread
From: Maxim Levitsky @ 2011-04-28 10:32 UTC (permalink / raw)
  To: Martin Peres; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, 2011-04-28 at 01:58 +0200, Martin Peres wrote:
> Hi everyone,
> 
> I would like everyone to test this set of patch as we'll need them quite soon for timing management on nv50.
> 
> Please report success/failure by answering to this email.
> 
> Thanks in advance,
> Martin

I tested this.
The patches seem to work, except for the following problems:

Sometimes reclocking fails like this:

root@maxim-laptop:/home/maxim# echo 2 > /sys/class/drm/card0/device/performance_level
bash: echo: write error: Resource temporarily unavailable


[  630.371117] [drm] nouveau 0000:01:00.0: setting performance level: performance_level_2
[  630.412048] [drm] nouveau 0000:01:00.0: PFIFO DMA_PUSH never depleted (0xd06091)

Once it even hung the GPU.
This is easy to reproduce while running compiz and its benchmark overlay.


Also, it does not seem to restore the performance level after resume from RAM.

Best regards
	Maxim Levitsky


> 
> _______________________________________________
> Nouveau mailing list
> Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau

-- 
Best regards,
        Maxim Levitsky

Visit my blog: http://maximlevitsky.wordpress.com
Warning: Above blog contains rants.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
  2011-04-28 10:32   ` [PATCH 0/2] reclocking stability improvements Maxim Levitsky
@ 2011-04-28 11:15     ` Martin Peres
       [not found]       ` <4DB94C4F.3000801-GANU6spQydw@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Peres @ 2011-04-28 11:15 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Le 28/04/2011 12:32, Maxim Levitsky a écrit :
> On Thu, 2011-04-28 at 01:58 +0200, Martin Peres wrote:
>> Hi everyone,
>>
>> I would like everyone to test this set of patch as we'll need them quite soon for timing management on nv50.
>>
>> Please report success/failure by answering to this email.
>>
>> Thanks in advance,
>> Martin
> I tested this.
> Patch seems to work except following problems:
>
> Sometimes reclocking fails like this:
>
> root@maxim-laptop:/home/maxim# echo 2 > /sys/class/drm/card0/device/performance_level
> bash: echo: write error: Resource temporarily unavailable
This is expected. Unfortunately, I can't do anything better than this 
for the upcoming months. I need help from the people really into the 
command submission system.
>
> [  630.371117] [drm] nouveau 0000:01:00.0: setting performance level: performance_level_2
> [  630.412048] [drm] nouveau 0000:01:00.0: PFIFO DMA_PUSH never depleted (0xd06091)
I consider it not safe to reclock core if PFIFO DMA_PUSH isn't empty. 
Hence the reason why I abort there.
> Once it even  hang the GPU.
This shouldn't happen but I managed to hang it too once. Anyway, this 
isn't perfect but still way better than what is currently implemented.
> Easy to reproduce while running compiz and its benchmark overlay.
>
Exactly, when playing open arena, the success ratio is very very low but 
I don't mind yet. I first want stability and then reliability.
> Also, it seems not to restore perf level after resume from ram.
Good catch, I thought I had fixed that, but apparently I had not. I'll 
fix this tonight and send an update. Thanks a lot.
> Best regards
> 	Maxim Levitsky
Thanks a lot for testing it Maxim.

Martin
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
       [not found]       ` <4DB94C4F.3000801-GANU6spQydw@public.gmane.org>
@ 2011-04-28 11:43         ` Maxim Levitsky
  2011-04-28 12:11           ` Martin Peres
  2011-04-28 15:24           ` Emil Velikov
  0 siblings, 2 replies; 18+ messages in thread
From: Maxim Levitsky @ 2011-04-28 11:43 UTC (permalink / raw)
  To: Martin Peres; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, 2011-04-28 at 13:15 +0200, Martin Peres wrote:
> Le 28/04/2011 12:32, Maxim Levitsky a écrit :
> > On Thu, 2011-04-28 at 01:58 +0200, Martin Peres wrote:
> >> Hi everyone,
> >>
> >> I would like everyone to test this set of patch as we'll need them quite soon for timing management on nv50.
> >>
> >> Please report success/failure by answering to this email.
> >>
> >> Thanks in advance,
> >> Martin
> > I tested this.
> > Patch seems to work except following problems:
> >
> > Sometimes reclocking fails like this:
> >
> > root@maxim-laptop:/home/maxim# echo 2 > /sys/class/drm/card0/device/performance_level
> > bash: echo: write error: Resource temporarily unavailable
> This is expected. Unfortunately, I can't do anything better than this 
> for the upcoming months. I need help from the people really into the 
> command submission system.
> >
> > [  630.371117] [drm] nouveau 0000:01:00.0: setting performance level: performance_level_2
> > [  630.412048] [drm] nouveau 0000:01:00.0: PFIFO DMA_PUSH never depleted (0xd06091)
> I consider it not safe to reclock core if PFIFO DMA_PUSH isn't empty. 
> Hence the reason why I abort there.
> > Once it even  hang the GPU.
> This shouldn't happen but I managed to hang it too once. Anyway, this 
> isn't perfect but still way better than what is currently implemented.
> > Easy to reproduce while running compiz and its benchmark overlay.
> >
> Exactly, when playing open arena, the success ratio is very very low but 
> I don't mind yet. I first want stability and then reliability.
> > Also, it seems not to restore perf level after resume from ram.
> Good catch, I thought I had fixed that but it doesn't seem like. I'll 
> fix this tonight and send an update. Thanks a lot.
> > Best regards
> > 	Maxim Levitsky
> Thanks a lot for testing it Maxim.
> 
> Martin

Martin, one more thing: these are my observations regarding clocks, which I
finished today:


clock = (ref * N / M) >> (P & 0x7)

first line is blob, second nouveau

level0:
                         P         NNMM
    0x4008 - memory - 0018e200 00002505   = 1           = 01.0000
                      80016400 00002505   = 37 / 5 / 2  = 03.7000

    0x4020 - shader - 80010000 00000b01   = 11 / 1 / 2  = 05.5000
                      80020000 00001b02   = 27 / 2 / 4  = 03.3750

    0x4028 - core   - a0120000 00001b04   = 27 / 4 / 4  = 01.6875
                      a0020000 00001b04   = 27 / 4 / 4  = 01.6875

    0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
                      80020000 00001b04   = 27 / 4 / 4  = 01.6875

level1:

    0x4008 - memory - 809ae400 00002505   = 37 / 5 / 4  = 01.8500
                      80026400 00002505   = 37 / 5 / 4  = 01.8500
                      
    0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
                      80010000 00000b01   = 11 / 1 / 2  = 05.5000
                      
    0x4028 - core   - a0090000 00000b02   = 11 / 2 / 1  = 05.5000
                      a0010000 00000b02   = 11 / 2 / 1  = 05.5000

    0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
                      80010000 00000b02   = 11 / 2 / 2  = 05.5000

level2:

    0x4008 - memory - 8059e400 00002505   = 37 / 5 / 2  = 03.7000
                      80016400 00002505   = 37 / 5 / 2  = 03.7000

    0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
                      80000000 00001002   = 16 / 2 / 1  = 08.0000

    0x4028 - core   - a0090000 00001002   = 16 / 2 / 2  = 04.0000
                      a0010000 00001002   = 16 / 2 / 2  = 04.0000

    0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
                      80010000 00001202   = 18 / 2 / 2  = 06.0000


Clocks are very different, and we seem to heavily overclock the unk_05 PLL.
The shader PLL is always underclocked, so is this another way to boost performance?
(It didn't seem to affect performance much here, but still...)


-- 
Best regards,
        Maxim Levitsky

Visit my blog: http://maximlevitsky.wordpress.com
Warning: Above blog contains rants.

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
  2011-04-28 11:43         ` Maxim Levitsky
@ 2011-04-28 12:11           ` Martin Peres
       [not found]             ` <4DB95989.2090702-GANU6spQydw@public.gmane.org>
  2011-04-28 15:24           ` Emil Velikov
  1 sibling, 1 reply; 18+ messages in thread
From: Martin Peres @ 2011-04-28 12:11 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Le 28/04/2011 13:43, Maxim Levitsky a écrit :
>
> Martin, one more thing, this is my observations regarding clocks I
> finished today:
>
>
> clock = (ref * N / M) >> (P & 0x7)
>
> first line is blob, second nouveau
>
> level0:
>                           P         NNMM
>      0x4008 - memory - 0018e200 00002505   = 1           = 01.0000
>                        80016400 00002505   = 37 / 5 / 2  = 03.7000
>
>      0x4020 - shader - 80010000 00000b01   = 11 / 1 / 2  = 05.5000
>                        80020000 00001b02   = 27 / 2 / 4  = 03.3750
>
>      0x4028 - core   - a0120000 00001b04   = 27 / 4 / 4  = 01.6875
>                        a0020000 00001b04   = 27 / 4 / 4  = 01.6875
>
>      0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
>                        80020000 00001b04   = 27 / 4 / 4  = 01.6875
>
> level1:
>
>      0x4008 - memory - 809ae400 00002505   = 37 / 5 / 4  = 01.8500
>                        80026400 00002505   = 37 / 5 / 4  = 01.8500
>
>      0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
>                        80010000 00000b01   = 11 / 1 / 2  = 05.5000
>
>      0x4028 - core   - a0090000 00000b02   = 11 / 2 / 1  = 05.5000
>                        a0010000 00000b02   = 11 / 2 / 1  = 05.5000
>
>      0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
>                        80010000 00000b02   = 11 / 2 / 2  = 05.5000
>
> level2:
>
>      0x4008 - memory - 8059e400 00002505   = 37 / 5 / 2  = 03.7000
>                        80016400 00002505   = 37 / 5 / 2  = 03.7000
>
>      0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
>                        80000000 00001002   = 16 / 2 / 1  = 08.0000
>
>      0x4028 - core   - a0090000 00001002   = 16 / 2 / 2  = 04.0000
>                        a0010000 00001002   = 16 / 2 / 2  = 04.0000
>
>      0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
>                        80010000 00001202   = 18 / 2 / 2  = 06.0000
>
>
> Clocks are very different and we seems to overclock hard the unk_05 PLL
> And shader PLL is underclocked always, so another way to boost performance?
> (it didn't seem to affect perfomace much here, but yet...)
Good work Maxim!

Can you please tell us what card you use? For what range of cards is this 
true? I can give you access to the vbios repo so that you can contact some 
people for further testing if you want to :)

Anyway, I hope you are interested in looking deeper into this. Xexaxo 
and darktama did all the work on this.
I have no time to put on this issue in the upcoming weeks as I'm already 
working on getting the PMS patches upstream, fan management and Fermi PM.

Thanks :)
Martin
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
       [not found]             ` <4DB95989.2090702-GANU6spQydw@public.gmane.org>
@ 2011-04-28 12:32               ` Maxim Levitsky
  2011-04-28 12:35                 ` Martin Peres
  0 siblings, 1 reply; 18+ messages in thread
From: Maxim Levitsky @ 2011-04-28 12:32 UTC (permalink / raw)
  To: Martin Peres; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, 2011-04-28 at 14:11 +0200, Martin Peres wrote:
> Le 28/04/2011 13:43, Maxim Levitsky a écrit :
> >
> > Martin, one more thing, this is my observations regarding clocks I
> > finished today:
> >
> >
> > clock = (ref * N / M) >> (P & 0x7)
> >
> > first line is blob, second nouveau
> >
> > level0:
> >                           P         NNMM
> >      0x4008 - memory - 0018e200 00002505   = 1           = 01.0000
> >                        80016400 00002505   = 37 / 5 / 2  = 03.7000
> >
> >      0x4020 - shader - 80010000 00000b01   = 11 / 1 / 2  = 05.5000
> >                        80020000 00001b02   = 27 / 2 / 4  = 03.3750
> >
> >      0x4028 - core   - a0120000 00001b04   = 27 / 4 / 4  = 01.6875
> >                        a0020000 00001b04   = 27 / 4 / 4  = 01.6875
> >
> >      0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
> >                        80020000 00001b04   = 27 / 4 / 4  = 01.6875
> >
> > level1:
> >
> >      0x4008 - memory - 809ae400 00002505   = 37 / 5 / 4  = 01.8500
> >                        80026400 00002505   = 37 / 5 / 4  = 01.8500
> >
> >      0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
> >                        80010000 00000b01   = 11 / 1 / 2  = 05.5000
> >
> >      0x4028 - core   - a0090000 00000b02   = 11 / 2 / 1  = 05.5000
> >                        a0010000 00000b02   = 11 / 2 / 1  = 05.5000
> >
> >      0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
> >                        80010000 00000b02   = 11 / 2 / 2  = 05.5000
> >
> > level2:
> >
> >      0x4008 - memory - 8059e400 00002505   = 37 / 5 / 2  = 03.7000
> >                        80016400 00002505   = 37 / 5 / 2  = 03.7000
> >
> >      0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
> >                        80000000 00001002   = 16 / 2 / 1  = 08.0000
> >
> >      0x4028 - core   - a0090000 00001002   = 16 / 2 / 2  = 04.0000
> >                        a0010000 00001002   = 16 / 2 / 2  = 04.0000
> >
> >      0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
> >                        80010000 00001202   = 18 / 2 / 2  = 06.0000
> >
> >
> > Clocks are very different and we seems to overclock hard the unk_05 PLL
> > And shader PLL is underclocked always, so another way to boost performance?
> > (it didn't seem to affect perfomace much here, but yet...)
> Good work Maxim!
> 
> Can you please tell us what card you use? For what range of card is this 
> true? I can give you access to the vbios repo so as you can contact some 
> people for further testing if you want to :)
I use NV86 Geforce 8400M GS card.
Don't have much time though.


> 
> Anyway, I hope you are interested in looking deeper into this. Xexaxo 
> and darktama did all the work on this.
> I have no time to put on this issue in the upcoming weeks as I'm already 
> working on getting the PMS patches upstream, fan management and Fermi PM.
> 
> Thanks :)
> Martin

No problem!

Best regards,
	Maxim Levitsky

-- 
Best regards,
        Maxim Levitsky

Visit my blog: http://maximlevitsky.wordpress.com
Warning: Above blog contains rants.

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
  2011-04-28 12:32               ` Maxim Levitsky
@ 2011-04-28 12:35                 ` Martin Peres
       [not found]                   ` <4DB95F02.50202-GANU6spQydw@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Peres @ 2011-04-28 12:35 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Le 28/04/2011 14:32, Maxim Levitsky a écrit :
> On Thu, 2011-04-28 at 14:11 +0200, Martin Peres wrote:
>> Good work Maxim!
>> Can you please tell us what card you use? For what range of card is this
>> true? I can give you access to the vbios repo so as you can contact some
>> people for further testing if you want to :)
> I use NV86 Geforce 8400M GS card.
> Don't have much time though.
Ok, cool. I got a 8600 GS from my father, I'll see if I can reproduce 
this issue.

Too bad for your time, I'll see what I can do though. This could very 
well explain some instability I've been encountering.

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
       [not found]                   ` <4DB95F02.50202-GANU6spQydw@public.gmane.org>
@ 2011-04-28 14:09                     ` Maxim Levitsky
  0 siblings, 0 replies; 18+ messages in thread
From: Maxim Levitsky @ 2011-04-28 14:09 UTC (permalink / raw)
  To: Martin Peres; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, 2011-04-28 at 14:35 +0200, Martin Peres wrote:
> Le 28/04/2011 14:32, Maxim Levitsky a écrit :
> > On Thu, 2011-04-28 at 14:11 +0200, Martin Peres wrote:
> >> Good work Maxim!
> >> Can you please tell us what card you use? For what range of card is this
> >> true? I can give you access to the vbios repo so as you can contact some
> >> people for further testing if you want to :)
> > I use NV86 Geforce 8400M GS card.
> > Don't have much time though.
> Ok, cool. I got a 8600 GS from my father, I'll see if I can reproduce 
> this issue.
> 
> Too bad for your time, I'll see what I can do though. This could very 
> well explain some instability I've been encountering.
> 


And one more thing, which I did an hour ago:
I documented the power usage of nouveau vs nvidia.
Same settings, same kernel, only the GPU drivers differ.
The result is very sad:

nvidia:
        perf level0 : ~15.8-16.0W
        perf level2 : ~16.7-17.1W
nouveau:
        perf level0 : ~20.4-20.8W
        perf level2 : ~23.0-23.5W


This means 2 things:
1. There is a major source of PM improvement somewhere else.
2. Clocks don't affect things much (that's why the somewhat wrong clocks set
by nouveau don't show a visible difference in performance/power usage).

(Note: I was under false impression that when you force nvidia to
maximum perf level, it lies to you and sets it only when device isn't
idle. Well, according to registers 0x4000-0x4040 nvidia doesn't lie...)

Also, an interesting fact is that the clock registers have many bits that
nouveau doesn't touch, but they surely have some meaning.
Well, reverse engineering is a very tough job. In fact, what the nouveau
developers did is already well beyond what was believed to be possible, so
no complaints :-)

Speaking of the report on a general PM regression published by phoronix.com,
it's probably there, but not as dramatic as they want it to appear.
I remember being able (with the help of some smoke and mirrors) to lower
power consumption to 14W, so it's probably a bit higher, but not by that
much.

-- 
Best regards,
        Maxim Levitsky

Visit my blog: http://maximlevitsky.wordpress.com
Warning: Above blog contains rants.

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
  2011-04-28 11:43         ` Maxim Levitsky
  2011-04-28 12:11           ` Martin Peres
@ 2011-04-28 15:24           ` Emil Velikov
  2011-04-28 16:58             ` Maxim Levitsky
  1 sibling, 1 reply; 18+ messages in thread
From: Emil Velikov @ 2011-04-28 15:24 UTC (permalink / raw)
  To: Martin Peres, Maxim Levitsky; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, 28 Apr 2011 12:43:31 +0100, Maxim Levitsky <maximlevitsky-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:

> Martin, one more thing, this is my observations regarding clocks I
> finished today:
>
>
> clock = (ref * N / M) >> (P & 0x7)
>
> first line is blob, second nouveau
>
> level0:
>                          P         NNMM
>     0x4008 - memory - 0018e200 00002505   = 1           = 01.0000
>                       80016400 00002505   = 37 / 5 / 2  = 03.7000
>
>     0x4020 - shader - 80010000 00000b01   = 11 / 1 / 2  = 05.5000
>                       80020000 00001b02   = 27 / 2 / 4  = 03.3750
>
>     0x4028 - core   - a0120000 00001b04   = 27 / 4 / 4  = 01.6875
>                       a0020000 00001b04   = 27 / 4 / 4  = 01.6875
>
>     0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
>                       80020000 00001b04   = 27 / 4 / 4  = 01.6875
>
> level1:
>
>     0x4008 - memory - 809ae400 00002505   = 37 / 5 / 4  = 01.8500
>                       80026400 00002505   = 37 / 5 / 4  = 01.8500
>    0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
>                       80010000 00000b01   = 11 / 1 / 2  = 05.5000
>    0x4028 - core   - a0090000 00000b02   = 11 / 2 / 1  = 05.5000
>                       a0010000 00000b02   = 11 / 2 / 1  = 05.5000
>
>     0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
>                       80010000 00000b02   = 11 / 2 / 2  = 05.5000
>
> level2:
>
>     0x4008 - memory - 8059e400 00002505   = 37 / 5 / 2  = 03.7000
>                       80016400 00002505   = 37 / 5 / 2  = 03.7000
>
>     0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
>                       80000000 00001002   = 16 / 2 / 1  = 08.0000
>
>     0x4028 - core   - a0090000 00001002   = 16 / 2 / 2  = 04.0000
>                       a0010000 00001002   = 16 / 2 / 2  = 04.0000
>
>     0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
>                       80010000 00001202   = 18 / 2 / 2  = 06.0000
>
>
> Clocks are very different and we seems to overclock hard the unk_05 PLL
> And shader PLL is underclocked always, so another way to boost performance?
> (it didn't seem to affect perfomace much here, but yet...)
>
>

Maxim, you are most likely tired of hearing this, but the shader PLL is in many
cases disabled(-ish),
either by nv_wr32(dev, 0x4020, nv_rd32(dev, 0x4020) & ~0x80000000), or by the
0xc040 register (I don't remember the exact bit).
As for unk_05, the logic of the blob has changed in its recent
versions - i.e. with blob v195.xx the numbers (P/M/N) were the same as nouveau's.

Cheers
Emil (xexaxo)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
  2011-04-28 15:24           ` Emil Velikov
@ 2011-04-28 16:58             ` Maxim Levitsky
  2011-04-28 18:24               ` Martin Peres
  0 siblings, 1 reply; 18+ messages in thread
From: Maxim Levitsky @ 2011-04-28 16:58 UTC (permalink / raw)
  To: Emil Velikov; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, 2011-04-28 at 16:24 +0100, Emil Velikov wrote:
> On Thu, 28 Apr 2011 12:43:31 +0100, Maxim Levitsky <maximlevitsky-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> 
> > Martin, one more thing, this is my observations regarding clocks I
> > finished today:
> >
> >
> > clock = (ref * N / M) >> (P & 0x7)
> >
> > first line is blob, second nouveau
> >
> > level0:
> >                          P         NNMM
> >     0x4008 - memory - 0018e200 00002505   = 1           = 01.0000
> >                       80016400 00002505   = 37 / 5 / 2  = 03.7000
> >
> >     0x4020 - shader - 80010000 00000b01   = 11 / 1 / 2  = 05.5000
> >                       80020000 00001b02   = 27 / 2 / 4  = 03.3750
> >
> >     0x4028 - core   - a0120000 00001b04   = 27 / 4 / 4  = 01.6875
> >                       a0020000 00001b04   = 27 / 4 / 4  = 01.6875
> >
> >     0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
> >                       80020000 00001b04   = 27 / 4 / 4  = 01.6875
> >
> > level1:
> >
> >     0x4008 - memory - 809ae400 00002505   = 37 / 5 / 4  = 01.8500
> >                       80026400 00002505   = 37 / 5 / 4  = 01.8500
> >    0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
> >                       80010000 00000b01   = 11 / 1 / 2  = 05.5000
> >    0x4028 - core   - a0090000 00000b02   = 11 / 2 / 1  = 05.5000
> >                       a0010000 00000b02   = 11 / 2 / 1  = 05.5000
> >
> >     0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
> >                       80010000 00000b02   = 11 / 2 / 2  = 05.5000
> >
> > level2:
> >
> >     0x4008 - memory - 8059e400 00002505   = 37 / 5 / 2  = 03.7000
> >                       80016400 00002505   = 37 / 5 / 2  = 03.7000
> >
> >     0x4020 - shader - 80000000 00000b01   = 11 / 1 / 1  = 11.0000
> >                       80000000 00001002   = 16 / 2 / 1  = 08.0000
> >
> >     0x4028 - core   - a0090000 00001002   = 16 / 2 / 2  = 04.0000
> >                       a0010000 00001002   = 16 / 2 / 2  = 04.0000
> >
> >     0x4030 - unk_05 - a0120000 00000b02   = 11 / 2 / 4  = 01.3750
> >                       80010000 00001202   = 18 / 2 / 2  = 06.0000
> >
> >
> > Clocks are very different and we seems to overclock hard the unk_05 PLL
> > And shader PLL is underclocked always, so another way to boost performance?
> > (it didn't seem to affect perfomace much here, but yet...)
> >
> >
> 
> Maxim most likely you are tired of hearing this, but the shader PLL in many
> cases is disabled(-ish)
> Either by nv_wr32(dev, (nv_rd32(dev, 0x4020) & ~0x80000000)), or by the
> 0xc040 register (don't remember the exact bit)
> Whereas for the unk_05 the logic of the blob has been changed in it's recent
> versions - i.e. blob v195.xx the numbers(P/M/N) where the same as the nouveau
> 
> Cheers
> Emil (xexaxo)

I sure didn't know about register 0xC040, but I know about bit 31
(0x80000000) and took it into account. The only case where the blob leaves
it unset is the memory clock in perf level 0.

Also, clocking does not seem to be the major power problem; there is
something else
(see my post about the observed power consumption).

My gut feeling is that the blob can decrease the number of running shader cores,
while in nouveau all of them are running anyway.



An interesting fact is that GPU temperatures rise to very high levels
(~75C) even while doing CPU-only work (like compiling a kernel, for
example).


-- 
Best regards,
        Maxim Levitsky

Visit my blog: http://maximlevitsky.wordpress.com
Warning: Above blog contains rants.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
  2011-04-28 16:58             ` Maxim Levitsky
@ 2011-04-28 18:24               ` Martin Peres
       [not found]                 ` <4DB9B0D7.2050700-GANU6spQydw@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Peres @ 2011-04-28 18:24 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Le 28/04/2011 18:58, Maxim Levitsky a écrit :
> Interesting fact is that GPU temperatures rise to very high levels
> (~75C) even while doing CPU only work (like compiling kernel for
> example).
Let me guess, you're on a laptop? The temperature of the case goes up 
when the processor is working and so, it cools the GPU less ;)
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
       [not found]                 ` <4DB9B0D7.2050700-GANU6spQydw@public.gmane.org>
@ 2011-04-28 18:29                   ` Maxim Levitsky
  2011-04-28 18:35                     ` Martin Peres
  0 siblings, 1 reply; 18+ messages in thread
From: Maxim Levitsky @ 2011-04-28 18:29 UTC (permalink / raw)
  To: Martin Peres; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, 2011-04-28 at 20:24 +0200, Martin Peres wrote:
> Le 28/04/2011 18:58, Maxim Levitsky a écrit :
> > Interesting fact is that GPU temperatures rise to very high levels
> > (~75C) even while doing CPU only work (like compiling kernel for
> > example).
> Let me guess, you're on a laptop? The temperature of the case goes up 
> when the processor is working and so, it cools the GPU less ;)

Yes, but that doesn't happen to this extent while using nvidia.
I think that the GPU has many units running in an endless loop, doing nothing
but consuming power (~5W difference)!

-- 
Best regards,
        Maxim Levitsky

Visit my blog: http://maximlevitsky.wordpress.com
Warning: Above blog contains rants.

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
  2011-04-28 18:29                   ` Maxim Levitsky
@ 2011-04-28 18:35                     ` Martin Peres
       [not found]                       ` <4DB9B36A.4020704-GANU6spQydw@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Peres @ 2011-04-28 18:35 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Le 28/04/2011 20:29, Maxim Levitsky a écrit :
> On Thu, 2011-04-28 at 20:24 +0200, Martin Peres wrote:
>> Le 28/04/2011 18:58, Maxim Levitsky a écrit :
>>> Interesting fact is that GPU temperatures rise to very high levels
>>> (~75C) even while doing CPU only work (like compiling kernel for
>>> example).
>> Let me guess, you're on a laptop? The temperature of the case goes up
>> when the processor is working and so, it cools the GPU less ;)
> Yes, but that doesn't happen while using nvidia to this extent.
> I think that GPU has many units running in endless loop doing nothing
> but consuming power (~5W) difference!
I really need to have a look at this. I have a power meter now (ACPI 
gives me funky power consumption figures because my battery is dead).

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
       [not found]                       ` <4DB9B36A.4020704-GANU6spQydw@public.gmane.org>
@ 2011-04-29  0:56                         ` Nigel Cunningham
       [not found]                           ` <4DBA0CA6.2010508-jrkICu/7yk3k1uMJSBkQmQ@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Nigel Cunningham @ 2011-04-29  0:56 UTC (permalink / raw)
  To: Martin Peres; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi.

On 29/04/11 04:35, Martin Peres wrote:
> Le 28/04/2011 20:29, Maxim Levitsky a écrit :
>> On Thu, 2011-04-28 at 20:24 +0200, Martin Peres wrote:
>>> Le 28/04/2011 18:58, Maxim Levitsky a écrit :
>>>> Interesting fact is that GPU temperatures rise to very high levels
>>>> (~75C) even while doing CPU only work (like compiling kernel for
>>>> example).
>>> Let me guess, you're on a laptop? The temperature of the case goes up
>>> when the processor is working and so, it cools the GPU less ;)
>> Yes, but that doesn't happen while using nvidia to this extent.
>> I think that GPU has many units running in endless loop doing nothing
>> but consuming power (~5W) difference!
> I really need to have a look at this. I have a power meter now (acpi
> reports me funky power consumption figures because my battery is dead).

Just been reading through your posts from last night, and wanted to let
you know I'm interested and willing to test patches too.

I have an 8600GTS based laptop, and have seen the same range of power
usage (i.e. I have seen 14W in the past - presumably when I was using the
blob - but can only get it down to 23W at the moment using Nouveau).

Regards,

Nigel
-- 
Evolution (n): A hypothetical process whereby improbable
events occur with alarming frequency, order arises from chaos, and
no one is given credit.
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
       [not found]                           ` <4DBA0CA6.2010508-jrkICu/7yk3k1uMJSBkQmQ@public.gmane.org>
@ 2011-04-29  6:54                             ` Martin Peres
       [not found]                               ` <4DBA60B6.2060905-GANU6spQydw@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Martin Peres @ 2011-04-29  6:54 UTC (permalink / raw)
  To: Nigel Cunningham; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Le 29/04/2011 02:56, Nigel Cunningham a écrit :
> Hi.
>
> On 29/04/11 04:35, Martin Peres wrote:
>> Le 28/04/2011 20:29, Maxim Levitsky a écrit :
>>> On Thu, 2011-04-28 at 20:24 +0200, Martin Peres wrote:
>>>> Le 28/04/2011 18:58, Maxim Levitsky a écrit :
>>>>> Interesting fact is that GPU temperatures rise to very high levels
>>>>> (~75C) even while doing CPU only work (like compiling kernel for
>>>>> example).
>>>> Let me guess, you're on a laptop? The temperature of the case goes up
>>>> when the processor is working and so, it cools the GPU less ;)
>>> Yes, but that doesn't happen while using nvidia to this extent.
>>> I think that GPU has many units running in endless loop doing nothing
>>> but consuming power (~5W) difference!
>> I really need to have a look at this. I have a power meter now (acpi
>> reports me funky power consumption figures because my battery is dead).
> Just been reading through your posts from last night, and wanted to let
> you know I'm interested and willing to test patches too.
>
> I have an 8600GTS based laptop, and have seen the same range of power
> usage (ie have seen 14W in the page - presumably when I was using the
> blob -, but can only get it down to 23W at the moment using Nouveau).
>
> Regards,
>
> Nigel
9W! Well, you can try downclocking the card, but this won't get you down 
to 14W. When fan management is done, I'll have a look at what the blob 
does and try to find some magic there.

Martin
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/2] reclocking stability improvements
       [not found]                               ` <4DBA60B6.2060905-GANU6spQydw@public.gmane.org>
@ 2011-04-29  7:07                                 ` Nigel Cunningham
  0 siblings, 0 replies; 18+ messages in thread
From: Nigel Cunningham @ 2011-04-29  7:07 UTC (permalink / raw)
  To: Martin Peres; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi.

On 29/04/11 16:54, Martin Peres wrote:
> Le 29/04/2011 02:56, Nigel Cunningham a écrit :
>> Hi.
>>
>> On 29/04/11 04:35, Martin Peres wrote:
>>> Le 28/04/2011 20:29, Maxim Levitsky a écrit :
>>>> On Thu, 2011-04-28 at 20:24 +0200, Martin Peres wrote:
>>>>> Le 28/04/2011 18:58, Maxim Levitsky a écrit :
>>>>>> Interesting fact is that GPU temperatures rise to very high levels
>>>>>> (~75C) even while doing CPU only work (like compiling kernel for
>>>>>> example).
>>>>> Let me guess, you're on a laptop? The temperature of the case goes up
>>>>> when the processor is working and so, it cools the GPU less ;)
>>>> Yes, but that doesn't happen while using nvidia to this extent.
>>>> I think that GPU has many units running in endless loop doing nothing
>>>> but consuming power (~5W) difference!
>>> I really need to have a look at this. I have a power meter now (acpi
>>> reports me funky power consumption figures because my battery is dead).
>> Just been reading through your posts from last night, and wanted to let
>> you know I'm interested and willing to test patches too.
>>
>> I have an 8600GTS based laptop, and have seen the same range of power
>> usage (ie have seen 14W in the page - presumably when I was using the
>> blob -, but can only get it down to 23W at the moment using Nouveau).
>>
>> Regards,
>>
>> Nigel
> 9W! Well, you can try downclocking the card, but this won't get you down
> to 14W. When fan management is done, I'll have a look at what the blob
> does and try to find some magic there.

Yeah - I find 9W a bit unbelievable too. I'll try to find some time to
give the NVidia driver a run again, but I'm a chronic over-committer, so
won't promise I'll do anything real soon now!

Nigel
-- 
Evolution (n): A hypothetical process whereby improbable
events occur with alarming frequency, order arises from chaos, and
no one is given credit.
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2011-04-29  7:07 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-27 23:58 [PATCH 0/2] reclocking stability improvements Martin Peres
     [not found] ` <1303948692-22455-1-git-send-email-martin.peres-GANU6spQydw@public.gmane.org>
2011-04-27 23:58   ` [PATCH 1/2] drm/nouveau/pm: Add pm.(un)pause functions Martin Peres
2011-04-27 23:58   ` [PATCH 2/2] drm/nouveau/nv50: reclock memory using PMS on nv50 Martin Peres
2011-04-28 10:32   ` [PATCH 0/2] reclocking stability improvements Maxim Levitsky
2011-04-28 11:15     ` Martin Peres
     [not found]       ` <4DB94C4F.3000801-GANU6spQydw@public.gmane.org>
2011-04-28 11:43         ` Maxim Levitsky
2011-04-28 12:11           ` Martin Peres
     [not found]             ` <4DB95989.2090702-GANU6spQydw@public.gmane.org>
2011-04-28 12:32               ` Maxim Levitsky
2011-04-28 12:35                 ` Martin Peres
     [not found]                   ` <4DB95F02.50202-GANU6spQydw@public.gmane.org>
2011-04-28 14:09                     ` Maxim Levitsky
2011-04-28 15:24           ` Emil Velikov
2011-04-28 16:58             ` Maxim Levitsky
2011-04-28 18:24               ` Martin Peres
     [not found]                 ` <4DB9B0D7.2050700-GANU6spQydw@public.gmane.org>
2011-04-28 18:29                   ` Maxim Levitsky
2011-04-28 18:35                     ` Martin Peres
     [not found]                       ` <4DB9B36A.4020704-GANU6spQydw@public.gmane.org>
2011-04-29  0:56                         ` Nigel Cunningham
     [not found]                           ` <4DBA0CA6.2010508-jrkICu/7yk3k1uMJSBkQmQ@public.gmane.org>
2011-04-29  6:54                             ` Martin Peres
     [not found]                               ` <4DBA60B6.2060905-GANU6spQydw@public.gmane.org>
2011-04-29  7:07                                 ` Nigel Cunningham

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.