* [ANNOUNCE] 3.10.18-rt14
@ 2013-11-08 22:15 Sebastian Andrzej Siewior
2013-11-11 12:08 ` Christoph Mathys
0 siblings, 1 reply; 3+ messages in thread
From: Sebastian Andrzej Siewior @ 2013-11-08 22:15 UTC (permalink / raw)
To: linux-rt-users; +Cc: LKML, Thomas Gleixner, rostedt, John Kacur
Dear RT folks!
I'm pleased to announce the v3.10.18-rt14 patch set.
Changes since v3.10.18-rt13
- a SLUB fix. The delayed free might use wrong kmem_cache structure.
- update to Yang Shi's memcontrol sleeping while atomic fix. Thank you,
Yang Shi.
- dropping the wbinvd in i915. The do_wbinvd module parameter is gone,
the fix from mainline has been backported. This has been compile
tested, some feedback would be nice.
Known issues:
- SLAB support not working
- The cpsw network driver shows some issues.
- bcache is disabled.
- an ancient race (since we got sleeping spinlocks) where the
TASK_TRACED state is temporarily replaced while waiting on a rw
lock and the task can't be traced.
The delta patch against v3.10.18-rt13 is appended below and can be found
here:
https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/incr/patch-3.10.18-rt13-rt14.patch.xz
The RT patch against 3.10.18 can be found here:
https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/patch-3.10.18-rt14.patch.xz
The split quilt queue is available at:
https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/patches-3.10.18-rt14.tar.xz
Sebastian
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d62b80d..080b1b2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -35,7 +35,6 @@
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
-#include <linux/module.h>
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
@@ -2714,47 +2713,17 @@ static inline int fence_number(struct drm_i915_private *dev_priv,
return fence - dev_priv->fence_regs;
}
-static bool do_wbinvd = true;
-module_param(do_wbinvd, bool, 0644);
-MODULE_PARM_DESC(do_wbinvd, "Do expensive synchronization. Say no after you pin each GPU process to the same CPU in order to lower the latency.");
-
-static void i915_gem_write_fence__ipi(void *data)
-{
- wbinvd();
-}
-
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
struct drm_i915_fence_reg *fence,
bool enable)
{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
- int fence_reg = fence_number(dev_priv, fence);
-
- /* In order to fully serialize access to the fenced region and
- * the update to the fence register we need to take extreme
- * measures on SNB+. In theory, the write to the fence register
- * flushes all memory transactions before, and coupled with the
- * mb() placed around the register write we serialise all memory
- * operations with respect to the changes in the tiler. Yet, on
- * SNB+ we need to take a step further and emit an explicit wbinvd()
- * on each processor in order to manually flush all memory
- * transactions before updating the fence register.
- */
- if (HAS_LLC(obj->base.dev)) {
- if (do_wbinvd) {
-#ifdef CONFIG_PREEMPT_RT_FULL
- pr_err_once("WARNING! The i915 invalidates all caches which increases the latency.");
- pr_err_once("As a workaround use 'i915.do_wbinvd=no' and PIN each process doing ");
- pr_err_once("any kind of GPU activity to the same CPU to avoid problems.");
-#endif
- on_each_cpu(i915_gem_write_fence__ipi, NULL, 1);
- }
- }
- i915_gem_write_fence(dev, fence_reg, enable ? obj : NULL);
+ struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+ int reg = fence_number(dev_priv, fence);
+
+ i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
if (enable) {
- obj->fence_reg = fence_reg;
+ obj->fence_reg = reg;
fence->obj = obj;
list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
} else {
diff --git a/localversion-rt b/localversion-rt
index 9f7d0bd..08b3e75 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt13
+-rt14
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f113cb7..2163f2f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2436,10 +2436,11 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
*/
static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
{
- int cpu;
+ int cpu, curcpu;
/* Notify other cpus that system-wide "drain" is running */
get_online_cpus();
+ curcpu = get_cpu_light();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
struct mem_cgroup *memcg;
@@ -2449,9 +2450,14 @@ static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
continue;
if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
continue;
- if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
- schedule_work_on(cpu, &stock->work);
+ if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
+ if (cpu == curcpu)
+ drain_local_stock(&stock->work);
+ else
+ schedule_work_on(cpu, &stock->work);
+ }
}
+ put_cpu_light();
if (!sync)
goto out;
diff --git a/mm/slub.c b/mm/slub.c
index 1378cd1..31c6f9f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1428,13 +1428,13 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
__free_memcg_kmem_pages(page, order);
}
-static void free_delayed(struct kmem_cache *s, struct list_head *h)
+static void free_delayed(struct list_head *h)
{
while(!list_empty(h)) {
struct page *page = list_first_entry(h, struct page, lru);
list_del(&page->lru);
- __free_slab(s, page);
+ __free_slab(page->slab_cache, page);
}
}
@@ -2007,7 +2007,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
list_splice_init(&f->list, &tofree);
raw_spin_unlock(&f->lock);
local_irq_restore(flags);
- free_delayed(s, &tofree);
+ free_delayed(&tofree);
oldpage = NULL;
pobjects = 0;
pages = 0;
@@ -2083,7 +2083,7 @@ static void flush_all(struct kmem_cache *s)
raw_spin_lock_irq(&f->lock);
list_splice_init(&f->list, &tofree);
raw_spin_unlock_irq(&f->lock);
- free_delayed(s, &tofree);
+ free_delayed(&tofree);
}
}
@@ -2331,7 +2331,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
list_splice_init(&f->list, &tofree);
raw_spin_unlock(&f->lock);
local_irq_restore(flags);
- free_delayed(s, &tofree);
+ free_delayed(&tofree);
return freelist;
new_slab:
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [ANNOUNCE] 3.10.18-rt14
2013-11-08 22:15 [ANNOUNCE] 3.10.18-rt14 Sebastian Andrzej Siewior
@ 2013-11-11 12:08 ` Christoph Mathys
2013-11-11 12:50 ` Sebastian Andrzej Siewior
0 siblings, 1 reply; 3+ messages in thread
From: Christoph Mathys @ 2013-11-11 12:08 UTC (permalink / raw)
To: Sebastian Andrzej Siewior, Linux RT Users
The i915-fix seems to work.
cyclictest has been running on the kernel for several hours now and
has a maximum latency of 61us. On the previous kernel version without
enabling the do_wbinvd-hack the maximum latency was above 2ms.
Thanks.
Christoph
On Fri, Nov 8, 2013 at 11:15 PM, Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
> Dear RT folks!
>
> I'm pleased to announce the v3.10.18-rt14 patch set.
>
> Changes since v3.10.18-rt13
> - a SLUB fix. The delayed free might use wrong kmem_cache structure.
> - update to Yang Shi's memcontrol sleeping while atomic fix. Thank you,
> Yang Shi.
> - dropping the wbinvd in i915. The do_wbinvd module parameter is gone,
> the fix from mainline has been backported. This has been compile
> tested, some feedback would be nice.
>
> Known issues:
>
> - SLAB support not working
>
> - The cpsw network driver shows some issues.
>
> - bcache is disabled.
>
> - an ancient race (since we got sleeping spinlocks) where the
> TASK_TRACED state is temporarily replaced while waiting on a rw
> lock and the task can't be traced.
>
> The delta patch against v3.10.18-rt13 is appended below and can be found
> here:
> https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/incr/patch-3.10.18-rt13-rt14.patch.xz
>
> The RT patch against 3.10.18 can be found here:
>
> https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/patch-3.10.18-rt14.patch.xz
>
> The split quilt queue is available at:
>
> https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/patches-3.10.18-rt14.tar.xz
>
> Sebastian
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d62b80d..080b1b2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -35,7 +35,6 @@
> #include <linux/swap.h>
> #include <linux/pci.h>
> #include <linux/dma-buf.h>
> -#include <linux/module.h>
>
> static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
> static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
> @@ -2714,47 +2713,17 @@ static inline int fence_number(struct drm_i915_private *dev_priv,
> return fence - dev_priv->fence_regs;
> }
>
> -static bool do_wbinvd = true;
> -module_param(do_wbinvd, bool, 0644);
> -MODULE_PARM_DESC(do_wbinvd, "Do expensive synchronization. Say no after you pin each GPU process to the same CPU in order to lower the latency.");
> -
> -static void i915_gem_write_fence__ipi(void *data)
> -{
> - wbinvd();
> -}
> -
> static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
> struct drm_i915_fence_reg *fence,
> bool enable)
> {
> - struct drm_device *dev = obj->base.dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - int fence_reg = fence_number(dev_priv, fence);
> -
> - /* In order to fully serialize access to the fenced region and
> - * the update to the fence register we need to take extreme
> - * measures on SNB+. In theory, the write to the fence register
> - * flushes all memory transactions before, and coupled with the
> - * mb() placed around the register write we serialise all memory
> - * operations with respect to the changes in the tiler. Yet, on
> - * SNB+ we need to take a step further and emit an explicit wbinvd()
> - * on each processor in order to manually flush all memory
> - * transactions before updating the fence register.
> - */
> - if (HAS_LLC(obj->base.dev)) {
> - if (do_wbinvd) {
> -#ifdef CONFIG_PREEMPT_RT_FULL
> - pr_err_once("WARNING! The i915 invalidates all caches which increases the latency.");
> - pr_err_once("As a workaround use 'i915.do_wbinvd=no' and PIN each process doing ");
> - pr_err_once("any kind of GPU activity to the same CPU to avoid problems.");
> -#endif
> - on_each_cpu(i915_gem_write_fence__ipi, NULL, 1);
> - }
> - }
> - i915_gem_write_fence(dev, fence_reg, enable ? obj : NULL);
> + struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> + int reg = fence_number(dev_priv, fence);
> +
> + i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
>
> if (enable) {
> - obj->fence_reg = fence_reg;
> + obj->fence_reg = reg;
> fence->obj = obj;
> list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
> } else {
> diff --git a/localversion-rt b/localversion-rt
> index 9f7d0bd..08b3e75 100644
> --- a/localversion-rt
> +++ b/localversion-rt
> @@ -1 +1 @@
> --rt13
> +-rt14
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index f113cb7..2163f2f 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2436,10 +2436,11 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
> */
> static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
> {
> - int cpu;
> + int cpu, curcpu;
>
> /* Notify other cpus that system-wide "drain" is running */
> get_online_cpus();
> + curcpu = get_cpu_light();
> for_each_online_cpu(cpu) {
> struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
> struct mem_cgroup *memcg;
> @@ -2449,9 +2450,14 @@ static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
> continue;
> if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
> continue;
> - if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
> - schedule_work_on(cpu, &stock->work);
> + if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
> + if (cpu == curcpu)
> + drain_local_stock(&stock->work);
> + else
> + schedule_work_on(cpu, &stock->work);
> + }
> }
> + put_cpu_light();
>
> if (!sync)
> goto out;
> diff --git a/mm/slub.c b/mm/slub.c
> index 1378cd1..31c6f9f 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1428,13 +1428,13 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
> __free_memcg_kmem_pages(page, order);
> }
>
> -static void free_delayed(struct kmem_cache *s, struct list_head *h)
> +static void free_delayed(struct list_head *h)
> {
> while(!list_empty(h)) {
> struct page *page = list_first_entry(h, struct page, lru);
>
> list_del(&page->lru);
> - __free_slab(s, page);
> + __free_slab(page->slab_cache, page);
> }
> }
>
> @@ -2007,7 +2007,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
> list_splice_init(&f->list, &tofree);
> raw_spin_unlock(&f->lock);
> local_irq_restore(flags);
> - free_delayed(s, &tofree);
> + free_delayed(&tofree);
> oldpage = NULL;
> pobjects = 0;
> pages = 0;
> @@ -2083,7 +2083,7 @@ static void flush_all(struct kmem_cache *s)
> raw_spin_lock_irq(&f->lock);
> list_splice_init(&f->list, &tofree);
> raw_spin_unlock_irq(&f->lock);
> - free_delayed(s, &tofree);
> + free_delayed(&tofree);
> }
> }
>
> @@ -2331,7 +2331,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
> list_splice_init(&f->list, &tofree);
> raw_spin_unlock(&f->lock);
> local_irq_restore(flags);
> - free_delayed(s, &tofree);
> + free_delayed(&tofree);
> return freelist;
>
> new_slab:
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [ANNOUNCE] 3.10.18-rt14
2013-11-11 12:08 ` Christoph Mathys
@ 2013-11-11 12:50 ` Sebastian Andrzej Siewior
0 siblings, 0 replies; 3+ messages in thread
From: Sebastian Andrzej Siewior @ 2013-11-11 12:50 UTC (permalink / raw)
To: Christoph Mathys; +Cc: Linux RT Users
On 11/11/2013 01:08 PM, Christoph Mathys wrote:
> The i915-fix seems to work.
Thank you for the info.
> Thanks.
> Christoph
Sebastian
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2013-11-11 12:50 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-11-08 22:15 [ANNOUNCE] 3.10.18-rt14 Sebastian Andrzej Siewior
2013-11-11 12:08 ` Christoph Mathys
2013-11-11 12:50 ` Sebastian Andrzej Siewior
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).