All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
To: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Ingo Molnar <mingo@elte.hu>,
	Josh Boyer <jwboyer@linux.vnet.ibm.com>
Cc: linux-kernel@vger.kernel.org, ltt-dev@lists.casi.polymtl.ca
Subject: cli/sti vs local_cmpxchg and local_add_return
Date: Mon, 16 Mar 2009 21:32:20 -0400	[thread overview]
Message-ID: <20090317013220.GA22474@Krystal> (raw)

Hi,

I am trying to get access to some non-x86 hardware to run some atomic
primitive benchmarks for a paper on LTTng I am preparing. That should be
useful to argue about performance benefit of per-cpu atomic operations
vs interrupt disabling. I would like to run the following benchmark
module on CONFIG_SMP :

- PowerPC
- MIPS
- ia64
- alpha

usage :
make
insmod test-cmpxchg-nolock.ko
insmod: error inserting 'test-cmpxchg-nolock.ko': -1 Resource temporarily unavailable
dmesg (see dmesg output)

If some of you would be kind enough to run my test module provided below
and provide the results of these tests on a recent kernel (2.6.26~2.6.29
should be good) along with their cpuinfo, I would greatly appreciate.

Here are the CAS results for various Intel-based architectures :

Architecture         | Speedup                      |      CAS     |         Interrupts         |
                     | (cli + sti) / local cmpxchg  | local | sync | Enable (sti) | Disable (cli)
-------------------------------------------------------------------------------------------------
Intel Pentium 4      | 5.24                         |  25   | 81   | 70           | 61          |
AMD Athlon(tm)64 X2  | 4.57                         |  7    | 17   | 17           | 15          |
Intel Core2          | 6.33                         |  6    | 30   | 20           | 18          |
Intel Xeon E5405     | 5.25                         |  8    | 24   | 20           | 22          |

The benefit expected on PowerPC, ia64 and alpha should principally come
from removed memory barriers in the local primitives.

Thanks,

Mathieu

P.S. please forgive the coding style and hackish interface. :)


/* test-cmpxchg-nolock.c
 *
 * Compare local cmpxchg with irq disable / enable.
 */


#include <linux/jiffies.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <asm/timex.h>
#include <asm/system.h>

#define NR_LOOPS 20000

int test_val;

static void do_testbaseline(void)
{
	unsigned long flags;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;

	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for (i = 0; i < NR_LOOPS; i++) {
		asm volatile ("");
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;

	printk(KERN_ALERT "test results: time for baseline\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> baseline takes %llu cycles\n", time);
	printk(KERN_ALERT "test end\n");
}

static void do_test_sync_cmpxchg(void)
{
	int ret;
	unsigned long flags;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;

	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for (i = 0; i < NR_LOOPS; i++) {
#ifdef CONFIG_X86_32
		ret = sync_cmpxchg(&test_val, 0, 0);
#else
		ret = cmpxchg(&test_val, 0, 0);
#endif
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;

	printk(KERN_ALERT "test results: time for locked cmpxchg\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> locked cmpxchg takes %llu cycles\n", time);
	printk(KERN_ALERT "test end\n");
}

static void do_test_cmpxchg(void)
{
	int ret;
	unsigned long flags;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;

	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for (i = 0; i < NR_LOOPS; i++) {
		ret = cmpxchg_local(&test_val, 0, 0);
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;

	printk(KERN_ALERT "test results: time for non locked cmpxchg\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> non locked cmpxchg takes %llu cycles\n", time);
	printk(KERN_ALERT "test end\n");
}
static void do_test_sync_inc(void)
{
	int ret;
	unsigned long flags;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;
	atomic_t val;

	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for (i = 0; i < NR_LOOPS; i++) {
		ret = atomic_add_return(10, &val);
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;

	printk(KERN_ALERT "test results: time for locked add return\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> locked add return takes %llu cycles\n", time);
	printk(KERN_ALERT "test end\n");
}


static void do_test_inc(void)
{
	int ret;
	unsigned long flags;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;
	local_t loc_val;

	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for (i = 0; i < NR_LOOPS; i++) {
		ret = local_add_return(10, &loc_val);
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;

	printk(KERN_ALERT "test results: time for non locked add return\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> non locked add return takes %llu cycles\n", time);
	printk(KERN_ALERT "test end\n");
}



/*
 * This test will have a higher standard deviation due to incoming interrupts.
 */
static void do_test_enable_int(void)
{
	unsigned long flags;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;

	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for (i = 0; i < NR_LOOPS; i++) {
		local_irq_restore(flags);
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;

	printk(KERN_ALERT "test results: time for enabling interrupts (STI)\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> enabling interrupts (STI) takes %llu cycles\n",
					time);
	printk(KERN_ALERT "test end\n");
}

static void do_test_disable_int(void)
{
	unsigned long flags, flags2;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;

	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for ( i = 0; i < NR_LOOPS; i++) {
		local_irq_save(flags2);
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;

	printk(KERN_ALERT "test results: time for disabling interrupts (CLI)\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> disabling interrupts (CLI) takes %llu cycles\n",
				time);
	printk(KERN_ALERT "test end\n");
}

static void do_test_int(void)
{
	unsigned long flags;
	unsigned int i;
	cycles_t time1, time2, time;
	u32 rem;

	local_irq_save(flags);
	preempt_disable();
	time1 = get_cycles();
	for (i = 0; i < NR_LOOPS; i++) {
		local_irq_restore(flags);
		local_irq_save(flags);
	}
	time2 = get_cycles();
	local_irq_restore(flags);
	preempt_enable();
	time = time2 - time1;

	printk(KERN_ALERT "test results: time for disabling/enabling interrupts (STI/CLI)\n");
	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
	printk(KERN_ALERT "total time: %llu\n", time);
	time = div_u64_rem(time, NR_LOOPS, &rem);
	printk(KERN_ALERT "-> enabling/disabling interrupts (STI/CLI) takes %llu cycles\n",
					time);
	printk(KERN_ALERT "test end\n");
}



static int ltt_test_init(void)
{
	printk(KERN_ALERT "test init\n");
	
	do_testbaseline();
	do_test_sync_cmpxchg();
	do_test_cmpxchg();
	do_test_sync_inc();
	do_test_inc();
	do_test_enable_int();
	do_test_disable_int();
	do_test_int();
	return -EAGAIN; /* Fail will directly unload the module */
}

static void ltt_test_exit(void)
{
	printk(KERN_ALERT "test exit\n");
}

module_init(ltt_test_init)
module_exit(ltt_test_exit)

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mathieu Desnoyers");
MODULE_DESCRIPTION("Cmpxchg vs int Test");



* Makefile

ifneq ($(KERNELRELEASE),)
	obj-m += test-cmpxchg-nolock.o
else
	KERNELDIR ?= /lib/modules/$(shell uname -r)/build
	PWD := $(shell pwd)
	KERNELRELEASE = $(shell cat $(KERNELDIR)/$(KBUILD_OUTPUT)/include/linux/version.h | sed -n 's/.*UTS_RELEASE.*\"\(.*\)\".*/\1/p')
ifneq ($(INSTALL_MOD_PATH),)
	DEPMOD_OPT := -b $(INSTALL_MOD_PATH)
endif

default:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules

modules_install:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install
	if [ -f $(KERNELDIR)/$(KBUILD_OUTPUT)/System.map ] ; then /sbin/depmod -ae -F $(KERNELDIR)/$(KBUILD_OUTPUT)/System.map $(DEPMOD_OPT) $(KERNELRELEASE) ; fi


clean:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
endif


-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

             reply	other threads:[~2009-03-17  1:32 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-17  1:32 Mathieu Desnoyers [this message]
2009-03-17  3:37 ` cli/sti vs local_cmpxchg and local_add_return David Miller
2009-03-17  4:10   ` Mathieu Desnoyers
2009-03-17  4:27     ` David Miller
2009-03-17  4:44       ` Mathieu Desnoyers
2009-03-17  5:01 ` Paul E. McKenney
2009-03-17 16:06   ` Mathieu Desnoyers
2009-03-17 19:28     ` David Miller
2009-03-17 19:35       ` Mathieu Desnoyers
2009-03-17  6:05 ` Nick Piggin
2009-03-17 15:14   ` [ltt-dev] " Mathieu Desnoyers
2009-03-18 11:43     ` Nick Piggin
2009-03-18 15:10       ` Mathieu Desnoyers
2009-03-17 18:42 ` Alan D. Brunelle
2009-03-17 19:01   ` Andika Triwidada
2009-03-23 16:50   ` Mathieu Desnoyers
2009-03-18 11:56 ` Josh Boyer
2009-03-23 16:56   ` Mathieu Desnoyers
2009-03-23 17:04     ` Josh Boyer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090317013220.GA22474@Krystal \
    --to=mathieu.desnoyers@polymtl.ca \
    --cc=jwboyer@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ltt-dev@lists.casi.polymtl.ca \
    --cc=mingo@elte.hu \
    --cc=paulmck@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.