public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] [GIT PULL][2.6.37] ftrace: C version of recordmcount
@ 2010-10-14 21:00 Steven Rostedt
  2010-10-14 21:00 ` [PATCH 1/3] ftrace: Add C version of recordmcount compile time code Steven Rostedt
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Steven Rostedt @ 2010-10-14 21:00 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker


Ingo,

Please pull the latest tip/perf/recordmcount tree, which can be found at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
tip/perf/recordmcount


John Reiser (1):
      ftrace: Add C version of recordmcount compile time code

Steven Rostedt (2):
      ftrace/x86: Add support for C version of recordmcount
      ftrace: Remove duplicate code for 64 and 32 bit in recordmcount.c

----
 Makefile               |    6 +
 arch/x86/Kconfig       |    1 +
 kernel/trace/Kconfig   |    5 +
 scripts/Makefile       |    1 +
 scripts/Makefile.build |    4 +
 scripts/recordmcount.c |  345 +++++++++++++++++++++++++++++++++++++++++++++
 scripts/recordmcount.h |  366 ++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 728 insertions(+), 0 deletions(-)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/3] ftrace: Add C version of recordmcount compile time code
  2010-10-14 21:00 [PATCH 0/3] [GIT PULL][2.6.37] ftrace: C version of recordmcount Steven Rostedt
@ 2010-10-14 21:00 ` Steven Rostedt
  2010-10-14 21:00 ` [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount Steven Rostedt
  2010-10-14 21:00 ` [PATCH 3/3] ftrace: Remove duplicate code for 64 and 32 bit in recordmcount.c Steven Rostedt
  2 siblings, 0 replies; 9+ messages in thread
From: Steven Rostedt @ 2010-10-14 21:00 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker, John Reiser

[-- Attachment #1: 0001-ftrace-Add-C-version-of-recordmcount-compile-time-co.patch --]
[-- Type: text/plain, Size: 27856 bytes --]

From: John Reiser <jreiser@bitwagon.com>

Currently, the mcount callers are found with a perl script that runs
objdump on every object file in the kernel. This is a C version of
that same code, which should reduce the time it takes to compile
the kernel with dynamic ftrace enabled.

Signed-off-by: John Reiser <jreiser@bitwagon.com>

[ Updated the code to include the .text.unlikely section and
  reformatted it to follow the Linux coding style. ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 scripts/recordmcount.c |  885 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 885 insertions(+), 0 deletions(-)
 create mode 100644 scripts/recordmcount.c

diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
new file mode 100644
index 0000000..34f32be
--- /dev/null
+++ b/scripts/recordmcount.c
@@ -0,0 +1,885 @@
+/*
+ * recordmcount.c: construct a table of the locations of calls to 'mcount'
+ * so that ftrace can find them quickly.
+ * Copyright 2009 John F. Reiser <jreiser@BitWagon.com>.  All rights reserved.
+ * Licensed under the GNU General Public License, version 2 (GPLv2).
+ *
+ * Restructured to fit Linux format, as well as other updates:
+ *  Copyright 2010 Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
+ */
+
+/*
+ * Strategy: alter the .o file in-place.
+ *
+ * Append a new STRTAB that has the new section names, followed by a new array
+ * ElfXX_Shdr[] that has the new section headers, followed by the section
+ * contents for __mcount_loc and its relocations.  The old shstrtab strings,
+ * and the old ElfXX_Shdr[] array, remain as "garbage" (commonly, a couple
+ * kilobytes.)  Subsequent processing by /bin/ld (or the kernel module loader)
+ * will ignore the garbage regions, because they are not designated by the
+ * new .e_shoff nor the new ElfXX_Shdr[].  [To remove the garbage, use
+ * "ld -r" to create a new file that omits it.]
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static int fd_map;	/* File descriptor for file being modified. */
+static int mmap_failed; /* Boolean flag. */
+static void *ehdr_curr; /* current ElfXX_Ehdr *  for resource cleanup */
+static char gpfx;	/* prefix for global symbol name (sometimes '_') */
+static struct stat sb;	/* Remember .st_size, etc. */
+static jmp_buf jmpenv;	/* setjmp/longjmp per-file error escape */
+
+/* setjmp() return values */
+enum {
+	SJ_SETJMP = 0,  /* hardwired first return */
+	SJ_FAIL,
+	SJ_SUCCEED
+};
+
+/* Per-file resource cleanup; needed when processing multiple files. */
+static void
+cleanup(void)
+{
+	if (!mmap_failed)
+		munmap(ehdr_curr, sb.st_size);
+	else
+		free(ehdr_curr);
+	close(fd_map);
+}
+
+static void __attribute__((noreturn))
+fail_file(void)
+{
+	cleanup();
+	longjmp(jmpenv, SJ_FAIL);
+}
+
+static void __attribute__((noreturn))
+succeed_file(void)
+{
+	cleanup();
+	longjmp(jmpenv, SJ_SUCCEED);
+}
+
+/* ulseek, uread, ...:  Check return value for errors. */
+
+static off_t
+ulseek(int const fd, off_t const offset, int const whence)
+{
+	off_t const w = lseek(fd, offset, whence);
+	if ((off_t)-1 == w) {
+		perror("lseek");
+		fail_file();
+	}
+	return w;
+}
+
+static size_t
+uread(int const fd, void *const buf, size_t const count)
+{
+	size_t const n = read(fd, buf, count);
+	if (n != count) {
+		perror("read");
+		fail_file();
+	}
+	return n;
+}
+
+static size_t
+uwrite(int const fd, void const *const buf, size_t const count)
+{
+	size_t const n = write(fd, buf, count);
+	if (n != count) {
+		perror("write");
+		fail_file();
+	}
+	return n;
+}
+
+static void *
+umalloc(size_t size)
+{
+	void *const addr = malloc(size);
+	if (0 == addr) {
+		fprintf(stderr, "malloc failed: %zu bytes\n", size);
+		fail_file();
+	}
+	return addr;
+}
+
+/*
+ * Get the whole file as a programming convenience in order to avoid
+ * malloc+lseek+read+free of many pieces.  If successful, then mmap
+ * avoids copying unused pieces; else just read the whole file.
+ * Open for both read and write; new info will be appended to the file.
+ * Use MAP_PRIVATE so that the few changes made to the in-memory ElfXX_Ehdr
+ * do not propagate to the file until the explicit overwrite at the end.
+ * This preserves most aspects of consistency (all except .st_size)
+ * for simultaneous readers of the file while we are appending to it.
+ * However, multiple writers are still unsafe.  We choose not to use
+ * locking because it is expensive and the kernel build use case
+ * makes multiple writers unlikely.
+ */
+static void *mmap_file(char const *fname)
+{
+	void *addr;
+
+	fd_map = open(fname, O_RDWR);
+	if (0 > fd_map || 0 > fstat(fd_map, &sb)) {
+		perror(fname);
+		fail_file();
+	}
+	if (!S_ISREG(sb.st_mode)) {
+		fprintf(stderr, "not a regular file: %s\n", fname);
+		fail_file();
+	}
+	addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE,
+		    fd_map, 0);
+	mmap_failed = 0;
+	if (MAP_FAILED == addr) {
+		mmap_failed = 1;
+		addr = umalloc(sb.st_size);
+		uread(fd_map, addr, sb.st_size);
+	}
+	return addr;
+}
+
+/* w8rev, w8nat, ...: Handle endianness. */
+
+static uint64_t w8rev(uint64_t const x)
+{
+	return   ((0xff & (x >> (0 * 8))) << (7 * 8))
+	       | ((0xff & (x >> (1 * 8))) << (6 * 8))
+	       | ((0xff & (x >> (2 * 8))) << (5 * 8))
+	       | ((0xff & (x >> (3 * 8))) << (4 * 8))
+	       | ((0xff & (x >> (4 * 8))) << (3 * 8))
+	       | ((0xff & (x >> (5 * 8))) << (2 * 8))
+	       | ((0xff & (x >> (6 * 8))) << (1 * 8))
+	       | ((0xff & (x >> (7 * 8))) << (0 * 8));
+}
+
+static uint32_t w4rev(uint32_t const x)
+{
+	return   ((0xff & (x >> (0 * 8))) << (3 * 8))
+	       | ((0xff & (x >> (1 * 8))) << (2 * 8))
+	       | ((0xff & (x >> (2 * 8))) << (1 * 8))
+	       | ((0xff & (x >> (3 * 8))) << (0 * 8));
+}
+
+static uint32_t w2rev(uint16_t const x)
+{
+	return   ((0xff & (x >> (0 * 8))) << (1 * 8))
+	       | ((0xff & (x >> (1 * 8))) << (0 * 8));
+}
+
+static uint64_t w8nat(uint64_t const x)
+{
+	return x;
+}
+
+static uint32_t w4nat(uint32_t const x)
+{
+	return x;
+}
+
+static uint32_t w2nat(uint16_t const x)
+{
+	return x;
+}
+
+static uint64_t (*w8)(uint64_t);
+static uint32_t (*w)(uint32_t);
+static uint32_t (*w2)(uint16_t);
+
+/* Names of the sections that could contain calls to mcount. */
+static int
+is_mcounted_section_name(char const *const txtname)
+{
+	return 0 == strcmp(".text",          txtname) ||
+		0 == strcmp(".sched.text",    txtname) ||
+		0 == strcmp(".spinlock.text", txtname) ||
+		0 == strcmp(".irqentry.text", txtname) ||
+		0 == strcmp(".text.unlikely", txtname);
+}
+
+/* Append the new shstrtab, Elf32_Shdr[], __mcount_loc and its relocations. */
+static void append32(Elf32_Ehdr *const ehdr,
+		     Elf32_Shdr *const shstr,
+		     uint32_t const *const mloc0,
+		     uint32_t const *const mlocp,
+		     Elf32_Rel const *const mrel0,
+		     Elf32_Rel const *const mrelp,
+		     unsigned int const rel_entsize,
+		     unsigned int const symsec_sh_link)
+{
+	/* Begin constructing output file */
+	Elf32_Shdr mcsec;
+	char const *mc_name = (sizeof(Elf32_Rela) == rel_entsize)
+		? ".rela__mcount_loc"
+		:  ".rel__mcount_loc";
+	unsigned const old_shnum = w2(ehdr->e_shnum);
+	uint32_t const old_shoff = w(ehdr->e_shoff);
+	uint32_t const old_shstr_sh_size   = w(shstr->sh_size);
+	uint32_t const old_shstr_sh_offset = w(shstr->sh_offset);
+	uint32_t t = 1 + strlen(mc_name) + w(shstr->sh_size);
+	uint32_t new_e_shoff;
+
+	shstr->sh_size = w(t);
+	shstr->sh_offset = w(sb.st_size);
+	t += sb.st_size;
+	t += (3u & -t);  /* 4-byte align */
+	new_e_shoff = t;
+
+	/* body for new shstrtab */
+	ulseek(fd_map, sb.st_size, SEEK_SET);
+	uwrite(fd_map, old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size);
+	uwrite(fd_map, mc_name, 1 + strlen(mc_name));
+
+	/* old(modified) Elf32_Shdr table, 4-byte aligned */
+	ulseek(fd_map, t, SEEK_SET);
+	t += sizeof(Elf32_Shdr) * old_shnum;
+	uwrite(fd_map, old_shoff + (void *)ehdr,
+	       sizeof(Elf32_Shdr) * old_shnum);
+
+	/* new sections __mcount_loc and .rel__mcount_loc */
+	t += 2*sizeof(mcsec);
+	mcsec.sh_name = w((sizeof(Elf32_Rela) == rel_entsize) + strlen(".rel")
+		+ old_shstr_sh_size);
+	mcsec.sh_type = w(SHT_PROGBITS);
+	mcsec.sh_flags = w(SHF_ALLOC);
+	mcsec.sh_addr = 0;
+	mcsec.sh_offset = w(t);
+	mcsec.sh_size = w((void *)mlocp - (void *)mloc0);
+	mcsec.sh_link = 0;
+	mcsec.sh_info = 0;
+	mcsec.sh_addralign = w(4);
+	mcsec.sh_entsize = w(4);
+	uwrite(fd_map, &mcsec, sizeof(mcsec));
+
+	mcsec.sh_name = w(old_shstr_sh_size);
+	mcsec.sh_type = (sizeof(Elf32_Rela) == rel_entsize)
+		? w(SHT_RELA)
+		: w(SHT_REL);
+	mcsec.sh_flags = 0;
+	mcsec.sh_addr = 0;
+	mcsec.sh_offset = w((void *)mlocp - (void *)mloc0 + t);
+	mcsec.sh_size   = w((void *)mrelp - (void *)mrel0);
+	mcsec.sh_link = w(symsec_sh_link);
+	mcsec.sh_info = w(old_shnum);
+	mcsec.sh_addralign = w(4);
+	mcsec.sh_entsize = w(rel_entsize);
+	uwrite(fd_map, &mcsec, sizeof(mcsec));
+
+	uwrite(fd_map, mloc0, (void *)mlocp - (void *)mloc0);
+	uwrite(fd_map, mrel0, (void *)mrelp - (void *)mrel0);
+
+	ehdr->e_shoff = w(new_e_shoff);
+	ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum));  /* {.rel,}__mcount_loc */
+	ulseek(fd_map, 0, SEEK_SET);
+	uwrite(fd_map, ehdr, sizeof(*ehdr));
+}
+
+/*
+ * append64 and append32 (and other analogous pairs) could be templated
+ * using C++, but the complexity is high.  (For an example, look at p_elf.h
+ * in the source for UPX, http://upx.sourceforge.net)  So: remember to make
+ * the corresponding change in the routine for the other size.
+ */
+static void append64(Elf64_Ehdr *const ehdr,
+		     Elf64_Shdr *const shstr,
+		     uint64_t const *const mloc0,
+		     uint64_t const *const mlocp,
+		     Elf64_Rel const *const mrel0,
+		     Elf64_Rel const *const mrelp,
+		     unsigned int const rel_entsize,
+		     unsigned int const symsec_sh_link)
+{
+	/* Begin constructing output file */
+	Elf64_Shdr mcsec;
+	char const *mc_name = (sizeof(Elf64_Rela) == rel_entsize)
+		? ".rela__mcount_loc"
+		:  ".rel__mcount_loc";
+	unsigned const old_shnum = w2(ehdr->e_shnum);
+	uint64_t const old_shoff = w8(ehdr->e_shoff);
+	uint64_t const old_shstr_sh_size   = w8(shstr->sh_size);
+	uint64_t const old_shstr_sh_offset = w8(shstr->sh_offset);
+	uint64_t t = 1 + strlen(mc_name) + w8(shstr->sh_size);
+	uint64_t new_e_shoff;
+
+	shstr->sh_size = w8(t);
+	shstr->sh_offset = w8(sb.st_size);
+	t += sb.st_size;
+	t += (7u & -t);  /* 8-byte align */
+	new_e_shoff = t;
+
+	/* body for new shstrtab */
+	ulseek(fd_map, sb.st_size, SEEK_SET);
+	uwrite(fd_map, old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size);
+	uwrite(fd_map, mc_name, 1 + strlen(mc_name));
+
+	/* old(modified) Elf64_Shdr table, 8-byte aligned */
+	ulseek(fd_map, t, SEEK_SET);
+	t += sizeof(Elf64_Shdr) * old_shnum;
+	uwrite(fd_map, old_shoff + (void *)ehdr,
+		sizeof(Elf64_Shdr) * old_shnum);
+
+	/* new sections __mcount_loc and .rel__mcount_loc */
+	t += 2*sizeof(mcsec);
+	mcsec.sh_name = w((sizeof(Elf64_Rela) == rel_entsize) + strlen(".rel")
+		+ old_shstr_sh_size);
+	mcsec.sh_type = w(SHT_PROGBITS);
+	mcsec.sh_flags = w8(SHF_ALLOC);
+	mcsec.sh_addr = 0;
+	mcsec.sh_offset = w8(t);
+	mcsec.sh_size = w8((void *)mlocp - (void *)mloc0);
+	mcsec.sh_link = 0;
+	mcsec.sh_info = 0;
+	mcsec.sh_addralign = w8(8);
+	mcsec.sh_entsize = w8(8);
+	uwrite(fd_map, &mcsec, sizeof(mcsec));
+
+	mcsec.sh_name = w(old_shstr_sh_size);
+	mcsec.sh_type = (sizeof(Elf64_Rela) == rel_entsize)
+		? w(SHT_RELA)
+		: w(SHT_REL);
+	mcsec.sh_flags = 0;
+	mcsec.sh_addr = 0;
+	mcsec.sh_offset = w8((void *)mlocp - (void *)mloc0 + t);
+	mcsec.sh_size   = w8((void *)mrelp - (void *)mrel0);
+	mcsec.sh_link = w(symsec_sh_link);
+	mcsec.sh_info = w(old_shnum);
+	mcsec.sh_addralign = w8(8);
+	mcsec.sh_entsize = w8(rel_entsize);
+	uwrite(fd_map, &mcsec, sizeof(mcsec));
+
+	uwrite(fd_map, mloc0, (void *)mlocp - (void *)mloc0);
+	uwrite(fd_map, mrel0, (void *)mrelp - (void *)mrel0);
+
+	ehdr->e_shoff = w8(new_e_shoff);
+	ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum));  /* {.rel,}__mcount_loc */
+	ulseek(fd_map, 0, SEEK_SET);
+	uwrite(fd_map, ehdr, sizeof(*ehdr));
+}
+
+/*
+ * Look at the relocations in order to find the calls to mcount.
+ * Accumulate the section offsets that are found, and their relocation info,
+ * onto the end of the existing arrays.
+ */
+static uint32_t *sift32_rel_mcount(uint32_t *mlocp,
+				   unsigned const offbase,
+				   Elf32_Rel **const mrelpp,
+				   Elf32_Shdr const *const relhdr,
+				   Elf32_Ehdr const *const ehdr,
+				   unsigned const recsym,
+				   uint32_t const recval,
+				   unsigned const reltype)
+{
+	uint32_t *const mloc0 = mlocp;
+	Elf32_Rel *mrelp = *mrelpp;
+	Elf32_Shdr *const shdr0 = (Elf32_Shdr *)(w(ehdr->e_shoff)
+		+ (void *)ehdr);
+	unsigned const symsec_sh_link = w(relhdr->sh_link);
+	Elf32_Shdr const *const symsec = &shdr0[symsec_sh_link];
+	Elf32_Sym const *const sym0 = (Elf32_Sym const *)(w(symsec->sh_offset)
+		+ (void *)ehdr);
+
+	Elf32_Shdr const *const strsec = &shdr0[w(symsec->sh_link)];
+	char const *const str0 = (char const *)(w(strsec->sh_offset)
+		+ (void *)ehdr);
+
+	Elf32_Rel const *const rel0 = (Elf32_Rel const *)(w(relhdr->sh_offset)
+		+ (void *)ehdr);
+	unsigned rel_entsize = w(relhdr->sh_entsize);
+	unsigned const nrel = w(relhdr->sh_size) / rel_entsize;
+	Elf32_Rel const *relp = rel0;
+
+	unsigned mcountsym = 0;
+	unsigned t;
+
+	for (t = nrel; t; --t) {
+		if (!mcountsym) {
+			Elf32_Sym const *const symp =
+				&sym0[ELF32_R_SYM(w(relp->r_info))];
+
+			if (0 == strcmp((('_' == gpfx) ? "_mcount" : "mcount"),
+					&str0[w(symp->st_name)]))
+				mcountsym = ELF32_R_SYM(w(relp->r_info));
+		}
+		if (mcountsym == ELF32_R_SYM(w(relp->r_info))) {
+			uint32_t const addend = w(w(relp->r_offset) - recval);
+			mrelp->r_offset = w(offbase
+				+ ((void *)mlocp - (void *)mloc0));
+			mrelp->r_info = w(ELF32_R_INFO(recsym, reltype));
+			if (sizeof(Elf32_Rela) == rel_entsize) {
+				((Elf32_Rela *)mrelp)->r_addend = addend;
+				*mlocp++ = 0;
+			} else
+				*mlocp++ = addend;
+
+			mrelp = (Elf32_Rel *)(rel_entsize + (void *)mrelp);
+		}
+		relp = (Elf32_Rel const *)(rel_entsize + (void *)relp);
+	}
+	*mrelpp = mrelp;
+	return mlocp;
+}
+
+static uint64_t *sift64_rel_mcount(uint64_t *mlocp,
+				   unsigned const offbase,
+				   Elf64_Rel **const mrelpp,
+				   Elf64_Shdr const *const relhdr,
+				   Elf64_Ehdr const *const ehdr,
+				   unsigned const recsym,
+				   uint64_t const recval,
+				   unsigned const reltype)
+{
+	uint64_t *const mloc0 = mlocp;
+	Elf64_Rel *mrelp = *mrelpp;
+	Elf64_Shdr *const shdr0 = (Elf64_Shdr *)(w8(ehdr->e_shoff)
+		+ (void *)ehdr);
+	unsigned const symsec_sh_link = w(relhdr->sh_link);
+	Elf64_Shdr const *const symsec = &shdr0[symsec_sh_link];
+	Elf64_Sym const *const sym0 = (Elf64_Sym const *)(w8(symsec->sh_offset)
+		+ (void *)ehdr);
+
+	Elf64_Shdr const *const strsec = &shdr0[w(symsec->sh_link)];
+	char const *const str0 = (char const *)(w8(strsec->sh_offset)
+		+ (void *)ehdr);
+
+	Elf64_Rel const *const rel0 = (Elf64_Rel const *)(w8(relhdr->sh_offset)
+		+ (void *)ehdr);
+	unsigned rel_entsize = w8(relhdr->sh_entsize);
+	unsigned const nrel = w8(relhdr->sh_size) / rel_entsize;
+	Elf64_Rel const *relp = rel0;
+
+	unsigned mcountsym = 0;
+	unsigned t;
+
+	for (t = nrel; 0 != t; --t) {
+		if (!mcountsym) {
+			Elf64_Sym const *const symp =
+				&sym0[ELF64_R_SYM(w8(relp->r_info))];
+			char const *symname = &str0[w(symp->st_name)];
+
+			if ('.' == symname[0])
+				++symname;  /* ppc64 hack */
+			if (0 == strcmp((('_' == gpfx) ? "_mcount" : "mcount"),
+					symname))
+				mcountsym = ELF64_R_SYM(w8(relp->r_info));
+		}
+
+		if (mcountsym == ELF64_R_SYM(w8(relp->r_info))) {
+			uint64_t const addend = w8(w8(relp->r_offset) - recval);
+
+			mrelp->r_offset = w8(offbase
+				+ ((void *)mlocp - (void *)mloc0));
+			mrelp->r_info = w8(ELF64_R_INFO(recsym, reltype));
+			if (sizeof(Elf64_Rela) == rel_entsize) {
+				((Elf64_Rela *)mrelp)->r_addend = addend;
+				*mlocp++ = 0;
+			} else
+				*mlocp++ = addend;
+
+			mrelp = (Elf64_Rel *)(rel_entsize + (void *)mrelp);
+		}
+		relp = (Elf64_Rel const *)(rel_entsize + (void *)relp);
+	}
+	*mrelpp = mrelp;
+
+	return mlocp;
+}
+
+/*
+ * Find a symbol in the given section, to be used as the base for relocating
+ * the table of offsets of calls to mcount.  A local or global symbol suffices,
+ * but avoid a Weak symbol because it may be overridden; the change in value
+ * would invalidate the relocations of the offsets of the calls to mcount.
+ * Often the found symbol will be the unnamed local symbol generated by
+ * GNU 'as' for the start of each section.  For example:
+ *    Num:    Value  Size Type    Bind   Vis      Ndx Name
+ *      2: 00000000     0 SECTION LOCAL  DEFAULT    1
+ */
+static unsigned find32_secsym_ndx(unsigned const txtndx,
+				  char const *const txtname,
+				  uint32_t *const recvalp,
+				  Elf32_Shdr const *const symhdr,
+				  Elf32_Ehdr const *const ehdr)
+{
+	Elf32_Sym const *const sym0 = (Elf32_Sym const *)(w(symhdr->sh_offset)
+		+ (void *)ehdr);
+	unsigned const nsym = w(symhdr->sh_size) / w(symhdr->sh_entsize);
+	Elf32_Sym const *symp;
+	unsigned t;
+
+	for (symp = sym0, t = nsym; t; --t, ++symp) {
+		unsigned int const st_bind = ELF32_ST_BIND(symp->st_info);
+
+		if (txtndx == w2(symp->st_shndx)
+			/* avoid STB_WEAK */
+		    && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) {
+			*recvalp = w(symp->st_value);
+			return symp - sym0;
+		}
+	}
+	fprintf(stderr, "Cannot find symbol for section %d: %s.\n",
+		txtndx, txtname);
+	fail_file();
+}
+
+static unsigned find64_secsym_ndx(unsigned const txtndx,
+				  char const *const txtname,
+				  uint64_t *const recvalp,
+				  Elf64_Shdr const *const symhdr,
+				  Elf64_Ehdr const *const ehdr)
+{
+	Elf64_Sym const *const sym0 = (Elf64_Sym const *)(w8(symhdr->sh_offset)
+		+ (void *)ehdr);
+	unsigned const nsym = w8(symhdr->sh_size) / w8(symhdr->sh_entsize);
+	Elf64_Sym const *symp;
+	unsigned t;
+
+	for (symp = sym0, t = nsym; t; --t, ++symp) {
+		unsigned int const st_bind = ELF64_ST_BIND(symp->st_info);
+
+		if (txtndx == w2(symp->st_shndx)
+			/* avoid STB_WEAK */
+		    && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) {
+			*recvalp = w8(symp->st_value);
+			return symp - sym0;
+		}
+	}
+	fprintf(stderr, "Cannot find symbol for section %d: %s.\n",
+		txtndx, txtname);
+	fail_file();
+}
+
+/*
+ * Evade ISO C restriction: no declaration after statement in
+ * has32_rel_mcount.
+ */
+static char const *
+__has32_rel_mcount(Elf32_Shdr const *const relhdr,  /* is SHT_REL or SHT_RELA */
+		   Elf32_Shdr const *const shdr0,
+		   char const *const shstrtab,
+		   char const *const fname)
+{
+	/* .sh_info depends on .sh_type == SHT_REL[,A] */
+	Elf32_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)];
+	char const *const txtname = &shstrtab[w(txthdr->sh_name)];
+
+	if (0 == strcmp("__mcount_loc", txtname)) {
+		fprintf(stderr, "warning: __mcount_loc already exists: %s\n",
+			fname);
+		succeed_file();
+	}
+	if (SHT_PROGBITS != w(txthdr->sh_type) ||
+	    !is_mcounted_section_name(txtname))
+		return NULL;
+	return txtname;
+}
+
+static char const *has32_rel_mcount(Elf32_Shdr const *const relhdr,
+				    Elf32_Shdr const *const shdr0,
+				    char const *const shstrtab,
+				    char const *const fname)
+{
+	if (SHT_REL  != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type))
+		return NULL;
+	return __has32_rel_mcount(relhdr, shdr0, shstrtab, fname);
+}
+
+static char const *__has64_rel_mcount(Elf64_Shdr const *const relhdr,
+				      Elf64_Shdr const *const shdr0,
+				      char const *const shstrtab,
+				      char const *const fname)
+{
+	/* .sh_info depends on .sh_type == SHT_REL[,A] */
+	Elf64_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)];
+	char const *const txtname = &shstrtab[w(txthdr->sh_name)];
+
+	if (0 == strcmp("__mcount_loc", txtname)) {
+		fprintf(stderr, "warning: __mcount_loc already exists: %s\n",
+			fname);
+		succeed_file();
+	}
+	if (SHT_PROGBITS != w(txthdr->sh_type) ||
+	    !is_mcounted_section_name(txtname))
+		return NULL;
+	return txtname;
+}
+
+static char const *has64_rel_mcount(Elf64_Shdr const *const relhdr,
+				    Elf64_Shdr const *const shdr0,
+				    char const *const shstrtab,
+				    char const *const fname)
+{
+	if (SHT_REL  != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type))
+		return NULL;
+	return __has64_rel_mcount(relhdr, shdr0, shstrtab, fname);
+}
+
+static unsigned tot32_relsize(Elf32_Shdr const *const shdr0,
+			      unsigned nhdr,
+			      const char *const shstrtab,
+			      const char *const fname)
+{
+	unsigned totrelsz = 0;
+	Elf32_Shdr const *shdrp = shdr0;
+	for (; 0 != nhdr; --nhdr, ++shdrp) {
+		if (has32_rel_mcount(shdrp, shdr0, shstrtab, fname))
+			totrelsz += w(shdrp->sh_size);
+	}
+	return totrelsz;
+}
+
+static unsigned tot64_relsize(Elf64_Shdr const *const shdr0,
+			      unsigned nhdr,
+			      const char *const shstrtab,
+			      const char *const fname)
+{
+	unsigned totrelsz = 0;
+	Elf64_Shdr const *shdrp = shdr0;
+
+	for (; nhdr; --nhdr, ++shdrp) {
+		if (has64_rel_mcount(shdrp, shdr0, shstrtab, fname))
+			totrelsz += w8(shdrp->sh_size);
+	}
+	return totrelsz;
+}
+
+/* Overall supervision for Elf32 ET_REL file. */
+static void
+do32(Elf32_Ehdr *const ehdr, char const *const fname, unsigned const reltype)
+{
+	Elf32_Shdr *const shdr0 = (Elf32_Shdr *)(w(ehdr->e_shoff)
+		+ (void *)ehdr);
+	unsigned const nhdr = w2(ehdr->e_shnum);
+	Elf32_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)];
+	char const *const shstrtab = (char const *)(w(shstr->sh_offset)
+		+ (void *)ehdr);
+
+	Elf32_Shdr const *relhdr;
+	unsigned k;
+
+	/* Upper bound on space: assume all relevant relocs are for mcount. */
+	unsigned const totrelsz = tot32_relsize(shdr0, nhdr, shstrtab, fname);
+	Elf32_Rel *const mrel0 = umalloc(totrelsz);
+	Elf32_Rel *      mrelp = mrel0;
+
+	/* 2*sizeof(address) <= sizeof(Elf32_Rel) */
+	uint32_t *const mloc0 = umalloc(totrelsz>>1);
+	uint32_t *      mlocp = mloc0;
+
+	unsigned rel_entsize = 0;
+	unsigned symsec_sh_link = 0;
+
+	for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) {
+		char const *const txtname = has32_rel_mcount(relhdr, shdr0,
+			shstrtab, fname);
+		if (txtname) {
+			uint32_t recval = 0;
+			unsigned const recsym = find32_secsym_ndx(
+				w(relhdr->sh_info), txtname, &recval,
+				&shdr0[symsec_sh_link = w(relhdr->sh_link)],
+				ehdr);
+
+			rel_entsize = w(relhdr->sh_entsize);
+			mlocp = sift32_rel_mcount(mlocp,
+				(void *)mlocp - (void *)mloc0, &mrelp,
+				relhdr, ehdr, recsym, recval, reltype);
+		}
+	}
+	if (mloc0 != mlocp) {
+		append32(ehdr, shstr, mloc0, mlocp, mrel0, mrelp,
+			rel_entsize, symsec_sh_link);
+	}
+	free(mrel0);
+	free(mloc0);
+}
+
+static void
+do64(Elf64_Ehdr *const ehdr, char const *const fname, unsigned const reltype)
+{
+	Elf64_Shdr *const shdr0 = (Elf64_Shdr *)(w8(ehdr->e_shoff)
+		+ (void *)ehdr);
+	unsigned const nhdr = w2(ehdr->e_shnum);
+	Elf64_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)];
+	char const *const shstrtab = (char const *)(w8(shstr->sh_offset)
+		+ (void *)ehdr);
+
+	Elf64_Shdr const *relhdr;
+	unsigned k;
+
+	/* Upper bound on space: assume all relevant relocs are for mcount. */
+	unsigned const totrelsz = tot64_relsize(shdr0, nhdr, shstrtab, fname);
+	Elf64_Rel *const mrel0 = umalloc(totrelsz);
+	Elf64_Rel *      mrelp = mrel0;
+
+	/* 2*sizeof(address) <= sizeof(Elf64_Rel) */
+	uint64_t *const mloc0 = umalloc(totrelsz>>1);
+	uint64_t *      mlocp = mloc0;
+
+	unsigned rel_entsize = 0;
+	unsigned symsec_sh_link = 0;
+
+	for ((relhdr = shdr0), k = nhdr; k; --k, ++relhdr) {
+		char const *const txtname = has64_rel_mcount(relhdr, shdr0,
+			shstrtab, fname);
+		if (txtname) {
+			uint64_t recval = 0;
+			unsigned const recsym = find64_secsym_ndx(
+				w(relhdr->sh_info), txtname, &recval,
+				&shdr0[symsec_sh_link = w(relhdr->sh_link)],
+				ehdr);
+
+			rel_entsize = w8(relhdr->sh_entsize);
+			mlocp = sift64_rel_mcount(mlocp,
+				(void *)mlocp - (void *)mloc0, &mrelp,
+				relhdr, ehdr, recsym, recval, reltype);
+		}
+	}
+	if (mloc0 != mlocp) {
+		append64(ehdr, shstr, mloc0, mlocp, mrel0, mrelp,
+			rel_entsize, symsec_sh_link);
+	}
+	free(mrel0);
+	free(mloc0);
+}
+
+static void
+do_file(char const *const fname)
+{
+	Elf32_Ehdr *const ehdr = mmap_file(fname);
+	unsigned int reltype = 0;
+
+	ehdr_curr = ehdr;
+	w = w4nat;
+	w2 = w2nat;
+	w8 = w8nat;
+	switch (ehdr->e_ident[EI_DATA]) {
+		static unsigned int const endian = 1;
+	default: {
+		fprintf(stderr, "unrecognized ELF data encoding %d: %s\n",
+			ehdr->e_ident[EI_DATA], fname);
+		fail_file();
+	} break;
+	case ELFDATA2LSB: {
+		if (1 != *(unsigned char const *)&endian) {
+			/* main() is big endian, file.o is little endian. */
+			w = w4rev;
+			w2 = w2rev;
+			w8 = w8rev;
+		}
+	} break;
+	case ELFDATA2MSB: {
+		if (0 != *(unsigned char const *)&endian) {
+			/* main() is little endian, file.o is big endian. */
+			w = w4rev;
+			w2 = w2rev;
+			w8 = w8rev;
+		}
+	} break;
+	}  /* end switch */
+	if (0 != memcmp(ELFMAG, ehdr->e_ident, SELFMAG)
+	||  ET_REL != w2(ehdr->e_type)
+	||  EV_CURRENT != ehdr->e_ident[EI_VERSION]) {
+		fprintf(stderr, "unrecognized ET_REL file %s\n", fname);
+		fail_file();
+	}
+
+	gpfx = 0;
+	switch (w2(ehdr->e_machine)) {
+	default: {
+		fprintf(stderr, "unrecognized e_machine %d %s\n",
+			w2(ehdr->e_machine), fname);
+		fail_file();
+	} break;
+	case EM_386:	 reltype = R_386_32;                   break;
+	case EM_ARM:	 reltype = R_ARM_ABS32;                break;
+	case EM_IA_64:	 reltype = R_IA64_IMM64;   gpfx = '_'; break;
+	case EM_PPC:	 reltype = R_PPC_ADDR32;   gpfx = '_'; break;
+	case EM_PPC64:	 reltype = R_PPC64_ADDR64; gpfx = '_'; break;
+	case EM_S390:    /* reltype: e_class    */ gpfx = '_'; break;
+	case EM_SH:	 reltype = R_SH_DIR32;                 break;
+	case EM_SPARCV9: reltype = R_SPARC_64;     gpfx = '_'; break;
+	case EM_X86_64:	 reltype = R_X86_64_64;                break;
+	}  /* end switch */
+
+	switch (ehdr->e_ident[EI_CLASS]) {
+	default: {
+		fprintf(stderr, "unrecognized ELF class %d %s\n",
+			ehdr->e_ident[EI_CLASS], fname);
+		fail_file();
+	} break;
+	case ELFCLASS32: {
+		if (sizeof(Elf32_Ehdr) != w2(ehdr->e_ehsize)
+		||  sizeof(Elf32_Shdr) != w2(ehdr->e_shentsize)) {
+			fprintf(stderr,
+				"unrecognized ET_REL file: %s\n", fname);
+			fail_file();
+		}
+		if (EM_S390 == w2(ehdr->e_machine))
+			reltype = R_390_32;
+		do32(ehdr, fname, reltype);
+	} break;
+	case ELFCLASS64: {
+		Elf64_Ehdr *const ghdr = (Elf64_Ehdr *)ehdr;
+		if (sizeof(Elf64_Ehdr) != w2(ghdr->e_ehsize)
+		||  sizeof(Elf64_Shdr) != w2(ghdr->e_shentsize)) {
+			fprintf(stderr,
+				"unrecognized ET_REL file: %s\n", fname);
+			fail_file();
+		}
+		if (EM_S390 == w2(ghdr->e_machine))
+			reltype = R_390_64;
+		do64(ghdr, fname, reltype);
+	} break;
+	}  /* end switch */
+
+	cleanup();
+}
+
+int
+main(int argc, char const *argv[])
+{
+	int n_error = 0;  /* gcc-4.3.0 false positive complaint */
+	if (argc <= 1)
+		fprintf(stderr, "usage: recordmcount file.o...\n");
+	else  /* Process each file in turn, allowing deep failure. */
+	for (--argc, ++argv; 0 < argc; --argc, ++argv) {
+		int const sjval = setjmp(jmpenv);
+		switch (sjval) {
+		default: {
+			fprintf(stderr, "internal error: %s\n", argv[0]);
+			exit(1);
+		} break;
+		case SJ_SETJMP: {  /* normal sequence */
+			/* Avoid problems if early cleanup() */
+			fd_map = -1;
+			ehdr_curr = NULL;
+			mmap_failed = 1;
+			do_file(argv[0]);
+		} break;
+		case SJ_FAIL: {  /* error in do_file or below */
+			++n_error;
+		} break;
+		case SJ_SUCCEED: {  /* premature success */
+			/* do nothing */
+		} break;
+		}  /* end switch */
+	}
+	return !!n_error;
+}
+
+
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount
  2010-10-14 21:00 [PATCH 0/3] [GIT PULL][2.6.37] ftrace: C version of recordmcount Steven Rostedt
  2010-10-14 21:00 ` [PATCH 1/3] ftrace: Add C version of recordmcount compile time code Steven Rostedt
@ 2010-10-14 21:00 ` Steven Rostedt
  2010-10-15  2:50   ` Ingo Molnar
  2010-10-27  3:25   ` Paul Mundt
  2010-10-14 21:00 ` [PATCH 3/3] ftrace: Remove duplicate code for 64 and 32 bit in recordmcount.c Steven Rostedt
  2 siblings, 2 replies; 9+ messages in thread
From: Steven Rostedt @ 2010-10-14 21:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker, linux-arch,
	Michal Marek, linux-kbuild, John Reiser

[-- Attachment #1: 0002-ftrace-x86-Add-support-for-C-version-of-recordmcount.patch --]
[-- Type: text/plain, Size: 2978 bytes --]

From: Steven Rostedt <srostedt@redhat.com>

This patch adds support for the C version of recordmcount; compile
times show a ~12% improvement.

After verifying that this works, other archs can add:

 HAVE_C_MCOUNT_RECORD

to their Kconfig and they will use the C version of recordmcount
instead of the perl version.

Cc: <linux-arch@vger.kernel.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: linux-kbuild@vger.kernel.org
Cc: John Reiser <jreiser@bitwagon.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 Makefile               |    6 ++++++
 arch/x86/Kconfig       |    1 +
 kernel/trace/Kconfig   |    5 +++++
 scripts/Makefile       |    1 +
 scripts/Makefile.build |    4 ++++
 5 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile
index 534c09c..0dd3a8d 100644
--- a/Makefile
+++ b/Makefile
@@ -568,6 +568,12 @@ endif
 
 ifdef CONFIG_FUNCTION_TRACER
 KBUILD_CFLAGS	+= -pg
+ifdef CONFIG_DYNAMIC_FTRACE
+	ifdef CONFIG_HAVE_C_MCOUNT_RECORD
+		BUILD_C_RECORDMCOUNT := y
+		export BUILD_C_RECORDMCOUNT
+	endif
+endif
 endif
 
 # We trigger additional mismatches with less inlining
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c14d8b4..788b50e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -33,6 +33,7 @@ config X86
 	select HAVE_KRETPROBES
 	select HAVE_OPTPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_C_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 538501c..df00fbb 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS
 	help
 	  See Documentation/trace/ftrace-design.txt
 
+config HAVE_C_MCOUNT_RECORD
+	bool
+	help
+	  C version of recordmcount available?
+
 config TRACER_MAX_TRACE
 	bool
 
diff --git a/scripts/Makefile b/scripts/Makefile
index 842dbc2..2e08810 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -11,6 +11,7 @@ hostprogs-$(CONFIG_KALLSYMS)     += kallsyms
 hostprogs-$(CONFIG_LOGO)         += pnmtologo
 hostprogs-$(CONFIG_VT)           += conmakehash
 hostprogs-$(CONFIG_IKCONFIG)     += bin2c
+hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
 
 always		:= $(hostprogs-y) $(hostprogs-m)
 
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index a1a5cf9..4d03a7e 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -209,12 +209,16 @@ cmd_modversions =								\
 endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
+ifdef BUILD_C_RECORDMCOUNT
+cmd_record_mcount = $(srctree)/scripts/recordmcount "$(@)";
+else
 cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
 	"$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
 	"$(if $(CONFIG_64BIT),64,32)" \
 	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
 	"$(if $(part-of-module),1,0)" "$(@)";
 endif
+endif
 
 define rule_cc_o_c
 	$(call echo-cmd,checksrc) $(cmd_checksrc)			  \
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] ftrace: Remove duplicate code for 64 and 32 bit in recordmcount.c
  2010-10-14 21:00 [PATCH 0/3] [GIT PULL][2.6.37] ftrace: C version of recordmcount Steven Rostedt
  2010-10-14 21:00 ` [PATCH 1/3] ftrace: Add C version of recordmcount compile time code Steven Rostedt
  2010-10-14 21:00 ` [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount Steven Rostedt
@ 2010-10-14 21:00 ` Steven Rostedt
  2 siblings, 0 replies; 9+ messages in thread
From: Steven Rostedt @ 2010-10-14 21:00 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker, John Reiser

[-- Attachment #1: 0003-ftrace-Remove-duplicate-code-for-64-and-32-bit-in-re.patch --]
[-- Type: text/plain, Size: 30636 bytes --]

From: Steven Rostedt <srostedt@redhat.com>

The elf reader for recordmcount.c had duplicate functions for both
32 bit and 64 bit elf handling. This was due to the need of using
the 32 and 64 bit elf structures.

This patch consolidates the two by using macros to define the 32
and 64 bit names in a recordmcount.h file, and then by just defining
a RECORD_MCOUNT_64 macro and including recordmcount.h twice we
create the functions for both the 32 bit version as well as the
64 bit version using one code source.

Cc: John Reiser <jreiser@bitwagon.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 scripts/recordmcount.c |  548 +-----------------------------------------------
 scripts/recordmcount.h |  366 ++++++++++++++++++++++++++++++++
 2 files changed, 370 insertions(+), 544 deletions(-)
 create mode 100644 scripts/recordmcount.h

diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 34f32be..7f7f718 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -212,550 +212,10 @@ is_mcounted_section_name(char const *const txtname)
 		0 == strcmp(".text.unlikely", txtname);
 }
 
-/* Append the new shstrtab, Elf32_Shdr[], __mcount_loc and its relocations. */
-static void append32(Elf32_Ehdr *const ehdr,
-		     Elf32_Shdr *const shstr,
-		     uint32_t const *const mloc0,
-		     uint32_t const *const mlocp,
-		     Elf32_Rel const *const mrel0,
-		     Elf32_Rel const *const mrelp,
-		     unsigned int const rel_entsize,
-		     unsigned int const symsec_sh_link)
-{
-	/* Begin constructing output file */
-	Elf32_Shdr mcsec;
-	char const *mc_name = (sizeof(Elf32_Rela) == rel_entsize)
-		? ".rela__mcount_loc"
-		:  ".rel__mcount_loc";
-	unsigned const old_shnum = w2(ehdr->e_shnum);
-	uint32_t const old_shoff = w(ehdr->e_shoff);
-	uint32_t const old_shstr_sh_size   = w(shstr->sh_size);
-	uint32_t const old_shstr_sh_offset = w(shstr->sh_offset);
-	uint32_t t = 1 + strlen(mc_name) + w(shstr->sh_size);
-	uint32_t new_e_shoff;
-
-	shstr->sh_size = w(t);
-	shstr->sh_offset = w(sb.st_size);
-	t += sb.st_size;
-	t += (3u & -t);  /* 4-byte align */
-	new_e_shoff = t;
-
-	/* body for new shstrtab */
-	ulseek(fd_map, sb.st_size, SEEK_SET);
-	uwrite(fd_map, old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size);
-	uwrite(fd_map, mc_name, 1 + strlen(mc_name));
-
-	/* old(modified) Elf32_Shdr table, 4-byte aligned */
-	ulseek(fd_map, t, SEEK_SET);
-	t += sizeof(Elf32_Shdr) * old_shnum;
-	uwrite(fd_map, old_shoff + (void *)ehdr,
-	       sizeof(Elf32_Shdr) * old_shnum);
-
-	/* new sections __mcount_loc and .rel__mcount_loc */
-	t += 2*sizeof(mcsec);
-	mcsec.sh_name = w((sizeof(Elf32_Rela) == rel_entsize) + strlen(".rel")
-		+ old_shstr_sh_size);
-	mcsec.sh_type = w(SHT_PROGBITS);
-	mcsec.sh_flags = w(SHF_ALLOC);
-	mcsec.sh_addr = 0;
-	mcsec.sh_offset = w(t);
-	mcsec.sh_size = w((void *)mlocp - (void *)mloc0);
-	mcsec.sh_link = 0;
-	mcsec.sh_info = 0;
-	mcsec.sh_addralign = w(4);
-	mcsec.sh_entsize = w(4);
-	uwrite(fd_map, &mcsec, sizeof(mcsec));
-
-	mcsec.sh_name = w(old_shstr_sh_size);
-	mcsec.sh_type = (sizeof(Elf32_Rela) == rel_entsize)
-		? w(SHT_RELA)
-		: w(SHT_REL);
-	mcsec.sh_flags = 0;
-	mcsec.sh_addr = 0;
-	mcsec.sh_offset = w((void *)mlocp - (void *)mloc0 + t);
-	mcsec.sh_size   = w((void *)mrelp - (void *)mrel0);
-	mcsec.sh_link = w(symsec_sh_link);
-	mcsec.sh_info = w(old_shnum);
-	mcsec.sh_addralign = w(4);
-	mcsec.sh_entsize = w(rel_entsize);
-	uwrite(fd_map, &mcsec, sizeof(mcsec));
-
-	uwrite(fd_map, mloc0, (void *)mlocp - (void *)mloc0);
-	uwrite(fd_map, mrel0, (void *)mrelp - (void *)mrel0);
-
-	ehdr->e_shoff = w(new_e_shoff);
-	ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum));  /* {.rel,}__mcount_loc */
-	ulseek(fd_map, 0, SEEK_SET);
-	uwrite(fd_map, ehdr, sizeof(*ehdr));
-}
-
-/*
- * append64 and append32 (and other analogous pairs) could be templated
- * using C++, but the complexity is high.  (For an example, look at p_elf.h
- * in the source for UPX, http://upx.sourceforge.net)  So: remember to make
- * the corresponding change in the routine for the other size.
- */
-static void append64(Elf64_Ehdr *const ehdr,
-		     Elf64_Shdr *const shstr,
-		     uint64_t const *const mloc0,
-		     uint64_t const *const mlocp,
-		     Elf64_Rel const *const mrel0,
-		     Elf64_Rel const *const mrelp,
-		     unsigned int const rel_entsize,
-		     unsigned int const symsec_sh_link)
-{
-	/* Begin constructing output file */
-	Elf64_Shdr mcsec;
-	char const *mc_name = (sizeof(Elf64_Rela) == rel_entsize)
-		? ".rela__mcount_loc"
-		:  ".rel__mcount_loc";
-	unsigned const old_shnum = w2(ehdr->e_shnum);
-	uint64_t const old_shoff = w8(ehdr->e_shoff);
-	uint64_t const old_shstr_sh_size   = w8(shstr->sh_size);
-	uint64_t const old_shstr_sh_offset = w8(shstr->sh_offset);
-	uint64_t t = 1 + strlen(mc_name) + w8(shstr->sh_size);
-	uint64_t new_e_shoff;
-
-	shstr->sh_size = w8(t);
-	shstr->sh_offset = w8(sb.st_size);
-	t += sb.st_size;
-	t += (7u & -t);  /* 8-byte align */
-	new_e_shoff = t;
-
-	/* body for new shstrtab */
-	ulseek(fd_map, sb.st_size, SEEK_SET);
-	uwrite(fd_map, old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size);
-	uwrite(fd_map, mc_name, 1 + strlen(mc_name));
-
-	/* old(modified) Elf64_Shdr table, 8-byte aligned */
-	ulseek(fd_map, t, SEEK_SET);
-	t += sizeof(Elf64_Shdr) * old_shnum;
-	uwrite(fd_map, old_shoff + (void *)ehdr,
-		sizeof(Elf64_Shdr) * old_shnum);
-
-	/* new sections __mcount_loc and .rel__mcount_loc */
-	t += 2*sizeof(mcsec);
-	mcsec.sh_name = w((sizeof(Elf64_Rela) == rel_entsize) + strlen(".rel")
-		+ old_shstr_sh_size);
-	mcsec.sh_type = w(SHT_PROGBITS);
-	mcsec.sh_flags = w8(SHF_ALLOC);
-	mcsec.sh_addr = 0;
-	mcsec.sh_offset = w8(t);
-	mcsec.sh_size = w8((void *)mlocp - (void *)mloc0);
-	mcsec.sh_link = 0;
-	mcsec.sh_info = 0;
-	mcsec.sh_addralign = w8(8);
-	mcsec.sh_entsize = w8(8);
-	uwrite(fd_map, &mcsec, sizeof(mcsec));
-
-	mcsec.sh_name = w(old_shstr_sh_size);
-	mcsec.sh_type = (sizeof(Elf64_Rela) == rel_entsize)
-		? w(SHT_RELA)
-		: w(SHT_REL);
-	mcsec.sh_flags = 0;
-	mcsec.sh_addr = 0;
-	mcsec.sh_offset = w8((void *)mlocp - (void *)mloc0 + t);
-	mcsec.sh_size   = w8((void *)mrelp - (void *)mrel0);
-	mcsec.sh_link = w(symsec_sh_link);
-	mcsec.sh_info = w(old_shnum);
-	mcsec.sh_addralign = w8(8);
-	mcsec.sh_entsize = w8(rel_entsize);
-	uwrite(fd_map, &mcsec, sizeof(mcsec));
-
-	uwrite(fd_map, mloc0, (void *)mlocp - (void *)mloc0);
-	uwrite(fd_map, mrel0, (void *)mrelp - (void *)mrel0);
-
-	ehdr->e_shoff = w8(new_e_shoff);
-	ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum));  /* {.rel,}__mcount_loc */
-	ulseek(fd_map, 0, SEEK_SET);
-	uwrite(fd_map, ehdr, sizeof(*ehdr));
-}
-
-/*
- * Look at the relocations in order to find the calls to mcount.
- * Accumulate the section offsets that are found, and their relocation info,
- * onto the end of the existing arrays.
- */
-static uint32_t *sift32_rel_mcount(uint32_t *mlocp,
-				   unsigned const offbase,
-				   Elf32_Rel **const mrelpp,
-				   Elf32_Shdr const *const relhdr,
-				   Elf32_Ehdr const *const ehdr,
-				   unsigned const recsym,
-				   uint32_t const recval,
-				   unsigned const reltype)
-{
-	uint32_t *const mloc0 = mlocp;
-	Elf32_Rel *mrelp = *mrelpp;
-	Elf32_Shdr *const shdr0 = (Elf32_Shdr *)(w(ehdr->e_shoff)
-		+ (void *)ehdr);
-	unsigned const symsec_sh_link = w(relhdr->sh_link);
-	Elf32_Shdr const *const symsec = &shdr0[symsec_sh_link];
-	Elf32_Sym const *const sym0 = (Elf32_Sym const *)(w(symsec->sh_offset)
-		+ (void *)ehdr);
-
-	Elf32_Shdr const *const strsec = &shdr0[w(symsec->sh_link)];
-	char const *const str0 = (char const *)(w(strsec->sh_offset)
-		+ (void *)ehdr);
-
-	Elf32_Rel const *const rel0 = (Elf32_Rel const *)(w(relhdr->sh_offset)
-		+ (void *)ehdr);
-	unsigned rel_entsize = w(relhdr->sh_entsize);
-	unsigned const nrel = w(relhdr->sh_size) / rel_entsize;
-	Elf32_Rel const *relp = rel0;
-
-	unsigned mcountsym = 0;
-	unsigned t;
-
-	for (t = nrel; t; --t) {
-		if (!mcountsym) {
-			Elf32_Sym const *const symp =
-				&sym0[ELF32_R_SYM(w(relp->r_info))];
-
-			if (0 == strcmp((('_' == gpfx) ? "_mcount" : "mcount"),
-					&str0[w(symp->st_name)]))
-				mcountsym = ELF32_R_SYM(w(relp->r_info));
-		}
-		if (mcountsym == ELF32_R_SYM(w(relp->r_info))) {
-			uint32_t const addend = w(w(relp->r_offset) - recval);
-			mrelp->r_offset = w(offbase
-				+ ((void *)mlocp - (void *)mloc0));
-			mrelp->r_info = w(ELF32_R_INFO(recsym, reltype));
-			if (sizeof(Elf32_Rela) == rel_entsize) {
-				((Elf32_Rela *)mrelp)->r_addend = addend;
-				*mlocp++ = 0;
-			} else
-				*mlocp++ = addend;
-
-			mrelp = (Elf32_Rel *)(rel_entsize + (void *)mrelp);
-		}
-		relp = (Elf32_Rel const *)(rel_entsize + (void *)relp);
-	}
-	*mrelpp = mrelp;
-	return mlocp;
-}
-
-static uint64_t *sift64_rel_mcount(uint64_t *mlocp,
-				   unsigned const offbase,
-				   Elf64_Rel **const mrelpp,
-				   Elf64_Shdr const *const relhdr,
-				   Elf64_Ehdr const *const ehdr,
-				   unsigned const recsym,
-				   uint64_t const recval,
-				   unsigned const reltype)
-{
-	uint64_t *const mloc0 = mlocp;
-	Elf64_Rel *mrelp = *mrelpp;
-	Elf64_Shdr *const shdr0 = (Elf64_Shdr *)(w8(ehdr->e_shoff)
-		+ (void *)ehdr);
-	unsigned const symsec_sh_link = w(relhdr->sh_link);
-	Elf64_Shdr const *const symsec = &shdr0[symsec_sh_link];
-	Elf64_Sym const *const sym0 = (Elf64_Sym const *)(w8(symsec->sh_offset)
-		+ (void *)ehdr);
-
-	Elf64_Shdr const *const strsec = &shdr0[w(symsec->sh_link)];
-	char const *const str0 = (char const *)(w8(strsec->sh_offset)
-		+ (void *)ehdr);
-
-	Elf64_Rel const *const rel0 = (Elf64_Rel const *)(w8(relhdr->sh_offset)
-		+ (void *)ehdr);
-	unsigned rel_entsize = w8(relhdr->sh_entsize);
-	unsigned const nrel = w8(relhdr->sh_size) / rel_entsize;
-	Elf64_Rel const *relp = rel0;
-
-	unsigned mcountsym = 0;
-	unsigned t;
-
-	for (t = nrel; 0 != t; --t) {
-		if (!mcountsym) {
-			Elf64_Sym const *const symp =
-				&sym0[ELF64_R_SYM(w8(relp->r_info))];
-			char const *symname = &str0[w(symp->st_name)];
-
-			if ('.' == symname[0])
-				++symname;  /* ppc64 hack */
-			if (0 == strcmp((('_' == gpfx) ? "_mcount" : "mcount"),
-					symname))
-				mcountsym = ELF64_R_SYM(w8(relp->r_info));
-		}
-
-		if (mcountsym == ELF64_R_SYM(w8(relp->r_info))) {
-			uint64_t const addend = w8(w8(relp->r_offset) - recval);
-
-			mrelp->r_offset = w8(offbase
-				+ ((void *)mlocp - (void *)mloc0));
-			mrelp->r_info = w8(ELF64_R_INFO(recsym, reltype));
-			if (sizeof(Elf64_Rela) == rel_entsize) {
-				((Elf64_Rela *)mrelp)->r_addend = addend;
-				*mlocp++ = 0;
-			} else
-				*mlocp++ = addend;
-
-			mrelp = (Elf64_Rel *)(rel_entsize + (void *)mrelp);
-		}
-		relp = (Elf64_Rel const *)(rel_entsize + (void *)relp);
-	}
-	*mrelpp = mrelp;
-
-	return mlocp;
-}
-
-/*
- * Find a symbol in the given section, to be used as the base for relocating
- * the table of offsets of calls to mcount.  A local or global symbol suffices,
- * but avoid a Weak symbol because it may be overridden; the change in value
- * would invalidate the relocations of the offsets of the calls to mcount.
- * Often the found symbol will be the unnamed local symbol generated by
- * GNU 'as' for the start of each section.  For example:
- *    Num:    Value  Size Type    Bind   Vis      Ndx Name
- *      2: 00000000     0 SECTION LOCAL  DEFAULT    1
- */
-static unsigned find32_secsym_ndx(unsigned const txtndx,
-				  char const *const txtname,
-				  uint32_t *const recvalp,
-				  Elf32_Shdr const *const symhdr,
-				  Elf32_Ehdr const *const ehdr)
-{
-	Elf32_Sym const *const sym0 = (Elf32_Sym const *)(w(symhdr->sh_offset)
-		+ (void *)ehdr);
-	unsigned const nsym = w(symhdr->sh_size) / w(symhdr->sh_entsize);
-	Elf32_Sym const *symp;
-	unsigned t;
-
-	for (symp = sym0, t = nsym; t; --t, ++symp) {
-		unsigned int const st_bind = ELF32_ST_BIND(symp->st_info);
-
-		if (txtndx == w2(symp->st_shndx)
-			/* avoid STB_WEAK */
-		    && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) {
-			*recvalp = w(symp->st_value);
-			return symp - sym0;
-		}
-	}
-	fprintf(stderr, "Cannot find symbol for section %d: %s.\n",
-		txtndx, txtname);
-	fail_file();
-}
-
-static unsigned find64_secsym_ndx(unsigned const txtndx,
-				  char const *const txtname,
-				  uint64_t *const recvalp,
-				  Elf64_Shdr const *const symhdr,
-				  Elf64_Ehdr const *const ehdr)
-{
-	Elf64_Sym const *const sym0 = (Elf64_Sym const *)(w8(symhdr->sh_offset)
-		+ (void *)ehdr);
-	unsigned const nsym = w8(symhdr->sh_size) / w8(symhdr->sh_entsize);
-	Elf64_Sym const *symp;
-	unsigned t;
-
-	for (symp = sym0, t = nsym; t; --t, ++symp) {
-		unsigned int const st_bind = ELF64_ST_BIND(symp->st_info);
-
-		if (txtndx == w2(symp->st_shndx)
-			/* avoid STB_WEAK */
-		    && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) {
-			*recvalp = w8(symp->st_value);
-			return symp - sym0;
-		}
-	}
-	fprintf(stderr, "Cannot find symbol for section %d: %s.\n",
-		txtndx, txtname);
-	fail_file();
-}
-
-/*
- * Evade ISO C restriction: no declaration after statement in
- * has32_rel_mcount.
- */
-static char const *
-__has32_rel_mcount(Elf32_Shdr const *const relhdr,  /* is SHT_REL or SHT_RELA */
-		   Elf32_Shdr const *const shdr0,
-		   char const *const shstrtab,
-		   char const *const fname)
-{
-	/* .sh_info depends on .sh_type == SHT_REL[,A] */
-	Elf32_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)];
-	char const *const txtname = &shstrtab[w(txthdr->sh_name)];
-
-	if (0 == strcmp("__mcount_loc", txtname)) {
-		fprintf(stderr, "warning: __mcount_loc already exists: %s\n",
-			fname);
-		succeed_file();
-	}
-	if (SHT_PROGBITS != w(txthdr->sh_type) ||
-	    !is_mcounted_section_name(txtname))
-		return NULL;
-	return txtname;
-}
-
-static char const *has32_rel_mcount(Elf32_Shdr const *const relhdr,
-				    Elf32_Shdr const *const shdr0,
-				    char const *const shstrtab,
-				    char const *const fname)
-{
-	if (SHT_REL  != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type))
-		return NULL;
-	return __has32_rel_mcount(relhdr, shdr0, shstrtab, fname);
-}
-
-static char const *__has64_rel_mcount(Elf64_Shdr const *const relhdr,
-				      Elf64_Shdr const *const shdr0,
-				      char const *const shstrtab,
-				      char const *const fname)
-{
-	/* .sh_info depends on .sh_type == SHT_REL[,A] */
-	Elf64_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)];
-	char const *const txtname = &shstrtab[w(txthdr->sh_name)];
-
-	if (0 == strcmp("__mcount_loc", txtname)) {
-		fprintf(stderr, "warning: __mcount_loc already exists: %s\n",
-			fname);
-		succeed_file();
-	}
-	if (SHT_PROGBITS != w(txthdr->sh_type) ||
-	    !is_mcounted_section_name(txtname))
-		return NULL;
-	return txtname;
-}
-
-static char const *has64_rel_mcount(Elf64_Shdr const *const relhdr,
-				    Elf64_Shdr const *const shdr0,
-				    char const *const shstrtab,
-				    char const *const fname)
-{
-	if (SHT_REL  != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type))
-		return NULL;
-	return __has64_rel_mcount(relhdr, shdr0, shstrtab, fname);
-}
-
-static unsigned tot32_relsize(Elf32_Shdr const *const shdr0,
-			      unsigned nhdr,
-			      const char *const shstrtab,
-			      const char *const fname)
-{
-	unsigned totrelsz = 0;
-	Elf32_Shdr const *shdrp = shdr0;
-	for (; 0 != nhdr; --nhdr, ++shdrp) {
-		if (has32_rel_mcount(shdrp, shdr0, shstrtab, fname))
-			totrelsz += w(shdrp->sh_size);
-	}
-	return totrelsz;
-}
-
-static unsigned tot64_relsize(Elf64_Shdr const *const shdr0,
-			      unsigned nhdr,
-			      const char *const shstrtab,
-			      const char *const fname)
-{
-	unsigned totrelsz = 0;
-	Elf64_Shdr const *shdrp = shdr0;
-
-	for (; nhdr; --nhdr, ++shdrp) {
-		if (has64_rel_mcount(shdrp, shdr0, shstrtab, fname))
-			totrelsz += w8(shdrp->sh_size);
-	}
-	return totrelsz;
-}
-
-/* Overall supervision for Elf32 ET_REL file. */
-static void
-do32(Elf32_Ehdr *const ehdr, char const *const fname, unsigned const reltype)
-{
-	Elf32_Shdr *const shdr0 = (Elf32_Shdr *)(w(ehdr->e_shoff)
-		+ (void *)ehdr);
-	unsigned const nhdr = w2(ehdr->e_shnum);
-	Elf32_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)];
-	char const *const shstrtab = (char const *)(w(shstr->sh_offset)
-		+ (void *)ehdr);
-
-	Elf32_Shdr const *relhdr;
-	unsigned k;
-
-	/* Upper bound on space: assume all relevant relocs are for mcount. */
-	unsigned const totrelsz = tot32_relsize(shdr0, nhdr, shstrtab, fname);
-	Elf32_Rel *const mrel0 = umalloc(totrelsz);
-	Elf32_Rel *      mrelp = mrel0;
-
-	/* 2*sizeof(address) <= sizeof(Elf32_Rel) */
-	uint32_t *const mloc0 = umalloc(totrelsz>>1);
-	uint32_t *      mlocp = mloc0;
-
-	unsigned rel_entsize = 0;
-	unsigned symsec_sh_link = 0;
-
-	for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) {
-		char const *const txtname = has32_rel_mcount(relhdr, shdr0,
-			shstrtab, fname);
-		if (txtname) {
-			uint32_t recval = 0;
-			unsigned const recsym = find32_secsym_ndx(
-				w(relhdr->sh_info), txtname, &recval,
-				&shdr0[symsec_sh_link = w(relhdr->sh_link)],
-				ehdr);
-
-			rel_entsize = w(relhdr->sh_entsize);
-			mlocp = sift32_rel_mcount(mlocp,
-				(void *)mlocp - (void *)mloc0, &mrelp,
-				relhdr, ehdr, recsym, recval, reltype);
-		}
-	}
-	if (mloc0 != mlocp) {
-		append32(ehdr, shstr, mloc0, mlocp, mrel0, mrelp,
-			rel_entsize, symsec_sh_link);
-	}
-	free(mrel0);
-	free(mloc0);
-}
-
-static void
-do64(Elf64_Ehdr *const ehdr, char const *const fname, unsigned const reltype)
-{
-	Elf64_Shdr *const shdr0 = (Elf64_Shdr *)(w8(ehdr->e_shoff)
-		+ (void *)ehdr);
-	unsigned const nhdr = w2(ehdr->e_shnum);
-	Elf64_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)];
-	char const *const shstrtab = (char const *)(w8(shstr->sh_offset)
-		+ (void *)ehdr);
-
-	Elf64_Shdr const *relhdr;
-	unsigned k;
-
-	/* Upper bound on space: assume all relevant relocs are for mcount. */
-	unsigned const totrelsz = tot64_relsize(shdr0, nhdr, shstrtab, fname);
-	Elf64_Rel *const mrel0 = umalloc(totrelsz);
-	Elf64_Rel *      mrelp = mrel0;
-
-	/* 2*sizeof(address) <= sizeof(Elf64_Rel) */
-	uint64_t *const mloc0 = umalloc(totrelsz>>1);
-	uint64_t *      mlocp = mloc0;
-
-	unsigned rel_entsize = 0;
-	unsigned symsec_sh_link = 0;
-
-	for ((relhdr = shdr0), k = nhdr; k; --k, ++relhdr) {
-		char const *const txtname = has64_rel_mcount(relhdr, shdr0,
-			shstrtab, fname);
-		if (txtname) {
-			uint64_t recval = 0;
-			unsigned const recsym = find64_secsym_ndx(
-				w(relhdr->sh_info), txtname, &recval,
-				&shdr0[symsec_sh_link = w(relhdr->sh_link)],
-				ehdr);
-
-			rel_entsize = w8(relhdr->sh_entsize);
-			mlocp = sift64_rel_mcount(mlocp,
-				(void *)mlocp - (void *)mloc0, &mrelp,
-				relhdr, ehdr, recsym, recval, reltype);
-		}
-	}
-	if (mloc0 != mlocp) {
-		append64(ehdr, shstr, mloc0, mlocp, mrel0, mrelp,
-			rel_entsize, symsec_sh_link);
-	}
-	free(mrel0);
-	free(mloc0);
-}
+/* 32 bit and 64 bit are very similar */
+#include "recordmcount.h"
+#define RECORD_MCOUNT_64
+#include "recordmcount.h"
 
 static void
 do_file(char const *const fname)
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
new file mode 100644
index 0000000..7f39d09
--- /dev/null
+++ b/scripts/recordmcount.h
@@ -0,0 +1,366 @@
+/*
+ * recordmcount.h
+ *
+ * This code was taken out of recordmcount.c written by
+ * Copyright 2009 John F. Reiser <jreiser@BitWagon.com>.  All rights reserved.
+ *
+ * The original code had the same algorithms for both 32bit
+ * and 64bit ELF files, but the code was duplicated to support
+ * the difference in structures that were used. This
+ * file defines a macro for everything that differs between
+ * the 64 and 32 bit code, so that including this header
+ * twice creates both sets of functions: the first include,
+ * with RECORD_MCOUNT_64 undefined, emits the 32 bit set, and
+ * the second, with it defined, emits the 64 bit set.
+ *
+ * This conversion to macros was done by:
+ * Copyright 2010 Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
+ *
+ * Licensed under the GNU General Public License, version 2 (GPLv2).
+ */
+#undef append_func
+#undef sift_rel_mcount
+#undef find_secsym_ndx
+#undef __has_rel_mcount
+#undef has_rel_mcount
+#undef tot_relsize
+#undef do_func
+#undef Elf_Ehdr
+#undef Elf_Shdr
+#undef Elf_Rel
+#undef Elf_Rela
+#undef Elf_Sym
+#undef ELF_R_SYM
+#undef ELF_R_INFO
+#undef ELF_ST_BIND
+#undef uint_t
+#undef _w
+#undef _align
+#undef _size
+
+#ifdef RECORD_MCOUNT_64
+# define append_func		append64
+# define sift_rel_mcount	sift64_rel_mcount
+# define find_secsym_ndx	find64_secsym_ndx
+# define __has_rel_mcount	__has64_rel_mcount
+# define has_rel_mcount		has64_rel_mcount
+# define tot_relsize		tot64_relsize
+# define do_func		do64
+# define Elf_Ehdr		Elf64_Ehdr
+# define Elf_Shdr		Elf64_Shdr
+# define Elf_Rel		Elf64_Rel
+# define Elf_Rela		Elf64_Rela
+# define Elf_Sym		Elf64_Sym
+# define ELF_R_SYM		ELF64_R_SYM
+# define ELF_R_INFO		ELF64_R_INFO
+# define ELF_ST_BIND		ELF64_ST_BIND
+# define uint_t			uint64_t
+# define _w			w8
+# define _align			7u
+# define _size			8
+#else
+# define append_func		append32
+# define sift_rel_mcount	sift32_rel_mcount
+# define find_secsym_ndx	find32_secsym_ndx
+# define __has_rel_mcount	__has32_rel_mcount
+# define has_rel_mcount		has32_rel_mcount
+# define tot_relsize		tot32_relsize
+# define do_func		do32
+# define Elf_Ehdr		Elf32_Ehdr
+# define Elf_Shdr		Elf32_Shdr
+# define Elf_Rel		Elf32_Rel
+# define Elf_Rela		Elf32_Rela
+# define Elf_Sym		Elf32_Sym
+# define ELF_R_SYM		ELF32_R_SYM
+# define ELF_R_INFO		ELF32_R_INFO
+# define ELF_ST_BIND		ELF32_ST_BIND
+# define uint_t			uint32_t
+# define _w			w
+# define _align			3u
+# define _size			4
+#endif
+
+/* Append the new shstrtab, Elf_Shdr[], __mcount_loc and its relocations. */
+static void append_func(Elf_Ehdr *const ehdr,
+			Elf_Shdr *const shstr,
+			uint_t const *const mloc0,
+			uint_t const *const mlocp,
+			Elf_Rel const *const mrel0,
+			Elf_Rel const *const mrelp,
+			unsigned int const rel_entsize,
+			unsigned int const symsec_sh_link)
+{
+	/* Begin constructing output file */
+	Elf_Shdr mcsec;
+	char const *mc_name = (sizeof(Elf_Rela) == rel_entsize)
+		? ".rela__mcount_loc"
+		:  ".rel__mcount_loc";
+	unsigned const old_shnum = w2(ehdr->e_shnum);
+	uint_t const old_shoff = _w(ehdr->e_shoff);
+	uint_t const old_shstr_sh_size   = _w(shstr->sh_size);
+	uint_t const old_shstr_sh_offset = _w(shstr->sh_offset);
+	uint_t t = 1 + strlen(mc_name) + _w(shstr->sh_size);
+	uint_t new_e_shoff;
+
+	shstr->sh_size = _w(t);
+	shstr->sh_offset = _w(sb.st_size);
+	t += sb.st_size;
+	t += (_align & -t);  /* word-byte align */
+	new_e_shoff = t;
+
+	/* body for new shstrtab */
+	ulseek(fd_map, sb.st_size, SEEK_SET);
+	uwrite(fd_map, old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size);
+	uwrite(fd_map, mc_name, 1 + strlen(mc_name));
+
+	/* old(modified) Elf_Shdr table, word-byte aligned */
+	ulseek(fd_map, t, SEEK_SET);
+	t += sizeof(Elf_Shdr) * old_shnum;
+	uwrite(fd_map, old_shoff + (void *)ehdr,
+	       sizeof(Elf_Shdr) * old_shnum);
+
+	/* new sections __mcount_loc and .rel__mcount_loc */
+	t += 2*sizeof(mcsec);
+	mcsec.sh_name = w((sizeof(Elf_Rela) == rel_entsize) + strlen(".rel")
+		+ old_shstr_sh_size);
+	mcsec.sh_type = w(SHT_PROGBITS);
+	mcsec.sh_flags = _w(SHF_ALLOC);
+	mcsec.sh_addr = 0;
+	mcsec.sh_offset = _w(t);
+	mcsec.sh_size = _w((void *)mlocp - (void *)mloc0);
+	mcsec.sh_link = 0;
+	mcsec.sh_info = 0;
+	mcsec.sh_addralign = _w(_size);
+	mcsec.sh_entsize = _w(_size);
+	uwrite(fd_map, &mcsec, sizeof(mcsec));
+
+	mcsec.sh_name = w(old_shstr_sh_size);
+	mcsec.sh_type = (sizeof(Elf_Rela) == rel_entsize)
+		? w(SHT_RELA)
+		: w(SHT_REL);
+	mcsec.sh_flags = 0;
+	mcsec.sh_addr = 0;
+	mcsec.sh_offset = _w((void *)mlocp - (void *)mloc0 + t);
+	mcsec.sh_size   = _w((void *)mrelp - (void *)mrel0);
+	mcsec.sh_link = w(symsec_sh_link);
+	mcsec.sh_info = w(old_shnum);
+	mcsec.sh_addralign = _w(_size);
+	mcsec.sh_entsize = _w(rel_entsize);
+	uwrite(fd_map, &mcsec, sizeof(mcsec));
+
+	uwrite(fd_map, mloc0, (void *)mlocp - (void *)mloc0);
+	uwrite(fd_map, mrel0, (void *)mrelp - (void *)mrel0);
+
+	ehdr->e_shoff = _w(new_e_shoff);
+	ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum));  /* {.rel,}__mcount_loc */
+	ulseek(fd_map, 0, SEEK_SET);
+	uwrite(fd_map, ehdr, sizeof(*ehdr));
+}
+
+
+/*
+ * Look at the relocations in order to find the calls to mcount.
+ * Accumulate the section offsets that are found, and their relocation info,
+ * onto the end of the existing arrays.
+ */
+static uint_t *sift_rel_mcount(uint_t *mlocp,
+			       unsigned const offbase,
+			       Elf_Rel **const mrelpp,
+			       Elf_Shdr const *const relhdr,
+			       Elf_Ehdr const *const ehdr,
+			       unsigned const recsym,
+			       uint_t const recval,
+			       unsigned const reltype)
+{
+	uint_t *const mloc0 = mlocp;
+	Elf_Rel *mrelp = *mrelpp;
+	Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff)
+		+ (void *)ehdr);
+	unsigned const symsec_sh_link = w(relhdr->sh_link);
+	Elf_Shdr const *const symsec = &shdr0[symsec_sh_link];
+	Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symsec->sh_offset)
+		+ (void *)ehdr);
+
+	Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)];
+	char const *const str0 = (char const *)(_w(strsec->sh_offset)
+		+ (void *)ehdr);
+
+	Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset)
+		+ (void *)ehdr);
+	unsigned rel_entsize = _w(relhdr->sh_entsize);
+	unsigned const nrel = _w(relhdr->sh_size) / rel_entsize;
+	Elf_Rel const *relp = rel0;
+
+	unsigned mcountsym = 0;
+	unsigned t;
+
+	for (t = nrel; t; --t) {
+		if (!mcountsym) {
+			Elf_Sym const *const symp =
+				&sym0[ELF_R_SYM(_w(relp->r_info))];
+			char const *symname = &str0[w(symp->st_name)];
+
+			if ('.' == symname[0])
+				++symname;  /* ppc64 hack */
+			if (0 == strcmp((('_' == gpfx) ? "_mcount" : "mcount"),
+					symname))
+				mcountsym = ELF_R_SYM(_w(relp->r_info));
+		}
+
+		if (mcountsym == ELF_R_SYM(_w(relp->r_info))) {
+			uint_t const addend = _w(_w(relp->r_offset) - recval);
+
+			mrelp->r_offset = _w(offbase
+				+ ((void *)mlocp - (void *)mloc0));
+			mrelp->r_info = _w(ELF_R_INFO(recsym, reltype));
+			if (sizeof(Elf_Rela) == rel_entsize) {
+				((Elf_Rela *)mrelp)->r_addend = addend;
+				*mlocp++ = 0;
+			} else
+				*mlocp++ = addend;
+
+			mrelp = (Elf_Rel *)(rel_entsize + (void *)mrelp);
+		}
+		relp = (Elf_Rel const *)(rel_entsize + (void *)relp);
+	}
+	*mrelpp = mrelp;
+	return mlocp;
+}
+
+
+/*
+ * Find a symbol in the given section, to be used as the base for relocating
+ * the table of offsets of calls to mcount.  A local or global symbol suffices,
+ * but avoid a Weak symbol because it may be overridden; the change in value
+ * would invalidate the relocations of the offsets of the calls to mcount.
+ * Often the found symbol will be the unnamed local symbol generated by
+ * GNU 'as' for the start of each section.  For example:
+ *    Num:    Value  Size Type    Bind   Vis      Ndx Name
+ *      2: 00000000     0 SECTION LOCAL  DEFAULT    1
+ */
+static unsigned find_secsym_ndx(unsigned const txtndx,
+				char const *const txtname,
+				uint_t *const recvalp,
+				Elf_Shdr const *const symhdr,
+				Elf_Ehdr const *const ehdr)
+{
+	Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symhdr->sh_offset)
+		+ (void *)ehdr);
+	unsigned const nsym = _w(symhdr->sh_size) / _w(symhdr->sh_entsize);
+	Elf_Sym const *symp;
+	unsigned t;
+
+	for (symp = sym0, t = nsym; t; --t, ++symp) {
+		unsigned int const st_bind = ELF_ST_BIND(symp->st_info);
+
+		if (txtndx == w2(symp->st_shndx)
+			/* avoid STB_WEAK */
+		    && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) {
+			*recvalp = _w(symp->st_value);
+			return symp - sym0;
+		}
+	}
+	fprintf(stderr, "Cannot find symbol for section %d: %s.\n",
+		txtndx, txtname);
+	fail_file();
+}
+
+
+/* Evade ISO C restriction: no declaration after statement in has_rel_mcount. */
+static char const *
+__has_rel_mcount(Elf_Shdr const *const relhdr,  /* is SHT_REL or SHT_RELA */
+		 Elf_Shdr const *const shdr0,
+		 char const *const shstrtab,
+		 char const *const fname)
+{
+	/* .sh_info depends on .sh_type == SHT_REL[,A] */
+	Elf_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)];
+	char const *const txtname = &shstrtab[w(txthdr->sh_name)];
+
+	if (0 == strcmp("__mcount_loc", txtname)) {
+		fprintf(stderr, "warning: __mcount_loc already exists: %s\n",
+			fname);
+		succeed_file();
+	}
+	if (SHT_PROGBITS != w(txthdr->sh_type) ||
+	    !is_mcounted_section_name(txtname))
+		return NULL;
+	return txtname;
+}
+
+static char const *has_rel_mcount(Elf_Shdr const *const relhdr,
+				  Elf_Shdr const *const shdr0,
+				  char const *const shstrtab,
+				  char const *const fname)
+{
+	if (SHT_REL  != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type))
+		return NULL;
+	return __has_rel_mcount(relhdr, shdr0, shstrtab, fname);
+}
+
+
+static unsigned tot_relsize(Elf_Shdr const *const shdr0,
+			    unsigned nhdr,
+			    const char *const shstrtab,
+			    const char *const fname)
+{
+	unsigned totrelsz = 0;
+	Elf_Shdr const *shdrp = shdr0;
+
+	for (; nhdr; --nhdr, ++shdrp) {
+		if (has_rel_mcount(shdrp, shdr0, shstrtab, fname))
+			totrelsz += _w(shdrp->sh_size);
+	}
+	return totrelsz;
+}
+
+
+/* Overall supervision for Elf32 or Elf64 ET_REL file. */
+static void
+do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype)
+{
+	Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff)
+		+ (void *)ehdr);
+	unsigned const nhdr = w2(ehdr->e_shnum);
+	Elf_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)];
+	char const *const shstrtab = (char const *)(_w(shstr->sh_offset)
+		+ (void *)ehdr);
+
+	Elf_Shdr const *relhdr;
+	unsigned k;
+
+	/* Upper bound on space: assume all relevant relocs are for mcount. */
+	unsigned const totrelsz = tot_relsize(shdr0, nhdr, shstrtab, fname);
+	Elf_Rel *const mrel0 = umalloc(totrelsz);
+	Elf_Rel *      mrelp = mrel0;
+
+	/* 2*sizeof(address) <= sizeof(Elf_Rel) */
+	uint_t *const mloc0 = umalloc(totrelsz>>1);
+	uint_t *      mlocp = mloc0;
+
+	unsigned rel_entsize = 0;
+	unsigned symsec_sh_link = 0;
+
+	for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) {
+		char const *const txtname = has_rel_mcount(relhdr, shdr0,
+			shstrtab, fname);
+		if (txtname) {
+			uint_t recval = 0;
+			unsigned const recsym = find_secsym_ndx(
+				w(relhdr->sh_info), txtname, &recval,
+				&shdr0[symsec_sh_link = w(relhdr->sh_link)],
+				ehdr);
+
+			rel_entsize = _w(relhdr->sh_entsize);
+			mlocp = sift_rel_mcount(mlocp,
+				(void *)mlocp - (void *)mloc0, &mrelp,
+				relhdr, ehdr, recsym, recval, reltype);
+		}
+	}
+	if (mloc0 != mlocp) {
+		append_func(ehdr, shstr, mloc0, mlocp, mrel0, mrelp,
+			    rel_entsize, symsec_sh_link);
+	}
+	free(mrel0);
+	free(mloc0);
+}
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount
  2010-10-14 21:00 ` [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount Steven Rostedt
@ 2010-10-15  2:50   ` Ingo Molnar
  2010-10-15  3:14     ` Steven Rostedt
  2010-10-27  3:25   ` Paul Mundt
  1 sibling, 1 reply; 9+ messages in thread
From: Ingo Molnar @ 2010-10-15  2:50 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, linux-arch,
	Michal Marek, linux-kbuild, John Reiser


* Steven Rostedt <rostedt@goodmis.org> wrote:

> From: Steven Rostedt <srostedt@redhat.com>
> 
> This patch adds the support for the C version of recordmcount and
> compile times show ~ 12% improvement.

I reported this recordmcount performance problem 2 years ago. Better
late than never, I guess.

> +ifdef CONFIG_DYNAMIC_FTRACE
> +	ifdef CONFIG_HAVE_C_MCOUNT_RECORD
> +		BUILD_C_RECORDMCOUNT := y
> +		export BUILD_C_RECORDMCOUNT

> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -33,6 +33,7 @@ config X86
>  	select HAVE_KRETPROBES
>  	select HAVE_OPTPROBES
>  	select HAVE_FTRACE_MCOUNT_RECORD
> +	select HAVE_C_MCOUNT_RECORD

The naming is inconsistent here - it should be HAVE_C_RECORDMCOUNT, like 
the build variable has, and like the utility is called. If we are going 
to add this flag to most architectures we should name it consistently.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount
  2010-10-15  2:50   ` Ingo Molnar
@ 2010-10-15  3:14     ` Steven Rostedt
  2010-10-15  3:18       ` Ingo Molnar
  0 siblings, 1 reply; 9+ messages in thread
From: Steven Rostedt @ 2010-10-15  3:14 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, linux-arch,
	Michal Marek, linux-kbuild, John Reiser

On Fri, 2010-10-15 at 04:50 +0200, Ingo Molnar wrote:
> * Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > From: Steven Rostedt <srostedt@redhat.com>
> > 
> > This patch adds the support for the C version of recordmcount and
> > compile times show ~ 12% improvement.
> 
> I reported this recordmcount performance problem 2 years ago. Better 
> later than never i guess.

And I also remember saying after I posted this code that it would have a
compile time performance hit. Heck, it's a perl script running on every
object file. It was obvious what was at issue here. But it's better to
slow down the kernel build than to brick network cards. Also, perl was
much easier to do.

That said, the embarrassing thing is not that I knew (or you reported
it) about this performance problem. I'm actually quite embarrassed that
I had this code sitting in my inbox for over a year. I just kept having
other things that were more important coming up than lowering the
compile time of the kernel. Although, I did work to get streamline
config to offset this performance hit.

Finally, while at the End Users Summit, I decided to take a look at
John's code, and I was quite impressed.

But as you said, better late than never.


> 
> > +ifdef CONFIG_DYNAMIC_FTRACE
> > +	ifdef CONFIG_HAVE_C_MCOUNT_RECORD
> > +		BUILD_C_RECORDMCOUNT := y
> > +		export BUILD_C_RECORDMCOUNT
> 
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -33,6 +33,7 @@ config X86
> >  	select HAVE_KRETPROBES
> >  	select HAVE_OPTPROBES
> >  	select HAVE_FTRACE_MCOUNT_RECORD
> > +	select HAVE_C_MCOUNT_RECORD
> 
> The naming is inconsistent here - it should be HAVE_C_RECORDMCOUNT, like 
> the build variable has, and like the utility is called. If we are going 
> to add this flag to most architectures we should name it consistently.

Sure, want me to rebase it or just write a patch on top of it?

-- Steve



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount
  2010-10-15  3:14     ` Steven Rostedt
@ 2010-10-15  3:18       ` Ingo Molnar
  2010-10-15  3:23         ` Steven Rostedt
  0 siblings, 1 reply; 9+ messages in thread
From: Ingo Molnar @ 2010-10-15  3:18 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, linux-arch,
	Michal Marek, linux-kbuild, John Reiser


* Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 2010-10-15 at 04:50 +0200, Ingo Molnar wrote:
> > * Steven Rostedt <rostedt@goodmis.org> wrote:
> > 
> > > From: Steven Rostedt <srostedt@redhat.com>
> > > 
> > > This patch adds the support for the C version of recordmcount and
> > > compile times show ~ 12% improvement.
> > 
> > I reported this recordmcount performance problem 2 years ago. Better 
> > later than never i guess.
> 
> And I also remember saying after I posted this code that it would have 
> a compile time performance hit. Heck, it's a perl script running on 
> every object file. It was obvious what was at issue here. But it's 
> better to slow down the kernel build than to brick network cards. 

Well, it's even better to do neither!

> Also, perl was much easier to do.

Let's write the whole kernel in perl and forget about performance ;-)

> That said, the embarrassing thing is not that I knew (or you reported 
> it) about this performance problem. I'm actually quite embarrassed 
> that I had this code sitting in my inbox for over a year. I just kept 
> having other things that were more important coming up than lowering 
> the compile time of the kernel. Although, I did work to get streamline 
> config to offset this performance hit.
> 
> Finally, while at the End Users Summit, I decided to take a look at 
> John's code, and I was quite impressed.
> 
> But as you said, better late than never.

Yeah. Note that as a maintainer i need to grumble when i see some 
not-so-good event - even if there's a happy resolution! Otherwise such 
cases would tend to creep up in frequency ;-)

> > > +ifdef CONFIG_DYNAMIC_FTRACE
> > > +	ifdef CONFIG_HAVE_C_MCOUNT_RECORD
> > > +		BUILD_C_RECORDMCOUNT := y
> > > +		export BUILD_C_RECORDMCOUNT
> > 
> > > --- a/arch/x86/Kconfig
> > > +++ b/arch/x86/Kconfig
> > > @@ -33,6 +33,7 @@ config X86
> > >  	select HAVE_KRETPROBES
> > >  	select HAVE_OPTPROBES
> > >  	select HAVE_FTRACE_MCOUNT_RECORD
> > > +	select HAVE_C_MCOUNT_RECORD
> > 
> > The naming is inconsistent here - it should be HAVE_C_RECORDMCOUNT, like 
> > the build variable has, and like the utility is called. If we are going 
> > to add this flag to most architectures we should name it consistently.
> 
> Sure, want me to rebase it or just write a patch on top of it?

Sure, patch on top would be fine.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount
  2010-10-15  3:18       ` Ingo Molnar
@ 2010-10-15  3:23         ` Steven Rostedt
  0 siblings, 0 replies; 9+ messages in thread
From: Steven Rostedt @ 2010-10-15  3:23 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Andrew Morton, Frederic Weisbecker, linux-arch,
	Michal Marek, linux-kbuild, John Reiser

On Fri, 2010-10-15 at 05:18 +0200, Ingo Molnar wrote:
> * Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > On Fri, 2010-10-15 at 04:50 +0200, Ingo Molnar wrote:
> > > * Steven Rostedt <rostedt@goodmis.org> wrote:

> > Also, perl was much easier to do.
> 
> Lets write the whole kernel in perl and forget about performance ;-)

Shhh, you're letting people know about my evil secret agenda!


> > > 
> > > The naming is inconsistent here - it should be HAVE_C_RECORDMCOUNT, like 
> > > the build variable has, and like the utility is called. If we are going 
> > > to add this flag to most architectures we should name it consistently.
> > 
> > Sure, want me to rebase it or just write a patch on top of it?
> 
> Sure, patch on top would be fine.

OK, I'll add a patch on top.

Thanks,

-- Steve



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount
  2010-10-14 21:00 ` [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount Steven Rostedt
  2010-10-15  2:50   ` Ingo Molnar
@ 2010-10-27  3:25   ` Paul Mundt
  1 sibling, 0 replies; 9+ messages in thread
From: Paul Mundt @ 2010-10-27  3:25 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	linux-arch, Michal Marek, linux-kbuild, John Reiser

On Thu, Oct 14, 2010 at 05:00:16PM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <srostedt@redhat.com>
> 
> This patch adds the support for the C version of recordmcount and
> compile times show ~ 12% improvement.
> 
> After verifying this works, other archs can add:
> 
>  HAVE_C_MCOUNT_RECORD
> 
> in its Kconfig and it will use the C version of recordmcount
> instead of the perl version.
> 
While I haven't had the chance to debug this yet, turning it on for SH
blows up immediately:

ftrace: allocating 15200 entries in 30 pages
------------[ cut here ]------------
WARNING: at /home/pmundt/devel/git/sh-2.6/kernel/trace/ftrace.c:1007
Modules linked in:

Pid : 0, Comm:           swapper
CPU : 0                  Not tainted  (2.6.36-05622-g38ab134-dirty #508)

PC is at ftrace_bug+0x78/0x23c
PR is at ftrace_bug+0x74/0x23c
PC  : 80064df4 SP  : 8056ff70 SR  : 400080f0 TEA : c0000004
R0  : 00000001 R1  : 00000001 R2  : 8064d862 R3  : 8056ff64
R4  : 805b47b4 R5  : 00000001 R6  : 00000000 R7  : 00000001
R8  : 803b15d8 R9  : 00000001 R10 : 9fc38be8 R11 : 00000000
R12 : 8064e88c R13 : 8064e880 R14 : 8056ff70
MACH: 00000000 MACL: 003d0900 GBR : 296e1678 PR  : 80064df0

Call trace:
 [<80066a86>] ftrace_process_locs+0x15a/0x284
 [<803b15d8>] dns_query+0x0/0x26c
 [<805f6a1a>] ftrace_init+0x112/0x1a8
 [<801deec0>] strlen+0x0/0x58
 [<8008f098>] get_zeroed_page+0x0/0x34
 [<805f0918>] start_kernel+0x3e0/0x480
 [<801deec0>] strlen+0x0/0x58
 [<801eb388>] debug_smp_processor_id+0x0/0xe4
 [<80002132>] _stext+0x132/0x140

Code:
  80064dee:  mov       r9, r5
  80064df0:  tst       r9, r9
  80064df2:  bt        80064df6
->80064df4:  trapa     #62
  80064df6:  bra       80064ef2
  80064df8:  mov       r9, r5
  80064dfa:  mov.l     80064f68 <ftrace_bug+0x1ec/0x23c>, r1  ! 8064d862 <__warned.27604+0x0/0x1>
  80064dfc:  mov.b     r2, @r1
  80064dfe:  mov.l     80064f48 <ftrace_bug+0x1cc/0x23c>, r1  ! 8057021c <ftrace_disabled+0x0/0x4>

---[ end trace 4eaa2a86a8e2da22 ]---
ftrace failed to modify [<803b15d8>] dns_query+0x0/0x26c
 actual: 02:d1:22:4f
Testing tracer nop: PASSED

Suggestions?

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2010-10-27  3:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-14 21:00 [PATCH 0/3] [GIT PULL][2.6.37] ftrace: C version of recordmcount Steven Rostedt
2010-10-14 21:00 ` [PATCH 1/3] ftrace: Add C version of recordmcount compile time code Steven Rostedt
2010-10-14 21:00 ` [PATCH 2/3] ftrace/x86: Add support for C version of recordmcount Steven Rostedt
2010-10-15  2:50   ` Ingo Molnar
2010-10-15  3:14     ` Steven Rostedt
2010-10-15  3:18       ` Ingo Molnar
2010-10-15  3:23         ` Steven Rostedt
2010-10-27  3:25   ` Paul Mundt
2010-10-14 21:00 ` [PATCH 3/3] ftrace: Remove duplicate code for 64 and 32 bit in recordmcount.c Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox