Re: [PATCH V2 5/6] x86/intel_rdt: Use perf infrastructure for measurements

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: Reinette Chatre <reinette.chatre@intel.com>
Cc: tglx@linutronix.de, fenghua.yu@intel.com, tony.luck@intel.com,
	mingo@redhat.com, acme@kernel.org,
	vikas.shivappa@linux.intel.com, gavin.hindman@intel.com,
	jithu.joseph@intel.com, dave.hansen@intel.com, hpa@zytor.com,
	x86@kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH V2 5/6] x86/intel_rdt: Use perf infrastructure for measurements
Date: Thu, 6 Sep 2018 23:38:57 +0200	[thread overview]
Message-ID: <20180906213857.GF9358@worktop.programming.kicks-ass.net> (raw)
In-Reply-To: <12da3ce5-710b-b18e-8c0c-a0aa3724afd2@intel.com>

On Thu, Sep 06, 2018 at 01:37:14PM -0700, Reinette Chatre wrote:
> On 9/6/2018 1:29 PM, Peter Zijlstra wrote:
> > On Thu, Sep 06, 2018 at 01:05:05PM -0700, Reinette Chatre wrote:
> >> When I separate the above into the two functions it just becomes either:
> >>                    rdpmcl(l2_hit_pmcnum, l2_hits_after);
> >>                    rdpmcl(l2_miss_pmcnum, l2_miss_after);
> >> or:
> >>                    rdpmcl(l3_hit_pmcnum, l3_hits_after);
> >>                    rdpmcl(l3_miss_pmcnum, l3_miss_after);
> >>
> > 
> > Right, which is the exact _same_ code, so you only need a single
> > function.
> > 
> 
> From my understanding it is not this code specifically that is causing
> the cache misses but instead the code and variables used to decide
> whether to run them or not. These would still be needed when I extract
> the above into inline functions.

Oh, seriously, use your brain.. This is trivial stuff. Compare the two
functions l2/l3.

They are _identical_ except for some silly bits before/after and
some spurious differences because apparently you cannot copy/paste.
I thought there would be some differences in the loop, but not even
that. They really are identical.

The below should work I think.

---

/*
 * Cache hit and miss counter values sampled immediately before and
 * immediately after the measured read loop.
 */
struct recidency_counts {
	u64 miss_before;
	u64 hits_before;
	u64 miss_after;
	u64 hits_after;
};

/*
 * measure_residency_fn - count cache hits and misses while reading a buffer
 * @miss_attr: perf event attributes used to create the "miss" counter
 * @hit_attr:  perf event attributes used to create the "hit" counter
 * @plr:       region description; ->cpu, ->line_size, ->kmem and ->size
 *             are read from it
 * @counts:    receives the counter values read before and after the loop
 *
 * Creates two kernel perf counters on plr->cpu, checks each with one local
 * read while interrupts are disabled, writes prefetch_disable_bits to
 * MSR_MISC_FEATURE_CONTROL, then reads plr->kmem in plr->line_size strides
 * with the hit/miss counters sampled before and after the loop.
 *
 * Always returns 0.  If an event cannot be created or read, no measurement
 * is made and all four values stored in @counts are zero.
 *
 * NOTE(review): @plr is declared "void *" but is dereferenced below; the
 * sketch presumably intends a pointer to the pseudo-lock region structure -
 * confirm the type before applying.
 */
static int measure_residency_fn(struct perf_event_attr *miss_attr,
				struct perf_event_attr *hit_attr,
				void *plr, struct recidency_counts *counts)
 {
+	u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
+	struct perf_event *miss_event, *hit_event;
+	int hit_pmcnum, miss_pmcnum;
 	unsigned int line_size;
 	unsigned int size;
 	unsigned long i;
 	void *mem_r;
+	u64 tmp;
 
+	miss_event = perf_event_create_kernel_counter(miss_attr,
+							 plr->cpu,
+							 NULL, NULL, NULL);
+	if (IS_ERR(miss_event))
+		goto out;
+
+	hit_event = perf_event_create_kernel_counter(hit_attr,
+							plr->cpu,
+							NULL, NULL, NULL);
+	if (IS_ERR(hit_event))
+		goto out_miss;
+
+	local_irq_disable();
+	/*
+	 * Check any possible error state of events used by performing
+	 * one local read.
+	 */
+	if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
+		local_irq_enable();
+		goto out_hit;
+	}
+	if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
+		local_irq_enable();
+		goto out_hit;
+	}
+
+	/*
+	 * Disable hardware prefetchers.
 	 *
+	 * Call wrmsr directly to avoid the local register variables from
+	 * being overwritten due to reordering of their assignment with
+	 * the wrmsr calls.
+	 */
+	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+
+	/* Initialize rest of local variables */
+	/*
+	 * Performance event has been validated right before this with
+	 * interrupts disabled - it is thus safe to read the counter index.
+	 */
+	miss_pmcnum = x86_perf_rdpmc_index(miss_event);
+	hit_pmcnum = x86_perf_rdpmc_index(hit_event);
+	line_size = READ_ONCE(plr->line_size);
+	mem_r = READ_ONCE(plr->kmem);
+	size = READ_ONCE(plr->size);
+
+	/*
+	 * Read counter variables twice - first to load the instructions
+	 * used in L1 cache, second to capture accurate value that does not
+	 * include cache misses incurred because of instruction loads.
+	 */
+	rdpmcl(hit_pmcnum, hits_before);
+	rdpmcl(miss_pmcnum, miss_before);
+	/*
+	 * Barrier between the warm-up counter reads above and the reads
+	 * below whose values are kept.
+	 */
+	rmb();
+	rdpmcl(hit_pmcnum, hits_before);
+	rdpmcl(miss_pmcnum, miss_before);
+	/*
+	 * Barrier between the "before" counter reads and the measured loop.
+	 */
+	rmb();
+	for (i = 0; i < size; i += line_size) {
+		/*
+		 * Add a barrier to prevent speculative execution of this
+		 * loop reading beyond the end of the buffer.
+		 */
+		rmb();
+		asm volatile("mov (%0,%1,1), %%eax\n\t"
+			     :
+			     : "r" (mem_r), "r" (i)
+			     : "%eax", "memory");
+	}
	rmb();
+	rdpmcl(hit_pmcnum, hits_after);
+	rdpmcl(miss_pmcnum, miss_after);
+	rmb();
+	/* Re-enable hardware prefetchers */
+	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
+	local_irq_enable();
+out_hit:
+	perf_event_release_kernel(hit_event);
+out_miss:
+	perf_event_release_kernel(miss_event);
+out:
	/*
	 * Reached on success and on every failure path.  The locals are
	 * zero-initialised, so all counts are zero on failure.
	 */
	counts->miss_before = miss_before;
	counts->hits_before = hits_before;
	counts->miss_after  = miss_after;
	counts->hits_after  = hits_after;
+	return 0;
+}

measure_l2_recidency()
{
	struct recidency_counts counts;

+       switch (boot_cpu_data.x86_model) {
+       case INTEL_FAM6_ATOM_GOLDMONT:
+       case INTEL_FAM6_ATOM_GEMINI_LAKE:
+               perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
+                                                  .umask = 0x10);
+               perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
+                                                 .umask = 0x2);
+               break;
+       default:
+               goto out;
+       }

	measure_recidency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);

	trace_pseudo_lock_l2(counts->hits_after - counts->hits_before,
			     counts->miss_after - counts->miss_before);
out:
+	plr->thread_done = 1;
+	wake_up_interruptible(&plr->lock_thread_wq);
}

measure_l3_residency()
{
	struct recidency_counts counts;

        switch (boot_cpu_data.x86_model) {
        case INTEL_FAM6_BROADWELL_X:
                /* On BDW the l3_hit_bits count references, not hits */
+               perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
+                                                 .umask = 0x4f);
+               perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
+                                                  .umask = 0x41);
                break;
        default:
                goto out;
        }

	measure_recidency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);

+       counts->miss_after -= counts->miss_before;
+       if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
+               /*
+                * On BDW references and misses are counted, need to adjust.
+                * Sometimes the "hits" counter is a bit more than the
+                * references, for example, x references but x + 1 hits.
+                * To not report invalid hit values in this case we treat
+                * that as misses equal to references.
+                */
+               /* First compute the number of cache references measured */
+               counts->hits_after -= counts->hits_before;
+               /* Next convert references to cache hits */
+               counts->hits_after -= counts->miss_after > counts->hits_after ?
+                                       counts->hits_after : counts->miss_after;
+       } else {
+               counts->hits_after -= counts->hits_before;
        }

+       trace_pseudo_lock_l3(counts->hits_after, counts->miss_after);
}

next prev parent reply	other threads:[~2018-09-06 21:39 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-16 20:16 [PATCH V2 0/6] perf/core and x86/intel_rdt: Fix lack of coordination with perf Reinette Chatre
2018-08-16 20:16 ` [PATCH V2 1/6] perf/core: Add sanity check to deal with pinned event failure Reinette Chatre
2018-08-16 20:16 ` [PATCH V2 2/6] x86/intel_rdt: Remove local register variables Reinette Chatre
2018-08-16 20:16 ` [PATCH V2 3/6] x86/intel_rdt: Create required perf event attributes Reinette Chatre
2018-08-16 20:16 ` [PATCH V2 4/6] x86/intel_rdt: Add helper to obtain performance counter index Reinette Chatre
2018-09-06 14:47   ` Peter Zijlstra
2018-09-06 23:26     ` Reinette Chatre
2018-08-16 20:16 ` [PATCH V2 5/6] x86/intel_rdt: Use perf infrastructure for measurements Reinette Chatre
2018-09-06 14:15   ` Peter Zijlstra
2018-09-06 19:21     ` Reinette Chatre
2018-09-06 19:44       ` Peter Zijlstra
2018-09-06 20:05         ` Reinette Chatre
2018-09-06 20:29           ` Peter Zijlstra
2018-09-06 20:37             ` Reinette Chatre
2018-09-06 21:38               ` Peter Zijlstra [this message]
2018-09-06 14:38   ` Peter Zijlstra
2018-08-16 20:16 ` [PATCH V2 6/6] x86/intel_rdt: Re-enable pseudo-lock measurements Reinette Chatre
2018-09-04 16:50 ` [PATCH V2 0/6] perf/core and x86/intel_rdt: Fix lack of coordination with perf Reinette Chatre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180906213857.GF9358@worktop.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=dave.hansen@intel.com \
    --cc=fenghua.yu@intel.com \
    --cc=gavin.hindman@intel.com \
    --cc=hpa@zytor.com \
    --cc=jithu.joseph@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=reinette.chatre@intel.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=vikas.shivappa@linux.intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.