From: Masami Hiramatsu <mhiramat@kernel.org>
To: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: oleg@redhat.com, peterz@infradead.org, srikar@linux.vnet.ibm.com,
acme@kernel.org, ananth@linux.vnet.ibm.com,
akpm@linux-foundation.org, alexander.shishkin@linux.intel.com,
alexis.berlemont@gmail.com, corbet@lwn.net,
dan.j.williams@intel.com, gregkh@linuxfoundation.org,
huawei.libin@huawei.com, hughd@google.com, jack@suse.cz,
jglisse@redhat.com, jolsa@redhat.com, kan.liang@intel.com,
kirill.shutemov@linux.intel.com, kjlx@templeofstupid.com,
kstewart@linuxfoundation.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
mhocko@suse.com, milian.wolff@kdab.com, mingo@redhat.com,
namhyung@kernel.org, naveen.n.rao@linux.vnet.ibm.com,
pc@us.ibm.com, pombredanne@nexb.com, rostedt@goodmis.org,
tglx@linutronix.de, tmricht@linux.vnet.ibm.com,
willy@infradead.org, yao.jin@linux.intel.com,
fengguang.wu@intel.com
Subject: Re: [PATCH 6/8] trace_uprobe/sdt: Fix multiple update of same reference counter
Date: Wed, 14 Mar 2018 23:15:40 +0900 [thread overview]
Message-ID: <20180314231540.b98c74a153255f59f54ebc46@kernel.org> (raw)
In-Reply-To: <20180313125603.19819-7-ravi.bangoria@linux.vnet.ibm.com>
On Tue, 13 Mar 2018 18:26:01 +0530
Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> wrote:
> For tiny binaries/libraries, different mmap regions point to the
> same file portion. In such cases, we may increment the reference counter
> multiple times. But during de-registration, the reference counter will
> get decremented only once, leaving the reference counter > 0 even if
> no one is tracing on that marker.
>
> Ensure increment and decrement happen in sync by keeping a list of
> mms in trace_uprobe. Increment the reference counter only if the mm is
> not present in the list, and decrement only if the mm is present in the list.
>
> Example
>
> # echo "p:sdt_tick/loop2 /tmp/tick:0x6e4(0x10036)" > uprobe_events
>
> Before patch:
>
> # perf stat -a -e sdt_tick:loop2
> # /tmp/tick
> # dd if=/proc/`pgrep tick`/mem bs=1 count=1 skip=$(( 0x10020036 )) 2>/dev/null | xxd
> 0000000: 02 .
>
> # pkill perf
> # dd if=/proc/`pgrep tick`/mem bs=1 count=1 skip=$(( 0x10020036 )) 2>/dev/null | xxd
> 0000000: 01 .
>
> After patch:
>
> # perf stat -a -e sdt_tick:loop2
> # /tmp/tick
> # dd if=/proc/`pgrep tick`/mem bs=1 count=1 skip=$(( 0x10020036 )) 2>/dev/null | xxd
> 0000000: 01 .
>
> # pkill perf
> # dd if=/proc/`pgrep tick`/mem bs=1 count=1 skip=$(( 0x10020036 )) 2>/dev/null | xxd
> 0000000: 00 .
>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
> ---
> kernel/trace/trace_uprobe.c | 105 +++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 103 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> index b6c9b48..9bf3f7a 100644
> --- a/kernel/trace/trace_uprobe.c
> +++ b/kernel/trace/trace_uprobe.c
> @@ -50,6 +50,11 @@ struct trace_uprobe_filter {
> struct list_head perf_events;
> };
>
> +struct sdt_mm_list {
> + struct mm_struct *mm;
> + struct sdt_mm_list *next;
> +};
Oh, please use struct list_head instead of defining your own pointer-chain :(
> +
> /*
> * uprobe event core functions
> */
> @@ -61,6 +66,8 @@ struct trace_uprobe {
> char *filename;
> unsigned long offset;
> unsigned long ref_ctr_offset;
> + struct sdt_mm_list *sml;
> + struct rw_semaphore sml_rw_sem;
BTW, is there any reason to use rw_semaphore? (mutex doesn't fit?)
Thank you,
> unsigned long nhit;
> struct trace_probe tp;
> };
> @@ -274,6 +281,7 @@ static inline bool is_ret_probe(struct trace_uprobe *tu)
> if (is_ret)
> tu->consumer.ret_handler = uretprobe_dispatcher;
> init_trace_uprobe_filter(&tu->filter);
> + init_rwsem(&tu->sml_rw_sem);
> return tu;
>
> error:
> @@ -921,6 +929,74 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
> return trace_handle_return(s);
> }
>
> +static bool sdt_check_mm_list(struct trace_uprobe *tu, struct mm_struct *mm)
> +{
> + struct sdt_mm_list *tmp = tu->sml;
> +
> + if (!tu->sml || !mm)
> + return false;
> +
> + while (tmp) {
> + if (tmp->mm == mm)
> + return true;
> + tmp = tmp->next;
> + }
> +
> + return false;
> +}
> +
> +static void sdt_add_mm_list(struct trace_uprobe *tu, struct mm_struct *mm)
> +{
> + struct sdt_mm_list *tmp;
> +
> + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
> + if (!tmp)
> + return;
> +
> + tmp->mm = mm;
> + tmp->next = tu->sml;
> + tu->sml = tmp;
> +}
> +
> +static void sdt_del_mm_list(struct trace_uprobe *tu, struct mm_struct *mm)
> +{
> + struct sdt_mm_list *prev, *curr;
> +
> + if (!tu->sml)
> + return;
> +
> + if (tu->sml->mm == mm) {
> + curr = tu->sml;
> + tu->sml = tu->sml->next;
> + kfree(curr);
> + return;
> + }
> +
> + prev = tu->sml;
> + curr = tu->sml->next;
> + while (curr) {
> + if (curr->mm == mm) {
> + prev->next = curr->next;
> + kfree(curr);
> + return;
> + }
> + prev = curr;
> + curr = curr->next;
> + }
> +}
> +
> +static void sdt_flush_mm_list(struct trace_uprobe *tu)
> +{
> + struct sdt_mm_list *next, *curr = tu->sml;
> +
> + while (curr) {
> + next = curr->next;
> + kfree(curr);
> + curr = next;
> + }
> + tu->sml = NULL;
> +}
> +
> static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct *vma)
> {
> unsigned long vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> @@ -989,17 +1065,25 @@ static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> if (IS_ERR(info))
> goto out;
>
> + down_write(&tu->sml_rw_sem);
> while (info) {
> + if (sdt_check_mm_list(tu, info->mm))
> + goto cont;
> +
> down_write(&info->mm->mmap_sem);
>
> vma = sdt_find_vma(info->mm, tu);
> vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> - sdt_update_ref_ctr(info->mm, vaddr, 1);
> + if (!sdt_update_ref_ctr(info->mm, vaddr, 1))
> + sdt_add_mm_list(tu, info->mm);
>
> up_write(&info->mm->mmap_sem);
> +
> +cont:
> mmput(info->mm);
> info = uprobe_free_map_info(info);
> }
> + up_write(&tu->sml_rw_sem);
>
> out:
> uprobe_end_dup_mmap();
> @@ -1020,8 +1104,16 @@ void trace_uprobe_mmap_callback(struct vm_area_struct *vma)
> !trace_probe_is_enabled(&tu->tp))
> continue;
>
> + down_write(&tu->sml_rw_sem);
> + if (sdt_check_mm_list(tu, vma->vm_mm))
> + goto cont;
> +
> vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> - sdt_update_ref_ctr(vma->vm_mm, vaddr, 1);
> + if (!sdt_update_ref_ctr(vma->vm_mm, vaddr, 1))
> + sdt_add_mm_list(tu, vma->vm_mm);
> +
> +cont:
> + up_write(&tu->sml_rw_sem);
> }
> mutex_unlock(&uprobe_lock);
> }
> @@ -1038,7 +1130,11 @@ static void sdt_decrement_ref_ctr(struct trace_uprobe *tu)
> if (IS_ERR(info))
> goto out;
>
> + down_write(&tu->sml_rw_sem);
> while (info) {
> + if (!sdt_check_mm_list(tu, info->mm))
> + goto cont;
> +
> down_write(&info->mm->mmap_sem);
>
> vma = sdt_find_vma(info->mm, tu);
> @@ -1046,9 +1142,14 @@ static void sdt_decrement_ref_ctr(struct trace_uprobe *tu)
> sdt_update_ref_ctr(info->mm, vaddr, -1);
>
> up_write(&info->mm->mmap_sem);
> + sdt_del_mm_list(tu, info->mm);
> +
> +cont:
> mmput(info->mm);
> info = uprobe_free_map_info(info);
> }
> + sdt_flush_mm_list(tu);
> + up_write(&tu->sml_rw_sem);
>
> out:
> uprobe_end_dup_mmap();
> --
> 1.8.3.1
>
--
Masami Hiramatsu <mhiramat@kernel.org>
next prev parent reply other threads:[~2018-03-14 14:15 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-13 12:55 [PATCH 0/8] trace_uprobe: Support SDT markers having reference count (semaphore) Ravi Bangoria
2018-03-13 12:55 ` [PATCH 1/8] Uprobe: Export vaddr <-> offset conversion functions Ravi Bangoria
2018-03-13 20:36 ` Jerome Glisse
2018-03-15 16:27 ` Steven Rostedt
2018-03-16 8:54 ` Ravi Bangoria
2018-03-13 12:55 ` [PATCH 2/8] mm: Prefix vma_ to vaddr_to_offset() and offset_to_vaddr() Ravi Bangoria
2018-03-13 20:38 ` Jerome Glisse
2018-03-15 16:28 ` Steven Rostedt
2018-03-16 8:58 ` Ravi Bangoria
2018-03-13 12:55 ` [PATCH 3/8] Uprobe: Rename map_info to uprobe_map_info Ravi Bangoria
2018-03-13 20:39 ` Jerome Glisse
2018-03-15 16:44 ` Steven Rostedt
2018-03-16 8:56 ` Ravi Bangoria
2018-03-13 12:55 ` [PATCH 4/8] Uprobe: Export uprobe_map_info along with uprobe_{build/free}_map_info() Ravi Bangoria
2018-03-13 20:40 ` Jerome Glisse
2018-03-15 16:32 ` Steven Rostedt
2018-03-16 8:59 ` Ravi Bangoria
2018-03-13 12:56 ` [PATCH 5/8] trace_uprobe: Support SDT markers having reference count (semaphore) Ravi Bangoria
2018-03-14 13:48 ` Masami Hiramatsu
2018-03-14 15:12 ` Ravi Bangoria
2018-03-14 16:59 ` Oleg Nesterov
2018-03-15 11:23 ` Ravi Bangoria
2018-03-19 4:28 ` Ravi Bangoria
2018-03-19 13:46 ` Oleg Nesterov
2018-03-14 21:58 ` Steven Rostedt
2018-03-15 14:21 ` Oleg Nesterov
2018-03-15 14:30 ` Oleg Nesterov
2018-03-16 9:28 ` Ravi Bangoria
2018-03-16 11:39 ` Oleg Nesterov
2018-03-16 11:46 ` Ravi Bangoria
2018-03-16 9:21 ` Ravi Bangoria
2018-03-15 15:01 ` Oleg Nesterov
2018-03-16 9:31 ` Ravi Bangoria
2018-03-15 16:48 ` Steven Rostedt
2018-03-16 9:01 ` Ravi Bangoria
2018-03-16 16:16 ` Oleg Nesterov
2018-03-13 12:56 ` [PATCH 6/8] trace_uprobe/sdt: Fix multiple update of same reference counter Ravi Bangoria
2018-03-14 14:15 ` Masami Hiramatsu [this message]
2018-03-14 15:15 ` Ravi Bangoria
2018-03-15 14:49 ` Oleg Nesterov
2018-03-16 12:12 ` Ravi Bangoria
2018-03-16 13:49 ` Ravi Bangoria
2018-03-16 17:50 ` Oleg Nesterov
2018-03-19 9:18 ` Ravi Bangoria
2018-03-19 13:40 ` Oleg Nesterov
2018-03-13 12:56 ` [PATCH 7/8] perf probe: Support SDT markers having reference counter (semaphore) Ravi Bangoria
2018-03-14 14:09 ` Masami Hiramatsu
2018-03-14 15:21 ` Ravi Bangoria
2018-03-13 12:56 ` [PATCH 8/8] trace_uprobe/sdt: Document about reference counter Ravi Bangoria
2018-03-14 13:50 ` Masami Hiramatsu
2018-03-14 15:22 ` Ravi Bangoria
2018-03-15 12:47 ` Masami Hiramatsu
2018-03-16 9:42 ` Ravi Bangoria
2018-03-16 14:26 ` Masami Hiramatsu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180314231540.b98c74a153255f59f54ebc46@kernel.org \
--to=mhiramat@kernel.org \
--cc=acme@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=alexis.berlemont@gmail.com \
--cc=ananth@linux.vnet.ibm.com \
--cc=corbet@lwn.net \
--cc=dan.j.williams@intel.com \
--cc=fengguang.wu@intel.com \
--cc=gregkh@linuxfoundation.org \
--cc=huawei.libin@huawei.com \
--cc=hughd@google.com \
--cc=jack@suse.cz \
--cc=jglisse@redhat.com \
--cc=jolsa@redhat.com \
--cc=kan.liang@intel.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=kjlx@templeofstupid.com \
--cc=kstewart@linuxfoundation.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.com \
--cc=milian.wolff@kdab.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=naveen.n.rao@linux.vnet.ibm.com \
--cc=oleg@redhat.com \
--cc=pc@us.ibm.com \
--cc=peterz@infradead.org \
--cc=pombredanne@nexb.com \
--cc=ravi.bangoria@linux.vnet.ibm.com \
--cc=rostedt@goodmis.org \
--cc=srikar@linux.vnet.ibm.com \
--cc=tglx@linutronix.de \
--cc=tmricht@linux.vnet.ibm.com \
--cc=willy@infradead.org \
--cc=yao.jin@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).