From: Li Yu <raise.sail@gmail.com>
To: Linux Netdev List <netdev@vger.kernel.org>
Subject: [PATCH 1/3] skbtrace v2: core feature and common events
Date: Fri, 19 Oct 2012 14:16:17 +0800 [thread overview]
Message-ID: <5080F031.5040804@gmail.com> (raw)
From: Li Yu <bingtian.ly@taobao.com>
This patch contains:
1. The glue code of tracepoints subsystem and relay file system.
2. API for particular networking trace points.
3. The skb_rps_info trace point.
Thanks
Signed-off-by: Li Yu <bingtian.ly@taobao.com>
include/linux/skbtrace.h | 478 ++++++++++++
include/linux/skbtrace_api.h | 73 +
include/linux/skbuff.h | 7
include/net/skbtrace_api_common.h | 84 ++
include/net/sock.h | 14
include/trace/events/skbtrace.h | 32
include/trace/events/skbtrace_common.h | 41 +
kernel/trace/Kconfig | 8
net/core/Makefile | 2
net/core/dev.c | 3
net/core/net-traces.c | 24
net/core/skbtrace-core.c | 1226 +++++++++++++++++++++++++++++++++
net/core/skbtrace-events-common.c | 68 +
net/core/skbuff.c | 5
net/core/sock.c | 9
15 files changed, 2073 insertions(+), 1 deletion(-)
============================
diff --git a/include/linux/skbtrace.h b/include/linux/skbtrace.h
new file mode 100644
index 0000000..71fbff0
--- /dev/null
+++ b/include/linux/skbtrace.h
@@ -0,0 +1,478 @@
+/*
+ * skbtrace - sk_buff trace utility
+ *
+ * API for kernel
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * 2012 Li Yu <bingtian.ly@taobao.com>
+ *
+ */
+
+#ifndef _LINUX_SKBTRACE_H
+#define _LINUX_SKBTRACE_H
+
+#include <linux/jump_label.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/skbtrace_api.h>
+#include <asm/atomic.h>
+
+#include <net/sock.h>
+#include <net/inet_timewait_sock.h>
+
+#if defined(CONFIG_SKBTRACE) || defined(CONFIG_SKBTRACE_MODULE)
+#define HAVE_SKBTRACE 1
+#else
+#define HAVE_SKBTRACE 0
+#endif
+
+#if HAVE_SKBTRACE
+
+/*
+ * The size parameters of secondary_buffer->slots: one allocation of
+ * 2^SECONDARY_BUFFER_ORDER pages, handed out in pieces of
+ * SECONDARY_BUFFER_UNIT bytes. SECONDARY_BUFFER_COUNTS is the number of
+ * such pieces (slots) that fit in the allocation.
+ */
+#define SECONDARY_BUFFER_ORDER 0
+#define SECONDARY_BUFFER_SIZE (PAGE_SIZE<<SECONDARY_BUFFER_ORDER)
+#define SECONDARY_BUFFER_UNIT (128)
+#define SECONDARY_BUFFER_COUNTS \
+	(SECONDARY_BUFFER_SIZE/SECONDARY_BUFFER_UNIT)
+
+/*
+ * struct secondary_buffer - cache of secondary events for one primary event.
+ *
+ * 'slots' is a page allocation that secondary_buffer_get_block() hands out
+ * in SECONDARY_BUFFER_UNIT sized pieces, wrapping back to the first slot
+ * once 'offset' reaches SECONDARY_BUFFER_COUNTS.
+ */
+struct secondary_buffer {
+ atomic_t refcnt; /* last put frees 'slots', see secondary_buffer_put() */
+ struct hlist_node node; /* link in a secondary_table hash chain */
+ int action; /* the action of primary event */
+ spinlock_t lock; /* held around 'offset'/'count' updates */
+ unsigned long session; /* skbtrace_session value at init time */
+ int offset; /* next writeable slot */
+ int count; /* count of current cached events in 'slots' */
+ char *slots; /* the cache of secondary events */
+};
+
+
+/* Hash table geometry: (1 << SECONDARY_TABLE_SHIFT) chains. */
+#define SECONDARY_TABLE_SHIFT 6
+#define SECONDARY_TABLE_SIZE (1<<SECONDARY_TABLE_SHIFT)
+#define SECONDARY_TABLE_MASK (SECONDARY_TABLE_SIZE - 1)
+
+/*
+ * struct secondary_table - secondary buffers hashed by primary action.
+ * @lock: taken with spin_lock_bh() around every chain walk and update
+ * @table: hash chains of struct secondary_buffer, linked through ->node
+ */
+struct secondary_table {
+ spinlock_t lock;
+ struct hlist_head table[SECONDARY_TABLE_SIZE];
+};
+
+/*
+ * struct skbtrace_tracepoint - description of one skbtrace event source.
+ *
+ * The exact contract of the callbacks is defined by code outside this
+ * header; the notes below only state what this header itself shows.
+ */
+struct skbtrace_tracepoint {
+ const char *trace_name; /* NULL in EMPTY_SKBTRACE_TP */
+ int action; /* copied into secondary_buffer->action */
+ int nr_secondary;
+ size_t block_size;
+ void *probe;
+ int (*setup_options)(struct skbtrace_tracepoint *tp,
+ char *options);
+ void (*enable)(struct skbtrace_tracepoint *tp);
+ void (*disable)(struct skbtrace_tracepoint *tp);
+ char *(*desc)(struct skbtrace_tracepoint *tp);
+ void *private;
+
+ /* Below is for internals, which is not a part of kernel API */
+ unsigned int enabled : 1;
+ /* When non-NULL, skbtrace_block_get() places this tracepoint's */
+ /* blocks in a secondary buffer that belongs to 'primary' */
+ struct skbtrace_tracepoint *primary;
+ /* The secondary events of sk_buff based event are */
+ /* cached here. The secondary events of socket based */
+ /* event are cached in hash table skbtrace_context->sec_table */
+ struct secondary_buffer sec_buffer;
+};
+
+extern atomic64_t skbtrace_event_seq;
+extern int sysctl_skbtrace_filter_default;
+
+/*
+ * Fill in the common header of an event block.
+ * @blk: pointer to the block header; it is expanded once per field, so
+ * pass an expression without side effects
+ * @p: stored in ->ptr
+ * @act: stored in ->action
+ * @fl: stored in ->flags
+ * @blk_size: stored in ->len
+ *
+ * ->magic is set to 0xDEADBEEF, ->seq to the next value of the global
+ * skbtrace_event_seq counter and ->ts to current_kernel_time().
+ */
+#define INIT_SKBTRACE_BLOCK(blk, p, act, fl, blk_size) \
+ do {\
+ (blk)->magic = 0xDEADBEEF;\
+ (blk)->len = (blk_size);\
+ (blk)->action = (act);\
+ (blk)->flags = (fl);\
+ (blk)->seq = atomic64_add_return(1, &skbtrace_event_seq);\
+ (blk)->ts = current_kernel_time();\
+ (blk)->ptr = (p);\
+ } while (0)
+
+#define EMPTY_SKBTRACE_TP {.trace_name = NULL, }
+
+struct inet_timewait_sock;
+/*
+ * struct skbtrace_ops - callbacks handed to skbtrace_register_proto()
+ * together with an address family. The tw_* members operate on an
+ * inet_timewait_sock, the other two on a full sock.
+ * NOTE(review): return-value conventions are set by the implementations,
+ * which are not visible in this header - confirm before relying on them.
+ */
+struct skbtrace_ops {
+ int (*tw_getname)(struct inet_timewait_sock *tw,
+ struct sockaddr *uaddr, int peer);
+ int (*tw_filter_skb)(struct inet_timewait_sock *tw,
+ struct sk_buff *skb);
+ int (*getname)(struct sock *sk, struct sockaddr *uaddr,
+ int *uaddr_len, int peer);
+ int (*filter_skb)(struct sock *sk, struct sk_buff *skb);
+};
+
+/*
+ * struct skbtrace_context - skbtrace state attached to a socket through
+ * sk->sk_skbtrace; released with skbtrace_context_destroy().
+ */
+struct skbtrace_context {
+ unsigned long session;
+ struct skbtrace_ops *ops;
+ unsigned int active_conn_hit : 1;
+ /* secondary events of socket based events are cached here */
+ struct secondary_table sec_table;
+};
+
+extern unsigned long skbtrace_session;
+
+extern int skbtrace_register_proto(int af,
+ struct skbtrace_tracepoint *tp_list,
+ struct skbtrace_ops *ops);
+extern void skbtrace_unregister_proto(int af);
+extern struct skbtrace_ops* skbtrace_ops_get(int af);
+
+extern void __skbtrace_probe(struct skbtrace_tracepoint *tp,
+ struct skbtrace_context *ctx,
+ struct skbtrace_block *blk);
+extern int skbtrace_events_common_init(void);
+
+extern struct static_key skbtrace_filters_enabled;
+extern struct sk_filter *skbtrace_skb_filter;
+extern struct sk_filter *skbtrace_sock_filter;
+
+extern struct sk_buff* skbtrace_get_sock_filter_skb(struct sock *sk);
+/*
+ * Hand back an skb obtained from skbtrace_get_sock_filter_skb(): empty it
+ * (data back at head, zero length, tail and header offsets reset) and
+ * re-enable bottom halves.
+ * NOTE(review): the matching local_bh_disable() is presumably done inside
+ * skbtrace_get_sock_filter_skb(), which is not visible here - confirm.
+ */
+static inline void skbtrace_put_sock_filter_skb(struct sk_buff *skb)
+{
+ skb->data = skb->head;
+ skb->len = 0;
+ skb_reset_tail_pointer(skb);
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
+ local_bh_enable();
+}
+extern struct sk_buff* skbtrace_get_twsk_filter_skb(
+ struct inet_timewait_sock *tw);
+#define skbtrace_put_twsk_filter_skb skbtrace_put_sock_filter_skb
+
+/*
+ * Emit one event block.
+ * Blocks whose action is skbtrace_action_invalid are dropped silently;
+ * every other block is forwarded to __skbtrace_probe().
+ */
+static inline void skbtrace_probe(struct skbtrace_tracepoint *t,
+					struct skbtrace_context *ctx,
+					struct skbtrace_block *blk)
+{
+	if (blk->action != skbtrace_action_invalid)
+		__skbtrace_probe(t, ctx, blk);
+}
+
+/*
+ * Decide whether events for @skb should be skipped.
+ *
+ * Returns 0 (do not skip) while filters are disabled. Otherwise a verdict
+ * already stored in the skb is returned; if there is none and an skb
+ * filter is installed, the filter is run once and its verdict is stored in
+ * the skb: a filter result of 0 means "skip". With no filter installed the
+ * result is 0.
+ */
+static inline int skbtrace_bypass_skb(struct sk_buff *skb)
+{
+	if (!static_key_false(&skbtrace_filters_enabled))
+		return 0;
+
+	/* verdict cached by an earlier call */
+	if (skb->skbtrace_filtered)
+		return skb->hit_skbtrace;
+
+	if (!skbtrace_skb_filter)
+		return 0;
+
+	skb->hit_skbtrace = !SK_RUN_FILTER(skbtrace_skb_filter, skb);
+	skb->skbtrace_filtered = 1;
+	return skb->hit_skbtrace;
+}
+
+/* Take one reference on @buf. */
+static inline void secondary_buffer_get(struct secondary_buffer *buf)
+{
+ atomic_inc(&buf->refcnt);
+}
+
+/*
+ * Drop one reference on @buf (NULL is accepted and ignored). When the
+ * last reference goes away the slot pages are freed and ->slots is
+ * cleared; the structure itself is not freed here.
+ */
+static inline void secondary_buffer_put(struct secondary_buffer *buf)
+{
+ if (buf && atomic_dec_and_test(&buf->refcnt)) {
+ free_pages((unsigned long)buf->slots, SECONDARY_BUFFER_ORDER);
+ buf->slots = NULL;
+ }
+}
+
+/* Forget all cached events: the next block is taken from slot 0. */
+static inline void secondary_buffer_reset(struct secondary_buffer *buf)
+{
+ buf->offset = 0;
+ buf->count = 0;
+}
+
+/*
+ * Set up @buf for primary tracepoint @tp: allocate the slot pages with
+ * GFP_ATOMIC, record tp->action and the current skbtrace_session, and
+ * leave the buffer empty with a reference count of one.
+ *
+ * Returns 0 on success, or -ENOMEM if the pages cannot be allocated; in
+ * that case only ->slots has been written.
+ */
+static inline int secondary_buffer_init(struct secondary_buffer *buf,
+ struct skbtrace_tracepoint *tp)
+{
+ buf->slots = (char *)__get_free_pages(GFP_ATOMIC,
+ SECONDARY_BUFFER_ORDER);
+ if (!buf->slots)
+ return -ENOMEM;
+
+ INIT_HLIST_NODE(&buf->node);
+ spin_lock_init(&buf->lock);
+ buf->action = tp->action;
+ buf->session = skbtrace_session;
+ /* start from zero, then take the initial reference */
+ atomic_set(&buf->refcnt, 0);
+ secondary_buffer_reset(buf);
+ secondary_buffer_get(buf);
+ return 0;
+}
+
+/*
+ * Allocate and initialise a secondary buffer for primary tracepoint @tp.
+ * The structure is allocated with GFP_ATOMIC and then set up by
+ * secondary_buffer_init().
+ * Returns the new buffer, or NULL if the allocation or the setup fails.
+ */
+static inline struct secondary_buffer* secondary_buffer_new(
+					struct skbtrace_tracepoint *tp)
+{
+	struct secondary_buffer *fresh = kmalloc(sizeof(*fresh), GFP_ATOMIC);
+
+	if (!fresh)
+		return NULL;
+
+	if (secondary_buffer_init(fresh, tp) != 0) {
+		kfree(fresh);
+		return NULL;
+	}
+	return fresh;
+}
+
+/*
+ * Drop one reference on @buf and free the structure itself; NULL is a
+ * no-op.
+ * NOTE(review): kfree() runs even when the put did not drop the last
+ * reference - confirm that no other holder can still be using @buf.
+ */
+static inline void secondary_buffer_destroy(struct secondary_buffer *buf)
+{
+ if (buf) {
+ secondary_buffer_put(buf);
+ kfree(buf);
+ }
+}
+
+/*
+ * Find the buffer in @table that caches secondary events for tp->action
+ * in the current skbtrace_session.
+ *
+ * Returns the buffer, or NULL if there is none. The table lock is held
+ * only for the walk and no reference is taken on the returned buffer.
+ */
+static inline struct secondary_buffer* secondary_table_lookup(
+ struct secondary_table *table,
+ struct skbtrace_tracepoint *tp)
+{
+ unsigned int key;
+ struct secondary_buffer *buffer;
+ struct hlist_node *pos;
+
+ key = (47 * tp->action) & SECONDARY_TABLE_MASK;
+ spin_lock_bh(&table->lock);
+ hlist_for_each_entry(buffer, pos, &table->table[key], node) {
+ /* entries from another session are ignored */
+ if (buffer->session != skbtrace_session)
+ continue;
+ if (buffer->action == tp->action)
+ goto unlock;
+ }
+ buffer = NULL;
+unlock:
+ spin_unlock_bh(&table->lock);
+
+ return buffer;
+}
+
+/*
+ * Like secondary_table_lookup(), but when no matching buffer exists a new
+ * one is allocated for @tp and added to the head of its hash chain, all
+ * under the table lock.
+ *
+ * Returns the existing or new buffer, or NULL if the allocation fails.
+ */
+static inline struct secondary_buffer* secondary_table_lookup_or_create(
+ struct secondary_table *table,
+ struct skbtrace_tracepoint *tp)
+{
+ unsigned int key;
+ struct secondary_buffer *buffer;
+ struct hlist_node *pos;
+
+ key = (47 * tp->action) & SECONDARY_TABLE_MASK;
+ spin_lock_bh(&table->lock);
+ hlist_for_each_entry(buffer, pos, &table->table[key], node) {
+ /* entries from another session are ignored */
+ if (buffer->session != skbtrace_session)
+ continue;
+ if (buffer->action == tp->action)
+ goto unlock;
+ }
+ /* not found: create one while still holding the lock */
+ buffer = secondary_buffer_new(tp);
+ if (buffer)
+ hlist_add_head(&buffer->node, &table->table[key]);
+unlock:
+ spin_unlock_bh(&table->lock);
+
+ return buffer;
+}
+
+/*
+ * Empty @table: unlink every buffer from every chain and pass it to
+ * secondary_buffer_destroy(), with the table lock held throughout.
+ */
+static inline void secondary_table_clean(struct secondary_table *table)
+{
+ unsigned int key;
+
+ spin_lock_bh(&table->lock);
+ for (key = 0; key < SECONDARY_TABLE_SIZE; key++) {
+ /* pop the chain head until the chain is empty */
+ while (!hlist_empty(&table->table[key])) {
+ struct secondary_buffer *buffer;
+
+ buffer = container_of(table->table[key].first,
+ struct secondary_buffer, node);
+ hlist_del(table->table[key].first);
+ secondary_buffer_destroy(buffer);
+ }
+ }
+ spin_unlock_bh(&table->lock);
+}
+
+/* Initialise the lock of @table and make every hash chain empty. */
+static inline void secondary_table_init(struct secondary_table *table)
+{
+ unsigned int key;
+
+ spin_lock_init(&table->lock);
+ for (key = 0; key < SECONDARY_TABLE_SIZE; key++)
+ INIT_HLIST_HEAD(&table->table[key]);
+}
+
+extern struct skbtrace_context *skbtrace_context_get(struct sock *sk);
+extern void skbtrace_context_setup(struct skbtrace_context *ctx,
+ struct skbtrace_ops *ops);
+
+/*
+ * Release the context that *ctx points to: destroy all buffers in its
+ * secondary table, free the context and set *ctx to NULL.
+ * Does nothing when *ctx is already NULL.
+ */
+static inline void skbtrace_context_destroy(struct skbtrace_context **ctx)
+{
+ if (!*ctx)
+ return;
+ secondary_table_clean(&(*ctx)->sec_table);
+ kfree(*ctx);
+ *ctx = NULL;
+}
+
+/* Clear the skbtrace context pointer of @sk; nothing is freed here. */
+static inline void sock_skbtrace_reset(struct sock *sk)
+{
+ sk->sk_skbtrace = NULL;
+}
+
+/*
+ * Reserve the next slot of @buf for a secondary event.
+ * @buf: buffer to take the slot from; it is set up on first use if its
+ * slot pages have not been allocated yet
+ * @primary: primary tracepoint, only used for that first-use setup
+ *
+ * The slots form a ring: after SECONDARY_BUFFER_COUNTS blocks have been
+ * handed out, the write position wraps and the oldest slot is reused.
+ *
+ * Returns a pointer to a SECONDARY_BUFFER_UNIT byte slot, or NULL if the
+ * slot pages could not be allocated.
+ * NOTE(review): the first-use setup runs before buf->lock is taken -
+ * confirm that callers cannot race on a buffer that is not set up yet.
+ */
+static inline void* secondary_buffer_get_block(struct secondary_buffer *buf,
+				struct skbtrace_tracepoint *primary)
+{
+	void *ret;
+
+	if (!buf->slots && secondary_buffer_init(buf, primary))
+		return NULL;
+
+	spin_lock_bh(&buf->lock);
+	ret = &buf->slots[buf->offset * SECONDARY_BUFFER_UNIT];
+	/* count saturates at the ring size */
+	if (buf->count < SECONDARY_BUFFER_COUNTS)
+		buf->count++;
+	/* advance the write position, wrapping at the end of the ring */
+	if (++buf->offset >= SECONDARY_BUFFER_COUNTS)
+		buf->offset = 0;
+	spin_unlock_bh(&buf->lock);
+	return ret;
+}
+
+/*
+ * Pick the memory an event block for @tp should be built in.
+ * @tp: tracepoint producing the event (may be NULL)
+ * @ctx: socket context, or NULL for events without one
+ * @fast: caller supplied block, used as fallback
+ *
+ * If @tp is NULL or has no primary, @fast is returned. Otherwise a slot
+ * is taken from a secondary buffer of the primary tracepoint: the one in
+ * ctx->sec_table when @ctx is given (created on demand), else the buffer
+ * embedded in the primary itself. If no slot can be obtained, @fast is
+ * returned.
+ */
+static inline void* skbtrace_block_get(struct skbtrace_tracepoint *tp,
+ struct skbtrace_context *ctx,
+ void *fast)
+{
+ struct skbtrace_tracepoint *pri;
+
+ if (!tp || !tp->primary)
+ return fast;
+
+ pri = tp->primary;
+ if (ctx) {
+ struct secondary_buffer *buf;
+ struct secondary_table *table;
+
+ table = &ctx->sec_table;
+ buf = secondary_table_lookup_or_create(table, pri);
+ if (!buf)
+ return fast;
+ /* 'a ? : b' yields a when a is non-NULL, else b */
+ return secondary_buffer_get_block(buf, pri) ? : fast;
+ }
+ return secondary_buffer_get_block(&pri->sec_buffer, pri) ? : fast;
+}
+
+/*
+ * skbtrace_block_get() for a socket: the context is whatever
+ * skbtrace_context_get(@sk) returns.
+ */
+static inline void* skbtrace_block_sk_get(struct skbtrace_tracepoint *tp,
+ struct sock *sk,
+ void *fast)
+{
+ return skbtrace_block_get(tp, skbtrace_context_get(sk), fast);
+}
+
+/*
+ * Bracket the body of an sk_buff based probe. BEGIN opens a block that
+ * returns at once when skbtrace_bypass_skb(skb) is non-zero and otherwise
+ * enters an else-branch; END closes both. The enclosing function must
+ * return void and have a variable named 'skb' in scope.
+ */
+#define SKBTRACE_SKB_EVENT_BEGIN \
+{\
+ if (skbtrace_bypass_skb(skb)) {\
+ return; \
+ } else {
+
+#define SKBTRACE_SKB_EVENT_END \
+ } \
+}
+
+extern u32 skbtrace_sock_filter_id;
+/*
+ * Decide whether events for @sk should be skipped (non-zero) or traced (0).
+ *
+ * While filters are disabled the answer is 0. Otherwise a verdict cached
+ * in the socket is reused as long as sk_skbtrace_fid still equals
+ * skbtrace_sock_filter_id. Failing that, if a socket filter is installed
+ * it is run on the skb from skbtrace_get_sock_filter_skb(); a filter
+ * result of 0 means "skip", and the verdict is cached together with the
+ * current filter id. If that skb cannot be obtained,
+ * sysctl_skbtrace_filter_default is returned and nothing is cached.
+ */
+static inline int skbtrace_bypass_sock(struct sock *sk)
+{
+ if (static_key_false(&skbtrace_filters_enabled)) {
+ if (likely(sk->sk_skbtrace_filtered &&
+ (skbtrace_sock_filter_id == sk->sk_skbtrace_fid))) {
+ return sk->sk_hit_skbtrace;
+ }
+ if (skbtrace_sock_filter) {
+ unsigned int pkt_len;
+ struct sk_buff *skb;
+
+ skb = skbtrace_get_sock_filter_skb(sk);
+ if (skb) {
+ pkt_len = SK_RUN_FILTER(skbtrace_sock_filter, skb);
+ sk->sk_hit_skbtrace = !pkt_len;
+ sk->sk_skbtrace_filtered = 1;
+ skbtrace_put_sock_filter_skb(skb);
+ sk->sk_skbtrace_fid = skbtrace_sock_filter_id;
+ return sk->sk_hit_skbtrace;
+ }
+ return sysctl_skbtrace_filter_default;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Time-wait socket counterpart of skbtrace_bypass_sock(): same decision
+ * steps, but the cached verdict and filter id live in @tw and the skb fed
+ * to the socket filter comes from skbtrace_get_twsk_filter_skb().
+ * Returns non-zero to skip events for @tw, 0 to trace them.
+ */
+static inline int skbtrace_bypass_twsk(struct inet_timewait_sock *tw)
+{
+ if (static_key_false(&skbtrace_filters_enabled)) {
+ if (likely(tw->tw_skbtrace_filtered &&
+ (skbtrace_sock_filter_id == tw->tw_skbtrace_fid))) {
+ return tw->tw_hit_skbtrace;
+ }
+ if (skbtrace_sock_filter) {
+ unsigned int pkt_len;
+ struct sk_buff *skb;
+
+ skb = skbtrace_get_twsk_filter_skb(tw);
+ if (skb) {
+ pkt_len = SK_RUN_FILTER(skbtrace_sock_filter, skb);
+ tw->tw_hit_skbtrace = !pkt_len;
+ tw->tw_skbtrace_filtered = 1;
+ skbtrace_put_twsk_filter_skb(skb);
+ tw->tw_skbtrace_fid = skbtrace_sock_filter_id;
+ return tw->tw_hit_skbtrace;
+ }
+ /* no skb to run the filter on: use the default verdict */
+ return sysctl_skbtrace_filter_default;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Bracket the body of a socket based probe. BEGIN opens a block that
+ * returns at once when skbtrace_bypass_sock(sk) is non-zero and otherwise
+ * enters an else-branch; END closes both. The enclosing function must
+ * return void and have a variable named 'sk' in scope.
+ */
+#define SKBTRACE_SOCK_EVENT_BEGIN \
+{\
+ if (skbtrace_bypass_sock(sk)) {\
+ return; \
+ } else {
+
+#define SKBTRACE_SOCK_EVENT_END \
+ } \
+}
+
+extern int inet_filter_skb(struct sock *sk, struct sk_buff *skb);
+extern int inet_tw_getname(struct inet_timewait_sock *tw,
+ struct sockaddr *uaddr, int peer);
+extern int inet_tw_filter_skb(struct inet_timewait_sock *tw,
+ struct sk_buff *skb);
+extern int tcp_tw_filter_skb(struct inet_timewait_sock *tw,
+ struct sk_buff *skb);
+extern int tcp_filter_skb(struct sock *sk, struct sk_buff *skb);
+
+#else /* HAVE_SKBTRACE */
+
+/* skbtrace is not configured: there is no per-socket state to reset. */
+static inline void sock_skbtrace_reset(struct sock *sk)
+{
+}
+
+/* skbtrace is not configured: empty stub so callers need no #ifdef. */
+static inline void skbtrace_context_destroy(struct skbtrace_context **ctx)
+{
+}
+
+#endif /* HAVE_SKBTRACE */
+
+#endif /* _LINUX_SKBTRACE_H */
diff --git a/include/linux/skbtrace_api.h b/include/linux/skbtrace_api.h
new file mode 100644
index 0000000..2d14ff6
--- /dev/null
+++ b/include/linux/skbtrace_api.h
@@ -0,0 +1,73 @@
+/*
+ * skbtrace - sk_buff trace utility
+ *
+ * User/Kernel Interface
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * 2012 Li Yu <bingtian.ly@taobao.com>
+ *
+ */
+#ifndef _LINUX_SKBTRACE_API_H
+#define _LINUX_SKBTRACE_API_H
+
+#include <linux/types.h>
+
+#ifdef __KERNEL__
+#include <linux/time.h>
+#else
+#include <time.h>
+#define __packed __attribute__ ((__packed__))
+#endif
+
+#define TRACE_SPEC_MAX_LEN 256
+
+#define SKBTRACE_DEF_SUBBUF_SIZE (1<<12)
+#define SKBTRACE_DEF_SUBBUF_NR (1<<11)
+
+#define SKBTRACE_MIN_SUBBUF_SIZE SKBTRACE_DEF_SUBBUF_SIZE
+#define SKBTRACE_MIN_SUBBUF_NR SKBTRACE_DEF_SUBBUF_NR
+
+#define SKBTRACE_MAX_SUBBUF_SIZE (1<<16)
+#define SKBTRACE_MAX_SUBBUF_NR (1<<20)
+
+#define SC 0 /* for tracepoints in process context */
+#define SI 1 /* for tracepoints in softirq context */
+#define HW 2 /* for tracepoints in hardirq context */
+#define NR_CHANNELS 3
+
+/* struct skbtrace_block - common header of every event, used in */
+/* kernel/user interaction */
+/* @magic: set to 0xDEADBEEF by INIT_SKBTRACE_BLOCK() */
+/* @len: whole data structure size in bytes */
+/* @action: action of this skbtrace_block */
+/* @flags: the flags depend on above action field */
+/* @ts: the timestamp of this event. */
+/* @seq: sequence number taken from the global event counter */
+/* @ptr: the major source kernel data structure */
+/* of this event, in general a sk_buff or sock */
+/* PLEASE: */
+/* Keep 64 bits alignment */
+/* NOTE(review): the sizes of 'struct timespec' and 'void *' depend on */
+/* the ABI - confirm the layout for 32-bit readers of this structure */
+struct skbtrace_block {
+ __u64 magic;
+ __u16 len;
+ __u16 action;
+ __u32 flags;
+ struct timespec ts;
+ __u64 seq;
+ void *ptr;
+} __packed;
+
+#include <net/skbtrace_api_common.h>
+#include <net/skbtrace_api_ipv4.h>
+
+#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7632c87..27a0fe0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -351,6 +351,8 @@ typedef unsigned char *sk_buff_data_t;
* @peeked: this packet has been seen already, so stats have been
* done for it, don't do them again
* @nf_trace: netfilter packet trace flag
+ * @hit_skbtrace: should this skb be skipped by the skbtrace filter?
+ * @skbtrace_filtered: has this skb already been processed by the skbtrace filter?
* @protocol: Packet protocol from driver
* @destructor: Destruct function
* @nfct: Associated connection, if any
@@ -469,7 +471,10 @@ struct sk_buff {
__u8 wifi_acked:1;
__u8 no_fcs:1;
__u8 head_frag:1;
- /* 8/10 bit hole (depending on ndisc_nodetype presence) */
+#if defined(CONFIG_SKBTRACE) || defined(CONFIG_SKBTRACE_MODULE)
+ __u8 hit_skbtrace:1;
+ __u8 skbtrace_filtered:1;
+#endif
kmemcheck_bitfield_end(flags2);
#ifdef CONFIG_NET_DMA
diff --git a/include/net/skbtrace_api_common.h b/include/net/skbtrace_api_common.h
new file mode 100644
index 0000000..87892d6
--- /dev/null
+++ b/include/net/skbtrace_api_common.h
@@ -0,0 +1,84 @@
+/*
+ * skbtrace - sk_buff trace utility
+ *
+ * User/Kernel Interface
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * 2012 Li Yu <bingtian.ly@taobao.com>
+ *
+ */
+#ifndef _NET_SKBTRACE_API_COMMON_H
+#define _NET_SKBTRACE_API_COMMON_H
+
+#include <linux/types.h>
+
+/********************* Common section *********************/
+
+/* skbtrace_block->action */
+/* 0 is the invalid action: skbtrace_probe() drops blocks that carry it. */
+/* skbtrace_action_common_min/_max delimit the range used by the */
+/* common events declared in this file. */
+enum {
+ skbtrace_action_invalid = 0,
+ skbtrace_action_common_min = 1,
+ skbtrace_action_skb_rps_info = 1,
+ skbtrace_action_sk_timer = 2,
+ skbtrace_action_common_max = 99,
+};
+
+/* common skbtrace_block->flags */
+/* miss_secondary - no secondary events, or not enough memory to cache them */
+/* NOTE(review): the values look like bit positions within the 32-bit */
+/* flags word (28..31 reserved for common use) - confirm against users */
+enum {
+ skbtrace_flags_reserved_min = 28,
+ skbtrace_flags_miss_secondary = 28,
+ skbtrace_flags_reserved_max = 31,
+};
+
+/* Copied from <net/flow_keys.h>, except that the pad fields are dropped */
+/* and the structure is packed. 'ports' and 'port16' overlay the same */
+/* 32 bits. */
+/* NOTE(review): byte order of the members is decided by the code that */
+/* fills them, which is not visible here - confirm before decoding. */
+struct skbtrace_flow_keys {
+ __u32 src;
+ __u32 dst;
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u32 ip_proto;
+} __packed;
+
+/* Event block with the common header followed by RPS related fields; */
+/* presumably the payload of skbtrace_action_skb_rps_info - confirm */
+/* against the probe that fills it. 'pad' keeps rx_hash 32-bit aligned */
+/* within the packed layout. */
+struct skbtrace_skb_rps_info_blk {
+ struct skbtrace_block blk;
+ __u16 rx_queue;
+ __u16 pad;
+ __u32 rx_hash;
+ __u32 cpu;
+ __u32 ifindex;
+ struct skbtrace_flow_keys keys;
+} __packed;
+
+
+/* socket timers */
+/* flags */
+/* NOTE(review): presumably the values passed as 'action' to the */
+/* sk_timer tracepoint; skbtrace_sk_timer_last is the highest one - */
+/* confirm against the call sites. */
+enum {
+ skbtrace_sk_timer_setup = 0,
+ skbtrace_sk_timer_reset = 1,
+ skbtrace_sk_timer_stop = 2,
+ skbtrace_sk_timer_last = 3,
+};
+
+/* Event block for socket timer events: common header plus protocol and */
+/* timeout. NOTE(review): the unit of 'timeout' is set by the probe, */
+/* which is not visible here - confirm. */
+struct skbtrace_sk_timer_blk {
+ struct skbtrace_block blk;
+ __s32 proto;
+ __s32 timeout;
+} __packed;
+
+#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index adb7da2..7a1d861 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -190,6 +190,8 @@ struct sock_common {
};
struct cg_proto;
+struct skbtrace_context;
+
/**
* struct sock - network layer representation of sockets
* @__sk_common: shared layout with inet_timewait_sock
@@ -332,7 +334,12 @@ struct sock {
sk_userlocks : 4,
sk_protocol : 8,
sk_type : 16;
+#if defined(CONFIG_SKBTRACE) || defined(CONFIG_SKBTRACE_MODULE)
+ unsigned int sk_hit_skbtrace : 1,
+ sk_skbtrace_filtered : 1;
+#endif
kmemcheck_bitfield_end(flags);
+ unsigned int sk_skbtrace_fid;
int sk_wmem_queued;
gfp_t sk_allocation;
netdev_features_t sk_route_caps;
@@ -373,6 +380,9 @@ struct sock {
__u32 sk_mark;
u32 sk_classid;
struct cg_proto *sk_cgrp;
+#if defined(CONFIG_SKBTRACE) || defined(CONFIG_SKBTRACE_MODULE)
+ struct skbtrace_context *sk_skbtrace;
+#endif
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk, int bytes);
void (*sk_write_space)(struct sock *sk);
@@ -842,6 +852,10 @@ struct module;
* transport -> network interface is defined by struct inet_proto
*/
struct proto {
+#if defined(CONFIG_SKBTRACE) || defined(CONFIG_SKBTRACE_MODULE)
+ int (*filter_skb)(struct sock *sk,
+ struct sk_buff *skb);
+#endif
void (*close)(struct sock *sk,
long timeout);
int (*connect)(struct sock *sk,
diff --git a/include/trace/events/skbtrace.h b/include/trace/events/skbtrace.h
new file mode 100644
index 0000000..91567bf
--- /dev/null
+++ b/include/trace/events/skbtrace.h
@@ -0,0 +1,32 @@
+/*
+ * skbtrace - sk_buff trace utility
+ *
+ * Events
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * 2012 Li Yu <bingtian.ly@taobao.com>
+ *
+ */
+
+#if !defined(_TRACE_EVENTS_SKBTRACE_H)
+#define _TRACE_EVENTS_SKBTRACE_H
+
+#include <linux/tracepoint.h>
+
+#include <trace/events/skbtrace_common.h>
+#include <trace/events/skbtrace_ipv4.h>
+
+#endif
diff --git a/include/trace/events/skbtrace_common.h b/include/trace/events/skbtrace_common.h
new file mode 100644
index 0000000..4352564
--- /dev/null
+++ b/include/trace/events/skbtrace_common.h
@@ -0,0 +1,41 @@
+/*
+ * skbtrace - sk_buff trace utility
+ *
+ * Common events
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * 2012 Li Yu <bingtian.ly@taobao.com>
+ *
+ */
+
+#if !defined(_TRACE_EVENTS_SKBTRACE_COMMON_H)
+#define _TRACE_EVENTS_SKBTRACE_COMMON_H
+
+#include <linux/tracepoint.h>
+
+struct sk_buff;
+struct net_device;
+struct timer_list;
+
+/* Fired by get_rps_cpu() just before it returns, with the skb, its */
+/* device and the cpu value that is about to be returned. */
+DECLARE_TRACE(skb_rps_info,
+ TP_PROTO(struct sk_buff *skb, struct net_device *dev, int cpu),
+ TP_ARGS(skb, dev, cpu));
+
+/* Socket timer tracepoint. 'sk' is passed as void *. */
+/* NOTE(review): 'action' is presumably one of skbtrace_sk_timer_*; the */
+/* call sites are not visible here - confirm. */
+DECLARE_TRACE(sk_timer,
+ TP_PROTO(void *sk, struct timer_list *timer, int action),
+ TP_ARGS(sk, timer, action));
+
+#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8c4c070..cc49b26 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -367,6 +367,14 @@ config BLK_DEV_IO_TRACE
If unsure, say N.
+config SKBTRACE
+ tristate "skbtrace : flexible networking tracing"
+ help
+ A blktrace-like utility for the networking subsystem. This feature
+ can also be built as a kernel module.
+
+ If unsure, say N.
+
config KPROBE_EVENT
depends on KPROBES
depends on HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/net/core/Makefile b/net/core/Makefile
index 674641b..6a80a85 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,6 +18,8 @@ obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_NET_DMA) += user_dma.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
+obj-${CONFIG_SKBTRACE} += skbtrace.o
+skbtrace-objs := skbtrace-core.o skbtrace-events-common.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 89e33a5..b363716 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,8 @@
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
+#include <trace/events/skbtrace_common.h>
+#include <linux/skbtrace.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
@@ -2813,6 +2815,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
}
done:
+ trace_skb_rps_info(skb, dev, cpu);
return cpu;
}
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index ba3c012..41e1766 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -21,6 +21,7 @@
#include <linux/netlink.h>
#include <linux/net_dropmon.h>
#include <linux/slab.h>
+#include <linux/skbtrace.h>
#include <asm/unaligned.h>
#include <asm/bitops.h>
@@ -31,7 +32,30 @@
#include <trace/events/napi.h>
#include <trace/events/sock.h>
#include <trace/events/udp.h>
+#include <trace/events/skbtrace.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
+
+#if HAVE_SKBTRACE
+
+/* Define tracepoint 'name' and export it (GPL-only) in one step. */
+/* The expansion already ends in ';', so the ';' written after each use */
+/* below yields an extra empty declaration. */
+#define NEW_SKBTRACE_TP(name) \
+ DEFINE_TRACE(name); \
+ EXPORT_TRACEPOINT_SYMBOL_GPL(name);
+
+/* common events, declared in <trace/events/skbtrace_common.h> */
+NEW_SKBTRACE_TP(skb_rps_info);
+NEW_SKBTRACE_TP(sk_timer);
+
+/* NOTE(review): presumably declared in <trace/events/skbtrace_ipv4.h>, */
+/* which is not part of this patch - confirm. */
+NEW_SKBTRACE_TP(tcp_congestion);
+NEW_SKBTRACE_TP(tcp_connection);
+NEW_SKBTRACE_TP(icsk_connection);
+NEW_SKBTRACE_TP(tcp_sendlimit);
+NEW_SKBTRACE_TP(tcp_active_conn);
+NEW_SKBTRACE_TP(tcp_rttm);
+NEW_SKBTRACE_TP(tcp_ca_state);
+
+/* Session value that secondary buffers record at init time and that */
+/* lookups compare against. */
+unsigned long skbtrace_session;
+EXPORT_SYMBOL(skbtrace_session);
+
+#endif
diff --git a/net/core/skbtrace-core.c b/net/core/skbtrace-core.c
new file mode 100644
index 0000000..2c2ac3e
--- /dev/null
+++ b/net/core/skbtrace-core.c
@@ -0,0 +1,1226 @@
+/*
+ * skbtrace - sk_buff trace utility
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * 2012 Li Yu <bingtian.ly@taobao.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/relay.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/jhash.h>
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/skbtrace.h>
+#include <net/sock.h>
+
+#define SKBTRACE_VERSION "1"
+#define SKBTRACE_DIR "skbtrace"
+
+static unsigned long skbtrace_dropped[NR_CHANNELS][NR_CPUS];
+/* +1 for quick indexing trick in __skbtrace_probe() */
+static struct rchan *skbtrace_channels[NR_CHANNELS + 1];
+
+int sysctl_skbtrace_filter_default = 0;
+EXPORT_SYMBOL_GPL(sysctl_skbtrace_filter_default);
+static struct sk_buff **sock_filter_skb;
+static struct sock_fprog skb_filter_fprog;
+static struct sock_fprog sock_filter_fprog;
+struct sk_filter *skbtrace_skb_filter;
+EXPORT_SYMBOL_GPL(skbtrace_skb_filter);
+
+u32 skbtrace_sock_filter_id;
+EXPORT_SYMBOL_GPL(skbtrace_sock_filter_id);
+struct sk_filter *skbtrace_sock_filter;
+EXPORT_SYMBOL_GPL(skbtrace_sock_filter);
+
+static struct dentry *skbtrace_dentry;
+static struct dentry *enabled_control;
+static struct dentry *dropped_control;
+static struct dentry *version_control;
+static struct dentry *subbuf_nr_control;
+static struct dentry *subbuf_size_control;
+static struct dentry *filters_control;
+static struct dentry *sock_filters_control;
+
+static const struct file_operations enabled_fops;
+static const struct file_operations dropped_fops;
+static const struct file_operations version_fops;
+static const struct file_operations subbuf_nr_fops;
+static const struct file_operations subbuf_size_fops;
+static const struct file_operations filters_fops;
+static const struct file_operations sock_filters_fops;
+
+static int nr_skbtrace_enabled_tp;
+static int subbuf_nr = SKBTRACE_DEF_SUBBUF_NR;
+static int subbuf_size = SKBTRACE_DEF_SUBBUF_SIZE;
+
+static bool should_load_proto;
+
+struct static_key skbtrace_filters_enabled = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(skbtrace_filters_enabled);
+
+atomic64_t skbtrace_event_seq = ATOMIC64_INIT(0);
+EXPORT_SYMBOL_GPL(skbtrace_event_seq);
+
+/* protects af_tp_list and skbtrace_channels */
+static struct mutex skbtrace_lock;
+static struct skbtrace_tracepoint *af_tp_list[AF_MAX];
+struct skbtrace_ops* skbtrace_ops[AF_MAX];
+
+static int create_controls(void);
+static void remove_controls(void);
+static int create_channels(void);
+static void flush_channels(void);
+static void destroy_channels(void);
+static ssize_t sk_filter_read(struct sock_fprog *fprog, char __user
*buffer,
+ size_t count);
+static ssize_t sk_filter_write(struct sock_fprog *sk_fprog,
+ struct sk_filter **sk_filter,
+ const char __user *buffer, size_t count);
+static void reset_filter(struct sock_fprog *fprog, struct sk_filter
**filter);
+static void skbtrace_filters_clean(void);
+
+/*
+ * skbtrace_ops_get - return the protocol hooks stored for an address family
+ * @af: address family used as index into skbtrace_ops[]
+ *
+ * Returns the entry stored by skbtrace_register_proto(), or NULL when the
+ * slot is empty or @af lies outside [0, AF_MAX).
+ */
+struct skbtrace_ops* skbtrace_ops_get(int af)
+{
+	/* exported symbol: never index the table with an unchecked family */
+	if (af < 0 || af >= AF_MAX)
+		return NULL;
+	return skbtrace_ops[af];
+}
+EXPORT_SYMBOL_GPL(skbtrace_ops_get);
+
+/*
+ * One-shot request for the protocol specific event modules.  When
+ * should_load_proto is set, the flag is cleared and "skbtrace-af-<N>" is
+ * requested for every address family that has no tracepoint list yet.
+ */
+static void skbtrace_proto_load(void)
+{
+	int family;
+
+	if (should_load_proto) {
+		should_load_proto = false;
+
+		for (family = AF_UNSPEC; family < AF_MAX; family++) {
+			if (af_tp_list[family])
+				continue;
+			/* load proto-specific events */
+			request_module("skbtrace-af-%d", family);
+		}
+	}
+}
+
+/*
+ * Write one trace block into the relay channel selected by the current
+ * execution context, then mark the block as consumed.
+ *
+ * The channel index is built from two bits: bit 1 is set when in_irq() is
+ * true, bit 0 when in_softirq() is true.  Indexes at or above HW are written
+ * with relay_write(); lower ones use __relay_write() with bottom halves
+ * disabled around the call.  Index HW + 1 is usable because
+ * skbtrace_channels[] has one extra slot that create_channels() points at
+ * the HW channel.
+ *
+ * NOTE(review): the channel pointer is used without a NULL check - confirm
+ * that probes cannot run after destroy_channels().
+ */
+void __skbtrace_block_probe(struct skbtrace_block *blk)
+{
+ unsigned int chan_id;
+ struct rchan *rchan;
+
+ chan_id = (!!in_irq()) << 1;
+ chan_id |= !!in_softirq(); /* make sparse happy */
+ rchan = skbtrace_channels[chan_id];
+
+ if (unlikely(chan_id >= HW))
+ relay_write(rchan, blk, blk->len);
+ else {
+ local_bh_disable();
+ __relay_write(rchan, blk, blk->len);
+ local_bh_enable();
+ }
+ /* the block has been emitted; invalidate it so it is not reused as-is */
+ blk->action = skbtrace_action_invalid;
+}
+
+/*
+ * Emit a primary event: first replay every block held in the matching
+ * secondary buffer, then write @blk itself.
+ *
+ * With a context the buffer is looked up in ctx->sec_table, otherwise the
+ * tracepoint's own sec_buffer is used.  If no buffer is found while the
+ * tracepoint has secondary events attached, the block is flagged with
+ * skbtrace_flags_miss_secondary and written alone.
+ */
+void __skbtrace_do_probe(struct skbtrace_tracepoint *tp,
+ struct skbtrace_context *ctx,
+ struct skbtrace_block *blk)
+{
+ int i;
+ char *sec_blk;
+ struct secondary_buffer *buf;
+
+ if (ctx)
+ buf = secondary_table_lookup(&ctx->sec_table, tp);
+ else
+ buf = &tp->sec_buffer;
+
+ if (!buf) {
+ if (tp->nr_secondary)
+ blk->flags |= 1<<skbtrace_flags_miss_secondary;
+ goto quit;
+ }
+
+ spin_lock_bh(&buf->lock);
+ /* walk the slots backwards from the current offset, wrapping at zero */
+ for (i = 0; i < buf->count; i++) {
+ if (--buf->offset < 0)
+ buf->offset = SECONDARY_BUFFER_COUNTS - 1;
+ sec_blk = &buf->slots[buf->offset * SECONDARY_BUFFER_UNIT];
+ __skbtrace_block_probe((struct skbtrace_block*)sec_blk);
+ }
+ secondary_buffer_reset(buf);
+ spin_unlock_bh(&buf->lock);
+
+quit:
+ __skbtrace_block_probe(blk);
+}
+
+/*
+ * Entry point used by event probes to emit @blk.  Does nothing for a NULL
+ * tracepoint.  Only tracepoints without a primary (tp->primary == NULL) are
+ * written here; for a tracepoint that has a primary nothing is emitted by
+ * this function.
+ */
+void __skbtrace_probe(struct skbtrace_tracepoint *tp,
+ struct skbtrace_context *ctx,
+ struct skbtrace_block *blk)
+{
+ if (!tp)
+ return;
+ if (!tp->primary)
+ __skbtrace_do_probe(tp, ctx, blk);
+}
+EXPORT_SYMBOL_GPL(__skbtrace_probe);
+
+/*
+ * Put every entry of a tracepoint array (terminated by an entry whose
+ * trace_name is NULL) into its initial state: secondary buffer initialised,
+ * no primary, disabled.  A NULL array is accepted.
+ */
+static void __skbtrace_setup_tracepoints(struct skbtrace_tracepoint *tp_list)
+{
+	struct skbtrace_tracepoint *cur;
+
+	if (!tp_list)
+		return;
+
+	for (cur = tp_list; cur->trace_name; cur++) {
+		secondary_buffer_init(&cur->sec_buffer, cur);
+		cur->primary = NULL;
+		cur->enabled = 0;
+	}
+}
+
+/*
+ * Install @tp_list as the tracepoint array of address family @af.
+ * Caller holds skbtrace_lock and has validated @af.
+ *
+ * Returns 0 on success (a NULL @tp_list is an ops-only registration and
+ * leaves the slot empty), -EEXIST if the family already has a list, and
+ * -EINVAL if the array's first entry has no trace_name.
+ *
+ * An already registered list is never touched: the previous code reported
+ * -EEXIST but still overwrote the slot and re-initialised the tracepoints.
+ */
+static int __skbtrace_register_tracepoints(int af,
+				struct skbtrace_tracepoint *tp_list)
+{
+	if (af_tp_list[af])
+		return -EEXIST;
+
+	if (!tp_list)
+		return 0;
+
+	if (!tp_list[0].trace_name)
+		return -EINVAL;
+
+	__skbtrace_setup_tracepoints(tp_list);
+	af_tp_list[af] = tp_list;
+	return 0;
+}
+
+/*
+ * Drop the tracepoint array of address family @af.  Every entry that is
+ * still enabled is disabled first: its probe is unregistered, the global
+ * enabled counter is decremented and its secondary buffer is released.
+ * The family slot is cleared afterwards.
+ *
+ * NOTE(review): unlike __disable_tp() this path neither calls tp->disable
+ * nor adjusts the primary's nr_secondary - confirm that is intended.
+ */
+static void __skbtrace_unregister_tracepoints(int af)
+{
+ struct skbtrace_tracepoint *tp;
+
+ tp = af_tp_list[af];
+ while (tp && tp->trace_name) {
+ if (tp->enabled) {
+ tp->enabled = 0;
+ --nr_skbtrace_enabled_tp;
+ tracepoint_probe_unregister(tp->trace_name,
+ tp->probe, tp);
+ secondary_buffer_put(&tp->sec_buffer);
+ }
+ tp++;
+ }
+ af_tp_list[af] = NULL;
+}
+
+static inline int __skbtrace_register_ops(int af, struct skbtrace_ops *ops)
+{
+ if (skbtrace_ops[af])
+ return -EEXIST;
+ skbtrace_ops[af] = ops;
+ return 0;
+}
+
+static inline void __skbtrace_unregister_ops(int af)
+{
+ skbtrace_ops[af] = NULL;
+}
+
+/*
+ * skbtrace_register_proto - register tracepoints and hooks of one family
+ * @af: address family, must be within [0, AF_MAX)
+ * @tp_list: tracepoint array terminated by an entry without trace_name
+ * @ops: protocol hooks stored in skbtrace_ops[@af]
+ *
+ * Both registrations happen under skbtrace_lock.  If the ops slot is
+ * already taken, the tracepoint registration done just before is undone.
+ * Returns 0 on success, -EINVAL for a bad @af, or the error of the failing
+ * registration step.
+ */
+int skbtrace_register_proto(int af,
+ struct skbtrace_tracepoint *tp_list,
+ struct skbtrace_ops *ops)
+{
+ int ret;
+
+ if (af < 0 || af >= AF_MAX)
+ return -EINVAL;
+
+ mutex_lock(&skbtrace_lock);
+ ret = __skbtrace_register_tracepoints(af, tp_list);
+ if (!ret) {
+ ret = __skbtrace_register_ops(af, ops);
+ if (ret)
+ __skbtrace_unregister_tracepoints(af);
+ }
+ mutex_unlock(&skbtrace_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(skbtrace_register_proto);
+
+void skbtrace_unregister_proto(int af)
+{
+ if (af < 0 || af >= AF_MAX)
+ return;
+
+ mutex_lock(&skbtrace_lock);
+ __skbtrace_unregister_tracepoints(af);
+ __skbtrace_unregister_ops(af);
+ mutex_unlock(&skbtrace_lock);
+
+ flush_channels();
+ should_load_proto = true;
+}
+EXPORT_SYMBOL_GPL(skbtrace_unregister_proto);
+
+void skbtrace_context_setup(struct skbtrace_context *ctx,
+ struct skbtrace_ops *ops)
+{
+ ctx->ops = ops;
+ ctx->session = skbtrace_session;
+ secondary_table_init(&ctx->sec_table);
+}
+EXPORT_SYMBOL(skbtrace_context_setup);
+
+/*
+ * skbtrace_context_get - return the per-socket trace context, creating it
+ * on demand
+ * @sk: socket whose sk_skbtrace context is wanted
+ *
+ * Returns NULL when no ops are registered for the socket's family, or when
+ * a context is needed and the atomic allocation fails.  A context left over
+ * from another session (session id differs from skbtrace_session) is
+ * destroyed before a fresh one is allocated.
+ */
+struct skbtrace_context *skbtrace_context_get(struct sock *sk)
+{
+ struct skbtrace_ops *ops;
+ struct skbtrace_context *ctx;
+
+ ops = skbtrace_ops_get(sk->sk_family);
+ if (!ops)
+ return NULL;
+ local_bh_disable();
+
+ /* presumably skbtrace_context_destroy() also clears the pointer, since
+ * the next test relies on it being NULL - verify against its definition */
+ if (sk->sk_skbtrace &&
+ (skbtrace_session != sk->sk_skbtrace->session))
+ skbtrace_context_destroy(&sk->sk_skbtrace);
+
+ if (!sk->sk_skbtrace) {
+ ctx = kzalloc(sizeof(struct skbtrace_context), GFP_ATOMIC);
+ if (likely(ctx)) {
+ skbtrace_context_setup(ctx, ops);
+ sk->sk_skbtrace = ctx;
+ }
+ }
+
+ local_bh_enable();
+ return sk->sk_skbtrace;
+}
+EXPORT_SYMBOL(skbtrace_context_get);
+
+static int subbuf_start_handler(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ size_t prev_padding)
+{
+ if (relay_buf_full(buf)) {
+ long trace, cpu;
+
+ trace = (long)buf->chan->private_data;
+ cpu = buf->cpu;
+ skbtrace_dropped[trace][cpu]++;
+ return 0;
+ }
+ return 1;
+}
+
+static struct dentry *create_buf_file_handler(const char *filename,
+ struct dentry *parent,
+ umode_t mode,
+ struct rchan_buf *buf,
+ int *is_global)
+{
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+}
+
+static int remove_buf_file_handler(struct dentry *dentry)
+{
+ debugfs_remove(dentry);
+ return 0;
+}
+
+static struct rchan_callbacks relayfs_callbacks = {
+ .subbuf_start = subbuf_start_handler,
+ .create_buf_file = create_buf_file_handler,
+ .remove_buf_file = remove_buf_file_handler,
+};
+
+/*
+ * Open every relay channel that does not exist yet.
+ *
+ * Caller must hold skbtrace_lock.  One module reference is taken when at
+ * least one channel was opened; destroy_channels() drops it again.
+ * Returns 0 on success or -ENOMEM if a channel could not be opened, in
+ * which case all channels are torn down.
+ */
+static int create_channels(void)
+{
+	static const char * const skbtrace_names[NR_CHANNELS] = {
+		"trace.syscall.cpu",
+		"trace.softirq.cpu",
+		"trace.hardirq.cpu",
+	};
+	unsigned long i;
+	bool created = false;
+
+	for (i = 0; i < NR_CHANNELS; i++) {
+		if (skbtrace_channels[i])
+			continue;
+		/* the channel index travels as private_data so that
+		 * subbuf_start_handler() can account drops per channel */
+		skbtrace_channels[i] = relay_open(skbtrace_names[i],
+				skbtrace_dentry, subbuf_size, subbuf_nr,
+				&relayfs_callbacks, (void *)i);
+		if (!skbtrace_channels[i]) {
+			/*
+			 * destroy_channels() does a module_put() as soon as
+			 * it closes anything; take the reference it is going
+			 * to drop so the refcount stays balanced.
+			 */
+			if (created)
+				__module_get(THIS_MODULE);
+			destroy_channels();
+			return -ENOMEM;
+		}
+		created = true;
+	}
+	/* extra slot: index HW + 1 maps to the hardirq channel as well */
+	skbtrace_channels[HW + 1] = skbtrace_channels[HW];
+
+	if (created)
+		__module_get(THIS_MODULE);
+	return 0;
+}
+
+static void flush_channels(void)
+{
+ int i;
+ for (i = 0; i < NR_CHANNELS; i++) {
+ if (skbtrace_channels[i])
+ relay_flush(skbtrace_channels[i]);
+ }
+}
+
+/* caller must hold skbtrace_lock */
+static void destroy_channels(void)
+{
+ int i, removed;
+
+ removed = 0;
+ for (i = 0; i < NR_CHANNELS; i++) {
+ if (skbtrace_channels[i]) {
+ relay_flush(skbtrace_channels[i]);
+ relay_close(skbtrace_channels[i]);
+ skbtrace_channels[i] = NULL;
+ removed = 1;
+ }
+ }
+ skbtrace_channels[HW + 1] = NULL;
+
+ if (removed)
+ module_put(THIS_MODULE);
+}
+
+/*
+ * Remove every control file created by create_controls().  Entries that
+ * were never created are skipped, and each pointer is cleared so that a
+ * second call is harmless.
+ */
+static void remove_controls(void)
+{
+#define REMOVE_DEBUGFS_FILE(name) \
+	do {\
+		if (name##_control) {\
+			debugfs_remove(name##_control); \
+			name##_control = NULL; \
+		} \
+	} while (0)
+
+	REMOVE_DEBUGFS_FILE(enabled);
+	REMOVE_DEBUGFS_FILE(dropped);
+	REMOVE_DEBUGFS_FILE(version);
+	REMOVE_DEBUGFS_FILE(subbuf_nr);
+	REMOVE_DEBUGFS_FILE(subbuf_size);
+	REMOVE_DEBUGFS_FILE(filters);
+	REMOVE_DEBUGFS_FILE(sock_filters);
+
+#undef REMOVE_DEBUGFS_FILE
+}
+
+static int create_controls(void)
+{
+#define CREATE_DEBUGFS_FILE(name)\
+ do {\
+ name##_control = debugfs_create_file(#name, 0,\
+ skbtrace_dentry, NULL, &name##_fops);\
+ if (name##_control)\
+ break;\
+ pr_err("skbtrace: couldn't create relayfs file '" #name "'\n");\
+ goto fail;\
+ } while (0);
+
+ CREATE_DEBUGFS_FILE(enabled)
+ CREATE_DEBUGFS_FILE(dropped)
+ CREATE_DEBUGFS_FILE(version)
+ CREATE_DEBUGFS_FILE(subbuf_nr)
+ CREATE_DEBUGFS_FILE(subbuf_size)
+ CREATE_DEBUGFS_FILE(filters)
+ CREATE_DEBUGFS_FILE(sock_filters)
+
+#undef CREATE_DEBUGFS_FILE
+ return 0;
+fail:
+ remove_controls();
+ return -1;
+}
+
+static char *skbtrace_tracepoint_default_desc(struct
skbtrace_tracepoint *t)
+{
+ char *desc;
+ int n;
+
+ n = strlen(t->trace_name) + 64;
+ desc = kmalloc(n, GFP_KERNEL);
+ if (!desc)
+ return NULL;
+
+ snprintf(desc, n, "%s enabled:%d\n", t->trace_name, !!t->enabled);
+ return desc;
+}
+
+static char *skbtrace_tracepoint_desc(struct skbtrace_tracepoint *tp)
+{
+ if (tp->desc)
+ return tp->desc(tp);
+ return skbtrace_tracepoint_default_desc(tp);
+}
+
+/*
+ * read() handler of the "enabled" control file.
+ *
+ * Each call returns the description of exactly one tracepoint: the first
+ * one whose cumulative description length goes past *ppos.  Returns the
+ * number of bytes copied, 0 once all tracepoints have been reported,
+ * -EINVAL if @count is too small for the next description, -EFAULT on a
+ * failed copy and -ENOMEM if the description cannot be allocated.
+ */
+static ssize_t enabled_read(struct file *filp, char __user *buffer,
+			    size_t count, loff_t *ppos)
+{
+	ssize_t ret = 0;
+	loff_t offset = 0;
+	size_t len;
+	struct skbtrace_tracepoint *tp;
+	int af;
+	char *desc = NULL;
+
+	skbtrace_proto_load();
+
+	mutex_lock(&skbtrace_lock);
+	for (af = AF_UNSPEC; af < AF_MAX; af++) {
+		for (tp = af_tp_list[af]; tp && tp->trace_name; tp++) {
+			kfree(desc);
+			desc = skbtrace_tracepoint_desc(tp);
+			if (!desc) {
+				/* must not return with skbtrace_lock held */
+				ret = -ENOMEM;
+				goto unlock;
+			}
+			len = strlen(desc);
+			offset += len;
+			if (offset <= *ppos)
+				continue;	/* already delivered */
+
+			if (count < len)
+				ret = -EINVAL;
+			else if (copy_to_user(buffer, desc, len))
+				ret = -EFAULT;
+			else {
+				*ppos += len;
+				ret = len;
+			}
+			goto unlock;
+		}
+	}
+unlock:
+	kfree(desc);
+	mutex_unlock(&skbtrace_lock);
+
+	return ret;
+}
+
+/*
+ * Search all registered address families for the tracepoint called @name.
+ * Returns the matching entry or NULL if there is none.
+ */
+static struct skbtrace_tracepoint *skbtrace_lookup_tp(char *name)
+{
+	struct skbtrace_tracepoint *cur;
+	int family;
+
+	for (family = AF_UNSPEC; family < AF_MAX; family++) {
+		for (cur = af_tp_list[family]; cur && cur->trace_name; cur++) {
+			if (strcmp(name, cur->trace_name) == 0)
+				return cur;
+		}
+	}
+
+	return NULL;
+}
+
+struct skbtrace_options_context {
+ char *name;
+ char *options;
+ struct skbtrace_tracepoint *primary;
+};
+
+struct option_handler {
+ char *key;
+ int (*handler)(struct skbtrace_options_context *ctx, char *val);
+};
+
+static int handle_primary_option(struct skbtrace_options_context *ctx,
char *val)
+{
+ ctx->primary = skbtrace_lookup_tp(val);
+ if (!ctx->primary)
+ return -EINVAL;
+ return 0;
+}
+
+static struct option_handler common_handlers[] = {
+ {
+ .key = "primary=",
+ .handler = handle_primary_option,
+ },
+ {
+ .key = NULL,
+ },
+};
+
+/*
+ * Split "NAME,opt1=val1,opt2=val2,..." in place and consume the options
+ * known to @handlers.
+ *
+ * @ctx is zeroed first.  Without any comma the function returns 0 and
+ * ctx->options stays NULL.  Otherwise the first comma is replaced by a NUL
+ * (so @event_spec becomes the bare event name) and ctx->options points to
+ * the text after it.
+ *
+ * Each comma separated option is compared against the handler keys as a
+ * prefix.  A handled option is cut out of the string; an option no handler
+ * knows is left in place, so that ctx->options finally holds only the
+ * options this function did not consume.  Returns -EINVAL as soon as a
+ * handler rejects its value, 0 otherwise.
+ */
+static int handle_options(char *event_spec, struct option_handler
*handlers,
+ struct skbtrace_options_context *ctx)
+{
+ char *option;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->options = strchr(event_spec, ',');
+ if (!ctx->options)
+ return 0;
+ *(ctx->options) = '\x0';
+ option = ++(ctx->options);
+
+ while (option && *option) {
+ char *end;
+ struct option_handler *h;
+
+ /* temporarily terminate the current option at the next comma */
+ end = strchr(option, ',');
+ if (end)
+ *end = '\x0';
+ h = &handlers[0];
+ while (h->key) {
+ /* key must match at the very start of the option */
+ if (strstr(option, h->key) == option) {
+ int ret;
+ char *val;
+
+ val = option + strlen(h->key);
+ ret = h->handler(ctx, val);
+ if (!ret)
+ break;
+ else
+ return -EINVAL;
+ }
+ h++;
+ }
+ if (!h->key) {
+ /* unknown option: restore the comma and step over it */
+ if (end) {
+ *end = ',';
+ option = end + 1;
+ } else
+ break;
+ } else {
+ /* handled option: remove it; 'option' then points at the next one */
+ if (end) {
+ memmove(option, end + 1, strlen(end + 1) + 1);
+ } else
+ *option = '\x0';
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Enable one tracepoint: run its optional enable hook, then register its
+ * probe.  On success the primary chosen in @ctx is recorded (and that
+ * primary's nr_secondary is incremented) and the tracepoint is marked
+ * enabled.  If registration fails the optional disable hook is run to undo
+ * the enable hook.
+ *
+ * Returns 0, -EBUSY if the tracepoint is already enabled, or the error
+ * from tracepoint_probe_register().
+ */
+static int __enable_tp(struct skbtrace_tracepoint *tp,
+ struct skbtrace_options_context *ctx)
+{
+ int ret = 0;
+
+ if (tp->enabled)
+ return -EBUSY;
+
+ if (tp->enable)
+ tp->enable(tp);
+ ret = tracepoint_probe_register(tp->trace_name, tp->probe, tp);
+ if (!ret) {
+ tp->primary = ctx->primary;
+ if (tp->primary)
+ tp->primary->nr_secondary++;
+ tp->enabled = 1;
+ } else {
+ if (tp->disable)
+ tp->disable(tp);
+ }
+
+ return ret;
+}
+
+/*
+ * Disable one tracepoint: unregister its probe, run the optional disable
+ * hook, release the primary's secondary buffer and decrement the primary's
+ * nr_secondary when a primary is attached, then clear the enabled flag.
+ *
+ * Returns 0, -EINVAL if the tracepoint is not enabled, or the error from
+ * tracepoint_probe_unregister() (in which case nothing else is changed).
+ */
+static int __disable_tp(struct skbtrace_tracepoint *tp)
+{
+ int ret;
+
+ if (!tp->enabled)
+ return -EINVAL;
+
+ ret = tracepoint_probe_unregister(tp->trace_name, tp->probe, tp);
+ if (ret)
+ return ret;
+
+ if (tp->disable)
+ tp->disable(tp);
+ if (tp->primary) {
+ secondary_buffer_put(&tp->primary->sec_buffer);
+ tp->primary->nr_secondary--;
+ }
+ tp->enabled = 0;
+ return 0;
+}
+
+/*
+ * Enable the event described by @event_spec ("NAME[,opt=val,...]").
+ *
+ * @event_spec is modified in place by the option parser.  The relay
+ * channels are created when no tracepoint is enabled yet.  Returns 0 on
+ * success, -EINVAL for an unknown or already enabled event or a rejected
+ * option, or the error of channel creation / probe registration.
+ */
+static int skbtrace_enable_tp(char *event_spec)
+{
+	struct skbtrace_options_context ctx;
+	struct skbtrace_tracepoint *tp;
+	int ret;
+
+	mutex_lock(&skbtrace_lock);
+
+	/*
+	 * The "primary=" handler walks af_tp_list[], which skbtrace_lock
+	 * protects, so the options have to be parsed with the lock held.
+	 */
+	ret = handle_options(event_spec, common_handlers, &ctx);
+	if (ret)
+		goto unlock;
+	ctx.name = event_spec;
+
+	if (!nr_skbtrace_enabled_tp) {
+		ret = create_channels();
+		if (ret)
+			goto unlock;
+	}
+
+	tp = skbtrace_lookup_tp(ctx.name);
+	if (!tp || tp->enabled) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	/* options left over by the common handlers belong to the event */
+	if (ctx.options && tp->setup_options) {
+		ret = tp->setup_options(tp, ctx.options);
+		if (ret)
+			goto unlock;
+	}
+
+	ret = __enable_tp(tp, &ctx);
+
+	if (ret && !nr_skbtrace_enabled_tp)
+		destroy_channels();
+	else if (!ret)
+		++nr_skbtrace_enabled_tp;
+
+unlock:
+	mutex_unlock(&skbtrace_lock);
+	return ret;
+}
+
+/*
+ * Handler of the "-*" command.  It has two meanings:
+ *
+ * (0) while tracepoints are enabled, it disables all of them and flushes
+ *     the channels;
+ * (1) when nothing is enabled, it drops the filters, starts a new session
+ *     and removes all channels.
+ *
+ * Returns 0 on success, or the first error reported while disabling an
+ * enabled tracepoint.  Tracepoints that are not enabled are skipped and do
+ * not influence the result.
+ */
+static int skbtrace_disable_all_tp(void)
+{
+	int ret = 0, err, af;
+	struct skbtrace_tracepoint *tp;
+
+	mutex_lock(&skbtrace_lock);
+
+	if (!nr_skbtrace_enabled_tp) {
+		skbtrace_filters_clean();
+		++skbtrace_session;
+		/* destroy_channels() requires skbtrace_lock */
+		destroy_channels();
+		mutex_unlock(&skbtrace_lock);
+		return 0;
+	}
+
+	for (af = AF_UNSPEC; af < AF_MAX; af++) {
+		for (tp = af_tp_list[af]; tp && tp->trace_name; tp++) {
+			if (!tp->enabled)
+				continue;
+			err = __disable_tp(tp);
+			if (!err)
+				--nr_skbtrace_enabled_tp;
+			else if (!ret)
+				ret = err;
+		}
+	}
+	mutex_unlock(&skbtrace_lock);
+	flush_channels();
+
+	return ret;
+}
+
+/*
+ * write() handler of the "enabled" control file.  The written string is
+ * one of:
+ * (0) "TRACE_NAME,opt1=val1,opt2=val2,..." to enable one skbtrace event;
+ * (1) "-*" to disable all skbtrace events.
+ *
+ * The comparison with "-*" is exact, so the data must not carry a trailing
+ * newline.  Writes of TRACE_SPEC_MAX_LEN bytes or more are rejected with
+ * -EINVAL.  Returns @count on success, otherwise a negative errno.
+ */
+static ssize_t enabled_write(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ char kbuf[TRACE_SPEC_MAX_LEN+1];
+ int ret;
+
+ skbtrace_proto_load();
+
+ if (count >= TRACE_SPEC_MAX_LEN)
+ return -EINVAL;
+ if (copy_from_user(kbuf, buffer, count))
+ return -EFAULT;
+ kbuf[count] = '\x0';
+
+ if (strcmp("-*", kbuf))
+ ret = skbtrace_enable_tp(&kbuf[0]);
+ else
+ ret = skbtrace_disable_all_tp();
+
+ return ret ?: count;
+}
+
+static int kmod_open(struct inode *inodep, struct file *filp)
+{
+ __module_get(THIS_MODULE);
+ return 0;
+}
+
+static int kmod_release(struct inode *inodep, struct file *filp)
+{
+ module_put(THIS_MODULE);
+ return 0;
+}
+
+static const struct file_operations enabled_fops = {
+ .owner = THIS_MODULE,
+ .open = kmod_open,
+ .release = kmod_release,
+ .read = enabled_read,
+ .write = enabled_write,
+};
+
+static ssize_t dropped_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+
+ char buf[256];
+ unsigned long skbtrace_total_dropped[NR_CHANNELS] = {0, 0, 0};
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ skbtrace_total_dropped[HW] += skbtrace_dropped[HW][cpu];
+ skbtrace_total_dropped[SI] += skbtrace_dropped[SI][cpu];
+ skbtrace_total_dropped[SC] += skbtrace_dropped[SC][cpu];
+ }
+
+ snprintf(buf, sizeof(buf), "%lu %lu %lu\n",
+ skbtrace_total_dropped[HW],
+ skbtrace_total_dropped[SI],
+ skbtrace_total_dropped[SC]
+ );
+
+ return simple_read_from_buffer(buffer, count, ppos,
+ buf, strlen(buf));
+}
+
+static ssize_t dropped_write(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ memset(skbtrace_dropped, 0, sizeof(skbtrace_dropped));
+ return count;
+}
+
+static const struct file_operations dropped_fops = {
+ .owner = THIS_MODULE,
+ .open = kmod_open,
+ .release = kmod_release,
+ .read = dropped_read,
+ .write = dropped_write,
+};
+
+static ssize_t version_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return simple_read_from_buffer(buffer, count, ppos,
+ SKBTRACE_VERSION "\n",
+ strlen(SKBTRACE_VERSION "\n"));
+}
+
+static const struct file_operations version_fops = {
+ .owner = THIS_MODULE,
+ .open = kmod_open,
+ .release = kmod_release,
+ .read = version_read,
+};
+
+/*
+ * Shared read() helper of the subbuf_nr / subbuf_size files: reports
+ * @which as a decimal number followed by a newline.
+ */
+static ssize_t subbuf_x_read(struct file *filp, char __user *buffer,
+			     size_t count, loff_t *ppos, int which)
+{
+	char text[24];
+	int len;
+
+	/* a 32-bit int plus newline always fits, so len is the string length */
+	len = snprintf(text, sizeof(text), "%d\n", which);
+	return simple_read_from_buffer(buffer, count, ppos, text, len);
+}
+
+static ssize_t subbuf_x_write(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos,
+ int *which, int min_val, int max_val)
+{
+ char buf[24];
+ int v;
+
+ if (nr_skbtrace_enabled_tp)
+ return -EBUSY;
+
+ if (!buffer || count > sizeof(buf) - 1)
+ return -EINVAL;
+ memset(buf, 0, sizeof(buf));
+ if (copy_from_user(buf, buffer, count))
+ return -EFAULT;
+ if (sscanf(buf, "%d", &v) != 1)
+ return -EINVAL;
+ if (v < min_val || v > max_val)
+ return -EINVAL;
+
+ *which = v;
+ return count;
+}
+
+static ssize_t subbuf_nr_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return subbuf_x_read(filp, buffer, count, ppos, subbuf_nr);
+}
+
+static ssize_t subbuf_nr_write(struct file *filp, const char __user
*buffer,
+ size_t count, loff_t *ppos)
+{
+ return subbuf_x_write(filp, buffer, count, ppos, &subbuf_nr,
+ SKBTRACE_MIN_SUBBUF_NR, SKBTRACE_MAX_SUBBUF_NR);
+}
+
+static const struct file_operations subbuf_nr_fops = {
+ .owner = THIS_MODULE,
+ .open = kmod_open,
+ .release = kmod_release,
+ .read = subbuf_nr_read,
+ .write = subbuf_nr_write,
+};
+
+static ssize_t subbuf_size_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return subbuf_x_read(filp, buffer, count, ppos, subbuf_size);
+}
+
+static ssize_t subbuf_size_write(struct file *filp, const char __user
*buffer,
+ size_t count, loff_t *ppos)
+{
+ return subbuf_x_write(filp, buffer, count, ppos, &subbuf_size,
+ SKBTRACE_MIN_SUBBUF_SIZE, SKBTRACE_MAX_SUBBUF_SIZE);
+}
+
+static const struct file_operations subbuf_size_fops = {
+ .owner = THIS_MODULE,
+ .open = kmod_open,
+ .release = kmod_release,
+ .read = subbuf_size_read,
+ .write = subbuf_size_write,
+};
+
+/*
+ * skbtrace_get_twsk_filter_skb - build the per-cpu filter skb for a
+ * timewait socket
+ * @tw: timewait socket to describe
+ *
+ * Bottom halves are disabled on entry.  On success the per-cpu skb filled
+ * in by ops->tw_filter_skb() is returned and the caller has to release it
+ * with skbtrace_put_twsk_filter_skb().  Returns NULL if the family has no
+ * tw_filter_skb hook, the skb cannot be allocated, or the hook fails.
+ */
+struct sk_buff* skbtrace_get_twsk_filter_skb(struct inet_timewait_sock *tw)
+{
+	struct skbtrace_ops *ops;
+	struct sk_buff **p_skb;
+
+	local_bh_disable();
+
+	/* test the hook that is actually called below, not ->filter_skb */
+	ops = skbtrace_ops_get(tw->tw_family);
+	if (!ops || !ops->tw_filter_skb)
+		goto fail;
+
+	p_skb = per_cpu_ptr(sock_filter_skb, smp_processor_id());
+	if (unlikely(!*p_skb)) {
+		/* this cpu had no skb yet (e.g. it was offline at init) */
+		*p_skb = alloc_skb(1500, GFP_ATOMIC);
+		if (!*p_skb)
+			goto fail;
+	}
+
+	if (ops->tw_filter_skb(tw, *p_skb) < 0) {
+		skbtrace_put_twsk_filter_skb(*p_skb);
+		return NULL;
+	}
+
+	return *p_skb;
+
+fail:
+	local_bh_enable();
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(skbtrace_get_twsk_filter_skb);
+
+/*
+ * skbtrace_get_sock_filter_skb - build the per-cpu filter skb for a socket
+ * @sk: socket to describe
+ *
+ * Bottom halves are disabled on entry.  Returns the per-cpu skb filled in
+ * by ops->filter_skb(), or NULL if the family has no filter_skb hook, the
+ * skb cannot be allocated, or the hook reports an error.
+ */
+struct sk_buff* skbtrace_get_sock_filter_skb(struct sock *sk)
+{
+	struct skbtrace_ops *ops;
+	struct sk_buff **slot;
+
+	local_bh_disable();
+
+	ops = skbtrace_ops_get(sk->sk_family);
+	if (!ops || !ops->filter_skb)
+		goto fail;
+
+	slot = per_cpu_ptr(sock_filter_skb, smp_processor_id());
+	if (unlikely(!*slot)) {
+		*slot = alloc_skb(1500, GFP_ATOMIC);
+		if (!*slot)
+			goto fail;
+	}
+
+	if (ops->filter_skb(sk, *slot) < 0) {
+		skbtrace_put_sock_filter_skb(*slot);
+		return NULL;
+	}
+
+	return *slot;
+
+fail:
+	local_bh_enable();
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(skbtrace_get_sock_filter_skb);
+
+/*
+ * Report the currently loaded filter program to user space.
+ *
+ * @buffer must hold a struct sock_fprog whose ->filter member already
+ * points at a user array big enough for the program: the instruction
+ * count is stored into ->len and the instructions are copied to the array
+ * ->filter points at.
+ *
+ * Returns sizeof(struct sock_fprog) plus the program size on success,
+ * -EINVAL if no program is loaded or @count is smaller than that value,
+ * -EFAULT on a failed user copy.
+ */
+static ssize_t sk_filter_read(struct sock_fprog *fprog, char __user *buffer,
+			      size_t count)
+{
+	size_t sz_filter;
+	struct sock_filter __user *user_filter;
+
+	if (!fprog || !fprog->filter)
+		return -EINVAL;
+	sz_filter = fprog->len * sizeof(struct sock_filter);
+	if (count < sizeof(struct sock_fprog) + sz_filter)
+		return -EINVAL;
+
+	if (copy_to_user(buffer, &fprog->len, sizeof(fprog->len)))
+		return -EFAULT;
+
+	/*
+	 * The pointer sits at its structure offset, which is not
+	 * sizeof(short) because of alignment padding; sk_filter_write()
+	 * takes the same struct sock_fprog layout from user space.
+	 */
+	if (copy_from_user(&user_filter,
+			   buffer + offsetof(struct sock_fprog, filter),
+			   sizeof(user_filter)))
+		return -EFAULT;
+	if (copy_to_user(user_filter, fprog->filter, sz_filter))
+		return -EFAULT;
+
+	return sizeof(struct sock_fprog) + sz_filter;
+}
+
+/*
+ * Load a filter program supplied by user space.
+ *
+ * @buffer holds a struct sock_fprog; its ->filter member points at the
+ * user array of instructions.  The instructions are copied into a kernel
+ * buffer kept in @sk_fprog and an unattached filter is created in
+ * *@sk_filter.  skbtrace_filters_enabled is only incremented once the
+ * filter really exists.
+ *
+ * Returns sizeof(struct sock_fprog) plus the program size on success,
+ * -EINVAL for a short write, an empty program or an already loaded
+ * program, -EFAULT on a failed user copy, -ENOMEM if the kernel buffer
+ * cannot be allocated, or the error of sk_unattached_filter_create().
+ * On every failure @sk_fprog is left zeroed.
+ */
+static ssize_t sk_filter_write(struct sock_fprog *sk_fprog,
+			       struct sk_filter **sk_filter,
+			       const char __user *buffer, size_t count)
+{
+	size_t sz_filter;
+	int ret;
+	struct sock_filter __user *user_filter;
+
+	if (count < sizeof(struct sock_fprog) || sk_fprog->filter)
+		return -EINVAL;
+
+	if (copy_from_user(sk_fprog, buffer, sizeof(struct sock_fprog))) {
+		ret = -EFAULT;
+		goto clear;
+	}
+	if (!sk_fprog->len) {
+		ret = -EINVAL;
+		goto clear;
+	}
+	sz_filter = sk_fprog->len * sizeof(struct sock_filter);
+	user_filter = sk_fprog->filter;
+
+	sk_fprog->filter = kzalloc(sz_filter, GFP_KERNEL);
+	if (!sk_fprog->filter) {
+		ret = -ENOMEM;
+		goto clear;
+	}
+
+	if (copy_from_user(sk_fprog->filter, user_filter, sz_filter)) {
+		ret = -EFAULT;
+		goto reset;
+	}
+
+	ret = sk_unattached_filter_create(sk_filter, sk_fprog);
+	if (ret)
+		goto reset;
+
+	static_key_slow_inc(&skbtrace_filters_enabled);
+	return sizeof(struct sock_fprog) + sz_filter;
+
+reset:
+	/* frees the kernel copy and zeroes *sk_fprog */
+	reset_filter(sk_fprog, sk_filter);
+	return ret;
+clear:
+	/*
+	 * ->filter is NULL or still a user pointer here; wipe it so that a
+	 * later reset_filter() never hands it to kfree().
+	 */
+	memset(sk_fprog, 0, sizeof(*sk_fprog));
+	return ret;
+}
+
+static ssize_t filters_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos, struct sock_fprog *fprog)
+{
+ return sk_filter_read(fprog, buffer, count);
+}
+
+static ssize_t skb_filters_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return filters_read(filp, buffer, count, ppos, &skb_filter_fprog);
+}
+
+static ssize_t sock_filters_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return filters_read(filp, buffer, count, ppos, &sock_filter_fprog);
+}
+
+static ssize_t filters_write(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos,
+ struct sock_fprog *fprog, struct sk_filter **filter)
+
+{
+ skbtrace_proto_load();
+
+ if (nr_skbtrace_enabled_tp)
+ return -EBUSY;
+ reset_filter(fprog, filter);
+ return sk_filter_write(fprog, filter, buffer, count);
+}
+
+static ssize_t skb_filters_write(struct file *filp, const char __user
*buffer,
+ size_t count, loff_t *ppos)
+{
+ return filters_write(filp, buffer, count, ppos,
+ &skb_filter_fprog, &skbtrace_skb_filter);
+}
+
+static ssize_t sock_filters_write(struct file *filp, const char __user
*buffer,
+ size_t count, loff_t *ppos)
+{
+ if (unlikely(!++skbtrace_sock_filter_id))
+ skbtrace_sock_filter_id = 1;
+ return filters_write(filp, buffer, count, ppos,
+ &sock_filter_fprog, &skbtrace_sock_filter);
+}
+
+static const struct file_operations filters_fops = {
+ .owner = THIS_MODULE,
+ .open = kmod_open,
+ .release = kmod_release,
+ .read = skb_filters_read,
+ .write = skb_filters_write,
+};
+
+static const struct file_operations sock_filters_fops = {
+ .owner = THIS_MODULE,
+ .open = kmod_open,
+ .release = kmod_release,
+ .read = sock_filters_read,
+ .write = sock_filters_write,
+};
+
+/*
+ * Forget a loaded filter: free the kernel copy of the program, zero
+ * @fprog and, when a compiled filter exists, decrement
+ * skbtrace_filters_enabled and destroy it.
+ */
+static void reset_filter(struct sock_fprog *fprog, struct sk_filter **filter)
+{
+	struct sk_filter *old = *filter;
+
+	/* kfree() ignores a NULL pointer */
+	kfree(fprog->filter);
+	memset(fprog, 0, sizeof(struct sock_fprog));
+
+	if (!old)
+		return;
+
+	static_key_slow_dec(&skbtrace_filters_enabled);
+	sk_unattached_filter_destroy(old);
+	*filter = NULL;
+}
+
+static void skbtrace_filters_clean(void)
+{
+ reset_filter(&sock_filter_fprog, &skbtrace_sock_filter);
+ reset_filter(&skb_filter_fprog, &skbtrace_skb_filter);
+}
+
+/*
+ * Release everything setup_skbtrace_filters() and the filter control
+ * files may have allocated: both filter programs and the per-cpu scratch
+ * skbs.  Every pointer is cleared, so calling this more than once, or
+ * after a failed per-cpu allocation, is harmless.
+ */
+static void clean_skbtrace_filters(void)
+{
+	unsigned int cpu;
+
+	/* frees both programs and resets the pointers and the static key */
+	skbtrace_filters_clean();
+
+	if (!sock_filter_skb)
+		return;
+
+	/* kfree_skb() ignores the NULL slots of cpus that never got an skb */
+	for_each_possible_cpu(cpu)
+		kfree_skb(*per_cpu_ptr(sock_filter_skb, cpu));
+	free_percpu(sock_filter_skb);
+	sock_filter_skb = NULL;
+}
+
+/*
+ * Prepare the filter state at module load: pick a random socket filter id,
+ * make sure no filter is loaded and allocate one scratch skb for every
+ * online cpu (slots of the other cpus stay NULL and are filled on first
+ * use).
+ *
+ * Returns 0 on success or -ENOMEM; nothing stays allocated on failure.
+ */
+static int setup_skbtrace_filters(void)
+{
+	unsigned int cpu;
+	bool failed = false;
+
+	skbtrace_sock_filter_id = random32();
+
+	skbtrace_filters_clean();
+
+	sock_filter_skb = alloc_percpu(struct sk_buff*);
+	if (!sock_filter_skb)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct sk_buff **p_skb;
+
+		p_skb = per_cpu_ptr(sock_filter_skb, cpu);
+		if (cpu_online(cpu)) {
+			*p_skb = alloc_skb(1500, GFP_KERNEL);
+			if (!*p_skb)
+				failed = true;
+		} else
+			*p_skb = NULL;
+	}
+
+	if (failed) {
+		clean_skbtrace_filters();
+		/* the per-cpu area is gone, do not leave a stale pointer */
+		sock_filter_skb = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Module entry point: set up the lock, the session id and the filter
+ * state, register the common (AF_UNSPEC) events and create the debugfs
+ * directory with its control files.
+ *
+ * Returns 0 on success.  On failure every step already completed is
+ * undone and -ENOMEM or -ENODEV is returned.
+ */
+static int skbtrace_init(void)
+{
+	int ret;
+
+	mutex_init(&skbtrace_lock);
+	if (!skbtrace_session)
+		skbtrace_session = random32();
+
+	if (setup_skbtrace_filters() < 0)
+		return -ENOMEM;
+
+	if (skbtrace_events_common_init()) {
+		ret = -ENODEV;
+		goto err_filters;
+	}
+
+	skbtrace_dentry = debugfs_create_dir(SKBTRACE_DIR, NULL);
+	if (!skbtrace_dentry) {
+		ret = -ENOMEM;
+		goto err_proto;
+	}
+
+	/* create_controls() removes its own files when it fails */
+	if (create_controls()) {
+		ret = -ENOMEM;
+		goto err_dir;
+	}
+
+	should_load_proto = true;
+	return 0;
+
+err_dir:
+	debugfs_remove(skbtrace_dentry);
+	skbtrace_dentry = NULL;
+err_proto:
+	skbtrace_unregister_proto(AF_UNSPEC);
+err_filters:
+	clean_skbtrace_filters();
+	return ret;
+}
+
+/*
+ * Module exit: disable all tracepoints, remove the relay channels, then
+ * remove the control files, the debugfs directory and the filter state.
+ * skbtrace_disable_all_tp() is called twice on purpose: the first call
+ * disables the tracepoints, the second one (with nothing left enabled)
+ * removes the channels.
+ */
+static void skbtrace_exit(void)
+{
+ skbtrace_disable_all_tp(); /* disable all enabled tracepoints */
+ skbtrace_disable_all_tp(); /* remove channels in debugfs at 2nd time */
+ if (unlikely(nr_skbtrace_enabled_tp))
+ pr_err("skbtrace: failed to clean tracepoints.\n");
+ remove_controls();
+ debugfs_remove(skbtrace_dentry);
+ clean_skbtrace_filters();
+}
+
+module_init(skbtrace_init);
+module_exit(skbtrace_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/core/skbtrace-events-common.c
b/net/core/skbtrace-events-common.c
new file mode 100644
index 0000000..30a3730
--- /dev/null
+++ b/net/core/skbtrace-events-common.c
@@ -0,0 +1,68 @@
+/*
+ * skbtrace - sk_buff trace utility
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * 2012 Li Yu <bingtian.ly@taobao.com>
+ *
+ */
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/skbtrace_api.h>
+#include <linux/skbtrace.h>
+#include <net/flow_keys.h>
+
+/*
+ * Probe for the "skb_rps_info" event (see the common[] table): records the
+ * receive hash, the recorded rx queue (0 if none was recorded), the
+ * dissected flow keys, the cpu value passed in and the device ifindex.
+ *
+ * The function body is opened and closed by SKBTRACE_SKB_EVENT_BEGIN /
+ * SKBTRACE_SKB_EVENT_END, which are defined outside this file.
+ * NOTE(review): presumably those macros apply the skb filter before the
+ * body runs - confirm against their definition.
+ */
+static void skbtrace_skb_rps_info(struct skbtrace_tracepoint *t,
+ struct sk_buff *skb, struct net_device *dev, int cpu)
+SKBTRACE_SKB_EVENT_BEGIN
+ struct skbtrace_skb_rps_info_blk blk, *b;
+ struct flow_keys keys;
+
+ b = skbtrace_block_get(t, NULL, &blk);
+ INIT_SKBTRACE_BLOCK(&b->blk, skb,
+ skbtrace_action_skb_rps_info,
+ 0,
+ sizeof(blk));
+ b->rx_hash = skb->rxhash;
+ if (skb_rx_queue_recorded(skb))
+ b->rx_queue = skb_get_rx_queue(skb);
+ else
+ b->rx_queue = 0;
+ /* NOTE(review): the return value of skb_flow_dissect() is ignored -
+ * confirm that keys is fully initialised when dissection fails */
+ skb_flow_dissect(skb, &keys);
+ b->keys.src = keys.src;
+ b->keys.dst = keys.dst;
+ b->keys.ports = keys.ports;
+ b->keys.ip_proto = keys.ip_proto;
+ b->cpu = cpu;
+ b->ifindex = dev->ifindex;
+ skbtrace_probe(t, NULL, &b->blk);
+SKBTRACE_SKB_EVENT_END
+
+static struct skbtrace_tracepoint common[] = {
+ {
+ .trace_name = "skb_rps_info",
+ .action = skbtrace_action_skb_rps_info,
+ .block_size = sizeof(struct skbtrace_skb_rps_info_blk),
+ .probe = skbtrace_skb_rps_info,
+ },
+ EMPTY_SKBTRACE_TP
+};
+
+int skbtrace_events_common_init(void)
+{
+ return skbtrace_register_proto(AF_UNSPEC, common, NULL);
+}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e33ebae..15954ae 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,7 @@
#include <asm/uaccess.h>
#include <trace/events/skb.h>
#include <linux/highmem.h>
+#include <linux/skbtrace.h>
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
@@ -700,6 +701,10 @@ static void __copy_skb_header(struct sk_buff *new,
const struct sk_buff *old)
new->ooo_okay = old->ooo_okay;
new->l4_rxhash = old->l4_rxhash;
new->no_fcs = old->no_fcs;
+#if HAVE_SKBTRACE
+ new->hit_skbtrace = old->hit_skbtrace;
+ new->skbtrace_filtered = old->skbtrace_filtered;
+#endif
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
diff --git a/net/core/sock.c b/net/core/sock.c
index a6000fb..b818961 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -132,8 +132,10 @@
#include <net/netprio_cgroup.h>
#include <linux/filter.h>
+#include <linux/skbtrace.h>
#include <trace/events/sock.h>
+#include <trace/events/skbtrace_common.h>
#ifdef CONFIG_INET
#include <net/tcp.h>
@@ -1272,6 +1274,7 @@ struct sock *sk_alloc(struct net *net, int family,
gfp_t priority,
sock_update_classid(sk);
sock_update_netprioidx(sk, current);
+ sock_skbtrace_reset(sk);
}
return sk;
@@ -1292,6 +1295,8 @@ static void __sk_free(struct sock *sk)
RCU_INIT_POINTER(sk->sk_filter, NULL);
}
+ skbtrace_context_destroy(&sk->sk_skbtrace);
+
sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
if (atomic_read(&sk->sk_omem_alloc))
@@ -1440,6 +1445,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
net_enable_timestamp();
+
+ sock_skbtrace_reset(newsk);
}
out:
return newsk;
@@ -2124,6 +2131,7 @@ void sk_reset_timer(struct sock *sk, struct timer_list* timer,
{
if (!mod_timer(timer, expires))
sock_hold(sk);
+ trace_sk_timer(sk, timer, skbtrace_sk_timer_reset);
}
EXPORT_SYMBOL(sk_reset_timer);
@@ -2131,6 +2139,7 @@ void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
if (timer_pending(timer) && del_timer(timer))
__sock_put(sk);
+ trace_sk_timer(sk, timer, skbtrace_sk_timer_stop);
}
EXPORT_SYMBOL(sk_stop_timer);
next reply other threads:[~2012-10-19 6:16 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-10-19 6:16 Li Yu [this message]
2012-10-19 7:10 ` [PATCH 1/3] skbtrace v2: core feature and common events Eric Dumazet
2012-10-19 7:34 ` Li Yu
2012-10-19 8:01 ` Eric Dumazet
[not found] ` <CA+WLrf8kfYdodMmV7wqETBdVkmKwYWioQ35SSdUVXpGpZocznw@mail.gmail.com>
2012-10-28 13:42 ` Li Yu
2012-10-19 7:30 ` Cong Wang
2012-10-28 13:45 ` Li Yu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5080F031.5040804@gmail.com \
--to=raise.sail@gmail.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.