* [PATCH 03/10] batman-adv: support array of debugfs general attributes
From: Antonio Quartulli @ 2012-11-19 8:24 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1353313451-2930-1-git-send-email-ordex@autistici.org>
This patch adds support for an array of debugfs general (not soft_iface
specific) attributes. With this change it will be possible to add more general
attributes by simply appending them to the array without touching the rest of
the code.
Reported-by: Marek Lindner <lindner_marek@yahoo.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
Acked-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
net/batman-adv/debugfs.c | 34 +++++++++++++++++++++++++---------
1 file changed, 25 insertions(+), 9 deletions(-)
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 3f679cb..6f58ddd 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -323,7 +323,17 @@ struct batadv_debuginfo batadv_debuginfo_##_name = { \
} \
};
+/* the following attributes are general and therefore they will be directly
+ * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs
+ */
static BATADV_DEBUGINFO(routing_algos, S_IRUGO, batadv_algorithms_open);
+
+static struct batadv_debuginfo *batadv_general_debuginfos[] = {
+ &batadv_debuginfo_routing_algos,
+ NULL,
+};
+
+/* The following attributes are per soft interface */
static BATADV_DEBUGINFO(originators, S_IRUGO, batadv_originators_open);
static BATADV_DEBUGINFO(gateways, S_IRUGO, batadv_gateways_open);
static BATADV_DEBUGINFO(transtable_global, S_IRUGO,
@@ -358,7 +368,7 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
void batadv_debugfs_init(void)
{
- struct batadv_debuginfo *bat_debug;
+ struct batadv_debuginfo **bat_debug;
struct dentry *file;
batadv_debugfs = debugfs_create_dir(BATADV_DEBUGFS_SUBDIR, NULL);
@@ -366,17 +376,23 @@ void batadv_debugfs_init(void)
batadv_debugfs = NULL;
if (!batadv_debugfs)
- goto out;
+ goto err;
- bat_debug = &batadv_debuginfo_routing_algos;
- file = debugfs_create_file(bat_debug->attr.name,
- S_IFREG | bat_debug->attr.mode,
- batadv_debugfs, NULL, &bat_debug->fops);
- if (!file)
- pr_err("Can't add debugfs file: %s\n", bat_debug->attr.name);
+ for (bat_debug = batadv_general_debuginfos; *bat_debug; ++bat_debug) {
+ file = debugfs_create_file(((*bat_debug)->attr).name,
+ S_IFREG | ((*bat_debug)->attr).mode,
+ batadv_debugfs, NULL,
+ &(*bat_debug)->fops);
+ if (!file) {
+ pr_err("Can't add general debugfs file: %s\n",
+ ((*bat_debug)->attr).name);
+ goto err;
+ }
+ }
-out:
return;
+err:
+ debugfs_remove_recursive(batadv_debugfs);
}
void batadv_debugfs_destroy(void)
--
1.8.0
^ permalink raw reply related
* [PATCH 02/10] batman-adv: fix bla compare function
From: Antonio Quartulli @ 2012-11-19 8:24 UTC (permalink / raw)
To: davem
Cc: netdev, b.a.t.m.a.n, Simon Wunderlich, Simon Wunderlich,
Marek Lindner, Antonio Quartulli
In-Reply-To: <1353313451-2930-1-git-send-email-ordex@autistici.org>
From: Simon Wunderlich <simon.wunderlich@s2003.tu-chemnitz.de>
The address and the VLAN VID may not be packed in the respective
structs. Fix this by comparing the elements individually.
Reported-by: Marek Lindner <lindner_marek@yahoo.de>
Reported-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/bridge_loop_avoidance.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index bda8b17..7ffef8b 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -77,8 +77,15 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node,
{
const void *data1 = container_of(node, struct batadv_backbone_gw,
hash_entry);
+ const struct batadv_backbone_gw *gw1 = data1, *gw2 = data2;
- return (memcmp(data1, data2, ETH_ALEN + sizeof(short)) == 0 ? 1 : 0);
+ if (!batadv_compare_eth(gw1->orig, gw2->orig))
+ return 0;
+
+ if (gw1->vid != gw2->vid)
+ return 0;
+
+ return 1;
}
/* compares address and vid of two claims */
@@ -87,8 +94,15 @@ static int batadv_compare_claim(const struct hlist_node *node,
{
const void *data1 = container_of(node, struct batadv_claim,
hash_entry);
+ const struct batadv_claim *cl1 = data1, *cl2 = data2;
- return (memcmp(data1, data2, ETH_ALEN + sizeof(short)) == 0 ? 1 : 0);
+ if (!batadv_compare_eth(cl1->addr, cl2->addr))
+ return 0;
+
+ if (cl1->vid != cl2->vid)
+ return 0;
+
+ return 1;
}
/* free a backbone gw */
--
1.8.0
^ permalink raw reply related
* [PATCH 01/10] batman-adv: Mark best gateway in transtable_global debugfs
From: Antonio Quartulli @ 2012-11-19 8:24 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Marek Lindner,
Antonio Quartulli
In-Reply-To: <1353313451-2930-1-git-send-email-ordex@autistici.org>
From: Sven Eckelmann <sven@narfation.org>
The transtable_global debug file can show multiple entries for a single client
when multiple gateways exist. The chosen gateway isn't marked in the list and
therefore the user cannot easily debug the situation when there is a problem
with the currently used gateway.
The best gateway is now marked with "*" and secondary gateways are marked with
"+".
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/translation-table.c | 90 +++++++++++++++++++++++++++-----------
1 file changed, 64 insertions(+), 26 deletions(-)
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 582f134..cdad824 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -911,8 +911,44 @@ out:
return ret;
}
-/* print all orig nodes who announce the address for this global entry.
- * it is assumed that the caller holds rcu_read_lock();
+/* batadv_transtable_best_orig - Get best originator list entry from tt entry
+ * @tt_global_entry: global translation table entry to be analyzed
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ * Returns best originator list entry or NULL on errors.
+ */
+static struct batadv_tt_orig_list_entry *
+batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry)
+{
+ struct batadv_neigh_node *router = NULL;
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL;
+ int best_tq = 0;
+
+ head = &tt_global_entry->orig_list;
+ hlist_for_each_entry_rcu(orig_entry, node, head, list) {
+ router = batadv_orig_node_get_router(orig_entry->orig_node);
+ if (!router)
+ continue;
+
+ if (router->tq_avg > best_tq) {
+ best_entry = orig_entry;
+ best_tq = router->tq_avg;
+ }
+
+ batadv_neigh_node_free_ref(router);
+ }
+
+ return best_entry;
+}
+
+/* batadv_tt_global_print_entry - print all orig nodes who announce the address
+ * for this global entry
+ * @tt_global_entry: global translation table entry to be printed
+ * @seq: debugfs table seq_file struct
+ *
+ * This function assumes the caller holds rcu_read_lock().
*/
static void
batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
@@ -920,21 +956,37 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
{
struct hlist_head *head;
struct hlist_node *node;
- struct batadv_tt_orig_list_entry *orig_entry;
+ struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
struct batadv_tt_common_entry *tt_common_entry;
uint16_t flags;
uint8_t last_ttvn;
tt_common_entry = &tt_global_entry->common;
+ flags = tt_common_entry->flags;
+
+ best_entry = batadv_transtable_best_orig(tt_global_entry);
+ if (best_entry) {
+ last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn);
+ seq_printf(seq, " %c %pM (%3u) via %pM (%3u) [%c%c%c]\n",
+ '*', tt_global_entry->common.addr,
+ best_entry->ttvn, best_entry->orig_node->orig,
+ last_ttvn,
+ (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
+ (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
+ (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
+ }
head = &tt_global_entry->orig_list;
hlist_for_each_entry_rcu(orig_entry, node, head, list) {
- flags = tt_common_entry->flags;
+ if (best_entry == orig_entry)
+ continue;
+
last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn);
- seq_printf(seq, " * %pM (%3u) via %pM (%3u) [%c%c%c]\n",
- tt_global_entry->common.addr, orig_entry->ttvn,
- orig_entry->orig_node->orig, last_ttvn,
+ seq_printf(seq, " %c %pM (%3u) via %pM (%3u) [%c%c%c]\n",
+ '+', tt_global_entry->common.addr,
+ orig_entry->ttvn, orig_entry->orig_node->orig,
+ last_ttvn,
(flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
(flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
(flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
@@ -1280,11 +1332,7 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
struct batadv_tt_local_entry *tt_local_entry = NULL;
struct batadv_tt_global_entry *tt_global_entry = NULL;
struct batadv_orig_node *orig_node = NULL;
- struct batadv_neigh_node *router = NULL;
- struct hlist_head *head;
- struct hlist_node *node;
- struct batadv_tt_orig_list_entry *orig_entry;
- int best_tq;
+ struct batadv_tt_orig_list_entry *best_entry;
if (src && atomic_read(&bat_priv->ap_isolation)) {
tt_local_entry = batadv_tt_local_hash_find(bat_priv, src);
@@ -1304,25 +1352,15 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
_batadv_is_ap_isolated(tt_local_entry, tt_global_entry))
goto out;
- best_tq = 0;
-
rcu_read_lock();
- head = &tt_global_entry->orig_list;
- hlist_for_each_entry_rcu(orig_entry, node, head, list) {
- router = batadv_orig_node_get_router(orig_entry->orig_node);
- if (!router)
- continue;
-
- if (router->tq_avg > best_tq) {
- orig_node = orig_entry->orig_node;
- best_tq = router->tq_avg;
- }
- batadv_neigh_node_free_ref(router);
- }
+ best_entry = batadv_transtable_best_orig(tt_global_entry);
/* found anything? */
+ if (best_entry)
+ orig_node = best_entry->orig_node;
if (orig_node && !atomic_inc_not_zero(&orig_node->refcount))
orig_node = NULL;
rcu_read_unlock();
+
out:
if (tt_global_entry)
batadv_tt_global_entry_free_ref(tt_global_entry);
--
1.8.0
^ permalink raw reply related
* pull request: batman-adv 2012-11-19
From: Antonio Quartulli @ 2012-11-19 8:24 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n
Hello David,
this should be our last batch of patches intended for net-next/linux-3.8.
In this patchset we have patches 7,8/10 by Sven Eckelmann which improve the crc
computation on broadcast packets (in the Bridge Loop Avoidance component) by
using crc32c and by avoiding the entire linearisation of the skb! Then, patch
4/10 introduces a new debugfs file which exports the compatibility version so
that users having different batman-adv releases can understand whether they can
or cannot communicate.
Patch 10/10 removes the packed attribute for the unicast message type and adds
"#pragma pack(2)" (again, this is just part of our intermediate changes which do
not break compatibility. The real restructure will come later..).
The others are cleanups or small code refactoring.
Let me know if there is any problem!
Thank you,
Antonio
The following changes since commit 3594698a1fb8e5ae60a92c72ce9ca280256939a7:
net: Make CAP_NET_BIND_SERVICE per user namespace (2012-11-18 20:33:37 -0500)
are available in the git repository at:
git://git.open-mesh.org/linux-merge.git tags/batman-adv-for-davem
for you to fetch changes up to 15401e33ef94d4f251c42e8228e6c387327f38f8:
batman-adv: Use packing of 2 for all headers before an ethernet header (2012-11-19 09:14:11 +0100)
----------------------------------------------------------------
Included changes:
- Increase batman-adv version
- Bridge Loop Avoidance: compute checksum (using crc32c) on skb fragments instead
of linearising it
- sort the sysfs documentation
- export the compatibility version via debugfs
- some other minor cleanups
----------------------------------------------------------------
Antonio Quartulli (2):
batman-adv: support array of debugfs general attributes
batman-adv: export compatibility version via debugfs
Marek Lindner (1):
batman-adv: sysfs documentation should keep alphabetical order
Martin Hundebøll (1):
batman-adv: Add wrapper to look up neighbor and send skb
Simon Wunderlich (2):
batman-adv: fix bla compare function
batman-adv: Fix broadcast duplist for fragmentation
Sven Eckelmann (4):
batman-adv: Mark best gateway in transtable_global debugfs
batman-adv: Add function to calculate crc32c for the skb payload
batman-adv: Start new development cycle
batman-adv: Use packing of 2 for all headers before an ethernet header
.../ABI/testing/sysfs-class-net-batman-adv | 11 +-
Documentation/ABI/testing/sysfs-class-net-mesh | 40 +++---
net/batman-adv/Kconfig | 1 +
net/batman-adv/bridge_loop_avoidance.c | 36 +++--
net/batman-adv/bridge_loop_avoidance.h | 6 +-
net/batman-adv/debugfs.c | 46 ++++--
net/batman-adv/main.c | 46 ++++++
net/batman-adv/main.h | 4 +-
net/batman-adv/packet.h | 16 ++-
net/batman-adv/routing.c | 45 ++----
net/batman-adv/send.c | 33 +++++
net/batman-adv/send.h | 3 +
net/batman-adv/translation-table.c | 155 +++++++++++----------
net/batman-adv/types.h | 2 +-
net/batman-adv/unicast.c | 8 +-
net/batman-adv/vis.c | 35 ++---
16 files changed, 293 insertions(+), 194 deletions(-)
^ permalink raw reply
* [PATCH RFC 4/5] printk: add ns_printk for specific syslog_ns
From: Rui Xiang @ 2012-11-19 8:17 UTC (permalink / raw)
To: serge.hallyn, containers; +Cc: Eric W. Biederman, netdev
From: Libo Chen <clbchenlibo.chen@huawei.com>
In some contexts, such as iptables, we cannot get the correct syslog_ns from
current_syslog_ns, because we get init_syslog_ns instead of the syslog_ns
belonging to the container.
We add a new interface, ns_printk, and give it a syslog_ns parameter.
Signed-off-by: Libo Chen <clbchenlibo.chen@huawei.com>
Signed-off-by: Xiang Rui <rui.xiang@huawei.com>
---
include/linux/printk.h | 1 +
kernel/printk.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 38 insertions(+), 0 deletions(-)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index e0c60d9..444d229 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -119,6 +119,7 @@ asmlinkage int printk_emit(int facility, int level,
asmlinkage __printf(1, 2) __cold
int printk(const char *fmt, ...);
+int ns_printk(struct syslog_namespace *syslog_ns, const char *fmt, ...);
/*
* Special printk facility for scheduler use only, _DO_NOT_USE_ !
diff --git a/kernel/printk.c b/kernel/printk.c
index 2ef9c46..85a9965 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1681,6 +1681,43 @@ asmlinkage int printk(const char *fmt, ...)
}
EXPORT_SYMBOL(printk);
+/**
+ * ns_printk - print a kernel message in syslog_ns
+ * @syslog_ns: syslog namespace
+ * @fmt: format string
+ *
+ * This is ns_printk().
+ * It can be called from container context. We add a param
+ * syslog_ns to record current syslog namespace, because
+ * we can't get the correct syslog_ns from current_syslog_ns
+ * in some context, e.g. iptable.
+ *
+ * See the vsnprintf() documentation for format string extensions over C99.
+ **/
+asmlinkage int ns_printk(struct syslog_namespace *syslog_ns,
+ const char *fmt, ...)
+{
+ va_list args;
+ int r;
+
+ if (!syslog_ns)
+ syslog_ns = current_syslog_ns();
+
+#ifdef CONFIG_KGDB_KDB
+ if (unlikely(kdb_trap_printk)) {
+ va_start(args, fmt);
+ r = vkdb_printf(fmt, args);
+ va_end(args);
+ return r;
+ }
+#endif
+ va_start(args, fmt);
+ r = vprintk_emit(0, -1, NULL, 0, fmt, args, syslog_ns);
+ va_end(args);
+
+ return r;
+}
+EXPORT_SYMBOL(ns_printk);
#else /* CONFIG_PRINTK */
#define LOG_LINE_MAX 0
--
1.7.1
^ permalink raw reply related
* [PATCH RFC 5/5] printk: use ns_printk in iptable context
From: Rui Xiang @ 2012-11-19 8:17 UTC (permalink / raw)
To: serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw,
netdev-u79uwXL29TY76Z2rM5mHXA
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
Eric W. Biederman
From: Libo Chen <clbchenlibo.chen-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
We add a syslog_ns pointer to the net namespace to fix the iptables
issue, and call ns_printk with the syslog_ns parameter taken from
skb->dev->nd_net->syslog_ns.
Signed-off-by: Libo Chen <clbchenlibo.chen-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Xiang Rui <rui.xiang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
include/linux/syslog_namespace.h | 7 ++++---
include/net/net_namespace.h | 7 +++++--
include/net/netfilter/xt_log.h | 7 +++++--
kernel/nsproxy.c | 21 +++++++++++----------
kernel/syslog_namespace.c | 6 ++++--
net/core/net_namespace.c | 12 ++++++++++--
net/netfilter/xt_LOG.c | 4 ++--
7 files changed, 41 insertions(+), 23 deletions(-)
diff --git a/include/linux/syslog_namespace.h b/include/linux/syslog_namespace.h
index 1ecb8b8..2053409 100644
--- a/include/linux/syslog_namespace.h
+++ b/include/linux/syslog_namespace.h
@@ -58,7 +58,7 @@ static inline struct syslog_namespace *current_syslog_ns(void)
#ifdef CONFIG_SYSLOG_NS
extern void free_syslog_ns(struct kref *kref);
extern struct syslog_namespace *copy_syslog_ns(unsigned long flags,
- struct task_struct *tsk);
+ struct syslog_namespace *syslog_ns);
static inline struct syslog_namespace *get_syslog_ns(
struct syslog_namespace *ns)
@@ -76,11 +76,12 @@ static inline void put_syslog_ns(struct syslog_namespace *ns)
#else
static inline struct syslog_namespace *copy_syslog_ns(unsigned long flags,
- struct task_struct *tsk)
+ struct syslog_namespace *syslog_ns)
{
if (flags & CLONE_NEWSYSLOG)
return ERR_PTR(-EINVAL);
- return tsk->nsproxy->syslog_ns;
+
+ return syslog_ns;
}
static inline struct syslog_namespace *get_syslog_ns(
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 95e6466..61fe80f 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -108,6 +108,7 @@ struct net {
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
#endif
+ struct syslog_namespace *syslog_ns;
struct netns_ipvs *ipvs;
struct sock *diag_nlsk;
atomic_t rt_genid;
@@ -127,10 +128,12 @@ struct net {
extern struct net init_net;
#ifdef CONFIG_NET
-extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns);
+extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns,
+ struct syslog_namespace *syslog_ns);
#else /* CONFIG_NET */
-static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
+static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns,
+ struct syslog_namespace *syslog_ns);
{
/* There is nothing to copy so this is a noop */
return net_ns;
diff --git a/include/net/netfilter/xt_log.h b/include/net/netfilter/xt_log.h
index 9d9756c..5f15e0e 100644
--- a/include/net/netfilter/xt_log.h
+++ b/include/net/netfilter/xt_log.h
@@ -39,11 +39,14 @@ static struct sbuff *sb_open(void)
return m;
}
-static void sb_close(struct sbuff *m)
+static void sb_close(struct sbuff *m, struct sk_buff *skb)
{
m->buf[m->count] = 0;
+#ifdef CONFIG_NET_NS
+ ns_printk(skb->dev->nd_net->syslog_ns, "%s\n", m->buf);
+#else
printk("%s\n", m->buf);
-
+#endif
if (likely(m != &emergency))
kfree(m);
else {
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 331d31f..cb9608a 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -92,24 +92,25 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
goto out_pid;
}
- new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
- if (IS_ERR(new_nsp->net_ns)) {
- err = PTR_ERR(new_nsp->net_ns);
- goto out_net;
- }
-
- new_nsp->syslog_ns = copy_syslog_ns(flags, tsk);
+ new_nsp->syslog_ns = copy_syslog_ns(flags, tsk->nsproxy->syslog_ns);
if (IS_ERR(new_nsp->syslog_ns)) {
err = PTR_ERR(new_nsp->syslog_ns);
goto out_syslog;
}
+ new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns,
+ new_nsp->syslog_ns);
+ if (IS_ERR(new_nsp->net_ns)) {
+ err = PTR_ERR(new_nsp->net_ns);
+ goto out_net;
+ }
+
return new_nsp;
-out_syslog:
- if (new_nsp->net_ns)
- put_net(new_nsp->net_ns);
out_net:
+ if (new_nsp->syslog_ns)
+ put_net(new_nsp->syslog_ns);
+out_syslog:
if (new_nsp->pid_ns)
put_pid_ns(new_nsp->pid_ns);
out_pid:
diff --git a/kernel/syslog_namespace.c b/kernel/syslog_namespace.c
index a12e1c1..1c3ed4b 100644
--- a/kernel/syslog_namespace.c
+++ b/kernel/syslog_namespace.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/bootmem.h>
#include <linux/syslog_namespace.h>
+#include <net/net_namespace.h>
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -46,10 +47,11 @@ static struct syslog_namespace *create_syslog_ns(unsigned int buf_len)
}
struct syslog_namespace *copy_syslog_ns(unsigned long flags,
- struct task_struct *tsk)
+ struct syslog_namespace *syslog_ns)
{
if (!(flags & CLONE_NEWSYSLOG))
- return get_syslog_ns(tsk->nsproxy->syslog_ns);
+ return get_syslog_ns(syslog_ns);
+
return create_syslog_ns(CONTAINER_BUF_LEN);
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 42f1e1c..f192e1e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -15,6 +15,7 @@
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/syslog_namespace.h>
/*
* Our network namespace constructor/destructor lists
@@ -29,6 +30,7 @@ EXPORT_SYMBOL_GPL(net_namespace_list);
struct net init_net = {
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
+ .syslog_ns = &init_syslog_ns
};
EXPORT_SYMBOL(init_net);
@@ -232,7 +234,8 @@ void net_drop_ns(void *p)
net_free(ns);
}
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+struct net *copy_net_ns(unsigned long flags, struct net *old_net,
+ struct syslog_namespace *syslog_ns)
{
struct net *net;
int rv;
@@ -255,6 +258,9 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
net_drop_ns(net);
return ERR_PTR(rv);
}
+
+ net->syslog_ns = get_syslog_ns(syslog_ns);
+
return net;
}
@@ -308,6 +314,7 @@ static void cleanup_net(struct work_struct *work)
/* Finally it is safe to free my network namespace structure */
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
+ put_syslog_ns(net->syslog_ns);
net_drop_ns(net);
}
}
@@ -347,7 +354,8 @@ struct net *get_net_ns_by_fd(int fd)
}
#else
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+struct net *copy_net_ns(unsigned long flags, struct net *old_net,
+ struct syslog_namespace *syslog_ns)
{
if (flags & CLONE_NEWNET)
return ERR_PTR(-EINVAL);
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index fa40096..6b13b72 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -486,7 +486,7 @@ ipt_log_packet(u_int8_t pf,
dump_ipv4_packet(m, loginfo, skb, 0);
- sb_close(m);
+ sb_close(m, skb);
}
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
@@ -810,7 +810,7 @@ ip6t_log_packet(u_int8_t pf,
dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
- sb_close(m);
+ sb_close(m, skb);
}
#endif
--
1.7.1
^ permalink raw reply related
* [PATCH RFC 3/5] printk: modify printk interface for syslog_namespace
From: Rui Xiang @ 2012-11-19 8:16 UTC (permalink / raw)
To: serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Eric W. Biederman
From: Libo Chen <clbchenlibo.chen-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
We re-implement printk with an additional syslog_ns parameter.
The affected functions, including printk, /dev/kmsg, do_syslog and kmsg_dump, are modified
for syslog_ns. Previous identifiers *** such as log_first_seq are replaced
by syslog_ns->***.
Signed-off-by: Libo Chen <clbchenlibo.chen-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Xiang Rui <rui.xiang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
drivers/base/core.c | 4 +-
include/linux/printk.h | 4 +-
kernel/printk.c | 609 +++++++++++++++++++++++++++++-------------------
3 files changed, 372 insertions(+), 245 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index abea76c..665c2f7 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -26,6 +26,7 @@
#include <linux/async.h>
#include <linux/pm_runtime.h>
#include <linux/netdevice.h>
+#include <linux/syslog_namespace.h>
#include "base.h"
#include "power/power.h"
@@ -1922,7 +1923,8 @@ int dev_vprintk_emit(int level, const struct device *dev,
hdrlen = create_syslog_header(dev, hdr, sizeof(hdr));
- return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args);
+ return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen,
+ fmt, args, current_syslog_ns());
}
EXPORT_SYMBOL(dev_vprintk_emit);
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9afc01e..e0c60d9 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -7,6 +7,7 @@
extern const char linux_banner[];
extern const char linux_proc_banner[];
+struct syslog_namespace;
static inline int printk_get_level(const char *buffer)
{
if (buffer[0] == KERN_SOH_ASCII && buffer[1]) {
@@ -105,7 +106,8 @@ extern void printk_tick(void);
asmlinkage __printf(5, 0)
int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
- const char *fmt, va_list args);
+ const char *fmt, va_list args,
+ struct syslog_namespace *syslog_ns);
asmlinkage __printf(1, 0)
int vprintk(const char *fmt, va_list args);
diff --git a/kernel/printk.c b/kernel/printk.c
index 2d607f4..2ef9c46 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -42,6 +42,7 @@
#include <linux/notifier.h>
#include <linux/rculist.h>
#include <linux/poll.h>
+#include <linux/syslog_namespace.h>
#include <asm/uaccess.h>
@@ -214,46 +215,14 @@ struct log {
* The logbuf_lock protects kmsg buffer, indices, counters. It is also
* used in interesting ways to provide interlocking in console_unlock();
*/
-static DEFINE_RAW_SPINLOCK(logbuf_lock);
#ifdef CONFIG_PRINTK
-/* the next printk record to read by syslog(READ) or /proc/kmsg */
-static u64 syslog_seq;
-static u32 syslog_idx;
-static enum log_flags syslog_prev;
-static size_t syslog_partial;
-
-/* index and sequence number of the first record stored in the buffer */
-static u64 log_first_seq;
-static u32 log_first_idx;
-
-/* index and sequence number of the next record to store in the buffer */
-static u64 log_next_seq;
-static u32 log_next_idx;
-/* the next printk record to write to the console */
-static u64 console_seq;
-static u32 console_idx;
static enum log_flags console_prev;
-/* the next printk record to read after the last 'clear' command */
-static u64 clear_seq;
-static u32 clear_idx;
-
#define PREFIX_MAX 32
#define LOG_LINE_MAX 1024 - PREFIX_MAX
-/* record buffer */
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-#define LOG_ALIGN 4
-#else
-#define LOG_ALIGN __alignof__(struct log)
-#endif
-#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
-static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
-static char *log_buf = __log_buf;
-static u32 log_buf_len = __LOG_BUF_LEN;
-
/* cpu currently holding logbuf_lock */
static volatile unsigned int logbuf_cpu = UINT_MAX;
@@ -270,23 +239,23 @@ static char *log_dict(const struct log *msg)
}
/* get record by index; idx must point to valid msg */
-static struct log *log_from_idx(u32 idx)
+static struct log *log_from_idx(u32 idx, struct syslog_namespace *syslog_ns)
{
- struct log *msg = (struct log *)(log_buf + idx);
+ struct log *msg = (struct log *)(syslog_ns->log_buf + idx);
/*
* A length == 0 record is the end of buffer marker. Wrap around and
* read the message at the start of the buffer.
*/
if (!msg->len)
- return (struct log *)log_buf;
+ return (struct log *)syslog_ns->log_buf;
return msg;
}
/* get next record; idx must point to valid msg */
-static u32 log_next(u32 idx)
+static u32 log_next(u32 idx, struct syslog_namespace *syslog_ns)
{
- struct log *msg = (struct log *)(log_buf + idx);
+ struct log *msg = (struct log *)(syslog_ns->log_buf + idx);
/* length == 0 indicates the end of the buffer; wrap */
/*
@@ -295,7 +264,7 @@ static u32 log_next(u32 idx)
* return the one after that.
*/
if (!msg->len) {
- msg = (struct log *)log_buf;
+ msg = (struct log *)syslog_ns->log_buf;
return msg->len;
}
return idx + msg->len;
@@ -305,7 +274,8 @@ static u32 log_next(u32 idx)
static void log_store(int facility, int level,
enum log_flags flags, u64 ts_nsec,
const char *dict, u16 dict_len,
- const char *text, u16 text_len)
+ const char *text, u16 text_len,
+ struct syslog_namespace *syslog_ns)
{
struct log *msg;
u32 size, pad_len;
@@ -315,34 +285,40 @@ static void log_store(int facility, int level,
pad_len = (-size) & (LOG_ALIGN - 1);
size += pad_len;
- while (log_first_seq < log_next_seq) {
+ while (syslog_ns->log_first_seq < syslog_ns->log_next_seq) {
u32 free;
- if (log_next_idx > log_first_idx)
- free = max(log_buf_len - log_next_idx, log_first_idx);
+ if (syslog_ns->log_next_idx > syslog_ns->log_first_idx)
+ free = max(syslog_ns->log_buf_len -
+ syslog_ns->log_next_idx,
+ syslog_ns->log_first_idx);
else
- free = log_first_idx - log_next_idx;
+ free = syslog_ns->log_first_idx -
+ syslog_ns->log_next_idx;
if (free > size + sizeof(struct log))
break;
/* drop old messages until we have enough contiuous space */
- log_first_idx = log_next(log_first_idx);
- log_first_seq++;
+ syslog_ns->log_first_idx =
+ log_next(syslog_ns->log_first_idx, syslog_ns);
+ syslog_ns->log_first_seq++;
}
- if (log_next_idx + size + sizeof(struct log) >= log_buf_len) {
+ if (syslog_ns->log_next_idx + size + sizeof(struct log) >=
+ syslog_ns->log_buf_len) {
/*
* This message + an additional empty header does not fit
* at the end of the buffer. Add an empty header with len == 0
* to signify a wrap around.
*/
- memset(log_buf + log_next_idx, 0, sizeof(struct log));
- log_next_idx = 0;
+ memset(syslog_ns->log_buf + syslog_ns->log_next_idx,
+ 0, sizeof(struct log));
+ syslog_ns->log_next_idx = 0;
}
/* fill message */
- msg = (struct log *)(log_buf + log_next_idx);
+ msg = (struct log *)(syslog_ns->log_buf + syslog_ns->log_next_idx);
memcpy(log_text(msg), text, text_len);
msg->text_len = text_len;
memcpy(log_dict(msg), dict, dict_len);
@@ -358,8 +334,8 @@ static void log_store(int facility, int level,
msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
/* insert message */
- log_next_idx += msg->len;
- log_next_seq++;
+ syslog_ns->log_next_idx += msg->len;
+ syslog_ns->log_next_seq++;
}
/* /dev/kmsg - userspace message inject/listen interface */
@@ -368,6 +344,7 @@ struct devkmsg_user {
u32 idx;
enum log_flags prev;
struct mutex lock;
+ struct syslog_namespace *syslog_ns;
char buf[8192];
};
@@ -431,6 +408,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct devkmsg_user *user = file->private_data;
+ struct syslog_namespace *syslog_ns = user->syslog_ns;
struct log *msg;
u64 ts_usec;
size_t i;
@@ -444,32 +422,32 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
ret = mutex_lock_interruptible(&user->lock);
if (ret)
return ret;
- raw_spin_lock_irq(&logbuf_lock);
- while (user->seq == log_next_seq) {
+ raw_spin_lock_irq(&syslog_ns->logbuf_lock);
+ while (user->seq == syslog_ns->log_next_seq) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
goto out;
}
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
ret = wait_event_interruptible(log_wait,
- user->seq != log_next_seq);
+ user->seq != syslog_ns->log_next_seq);
if (ret)
goto out;
- raw_spin_lock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&syslog_ns->logbuf_lock);
}
- if (user->seq < log_first_seq) {
+ if (user->seq < syslog_ns->log_first_seq) {
/* our last seen message is gone, return error and reset */
- user->idx = log_first_idx;
- user->seq = log_first_seq;
+ user->idx = syslog_ns->log_first_idx;
+ user->seq = syslog_ns->log_first_seq;
ret = -EPIPE;
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
goto out;
}
- msg = log_from_idx(user->idx);
+ msg = log_from_idx(user->idx, syslog_ns);
ts_usec = msg->ts_nsec;
do_div(ts_usec, 1000);
@@ -530,9 +508,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
user->buf[len++] = '\n';
}
- user->idx = log_next(user->idx);
+ user->idx = log_next(user->idx, syslog_ns);
user->seq++;
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
if (len > count) {
ret = -EINVAL;
@@ -552,6 +530,7 @@ out:
static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
{
struct devkmsg_user *user = file->private_data;
+ struct syslog_namespace *syslog_ns = user->syslog_ns;
loff_t ret = 0;
if (!user)
@@ -559,12 +538,12 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
if (offset)
return -ESPIPE;
- raw_spin_lock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&syslog_ns->logbuf_lock);
switch (whence) {
case SEEK_SET:
/* the first record */
- user->idx = log_first_idx;
- user->seq = log_first_seq;
+ user->idx = syslog_ns->log_first_idx;
+ user->seq = syslog_ns->log_first_seq;
break;
case SEEK_DATA:
/*
@@ -572,24 +551,25 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
* like issued by 'dmesg -c'. Reading /dev/kmsg itself
* changes no global state, and does not clear anything.
*/
- user->idx = clear_idx;
- user->seq = clear_seq;
+ user->idx = syslog_ns->clear_idx;
+ user->seq = syslog_ns->clear_seq;
break;
case SEEK_END:
/* after the last record */
- user->idx = log_next_idx;
- user->seq = log_next_seq;
+ user->idx = syslog_ns->log_next_idx;
+ user->seq = syslog_ns->log_next_seq;
break;
default:
ret = -EINVAL;
}
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
return ret;
}
static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
{
struct devkmsg_user *user = file->private_data;
+ struct syslog_namespace *syslog_ns = user->syslog_ns;
int ret = 0;
if (!user)
@@ -597,20 +577,21 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
poll_wait(file, &log_wait, wait);
- raw_spin_lock_irq(&logbuf_lock);
- if (user->seq < log_next_seq) {
+ raw_spin_lock_irq(&syslog_ns->logbuf_lock);
+ if (user->seq < syslog_ns->log_next_seq) {
/* return error when data has vanished underneath us */
- if (user->seq < log_first_seq)
+ if (user->seq < syslog_ns->log_first_seq)
ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI;
ret = POLLIN|POLLRDNORM;
}
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
return ret;
}
static int devkmsg_open(struct inode *inode, struct file *file)
{
+ struct syslog_namespace *syslog_ns;
struct devkmsg_user *user;
int err;
@@ -628,10 +609,11 @@ static int devkmsg_open(struct inode *inode, struct file *file)
mutex_init(&user->lock);
- raw_spin_lock_irq(&logbuf_lock);
- user->idx = log_first_idx;
- user->seq = log_first_seq;
- raw_spin_unlock_irq(&logbuf_lock);
+ user->syslog_ns = current_syslog_ns();
+ raw_spin_lock_irq(&syslog_ns->logbuf_lock);
+ user->idx = syslog_ns->log_first_idx;
+ user->seq = syslog_ns->log_first_seq;
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
file->private_data = user;
return 0;
@@ -669,10 +651,12 @@ const struct file_operations kmsg_fops = {
*/
void log_buf_kexec_setup(void)
{
- VMCOREINFO_SYMBOL(log_buf);
- VMCOREINFO_SYMBOL(log_buf_len);
- VMCOREINFO_SYMBOL(log_first_idx);
- VMCOREINFO_SYMBOL(log_next_idx);
+ struct syslog_namespace *syslog_ns = current_syslog_ns();
+
+ VMCOREINFO_SYMBOL(syslog_ns->log_buf);
+ VMCOREINFO_SYMBOL(syslog_ns->log_buf_len);
+ VMCOREINFO_SYMBOL(syslog_ns->log_first_idx);
+ VMCOREINFO_SYMBOL(syslog_ns->log_next_idx);
/*
* Export struct log size and field offsets. User space tools can
* parse it and detect any changes to structure down the line.
@@ -692,10 +676,11 @@ static unsigned long __initdata new_log_buf_len;
static int __init log_buf_len_setup(char *str)
{
unsigned size = memparse(str, &str);
+ struct syslog_namespace *syslog_ns = &init_syslog_ns;
if (size)
size = roundup_pow_of_two(size);
- if (size > log_buf_len)
+ if (size > syslog_ns->log_buf_len)
new_log_buf_len = size;
return 0;
@@ -707,6 +692,7 @@ void __init setup_log_buf(int early)
unsigned long flags;
char *new_log_buf;
int free;
+ struct syslog_namespace *syslog_ns = &init_syslog_ns;
if (!new_log_buf_len)
return;
@@ -728,15 +714,15 @@ void __init setup_log_buf(int early)
return;
}
- raw_spin_lock_irqsave(&logbuf_lock, flags);
- log_buf_len = new_log_buf_len;
- log_buf = new_log_buf;
+ raw_spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
+ memcpy(new_log_buf, syslog_ns->log_buf, __LOG_BUF_LEN);
+ syslog_ns->log_buf_len = new_log_buf_len;
+ syslog_ns->log_buf = new_log_buf;
new_log_buf_len = 0;
- free = __LOG_BUF_LEN - log_next_idx;
- memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ free = __LOG_BUF_LEN - syslog_ns->log_next_idx;
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
- pr_info("log_buf_len: %d\n", log_buf_len);
+ pr_info("log_buf_len: %d\n", syslog_ns->log_buf_len);
pr_info("early log buf free: %d(%d%%)\n",
free, (free * 100) / __LOG_BUF_LEN);
}
@@ -937,7 +923,8 @@ static size_t msg_print_text(const struct log *msg, enum log_flags prev,
return len;
}
-static int syslog_print(char __user *buf, int size)
+static int syslog_print(char __user *buf, int size,
+ struct syslog_namespace *syslog_ns)
{
char *text;
struct log *msg;
@@ -951,37 +938,38 @@ static int syslog_print(char __user *buf, int size)
size_t n;
size_t skip;
- raw_spin_lock_irq(&logbuf_lock);
- if (syslog_seq < log_first_seq) {
+ raw_spin_lock_irq(&syslog_ns->logbuf_lock);
+ if (syslog_ns->syslog_seq < syslog_ns->log_first_seq) {
/* messages are gone, move to first one */
- syslog_seq = log_first_seq;
- syslog_idx = log_first_idx;
- syslog_prev = 0;
- syslog_partial = 0;
+ syslog_ns->syslog_seq = syslog_ns->log_first_seq;
+ syslog_ns->syslog_idx = syslog_ns->log_first_idx;
+ syslog_ns->syslog_prev = 0;
+ syslog_ns->syslog_partial = 0;
}
- if (syslog_seq == log_next_seq) {
- raw_spin_unlock_irq(&logbuf_lock);
+ if (syslog_ns->syslog_seq == syslog_ns->log_next_seq) {
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
break;
}
- skip = syslog_partial;
- msg = log_from_idx(syslog_idx);
- n = msg_print_text(msg, syslog_prev, true, text,
+ skip = syslog_ns->syslog_partial;
+ msg = log_from_idx(syslog_ns->syslog_idx, syslog_ns);
+ n = msg_print_text(msg, syslog_ns->syslog_prev, true, text,
LOG_LINE_MAX + PREFIX_MAX);
- if (n - syslog_partial <= size) {
+ if (n - syslog_ns->syslog_partial <= size) {
/* message fits into buffer, move forward */
- syslog_idx = log_next(syslog_idx);
- syslog_seq++;
- syslog_prev = msg->flags;
- n -= syslog_partial;
- syslog_partial = 0;
+ syslog_ns->syslog_idx =
+ log_next(syslog_ns->syslog_idx, syslog_ns);
+ syslog_ns->syslog_seq++;
+ syslog_ns->syslog_prev = msg->flags;
+ n -= syslog_ns->syslog_partial;
+ syslog_ns->syslog_partial = 0;
} else if (!len){
/* partial read(), remember position */
n = size;
- syslog_partial += n;
+ syslog_ns->syslog_partial += n;
} else
n = 0;
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
if (!n)
break;
@@ -1001,7 +989,8 @@ static int syslog_print(char __user *buf, int size)
return len;
}
-static int syslog_print_all(char __user *buf, int size, bool clear)
+static int syslog_print_all(char __user *buf, int size, bool clear,
+ struct syslog_namespace *syslog_ns)
{
char *text;
int len = 0;
@@ -1010,55 +999,55 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
if (!text)
return -ENOMEM;
- raw_spin_lock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&syslog_ns->logbuf_lock);
if (buf) {
u64 next_seq;
u64 seq;
u32 idx;
enum log_flags prev;
- if (clear_seq < log_first_seq) {
+ if (syslog_ns->clear_seq < syslog_ns->log_first_seq) {
/* messages are gone, move to first available one */
- clear_seq = log_first_seq;
- clear_idx = log_first_idx;
+ syslog_ns->clear_seq = syslog_ns->log_first_seq;
+ syslog_ns->clear_idx = syslog_ns->log_first_idx;
}
/*
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
*/
- seq = clear_seq;
- idx = clear_idx;
+ seq = syslog_ns->clear_seq;
+ idx = syslog_ns->clear_idx;
prev = 0;
- while (seq < log_next_seq) {
- struct log *msg = log_from_idx(idx);
+ while (seq < syslog_ns->log_next_seq) {
+ struct log *msg = log_from_idx(idx, syslog_ns);
len += msg_print_text(msg, prev, true, NULL, 0);
prev = msg->flags;
- idx = log_next(idx);
+ idx = log_next(idx, syslog_ns);
seq++;
}
/* move first record forward until length fits into the buffer */
- seq = clear_seq;
- idx = clear_idx;
+ seq = syslog_ns->clear_seq;
+ idx = syslog_ns->clear_idx;
prev = 0;
- while (len > size && seq < log_next_seq) {
- struct log *msg = log_from_idx(idx);
+ while (len > size && seq < syslog_ns->log_next_seq) {
+ struct log *msg = log_from_idx(idx, syslog_ns);
len -= msg_print_text(msg, prev, true, NULL, 0);
prev = msg->flags;
- idx = log_next(idx);
+ idx = log_next(idx, syslog_ns);
seq++;
}
/* last message fitting into this dump */
- next_seq = log_next_seq;
+ next_seq = syslog_ns->log_next_seq;
len = 0;
prev = 0;
while (len >= 0 && seq < next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct log *msg = log_from_idx(idx, syslog_ns);
int textlen;
textlen = msg_print_text(msg, prev, true, text,
@@ -1067,31 +1056,31 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
len = textlen;
break;
}
- idx = log_next(idx);
+ idx = log_next(idx, syslog_ns);
seq++;
prev = msg->flags;
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
if (copy_to_user(buf + len, text, textlen))
len = -EFAULT;
else
len += textlen;
- raw_spin_lock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&syslog_ns->logbuf_lock);
- if (seq < log_first_seq) {
+ if (seq < syslog_ns->log_first_seq) {
/* messages are gone, move to next one */
- seq = log_first_seq;
- idx = log_first_idx;
+ seq = syslog_ns->log_first_seq;
+ idx = syslog_ns->log_first_idx;
prev = 0;
}
}
}
if (clear) {
- clear_seq = log_next_seq;
- clear_idx = log_next_idx;
+ syslog_ns->clear_seq = syslog_ns->log_next_seq;
+ syslog_ns->clear_idx = syslog_ns->log_next_idx;
}
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
kfree(text);
return len;
@@ -1102,6 +1091,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
bool clear = false;
static int saved_console_loglevel = -1;
int error;
+ struct syslog_namespace *syslog_ns = current_syslog_ns();
error = check_syslog_permissions(type, from_file);
if (error)
@@ -1128,10 +1118,10 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
goto out;
}
error = wait_event_interruptible(log_wait,
- syslog_seq != log_next_seq);
+ syslog_ns->syslog_seq != syslog_ns->log_next_seq);
if (error)
goto out;
- error = syslog_print(buf, len);
+ error = syslog_print(buf, len, syslog_ns);
break;
/* Read/clear last kernel messages */
case SYSLOG_ACTION_READ_CLEAR:
@@ -1149,11 +1139,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
error = -EFAULT;
goto out;
}
- error = syslog_print_all(buf, len, clear);
+ error = syslog_print_all(buf, len, clear, syslog_ns);
break;
/* Clear ring buffer */
case SYSLOG_ACTION_CLEAR:
- syslog_print_all(NULL, 0, true);
+ syslog_print_all(NULL, 0, true, syslog_ns);
break;
/* Disable logging to console */
case SYSLOG_ACTION_CONSOLE_OFF:
@@ -1182,13 +1172,13 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
break;
/* Number of chars in the log buffer */
case SYSLOG_ACTION_SIZE_UNREAD:
- raw_spin_lock_irq(&logbuf_lock);
- if (syslog_seq < log_first_seq) {
+ raw_spin_lock_irq(&syslog_ns->logbuf_lock);
+ if (syslog_ns->syslog_seq < syslog_ns->log_first_seq) {
/* messages are gone, move to first one */
- syslog_seq = log_first_seq;
- syslog_idx = log_first_idx;
- syslog_prev = 0;
- syslog_partial = 0;
+ syslog_ns->syslog_seq = syslog_ns->log_first_seq;
+ syslog_ns->syslog_idx = syslog_ns->log_first_idx;
+ syslog_ns->syslog_prev = 0;
+ syslog_ns->syslog_partial = 0;
}
if (from_file) {
/*
@@ -1196,28 +1186,28 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
* for pending data, not the size; return the count of
* records, not the length.
*/
- error = log_next_idx - syslog_idx;
+ error = syslog_ns->log_next_idx - syslog_ns->syslog_idx;
} else {
- u64 seq = syslog_seq;
- u32 idx = syslog_idx;
- enum log_flags prev = syslog_prev;
+ u64 seq = syslog_ns->syslog_seq;
+ u32 idx = syslog_ns->syslog_idx;
+ enum log_flags prev = syslog_ns->syslog_prev;
error = 0;
- while (seq < log_next_seq) {
- struct log *msg = log_from_idx(idx);
+ while (seq < syslog_ns->log_next_seq) {
+ struct log *msg = log_from_idx(idx, syslog_ns);
error += msg_print_text(msg, prev, true, NULL, 0);
- idx = log_next(idx);
+ idx = log_next(idx, syslog_ns);
seq++;
prev = msg->flags;
}
- error -= syslog_partial;
+ error -= syslog_ns->syslog_partial;
}
- raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&syslog_ns->logbuf_lock);
break;
/* Size of the log buffer */
case SYSLOG_ACTION_SIZE_BUFFER:
- error = log_buf_len;
+ error = syslog_ns->log_buf_len;
break;
default:
error = -EINVAL;
@@ -1282,7 +1272,7 @@ static void call_console_drivers(int level, const char *text, size_t len)
* every 10 seconds, to leave time for slow consoles to print a
* full oops.
*/
-static void zap_locks(void)
+static void zap_locks(struct syslog_namespace *syslog_ns)
{
static unsigned long oops_timestamp;
@@ -1294,7 +1284,7 @@ static void zap_locks(void)
debug_locks_off();
/* If a crash is occurring, make sure we can't deadlock */
- raw_spin_lock_init(&logbuf_lock);
+ raw_spin_lock_init(&syslog_ns->logbuf_lock);
/* And make sure that we print immediately */
sema_init(&console_sem, 1);
}
@@ -1334,8 +1324,9 @@ static inline int can_use_console(unsigned int cpu)
* interrupts disabled. It should return with 'lockbuf_lock'
* released but interrupts still disabled.
*/
-static int console_trylock_for_printk(unsigned int cpu)
- __releases(&logbuf_lock)
+static int console_trylock_for_printk(unsigned int cpu,
+ struct syslog_namespace *syslog_ns)
+ __releases(&syslog_ns->logbuf_lock)
{
int retval = 0, wake = 0;
@@ -1357,7 +1348,7 @@ static int console_trylock_for_printk(unsigned int cpu)
logbuf_cpu = UINT_MAX;
if (wake)
up(&console_sem);
- raw_spin_unlock(&logbuf_lock);
+ raw_spin_unlock(&syslog_ns->logbuf_lock);
return retval;
}
@@ -1393,7 +1384,7 @@ static struct cont {
bool flushed:1; /* buffer sealed and committed */
} cont;
-static void cont_flush(enum log_flags flags)
+static void cont_flush(enum log_flags flags, struct syslog_namespace *syslog_ns)
{
if (cont.flushed)
return;
@@ -1407,7 +1398,7 @@ static void cont_flush(enum log_flags flags)
* line. LOG_NOCONS suppresses a duplicated output.
*/
log_store(cont.facility, cont.level, flags | LOG_NOCONS,
- cont.ts_nsec, NULL, 0, cont.buf, cont.len);
+ cont.ts_nsec, NULL, 0, cont.buf, cont.len, syslog_ns);
cont.flags = flags;
cont.flushed = true;
} else {
@@ -1416,19 +1407,20 @@ static void cont_flush(enum log_flags flags)
* just submit it to the store and free the buffer.
*/
log_store(cont.facility, cont.level, flags, 0,
- NULL, 0, cont.buf, cont.len);
+ NULL, 0, cont.buf, cont.len, syslog_ns);
cont.len = 0;
}
}
-static bool cont_add(int facility, int level, const char *text, size_t len)
+static bool cont_add(int facility, int level, const char *text, size_t len,
+ struct syslog_namespace *syslog_ns)
{
if (cont.len && cont.flushed)
return false;
if (cont.len + len > sizeof(cont.buf)) {
/* the line gets too long, split it up in separate records */
- cont_flush(LOG_CONT);
+ cont_flush(LOG_CONT, syslog_ns);
return false;
}
@@ -1446,7 +1438,7 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
cont.len += len;
if (cont.len > (sizeof(cont.buf) * 80) / 100)
- cont_flush(LOG_CONT);
+ cont_flush(LOG_CONT, syslog_ns);
return true;
}
@@ -1481,7 +1473,8 @@ static size_t cont_print_text(char *text, size_t size)
asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
- const char *fmt, va_list args)
+ const char *fmt, va_list args,
+ struct syslog_namespace *syslog_ns)
{
static int recursion_bug;
static char textbuf[LOG_LINE_MAX];
@@ -1514,11 +1507,11 @@ asmlinkage int vprintk_emit(int facility, int level,
recursion_bug = 1;
goto out_restore_irqs;
}
- zap_locks();
+ zap_locks(syslog_ns);
}
lockdep_off();
- raw_spin_lock(&logbuf_lock);
+ raw_spin_lock(&syslog_ns->logbuf_lock);
logbuf_cpu = this_cpu;
if (recursion_bug) {
@@ -1529,7 +1522,7 @@ asmlinkage int vprintk_emit(int facility, int level,
printed_len += strlen(recursion_msg);
/* emit KERN_CRIT message */
log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
- NULL, 0, recursion_msg, printed_len);
+ NULL, 0, recursion_msg, printed_len, syslog_ns);
}
/*
@@ -1576,12 +1569,12 @@ asmlinkage int vprintk_emit(int facility, int level,
* or another task also prints continuation lines.
*/
if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
- cont_flush(LOG_NEWLINE);
+ cont_flush(LOG_NEWLINE, syslog_ns);
/* buffer line if possible, otherwise store it right away */
- if (!cont_add(facility, level, text, text_len))
+ if (!cont_add(facility, level, text, text_len, syslog_ns))
log_store(facility, level, lflags | LOG_CONT, 0,
- dict, dictlen, text, text_len);
+ dict, dictlen, text, text_len, syslog_ns);
} else {
bool stored = false;
@@ -1593,13 +1586,14 @@ asmlinkage int vprintk_emit(int facility, int level,
*/
if (cont.len && cont.owner == current) {
if (!(lflags & LOG_PREFIX))
- stored = cont_add(facility, level, text, text_len);
- cont_flush(LOG_NEWLINE);
+ stored = cont_add(facility, level, text,
+ text_len, syslog_ns);
+ cont_flush(LOG_NEWLINE, syslog_ns);
}
if (!stored)
log_store(facility, level, lflags, 0,
- dict, dictlen, text, text_len);
+ dict, dictlen, text, text_len, syslog_ns);
}
printed_len += text_len;
@@ -1611,7 +1605,7 @@ asmlinkage int vprintk_emit(int facility, int level,
* The console_trylock_for_printk() function will release 'logbuf_lock'
* regardless of whether it actually gets the console semaphore or not.
*/
- if (console_trylock_for_printk(this_cpu))
+ if (console_trylock_for_printk(this_cpu, syslog_ns))
console_unlock();
lockdep_on();
@@ -1624,7 +1618,8 @@ EXPORT_SYMBOL(vprintk_emit);
asmlinkage int vprintk(const char *fmt, va_list args)
{
- return vprintk_emit(0, -1, NULL, 0, fmt, args);
+ return vprintk_emit(0, -1, NULL, 0, fmt, args,
+ current_syslog_ns());
}
EXPORT_SYMBOL(vprintk);
@@ -1636,7 +1631,8 @@ asmlinkage int printk_emit(int facility, int level,
int r;
va_start(args, fmt);
- r = vprintk_emit(facility, level, dict, dictlen, fmt, args);
+ r = vprintk_emit(facility, level, dict, dictlen, fmt, args,
+ current_syslog_ns());
va_end(args);
return r;
@@ -1678,7 +1674,7 @@ asmlinkage int printk(const char *fmt, ...)
}
#endif
va_start(args, fmt);
- r = vprintk_emit(0, -1, NULL, 0, fmt, args);
+ r = vprintk_emit(0, -1, NULL, 0, fmt, args, current_syslog_ns());
va_end(args);
return r;
@@ -1981,12 +1977,13 @@ void wake_up_klogd(void)
this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
}
-static void console_cont_flush(char *text, size_t size)
+static void console_cont_flush(char *text, size_t size,
+ struct syslog_namespace *syslog_ns)
{
unsigned long flags;
size_t len;
- raw_spin_lock_irqsave(&logbuf_lock, flags);
+ raw_spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
if (!cont.len)
goto out;
@@ -1996,18 +1993,131 @@ static void console_cont_flush(char *text, size_t size)
* busy. The earlier ones need to be printed before this one, we
* did not flush any fragment so far, so just let it queue up.
*/
- if (console_seq < log_next_seq && !cont.cons)
+ if (syslog_ns->console_seq < syslog_ns->log_next_seq && !cont.cons)
goto out;
len = cont_print_text(text, size);
- raw_spin_unlock(&logbuf_lock);
+ raw_spin_unlock(&syslog_ns->logbuf_lock);
stop_critical_timings();
call_console_drivers(cont.level, text, len);
start_critical_timings();
local_irq_restore(flags);
return;
out:
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
+}
+
+/**
+ * syslog_console_unlock - unlock the console system for syslog_namespace
+ *
+ * Releases the console_lock which the caller holds on the console system
+ * and the console driver list.
+ *
+ * While the console_lock was held, console output may have been buffered
+ * by printk(). If this is the case, syslog_console_unlock(); emits
+ * the output prior to releasing the lock.
+ *
+ * If there is output waiting, we wake /dev/kmsg and syslog() users.
+ *
+ * syslog_console_unlock(); may be called from any context.
+ */
+void syslog_console_unlock(struct syslog_namespace *syslog_ns)
+{
+ static char text[LOG_LINE_MAX + PREFIX_MAX];
+ static u64 seen_seq;
+ unsigned long flags;
+ bool wake_klogd = false;
+ bool retry;
+
+ if (console_suspended) {
+ up(&console_sem);
+ return;
+ }
+
+ console_may_schedule = 0;
+
+ /* flush buffered message fragment immediately to console */
+ console_cont_flush(text, sizeof(text), syslog_ns);
+again:
+ for (;;) {
+ struct log *msg;
+ size_t len;
+ int level;
+
+ raw_spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
+ if (seen_seq != syslog_ns->log_next_seq) {
+ wake_klogd = true;
+ seen_seq = syslog_ns->log_next_seq;
+ }
+
+ if (syslog_ns->console_seq < syslog_ns->log_first_seq) {
+ /* messages are gone, move to first one */
+ syslog_ns->console_seq = syslog_ns->log_first_seq;
+ syslog_ns->console_idx = syslog_ns->log_first_idx;
+ console_prev = 0;
+ }
+skip:
+ if (syslog_ns->console_seq == syslog_ns->log_next_seq)
+ break;
+
+ msg = log_from_idx(syslog_ns->console_idx, syslog_ns);
+ if (msg->flags & LOG_NOCONS) {
+ /*
+ * Skip record we have buffered and already printed
+ * directly to the console when we received it.
+ */
+ syslog_ns->console_idx =
+ log_next(syslog_ns->console_idx, syslog_ns);
+ syslog_ns->console_seq++;
+ /*
+ * We will get here again when we register a new
+ * CON_PRINTBUFFER console. Clear the flag so we
+ * will properly dump everything later.
+ */
+ msg->flags &= ~LOG_NOCONS;
+ console_prev = msg->flags;
+ goto skip;
+ }
+
+ level = msg->level;
+ len = msg_print_text(msg, console_prev, false,
+ text, sizeof(text));
+ syslog_ns->console_idx =
+ log_next(syslog_ns->console_idx, syslog_ns);
+ syslog_ns->console_seq++;
+ console_prev = msg->flags;
+ raw_spin_unlock(&syslog_ns->logbuf_lock);
+
+ stop_critical_timings(); /* don't trace print latency */
+ call_console_drivers(level, text, len);
+ start_critical_timings();
+ local_irq_restore(flags);
+ }
+ console_locked = 0;
+
+ /* Release the exclusive_console once it is used */
+ if (unlikely(exclusive_console))
+ exclusive_console = NULL;
+
+ raw_spin_unlock(&syslog_ns->logbuf_lock);
+
+ up(&console_sem);
+
+ /*
+ * Someone could have filled up the buffer again, so re-check if there's
+ * something to flush. In case we cannot trylock the console_sem again,
+ * there's a new owner and the console_unlock() from them will do the
+ * flush, no worries.
+ */
+ raw_spin_lock(&syslog_ns->logbuf_lock);
+ retry = syslog_ns->console_seq != syslog_ns->log_next_seq;
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
+
+ if (retry && console_trylock())
+ goto again;
+
+ if (wake_klogd)
+ wake_up_klogd();
}
/**
@@ -2027,6 +2137,7 @@ out:
void console_unlock(void)
{
static char text[LOG_LINE_MAX + PREFIX_MAX];
+ struct syslog_namespace *syslog_ns = current_syslog_ns();
static u64 seen_seq;
unsigned long flags;
bool wake_klogd = false;
@@ -2040,37 +2151,38 @@ void console_unlock(void)
console_may_schedule = 0;
/* flush buffered message fragment immediately to console */
- console_cont_flush(text, sizeof(text));
+ console_cont_flush(text, sizeof(text), syslog_ns);
again:
for (;;) {
struct log *msg;
size_t len;
int level;
- raw_spin_lock_irqsave(&logbuf_lock, flags);
- if (seen_seq != log_next_seq) {
+ raw_spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
+ if (seen_seq != syslog_ns->log_next_seq) {
wake_klogd = true;
- seen_seq = log_next_seq;
+ seen_seq = syslog_ns->log_next_seq;
}
- if (console_seq < log_first_seq) {
+ if (syslog_ns->console_seq < syslog_ns->log_first_seq) {
/* messages are gone, move to first one */
- console_seq = log_first_seq;
- console_idx = log_first_idx;
+ syslog_ns->console_seq = syslog_ns->log_first_seq;
+ syslog_ns->console_idx = syslog_ns->log_first_idx;
console_prev = 0;
}
skip:
- if (console_seq == log_next_seq)
+ if (syslog_ns->console_seq == syslog_ns->log_next_seq)
break;
- msg = log_from_idx(console_idx);
+ msg = log_from_idx(syslog_ns->console_idx, syslog_ns);
if (msg->flags & LOG_NOCONS) {
/*
* Skip record we have buffered and already printed
* directly to the console when we received it.
*/
- console_idx = log_next(console_idx);
- console_seq++;
+ syslog_ns->console_idx =
+ log_next(syslog_ns->console_idx, syslog_ns);
+ syslog_ns->console_seq++;
/*
* We will get here again when we register a new
* CON_PRINTBUFFER console. Clear the flag so we
@@ -2084,10 +2196,11 @@ skip:
level = msg->level;
len = msg_print_text(msg, console_prev, false,
text, sizeof(text));
- console_idx = log_next(console_idx);
- console_seq++;
+ syslog_ns->console_idx =
+ log_next(syslog_ns->console_idx, syslog_ns);
+ syslog_ns->console_seq++;
console_prev = msg->flags;
- raw_spin_unlock(&logbuf_lock);
+ raw_spin_unlock(&syslog_ns->logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(level, text, len);
@@ -2100,7 +2213,7 @@ skip:
if (unlikely(exclusive_console))
exclusive_console = NULL;
- raw_spin_unlock(&logbuf_lock);
+ raw_spin_unlock(&syslog_ns->logbuf_lock);
up(&console_sem);
@@ -2110,9 +2223,9 @@ skip:
* there's a new owner and the console_unlock() from them will do the
* flush, no worries.
*/
- raw_spin_lock(&logbuf_lock);
- retry = console_seq != log_next_seq;
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_lock(&syslog_ns->logbuf_lock);
+ retry = syslog_ns->console_seq != syslog_ns->log_next_seq;
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
if (retry && console_trylock())
goto again;
@@ -2237,6 +2350,7 @@ void register_console(struct console *newcon)
int i;
unsigned long flags;
struct console *bcon = NULL;
+ struct syslog_namespace *syslog_ns = &init_syslog_ns;
/*
* before we register a new CON_BOOT console, make sure we don't
@@ -2346,11 +2460,11 @@ void register_console(struct console *newcon)
* console_unlock(); will print out the buffered messages
* for us.
*/
- raw_spin_lock_irqsave(&logbuf_lock, flags);
- console_seq = syslog_seq;
- console_idx = syslog_idx;
- console_prev = syslog_prev;
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
+ syslog_ns->console_seq = syslog_ns->syslog_seq;
+ syslog_ns->console_idx = syslog_ns->syslog_idx;
+ console_prev = syslog_ns->syslog_prev;
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
/*
* We're about to replay the log buffer. Only do this to the
* just-registered console to avoid excessive message spam to
@@ -2573,6 +2687,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
{
struct kmsg_dumper *dumper;
unsigned long flags;
+ struct syslog_namespace *syslog_ns = &init_syslog_ns;
if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
return;
@@ -2585,12 +2700,12 @@ void kmsg_dump(enum kmsg_dump_reason reason)
/* initialize iterator with data about the stored records */
dumper->active = true;
- raw_spin_lock_irqsave(&logbuf_lock, flags);
- dumper->cur_seq = clear_seq;
- dumper->cur_idx = clear_idx;
- dumper->next_seq = log_next_seq;
- dumper->next_idx = log_next_idx;
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
+ dumper->cur_seq = syslog_ns->clear_seq;
+ dumper->cur_idx = syslog_ns->clear_idx;
+ dumper->next_seq = syslog_ns->log_next_seq;
+ dumper->next_idx = syslog_ns->log_next_idx;
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
/* invoke dumper which will iterate over records */
dumper->dump(dumper, reason);
@@ -2626,24 +2741,25 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
struct log *msg;
size_t l = 0;
bool ret = false;
+ struct syslog_namespace *syslog_ns = &init_syslog_ns;
if (!dumper->active)
goto out;
- if (dumper->cur_seq < log_first_seq) {
+ if (dumper->cur_seq < syslog_ns->log_first_seq) {
/* messages are gone, move to first available one */
- dumper->cur_seq = log_first_seq;
- dumper->cur_idx = log_first_idx;
+ dumper->cur_seq = syslog_ns->log_first_seq;
+ dumper->cur_idx = syslog_ns->log_first_idx;
}
/* last entry */
- if (dumper->cur_seq >= log_next_seq)
+ if (dumper->cur_seq >= syslog_ns->log_next_seq)
goto out;
- msg = log_from_idx(dumper->cur_idx);
+ msg = log_from_idx(dumper->cur_idx, syslog_ns);
l = msg_print_text(msg, 0, syslog, line, size);
- dumper->cur_idx = log_next(dumper->cur_idx);
+ dumper->cur_idx = log_next(dumper->cur_idx, syslog_ns);
dumper->cur_seq++;
ret = true;
out:
@@ -2674,10 +2790,12 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
{
unsigned long flags;
bool ret;
+ struct syslog_namespace *syslog_ns = &init_syslog_ns;
+
+ raw_spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
- raw_spin_lock_irqsave(&logbuf_lock, flags);
ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
return ret;
}
@@ -2713,20 +2831,21 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
enum log_flags prev;
size_t l = 0;
bool ret = false;
+ struct syslog_namespace *syslog_ns = &init_syslog_ns;
if (!dumper->active)
goto out;
- raw_spin_lock_irqsave(&logbuf_lock, flags);
- if (dumper->cur_seq < log_first_seq) {
+ raw_spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
+ if (dumper->cur_seq < syslog_ns->log_first_seq) {
/* messages are gone, move to first available one */
- dumper->cur_seq = log_first_seq;
- dumper->cur_idx = log_first_idx;
+ dumper->cur_seq = syslog_ns->log_first_seq;
+ dumper->cur_idx = syslog_ns->log_first_idx;
}
/* last entry */
if (dumper->cur_seq >= dumper->next_seq) {
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
goto out;
}
@@ -2735,10 +2854,10 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
idx = dumper->cur_idx;
prev = 0;
while (seq < dumper->next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct log *msg = log_from_idx(idx, syslog_ns);
l += msg_print_text(msg, prev, true, NULL, 0);
- idx = log_next(idx);
+ idx = log_next(idx, syslog_ns);
seq++;
prev = msg->flags;
}
@@ -2748,10 +2867,10 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
idx = dumper->cur_idx;
prev = 0;
while (l > size && seq < dumper->next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct log *msg = log_from_idx(idx, syslog_ns);
l -= msg_print_text(msg, prev, true, NULL, 0);
- idx = log_next(idx);
+ idx = log_next(idx, syslog_ns);
seq++;
prev = msg->flags;
}
@@ -2763,10 +2882,10 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
l = 0;
prev = 0;
while (seq < dumper->next_seq) {
- struct log *msg = log_from_idx(idx);
+ struct log *msg = log_from_idx(idx, syslog_ns);
l += msg_print_text(msg, prev, syslog, buf + l, size - l);
- idx = log_next(idx);
+ idx = log_next(idx, syslog_ns);
seq++;
prev = msg->flags;
}
@@ -2774,7 +2893,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
dumper->next_seq = next_seq;
dumper->next_idx = next_idx;
ret = true;
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
out:
if (len)
*len = l;
@@ -2794,10 +2913,12 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
*/
void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
{
- dumper->cur_seq = clear_seq;
- dumper->cur_idx = clear_idx;
- dumper->next_seq = log_next_seq;
- dumper->next_idx = log_next_idx;
+ struct syslog_namespace *syslog_ns = &init_syslog_ns;
+
+ dumper->cur_seq = syslog_ns->clear_seq;
+ dumper->cur_idx = syslog_ns->clear_idx;
+ dumper->next_seq = syslog_ns->log_next_seq;
+ dumper->next_idx = syslog_ns->log_next_idx;
}
/**
@@ -2811,10 +2932,12 @@ void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
void kmsg_dump_rewind(struct kmsg_dumper *dumper)
{
unsigned long flags;
+ struct syslog_namespace *syslog_ns = &init_syslog_ns;
+
+ raw_spin_lock_irqsave(&syslog_ns->logbuf_lock, flags);
- raw_spin_lock_irqsave(&logbuf_lock, flags);
kmsg_dump_rewind_nolock(dumper);
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_unlock_irqrestore(&syslog_ns->logbuf_lock, flags);
}
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
#endif
--
1.7.1
^ permalink raw reply related
* [PATCH RFC 1/5] Syslog_ns: add syslog_namespace struct and API
From: Rui Xiang @ 2012-11-19 8:16 UTC (permalink / raw)
To: serge.hallyn, containers; +Cc: Eric W. Biederman, netdev
From: Xiang Rui <rui.xiang@huawei.com>
This patch adds a struct syslog_namespace which contains the members needed
when handling syslog.
We implement the get_syslog_ns and put_syslog_ns API, and syslog_ns is initialized
by init_syslog_ns. CONFIG_SYSLOG_NS is defined to allow creating a syslog_ns.
Signed-off-by: Xiang Rui <rui.xiang@huawei.com>
Signed-off-by: Libo Chen <clbchenlibo.chen@huawei.com>
---
include/linux/syslog_namespace.h | 78 ++++++++++++++++++++++++++++++++++++++
init/Kconfig | 7 +++
kernel/Makefile | 1 +
kernel/syslog_namespace.c | 31 +++++++++++++++
4 files changed, 117 insertions(+), 0 deletions(-)
create mode 100644 include/linux/syslog_namespace.h
create mode 100644 kernel/syslog_namespace.c
diff --git a/include/linux/syslog_namespace.h b/include/linux/syslog_namespace.h
new file mode 100644
index 0000000..8c8ac5a
--- /dev/null
+++ b/include/linux/syslog_namespace.h
@@ -0,0 +1,78 @@
+#ifndef _LINUX_SYSLOG_NAMESPACE_H
+#define _LINUX_SYSLOG_NAMESPACE_H
+
+#include <linux/kref.h>
+
+/* record buffer */
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#define LOG_ALIGN 4
+#else
+#define LOG_ALIGN __alignof__(struct log)
+#endif
+
+#define CONTAINER_BUF_LEN 4096
+
+#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
+
+struct syslog_namespace {
+ struct kref kref; /* syslog_ns reference count & control */
+
+ raw_spinlock_t logbuf_lock; /* access conflict locker */
+
+ /* index and sequence number of the first record stored in the buffer */
+ u64 log_first_seq;
+ u32 log_first_idx;
+
+ /* index and sequence number of the next record stored in the buffer */
+ u64 log_next_seq;
+ u32 log_next_idx;
+
+ /* the next printk record to read after the last 'clear' command */
+ u64 clear_seq;
+ u32 clear_idx;
+
+ char *log_buf;
+ u32 log_buf_len;
+
+ /* the next printk record to write to the console */
+ u64 console_seq;
+ u32 console_idx;
+
+ /* the next printk record to read by syslog(READ) or /proc/kmsg */
+ u64 syslog_seq;
+ u32 syslog_idx;
+ int syslog_prev;
+ size_t syslog_partial;
+};
+
+extern struct syslog_namespace init_syslog_ns;
+
+#ifdef CONFIG_SYSLOG_NS
+extern void free_syslog_ns(struct kref *kref);
+static inline struct syslog_namespace *get_syslog_ns(
+ struct syslog_namespace *ns)
+{
+ if (ns != &init_syslog_ns)
+ kref_get(&ns->kref);
+ return ns;
+}
+
+static inline void put_syslog_ns(struct syslog_namespace *ns)
+{
+ if (ns != &init_syslog_ns)
+ kref_put(&ns->kref, free_syslog_ns);
+}
+
+#else
+static inline struct syslog_namespace *get_syslog_ns(
+ struct syslog_namespace *ns)
+{
+ return ns;
+}
+
+static inline void put_syslog_ns(struct syslog_namespace *ns)
+{
+}
+#endif
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index 6fdd6e3..82771e0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -988,6 +988,13 @@ config NET_NS
Allow user space to create what appear to be multiple instances
of the network stack.
+config SYSLOG_NS
+ bool "Syslog namespace"
+ default y
+ help
+ Allow containers to use syslog namespaces to provide different
+ syslog for containers.
+
endif # NAMESPACES
config UIDGID_CONVERTED
diff --git a/kernel/Makefile b/kernel/Makefile
index 0dfeca4..cb3cba0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -28,6 +28,7 @@ obj-y += power/
ifeq ($(CONFIG_CHECKPOINT_RESTORE),y)
obj-$(CONFIG_X86) += kcmp.o
endif
+obj-$(CONFIG_SYSLOG_NS) += syslog_namespace.o
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/syslog_namespace.c b/kernel/syslog_namespace.c
new file mode 100644
index 0000000..9482927
--- /dev/null
+++ b/kernel/syslog_namespace.c
@@ -0,0 +1,31 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/syslog_namespace.h>
+
+static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
+
+struct syslog_namespace init_syslog_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
+ .logbuf_lock = __RAW_SPIN_LOCK_UNLOCKED(init_syslog_ns.logbuf_lock),
+ .log_buf_len = __LOG_BUF_LEN,
+ .log_buf = __log_buf,
+};
+EXPORT_SYMBOL_GPL(init_syslog_ns);
+
+void free_syslog_ns(struct kref *kref)
+{
+ struct syslog_namespace *ns;
+ ns = container_of(kref, struct syslog_namespace, kref);
+
+ kfree(ns->log_buf);
+ kfree(ns);
+}
--
1.7.1
^ permalink raw reply related
* [PATCH RFC 2/5] Syslog_ns: add CLONE_NEWSYSLOG and create syslog_ns when copying process
From: Rui Xiang @ 2012-11-19 8:16 UTC (permalink / raw)
To: serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Eric W. Biederman
From: Xiang Rui <rui.xiang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
We add a new clone flag named CLONE_NEWSYSLOG, and use 0x02000000 which was
previously the unused CLONE_STOPPED and is now available for re-use.
In syslog_namespace.c, the interface copy_syslog_ns is implemented to create
a new syslog_ns. This interface is used when a new namespace is created while
copying a process.
Signed-off-by: Xiang Rui <rui.xiang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Libo Chen <clbchenlibo.chen-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
include/linux/nsproxy.h | 2 ++
include/linux/syslog_namespace.h | 19 +++++++++++++++++++
include/uapi/linux/sched.h | 3 +--
kernel/nsproxy.c | 16 +++++++++++++++-
kernel/syslog_namespace.c | 32 ++++++++++++++++++++++++++++++++
5 files changed, 69 insertions(+), 3 deletions(-)
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cc37a55..9db2527 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -8,6 +8,7 @@ struct mnt_namespace;
struct uts_namespace;
struct ipc_namespace;
struct pid_namespace;
+struct syslog_namespace;
struct fs_struct;
/*
@@ -29,6 +30,7 @@ struct nsproxy {
struct mnt_namespace *mnt_ns;
struct pid_namespace *pid_ns;
struct net *net_ns;
+ struct syslog_namespace *syslog_ns;
};
extern struct nsproxy init_nsproxy;
diff --git a/include/linux/syslog_namespace.h b/include/linux/syslog_namespace.h
index 8c8ac5a..1ecb8b8 100644
--- a/include/linux/syslog_namespace.h
+++ b/include/linux/syslog_namespace.h
@@ -2,6 +2,9 @@
#define _LINUX_SYSLOG_NAMESPACE_H
#include <linux/kref.h>
+#include <linux/sched.h>
+#include <linux/nsproxy.h>
+#include <linux/err.h>
/* record buffer */
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -47,8 +50,16 @@ struct syslog_namespace {
extern struct syslog_namespace init_syslog_ns;
+static inline struct syslog_namespace *current_syslog_ns(void)
+{
+ return current->nsproxy->syslog_ns;
+}
+
#ifdef CONFIG_SYSLOG_NS
extern void free_syslog_ns(struct kref *kref);
+extern struct syslog_namespace *copy_syslog_ns(unsigned long flags,
+ struct task_struct *tsk);
+
static inline struct syslog_namespace *get_syslog_ns(
struct syslog_namespace *ns)
{
@@ -64,6 +75,14 @@ static inline void put_syslog_ns(struct syslog_namespace *ns)
}
#else
+static inline struct syslog_namespace *copy_syslog_ns(unsigned long flags,
+ struct task_struct *tsk)
+{
+ if (flags & CLONE_NEWSYSLOG)
+ return ERR_PTR(-EINVAL);
+ return tsk->nsproxy->syslog_ns;
+}
+
static inline struct syslog_namespace *get_syslog_ns(
struct syslog_namespace *ns)
{
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 5a0f945..906a3da 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -21,8 +21,7 @@
#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
-/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
- and is now available for re-use. */
+#define CLONE_NEWSYSLOG 0x02000000 /* New syslog namespace */
#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index b576f7f..331d31f 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,7 @@
#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
+#include <linux/syslog_namespace.h>
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <linux/syscalls.h>
@@ -36,6 +37,7 @@ struct nsproxy init_nsproxy = {
#endif
.mnt_ns = NULL,
.pid_ns = &init_pid_ns,
+ .syslog_ns = &init_syslog_ns,
#ifdef CONFIG_NET
.net_ns = &init_net,
#endif
@@ -96,8 +98,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
goto out_net;
}
+ new_nsp->syslog_ns = copy_syslog_ns(flags, tsk);
+ if (IS_ERR(new_nsp->syslog_ns)) {
+ err = PTR_ERR(new_nsp->syslog_ns);
+ goto out_syslog;
+ }
+
return new_nsp;
+out_syslog:
+ if (new_nsp->net_ns)
+ put_net(new_nsp->net_ns);
out_net:
if (new_nsp->pid_ns)
put_pid_ns(new_nsp->pid_ns);
@@ -131,7 +142,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
get_nsproxy(old_ns);
if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWPID | CLONE_NEWNET)))
+ CLONE_NEWPID | CLONE_NEWNET |
+ CLONE_NEWSYSLOG)))
return 0;
if (!capable(CAP_SYS_ADMIN)) {
@@ -174,6 +186,8 @@ void free_nsproxy(struct nsproxy *ns)
put_ipc_ns(ns->ipc_ns);
if (ns->pid_ns)
put_pid_ns(ns->pid_ns);
+ if (ns->syslog_ns)
+ put_syslog_ns(ns->syslog_ns);
put_net(ns->net_ns);
kmem_cache_free(nsproxy_cachep, ns);
}
diff --git a/kernel/syslog_namespace.c b/kernel/syslog_namespace.c
index 9482927..a12e1c1 100644
--- a/kernel/syslog_namespace.c
+++ b/kernel/syslog_namespace.c
@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/bootmem.h>
#include <linux/syslog_namespace.h>
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -21,6 +22,37 @@ struct syslog_namespace init_syslog_ns = {
};
EXPORT_SYMBOL_GPL(init_syslog_ns);
+static struct syslog_namespace *create_syslog_ns(unsigned int buf_len)
+{
+ struct syslog_namespace *ns;
+
+ if (buf_len <= 0)
+ return ERR_PTR(-EINVAL);
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&(ns->kref));
+
+ ns->log_buf_len = buf_len;
+ ns->log_buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!ns->log_buf) {
+ kfree(ns);
+ return ERR_PTR(-ENOMEM);
+ }
+ raw_spin_lock_init(&(ns->logbuf_lock));
+
+ return ns;
+}
+
+struct syslog_namespace *copy_syslog_ns(unsigned long flags,
+ struct task_struct *tsk)
+{
+ if (!(flags & CLONE_NEWSYSLOG))
+ return get_syslog_ns(tsk->nsproxy->syslog_ns);
+ return create_syslog_ns(CONTAINER_BUF_LEN);
+}
+
void free_syslog_ns(struct kref *kref)
{
struct syslog_namespace *ns;
--
1.7.1
^ permalink raw reply related
* [PATCH RFC 0/5] Containerize syslog
From: Rui Xiang @ 2012-11-19 8:16 UTC (permalink / raw)
To: serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Eric W. Biederman
From: Xiang Rui <rui.xiang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
In Serge's patch (http://lwn.net/Articles/525629/), syslog_namespace was tied to a user
namespace. We add syslog_ns tied to nsproxy instead, and implement ns_printk in
ip_table context.
We add syslog_namespace as a part of nsproxy, and a new flag CLONE_NEWSYSLOG to unshare
the syslog area.
The syslog_namespace contains the identifiers needed for handling the syslog buf.
When a container creates a new syslog namespace, a containerized buf will be allocated
to store the logs owned by this container. Containerized identifiers such as log_first_seq,
which replace the global variables, only affect their own buf. The buf will not be freed until
the syslog_namespace is destroyed by the host.
Printk has to be re-implemented because the log buf is isolated into syslog_ns. The functions
involved, including printk, /dev/kmsg, do_syslog and kmsg_dump, should work in a container. So,
to make these functions available in a container, a syslog_ns parameter is necessary for
their interfaces.
For container context, the value syslog namespace is reasonable if we use current method
to get syslog_ns when using iptable. Because the log info belong to each containers will
be printed in host.
We add a pointer in net namespace, and use it to track the syslog_ns which was created
when the log was generated in container. Then add ns_printk to provide a new interface
while using syslog_ns.
This patchset is based on the develop tree of net branch
https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git.
Libo Chen (3):
printk: modify printk interface for syslog_namespace
printk: add ns_printk for specific syslog_ns
printk: use ns_printk in iptable context
Xiang Rui (2):
Syslog_ns: add syslog_namespace struct and API
Syslog_ns: add CLONE_NEWSYSLOG and create syslog_ns when copying
process
drivers/base/core.c | 4 +-
include/linux/nsproxy.h | 2 +
include/linux/printk.h | 5 +-
include/linux/syslog_namespace.h | 98 ++++++
include/net/net_namespace.h | 7 +-
include/net/netfilter/xt_log.h | 7 +-
include/uapi/linux/sched.h | 3 +-
init/Kconfig | 7 +
kernel/Makefile | 1 +
kernel/nsproxy.c | 19 +-
kernel/printk.c | 646 ++++++++++++++++++++++++--------------
kernel/syslog_namespace.c | 65 ++++
net/core/net_namespace.c | 12 +-
net/netfilter/xt_LOG.c | 4 +-
14 files changed, 623 insertions(+), 257 deletions(-)
create mode 100644 include/linux/syslog_namespace.h
create mode 100644 kernel/syslog_namespace.c
^ permalink raw reply
* Re: [rfc net-next v6 2/3] virtio_net: multiqueue support
From: Jason Wang @ 2012-11-19 7:40 UTC (permalink / raw)
To: Rusty Russell
Cc: krkumar2, kvm, mst, netdev, linux-kernel, virtualization, davem
In-Reply-To: <87y5igyhyg.fsf@rustcorp.com.au>
On 11/05/2012 09:08 AM, Rusty Russell wrote:
> Jason Wang <jasowang@redhat.com> writes:
>> +struct virtnet_info {
>> + u16 num_queue_pairs; /* # of RX/TX vq pairs */
>> + u16 total_queue_pairs;
>> +
>> + struct send_queue *sq;
>> + struct receive_queue *rq;
>> + struct virtqueue *cvq;
>> +
>> + struct virtio_device *vdev;
>> + struct net_device *dev;
>> + unsigned int status;
> status seems unused?
>
It's used for tracking the status of the device (e.g. in
virtnet_config_changed_work() ).
>> +static const struct ethtool_ops virtnet_ethtool_ops;
> Strange hoist, but I can't tell from the patch if this is necessary.
> Assume it is.
Sorry, this line should belong to patch 3/3.
>
>> +static inline int vq2txq(struct virtqueue *vq)
>> +{
>> + int index = virtqueue_get_queue_index(vq);
>> + return index == 1 ? 0 : (index - 3) / 2;
>> +}
>> +
>> +static inline int txq2vq(int txq)
>> +{
>> + return txq ? 2 * txq + 3 : 1;
>> +}
>> +
>> +static inline int vq2rxq(struct virtqueue *vq)
>> +{
>> + int index = virtqueue_get_queue_index(vq);
>> + return index ? (index - 2) / 2 : 0;
>> +}
>> +
>> +static inline int rxq2vq(int rxq)
>> +{
>> + return rxq ? 2 * rxq + 2 : 0;
>> +}
>> +
>> static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
> I know skb_vnet_hdr() does it, but I generally dislike inline in C
> files; gcc is generally smart enough these days, and inline suppresses
> unused function warnings.
Ok, I will remove the inline here.
> I guess these mappings have to work even when we're switching from mq to
> single queue mode; otherwise we could simplify them using a 'bool mq'
> flag.
Yes, it still works when switching to sq. What makes it look strange
is that we reserve the virtqueues for single queue mode and also
reserve vq 3. But that does not bring much benefit; it needs more thought.
>
>> +static int virtnet_set_queues(struct virtnet_info *vi)
>> +{
>> + struct scatterlist sg;
>> + struct virtio_net_ctrl_steering s;
>> + struct net_device *dev = vi->dev;
>> +
>> + if (vi->num_queue_pairs == 1) {
>> + s.current_steering_rule = VIRTIO_NET_CTRL_STEERING_SINGLE;
>> + s.current_steering_param = 1;
>> + } else {
>> + s.current_steering_rule =
>> + VIRTIO_NET_CTRL_STEERING_RX_FOLLOWS_TX;
>> + s.current_steering_param = vi->num_queue_pairs;
>> + }
> (BTW, VIRTIO_NET_CTRL_STEERING_RX_FOLLOWS_TX etc not defined anywhere?)
It's defined in include/uapi/linux/virtio_net.h
>
> Hmm, it's not clear that anything other than RX_FOLLOWS_TX will ever
> make sense, so this is really just turning mq on and off.
Currently, when multiqueue is enabled for tuntap, it does tx follow rx.
So when guest driver specify the RX_FOLLOWS_TX, qemu would just enable
multiqueue for tuntap and this policy could be done by tuntap.
>
> Unfortunately, we can't turn feature bits on and off after startup, so
> if we want this level of control (and I think we do), there does need to
> be a mechanism.
>
> Michael? I'd prefer this to be further simplfied, to just
> disable/enable. We can extend it later, but for now the second
> parameter is redundant, ie.:
>
> struct virtio_net_ctrl_steering {
> u8 mode; /* 0 == off, 1 == on */
> } __attribute__((packed));
>
We may need more policy in the future, so maybe a
VIRTIO_NET_CTRL_STEERING_NONE is ok?
>> @@ -924,11 +1032,10 @@ static void virtnet_get_ringparam(struct net_device *dev,
>> {
>> struct virtnet_info *vi = netdev_priv(dev);
>>
>> - ring->rx_max_pending = virtqueue_get_vring_size(vi->rvq);
>> - ring->tx_max_pending = virtqueue_get_vring_size(vi->svq);
>> + ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
>> + ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
>> ring->rx_pending = ring->rx_max_pending;
>> ring->tx_pending = ring->tx_max_pending;
>> -
>> }
> This assumes all vqs are the same size. I think this should probably
> check: for mq mode, use the first vq, otherewise use the 0th.
Ok, but I don't see the reason that we need different size for mq.
>
> For bonus points, check this assertion at probe time.
>
>> + /*
>> + * We expect 1 RX virtqueue followed by 1 TX virtqueue, followd by
>> + * possible control virtqueue, followed by 1 reserved vq, followed
>> + * by RX/TX queue pairs used in multiqueue mode.
>> + */
>> + if (vi->total_queue_pairs == 1)
>> + total_vqs = 2 + virtio_has_feature(vi->vdev,
>> + VIRTIO_NET_F_CTRL_VQ);
>> + else
>> + total_vqs = 2 * vi->total_queue_pairs + 2;
> What's the allergy to odd numbers? Why the reserved queue?
It was suggested by Michael to make the vq calculation easier, but it
does not seem to help much. So it's better not to reserve a virtqueue in the
next version.
>> + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
>> + vi->has_cvq = true;
>> +
>> + /* Use single tx/rx queue pair as default */
>> + vi->num_queue_pairs = 1;
>> + vi->total_queue_pairs = num_queue_pairs;
>> +
>> + /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
>> + err = virtnet_setup_vqs(vi);
>> if (err)
>> goto free_stats;
>>
>> + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) &&
>> + virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
>> + dev->features |= NETIF_F_HW_VLAN_FILTER;
> We should be using has_cvq here...
Sure.
>
>> -#ifdef CONFIG_PM
>> -static int virtnet_freeze(struct virtio_device *vdev)
>> +static void virtnet_stop(struct virtnet_info *vi)
> I think you still want this under CONFIG_PM, right? Doesn't seem used
> elsewhere.
Yes, will fix this.
>
> Cheers,
> Rusty.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH net-next ] net: Allow userns root to control tun and tap devices
From: Eric W. Biederman @ 2012-11-19 7:34 UTC (permalink / raw)
To: David Miller; +Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers
Allow an unprivileged user who has created a user namespace, and then
created a network namespace to effectively use the new network
namespace, by reducing capable(CAP_NET_ADMIN) calls to
ns_capable(net->user_ns,CAP_NET_ADMIN) calls.
Allow setting of the tun iff flags.
Allow creating of tun devices.
Allow adding a new queue to a tun device.
Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
drivers/net/tun.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b44d7b7..b01e8c0 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -373,10 +373,11 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
static inline bool tun_not_capable(struct tun_struct *tun)
{
const struct cred *cred = current_cred();
+ struct net *net = dev_net(tun->dev);
return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
(gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
- !capable(CAP_NET_ADMIN);
+ !ns_capable(net->user_ns, CAP_NET_ADMIN);
}
static void tun_set_real_num_queues(struct tun_struct *tun)
@@ -1559,7 +1560,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
char *name;
unsigned long flags = 0;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = security_tun_dev_create();
if (err < 0)
--
1.7.5.4
^ permalink raw reply related
* Re: Optics (SFP) monitoring on ixgbe and igbe
From: Robert Olsson @ 2012-11-19 7:27 UTC (permalink / raw)
To: footplus; +Cc: Ben Hutchings, netdev
In-Reply-To: <CAPN4dA9f3y1mDPubqd9s+v5supj3hNvZaWym0_y3EMZd7L6MyQ@mail.gmail.com>
Hi,
FYI. DOM use in Serengeti Tanzania (Bunda-Nata 60km) on solar driven low-power
linux atom router @ 20Watt w. igb driver using the older DOM patches. Very useful
stuff. Yes, get it included in the kernel.
NATA:/# ethtool -D eth1
Ext-Calbr: Avr RX-Power: Alarm & Warn: RX_LOS: Wavelength: 1550 nm
Alarms, warnings in beginning of line, Ie. AH = Alarm High, WL == Warn Low etc
Temp: 76.2 C Thresh: Lo: -50.0/-48.0 Hi: 95.0/110.0 C
Vcc: 3.27 V Thresh: Lo: 2.9/3.0 Hi: 3.5/3.6 V
Tx-Bias: 27.9 mA Thresh: Lo: 3.0/5.0 Hi: 90.0/100.0 mA
TX-pwr: 3.8 dBm ( 2.39 mW) Thresh: Lo: -5.0/-4.0 Hi: 5.0/6.0 dBm
RX-pwr: -17.3 dBm ( 0.02 mW) Thresh: Lo: -40.0/-37.0 Hi: -5.0/-3.0 dBm
http://herjulf.se/robert/tanzania-2012/Nata-installation-2.jpg
--ro
^ permalink raw reply
* Re: [PATCH net-next 0/17] Make the network stack usable by userns root
From: Eric W. Biederman @ 2012-11-19 7:27 UTC (permalink / raw)
To: David Miller
Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
In-Reply-To: <20121118.222601.1683927229305655885.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> writes:
> There were merge issues so I applied the patches and sorted the
> conflicts out one-by-one.
>
> I hope this doesn't cause major problems.
Shucks, I had thought I had tested and verified there would not be merge
issues. Oh well.
No major problems.
To keep it that way I am dropping all but the first two patches from my
userns development tree. I have dependencies on the infrastructure bits.
A quick merge test reveals that your tree against my full development
tree has two minor conflicts that are trivial to resolve. So I don't
anticipate Linus will have any problems.
Eric
^ permalink raw reply
* Birthday Please
From: Samiul Haque @ 2012-11-19 6:53 UTC (permalink / raw)
To: netdev
Hello
Click on the link below and please enter your birthday for me. It will take less than one minute.
http://www.birthdayalarm.com/bd2/88069043a120552402b1546197641c338248399d1386
Thank You,
Samiul
^ permalink raw reply
* Re: Latest 3.6.6 are not compiling due tg3 network driver, hwmon_device_unregister
From: Denys Fedoryshchenko @ 2012-11-19 6:59 UTC (permalink / raw)
To: David Rientjes
Cc: Nithin Nayak Sujir, Paul Gortmaker, Michael Chan, netdev,
linux-kernel
In-Reply-To: <alpine.DEB.2.00.1211181441470.5080@chino.kir.corp.google.com>
On 2012-11-19 00:42, David Rientjes wrote:
> On Wed, 14 Nov 2012, Nithin Nayak Sujir wrote:
>
>> On 11/14/2012 07:30 PM, David Rientjes wrote:
>> > On Wed, 14 Nov 2012, Nithin Nayak Sujir wrote:
>> >
>> > > This was fixed by
>> > >
>> > > commit de0a41484c47d783dd4d442914815076aa2caac2
>> > > Author: Paul Gortmaker <paul.gortmaker@windriver.com>
>> > > Date: Mon Oct 1 11:43:49 2012 -0400
>> > >
>> > > tg3: unconditionally select HWMON support when tg3 is
>> enabled.
>> > >
>> > Would you mind submitting this for stable by following the
>> procedure
>> > described in Documentation/stable_kernel_rules.txt?
>> >
>>
>> Will do. Thank you for bringing this to our attention.
>>
>
> Thanks for submitting the patch to stable, Greg has queued it for the
> kernels he maintains. Denys, expect to see this fix in 3.6.8.
Thank you!
---
Denys Fedoryshchenko, Network Engineer, Virtual ISP S.A.L.
^ permalink raw reply
* Re: [rfc net-next v6 3/3] virtio-net: change the number of queues through ethtool
From: Jason Wang @ 2012-11-19 6:22 UTC (permalink / raw)
To: Rusty Russell
Cc: krkumar2, kvm, mst, netdev, linux-kernel, virtualization, davem
In-Reply-To: <871ug9yls5.fsf@rustcorp.com.au>
On 11/05/2012 07:46 AM, Rusty Russell wrote:
> Jason Wang <jasowang@redhat.com> writes:
>> This patch implement the {set|get}_channels method of ethool to allow user to
>> change the number of queues dymaically when the device is running. This would
>> let the user to tune the device for specific applications.
> ...
>> + /* Only two modes were support currently */
>> + if (queue_pairs == 0)
>> + return -EINVAL;
>> + if (queue_pairs != vi->total_queue_pairs - 1 && queue_pairs != 1)
>> + return -EINVAL;
> OK, so you let them do all or nothing, but this three-way test is
> pretty unclear.
True, looks like the first check could be removed.
>
> In fact, the whole total_queue_pairs/num_queue_pairs thing is weird (and
> uncommented). I think for "total" you mean "max"; the maximum possible
> queue pair number.
Yes, "total" means "max", will add a comment or change the name to
max_queue_pairs/current_queue_pairs.
>
> Let me go back and review the previous patch again...
>
> Cheers,
> Rusty.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply
* Re: [rfc net-next v6 2/3] virtio_net: multiqueue support
From: Jason Wang @ 2012-11-19 6:18 UTC (permalink / raw)
To: Rusty Russell
Cc: krkumar2, kvm, mst, netdev, linux-kernel, virtualization, davem
In-Reply-To: <874nl5yn4k.fsf@rustcorp.com.au>
On 11/05/2012 07:16 AM, Rusty Russell wrote:
> Jason Wang <jasowang@redhat.com> writes:
>> This addes multiqueue support to virtio_net driver. There's two mode supported:
>> single queue pair mode and multiple queue pairs mode. An obvious
>> difference compared with a physical mq card is that virtio-net reserve
>> first two virtqueues when it is working in multiqueue mode, this is
>> used for implementing adaptive mode switching in the future. The
>> virtqueues that were in both mq and sq mode were initialized and only
>> one queue pair (single queue mode) were used at default. User could
>> use ethtool -L to switch to multiqueue mode withe the next patch.
> Hi Jason,
>
> This first patch looks good, but conflates three things
> together:
> (1) Separate per-queue structures from struct virtnet_info to allow
> multiple queues. This is the mechanical part of the patch.
> (2) An annotation bugfix, see below.
> (3) Enabling mq using a new feature and negotiation.
Hi Rusty:
Sorry for the late response, just back from vacation.
For 1 and 3, I will split the patch as you suggested.
For 2, will fix it.
Thanks
>
>> @@ -700,7 +767,8 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
>> unsigned int start;
>>
>> for_each_possible_cpu(cpu) {
>> - struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
>> + struct virtnet_stats __percpu *stats
>> + = per_cpu_ptr(vi->stats, cpu);
>> u64 tpackets, tbytes, rpackets, rbytes;
>>
>> do {
> Cheers,
> Rusty.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: 82571EB: Detected Hardware Unit Hang
From: Joe Jin @ 2012-11-19 5:38 UTC (permalink / raw)
To: Dave, Tushar N
Cc: e1000-devel@lists.sf.net, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, Mary Mcgrath
In-Reply-To: <061C8A8601E8EE4CA8D8FD6990CEA8913349A0B4@ORSMSX102.amr.corp.intel.com>
On 11/16/12 04:26, Dave, Tushar N wrote:
>> Would you please help to fine the offset of max payload size in eeprom?
>> I'd like to have a try to modify it by ethtool.
>
> It is defined using bit 8 of word 0x1A.
> Bit value 0 = 128B , bit value 1 = 256B
Hi Tushar,
I checked one of my server which Max Payload Size is 128:
# lspci -vvv -s 52:00.1
52:00.1 Ethernet controller: Intel Corporation 82571EB Gigabit Ethernet Controller (rev 06)
Subsystem: Intel Corporation PRO/1000 PT Quad Port Server Adapter
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR- FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
Latency: 0, Cache Line Size: 64 bytes
Interrupt: pin B routed to IRQ 266
Region 0: Memory at dfea0000 (32-bit, non-prefetchable) [size=128K]
Region 1: Memory at dfe80000 (32-bit, non-prefetchable) [size=128K]
Region 2: I/O ports at 6020 [size=32]
[virtual] Expansion ROM at d8120000 [disabled] [size=128K]
Capabilities: [c8] Power Management version 2
Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold-)
Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=1 PME-
Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+
Address: 00000000fee00000 Data: 409a
Capabilities: [e0] Express (v1) Endpoint, MSI 00
DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s <512ns, L1 <64us
ExtTag- AttnBtn- AttnInd- PwrInd- RBE- FLReset-
DevCtl: Report errors: Correctable+ Non-Fatal+ Fatal+ Unsupported+
RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop+
MaxPayload 128 bytes, MaxReadReq 4096 bytes
DevSta: CorrErr- UncorrErr- FatalErr+ UnsuppReq+ AuxPwr- TransPend-
LnkCap: Port #0, Speed 2.5GT/s, Width x4, ASPM L0s, Latency L0 <4us, L1 <64us
ClockPM- Surprise- LLActRep- BwNot-
LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- Retrain- CommClk+
ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
LnkSta: Speed 2.5GT/s, Width x4, TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
Capabilities: [100 v1] Advanced Error Reporting
UESta: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq+ ACSViol-
UEMsk: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq+ ACSViol-
UESvrt: DLP+ SDES- TLP+ FCP+ CmpltTO+ CmpltAbrt+ UnxCmplt+ RxOF+ MalfTLP+ ECRC- UnsupReq+ ACSViol-
CESta: RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
CEMsk: RxErr+ BadTLP+ BadDLLP+ Rollover+ Timeout+ NonFatalErr-
AERCap: First Error Pointer: 14, GenCap- CGenEn- ChkCap- ChkEn-
Capabilities: [140 v1] Device Serial Number 00-15-17-ff-ff-16-ed-86
Kernel driver in use: e1000e
Kernel modules: e1000e
And eeprom dump as below:
Offset Values
------ ------
0x0000 00 15 17 16 ed 86 24 05 ff ff a2 50 ff ff ff ff
0x0010 57 d4 07 74 2f a4 a4 11 86 80 a4 10 86 80 65 b1
0x0020 08 00 a4 10 00 58 00 00 01 50 00 00 00 00 00 01
0x0030 f6 6c b0 37 a6 07 03 84 83 07 00 00 03 c3 02 06
0x0040 08 00 f0 0e 64 21 40 00 01 40 00 00 00 00 00 00
0x0050 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0060 00 01 00 40 1e 12 07 40 00 01 00 40 ff ff ff ff
If I have not misunderstood, the value at offset 0x1a is 0x07a6, so bit 8 is 1, but
my NIC's MPS is 128 bytes. Is there anything I have got wrong?
Thanks,
Joe
^ permalink raw reply
* Re: [PATCH net-next 0/17] Make the network stack usable by userns root
From: David Miller @ 2012-11-19 3:26 UTC (permalink / raw)
To: ebiederm; +Cc: netdev, containers, serge
In-Reply-To: <87d2zd8zwn.fsf@xmission.com>
From: ebiederm@xmission.com (Eric W. Biederman)
Date: Fri, 16 Nov 2012 05:01:44 -0800
>
> In a secondary user namespace the root user only has CAP_NET_ADMIN,
> CAP_NET_RAW and CAP_NET_BIND_SERVICE with respect to the secondary user
> namespace. The test "capable(CAP_NET_ADMIN)" tests for capabilities in
> the initial user namespace.
>
> The following set of patches goes through the networking stack. First
> pushing the capable(CAP_NET_ADMIN) admin calls down farther in the stack
> so individual instances can be changed. Then, where it appears
> safe, I have relaxed the permission checks.
>
> The code is available in git from:
> git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git netns-v73
>
> The netns-v73 branch is against v3.7-rc3 and merges cleanly with net-next.
>
> In my user namespace tree I am working to allow unprivileged users to
> create user namespaces, and to allow the user namespace root to
> create network namespaces. That makes these patches really about allowing
> unprivileged users to use the networking stack (not that they will
> be able to talk to anyone).
>
> David I have some small dependencies on the first two patches of this
> series in my later user namespace work. So after these changes have
> been reviewed if you can pull my netns-v73 branch (which is just these
> patches) into net-next that will help me avoid unnecessary conflicts.
There were merge issues so I applied the patches and sorted the
conflicts out one-by-one.
I hope this doesn't cause major problems.
^ permalink raw reply
* RE: [net-next:master 83/84] drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c:1551:19: sparse: incorrect type in return expression (different base types)
From: Sony Chacko @ 2012-11-19 1:10 UTC (permalink / raw)
To: kbuild test robot; +Cc: netdev
In-Reply-To: <50a89f9f.oTR4cFYr2+4dX7g+%fengguang.wu@intel.com>
> -----Original Message-----
> From: kbuild test robot [mailto:fengguang.wu@intel.com]
> Sent: Sunday, November 18, 2012 12:43 AM
> To: Sony Chacko
> Cc: netdev
> Subject: [net-next:master 83/84]
> drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c:1551:19: sparse: incorrect
> type in return expression (different base types)
We will submit the fixes after testing the changes.
^ permalink raw reply
* Re: [PATCH net 1/1] sis900: fix sis900_set_mode call parameters.
From: David Miller @ 2012-11-18 23:28 UTC (permalink / raw)
To: romieu; +Cc: netdev, medhefgo, venza
In-Reply-To: <20121118224150.GA6755@electric-eye.fr.zoreil.com>
From: Francois Romieu <romieu@fr.zoreil.com>
Date: Sun, 18 Nov 2012 23:41:50 +0100
> Leftover of 57d6d456cfb89264f87d24f52640ede23fdf12bd ("sis900: stop
> using net_device.{base_addr, irq} and convert to __iomem.").
>
> It is needed for suspend / resume to work.
>
> Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
> Tested-by: Jan Janssen <medhefgo@web.de>
> Cc: Daniele Venzano <venza@brownhat.org>
Ouch, applied, thanks!
^ permalink raw reply
* [PATCH net 1/1] sis900: fix sis900_set_mode call parameters.
From: Francois Romieu @ 2012-11-18 22:41 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Jan Janssen, Daniele Venzano
Leftover of 57d6d456cfb89264f87d24f52640ede23fdf12bd ("sis900: stop
using net_device.{base_addr, irq} and convert to __iomem.").
It is needed for suspend / resume to work.
Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Tested-by: Jan Janssen <medhefgo@web.de>
Cc: Daniele Venzano <venza@brownhat.org>
---
drivers/net/ethernet/sis/sis900.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index fb9f6b3..edf5edb 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -2479,7 +2479,7 @@ static int sis900_resume(struct pci_dev *pci_dev)
netif_start_queue(net_dev);
/* Workaround for EDB */
- sis900_set_mode(ioaddr, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
+ sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
/* Enable all known interrupts by setting the interrupt mask. */
sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
--
1.7.11.7
^ permalink raw reply related
* Re: Latest 3.6.6 are not compiling due tg3 network driver, hwmon_device_unregister
From: David Rientjes @ 2012-11-18 22:42 UTC (permalink / raw)
To: Nithin Nayak Sujir, Denys Fedoryshchenko
Cc: Paul Gortmaker, Michael Chan, netdev, linux-kernel
In-Reply-To: <50A464E4.1080105@broadcom.com>
On Wed, 14 Nov 2012, Nithin Nayak Sujir wrote:
> On 11/14/2012 07:30 PM, David Rientjes wrote:
> > On Wed, 14 Nov 2012, Nithin Nayak Sujir wrote:
> >
> > > This was fixed by
> > >
> > > commit de0a41484c47d783dd4d442914815076aa2caac2
> > > Author: Paul Gortmaker <paul.gortmaker@windriver.com>
> > > Date: Mon Oct 1 11:43:49 2012 -0400
> > >
> > > tg3: unconditionally select HWMON support when tg3 is enabled.
> > >
> > Would you mind submitting this for stable by following the procedure
> > described in Documentation/stable_kernel_rules.txt?
> >
>
> Will do. Thank you for bringing this to our attention.
>
Thanks for submitting the patch to stable, Greg has queued it for the
kernels he maintains. Denys, expect to see this fix in 3.6.8.
^ permalink raw reply
* Re: Optics (SFP) monitoring on ixgbe and igbe
From: Aurélien @ 2012-11-18 21:35 UTC (permalink / raw)
To: Ben Hutchings; +Cc: netdev
In-Reply-To: <1353094719.2743.21.camel@bwh-desktop.uk.solarflarecom.com>
[-- Attachment #1: Type: text/plain, Size: 1982 bytes --]
Hi Ben,
I've rewritten things according to your remarks.
On Fri, Nov 16, 2012 at 8:38 PM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
>
> This is silly; log10() and <math.h> are part of standard C and -lm is
> standard on Unix. Just use <math.h> and -lm unconditionally.
Ok, I wasn't sure.
>
> Please merge this with the existing -m option and update the
> documentation to say that this covers diagnostics where available. You
> could add a long option alias like --dump-module or --module-info that
> covers the two types of information.
Done that, the current output of -m has been modified so that
everything lines up correctly.
The --module-info option alias has been added.
> All the above offsets need parentheses around their definitions.
[…]
> This is commented as an offset in the A2 'EEPROM' but the offsets
> actually used include the 0x100 offset from the start of the
> concatenated 'EEPROM'.
A new SFF_A2_BASE has been added, and the OFFSET_TO macros are now
using that, so I removed the 0x100 from all the offsets, and they are
now indeed A2-relative.
>
> Why are all the literals explicitly float and not double?
>
It was a keyboard/chair interface problem, now fixed :)
> Please follow kernel coding style for spacing. checkpatch.pl will show
> you what should be changed.
Ran a checkpatch, and fixed everything that should be fixed.
>
> This seems awfully complicated; why not:
>
> #define OFFSET_TO_TEMP(offset) (((s16)OFFSET_TO_U16(offset)) * 10 / 256)
>
> But why round to tenths of a degree here and then round again to whole
> degrees celsius/fahrenheit when printing?
>
I did not think a simple cast would work, but it seems to give the
right value. I also implemented externally calibrated optics in this
new version, so I now do the whole formatting in the printing, and
store the raw value in the struct.
It should be better now.
Best regards,
--
Aurélien Guillaume
[-- Attachment #2: 0001-Implemented-basic-optics-diagnostics-for-SFF-8472-co.patch --]
[-- Type: application/octet-stream, Size: 21479 bytes --]
From 2b96a1a65e1c24e3c43f479719bd3e3da499656c Mon Sep 17 00:00:00 2001
From: Aurelien Guillaume <aurelien@iwi.me>
Date: Fri, 16 Nov 2012 02:50:00 +0100
Subject: [PATCH] Implemented basic optics diagnostics for SFF-8472 compliant
transceivers in ethtool.
Signed-off-by: Aurelien Guillaume <aurelien@iwi.me>
---
Makefile.am | 2 +-
ethtool.c | 17 +++-
internal.h | 3 +
sfpdiag.c | 364 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
sfpid.c | 35 +++---
5 files changed, 402 insertions(+), 19 deletions(-)
create mode 100644 sfpdiag.c
diff --git a/Makefile.am b/Makefile.am
index e33f71f..89a0d1e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -9,7 +9,7 @@ ethtool_SOURCES = ethtool.c ethtool-copy.h internal.h net_tstamp-copy.h \
fec_8xx.c ibm_emac.c ixgb.c ixgbe.c natsemi.c \
pcnet32.c realtek.c tg3.c marvell.c vioc.c \
smsc911x.c at76c50x-usb.c sfc.c stmmac.c \
- rxclass.c sfpid.c
+ rxclass.c sfpid.c sfpdiag.c
TESTS = test-cmdline test-features
check_PROGRAMS = test-cmdline test-features
diff --git a/ethtool.c b/ethtool.c
index 3db7fec..345c21c 100644
--- a/ethtool.c
+++ b/ethtool.c
@@ -3604,6 +3604,16 @@ static int do_getmodule(struct cmd_context *ctx)
return 1;
}
+ /*
+ * SFF-8079 EEPROM layout contains the memory available at A0 address on
+ * the PHY EEPROM.
+ * SFF-8472 defines a virtual extension of the EEPROM, where the
+ * microcontroller on the SFP/SFP+ generates a page at the A2 address,
+ * which contains data relative to optical diagnostics.
+ * The current kernel implementation returns a blob, which contains:
+ * - ETH_MODULE_SFF_8079 => The A0 page only.
+ * - ETH_MODULE_SFF_8472 => The A0 and A2 page concatenated.
+ */
if (geeprom_dump_raw) {
fwrite(eeprom->data, 1, eeprom->len, stdout);
} else {
@@ -3613,8 +3623,11 @@ static int do_getmodule(struct cmd_context *ctx)
} else if (!geeprom_dump_hex) {
switch (modinfo.type) {
case ETH_MODULE_SFF_8079:
+ sff8079_show_all(eeprom->data);
+ break;
case ETH_MODULE_SFF_8472:
sff8079_show_all(eeprom->data);
+ sff8472_show_all(eeprom->data);
break;
default:
geeprom_dump_hex = 1;
@@ -3831,8 +3844,8 @@ static const struct option {
{ "--show-priv-flags" , 1, do_gprivflags, "Query private flags" },
{ "--set-priv-flags", 1, do_sprivflags, "Set private flags",
" FLAG on|off ...\n" },
- { "-m|--dump-module-eeprom", 1, do_getmodule,
- "Qeuery/Decode Module EEPROM information",
+ { "-m|--dump-module-eeprom|--module-info", 1, do_getmodule,
+ "Query/Decode Module EEPROM information and optical diagnostics if available",
" [ raw on|off ]\n"
" [ hex on|off ]\n"
" [ offset N ]\n"
diff --git a/internal.h b/internal.h
index 4f96fd5..e977a81 100644
--- a/internal.h
+++ b/internal.h
@@ -253,4 +253,7 @@ int rxclass_rule_del(struct cmd_context *ctx, __u32 loc);
/* Module EEPROM parsing code */
void sff8079_show_all(const __u8 *id);
+/* Optics diagnostics */
+void sff8472_show_all(const __u8 *id);
+
#endif /* ETHTOOL_INTERNAL_H__ */
diff --git a/sfpdiag.c b/sfpdiag.c
new file mode 100644
index 0000000..1094bd7
--- /dev/null
+++ b/sfpdiag.c
@@ -0,0 +1,364 @@
+/*
+ * sfpdiag.c: Implements SFF-8472 optics diagnostics.
+ *
+ * Aurelien Guillaume <aurelien@iwi.me> (C) 2012
+ * This implementation is loosely based on DOM patches
+ * from Robert Olsson <robert@herjulf.se> (C) 2009
+ * and SFF-8472 specs (ftp://ftp.seagate.com/pub/sff/SFF-8472.PDF)
+ * by SFF Committee.
+ */
+
+#include <stdio.h>
+#include <math.h>
+#include "internal.h"
+
+/* Offsets in decimal, for direct comparison with the SFF specs */
+
+/* A0-based EEPROM offsets for DOM support checks */
+#define SFF_A0_DOM 92
+#define SFF_A0_OPTIONS 93
+#define SFF_A0_COMP 94
+
+/* EEPROM bit values for various registers */
+#define SFF_A0_DOM_EXTCAL (1 << 4)
+#define SFF_A0_DOM_INTCAL (1 << 5)
+#define SFF_A0_DOM_IMPL (1 << 6)
+#define SFF_A0_DOM_PWRT (1 << 3)
+
+#define SFF_A0_OPTIONS_AW (1 << 7)
+
+/*
+ * See ethtool.c comments about SFF-8472, this is the offset
+ * at which the A2 page is in the EEPROM blob returned by the
+ * kernel.
+ */
+#define SFF_A2_BASE 0x100
+
+/* A2-based offsets for DOM */
+#define SFF_A2_TEMP 96
+#define SFF_A2_TEMP_HALRM 0
+#define SFF_A2_TEMP_LALRM 2
+#define SFF_A2_TEMP_HWARN 4
+#define SFF_A2_TEMP_LWARN 6
+
+#define SFF_A2_VCC 98
+#define SFF_A2_VCC_HALRM 8
+#define SFF_A2_VCC_LALRM 10
+#define SFF_A2_VCC_HWARN 12
+#define SFF_A2_VCC_LWARN 14
+
+#define SFF_A2_BIAS 96
+#define SFF_A2_BIAS_HALRM 16
+#define SFF_A2_BIAS_LALRM 18
+#define SFF_A2_BIAS_HWARN 20
+#define SFF_A2_BIAS_LWARN 22
+
+#define SFF_A2_TX_PWR 102
+#define SFF_A2_TX_PWR_HALRM 24
+#define SFF_A2_TX_PWR_LALRM 26
+#define SFF_A2_TX_PWR_HWARN 28
+#define SFF_A2_TX_PWR_LWARN 30
+
+#define SFF_A2_RX_PWR 104
+#define SFF_A2_RX_PWR_HALRM 32
+#define SFF_A2_RX_PWR_LALRM 34
+#define SFF_A2_RX_PWR_HWARN 36
+#define SFF_A2_RX_PWR_LWARN 38
+
+#define SFF_A2_ALRM_FLG 112
+#define SFF_A2_WARN_FLG 116
+
+/* 32-bit big-endian calibration constants */
+#define SFF_A2_CAL_RXPWR4 56
+#define SFF_A2_CAL_RXPWR3 60
+#define SFF_A2_CAL_RXPWR2 64
+#define SFF_A2_CAL_RXPWR1 68
+#define SFF_A2_CAL_RXPWR0 72
+
+/* 16-bit big-endian calibration constants */
+#define SFF_A2_CAL_TXI_SLP 76
+#define SFF_A2_CAL_TXI_OFF 78
+#define SFF_A2_CAL_TXPWR_SLP 80
+#define SFF_A2_CAL_TXPWR_OFF 82
+#define SFF_A2_CAL_T_SLP 84
+#define SFF_A2_CAL_T_OFF 86
+#define SFF_A2_CAL_V_SLP 88
+#define SFF_A2_CAL_V_OFF 90
+
+
+struct sff8472_diags {
+
+#define MCURR 0
+#define LWARN 1
+#define HWARN 2
+#define LALRM 3
+#define HALRM 4
+
+ /* [5] tables are current, low/high warn, low/high alarm */
+ __u8 supports_dom; /* Supports DOM */
+ __u8 supports_alarms; /* Supports alarm/warning thold */
+ __u8 calibrated_ext; /* Is externally calibrated */
+ __u16 bias_cur[5]; /* Measured bias current in 2uA units */
+ __u16 tx_power[5]; /* Measured TX Power in 0.1uW units */
+ __u16 rx_power[5]; /* Measured RX Power */
+ __u8 rx_power_type; /* 0 = OMA, 1 = Average power */
+ __s16 sfp_temp[5]; /* SFP Temp in 16-bit signed 1/256 Celcius */
+ __u16 sfp_voltage[5]; /* SFP voltage in 0.1mV units */
+
+};
+
+static struct sff8472_aw_flags {
+ const char *str; /* Human-readable string, null at the end */
+ int offset; /* A2-relative adress offset */
+ __u8 value; /* Alarm is on if (offset & value) != 0. */
+} sff8472_aw_flags[] = {
+ { "Laser bias current high alarm", SFF_A2_ALRM_FLG, (1 << 3) },
+ { "Laser bias current low alarm", SFF_A2_ALRM_FLG, (1 << 2) },
+ { "Laser bias current high warning", SFF_A2_WARN_FLG, (1 << 3) },
+ { "Laser bias current low warning", SFF_A2_WARN_FLG, (1 << 2) },
+
+ { "Laser output power high alarm", SFF_A2_ALRM_FLG, (1 << 1) },
+ { "Laser output power low alarm", SFF_A2_ALRM_FLG, (1 << 0) },
+ { "Laser output power high warning", SFF_A2_WARN_FLG, (1 << 1) },
+ { "Laser output power low warning", SFF_A2_WARN_FLG, (1 << 0) },
+
+ { "Module temperature high alarm", SFF_A2_ALRM_FLG, (1 << 7) },
+ { "Module temperature low alarm", SFF_A2_ALRM_FLG, (1 << 6) },
+ { "Module temperature high warning", SFF_A2_WARN_FLG, (1 << 7) },
+ { "Module temperature low warning", SFF_A2_WARN_FLG, (1 << 6) },
+
+ { "Module voltage high alarm", SFF_A2_ALRM_FLG, (1 << 5) },
+ { "Module voltage low alarm", SFF_A2_ALRM_FLG, (1 << 4) },
+ { "Module voltage high warning", SFF_A2_WARN_FLG, (1 << 5) },
+ { "Module voltage low warning", SFF_A2_WARN_FLG, (1 << 4) },
+
+ { "Laser rx power high alarm", SFF_A2_ALRM_FLG + 1, (1 << 7) },
+ { "Laser rx power low alarm", SFF_A2_ALRM_FLG + 1, (1 << 6) },
+ { "Laser rx power high warning", SFF_A2_WARN_FLG + 1, (1 << 7) },
+ { "Laser rx power low warning", SFF_A2_WARN_FLG + 1, (1 << 6) },
+
+ { NULL, 0, 0 },
+};
+
+static double convert_mw_to_dbm(double mw)
+{
+ return (10. * log10(mw / 1000.)) + 30.;
+}
+
+
+/* Most common case: 16-bit unsigned integer in a certain unit */
+#define A2_OFFSET_TO_U16(offset) \
+ (id[SFF_A2_BASE + (offset)] << 8 | id[SFF_A2_BASE + (offset) + 1])
+
+/* Calibration slope is a number between 0.0 included and 256.0 excluded. */
+#define A2_OFFSET_TO_SLP(offset) \
+ (id[SFF_A2_BASE + (offset)] + id[SFF_A2_BASE + (offset) + 1] / 256.)
+
+/* Calibration offset is an integer from -32768 to 32767 */
+#define A2_OFFSET_TO_OFF(offset) \
+ ((__s16)A2_OFFSET_TO_U16(offset))
+
+/* RXPWR(x) are IEEE-754 floating point numbers in big-endian format */
+#define A2_OFFSET_TO_RXPWRx(offset) \
+ (befloattoh((__u32 *)(id + SFF_A2_BASE + (offset))))
+
+/*
+ * 2-byte internal temperature conversions:
+ * First byte is a signed 8-bit integer, which is the whole-degree part.
+ * Second byte holds 1/256ths of a degree, which are added to the first.
+ */
+#define A2_OFFSET_TO_TEMP(offset) ((__s16)A2_OFFSET_TO_U16(offset))
+
+
+static void sff8472_dom_parse(const __u8 *id, struct sff8472_diags *sd)
+{
+
+ sd->bias_cur[MCURR] = A2_OFFSET_TO_U16(SFF_A2_BIAS);
+ sd->bias_cur[HALRM] = A2_OFFSET_TO_U16(SFF_A2_BIAS_HALRM);
+ sd->bias_cur[LALRM] = A2_OFFSET_TO_U16(SFF_A2_BIAS_LALRM);
+ sd->bias_cur[HWARN] = A2_OFFSET_TO_U16(SFF_A2_BIAS_HWARN);
+ sd->bias_cur[LWARN] = A2_OFFSET_TO_U16(SFF_A2_BIAS_LWARN);
+
+ sd->sfp_voltage[MCURR] = A2_OFFSET_TO_U16(SFF_A2_VCC);
+ sd->sfp_voltage[HALRM] = A2_OFFSET_TO_U16(SFF_A2_VCC_HALRM);
+ sd->sfp_voltage[LALRM] = A2_OFFSET_TO_U16(SFF_A2_VCC_LALRM);
+ sd->sfp_voltage[HWARN] = A2_OFFSET_TO_U16(SFF_A2_VCC_HWARN);
+ sd->sfp_voltage[LWARN] = A2_OFFSET_TO_U16(SFF_A2_VCC_LWARN);
+
+ sd->tx_power[MCURR] = A2_OFFSET_TO_U16(SFF_A2_TX_PWR);
+ sd->tx_power[HALRM] = A2_OFFSET_TO_U16(SFF_A2_TX_PWR_HALRM);
+ sd->tx_power[LALRM] = A2_OFFSET_TO_U16(SFF_A2_TX_PWR_LALRM);
+ sd->tx_power[HWARN] = A2_OFFSET_TO_U16(SFF_A2_TX_PWR_HWARN);
+ sd->tx_power[LWARN] = A2_OFFSET_TO_U16(SFF_A2_TX_PWR_LWARN);
+
+ sd->rx_power[MCURR] = A2_OFFSET_TO_U16(SFF_A2_RX_PWR);
+ sd->rx_power[HALRM] = A2_OFFSET_TO_U16(SFF_A2_RX_PWR_HALRM);
+ sd->rx_power[LALRM] = A2_OFFSET_TO_U16(SFF_A2_RX_PWR_LALRM);
+ sd->rx_power[HWARN] = A2_OFFSET_TO_U16(SFF_A2_RX_PWR_HWARN);
+ sd->rx_power[LWARN] = A2_OFFSET_TO_U16(SFF_A2_RX_PWR_LWARN);
+
+ sd->sfp_temp[MCURR] = A2_OFFSET_TO_TEMP(SFF_A2_TEMP);
+ sd->sfp_temp[HALRM] = A2_OFFSET_TO_TEMP(SFF_A2_TEMP_HALRM);
+ sd->sfp_temp[LALRM] = A2_OFFSET_TO_TEMP(SFF_A2_TEMP_LALRM);
+ sd->sfp_temp[HWARN] = A2_OFFSET_TO_TEMP(SFF_A2_TEMP_HWARN);
+ sd->sfp_temp[LWARN] = A2_OFFSET_TO_TEMP(SFF_A2_TEMP_LWARN);
+
+}
+
+/* Converts to a float from a big-endian 4-byte source buffer. */
+static float befloattoh(const __u32 *source)
+{
+ union {
+ __u32 src;
+ float dst;
+ } converter;
+
+ converter.src = be32toh(*source);
+ return converter.dst;
+}
+
+static void sff8472_calibration(const __u8 *id, struct sff8472_diags *sd)
+{
+ int i;
+ __u16 rx_reading;
+
+ /* Calibration should occur for all values (threshold and current) */
+ for (i = 0; i < sizeof(sd->bias_cur); ++i) {
+ /*
+ * Apply calibration formula 1 (Temp., Voltage, Bias, Tx Power)
+ */
+ sd->bias_cur[i] *= A2_OFFSET_TO_SLP(SFF_A2_CAL_TXI_SLP);
+ sd->tx_power[i] *= A2_OFFSET_TO_SLP(SFF_A2_CAL_TXPWR_SLP);
+ sd->sfp_voltage[i] *= A2_OFFSET_TO_SLP(SFF_A2_CAL_V_SLP);
+ sd->sfp_temp[i] *= A2_OFFSET_TO_SLP(SFF_A2_CAL_T_SLP);
+
+ sd->bias_cur[i] += A2_OFFSET_TO_OFF(SFF_A2_CAL_TXI_OFF);
+ sd->tx_power[i] += A2_OFFSET_TO_OFF(SFF_A2_CAL_TXPWR_OFF);
+ sd->sfp_voltage[i] += A2_OFFSET_TO_OFF(SFF_A2_CAL_V_OFF);
+ sd->sfp_temp[i] += A2_OFFSET_TO_OFF(SFF_A2_CAL_T_OFF);
+
+ /*
+ * Apply calibration formula 2 (Rx Power only)
+ */
+ rx_reading = sd->rx_power[i];
+ sd->rx_power[i] = A2_OFFSET_TO_RXPWRx(SFF_A2_CAL_RXPWR0);
+ sd->rx_power[i] += rx_reading *
+ A2_OFFSET_TO_RXPWRx(SFF_A2_CAL_RXPWR1);
+ sd->rx_power[i] += rx_reading *
+ A2_OFFSET_TO_RXPWRx(SFF_A2_CAL_RXPWR2);
+ sd->rx_power[i] += rx_reading *
+ A2_OFFSET_TO_RXPWRx(SFF_A2_CAL_RXPWR3);
+ }
+}
+
+static void sff8472_parse_eeprom(const __u8 *id, struct sff8472_diags *sd)
+{
+ sd->supports_dom = id[SFF_A0_DOM] & SFF_A0_DOM_IMPL;
+ sd->supports_alarms = id[SFF_A0_OPTIONS] & SFF_A0_OPTIONS_AW;
+ sd->calibrated_ext = id[SFF_A0_DOM] & SFF_A0_DOM_EXTCAL;
+ sd->rx_power_type = id[SFF_A0_DOM] & SFF_A0_DOM_PWRT;
+
+ sff8472_dom_parse(id, sd);
+
+ /*
+ * If the SFP is externally calibrated, we need to read calibration data
+ * and compensate the already stored readings.
+ */
+ if (sd->calibrated_ext)
+ sff8472_calibration(id, sd);
+}
+
+
+
+void sff8472_show_all(const __u8 *id)
+{
+ struct sff8472_diags sd;
+ char *rx_power_string = NULL;
+ int i;
+
+ sff8472_parse_eeprom(id, &sd);
+
+ if (!sd.supports_dom) {
+ printf("\t%-41s : No\n", "Optical diagnostics support");
+ return ;
+ }
+ printf("\t%-41s : Yes\n", "Optical diagnostics support");
+
+#define PRINT_BIAS(string, index) \
+ printf("\t%-41s : %.3f mA\n", (string), \
+ (double)(sd.bias_cur[(index)] / 500.));
+
+# define PRINT_xX_PWR(string, var, index) \
+ printf("\t%-41s : %.4f mW / %.2f dBm\n", (string), \
+ (double)((var)[(index)] / 10000.), \
+ convert_mw_to_dbm((double)((var)[(index)] / 10000.)));
+
+#define PRINT_TEMP(string, index) \
+ printf("\t%-41s : %.2f degrees C / %.2f degrees F\n", (string), \
+ (double)(sd.sfp_temp[(index)] / 256.), \
+ (double)(sd.sfp_temp[(index)] / 256. * 1.8 + 32.));
+
+#define PRINT_VCC(string, index) \
+ printf("\t%-41s : %.4f V\n", (string), \
+ (double)(sd.sfp_voltage[(index)] / 10000.));
+
+
+ PRINT_BIAS("Laser bias current", MCURR);
+ PRINT_xX_PWR("Laser output power", sd.tx_power, MCURR);
+
+ if (!sd.rx_power_type)
+ rx_power_string = "Receiver signal OMA";
+ else
+ rx_power_string = "Receiver signal average optical power";
+
+ PRINT_xX_PWR(rx_power_string, sd.rx_power, MCURR);
+
+ PRINT_TEMP("Module temperature", MCURR);
+ PRINT_VCC("Module voltage", MCURR);
+
+ printf("\t%-41s : %s\n", "Alarm/warning flags implemented",
+ (sd.supports_alarms ? "Yes" : "No"));
+ if (sd.supports_alarms) {
+
+ for (i = 0; sff8472_aw_flags[i].str; ++i) {
+ printf("\t%-41s : %s\n", sff8472_aw_flags[i].str,
+ id[SFF_A2_BASE + sff8472_aw_flags[i].offset]
+ & sff8472_aw_flags[i].value ? "On" : "Off");
+ }
+
+ PRINT_BIAS("Laser bias current high alarm threshold", HALRM);
+ PRINT_BIAS("Laser bias current low alarm threshold", LALRM);
+ PRINT_BIAS("Laser bias current high warning threshold", HWARN);
+ PRINT_BIAS("Laser bias current low warning threshold", LWARN);
+
+ PRINT_xX_PWR("Laser output power high alarm threshold",
+ sd.tx_power, HALRM);
+ PRINT_xX_PWR("Laser output power low alarm threshold",
+ sd.tx_power, LALRM);
+ PRINT_xX_PWR("Laser output power high warning threshold",
+ sd.tx_power, HWARN);
+ PRINT_xX_PWR("Laser output power low warning threshold",
+ sd.tx_power, LWARN);
+
+ PRINT_TEMP("Module temperature high alarm threshold", HALRM);
+ PRINT_TEMP("Module temperature low alarm threshold", LALRM);
+ PRINT_TEMP("Module temperature high warning threshold", HWARN);
+ PRINT_TEMP("Module temperature low warning threshold", LWARN);
+
+ PRINT_VCC("Module voltage high alarm threshold", HALRM);
+ PRINT_VCC("Module voltage low alarm threshold", LALRM);
+ PRINT_VCC("Module voltage high warning threshold", HWARN);
+ PRINT_VCC("Module voltage low warning threshold", LWARN);
+
+ PRINT_xX_PWR("Laser rx power high alarm threshold",
+ sd.rx_power, HALRM);
+ PRINT_xX_PWR("Laser rx power low alarm threshold",
+ sd.rx_power, LALRM);
+ PRINT_xX_PWR("Laser rx power high warning threshold",
+ sd.rx_power, HWARN);
+ PRINT_xX_PWR("Laser rx power low warning threshold",
+ sd.rx_power, LWARN);
+ }
+
+}
+
diff --git a/sfpid.c b/sfpid.c
index a4a671d..2982d0d 100644
--- a/sfpid.c
+++ b/sfpid.c
@@ -12,7 +12,7 @@
static void sff8079_show_identifier(const __u8 *id)
{
- printf("\tIdentifier : 0x%02x", id[0]);
+ printf("\t%-41s : 0x%02x", "Identifier", id[0]);
switch (id[0]) {
case 0x00:
printf(" (no module present, unknown, or unspecified)\n");
@@ -34,7 +34,7 @@ static void sff8079_show_identifier(const __u8 *id)
static void sff8079_show_ext_identifier(const __u8 *id)
{
- printf("\tExtended identifier : 0x%02x", id[1]);
+ printf("\t%-41s : 0x%02x", "Extended identifier", id[1]);
if (id[1] == 0x00)
printf(" (GBIC not specified / not MOD_DEF compliant)\n");
else if (id[1] == 0x04)
@@ -47,7 +47,7 @@ static void sff8079_show_ext_identifier(const __u8 *id)
static void sff8079_show_connector(const __u8 *id)
{
- printf("\tConnector : 0x%02x", id[2]);
+ printf("\t%-41s : 0x%02x", "Connector", id[2]);
switch (id[2]) {
case 0x00:
printf(" (unknown or unspecified)\n");
@@ -105,10 +105,12 @@ static void sff8079_show_connector(const __u8 *id)
static void sff8079_show_transceiver(const __u8 *id)
{
- static const char *pfx = "\t : =>";
+ static const char *pfx =
+ "\tTransceiver type :";
- printf("\tTransceiver codes : 0x%02x 0x%02x 0x%02x" \
+ printf("\t%-41s : 0x%02x 0x%02x 0x%02x " \
"0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n",
+ "Transceiver codes",
id[3], id[4], id[5], id[6],
id[7], id[8], id[9], id[10]);
/* 10G Ethernet Compliance Codes */
@@ -239,7 +241,7 @@ static void sff8079_show_transceiver(const __u8 *id)
static void sff8079_show_encoding(const __u8 *id)
{
- printf("\tEncoding : 0x%02x", id[11]);
+ printf("\t%-41s : 0x%02x", "Encoding", id[11]);
switch (id[11]) {
case 0x00:
printf(" (unspecified)\n");
@@ -270,7 +272,7 @@ static void sff8079_show_encoding(const __u8 *id)
static void sff8079_show_rate_identifier(const __u8 *id)
{
- printf("\tRate identifier : 0x%02x", id[13]);
+ printf("\t%-41s : 0x%02x", "Rate identifier", id[13]);
switch (id[13]) {
case 0x00:
printf(" (unspecified)\n");
@@ -295,14 +297,14 @@ static void sff8079_show_rate_identifier(const __u8 *id)
static void sff8079_show_oui(const __u8 *id)
{
- printf("\tVendor OUI : %02x:%02x:%02x\n",
+ printf("\t%-41s : %02x:%02x:%02x\n", "Vendor OUI",
id[37], id[38], id[39]);
}
static void sff8079_show_wavelength_or_copper_compliance(const __u8 *id)
{
if (id[8] & (1 << 2)) {
- printf("\tPassive Cu cmplnce. : 0x%02x", id[60]);
+ printf("\t%-41s : 0x%02x", "Passive copper compliance", id[60]);
switch (id[60]) {
case 0x00:
printf(" (unspecified)");
@@ -316,7 +318,7 @@ static void sff8079_show_wavelength_or_copper_compliance(const __u8 *id)
}
printf(" [SFF-8472 rev10.4 only]\n");
} else if (id[8] & (1 << 3)) {
- printf("\tActive Cu cmplnce. : 0x%02x", id[60]);
+ printf("\t%-41s : 0x%02x", "Active copper compliance", id[60]);
switch (id[60]) {
case 0x00:
printf(" (unspecified)");
@@ -333,7 +335,7 @@ static void sff8079_show_wavelength_or_copper_compliance(const __u8 *id)
}
printf(" [SFF-8472 rev10.4 only]\n");
} else {
- printf("\tLaser wavelength : %unm\n",
+ printf("\t%-41s : %unm\n", "Laser wavelength",
(id[60] << 8) | id[61]);
}
}
@@ -344,7 +346,7 @@ static void sff8079_show_value_with_unit(const __u8 *id, unsigned int reg,
{
unsigned int val = id[reg];
- printf("\t%-20s: %u%s\n", name, val * mult, unit);
+ printf("\t%-41s : %u%s\n", name, val * mult, unit);
}
static void sff8079_show_ascii(const __u8 *id, unsigned int first_reg,
@@ -352,7 +354,7 @@ static void sff8079_show_ascii(const __u8 *id, unsigned int first_reg,
{
unsigned int reg, val;
- printf("\t%-20s: ", name);
+ printf("\t%-41s : ", name);
for (reg = first_reg; reg <= last_reg; reg++) {
val = id[reg];
putchar(((val >= 32) && (val <= 126)) ? val : '_');
@@ -368,14 +370,15 @@ void sff8079_show_all(const __u8 *id)
sff8079_show_connector(id);
sff8079_show_transceiver(id);
sff8079_show_encoding(id);
- sff8079_show_value_with_unit(id, 12, "BR, Nominal", 100, "MBd");
+ sff8079_show_value_with_unit(id, 12,
+ "Nominal signalling rate", 100, "MBd");
sff8079_show_rate_identifier(id);
sff8079_show_value_with_unit(id, 14,
- "Length (SMF,km)", 1, "km");
+ "Length (SMF,km)", 1, "km");
sff8079_show_value_with_unit(id, 15, "Length (SMF)", 100, "m");
sff8079_show_value_with_unit(id, 16, "Length (50um)", 10, "m");
sff8079_show_value_with_unit(id, 17,
- "Length (62.5um)", 10, "m");
+ "Length (62.5um)", 10, "m");
sff8079_show_value_with_unit(id, 18, "Length (Copper)", 1, "m");
sff8079_show_value_with_unit(id, 19, "Length (OM3)", 10, "m");
sff8079_show_wavelength_or_copper_compliance(id);
--
1.7.0.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox