* [PATCH 2/4] usbnet: Set device type for wlan and wwan devices
From: Marcel Holtmann @ 2009-10-02 15:15 UTC (permalink / raw)
To: netdev; +Cc: David Miller, Johannes Berg, Greg KH
In-Reply-To: <cover.1254495724.git.marcel@holtmann.org>
For usbnet devices with FLAG_WLAN or FLAG_WWAN, set the proper device
type so that the uevent contains the correct value. This then allows an easy
identification of the actual underlying technology of the Ethernet device.
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
drivers/net/usb/usbnet.c | 14 ++++++++++++++
1 files changed, 14 insertions(+), 0 deletions(-)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 8124cf1..378da8c 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1210,6 +1210,14 @@ static const struct net_device_ops usbnet_netdev_ops = {
// precondition: never called in_interrupt
+static struct device_type wlan_type = {
+ .name = "wlan",
+};
+
+static struct device_type wwan_type = {
+ .name = "wwan",
+};
+
int
usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
{
@@ -1325,6 +1333,12 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
dev->maxpacket = usb_maxpacket (dev->udev, dev->out, 1);
SET_NETDEV_DEV(net, &udev->dev);
+
+ if ((dev->driver_info->flags & FLAG_WLAN) != 0)
+ SET_NETDEV_DEVTYPE(net, &wlan_type);
+ if ((dev->driver_info->flags & FLAG_WWAN) != 0)
+ SET_NETDEV_DEVTYPE(net, &wwan_type);
+
status = register_netdev (net);
if (status)
goto out3;
--
1.6.2.5
^ permalink raw reply related
* [PATCH 1/4] usbnet: Use wwan%d interface name for mobile broadband devices
From: Marcel Holtmann @ 2009-10-02 15:15 UTC (permalink / raw)
To: netdev; +Cc: David Miller, Johannes Berg, Greg KH
In-Reply-To: <cover.1254495724.git.marcel@holtmann.org>
Add support for usbnet based devices like CDC-Ether to indicate that they
are actually mobile broadband devices. In that case use wwan%d as default
interface name.
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
drivers/net/usb/cdc_ether.c | 20 ++++++++++++++------
drivers/net/usb/usbnet.c | 3 +++
include/linux/usb/usbnet.h | 1 +
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 4a6aff5..71e65fc 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -420,6 +420,14 @@ static const struct driver_info cdc_info = {
.status = cdc_status,
};
+static const struct driver_info mbm_info = {
+ .description = "Mobile Broadband Network Device",
+ .flags = FLAG_WWAN,
+ .bind = cdc_bind,
+ .unbind = usbnet_cdc_unbind,
+ .status = cdc_status,
+};
+
/*-------------------------------------------------------------------------*/
@@ -532,32 +540,32 @@ static const struct usb_device_id products [] = {
/* Ericsson F3507g */
USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1900, USB_CLASS_COMM,
USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
- .driver_info = (unsigned long) &cdc_info,
+ .driver_info = (unsigned long) &mbm_info,
}, {
/* Ericsson F3507g ver. 2 */
USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1902, USB_CLASS_COMM,
USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
- .driver_info = (unsigned long) &cdc_info,
+ .driver_info = (unsigned long) &mbm_info,
}, {
/* Ericsson F3607gw */
USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1904, USB_CLASS_COMM,
USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
- .driver_info = (unsigned long) &cdc_info,
+ .driver_info = (unsigned long) &mbm_info,
}, {
/* Ericsson F3307 */
USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1906, USB_CLASS_COMM,
USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
- .driver_info = (unsigned long) &cdc_info,
+ .driver_info = (unsigned long) &mbm_info,
}, {
/* Toshiba F3507g */
USB_DEVICE_AND_INTERFACE_INFO(0x0930, 0x130b, USB_CLASS_COMM,
USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
- .driver_info = (unsigned long) &cdc_info,
+ .driver_info = (unsigned long) &mbm_info,
}, {
/* Dell F3507g */
USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x8147, USB_CLASS_COMM,
USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
- .driver_info = (unsigned long) &cdc_info,
+ .driver_info = (unsigned long) &mbm_info,
},
{ }, // END
};
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index ca5ca5a..8124cf1 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1295,6 +1295,9 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
/* WLAN devices should always be named "wlan%d" */
if ((dev->driver_info->flags & FLAG_WLAN) != 0)
strcpy(net->name, "wlan%d");
+ /* WWAN devices should always be named "wwan%d" */
+ if ((dev->driver_info->flags & FLAG_WWAN) != 0)
+ strcpy(net->name, "wwan%d");
/* maybe the remote can't receive an Ethernet MTU */
if (net->mtu > (dev->hard_mtu - net->hard_header_len))
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index f814730..86c31b7 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -90,6 +90,7 @@ struct driver_info {
#define FLAG_WLAN 0x0080 /* use "wlan%d" names */
#define FLAG_AVOID_UNLINK_URBS 0x0100 /* don't unlink urbs at usbnet_stop() */
#define FLAG_SEND_ZLP 0x0200 /* hw requires ZLPs are sent */
+#define FLAG_WWAN 0x0400 /* use "wwan%d" names */
/* init device ... can sleep, or cause probe() failure */
--
1.6.2.5
^ permalink raw reply related
* [PATCH 0/4] More device type integration
From: Marcel Holtmann @ 2009-10-02 15:15 UTC (permalink / raw)
To: netdev; +Cc: David Miller, Johannes Berg, Greg KH
Hi Dave,
I followed the work from Johannes and made sure we can register the
device type for wireless devices via the netdev notifier callback for
all cfg80211 based devices. This way we don't have to touch any of
the drivers.
For the mobile broadband cards from Ericsson, the device type is now
set to "wwan" and it also uses "wwan%d" for the default interface name.
Regards
Marcel
Johannes Berg (1):
net: introduce NETDEV_POST_INIT notifier
Marcel Holtmann (3):
usbnet: Use wwan%d interface name for mobile broadband devices
usbnet: Set device type for wlan and wwan devices
cfg80211: assign device type in netdev notifier callback
drivers/net/usb/cdc_ether.c | 20 ++++++++++++++------
drivers/net/usb/usbnet.c | 17 +++++++++++++++++
include/linux/notifier.h | 1 +
include/linux/usb/usbnet.h | 1 +
net/core/dev.c | 6 ++++++
net/mac80211/iface.c | 5 -----
net/wireless/core.c | 7 +++++++
7 files changed, 46 insertions(+), 11 deletions(-)
^ permalink raw reply
* Re: [PATCH] net: Fix wrong sizeof
From: Randy Dunlap @ 2009-10-02 15:14 UTC (permalink / raw)
To: Jean Delvare; +Cc: LKML, netdev, linux-doc, stable
In-Reply-To: <20091002113038.1dc3d284@hyperion.delvare>
On Fri, 2 Oct 2009 11:30:38 +0200 Jean Delvare wrote:
> Which is why I have always preferred sizeof(struct foo) over
> sizeof(var).
>
> Signed-off-by: Jean Delvare <khali@linux-fr.org>
> Cc: Randy Dunlap <rdunlap@xenotime.net>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
I also prefer to use sizeof(struct xyz) in my non-kernel code
instead of sizeof(var).
> ---
> Stable team, the non-documentation part of this fix applies to 2.6.31,
> 2.6.30 and 2.6.27.
>
> Documentation/networking/timestamping/timestamping.c | 2 +-
> drivers/net/iseries_veth.c | 2 +-
> 2 files changed, 2 insertions(+), 2 deletions(-)
>
> --- linux-2.6.32-rc1.orig/Documentation/networking/timestamping/timestamping.c 2009-06-10 05:05:27.000000000 +0200
> +++ linux-2.6.32-rc1/Documentation/networking/timestamping/timestamping.c 2009-10-02 11:07:19.000000000 +0200
> @@ -381,7 +381,7 @@ int main(int argc, char **argv)
> memset(&hwtstamp, 0, sizeof(hwtstamp));
> strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name));
> hwtstamp.ifr_data = (void *)&hwconfig;
> - memset(&hwconfig, 0, sizeof(&hwconfig));
> + memset(&hwconfig, 0, sizeof(hwconfig));
> hwconfig.tx_type =
> (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
> HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
> --- linux-2.6.32-rc1.orig/drivers/net/iseries_veth.c 2009-09-28 10:28:42.000000000 +0200
> +++ linux-2.6.32-rc1/drivers/net/iseries_veth.c 2009-10-02 11:07:15.000000000 +0200
> @@ -495,7 +495,7 @@ static void veth_take_cap_ack(struct vet
> cnx->remote_lp);
> } else {
> memcpy(&cnx->cap_ack_event, event,
> - sizeof(&cnx->cap_ack_event));
> + sizeof(cnx->cap_ack_event));
> cnx->state |= VETH_STATE_GOTCAPACK;
> veth_kick_statemachine(cnx);
> }
>
>
> --
> Jean Delvare
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
---
~Randy
^ permalink raw reply
* [PATCH] TCPCT-1: adding a sysctl
From: William Allen Simpson @ 2009-10-02 14:58 UTC (permalink / raw)
To: netdev
In-Reply-To: <20091001225705.788d38ba@nehalam>
[-- Attachment #1: Type: text/plain, Size: 1510 bytes --]
Stephen Hemminger wrote:
> BUT numbered sysctl values are deprecated and should no longer be added.
> The current way is to use CTL_UNNUMBERED instead, if you use CTL_UNNUMBERED
> then the table does not need to be changed.
>
Thank you, that was immensely helpful. I was using an old (related) example.
While I've long had credit in BSD-derived systems, this is the first I've
tried to implement for Linux kernel -- although I did give permission 15 or so
years ago for a fair amount of my stuff to be ported here under GPL....
This is a straightforward re-implementation of an earlier patch, that no
longer applies cleanly, that was reviewed:
http://thread.gmane.org/gmane.linux.network/102586
With the original author's permission:
Adam Langley wrote:
# I'm afraid that my draft is now mostly dead!
#
# Please feel free to use any of the code that you found if it helps you
# and all the best with it,
#
The principal difference is using a TCP option to carry the cookie nonce,
instead of an offset to a random nonce in the data. This allows several
related concepts to use the same extension option. This cookie option has
been suggested for many years.
http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
Also, as mentioned earlier, I added a sysctl to turn on and off the cookie
feature globally. The cookies are useful even without SYN data.
Since I'm new around here, this first patch is just the ioctl and sysctl.
Any suggestions for improvement? Or general approval?
[-- Attachment #2: tcpct-1.patch --]
[-- Type: text/plain, Size: 11082 bytes --]
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 61723a7..a8d8a88 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -96,6 +96,7 @@ enum {
#define TCP_QUICKACK 12 /* Block/reenable quick acks */
#define TCP_CONGESTION 13 /* Congestion control algorithm */
#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
+#define TCP_COOKIE_DATA 15 /* TCP Cookie Transactions extension */
#define TCPI_OPT_TIMESTAMPS 1
#define TCPI_OPT_SACK 2
@@ -170,6 +171,33 @@ struct tcp_md5sig {
__u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */
};
+/* for TCP_COOKIE_DATA socket option */
+#define TCP_COOKIE_MAX 16 /* 128-bits */
+#define TCP_COOKIE_MIN 8 /* 64-bits */
+#define TCP_COOKIE_PAIR_SIZE (2*TCP_COOKIE_MAX)
+
+#define TCP_S_DATA_MAX 64U /* after TCP+IP options */
+#define TCP_S_DATA_MSS_DEFAULT 536U /* default MSS (RFC1122) */
+
+/* Flags for both getsockopt and setsockopt */
+#define TCP_COOKIE_IN_ALWAYS (1 << 0) /* Discard SYN without cookie */
+#define TCP_COOKIE_OUT_NEVER (1 << 1) /* Prohibit outgoing cookies.
+ Supercedes the others. */
+
+/* Flags for getsockopt */
+#define TCP_S_DATA_IN (1 << 2) /* Was data received? */
+#define TCP_S_DATA_OUT (1 << 3) /* Was data sent? */
+
+/* TCP Cookie Transactions data */
+struct tcp_cookie_data {
+ __u16 tcpcd_flags; /* see above */
+ __u8 __tcpcd_pad1; /* zero */
+ __u8 tcpcd_cookie_desired; /* bytes */
+ __u16 tcpcd_s_data_desired; /* bytes of variable data */
+ __u16 tcpcd_used; /* bytes in value */
+ __u8 tcpcd_value[TCP_S_DATA_MSS_DEFAULT];
+};
+
#ifdef __KERNEL__
#include <linux/skbuff.h>
@@ -217,9 +245,13 @@ struct tcp_options_received {
sack_ok : 4, /* SACK seen on SYN packet */
snd_wscale : 4, /* Window scaling received from sender */
rcv_wscale : 4; /* Window scaling to send to receiver */
-/* SACKs data */
+#ifdef CONFIG_TCP_OPT_COOKIE_EXTENSION
+ u16 extend_ok:1; /* Cookie{less,pair} seen */
+ u8 *cookie_copy;
+ u8 cookie_size; /* bytes in copy */
+#endif
u8 num_sacks; /* Number of SACK blocks */
- u16 user_mss; /* mss requested by user in ioctl */
+ u16 user_mss; /* mss requested by user in ioctl */
u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
};
@@ -229,14 +261,27 @@ struct tcp_options_received {
* only four options will fit in a standard TCP header */
#define TCP_NUM_SACKS 4
+#ifdef CONFIG_TCP_OPT_COOKIE_EXTENSION
+struct tcp_cookie_pair;
+struct tcp_s_data_payload;
+#endif
+
struct tcp_request_sock {
struct inet_request_sock req;
#ifdef CONFIG_TCP_MD5SIG
/* Only used by TCP MD5 Signature so far. */
const struct tcp_request_sock_ops *af_specific;
#endif
- u32 rcv_isn;
- u32 snt_isn;
+ u32 rcv_isn;
+ u32 snt_isn;
+#ifdef CONFIG_TCP_OPT_COOKIE_EXTENSION
+ u8 *cookie_copy;
+ u8 cookie_size; /* bytes in copy */
+ u8 s_data_in:1,
+ s_data_out:1,
+ cookie_in_always:1,
+ cookie_out_never:1;
+#endif
};
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -406,6 +451,33 @@ struct tcp_sock {
/* TCP MD5 Signature Option information */
struct tcp_md5sig_info *md5sig_info;
#endif
+#ifdef CONFIG_TCP_OPT_COOKIE_EXTENSION
+ /* If s_data_desired > 0 and s_data_payload is non-NULL, then this
+ * object holds a reference to it (s_data_payload->kref)
+ */
+ struct tcp_s_data_payload *s_data_payload;
+
+ /* When the cookie options are generated and exchanged, then this
+ * object holds a reference to them (cookie_pair->kref)
+ */
+ struct tcp_cookie_pair *cookie_pair;
+
+ /* If s_data_payload is non-NULL, then this holds a copy of
+ * s_data_payload->tsdpl_size. Otherwise, this holds the user
+ * specified tcpcd_s_data_desired (variable data).
+ */
+ u16 s_data_desired; /* bytes */
+
+ /* Initially, this holds the user specified tcpcd_cookie_desired.
+ * Zero indicates default (sysctl_tcp_cookie_size). After the
+ * option has been exchanged, this holds the actual size.
+ */
+ u8 cookie_desired; /* bytes */
+ u8 s_data_in:1,
+ s_data_out:1,
+ cookie_in_always:1,
+ cookie_out_never:1;
+#endif
};
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -424,6 +496,12 @@ struct tcp_timewait_sock {
u16 tw_md5_keylen;
u8 tw_md5_key[TCP_MD5SIG_MAXKEYLEN];
#endif
+#ifdef CONFIG_TCP_OPT_COOKIE_EXTENSION
+ /* Few sockets in timewait have cookies; in that case, then this
+ * object holds a reference to it (tw_cookie_pair->kref)
+ */
+ struct tcp_cookie_pair *tw_cookie_pair;
+#endif
};
static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
@@ -431,6 +509,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
return (struct tcp_timewait_sock *)sk;
}
-#endif
+#endif /* __KERNEL__ */
#endif /* _LINUX_TCP_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c7..6755ed8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
#include <linux/dmaengine.h>
#include <linux/crypto.h>
#include <linux/cryptohash.h>
+#include <linux/kref.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
@@ -167,6 +168,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */
/*
* TCP option lengths
@@ -177,6 +179,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_SACK_PERM 2
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_MD5SIG 18
+#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */
+#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */
+#define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)
+#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED 12
@@ -237,6 +243,7 @@ extern int sysctl_tcp_base_mss;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_max_ssthresh;
+extern int sysctl_tcp_cookie_size;
extern atomic_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
@@ -345,7 +352,12 @@ extern void tcp_enter_quickack_mode(struct sock *sk);
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
{
- rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+#ifdef CONFIG_TCP_OPT_COOKIE_EXTENSION
+ rx_opt->cookie_copy = NULL;
+ rx_opt->cookie_size = rx_opt->extend_ok =
+#endif
+ rx_opt->tstamp_ok = rx_opt->sack_ok =
+ rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
}
#define TCP_ECN_OK 1
@@ -1480,6 +1492,46 @@ struct tcp_request_sock_ops {
#endif
};
+#ifdef CONFIG_TCP_OPT_COOKIE_EXTENSION
+/**
+ * This structure contains variable data that is to be included in the
+ * cookie option and compared with later incoming segments.
+ *
+ * A tcp_sock contains a pointer to the current value, and this is cloned to
+ * the tcp_timewait_sock.
+ */
+struct tcp_cookie_pair {
+ struct kref kref;
+ /* 32-bit aligned for faster comparisons? */
+ u8 tcpcp_data[TCP_COOKIE_PAIR_SIZE];
+ u8 tcpcp_size; /* of the cookie pair */
+};
+
+static inline void tcp_cookie_pair_release(struct kref *kref)
+{
+ kfree(container_of(kref, struct tcp_cookie_pair, kref));
+}
+
+/**
+ * This structure contains constant data that is to be included in the
+ * payload of SYN or SYNACK segments when the cookie option is present.
+ *
+ * This structure is immutable (save for the reference counter) once created.
+ * A tcp_sock contains a pointer to the current value, and this is cloned to
+ * the request socks as they are generated.
+ */
+struct tcp_s_data_payload {
+ struct kref kref;
+ u16 tsdpl_size; /* of the trailing payload */
+ u8 tsdpl_data[0]; /* trailing payload */
+};
+
+static inline void tcp_s_data_payload_release(struct kref *kref)
+{
+ kfree(container_of(kref, struct tcp_s_data_payload, kref));
+}
+#endif
+
extern void tcp_v4_init(void);
extern void tcp_init(void);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 70491d9..1cf3be5 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -627,3 +627,36 @@ config TCP_MD5SIG
If unsure, say N.
+config TCP_OPT_COOKIE_EXTENSION
+ bool "TCP: Cookie option extension (EXPERIMENTAL)"
+ default n
+ depends on EXPERIMENTAL
+ select CRYPTO
+ select CRYPTO_MD5
+ ---help---
+ TCP/IP networking is open to an attack known as "SYN flooding".
+ This denial-of-service attack prevents legitimate remote users
+ from being able to connect to the computer during an ongoing
+ attack and requires very little work from the attacker, who can
+ operate from anywhere on the Internet.
+
+ TCP Cookie Transactions (TCPCT) deter spoofing of client
+ connections and prevent server resource exhaustion, by
+ eliminating the need to maintain server state during <SYN>
+ establishment and after <FIN> and <RST> termination of
+ connections. The TCPCT cookie exchange itself may optionally
+ carry <SYN> data, limited in size to inhibit Denial of Service
+ (DoS) attacks. Implements TCP header extension, allowing
+ 64-bit timestamps and more Selective Acknowledgments.
+
+ Unlike the passive "SYN cookies" option, other TCP options will
+ continue to work. If configured, SYN cookies continue to function
+ for those parties that do not use this Cookie extension option.
+
+ If you say Y here, note that TCPCT isn't yet enabled by default.
+
+ The sysctl "tcp_cookie_size" should be in the range 8 to 16,
+ although any non-zero value will be adjusted automatically.
+
+ If unsure, say N.
+
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2dcf04d..25b60eb 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -712,6 +712,16 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_TCP_OPT_COOKIE_EXTENSION
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tcp_cookie_size",
+ .data = &sysctl_tcp_cookie_size,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+#endif
{
.ctl_name = CTL_UNNUMBERED,
.procname = "udp_mem",
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5200aab..93af24c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,14 @@ int sysctl_tcp_base_mss __read_mostly = 512;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+#ifdef CONFIG_SYSCTL
+/* By default, let the user enable it. */
+int sysctl_tcp_cookie_size __read_mostly = 0;
+#else
+int sysctl_tcp_cookie_size __read_mostly = TCP_COOKIE_MAX;
+#endif
+
+
/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
^ permalink raw reply related
* [RFC PATCH] net: add dataref destructor to sk_buff
From: Gregory Haskins @ 2009-10-02 14:20 UTC (permalink / raw)
To: netdev; +Cc: linux-kernel, ghaskins
(Applies to davem/net-2.6.git:4fdb78d30)
Hi David, netdevs,
The following is an RFC for an attempt at addressing a zero-copy solution.
To be perfectly honest, I have no idea if this is the best solution, or if
there is truly a problem with skb->destructor that requires an alternate
mechanism. What I do know is that this patch seems to work, and I would
like to see some kind of solution available upstream. So I thought I would
send my hack out as at least a point of discussion. FWIW: This has been
tested heavily in my rig and is technically suitable for inclusion after
review as is, if that is decided to be the optimal path forward here.
Thanks for your review and consideration,
Kind regards,
-Greg
----------------------------------------
From: Gregory Haskins <ghaskins@novell.com>
Subject: [RFC PATCH] net: add dataref destructor to sk_buff
What: The skb->destructor field is reportedly unreliable for ensuring
that all shinfo users have dropped their references. Therefore, we add
a distinct ->release() method for the shinfo structure which is closely
tied to the underlying page resources we want to protect.
Why: We want to add zero-copy transmit support for AlacrityVM guests.
In order to support this, the host kernel must map guest pages directly
into a paged-skb and send it as normal. put_page() alone is not
sufficient lifetime management since the pages are ultimately allocated
from within the guest. Therefore, we need higher-level notification
when the skb is finally freed on the host so we can then inject a proper
"tx-complete" event into the guest context.
Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---
include/linux/skbuff.h | 2 ++
net/core/skbuff.c | 9 +++++++++
2 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index df7b23a..02cdab6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -207,6 +207,8 @@ struct skb_shared_info {
/* Intermediate layers must ensure that destructor_arg
* remains valid until skb destructor */
void * destructor_arg;
+ void * priv;
+ void (*release)(struct sk_buff *skb);
};
/* We divide dataref into two halves. The higher 16 bits hold references
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 80a9616..a7e40a9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -219,6 +219,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo->tx_flags.flags = 0;
skb_frag_list_init(skb);
memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
+ shinfo->release = NULL;
+ shinfo->priv = NULL;
if (fclone) {
struct sk_buff *child = skb + 1;
@@ -350,6 +352,9 @@ static void skb_release_data(struct sk_buff *skb)
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
+ if (skb_shinfo(skb)->release)
+ skb_shinfo(skb)->release(skb);
+
kfree(skb->head);
}
}
@@ -514,6 +519,8 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
shinfo->tx_flags.flags = 0;
skb_frag_list_init(skb);
memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
+ shinfo->release = NULL;
+ shinfo->priv = NULL;
memset(skb, 0, offsetof(struct sk_buff, tail));
skb->data = skb->head + NET_SKB_PAD;
@@ -856,6 +863,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->hdr_len = 0;
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+ skb_shinfo(skb)->release = NULL;
+ skb_shinfo(skb)->priv = NULL;
return 0;
nodata:
^ permalink raw reply related
* [QUESTION] Packet Reordering detection and response with TCP in Reno-mode
From: Daniel Slot @ 2009-10-02 14:09 UTC (permalink / raw)
To: netdev
I have some problems understanding Linux TCP's reordering detection
and response algorithms.
When the SACK option is used, the threshold adaption is understandable.
But in Reno-mode (without SACKs), reordering detection and response
are imho not clear.
Reordering detection:
How is it possible to determine the number of holes without SACK?
Simple DUPACKs do not provide enough information for such an estimation.
kernel 2.6.30.4 - net/ipv4/tcp_input.c -line 1934
static int tcp_limit_reno_sacked(struct tcp_sock *tp)
{
u32 holes;
holes = max(tp->lost_out, 1U);
holes = min(holes, tp->packets_out);
if ((tp->sacked_out + holes) > tp->packets_out) {
tp->sacked_out = tp->packets_out - holes;
return 1;
}
return 0;
}
Reordering response:
Reordering detection in Reno-mode is only possible in the disorder phase.
When packet reordering has been detected in Reno-mode,
linux's dupthresh (tp->reordering) is set to the number of packets in
flight (plus something else).
The question is: why choose the number of packets in flight as the new dupthresh?
And, more importantly, why adapt the dupthresh when its old value is
still sufficient?
Detecting reordering in the disorder phase means that nothing has been
retransmitted yet.
kernel 2.6.30.4 - net/ipv4/tcp_input.c -line 1952
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_limit_reno_sacked(tp))
tcp_update_reordering(sk, tp->packets_out + addend, 0);
}
29/09/2009 Daniel Slot (slot.daniel(at)gmail.com)
^ permalink raw reply
* Re: [PATCH 0/8] SECURITY ISSUE with connector
From: Greg KH @ 2009-10-02 13:58 UTC (permalink / raw)
To: Philipp Reisner
Cc: linux-kernel, netdev, Andrew Morton, David S. Miller, dm-devel,
Evgeniy Polyakov, linux-fbdev-devel
In-Reply-To: <1254487211-11810-1-git-send-email-philipp.reisner@linbit.com>
On Fri, Oct 02, 2009 at 02:40:03PM +0200, Philipp Reisner wrote:
> Affected: All code that uses connector, in kernel and out of mainline
>
> The connector, as it is today, does not allow the in kernel receiving
> parts to do any checks on privileges of a message's sender.
So, assume I know nothing about the connector architecture, what does
this mean in a security context?
> I know, there are not many out there that like connector, but as
> long as it is in the kernel, we have to fix the security issues it has!
And what specifically are the security issues?
> Please either drop connector, or someone who feels a bit responsible
> and has our beloved dictator's blessing, PLEASE PLEASE PLEASE take
> this into your tree, and send the pull request to Linus.
>
> Patches 1 to 4 are already Acked-by Evgeny, the connector's maintainer.
> Patches 5 to 7 are the obvious fixes to the connector user's code.
Obvious in what way?
thanks,
greg k-h
^ permalink raw reply
* Re: 2.6.32-rc1-git2: Reported regressions from 2.6.31
From: Stefan Richter @ 2009-10-02 13:00 UTC (permalink / raw)
To: Jaswinder Singh Rajput
Cc: Rafael J. Wysocki, Linux Kernel Mailing List, Adrian Bunk,
Andrew Morton, Linus Torvalds, Natalie Protasevich,
Kernel Testers List, Network Development, Linux ACPI,
Linux PM List, Linux SCSI List, Linux Wireless List, DRI
In-Reply-To: <1254469139.3531.19.camel-6Ww87KsxWewAvxtiuMwx3w@public.gmane.org>
Jaswinder Singh Rajput wrote:
> If you add one more entry say "Suspected commit :" then it will be great
> and will solve regressions much faster.
Will? Might.
> You can request submitter to
> submit 'suspected commit' by git bisect and also specify git bisect
> links like : (for more information about git bisect check
> http://kerneltrap.org/node/11753)
I disagree. A reporter should only be asked to bisect (using git or
other tools) /if/ a developer determined that bisection may speed up the
debugging process or is the only remaining option to make progress with
a bug.
It would be wrong to steal a reporter's valuable time by asking for
bisection before anybody familiar with the matter even had a first look
at the report.
Remember:
- Not all bugs can be economically narrowed down by bisection.
- Bisection requires skills, rigor, and time.
- Alas there are considerable sections in our kernel history which
are not bisectable.
--
Stefan Richter
-=====-==--= =-=- ---=-
http://arcgraph.de/sr/
^ permalink raw reply
* [PATCH 8/8] uvesafb/connector: Disallow unprivileged users to send netlink packets
From: Philipp Reisner @ 2009-10-02 12:40 UTC (permalink / raw)
To: linux-kernel, netdev, Andrew Morton, David S. Miller, Greg KH
Cc: dm-devel, Evgeniy Polyakov, linux-fbdev-devel, Philipp Reisner
In-Reply-To: <1254487211-11810-8-git-send-email-philipp.reisner@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
---
drivers/video/uvesafb.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index aa7cd95..e35232a 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -72,6 +72,9 @@ static void uvesafb_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *ns
struct uvesafb_task *utask;
struct uvesafb_ktask *task;
+ if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN))
+ return;
+
if (msg->seq >= UVESAFB_TASKS_MAX)
return;
--
1.6.0.4
^ permalink raw reply related
* [PATCH 7/8] pohmelfs/connector: Disallow unprivileged users to configure pohmelfs
From: Philipp Reisner @ 2009-10-02 12:40 UTC (permalink / raw)
To: linux-kernel, netdev, Andrew Morton, David S. Miller, Greg KH
Cc: dm-devel, Evgeniy Polyakov, linux-fbdev-devel, Philipp Reisner
In-Reply-To: <1254487211-11810-7-git-send-email-philipp.reisner@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
---
drivers/staging/pohmelfs/config.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/drivers/staging/pohmelfs/config.c b/drivers/staging/pohmelfs/config.c
index c9162b3..5d04bf5 100644
--- a/drivers/staging/pohmelfs/config.c
+++ b/drivers/staging/pohmelfs/config.c
@@ -531,6 +531,9 @@ static void pohmelfs_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *n
{
int err;
+ if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN))
+ return;
+
switch (msg->flags) {
case POHMELFS_FLAGS_ADD:
case POHMELFS_FLAGS_DEL:
--
1.6.0.4
^ permalink raw reply related
* [PATCH 6/8] dst/connector: Disallow unprivileged users to configure dst
From: Philipp Reisner @ 2009-10-02 12:40 UTC (permalink / raw)
To: linux-kernel, netdev, Andrew Morton, David S. Miller, Greg KH
Cc: dm-devel, Evgeniy Polyakov, linux-fbdev-devel, Philipp Reisner
In-Reply-To: <1254487211-11810-6-git-send-email-philipp.reisner@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
---
drivers/staging/dst/dcore.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
index 3943c91..ee16010 100644
--- a/drivers/staging/dst/dcore.c
+++ b/drivers/staging/dst/dcore.c
@@ -855,6 +855,11 @@ static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
struct dst_node *n = NULL, *tmp;
unsigned int hash;
+ if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out;
+ }
+
if (msg->len < sizeof(struct dst_ctl)) {
err = -EBADMSG;
goto out;
--
1.6.0.4
^ permalink raw reply related
* [PATCH 5/8] dm/connector: Only process connector packages from privileged processes
From: Philipp Reisner @ 2009-10-02 12:40 UTC (permalink / raw)
To: linux-kernel, netdev, Andrew Morton, David S. Miller, Greg KH
Cc: dm-devel, Evgeniy Polyakov, linux-fbdev-devel, Philipp Reisner
In-Reply-To: <1254487211-11810-5-git-send-email-philipp.reisner@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
---
drivers/md/dm-log-userspace-transfer.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 1327e1a..54abf9e 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -133,6 +133,9 @@ static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
+ if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN))
+ return;
+
spin_lock(&receiving_list_lock);
if (msg->len == 0)
fill_pkg(msg, NULL);
--
1.6.0.4
^ permalink raw reply related
* [PATCH 3/8] connector/dm: Fixed a compilation warning
From: Philipp Reisner @ 2009-10-02 12:40 UTC (permalink / raw)
To: linux-kernel, netdev, Andrew Morton, David S. Miller, Greg KH
Cc: dm-devel, Evgeniy Polyakov, linux-fbdev-devel, Philipp Reisner
In-Reply-To: <1254487211-11810-3-git-send-email-philipp.reisner@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Acked-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
---
drivers/md/dm-log-userspace-transfer.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 556131f..1327e1a 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -129,9 +129,8 @@ static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
* This is the connector callback that delivers data
* that was sent from userspace.
*/
-static void cn_ulog_callback(void *data, struct netlink_skb_parms *nsp)
+static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
- struct cn_msg *msg = (struct cn_msg *)data;
struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
spin_lock(&receiving_list_lock);
--
1.6.0.4
^ permalink raw reply related
* [PATCH 0/8] SECURITY ISSUE with connector
From: Philipp Reisner @ 2009-10-02 12:40 UTC (permalink / raw)
To: linux-kernel, netdev, Andrew Morton, David S. Miller, Greg KH
Cc: dm-devel, Evgeniy Polyakov, linux-fbdev-devel, Philipp Reisner
Affected: All code that uses connector, in kernel and out of mainline
The connector, as it is today, does not allow the in-kernel receiving
parts to check the privileges of a message's sender.
I know, there are not many out there that like connector, but as
long as it is in the kernel, we have to fix the security issues it has!
Please either drop connector, or someone who feels a bit responsible
and has our beloved dictator's blessing, PLEASE PLEASE PLEASE take
this into your tree, and send the pull request to Linus.
Patches 1 to 4 are already Acked-by Evgeniy, the connector's maintainer.
Patches 5 to 8 are the obvious fixes to the connector users' code.
For convenience these patches are also available as git tree:
git://git.drbd.org/linux-2.6-drbd.git connector-fix
-Phil
Philipp Reisner (8):
connector: Keep the skb in cn_callback_data
connector: Provide the sender's credentials to the callback
connector/dm: Fixed a compilation warning
connector: Removed the destruct_data callback since it is always kfree_skb()
dm/connector: Only process connector packages from privileged processes
dst/connector: Disallow unprivileged users to configure dst
pohmelfs/connector: Disallow unprivileged users to configure pohmelfs
uvesafb/connector: Disallow unprivileged users to send netlink packets
Documentation/connector/cn_test.c | 2 +-
Documentation/connector/connector.txt | 8 ++++----
drivers/connector/cn_queue.c | 12 +++++++-----
drivers/connector/connector.c | 22 ++++++++--------------
drivers/md/dm-log-userspace-transfer.c | 6 ++++--
drivers/staging/dst/dcore.c | 7 ++++++-
drivers/staging/pohmelfs/config.c | 5 ++++-
drivers/video/uvesafb.c | 5 ++++-
drivers/w1/w1_netlink.c | 2 +-
include/linux/connector.h | 11 ++++-------
10 files changed, 43 insertions(+), 37 deletions(-)
^ permalink raw reply
* [PATCH 4/8] connector: Removed the destruct_data callback since it is always kfree_skb()
From: Philipp Reisner @ 2009-10-02 12:40 UTC (permalink / raw)
To: linux-kernel, netdev, Andrew Morton, David S. Miller, Greg KH
Cc: dm-devel, Evgeniy Polyakov, linux-fbdev-devel, Philipp Reisner
In-Reply-To: <1254487211-11810-4-git-send-email-philipp.reisner@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Acked-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
---
drivers/connector/cn_queue.c | 4 ++--
drivers/connector/connector.c | 11 +++--------
include/linux/connector.h | 3 ---
3 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c
index 163c3e3..210338e 100644
--- a/drivers/connector/cn_queue.c
+++ b/drivers/connector/cn_queue.c
@@ -83,8 +83,8 @@ void cn_queue_wrapper(struct work_struct *work)
d->callback(msg, nsp);
- d->destruct_data(d->ddata);
- d->ddata = NULL;
+ kfree_skb(d->skb);
+ d->skb = NULL;
kfree(d->free);
}
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index e59f0ab..f060246 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -129,7 +129,7 @@ EXPORT_SYMBOL_GPL(cn_netlink_send);
/*
* Callback helper - queues work and setup destructor for given data.
*/
-static int cn_call_callback(struct sk_buff *skb, void (*destruct_data)(void *), void *data)
+static int cn_call_callback(struct sk_buff *skb)
{
struct cn_callback_entry *__cbq, *__new_cbq;
struct cn_dev *dev = &cdev;
@@ -140,12 +140,9 @@ static int cn_call_callback(struct sk_buff *skb, void (*destruct_data)(void *),
list_for_each_entry(__cbq, &dev->cbdev->queue_list, callback_entry) {
if (cn_cb_equal(&__cbq->id.id, &msg->id)) {
if (likely(!work_pending(&__cbq->work) &&
- __cbq->data.ddata == NULL)) {
+ __cbq->data.skb == NULL)) {
__cbq->data.skb = skb;
- __cbq->data.ddata = data;
- __cbq->data.destruct_data = destruct_data;
-
if (queue_cn_work(__cbq, &__cbq->work))
err = 0;
else
@@ -159,8 +156,6 @@ static int cn_call_callback(struct sk_buff *skb, void (*destruct_data)(void *),
d = &__new_cbq->data;
d->skb = skb;
d->callback = __cbq->data.callback;
- d->ddata = data;
- d->destruct_data = destruct_data;
d->free = __new_cbq;
__new_cbq->pdev = __cbq->pdev;
@@ -208,7 +203,7 @@ static void cn_rx_skb(struct sk_buff *__skb)
return;
}
- err = cn_call_callback(skb, (void (*)(void *))kfree_skb, skb);
+ err = cn_call_callback(skb);
if (err < 0)
kfree_skb(skb);
}
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 545728e..3a14615 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -132,9 +132,6 @@ struct cn_callback_id {
};
struct cn_callback_data {
- void (*destruct_data) (void *);
- void *ddata;
-
struct sk_buff *skb;
void (*callback) (struct cn_msg *, struct netlink_skb_parms *);
--
1.6.0.4
^ permalink raw reply related
* [PATCH 2/8] connector: Provide the sender's credentials to the callback
From: Philipp Reisner @ 2009-10-02 12:40 UTC (permalink / raw)
To: linux-kernel, netdev, Andrew Morton, David S. Miller, Greg KH
Cc: dm-devel, Evgeniy Polyakov, linux-fbdev-devel, Philipp Reisner
In-Reply-To: <1254487211-11810-2-git-send-email-philipp.reisner@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Acked-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
---
Documentation/connector/cn_test.c | 2 +-
Documentation/connector/connector.txt | 8 ++++----
drivers/connector/cn_queue.c | 7 ++++---
drivers/connector/connector.c | 4 ++--
drivers/md/dm-log-userspace-transfer.c | 2 +-
drivers/staging/dst/dcore.c | 2 +-
drivers/staging/pohmelfs/config.c | 2 +-
drivers/video/uvesafb.c | 2 +-
drivers/w1/w1_netlink.c | 2 +-
include/linux/connector.h | 6 +++---
10 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
index 1711adc..b07add3 100644
--- a/Documentation/connector/cn_test.c
+++ b/Documentation/connector/cn_test.c
@@ -34,7 +34,7 @@ static char cn_test_name[] = "cn_test";
static struct sock *nls;
static struct timer_list cn_test_timer;
-static void cn_test_callback(struct cn_msg *msg)
+static void cn_test_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
pr_info("%s: %lu: idx=%x, val=%x, seq=%u, ack=%u, len=%d: %s.\n",
__func__, jiffies, msg->id.idx, msg->id.val,
diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt
index 81e6bf6..78c9466 100644
--- a/Documentation/connector/connector.txt
+++ b/Documentation/connector/connector.txt
@@ -23,7 +23,7 @@ handling, etc... The Connector driver allows any kernelspace agents to use
netlink based networking for inter-process communication in a significantly
easier way:
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *));
+int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
void cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask);
struct cb_id
@@ -53,15 +53,15 @@ struct cn_msg
Connector interfaces.
/*****************************************/
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *));
+int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
Registers new callback with connector core.
struct cb_id *id - unique connector's user identifier.
It must be registered in connector.h for legal in-kernel users.
char *name - connector's callback symbolic name.
- void (*callback) (void *) - connector's callback.
- Argument must be dereferenced to struct cn_msg *.
+ void (*callback) (struct cn..) - connector's callback.
+ cn_msg and the sender's credentials
void cn_del_callback(struct cb_id *id);
diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c
index b4cfac9..163c3e3 100644
--- a/drivers/connector/cn_queue.c
+++ b/drivers/connector/cn_queue.c
@@ -79,8 +79,9 @@ void cn_queue_wrapper(struct work_struct *work)
container_of(work, struct cn_callback_entry, work);
struct cn_callback_data *d = &cbq->data;
struct cn_msg *msg = NLMSG_DATA(nlmsg_hdr(d->skb));
+ struct netlink_skb_parms *nsp = &NETLINK_CB(d->skb);
- d->callback(msg);
+ d->callback(msg, nsp);
d->destruct_data(d->ddata);
d->ddata = NULL;
@@ -90,7 +91,7 @@ void cn_queue_wrapper(struct work_struct *work)
static struct cn_callback_entry *
cn_queue_alloc_callback_entry(char *name, struct cb_id *id,
- void (*callback)(struct cn_msg *))
+ void (*callback)(struct cn_msg *, struct netlink_skb_parms *))
{
struct cn_callback_entry *cbq;
@@ -124,7 +125,7 @@ int cn_cb_equal(struct cb_id *i1, struct cb_id *i2)
}
int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id,
- void (*callback)(struct cn_msg *))
+ void (*callback)(struct cn_msg *, struct netlink_skb_parms *))
{
struct cn_callback_entry *cbq, *__cbq;
int found = 0;
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index fc9887f..e59f0ab 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -269,7 +269,7 @@ static void cn_notify(struct cb_id *id, u32 notify_event)
* May sleep.
*/
int cn_add_callback(struct cb_id *id, char *name,
- void (*callback)(struct cn_msg *))
+ void (*callback)(struct cn_msg *, struct netlink_skb_parms *))
{
int err;
struct cn_dev *dev = &cdev;
@@ -351,7 +351,7 @@ static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2)
*
* Used for notification of a request's processing.
*/
-static void cn_callback(struct cn_msg *msg)
+static void cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
struct cn_ctl_msg *ctl;
struct cn_ctl_entry *ent;
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index ba0edad..556131f 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -129,7 +129,7 @@ static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
* This is the connector callback that delivers data
* that was sent from userspace.
*/
-static void cn_ulog_callback(void *data)
+static void cn_ulog_callback(void *data, struct netlink_skb_parms *nsp)
{
struct cn_msg *msg = (struct cn_msg *)data;
struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
index ac85773..3943c91 100644
--- a/drivers/staging/dst/dcore.c
+++ b/drivers/staging/dst/dcore.c
@@ -847,7 +847,7 @@ static dst_command_func dst_commands[] = {
/*
* Configuration parser.
*/
-static void cn_dst_callback(struct cn_msg *msg)
+static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
struct dst_ctl *ctl;
int err;
diff --git a/drivers/staging/pohmelfs/config.c b/drivers/staging/pohmelfs/config.c
index 90f962e..c9162b3 100644
--- a/drivers/staging/pohmelfs/config.c
+++ b/drivers/staging/pohmelfs/config.c
@@ -527,7 +527,7 @@ out_unlock:
return err;
}
-static void pohmelfs_cn_callback(struct cn_msg *msg)
+static void pohmelfs_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
int err;
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index e98baf6..aa7cd95 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -67,7 +67,7 @@ static DEFINE_MUTEX(uvfb_lock);
* find the kernel part of the task struct, copy the registers and
* the buffer contents and then complete the task.
*/
-static void uvesafb_cn_callback(struct cn_msg *msg)
+static void uvesafb_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
struct uvesafb_task *utask;
struct uvesafb_ktask *task;
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c
index 52ccb3d..45c126f 100644
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -306,7 +306,7 @@ static int w1_netlink_send_error(struct cn_msg *rcmsg, struct w1_netlink_msg *rm
return error;
}
-static void w1_cn_callback(struct cn_msg *msg)
+static void w1_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
struct w1_netlink_msg *m = (struct w1_netlink_msg *)(msg + 1);
struct w1_netlink_cmd *cmd;
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 05a7a14..545728e 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -136,7 +136,7 @@ struct cn_callback_data {
void *ddata;
struct sk_buff *skb;
- void (*callback) (struct cn_msg *);
+ void (*callback) (struct cn_msg *, struct netlink_skb_parms *);
void *free;
};
@@ -167,11 +167,11 @@ struct cn_dev {
struct cn_queue_dev *cbdev;
};
-int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *));
+int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
void cn_del_callback(struct cb_id *);
int cn_netlink_send(struct cn_msg *, u32, gfp_t);
-int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *));
+int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *, struct netlink_skb_parms *));
void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id);
int queue_cn_work(struct cn_callback_entry *cbq, struct work_struct *work);
--
1.6.0.4
^ permalink raw reply related
* [PATCH 1/8] connector: Keep the skb in cn_callback_data
From: Philipp Reisner @ 2009-10-02 12:40 UTC (permalink / raw)
To: linux-kernel, netdev, Andrew Morton, David S. Miller, Greg KH
Cc: dm-devel, Evgeniy Polyakov, linux-fbdev-devel, Philipp Reisner
In-Reply-To: <1254487211-11810-1-git-send-email-philipp.reisner@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Acked-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
---
drivers/connector/cn_queue.c | 3 ++-
drivers/connector/connector.c | 11 +++++------
include/linux/connector.h | 4 ++--
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c
index 4a1dfe1..b4cfac9 100644
--- a/drivers/connector/cn_queue.c
+++ b/drivers/connector/cn_queue.c
@@ -78,8 +78,9 @@ void cn_queue_wrapper(struct work_struct *work)
struct cn_callback_entry *cbq =
container_of(work, struct cn_callback_entry, work);
struct cn_callback_data *d = &cbq->data;
+ struct cn_msg *msg = NLMSG_DATA(nlmsg_hdr(d->skb));
- d->callback(d->callback_priv);
+ d->callback(msg);
d->destruct_data(d->ddata);
d->ddata = NULL;
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 74f52af..fc9887f 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -129,10 +129,11 @@ EXPORT_SYMBOL_GPL(cn_netlink_send);
/*
* Callback helper - queues work and setup destructor for given data.
*/
-static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), void *data)
+static int cn_call_callback(struct sk_buff *skb, void (*destruct_data)(void *), void *data)
{
struct cn_callback_entry *__cbq, *__new_cbq;
struct cn_dev *dev = &cdev;
+ struct cn_msg *msg = NLMSG_DATA(nlmsg_hdr(skb));
int err = -ENODEV;
spin_lock_bh(&dev->cbdev->queue_lock);
@@ -140,7 +141,7 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v
if (cn_cb_equal(&__cbq->id.id, &msg->id)) {
if (likely(!work_pending(&__cbq->work) &&
__cbq->data.ddata == NULL)) {
- __cbq->data.callback_priv = msg;
+ __cbq->data.skb = skb;
__cbq->data.ddata = data;
__cbq->data.destruct_data = destruct_data;
@@ -156,7 +157,7 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v
__new_cbq = kzalloc(sizeof(struct cn_callback_entry), GFP_ATOMIC);
if (__new_cbq) {
d = &__new_cbq->data;
- d->callback_priv = msg;
+ d->skb = skb;
d->callback = __cbq->data.callback;
d->ddata = data;
d->destruct_data = destruct_data;
@@ -191,7 +192,6 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v
*/
static void cn_rx_skb(struct sk_buff *__skb)
{
- struct cn_msg *msg;
struct nlmsghdr *nlh;
int err;
struct sk_buff *skb;
@@ -208,8 +208,7 @@ static void cn_rx_skb(struct sk_buff *__skb)
return;
}
- msg = NLMSG_DATA(nlh);
- err = cn_call_callback(msg, (void (*)(void *))kfree_skb, skb);
+ err = cn_call_callback(skb, (void (*)(void *))kfree_skb, skb);
if (err < 0)
kfree_skb(skb);
}
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 47ebf41..05a7a14 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -134,8 +134,8 @@ struct cn_callback_id {
struct cn_callback_data {
void (*destruct_data) (void *);
void *ddata;
-
- void *callback_priv;
+
+ struct sk_buff *skb;
void (*callback) (struct cn_msg *);
void *free;
--
1.6.0.4
^ permalink raw reply related
* Re: [RFC take2] pkt_sched: gen_estimator: Dont report fake rate estimators
From: Eric Dumazet @ 2009-10-02 12:39 UTC (permalink / raw)
To: Jarek Poplawski; +Cc: David Miller, kaber, netdev
In-Reply-To: <20091002112514.GA14100@ff.dom.local>
Jarek Poplawski a écrit :
> So you prefer the additional parameter version, but since these
> _active tests are not needed e.g. for HTB classes, which got it
> active by default, so maybe bstats == NULL would let skip such a test?
>
> ...
>> --- a/include/net/gen_stats.h
>> +++ b/include/net/gen_stats.h
>> @@ -30,6 +30,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
>> extern int gnet_stats_copy_basic(struct gnet_dump *d,
>> struct gnet_stats_basic_packed *b);
>> extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
>> + const struct gnet_stats_basic_packed *bstats,
>
> It seems these *b/*bstats defs could look more consistent. Otherwise
> it looks OK to me.
Agreed, here is the updated version; I added your Signed-off-by, if you don't mind :)
[RFC] pkt_sched: gen_estimator: Dont report fake rate estimators
We currently send TCA_STATS_RATE_EST elements to netlink users, even if no estimator
is running.
# tc -s -d qdisc
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
Sent 112833764978 bytes 1495081739 pkt (dropped 0, overlimits 0 requeues 0)
rate 0bit 0pps backlog 0b 0p requeues 0
The user has no way to tell if the "rate 0bit 0pps" is a real estimation or a fake
one (because no estimator is active).
After this patch, tc command output is :
$ tc -s -d qdisc
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
Sent 561075 bytes 1196 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
We add a parameter to the gnet_stats_copy_rate_est() function so that
it can use gen_estimator_active(bstats, r), as suggested by Jarek.
This parameter can be NULL if the check is not necessary (htb, for
example, has a mandatory rate estimator).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
---
include/net/gen_stats.h | 1 +
net/core/gen_stats.c | 7 ++++++-
net/sched/act_api.c | 2 +-
net/sched/sch_api.c | 2 +-
net/sched/sch_cbq.c | 2 +-
net/sched/sch_drr.c | 2 +-
net/sched/sch_hfsc.c | 2 +-
net/sched/sch_htb.c | 2 +-
8 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index c148855..eb87a14 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -30,6 +30,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
extern int gnet_stats_copy_basic(struct gnet_dump *d,
struct gnet_stats_basic_packed *b);
extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
+ const struct gnet_stats_basic_packed *b,
struct gnet_stats_rate_est *r);
extern int gnet_stats_copy_queue(struct gnet_dump *d,
struct gnet_stats_queue *q);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 8569310..054a49c 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -136,8 +136,13 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
+gnet_stats_copy_rate_est(struct gnet_dump *d,
+ const struct gnet_stats_basic_packed *b,
+ struct gnet_stats_rate_est *r)
{
+ if (b && !gen_estimator_active(b, r))
+ return 0;
+
if (d->compat_tc_stats) {
d->tc_stats.bps = r->bps;
d->tc_stats.pps = r->pps;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2dfb3e7..2b0d5ee 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -618,7 +618,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
goto errout;
if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
- gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 ||
+ gnet_stats_copy_rate_est(&d, &h->tcf_bstats, &h->tcf_rate_est) < 0 ||
gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
goto errout;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 903e418..1acfd29 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1179,7 +1179,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
goto nla_put_failure;
if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
- gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
gnet_stats_copy_queue(&d, &q->qstats) < 0)
goto nla_put_failure;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 5b132c4..3846d65 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1609,7 +1609,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
cl->xstats.undertime = cl->undertime - q->now;
if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, &cl->qstats) < 0)
return -1;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 5a888af..a65604f 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -280,7 +280,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
}
if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
return -1;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 2c5c76b..b38b39c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1375,7 +1375,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.rtwork = cl->cl_cumul;
if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, &cl->qstats) < 0)
return -1;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 85acab9..2e38d1a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1105,7 +1105,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
cl->xstats.ctokens = cl->ctokens;
if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, &cl->qstats) < 0)
return -1;
^ permalink raw reply related
* Re: Network hangs with 2.6.30.5
From: Eric Dumazet @ 2009-10-02 12:38 UTC (permalink / raw)
To: Ilpo Järvinen
Cc: David Miller, jarkao2, holger.hoffstaette, Netdev,
Evgeniy Polyakov
In-Reply-To: <alpine.DEB.2.00.0910021520280.13543@wel-95.cs.helsinki.fi>
Ilpo Järvinen a écrit :
> On Fri, 2 Oct 2009, Ilpo Järvinen wrote:
>
>> On Thu, 1 Oct 2009, David Miller wrote:
>>
>>> From: Jarek Poplawski <jarkao2@gmail.com>
>>> Date: Mon, 7 Sep 2009 07:21:43 +0000
>>>
>>>> While Eric is analyzing your data, I guess you could try reverting
>>>> some stuff around this tcp_tw_recycle, and my tcp ignorance would
>>>> point these commits for the beginning:
>>>>
>>>> http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.30.y.git;a=commitdiff;h=fc1ad92dfc4e363a055053746552cdb445ba5c57
>>>> http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.30.y.git;a=commitdiff;h=c887e6d2d9aee56ee7c9f2af4cec3a5efdcc4c72
>>> Ilpo's cleanup (the second commit listed) looks most likely to
>>> be a possibility.
>>>
>>> But I surely cannot find any bugs in it, even after studying it
>>> a few times.
>>>
>>> Ilpo could you audit it one more time for us just in case?
>> Argh, not that one ...the jungle of negations. But I'll try to go it
>> through once more but I tell you I did go through those negations multiple
>> times already before submitting it :-).
>>
>>> I also looked through all the TCP commits in 2.6.29 to 2.6.30
>>> and I could not find anything else that might cause stalls with
>>> time-wait recycled connections.
>> What about the more than 64k connections change a9d8f9110d7e953c2f2 (or
>> its fixes), it might be another possibility? ...It certainly does
>> something related to reuse and happens to be in the correct time frame...
>> (I've added Evgeniy).
I racked my brain trying to reproduce the conditions of the hang, but failed.
I am pretty sure both commits are OK (yours and mine); maybe a brute-force
git bisection is needed.
^ permalink raw reply
* Re: Network hangs with 2.6.30.5
From: Ilpo Järvinen @ 2009-10-02 12:29 UTC (permalink / raw)
To: David Miller
Cc: jarkao2, holger.hoffstaette, Netdev, eric.dumazet,
Evgeniy Polyakov
In-Reply-To: <alpine.DEB.2.00.0910021104130.13543@wel-95.cs.helsinki.fi>
[-- Attachment #1: Type: TEXT/PLAIN, Size: 5364 bytes --]
On Fri, 2 Oct 2009, Ilpo Järvinen wrote:
> On Thu, 1 Oct 2009, David Miller wrote:
>
> > From: Jarek Poplawski <jarkao2@gmail.com>
> > Date: Mon, 7 Sep 2009 07:21:43 +0000
> >
> > > While Eric is analyzing your data, I guess you could try reverting
> > > some stuff around this tcp_tw_recycle, and my tcp ignorance would
> > > point these commits for the beginning:
> > >
> > > http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.30.y.git;a=commitdiff;h=fc1ad92dfc4e363a055053746552cdb445ba5c57
> > > http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.30.y.git;a=commitdiff;h=c887e6d2d9aee56ee7c9f2af4cec3a5efdcc4c72
> >
> > Ilpo's cleanup (the second commit listed) looks most likely to
> > be a possibility.
> >
> > But I surely cannot find any bugs in it, even after studying it
> > a few times.
> >
> > Ilpo could you audit it one more time for us just in case?
>
> Argh, not that one ...the jungle of negations. But I'll try to go it
> through once more but I tell you I did go through those negations multiple
> times already before submitting it :-).
>
> > I also looked through all the TCP commits in 2.6.29 to 2.6.30
> > and I could not find anything else that might cause stalls with
> > time-wait recycled connections.
>
> What about the more than 64k connections change a9d8f9110d7e953c2f2 (or
> its fixes), it might be another possibility? ...It certainly does
> something related to reuse and happens to be in the correct time frame...
> (I've added Evgeniy).
Here's my full analysis:
> c887e6d2d9aee56ee7c9f2af4cec3a5efdcc4c72
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index d74ac30..255ca35 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -997,11 +997,21 @@ static inline int tcp_fin_time(const struct sock *sk)
> return fin_timeout;
> }
>
> -static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int rst)
> +static inline int tcp_paws_check(const struct tcp_options_received *rx_opt,
> + int paws_win)
> {
> - if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
> - return 0;
> - if (get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
> + if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
> + return 1;
> + if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
> + return 1;
> +
> + return 0;
> +}
> +
> +static inline int tcp_paws_reject(const struct tcp_options_received *rx_opt,
> + int rst)
> +{
> + if (tcp_paws_check(rx_opt, 0))
> return 0;
The first condition is multiplied by -1 to swap the subtraction terms (which
reverses the inequality). The other condition is very much the same. In
addition, there is an extra round of negation, but it is still OK.
>
> /* RST segments are not recommended to carry timestamp,
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index f527a16..b7d02c5 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3883,8 +3883,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
> * Not only, also it occurs for expired timestamps.
> */
>
> - if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
* -1 here too.
> - get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
The very same condition.
> + if (tcp_paws_check(&tp->rx_opt, 0))
> tcp_store_ts_recent(tp);
> }
> }
> @@ -3936,9 +3935,9 @@ static inline int tcp_paws_discard(const struct sock *sk,
> const struct sk_buff *skb)
> {
> const struct tcp_sock *tp = tcp_sk(sk);
> - return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
> - get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
> - !tcp_disordered_ack(sk, skb));
> +
> + return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
DeMorgan:
(a > b) && (c < d)
<==>
!(!(a > b) || !(c < d))
<==>
!((a <= b) || (c >= d))
> + !tcp_disordered_ack(sk, skb);
> }
>
> /* Check segment sequence number for validity.
> @@ -5513,7 +5512,7 @@ discard:
>
> /* PAWS check. */
> if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
> - tcp_paws_check(&tp->rx_opt, 0))
> + tcp_paws_reject(&tp->rx_opt, 0))
A plain rename, the rest likewise.
> goto discard_and_undo;
>
> if (th->syn) {
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 4b0df3e..43bbba7 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -107,7 +107,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> if (tmp_opt.saw_tstamp) {
> tmp_opt.ts_recent = tcptw->tw_ts_recent;
> tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
> - paws_reject = tcp_paws_check(&tmp_opt, th->rst);
> + paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
> }
> }
>
> @@ -511,7 +511,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
> * from another data.
> */
> tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
> - paws_reject = tcp_paws_check(&tmp_opt, th->rst);
> + paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
> }
> }
>
...Which leads me to conclude that the patch is innocent. ...I certainly won't
regret this cleanup after having to figure that mess out once again - that is
to say, hopefully for the last time :-). ...Sadly, the problem remains.
--
i.
^ permalink raw reply
* Re: [PATCH 1/4] qeth: Convert ethtool get_stats_count() ops to get_sset_count()
From: Frank Blaschka @ 2009-10-02 12:13 UTC (permalink / raw)
To: Ben Hutchings
Cc: David Miller, Ursula Braun, Frank Blaschka, linux-s390, netdev
In-Reply-To: <1254432272.2735.20.camel@achroite>
Works fine, thanks a lot. Here is my ACK.
Ben Hutchings schrieb:
> This string query operation was supposed to be replaced by the
> generic get_sset_count() starting in 2007. Convert qeth's
> implementation.
>
> Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
> ---
> This is not even compile-tested because I don't have an s390 compiler.
> But it's simple enough that I think I got it right...
>
> Ben.
>
> drivers/s390/net/qeth_core.h | 2 +-
> drivers/s390/net/qeth_core_main.c | 11 ++++++++---
> drivers/s390/net/qeth_l2_main.c | 4 ++--
> drivers/s390/net/qeth_l3_main.c | 2 +-
> 4 files changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
> index 31a2b4e..e8f72d7 100644
> --- a/drivers/s390/net/qeth_core.h
> +++ b/drivers/s390/net/qeth_core.h
> @@ -849,7 +849,7 @@ int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *,
> struct sk_buff *, struct qeth_hdr *, int, int, int);
> int qeth_do_send_packet(struct qeth_card *, struct qeth_qdio_out_q *,
> struct sk_buff *, struct qeth_hdr *, int);
> -int qeth_core_get_stats_count(struct net_device *);
> +int qeth_core_get_sset_count(struct net_device *, int);
> void qeth_core_get_ethtool_stats(struct net_device *,
> struct ethtool_stats *, u64 *);
> void qeth_core_get_strings(struct net_device *, u32, u8 *);
> diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
> index c4a42d9..edee4dc 100644
> --- a/drivers/s390/net/qeth_core_main.c
> +++ b/drivers/s390/net/qeth_core_main.c
> @@ -4305,11 +4305,16 @@ static struct {
> {"tx csum"},
> };
>
> -int qeth_core_get_stats_count(struct net_device *dev)
> +int qeth_core_get_sset_count(struct net_device *dev, int stringset)
> {
> - return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN);
> + switch (stringset) {
> + case ETH_SS_STATS:
> + return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN);
> + default:
> + return -EINVAL;
> + }
> }
> -EXPORT_SYMBOL_GPL(qeth_core_get_stats_count);
> +EXPORT_SYMBOL_GPL(qeth_core_get_sset_count);
>
> void qeth_core_get_ethtool_stats(struct net_device *dev,
> struct ethtool_stats *stats, u64 *data)
> diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
> index f4f3ca1..b61d5c7 100644
> --- a/drivers/s390/net/qeth_l2_main.c
> +++ b/drivers/s390/net/qeth_l2_main.c
> @@ -866,7 +866,7 @@ static const struct ethtool_ops qeth_l2_ethtool_ops = {
> .get_link = ethtool_op_get_link,
> .get_strings = qeth_core_get_strings,
> .get_ethtool_stats = qeth_core_get_ethtool_stats,
> - .get_stats_count = qeth_core_get_stats_count,
> + .get_sset_count = qeth_core_get_sset_count,
> .get_drvinfo = qeth_core_get_drvinfo,
> .get_settings = qeth_core_ethtool_get_settings,
> };
> @@ -874,7 +874,7 @@ static const struct ethtool_ops qeth_l2_ethtool_ops = {
> static const struct ethtool_ops qeth_l2_osn_ops = {
> .get_strings = qeth_core_get_strings,
> .get_ethtool_stats = qeth_core_get_ethtool_stats,
> - .get_stats_count = qeth_core_get_stats_count,
> + .get_sset_count = qeth_core_get_sset_count,
> .get_drvinfo = qeth_core_get_drvinfo,
> };
>
> diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
> index 073b6d3..4ca28c1 100644
> --- a/drivers/s390/net/qeth_l3_main.c
> +++ b/drivers/s390/net/qeth_l3_main.c
> @@ -2957,7 +2957,7 @@ static const struct ethtool_ops qeth_l3_ethtool_ops = {
> .set_tso = qeth_l3_ethtool_set_tso,
> .get_strings = qeth_core_get_strings,
> .get_ethtool_stats = qeth_core_get_ethtool_stats,
> - .get_stats_count = qeth_core_get_stats_count,
> + .get_sset_count = qeth_core_get_sset_count,
> .get_drvinfo = qeth_core_get_drvinfo,
> .get_settings = qeth_core_ethtool_get_settings,
> };
>
^ permalink raw reply
* Re: Messages are printed on screen
From: Markus Feldmann @ 2009-10-02 12:01 UTC (permalink / raw)
To: netdev
In-Reply-To: <1254480996.23350.73.camel@localhost>
Ben Hutchings schrieb:
> On Fri, 2009-10-02 at 11:52 +0200, Markus Feldmann wrote:
>
>>
>> As you see some of my IRQ-Lines are multiply in use, so my Server is
>> working hard at his limit.
>
> IRQ sharing is normal on PCs without MSI support, but to see where
> that's happening you need to look at /proc/interrupts and not the BIOS
> setup program or wherever you got the above information from.
OK, I ran <cat /proc/interrupts> and got:
CPU0
0: 259603 XT-PIC-XT timer
1: 1421 XT-PIC-XT i8042
2: 0 XT-PIC-XT cascade
4: 200000 XT-PIC-XT ohci_hcd:usb3, pppp0
5: 0 XT-PIC-XT ehci_hcd:usb1, lan0
7: 6959 XT-PIC-XT lan1
8: 2 XT-PIC-XT rtc0
9: 0 XT-PIC-XT acpi
11: 37697 XT-PIC-XT ide2, ide3, ohci_hcd:usb2, lan2
14: 0 XT-PIC-XT ide0
NMI: 0 Non-maskable interrupts
TRM: 0 Thermal event interrupts
MCE: 0 Machine check exceptions
MCP: 13 Machine check polls
ERR: 2
How can I assign IRQs during boot?
How can I see which IRQ line has the most traffic or problems?
>> The result is sometimes freezing of my
>> Server, especially if there is much processing on these devices. I
>> remember that with Kernel 2.6.18 my system didn't does freezing.
>
> This is simply a bug, not a result of IRQ sharing or 'working hard'.
But something must have triggered this freezing. Even though I do not know
what the bug is, shouldn't I be able to avoid the problem by taking some
precautions?
>
>> How can i disable the output of messages (about dropped packets from my
>> firewall) to my terminal ?
>
> Edit the value of kernel.printk in /etc/sysctl.conf.
OK, I added:
kernel.printk= 4 4 1 7
to </etc/sysctl.conf>
>
>> How can i stabilize my IRQ-System with the kernel 2.6.31.1 ?
>
> I would expect the standard kernel version for 'lenny' or the 2.6.30
> kernel from 'sid' to be more stable.
Ok i will try the kernel from Debian Sid. :-)
>
>> What debug features should i disable ?
>
> No idea, you didn't even specify what you enabled...
I will add some enabled features next week.
regards Markus
^ permalink raw reply
* Re: [RFC take2] pkt_sched: gen_estimator: Dont report fake rate estimators
From: Jarek Poplawski @ 2009-10-02 11:25 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, kaber, netdev
In-Reply-To: <4AC5D78D.3030400@gmail.com>
On Fri, Oct 02, 2009 at 12:35:57PM +0200, Eric Dumazet wrote:
> Here is second attempt to make this change, thanks Jarek !
>
> This is indeed less intrusive !
>
> [RFC] pkt_sched: gen_estimator: Dont report fake rate estimators
>
> We currently send TCA_STATS_RATE_EST elements to netlink users, even if no estimator
> is running.
>
> # tc -s -d qdisc
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
> Sent 112833764978 bytes 1495081739 pkt (dropped 0, overlimits 0 requeues 0)
> rate 0bit 0pps backlog 0b 0p requeues 0
>
> User has no way to tell if the "rate 0bit 0pps" is a real estimation, or a fake
> one (because no estimator is active)
>
> After this patch, tc command output is :
> $ tc -s -d qdisc
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
> Sent 561075 bytes 1196 pkt (dropped 0, overlimits 0 requeues 0)
> backlog 0b 0p requeues 0
>
> We add a parameter to gnet_stats_copy_rate_est() function so that
> it can use gen_estimator_active(bstats, r), as suggested by Jarek.
So you prefer the additional-parameter version. But since these
_active tests are not needed e.g. for HTB classes, which have the estimator
active by default, maybe passing bstats == NULL could skip such a test?
...
> --- a/include/net/gen_stats.h
> +++ b/include/net/gen_stats.h
> @@ -30,6 +30,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
> extern int gnet_stats_copy_basic(struct gnet_dump *d,
> struct gnet_stats_basic_packed *b);
> extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
> + const struct gnet_stats_basic_packed *bstats,
It seems the parameter names in these declarations (*b vs. *bstats) could be
more consistent. Otherwise it looks OK to me.
Thanks,
Jarek P.
^ permalink raw reply
* Re: [BUG net-2.6] bluetooth/rfcomm : sleeping function called from invalid context at mm/slub.c:1719
From: Dave Young @ 2009-10-02 11:01 UTC (permalink / raw)
To: Oliver Hartkopp; +Cc: Marcel Holtmann, Linux Netdev List, linux-bluetooth
In-Reply-To: <4AC59D8A.6000102@hartkopp.net>
On Fri, Oct 2, 2009 at 2:28 PM, Oliver Hartkopp <oliver@hartkopp.net> wrote:
> Hello Marcel,
>
> with current net-2.6 tree ...
>
> While starting my PPP Bluetooth dialup networking, i got this:
Hi Oliver,
please try the following patch:
http://patchwork.kernel.org/patch/51326/
>
> [ 722.461549] PPP generic driver version 2.4.2
> [ 722.477519] BUG: sleeping function called from invalid context at
> mm/slub.c:1719
> [ 722.477530] in_atomic(): 1, irqs_disabled(): 0, pid: 4677, name: pppd
> [ 722.477537] 3 locks held by pppd/4677:
> [ 722.477542] #0: (rfcomm_mutex){+.+.+.}, at: [<fa5df2a1>]
> rfcomm_dlc_open+0x28/0x2d6 [rfcomm]
> [ 722.477568] #1: (sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+.+.}, at:
> [<fa5414f8>] l2cap_sock_connect+0x62/0x2c6 [l2cap]
> [ 722.477589] #2: (&hdev->lock){+...+.}, at: [<fa5415b4>]
> l2cap_sock_connect+0x11e/0x2c6 [l2cap]
> [ 722.477613] Pid: 4677, comm: pppd Not tainted 2.6.31-08939-gdb8abec-dirty #21
> [ 722.477619] Call Trace:
> [ 722.477633] [<c1042a2b>] ? __debug_show_held_locks+0x1e/0x20
> [ 722.477644] [<c10212a1>] __might_sleep+0xc9/0xce
> [ 722.477655] [<c1078b62>] __kmalloc+0x6d/0xfb
> [ 722.477666] [<c119e739>] ? kzalloc+0xb/0xd
> [ 722.477674] [<c119e739>] kzalloc+0xb/0xd
> [ 722.477683] [<c119ef1a>] device_private_init+0x15/0x3d
> [ 722.477693] [<c11a0e1b>] dev_set_drvdata+0x18/0x26
> [ 722.477718] [<f8b7ca1b>] hci_conn_init_sysfs+0x3d/0xc7 [bluetooth]
> [ 722.477737] [<f8b791b3>] hci_conn_add+0x1c0/0x1d5 [bluetooth]
> [ 722.477756] [<f8b79360>] hci_connect+0x71/0x17d [bluetooth]
> [ 722.477769] [<fa54162c>] l2cap_sock_connect+0x196/0x2c6 [l2cap]
> [ 722.477782] [<c1246e3d>] kernel_connect+0xd/0x12
> [ 722.477795] [<fa5df3c3>] rfcomm_dlc_open+0x14a/0x2d6 [rfcomm]
> [ 722.477810] [<fa5e10fa>] ? rfcomm_tty_open+0x73/0x227 [rfcomm]
> [ 722.477825] [<fa5e1130>] rfcomm_tty_open+0xa9/0x227 [rfcomm]
> [ 722.477836] [<c1022e3f>] ? default_wake_function+0x0/0xd
> [ 722.477847] [<c1180c79>] tty_open+0x29e/0x399
> [ 722.477858] [<c107e9bd>] chrdev_open+0x13f/0x156
> [ 722.477868] [<c107b0d3>] __dentry_open+0x11b/0x20f
> [ 722.477878] [<c107b261>] nameidata_to_filp+0x2c/0x43
> [ 722.477888] [<c107e87e>] ? chrdev_open+0x0/0x156
> [ 722.477898] [<c1084e9e>] do_filp_open+0x3c6/0x70a
> [ 722.477910] [<c108d3e4>] ? alloc_fd+0xc8/0xd2
> [ 722.477920] [<c108d3e4>] ? alloc_fd+0xc8/0xd2
> [ 722.477930] [<c107aebc>] do_sys_open+0x4a/0xe7
> [ 722.477940] [<c1002acc>] ? restore_all_notrace+0x0/0x18
> [ 722.477950] [<c107af9b>] sys_open+0x1e/0x26
> [ 722.477959] [<c1002a18>] sysenter_do_call+0x12/0x36
> [ 729.658613] PPP BSD Compression module registered
> [ 729.684789] PPP Deflate Compression module registered
>
> Any idea?
>
> Regards,
> Oliver
> --
> To unsubscribe from this list: send the line "unsubscribe linux-bluetooth" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
Regards
dave
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox