* [PATCH 4/4 v3] net: Fix for dst_negative_advice
From: Krishna Kumar @ 2009-10-18 13:08 UTC (permalink / raw)
To: davem; +Cc: netdev, herbert, Krishna Kumar, dada1
In-Reply-To: <20091018130727.3960.32107.sendpatchset@localhost.localdomain>
From: Krishna Kumar <krkumar2@in.ibm.com>
dst_negative_advice() should check for changed dst and reset
sk_tx_queue_mapping accordingly. Pass sock to the callers of
dst_negative_advice.
(sk_reset_txq is defined just for use by dst_negative_advice. The
only way I could find to get around this is to move dst_negative_advice()
from dst.h to dst.c, include sock.h in dst.c, etc)
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
include/net/dst.h | 12 ++++++++++--
net/core/sock.c | 6 ++++++
net/dccp/timer.c | 4 ++--
net/decnet/af_decnet.c | 2 +-
net/ipv4/tcp_timer.c | 4 ++--
5 files changed, 21 insertions(+), 7 deletions(-)
diff -ruNp org/include/net/dst.h new/include/net/dst.h
--- org/include/net/dst.h 2009-10-16 21:30:56.000000000 +0530
+++ new/include/net/dst.h 2009-10-16 21:31:30.000000000 +0530
@@ -222,11 +222,19 @@ static inline void dst_confirm(struct ds
neigh_confirm(dst->neighbour);
}
-static inline void dst_negative_advice(struct dst_entry **dst_p)
+static inline void dst_negative_advice(struct dst_entry **dst_p,
+ struct sock *sk)
{
struct dst_entry * dst = *dst_p;
- if (dst && dst->ops->negative_advice)
+ if (dst && dst->ops->negative_advice) {
*dst_p = dst->ops->negative_advice(dst);
+
+ if (dst != *dst_p) {
+ extern void sk_reset_txq(struct sock *sk);
+
+ sk_reset_txq(sk);
+ }
+ }
}
static inline void dst_link_failure(struct sk_buff *skb)
diff -ruNp org/net/core/sock.c new/net/core/sock.c
--- org/net/core/sock.c 2009-10-16 21:30:56.000000000 +0530
+++ new/net/core/sock.c 2009-10-16 21:32:33.000000000 +0530
@@ -352,6 +352,12 @@ discard_and_relse:
}
EXPORT_SYMBOL(sk_receive_skb);
+void sk_reset_txq(struct sock *sk)
+{
+ sk_tx_queue_clear(sk);
+}
+EXPORT_SYMBOL(sk_reset_txq);
+
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = sk->sk_dst_cache;
diff -ruNp org/net/dccp/timer.c new/net/dccp/timer.c
--- org/net/dccp/timer.c 2009-10-16 21:30:56.000000000 +0530
+++ new/net/dccp/timer.c 2009-10-16 21:31:30.000000000 +0530
@@ -38,7 +38,7 @@ static int dccp_write_timeout(struct soc
if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
if (icsk->icsk_retransmits != 0)
- dst_negative_advice(&sk->sk_dst_cache);
+ dst_negative_advice(&sk->sk_dst_cache, sk);
retry_until = icsk->icsk_syn_retries ?
: sysctl_dccp_request_retries;
} else {
@@ -63,7 +63,7 @@ static int dccp_write_timeout(struct soc
Golden words :-).
*/
- dst_negative_advice(&sk->sk_dst_cache);
+ dst_negative_advice(&sk->sk_dst_cache, sk);
}
retry_until = sysctl_dccp_retries2;
diff -ruNp org/net/decnet/af_decnet.c new/net/decnet/af_decnet.c
--- org/net/decnet/af_decnet.c 2009-10-16 21:30:56.000000000 +0530
+++ new/net/decnet/af_decnet.c 2009-10-16 21:31:30.000000000 +0530
@@ -1955,7 +1955,7 @@ static int dn_sendmsg(struct kiocb *iocb
}
if ((flags & MSG_TRYHARD) && sk->sk_dst_cache)
- dst_negative_advice(&sk->sk_dst_cache);
+ dst_negative_advice(&sk->sk_dst_cache, sk);
mss = scp->segsize_rem;
fctype = scp->services_rem & NSP_FC_MASK;
diff -ruNp org/net/ipv4/tcp_timer.c new/net/ipv4/tcp_timer.c
--- org/net/ipv4/tcp_timer.c 2009-10-16 21:30:56.000000000 +0530
+++ new/net/ipv4/tcp_timer.c 2009-10-16 21:31:30.000000000 +0530
@@ -141,14 +141,14 @@ static int tcp_write_timeout(struct sock
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
- dst_negative_advice(&sk->sk_dst_cache);
+ dst_negative_advice(&sk->sk_dst_cache, sk);
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
} else {
if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
- dst_negative_advice(&sk->sk_dst_cache);
+ dst_negative_advice(&sk->sk_dst_cache, sk);
}
retry_until = sysctl_tcp_retries2;
^ permalink raw reply
* Re: PF_RING: Include in main line kernel?
From: Evgeniy Polyakov @ 2009-10-18 14:18 UTC (permalink / raw)
To: Harald Welte; +Cc: David Miller, greearb, deri, shemminger, brad.doctor, netdev
In-Reply-To: <20091018124337.GE27747@prithivi.gnumonks.org>
On Sun, Oct 18, 2009 at 02:43:37PM +0200, Harald Welte (laforge@gnumonks.org) wrote:
> How does it make it any easier? Even right now you can implement an entire
> protocol family in your own module, either by registering as netpoll handler,
> or even using the regular dev_add_pack().
Well, it does, since packet will be processed by the main stack after
that, and module will work with the copy only. But I agree that this is
a weak argument.
If it is still a blocking one, what about implementing additional
gpl-only list of handlers which will have 'consumed' skb check? I
believe it would be enough to put it only in single place after the
bridge?
--
Evgeniy Polyakov
^ permalink raw reply
* Re: PF_RING: Include in main line kernel?
From: Evgeniy Polyakov @ 2009-10-18 14:50 UTC (permalink / raw)
To: Harald Welte; +Cc: Luca Deri, Brent Cook, Brad Doctor, netdev
In-Reply-To: <20091018125014.GH27747@prithivi.gnumonks.org>
On Sun, Oct 18, 2009 at 02:50:14PM +0200, Harald Welte (laforge@gnumonks.org) wrote:
> > contrary to other socket types, PF_RING allows
> > - packets to be filtered using both BPF and ACL-like filters
> > - parsing information is returned as metadata with the packet (i.e.
> > you don't have to parse the packet again as it happens with BPF)
> > - ACL-like filters allows you to specify advanced features such as
> > port ranges or packet payload match
>
> So it seems there is some added features over the existing functionality, plus
> probably increased performance mainly to hooking earlier in the packet receive
> flow.
>
> What would normally be done is to try to make incremental changes
> to the existing code and extend their features/performance, rather than
> adding a relatively similar alternative.
PF_PACKET as is can not be made faster - it requires a packet copy, so
virtually this is an end of the game, while mapped packet socket is
quite different and does not require that expensive copy. And while
the difference between the two is currently shrinking, it still exists and
may hamper some use cases.
PF_RING uses another ring structure and I saw comparisons of both (many
years ago though), where pf_ring was faster. Unfortunately there is no
way to easily adopt its mapping into pf_packet ring without breaking
compatibility, but I wonder whether the performance difference between the
two still exists and whether it can be a main factor for the preference. If
the difference is not visible, then I believe the only way for PF_RING is to
extend existing packet sockets with its other features.
--
Evgeniy Polyakov
^ permalink raw reply
* Re: [net-next-2.6 PATCH 1/4 revised] TCPCT part 1a: extend struct tcp_request_sock
From: William Allen Simpson @ 2009-10-18 15:57 UTC (permalink / raw)
To: Linux Kernel Network Developers
In-Reply-To: <4AD8AFC0.1090101@gmail.com>
William Allen Simpson wrote:
> Pass additional parameters associated with sending SYNACK. This
> is not as straightforward or architecturally clean as previously
> proposed, and has the unfortunate side effect of potentially
> including otherwise unneeded headers for related protocols, but
> that problem will affect very few files.
> ---
> include/net/extend_request_sock.h | 37
> +++++++++++++++++++++++++++++++++++++
> 1 files changed, 37 insertions(+), 0 deletions(-)
> create mode 100644 include/net/extend_request_sock.h
>
This technique appears to be unworkable:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9971870..30c4808 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -71,6 +71,7 @@
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
+#include <net/extend_request_sock.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -1195,6 +1196,15 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.send_reset = tcp_v4_send_reset,
};
+struct request_sock_ops tcp4_extend_request_sock_ops __read_mostly = {
+ .family = PF_INET,
+ .obj_size = sizeof(struct extend_request_sock),
+ .rtx_syn_ack = tcp_v4_send_synack,
+ .send_ack = tcp_v4_reqsk_send_ack,
+ .destructor = tcp_v4_reqsk_destructor,
+ .send_reset = tcp_v4_send_reset,
+};
+
...
+ req = inet_reqsk_alloc(&tcp4_extend_request_sock_ops);
+ if (NULL == req)
+ goto drop;
+
Many hours of investigation demonstrated that the obj_size isn't actually
used to allocate the structure. Heck, it's not even checked to determine
whether there's enough room! Instead, the kernel crashes later, as the
extended variables are accessed!
Returning to the architecturally clean parameters of the previous patch
series, that has the distinct advantage of actually working....
^ permalink raw reply related
* Re: [PATCH/RFC] make unregister_netdev() delete more than 4 interfaces per second
From: Benjamin LaHaise @ 2009-10-18 16:13 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev
In-Reply-To: <4ADA98EE.9040509@gmail.com>
On Sun, Oct 18, 2009 at 06:26:22AM +0200, Eric Dumazet wrote:
> Unfortunately this slows down the fast path by an order of magnitude.
>
> atomic_dec() is pretty cheap (and eventually could use a per_cpu thing,
> now we have a new and sexy per_cpu allocator), but atomic_dec_and_test()
> is not that cheap and more important forbids a per_cpu conversion.
dev_put() is not a fast path by any means. atomic_dec_and_test() costs
the same as atomic_dec() on any modern CPU -- the cost is in the cacheline
bouncing and serialisation both require. The case of the device count
becoming 0 is quite rare -- any device with a route on it will never hit
a reference count of 0.
-ben
^ permalink raw reply
* [net-next-2.6 PATCH 1/4 resent] TCPCT part 1a: add function parameter for sending SYNACK
From: William Allen Simpson @ 2009-10-18 16:14 UTC (permalink / raw)
To: Linux Kernel Network Developers
[-- Attachment #1: Type: text/plain, Size: 906 bytes --]
Add optional function parameters associated with sending SYNACK.
These parameters are not needed after sending SYNACK, and are not
used for retransmission. Avoids extending struct tcp_request_sock,
and avoids allocating kernel memory.
Only affects DCCP as it also uses common struct request_sock_ops,
but this void parameter is currently reserved for future use.
---
include/net/request_sock.h | 3 ++-
include/net/tcp.h | 3 ++-
net/dccp/ipv4.c | 5 +++--
net/dccp/ipv6.c | 5 +++--
net/dccp/minisocks.c | 2 +-
net/ipv4/inet_connection_sock.c | 2 +-
net/ipv4/tcp_ipv4.c | 12 +++++++-----
net/ipv4/tcp_minisocks.c | 2 +-
net/ipv4/tcp_output.c | 2 +-
net/ipv6/tcp_ipv6.c | 14 +++++++-------
10 files changed, 28 insertions(+), 22 deletions(-)
[-- Attachment #2: TCPCT+1-1.patch --]
[-- Type: text/plain, Size: 7276 bytes --]
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index c719084..cdd9e8b 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -33,7 +33,8 @@ struct request_sock_ops {
struct kmem_cache *slab;
char *slab_name;
int (*rtx_syn_ack)(struct sock *sk,
- struct request_sock *req);
+ struct request_sock *req,
+ void *extend_values);
void (*send_ack)(struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
void (*send_reset)(struct sock *sk,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c7..28bcaf7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -443,7 +443,8 @@ extern int tcp_connect(struct sock *sk);
extern struct sk_buff * tcp_make_synack(struct sock *sk,
struct dst_entry *dst,
- struct request_sock *req);
+ struct request_sock *req,
+ void *extend_values);
extern int tcp_disconnect(struct sock *sk, int flags);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7302e14..6fc9ea3 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -473,7 +473,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
return &rt->u.dst;
}
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
+ void *extend_unused)
{
int err = -1;
struct sk_buff *skb;
@@ -622,7 +623,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
dreq->dreq_iss = dccp_v4_init_sequence(skb);
dreq->dreq_service = service;
- if (dccp_v4_send_response(sk, req))
+ if (dccp_v4_send_response(sk, req, NULL))
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index a2afb55..63fb189 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -241,7 +241,8 @@ out:
}
-static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
+static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
+ void *extend_unused)
{
struct inet6_request_sock *ireq6 = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -468,7 +469,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
dreq->dreq_iss = dccp_v6_init_sequence(skb);
dreq->dreq_service = service;
- if (dccp_v6_send_response(sk, req))
+ if (dccp_v6_send_response(sk, req, NULL))
goto drop_and_free;
inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 5ca49ce..af226a0 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -184,7 +184,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
* counter (backoff, monitored by dccp_response_timer).
*/
req->retrans++;
- req->rsk_ops->rtx_syn_ack(sk, req);
+ req->rsk_ops->rtx_syn_ack(sk, req, NULL);
}
/* Network Duplicate, discard packet */
return NULL;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 9139e8f..b7314f2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -504,7 +504,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
if (time_after_eq(now, req->expires)) {
if ((req->retrans < thresh ||
(inet_rsk(req)->acked && req->retrans < max_retries))
- && !req->rsk_ops->rtx_syn_ack(parent, req)) {
+ && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
unsigned long timeo;
if (req->retrans++ == 0)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9971870..2d25bd4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -742,7 +742,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
* socket.
*/
static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
- struct dst_entry *dst)
+ struct dst_entry *dst, void *extend_values)
{
const struct inet_request_sock *ireq = inet_rsk(req);
int err = -1;
@@ -752,7 +752,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
return -1;
- skb = tcp_make_synack(sk, dst, req);
+ skb = tcp_make_synack(sk, dst, req, extend_values);
if (skb) {
struct tcphdr *th = tcp_hdr(skb);
@@ -773,9 +773,10 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
return err;
}
-static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
+static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
+ void *extend_values)
{
- return __tcp_v4_send_synack(sk, req, NULL);
+ return __tcp_v4_send_synack(sk, req, NULL, extend_values);
}
/*
@@ -1333,7 +1334,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
}
tcp_rsk(req)->snt_isn = isn;
- if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
+ if (__tcp_v4_send_synack(sk, req, dst, NULL) ||
+ want_cookie)
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e320afe..8819882 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -537,7 +537,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* Enforce "SYN-ACK" according to figure 8, figure 6
* of RFC793, fixed by RFC1122.
*/
- req->rsk_ops->rtx_syn_ack(sk, req);
+ req->rsk_ops->rtx_syn_ack(sk, req, NULL);
return NULL;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fcd278a..765d80f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2219,7 +2219,7 @@ int tcp_send_synack(struct sock *sk)
/* Prepare a SYN-ACK. */
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
- struct request_sock *req)
+ struct request_sock *req, void *extend_values)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4517630..3b3d7b3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -460,7 +460,8 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
+static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
+ void *extend_values)
{
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -498,7 +499,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
goto done;
- skb = tcp_make_synack(sk, dst, req);
+ skb = tcp_make_synack(sk, dst, req, extend_values);
if (skb) {
struct tcphdr *th = tcp_hdr(skb);
@@ -1242,13 +1243,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
security_inet_conn_request(sk, skb, req);
- if (tcp_v6_send_synack(sk, req))
+ if (tcp_v6_send_synack(sk, req, NULL) ||
+ want_cookie)
goto drop;
- if (!want_cookie) {
- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- return 0;
- }
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ return 0;
drop:
if (req)
--
1.6.0.4
^ permalink raw reply related
* [net-next-2.6 PATCH 4/4 resent] TCPCT part 1d: initial SYN exchange with SYNACK data
From: William Allen Simpson @ 2009-10-18 16:28 UTC (permalink / raw)
To: Linux Kernel Network Developers
[-- Attachment #1: Type: text/plain, Size: 2666 bytes --]
-------- Original Message --------
Subject: [net-next-2.6 PATCH 4/4] TCPCT part 1: initial SYN exchange with SYNACK data
Date: Thu, 15 Oct 2009 01:36:48 -0400
From: William Allen Simpson <william.allen.simpson@gmail.com>
To: Linux Kernel Network Developers <netdev@vger.kernel.org>
References: <4AD6B31B.3060402@gmail.com> <4AD6B3E8.2050904@gmail.com> <4AD6B467.2080701@gmail.com>
This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley). That patch was previously reviewed:
http://thread.gmane.org/gmane.linux.network/102586
The principal difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data. This is more flexible and
less subject to user configuration error. Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.
"Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
"Re: what a new TCP header might look like", May 12, 1998.
ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration). There are no additions to
tcp_request_sock, and only 1 pointer and 1 flag byte in tcp_sock.
Allocations have been rearranged to avoid requiring GFP_ATOMIC, with
only one unavoidable exception in tcp_create_openreq_child(), where the
tcp_sock itself is created GFP_ATOMIC.
These functions will also be used in subsequent patches that implement
additional features.
Requires:
TCPCT part 1a: add function parameter for sending SYNACK
TCPCT part 1b: sysctl_tcp_cookie_size and TCP_COOKIE_TRANSACTIONS
TCPCT part 1c: redefine TCP header functions *_len_th(), cleanup
---
include/linux/tcp.h | 35 +++++++-
include/net/tcp.h | 72 ++++++++++++++--
net/ipv4/syncookies.c | 5 +-
net/ipv4/tcp.c | 133 +++++++++++++++++++++++++++-
net/ipv4/tcp_input.c | 82 +++++++++++++++---
net/ipv4/tcp_ipv4.c | 62 +++++++++++--
net/ipv4/tcp_minisocks.c | 43 +++++++---
net/ipv4/tcp_output.c | 223 ++++++++++++++++++++++++++++++++++++++++++---
net/ipv6/syncookies.c | 5 +-
net/ipv6/tcp_ipv6.c | 47 +++++++++-
10 files changed, 641 insertions(+), 66 deletions(-)
[-- Attachment #2: TCPCT+1-4.patch --]
[-- Type: text/plain, Size: 39341 bytes --]
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index d304ba5..1c9a1d1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -252,26 +252,36 @@ struct tcp_options_received {
sack_ok : 4, /* SACK seen on SYN packet */
snd_wscale : 4, /* Window scaling received from sender */
rcv_wscale : 4; /* Window scaling to send to receiver */
-/* SACKs data */
+ u8 cookie_plus: 6; /* bytes in authenticator/cookie option */
u8 num_sacks; /* Number of SACK blocks */
- u16 user_mss; /* mss requested by user in ioctl */
+ u16 user_mss; /* mss requested by user in ioctl */
u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
};
+static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
+{
+ rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
+ rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+ rx_opt->cookie_plus = 0;
+}
+
/* This is the max number of SACKS that we'll generate and process. It's safe
* to increse this, although since:
* size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
* only four options will fit in a standard TCP header */
#define TCP_NUM_SACKS 4
+struct tcp_cookie_values;
+struct tcp_request_sock_ops;
+
struct tcp_request_sock {
struct inet_request_sock req;
#ifdef CONFIG_TCP_MD5SIG
/* Only used by TCP MD5 Signature so far. */
const struct tcp_request_sock_ops *af_specific;
#endif
- u32 rcv_isn;
- u32 snt_isn;
+ u32 rcv_isn;
+ u32 snt_isn;
};
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -441,6 +451,19 @@ struct tcp_sock {
/* TCP MD5 Signature Option information */
struct tcp_md5sig_info *md5sig_info;
#endif
+
+ /* When the cookie options are generated and exchanged, then this
+ * object holds a reference to them (cookie_values->kref). Also
+ * contains related tcp_cookie_transactions fields.
+ */
+ struct tcp_cookie_values *cookie_values;
+
+ u8 cookie_in_always:1,
+ cookie_out_never:1,
+ extend_timestamp:1,
+ s_data_constant:1,
+ s_data_in:1,
+ s_data_out:1;
};
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -459,6 +482,10 @@ struct tcp_timewait_sock {
u16 tw_md5_keylen;
u8 tw_md5_key[TCP_MD5SIG_MAXKEYLEN];
#endif
+ /* Few sockets in timewait have cookies; in that case, then this
+ * object holds a reference to it (tw_cookie_values->kref)
+ */
+ struct tcp_cookie_values *tw_cookie_values;
};
static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 63d17fd..a2d2c0f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
#include <linux/dmaengine.h>
#include <linux/crypto.h>
#include <linux/cryptohash.h>
+#include <linux/kref.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
@@ -167,6 +168,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */
/*
* TCP option lengths
@@ -177,6 +179,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_SACK_PERM 2
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_MD5SIG 18
+#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */
+#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */
+#define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)
+#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED 12
@@ -344,11 +350,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
extern void tcp_enter_quickack_mode(struct sock *sk);
-static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
-{
- rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
-}
-
#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
@@ -410,7 +411,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
extern void tcp_parse_options(struct sk_buff *skb,
struct tcp_options_received *opt_rx,
- int estab);
+ u8 **cryptic, int estab);
extern u8 *tcp_parse_md5sig_option(struct tcphdr *th);
@@ -1482,6 +1483,65 @@ struct tcp_request_sock_ops {
#endif
};
+/**
+ * A tcp_sock contains a pointer to the current value, and this is cloned to
+ * the tcp_timewait_sock.
+ *
+ * @cookie_pair: variable data from the option exchange.
+ *
+ * @cookie_desired: user specified tcpct_cookie_desired. Zero
+ * indicates default (sysctl_tcp_cookie_size).
+ * After cookie sent, remembers size of cookie.
+ *
+ * @s_data_desired: user specified tcpct_s_data_desired. When the
+ * constant payload is specified (s_data_constant),
+ * holds its length instead.
+ *
+ * @s_data_payload: constant data that is to be included in the
+ * payload of SYN or SYNACK segments when the
+ * cookie option is present.
+ */
+struct tcp_cookie_values {
+ struct kref kref;
+ u8 cookie_pair[TCP_COOKIE_PAIR_SIZE];
+ u8 cookie_pair_size;
+ u8 cookie_desired;
+ u16 s_data_desired;
+ u8 s_data_payload[0];
+};
+
+static inline void tcp_cookie_values_release(struct kref *kref)
+{
+ kfree(container_of(kref, struct tcp_cookie_values, kref));
+}
+
+/* The length of constant payload data. Note that s_data_desired is
+ * overloaded, depending on s_data_constant: either the length of constant
+ * data (returned here) or the limit on variable data.
+ */
+static inline int tcp_s_data_size(const struct tcp_sock *tp)
+{
+ return (NULL != tp->cookie_values && tp->s_data_constant)
+ ? tp->cookie_values->s_data_desired
+ : 0;
+}
+
+/* As tcp_request_sock has already been extended in other places, the
+ * only remaining method is to pass stack values along as function
+ * parameters. These parameters are not needed after sending SYNACK.
+ */
+struct tcp_extend_values {
+ u8 cookie_bakery[TCP_COOKIE_MAX];
+ u8 cookie_plus;
+ u8 cookie_in_always:1,
+ cookie_out_never:1;
+};
+
+static inline struct tcp_extend_values *tcp_xv(const void *extend_values)
+{
+ return (struct tcp_extend_values *)extend_values;
+}
+
extern void tcp_v4_init(void);
extern void tcp_init(void);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5ec678a..cdab491 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -253,6 +253,8 @@ EXPORT_SYMBOL(cookie_check_timestamp);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt)
{
+ struct tcp_options_received tcp_opt;
+ u8 *cryptic_value;
struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct tcp_sock *tp = tcp_sk(sk);
@@ -263,7 +265,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
int mss;
struct rtable *rt;
__u8 rcv_wscale;
- struct tcp_options_received tcp_opt;
if (!sysctl_tcp_syncookies || !th->ack)
goto out;
@@ -278,7 +279,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, 0);
+ tcp_parse_options(skb, &tcp_opt, &cryptic_value, 0);
if (tcp_opt.saw_tstamp)
cookie_check_timestamp(&tcp_opt);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf13726..0b47ffe 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2039,8 +2039,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
int val;
int err = 0;
- /* This is a string value all the others are int's */
- if (optname == TCP_CONGESTION) {
+ /* These are data/string values, all the others are ints */
+ if (TCP_CONGESTION == optname) {
char name[TCP_CA_NAME_MAX];
if (optlen < 1)
@@ -2056,6 +2056,95 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
err = tcp_set_congestion_control(sk, name);
release_sock(sk);
return err;
+ } else if (TCP_COOKIE_TRANSACTIONS == optname) {
+ struct tcp_cookie_transactions ctd;
+ struct tcp_cookie_values *cvp = NULL;
+
+ if (sizeof(ctd) > optlen) {
+ return -EINVAL;
+ }
+ if (copy_from_user(&ctd, optval, sizeof(ctd))) {
+ return -EFAULT;
+ }
+ if (sizeof(ctd.tcpct_value) < ctd.tcpct_used) {
+ return -EINVAL;
+ }
+ if (0 == ctd.tcpct_cookie_desired) {
+ /* default to global value */
+ } else if ((0x1 & ctd.tcpct_cookie_desired)
+ || TCP_COOKIE_MAX < ctd.tcpct_cookie_desired
+ || TCP_COOKIE_MIN > ctd.tcpct_cookie_desired) {
+ return -EINVAL;
+ }
+
+ if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
+ /* Supercedes all other values */
+ lock_sock(sk);
+ if (NULL != tp->cookie_values) {
+ kref_put(&tp->cookie_values->kref,
+ tcp_cookie_values_release);
+ tp->cookie_values = NULL;
+ }
+ tp->cookie_in_always = 0; /* false */
+ tp->cookie_out_never = 1; /* true */
+ tp->extend_timestamp = 0; /* false */
+ tp->s_data_constant = 0; /* false */
+ tp->s_data_in = 0; /* false */
+ tp->s_data_out = 0; /* false */
+ release_sock(sk);
+ return err;
+ }
+
+ /* Allocate ancillary memory before locking.
+ */
+ if (0 < ctd.tcpct_used
+ || (NULL == tp->cookie_values
+ && (0 < sysctl_tcp_cookie_size
+ || 0 < ctd.tcpct_cookie_desired
+ || 0 < ctd.tcpct_s_data_desired))) {
+ cvp = kmalloc(sizeof(*cvp) + ctd.tcpct_used,
+ GFP_KERNEL);
+ if (NULL == cvp) {
+ return -ENOMEM;
+ }
+ }
+
+ lock_sock(sk);
+ tp->cookie_in_always = (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
+ tp->cookie_out_never = 0; /* false */
+ tp->extend_timestamp = (TCP_EXTEND_TIMESTAMP & ctd.tcpct_flags);
+ tp->s_data_in = 0; /* false */
+ tp->s_data_out = 0; /* false */
+
+ if (NULL == cvp) {
+ /* No cookies by default. */
+ tp->s_data_constant = 0; /* false */
+ } else if (0 == ctd.tcpct_used) {
+ /* No constant payload data. */
+ cvp->cookie_desired = ctd.tcpct_cookie_desired;
+ cvp->s_data_desired = ctd.tcpct_s_data_desired;
+ tp->cookie_values = cvp;
+ tp->s_data_constant = 0; /* false */
+ } else {
+ /* Changes in values are recorded by a change in
+ * pointer, ensuring that the cookie will differ,
+ * without separately hashing each value later.
+ */
+ if (unlikely(NULL != tp->cookie_values)) {
+ kref_put(&tp->cookie_values->kref,
+ tcp_cookie_values_release);
+ }
+ kref_init(&cvp->kref);
+ memcpy(cvp->s_data_payload, ctd.tcpct_value,
+ ctd.tcpct_used);
+ cvp->cookie_desired = ctd.tcpct_cookie_desired;
+ cvp->s_data_desired = ctd.tcpct_used;
+ tp->cookie_values = cvp;
+ tp->s_data_constant = 1; /* true */
+ }
+
+ release_sock(sk);
+ return err;
}
if (optlen < sizeof(int))
@@ -2387,6 +2476,46 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
return -EFAULT;
return 0;
+
+ case TCP_COOKIE_TRANSACTIONS: {
+ struct tcp_cookie_transactions ctd;
+ struct tcp_cookie_values *cvp = tp->cookie_values;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+ if (len < sizeof(ctd))
+ return -EINVAL;
+
+ memset(&ctd, 0, sizeof(ctd));
+ ctd.tcpct_flags =
+ (tp->cookie_in_always ? TCP_COOKIE_IN_ALWAYS : 0)
+ | (tp->cookie_out_never ? TCP_COOKIE_OUT_NEVER : 0)
+ | (tp->extend_timestamp ? TCP_EXTEND_TIMESTAMP : 0)
+ | (tp->s_data_in ? TCP_S_DATA_IN : 0)
+ | (tp->s_data_out ? TCP_S_DATA_OUT : 0);
+
+ if (NULL != cvp) {
+ /* Cookie(s) saved, return as nonce */
+ if (sizeof(ctd.tcpct_value) < cvp->cookie_pair_size) {
+ /* impossible? */
+ return -EINVAL;
+ }
+ memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
+ cvp->cookie_pair_size);
+ ctd.tcpct_used = cvp->cookie_pair_size;
+
+ ctd.tcpct_cookie_desired = cvp->cookie_desired;
+ ctd.tcpct_s_data_desired = cvp->s_data_desired;
+ }
+
+ if (copy_to_user(optval, &ctd, sizeof(ctd))) {
+ return -EFAULT;
+ }
+ if (put_user(sizeof(ctd), optlen)) {
+ return -EFAULT;
+ }
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d86784b..200afa8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3698,11 +3698,11 @@ old_ack:
* the fast version below fails.
*/
void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
- int estab)
+ u8 **cryptic, int estab)
{
unsigned char *ptr;
struct tcphdr *th = tcp_hdr(skb);
- int length = (th->doff * 4) - sizeof(struct tcphdr);
+ int length = tcp_option_len_th(th);
ptr = (unsigned char *)(th + 1);
opt_rx->saw_tstamp = 0;
@@ -3782,6 +3782,19 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
*/
break;
#endif
+ case TCPOPT_COOKIE:
+ /* This option carries 3 different lengths.
+ */
+ if (TCPOLEN_COOKIE_MAX >= opsize
+ && TCPOLEN_COOKIE_MIN <= opsize) {
+ opt_rx->cookie_plus = opsize;
+ *cryptic = ptr;
+ } else if (TCPOLEN_COOKIE_PAIR == opsize) {
+ /* not yet implemented */
+ } else if (TCPOLEN_COOKIE_BASE == opsize) {
+ /* not yet implemented */
+ }
+ break;
}
ptr += opsize-2;
@@ -3810,17 +3823,21 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
* If it is wrong it falls back on tcp_parse_options().
*/
static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
- struct tcp_sock *tp)
+ struct tcp_sock *tp, u8 **cryptic)
{
- if (th->doff == sizeof(struct tcphdr) >> 2) {
+ /* In the spirit of fast parsing, compare doff directly to shifted
+ * constant values. Because equality is used, short doff can be
+ * ignored here, and checked later.
+ */
+ if ((sizeof(*th) >> 2) == th->doff) {
tp->rx_opt.saw_tstamp = 0;
return 0;
} else if (tp->rx_opt.tstamp_ok &&
- th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
+ ((sizeof(*th)+TCPOLEN_TSTAMP_ALIGNED)>>2) == th->doff) {
if (tcp_parse_aligned_timestamp(tp, th))
return 1;
}
- tcp_parse_options(skb, &tp->rx_opt, 1);
+ tcp_parse_options(skb, &tp->rx_opt, cryptic, 1);
return 1;
}
@@ -3830,7 +3847,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
*/
u8 *tcp_parse_md5sig_option(struct tcphdr *th)
{
- int length = (th->doff << 2) - sizeof (*th);
+ int length = tcp_option_len_th(th);
u8 *ptr = (u8*)(th + 1);
/* If the TCP option is too short, we can short cut */
@@ -5070,10 +5087,11 @@ out:
static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, int syn_inerr)
{
+ u8 *cv;
struct tcp_sock *tp = tcp_sk(sk);
/* RFC1323: H1. Apply PAWS check first. */
- if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+ if (tcp_fast_parse_options(skb, th, tp, &cv) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@ -5361,11 +5379,14 @@ discard:
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ u8 *cryptic_value;
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_cookie_values *cvp = tp->cookie_values;
int saved_clamp = tp->rx_opt.mss_clamp;
+ int queued = 0;
- tcp_parse_options(skb, &tp->rx_opt, 0);
+ tcp_parse_options(skb, &tp->rx_opt, &cryptic_value, 0);
if (th->ack) {
/* rfc793:
@@ -5462,6 +5483,42 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* Change state from SYN-SENT only after copied_seq
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
+
+ if (NULL != cvp
+ && 0 < cvp->cookie_pair_size
+ && 0 < tp->rx_opt.cookie_plus) {
+ int cookie_size = tp->rx_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
+ int cookie_pair_size = cvp->cookie_desired + cookie_size;
+
+ /* A cookie extension option was sent and returned.
+ * Note that each incoming SYNACK replaces the
+ * Responder cookie. The initial exchange is most
+ * fragile, as protection against spoofing relies
+ * entirely upon the sequence and timestamp (above).
+ * This replacement strategy allows the correct pair to
+ * pass through, while any others will be filtered via
+ * Responder verification later.
+ */
+ if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
+ memcpy(&cvp->cookie_pair[cvp->cookie_desired],
+ cryptic_value, cookie_size);
+ cvp->cookie_pair_size = cookie_pair_size;
+ }
+
+ if (tcp_header_len_th(th) < skb->len) {
+ /* Queue incoming transaction data. */
+ __skb_pull(skb, tcp_header_len_th(th));
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_set_owner_r(skb, sk);
+ sk->sk_data_ready(sk, 0);
+ tp->s_data_in = 1; /* true */
+ queued = 1; /* should be amount? */
+ tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tp->rcv_wup = TCP_SKB_CB(skb)->end_seq;
+ tp->copied_seq = TCP_SKB_CB(skb)->seq + 1;
+ }
+ }
+
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
@@ -5513,11 +5570,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
TCP_DELACK_MAX, TCP_RTO_MAX);
discard:
- __kfree_skb(skb);
+ if (0 == queued)
+ __kfree_skb(skb);
return 0;
} else {
tcp_send_ack(sk);
}
+ if (0 < queued)
+ return 0; /* amount queued? */
return -1;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2d25bd4..7d5fd4d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -217,7 +217,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (inet->opt)
inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
- tp->rx_opt.mss_clamp = 536;
+ tp->rx_opt.mss_clamp = TCP_MIN_RCVMSS;
/* Socket identity is still unknown (sport may be zero).
* However we set state to SYN-SENT and not releasing socket
@@ -1211,9 +1211,12 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct inet_request_sock *ireq;
+ struct tcp_extend_values tmp_ext;
struct tcp_options_received tmp_opt;
+ u8 *cryptic_value;
+ struct inet_request_sock *ireq;
struct request_sock *req;
+ struct tcp_sock *tp = tcp_sk(sk);
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
@@ -1258,16 +1261,37 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
#endif
tcp_clear_options(&tmp_opt);
- tmp_opt.mss_clamp = 536;
- tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
+ tmp_opt.mss_clamp = TCP_MIN_RCVMSS;
+ tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0);
+ tcp_parse_options(skb, &tmp_opt, &cryptic_value, 0);
+
+ if (0 < tmp_opt.cookie_plus
+ && tmp_opt.saw_tstamp
+ && !tp->cookie_out_never
+ && (0 < sysctl_tcp_cookie_size
+ || (NULL != tp->cookie_values
+ && 0 < tp->cookie_values->cookie_desired))) {
+#ifdef CONFIG_SYN_COOKIES
+ want_cookie = 0; /* not our kind of cookie */
+#endif
+ tmp_ext.cookie_out_never = 0; /* false */
+ tmp_ext.cookie_plus = tmp_opt.cookie_plus;
+
+ /* secret recipe not yet implemented */
+ } else if (!tp->cookie_in_always) {
+ /* redundant indications, but ensure initialization. */
+ tmp_ext.cookie_out_never = 1; /* true */
+ tmp_ext.cookie_plus = 0;
+ } else {
+ goto drop_and_free;
+ }
+ tmp_ext.cookie_in_always = tp->cookie_in_always;
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
-
tcp_openreq_init(req, &tmp_opt, skb);
ireq = inet_rsk(req);
@@ -1334,7 +1358,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
}
tcp_rsk(req)->snt_isn = isn;
- if (__tcp_v4_send_synack(sk, req, dst, NULL) ||
+ if (__tcp_v4_send_synack(sk, req, dst, (void *)&tmp_ext) ||
want_cookie)
goto drop_and_free;
@@ -1812,7 +1836,7 @@ static int tcp_v4_init_sock(struct sock *sk)
*/
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_clamp = ~0;
- tp->mss_cache = 536;
+ tp->mss_cache = TCP_MIN_RCVMSS;
tp->reordering = sysctl_tcp_reordering;
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
@@ -1828,6 +1852,19 @@ static int tcp_v4_init_sock(struct sock *sk)
tp->af_specific = &tcp_sock_ipv4_specific;
#endif
+ /* TCP Cookie Transactions */
+ if (0 < sysctl_tcp_cookie_size) {
+ /* Default, cookies without s_data. */
+ tp->cookie_values =
+ kzalloc(sizeof(*tp->cookie_values), sk->sk_allocation);
+ if (NULL != tp->cookie_values) {
+ kref_init(&tp->cookie_values->kref);
+ }
+ }
+ /* Presumed zeroed, in order of appearance:
+ * cookie_in_always, cookie_out_never, extend_timestamp,
+ * s_data_constant, s_data_in, s_data_out
+ */
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -1881,6 +1918,15 @@ void tcp_v4_destroy_sock(struct sock *sk)
sk->sk_sndmsg_page = NULL;
}
+ /*
+ * If cookie or s_data exists, remove it.
+ */
+ if (NULL != tp->cookie_values) {
+ kref_put(&tp->cookie_values->kref,
+ tcp_cookie_values_release);
+ tp->cookie_values = NULL;
+ }
+
percpu_counter_dec(&tcp_sockets_allocated);
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8819882..0d33f5c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -96,13 +96,14 @@ enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
const struct tcphdr *th)
{
- struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
struct tcp_options_received tmp_opt;
+ u8 *cryptic_value;
+ struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
int paws_reject = 0;
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
- tcp_parse_options(skb, &tmp_opt, 0);
+ tcp_parse_options(skb, &tmp_opt, &cryptic_value, 0);
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = tcptw->tw_ts_recent;
@@ -394,9 +395,12 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
/* Now setup tcp_sock */
newtp = tcp_sk(newsk);
newtp->pred_flags = 0;
- newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
- newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
- newtp->snd_up = treq->snt_isn + 1;
+
+ newtp->rcv_wup = newtp->copied_seq =
+ newtp->rcv_nxt = treq->rcv_isn + 1;
+
+ newtp->snd_sml = newtp->snd_una = newtp->snd_nxt =
+ newtp->snd_up = treq->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk));
tcp_prequeue_init(newtp);
@@ -429,9 +433,24 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
skb_queue_head_init(&newtp->out_of_order_queue);
- newtp->write_seq = treq->snt_isn + 1;
- newtp->pushed_seq = newtp->write_seq;
+ newtp->write_seq = newtp->pushed_seq =
+ treq->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk));
+ /* TCP Cookie Transactions */
+ if (NULL != tcp_sk(sk)->cookie_values) {
+ /* Instead of reusing the original, replace with
+ * default, cookies without s_data.
+ */
+ newtp->cookie_values =
+ kzalloc(sizeof(*newtp->cookie_values), GFP_ATOMIC);
+ if (NULL != newtp->cookie_values) {
+ kref_init(&newtp->cookie_values->kref);
+ }
+ }
+ /* Presumed copied, in order of appearance:
+ * cookie_in_always, cookie_out_never, extend_timestamp,
+ * s_data_constant, s_data_in, s_data_out
+ */
newtp->rx_opt.saw_tstamp = 0;
newtp->rx_opt.dsack = 0;
@@ -495,15 +514,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev)
{
+ struct tcp_options_received tmp_opt;
+ u8 *cryptic_value;
const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
int paws_reject = 0;
- struct tcp_options_received tmp_opt;
struct sock *child;
tmp_opt.saw_tstamp = 0;
- if (th->doff > (sizeof(struct tcphdr)>>2)) {
- tcp_parse_options(skb, &tmp_opt, 0);
+ if (th->doff > (sizeof(*th) >> 2)) {
+ tcp_parse_options(skb, &tmp_opt, &cryptic_value, 0);
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
@@ -596,7 +616,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* Invalid ACK: reset will be sent by listening socket
*/
if ((flg & TCP_FLAG_ACK) &&
- (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
+ (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1 +
+ tcp_s_data_size(tcp_sk(sk))))
return sk;
/* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c235196..0a04684 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -370,6 +370,7 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_TS (1 << 1)
#define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3)
+#define OPTION_COOKIE_EXTENSION (1 << 4)
struct tcp_out_options {
u8 options; /* bit field of OPTION_* */
@@ -377,8 +378,35 @@ struct tcp_out_options {
u8 num_sack_blocks; /* number of SACK blocks to include */
u16 mss; /* 0 to disable */
__u32 tsval, tsecr; /* need to include OPTION_TS */
+ u8 *cookie_copy; /* temporary pointer */
+ u8 cookie_size; /* bytes in copy */
};
+/* The sysctl int routines are generic, so check consistency here.
+ */
+static u8 tcp_cookie_size_check(u8 desired)
+{
+ if (0 < desired) {
+ /* previously specified */
+ return desired;
+ }
+ if (0 >= sysctl_tcp_cookie_size) {
+ /* no default specified */
+ return 0;
+ }
+ if (TCP_COOKIE_MIN > sysctl_tcp_cookie_size) {
+ return TCP_COOKIE_MIN;
+ }
+ if (TCP_COOKIE_MAX < sysctl_tcp_cookie_size) {
+ return TCP_COOKIE_MAX;
+ }
+ if (0x1 & sysctl_tcp_cookie_size) {
+ /* 8-bit multiple, illegal, fix it */
+ return (u8)(sysctl_tcp_cookie_size + 0x1);
+ }
+ return (u8)sysctl_tcp_cookie_size;
+}
+
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
@@ -395,11 +423,22 @@ struct tcp_out_options {
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
const struct tcp_out_options *opts,
__u8 **md5_hash) {
- if (unlikely(OPTION_MD5 & opts->options)) {
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
+ u8 options = opts->options; /* mungable copy */
+
+ if (unlikely(OPTION_MD5 & options)) {
+ if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
+ *ptr++ = htonl((TCPOPT_COOKIE << 24) |
+ (TCPOLEN_COOKIE_BASE << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+ } else {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+ }
+ /* larger cookies are incompatible */
+ options &= ~OPTION_COOKIE_EXTENSION;
*md5_hash = (__u8 *)ptr;
ptr += 4;
} else {
@@ -412,12 +451,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
opts->mss);
}
- if (likely(OPTION_TS & opts->options)) {
- if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
+ if (likely(OPTION_TS & options)) {
+ if (unlikely(OPTION_SACK_ADVERTISE & options)) {
*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
(TCPOLEN_SACK_PERM << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
+ options &= ~OPTION_SACK_ADVERTISE;
} else {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
@@ -428,15 +468,48 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
*ptr++ = htonl(opts->tsecr);
}
- if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
- !(OPTION_TS & opts->options))) {
+ /* Specification requires after timestamp, so do it now.
+ */
+ if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
+ u8 *cookie_copy = opts->cookie_copy;
+ u8 cookie_size = opts->cookie_size;
+
+ if (unlikely(0x1 & cookie_size)) {
+ /* 8-bit multiple, illegal, ignore */
+ cookie_size = 0;
+ } else if (likely(0x2 & cookie_size)) {
+ __u8 *p = (__u8 *)ptr;
+
+ /* 16-bit multiple */
+ *p++ = TCPOPT_COOKIE;
+ *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
+ *p++ = *cookie_copy++;
+ *p++ = *cookie_copy++;
+ ptr++;
+ cookie_size -= 2;
+ } else {
+ /* 32-bit multiple */
+ *ptr++ = htonl(((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_COOKIE << 8) |
+ TCPOLEN_COOKIE_BASE) +
+ cookie_size);
+ }
+
+ if (0 < cookie_size) {
+ memcpy(ptr, cookie_copy, cookie_size);
+ ptr += (cookie_size >> 2);
+ }
+ }
+
+ if (unlikely(OPTION_SACK_ADVERTISE & options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_SACK_PERM << 8) |
TCPOLEN_SACK_PERM);
}
- if (unlikely(OPTION_WSCALE & opts->options)) {
+ if (unlikely(OPTION_WSCALE & options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_WINDOW << 16) |
(TCPOLEN_WINDOW << 8) |
@@ -471,11 +544,18 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5) {
struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_cookie_values *cvp = tp->cookie_values;
unsigned size = 0;
+ u8 cookie_size = (!tp->cookie_out_never && NULL != cvp)
+ ? tcp_cookie_size_check(cvp->cookie_desired)
+ : 0;
#ifdef CONFIG_TCP_MD5SIG
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
+ if (0 < cookie_size) {
+ opts->options |= OPTION_COOKIE_EXTENSION;
+ }
opts->options |= OPTION_MD5;
size += TCPOLEN_MD5SIG_ALIGNED;
}
@@ -512,6 +592,63 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
size += TCPOLEN_SACKPERM_ALIGNED;
}
+ /* Having both authentication and cookies for security is redundant,
+ * and there's certainly not enough room. Instead, the cookie-less
+ * variant is proposed above.
+ *
+ * Consider the pessimal case with authentication. The options
+ * could look like:
+ * COOKIE|MD5(20) + MSS(4) + WSCALE(4) + SACK|TS(12) == 40
+ *
+ * (Currently, the timestamps && *MD5 test above prevents this.)
+ *
+ * Note that timestamps are required by the specification.
+ *
+ * Odd numbers of bytes are prohibited by the specification, ensuring
+ * that the cookie is 16-bit aligned, and the resulting cookie pair is
+ * 32-bit aligned.
+ */
+ if (NULL == *md5
+ && (OPTION_TS & opts->options)
+ && 0 < cookie_size) {
+ int need = TCPOLEN_COOKIE_BASE + cookie_size;
+ int remaining = MAX_TCP_OPTION_SPACE - size;
+
+ if (0x2 & need) {
+ /* 32-bit multiple */
+ need += 2; /* NOPs */
+
+ if (need > remaining) {
+ /* try shrinking cookie to fit */
+ cookie_size -= 2;
+ need -= 4;
+ }
+ }
+ while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
+ cookie_size -= 4;
+ need -= 4;
+ }
+ if (TCP_COOKIE_MIN <= cookie_size) {
+ opts->options |= OPTION_COOKIE_EXTENSION;
+ opts->cookie_copy = &cvp->cookie_pair[0];
+ opts->cookie_size = cookie_size;
+
+ /* Remember for future incarnations. */
+ cvp->cookie_desired = cookie_size;
+
+ if (cvp->cookie_desired != cvp->cookie_pair_size) {
+ /* Currently use random bytes as a nonce,
+ * assuming these are completely unpredictable
+ * by hostile users of the same system.
+ */
+ get_random_bytes(opts->cookie_copy,
+ cookie_size);
+ cvp->cookie_pair_size = cookie_size;
+ }
+
+ size += need;
+ }
+ }
return size;
}
@@ -520,14 +657,22 @@ static unsigned tcp_synack_options(struct sock *sk,
struct request_sock *req,
unsigned mss, struct sk_buff *skb,
struct tcp_out_options *opts,
- struct tcp_md5sig_key **md5) {
- unsigned size = 0;
+ struct tcp_md5sig_key **md5,
+ struct tcp_extend_values *xvp)
+{
struct inet_request_sock *ireq = inet_rsk(req);
+ unsigned size = 0;
+ u8 cookie_plus = (NULL != xvp && !xvp->cookie_out_never)
+ ? xvp->cookie_plus
+ : 0;
char doing_ts;
#ifdef CONFIG_TCP_MD5SIG
*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
if (*md5) {
+ if (0 < cookie_plus) {
+ opts->options |= OPTION_COOKIE_EXTENSION;
+ }
opts->options |= OPTION_MD5;
size += TCPOLEN_MD5SIG_ALIGNED;
}
@@ -561,6 +706,34 @@ static unsigned tcp_synack_options(struct sock *sk,
size += TCPOLEN_SACKPERM_ALIGNED;
}
+ /* Similar rationale to tcp_syn_options() applies here, too.
+ * If the <SYN> options fit, the same options should fit now!
+ */
+ if (NULL == *md5
+ && doing_ts
+ && 0 < cookie_plus) {
+ int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
+ int remaining = MAX_TCP_OPTION_SPACE - size;
+
+ if (0x2 & need) {
+ /* 32-bit multiple */
+ need += 2; /* NOPs */
+ }
+ if (need <= remaining) {
+ opts->options |= OPTION_COOKIE_EXTENSION;
+ opts->cookie_copy = &xvp->cookie_bakery[0];
+ opts->cookie_size = cookie_plus - TCPOLEN_COOKIE_BASE;
+
+ /* secret recipe not yet implemented */
+ get_random_bytes(opts->cookie_copy,
+ opts->cookie_size);
+
+ size += need;
+ } else {
+ /* There's no error return, so flag it. */
+ xvp->cookie_out_never = 1; /* true */
+ }
+ }
return size;
}
@@ -2229,14 +2402,15 @@ int tcp_send_synack(struct sock *sk)
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req, void *extend_values)
{
+ struct tcp_out_options opts;
+ struct tcp_extend_values *xvp = tcp_xv(extend_values);
struct inet_request_sock *ireq = inet_rsk(req);
struct tcp_sock *tp = tcp_sk(sk);
struct tcphdr *th;
- int tcp_header_size;
- struct tcp_out_options opts;
struct sk_buff *skb;
struct tcp_md5sig_key *md5;
__u8 *md5_hash_location;
+ int tcp_header_size;
int mss;
skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
@@ -2274,7 +2448,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
#endif
TCP_SKB_CB(skb)->when = tcp_time_stamp;
tcp_header_size = tcp_synack_options(sk, req, mss,
- skb, &opts, &md5) +
+ skb, &opts, &md5, xvp) +
sizeof(struct tcphdr);
skb_push(skb, tcp_header_size);
@@ -2292,6 +2466,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
*/
tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
+
+ /* If cookies are active, and constant data is available, copy it
+ * directly from the listening socket.
+ */
+ if (NULL != xvp
+ && !xvp->cookie_out_never
+ && 0 < xvp->cookie_plus
+ && tp->s_data_constant) {
+ const struct tcp_cookie_values *cvp = tp->cookie_values;
+
+ if (NULL != cvp
+ && 0 < cvp->s_data_desired) {
+ u8 *buf = skb_put(skb, cvp->s_data_desired);
+
+ memcpy(buf, cvp->s_data_payload, cvp->s_data_desired);
+ TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired;
+ }
+ }
+
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index cbe55e5..2839349 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -159,6 +159,8 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_options_received tcp_opt;
+ u8 *cryptic_value;
struct inet_request_sock *ireq;
struct inet6_request_sock *ireq6;
struct tcp_request_sock *treq;
@@ -171,7 +173,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
int mss;
struct dst_entry *dst;
__u8 rcv_wscale;
- struct tcp_options_received tcp_opt;
if (!sysctl_tcp_syncookies || !th->ack)
goto out;
@@ -186,7 +187,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, 0);
+ tcp_parse_options(skb, &tcp_opt, &cryptic_value, 0);
if (tcp_opt.saw_tstamp)
cookie_check_timestamp(&tcp_opt);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3b3d7b3..1320825 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1161,11 +1161,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
*/
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_extend_values tmp_ext;
+ struct tcp_options_received tmp_opt;
+ u8 *cryptic_value;
struct inet6_request_sock *treq;
struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_options_received tmp_opt;
- struct tcp_sock *tp = tcp_sk(sk);
struct request_sock *req = NULL;
+ struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
#ifdef CONFIG_SYN_COOKIES
int want_cookie = 0;
@@ -1205,7 +1207,29 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0);
+ tcp_parse_options(skb, &tmp_opt, &cryptic_value, 0);
+
+ if (0 < tmp_opt.cookie_plus
+ && tmp_opt.saw_tstamp
+ && !tp->cookie_out_never
+ && (0 < sysctl_tcp_cookie_size
+ || (NULL != tp->cookie_values
+ && 0 < tp->cookie_values->cookie_desired))) {
+#ifdef CONFIG_SYN_COOKIES
+ want_cookie = 0; /* not our kind of cookie */
+#endif
+ tmp_ext.cookie_out_never = 0; /* false */
+ tmp_ext.cookie_plus = tmp_opt.cookie_plus;
+
+ /* secret recipe not yet implemented */
+ } else if (!tp->cookie_in_always) {
+ /* redundant indications, but ensure initialization. */
+ tmp_ext.cookie_out_never = 1; /* true */
+ tmp_ext.cookie_plus = 0;
+ } else {
+ goto drop;
+ }
+ tmp_ext.cookie_in_always = tp->cookie_in_always;
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -1243,7 +1267,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
security_inet_conn_request(sk, skb, req);
- if (tcp_v6_send_synack(sk, req, NULL) ||
+ if (tcp_v6_send_synack(sk, req, (void *)&tmp_ext) ||
want_cookie)
goto drop;
@@ -1848,7 +1872,7 @@ static int tcp_v6_init_sock(struct sock *sk)
*/
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_clamp = ~0;
- tp->mss_cache = 536;
+ tp->mss_cache = TCP_MIN_RCVMSS;
tp->reordering = sysctl_tcp_reordering;
@@ -1864,6 +1888,19 @@ static int tcp_v6_init_sock(struct sock *sk)
tp->af_specific = &tcp_sock_ipv6_specific;
#endif
+ /* TCP Cookie Transactions */
+ if (0 < sysctl_tcp_cookie_size) {
+ /* Default, cookies without s_data. */
+ tp->cookie_values =
+ kzalloc(sizeof(*tp->cookie_values), sk->sk_allocation);
+ if (NULL != tp->cookie_values) {
+ kref_init(&tp->cookie_values->kref);
+ }
+ }
+ /* Presumed zeroed, in order of appearance:
+ * cookie_in_always, cookie_out_never, extend_timestamp,
+ * s_data_constant, s_data_in, s_data_out
+ */
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
--
1.6.0.4
^ permalink raw reply related
* Re: [PATCH][RFC]: ingress socket filter by mark
From: Eric Dumazet @ 2009-10-18 17:28 UTC (permalink / raw)
To: hadi; +Cc: netdev, David Miller, Atis Elsts, Maciej Z.enczykowski
In-Reply-To: <1255869758.4815.40.camel@dogo.mojatatu.com>
jamal a écrit :
> Maciej forced me to dig into this ;->
>
> at the socket level if a packet arrives with a different mark than
> what we bind to, drop it. I have tested this patch and it drops a packet
> with mismatching mark.
>
> There are several approaches - and i think the patch suggestion i have
> made here may be too strict. I assume that if someone binds to a mark,
> they want to not only send packets with that mark but receive
> only if that mark is set.
> A looser check would be something along the line accept as well if mark
> is not set i.e
> if (sk->sk_mark && skb->mark && sk->sk_mark != skb->mark)
>
> Alternatively i could add one bit in the socket flags and have it so
> that check is made only if app has been explicit:
> if (sock_flag(sk, SOCK_CHK_SOMARK) && sk->sk_mark != skb->mark) drop
>
> Another approach is to set sock filter from app. I dont like this
> approach because it will be the least usable from app level and would be
> the least simple from kernel level.
>
> cheers,
> jamal
>
I vote for extending BPF, and not adding the price of a compare
for each packet. Only users wanting mark filtering should pay the price.
^ permalink raw reply
* Re: PF_RING: Include in main line kernel?
From: Luca Deri @ 2009-10-18 17:37 UTC (permalink / raw)
To: Harald Welte; +Cc: Brad Doctor, netdev
In-Reply-To: <20091018123811.GD27747@prithivi.gnumonks.org>
Harald
many thanks for your support. As I have stated before, my wish is to
include in the mainstream kernel some features that I have
implemented in PF_RING and on which many users have relied for a very
long time. I understand that there are some overlaps with PF_PACKET and I'm
willing to work with the kernel maintainers to address this issue.
The only thing I want to say is that PF_RING is *not* just for
accelerating packet capture. This was the minimal goal when I coded
the first release in 2003. PF_RING is a kernel module that
implements several features (e.g. advanced packet filtering,
extensible architecture by means of plugins, balancing, multicore/
multiqueue, packet modification and retransmission) that ease the
implementation of efficient applications not limited to packet capture
application. So in this view PF_RING has been designed to be a
superset of PF_PACKET, because the needs that I (and many other people)
have go beyond just having efficient packet capture.
This said I'm already at work to modify PF_RING so that it's a pure
module that does not require any change in the mainstream kernel (i.e.
net/core/dev.c). I'm almost done so I plan to release by tomorrow a
new PF_RING release that implements this. Of course some changes into
the kernel (such as Ben's patch) would ease PF_RING's life and pave
the way to new kernel modules.
Once that is done, I will start working on the RFC that you proposed.
Cheers Luca
PS. Just to clarify, when I say 'packet filtering' I mean the ability
for packet capture applications to specify filters more advanced than
BPF (even if BPF is supported by PF_RING) that prevent those
applications from receiving packets they don't like, but that in any
case will continue their journey into the kernel; this has nothing to
do with netfilter filtering.
On Oct 18, 2009, at 2:38 PM, Harald Welte wrote:
> Hi Brad and Luca,
>
> On Wed, Oct 14, 2009 at 08:33:08AM -0600, Brad Doctor wrote:
>
>> On behalf of the users and developers of the PF_RING project, we
>> would
>> like to ask consideration to include the PF_RING module in the main
>> line kernel.
>
> First of all, let me state that I think the mainline support for
> nProbe/nTop is
> something that I have been hoping for many years. I think the
> performance you
> are achieving is remarkable, and it would be very usable to have this
> capability of high performance zero-copy packet access from
> userspace as a
> stock feature of the Linux kernel.
>
> The actual PF_RING implementation has been criticized a couple of
> times even in
> the past. One general point I remember from past discussions in the
> kernel
> network community was that there is too much overlap with PF_PACKET,
> and that
> this could possibly be extended with a ring buffer rather than
> replaced with a
> fairly similar alternative mechanism.
>
> So let's see what kind of solution the current discussion thread
> will come up
> with... let's hope eventually we'll have the functionality in the
> kernel.
> --
> - Harald Welte <laforge@gnumonks.org> http://laforge.gnumonks.org/
> =
> =
> =
> =
> =
> =
> ======================================================================
> "Privacy in residential applications is a desirable marketing option."
> (ETSI EN 300 175-7
> Ch. A6)
^ permalink raw reply
* Re: [PATCH/RFC] make unregister_netdev() delete more than 4 interfaces per second
From: Eric Dumazet @ 2009-10-18 17:51 UTC (permalink / raw)
To: Benjamin LaHaise; +Cc: netdev
In-Reply-To: <20091018161356.GA23395@kvack.org>
Benjamin LaHaise a écrit :
> On Sun, Oct 18, 2009 at 06:26:22AM +0200, Eric Dumazet wrote:
>> Unfortunately this slows down the fast path by an order of magnitude.
>>
>> atomic_dec() is pretty cheap (and eventually could use a per_cpu thing,
>> now we have a new and sexy per_cpu allocator), but atomic_dec_and_test()
>> is not that cheap and more important forbids a per_cpu conversion.
>
> dev_put() is not a fast path by any means. atomic_dec_and_test() costs
> the same as atomic_dec() on any modern CPU -- the cost is in the cacheline
> bouncing and serialisation both require. The case of the device count
> becoming 0 is quite rare -- any device with a route on it will never hit
> a reference count of 0.
You forgot af_packet sendmsg() users, and heavy routers where route cache
is stressed or disabled. I know several of them, they even added mmap TX
support to get better performance. They will be disappointed by your patch.
atomic_dec_and_test() is definitely more expensive, because of strong barrier
semantics and the added test after the decrement.
refcnt being close to zero or not has no impact, even on 2-year-old CPUs.
Machines hardly had to dismantle a netdevice in a normal lifetime, so maybe
we were lazy with this insane msleep(250). This came from old linux times,
when cpus were soooo slow and programmers soooo lazy :)
The msleep(250) should be tuned first. Then if this is really necessary
to dismantle 100.000 netdevices per second, we might have to think a bit more.
Just try msleep(1 or 2), it should work quite well.
^ permalink raw reply
* Re: [PATCH/RFC] make unregister_netdev() delete more than 4 interfaces per second
From: Benjamin LaHaise @ 2009-10-18 18:21 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev
In-Reply-To: <4ADB55BC.5020107@gmail.com>
On Sun, Oct 18, 2009 at 07:51:56PM +0200, Eric Dumazet wrote:
> You forgot af_packet sendmsg() users, and heavy routers where route cache
> is stressed or disabled. I know several of them, they even added mmap TX
> support to get better performance. They will be disappointed by your patch.
If that's a problem, the cacheline overhead is a more serious issue.
AF_PACKET should really keep the reference on the device between syscalls.
Do you have a benchmark in mind that would show the overhead?
> atomic_dec_and_test() is definitely more expensive, because of strong barrier
> semantics and the added test after the decrement.
> refcnt being close to zero or not has no impact, even on 2-year-old CPUs.
At least on x86, the atomic_dec_and_test() cost is pretty much identical to
atomic_dec(). If this really is a performance bottleneck, people should be
complaining about the cache miss overhead and lock overhead which will dwarf
the atomic_dec_and_test() cost vs atomic_dec(). Granted, I'm not saying
that it isn't an issue on other architectures, but for x86 the lock prefix
is what's expensive, not checking the flags or not after doing the operation.
If your complaint is about uninlining dev_put(), I'm indifferent to keeping
it inline or out of line and can change the patch to suit.
> Machines hardly had to dismantle a netdevice in a normal lifetime, so maybe
> we were lazy with this insane msleep(250). This came from old linux times,
> when cpus were soooo slow and programmers soooo lazy :)
It's only now that machines can actually route one or more 10Gbps links
that it really becomes an issue. I've been hacking around it for some
time, but fixing it properly is starting to be a real requirement.
> The msleep(250) should be tuned first. Then if this is really necessary
> to dismantle 100.000 netdevices per second, we might have to think a bit more.
>
> Just try msleep(1 or 2), it should work quite well.
My goal is tearing down 100,000 interfaces in a few seconds, which really is
necessary. Right now we're running about 40,000 interfaces on a not yet
saturated 10Gbps link. Going to dual 10Gbps links means pushing more than
100,000 subscriber interfaces, and it looks like a modern dual socket system
can handle that.
A bigger concern is rtnl_lock(). It is a huge impediment to scaling up
interface creation/deletion on multicore systems. That's going to be a
lot more invasive to fix, though.
-ben
^ permalink raw reply
* [PATCH v2] can: provide library functions for skb allocation
From: Wolfgang Grandegger @ 2009-10-18 18:45 UTC (permalink / raw)
To: Linux Netdev List
Cc: SocketCAN Core Mailing List, Sebastian Haas, Gole, Anant,
Marc Kleine-Budde, David Miller
This patch makes the private functions alloc_can_skb() and
alloc_can_err_skb() of the at91_can driver public and adapts all
drivers to use these. While making the patch I realized, that
the skb's are *not* setup consistently. It's now done as shown
below:
skb->protocol = htons(ETH_P_CAN);
skb->pkt_type = PACKET_BROADCAST;
skb->ip_summed = CHECKSUM_UNNECESSARY;
*cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
memset(*cf, 0, sizeof(struct can_frame));
The frame is zeroed out to avoid uninitialized data to be passed to
user space. Some drivers or library code did not set "pkt_type" or
"ip_summed". Also, "__constant_htons()" should not be used for
runtime invocations, as pointed out by David Miller.
Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
---
drivers/net/can/at91_can.c | 32 ----------------------------
drivers/net/can/dev.c | 42 +++++++++++++++++++++++++++++++-------
drivers/net/can/sja1000/sja1000.c | 12 +---------
drivers/net/can/ti_hecc.c | 17 +++------------
drivers/net/can/usb/ems_usb.c | 16 +-------------
5 files changed, 43 insertions(+), 76 deletions(-)
Index: net-next-2.6/drivers/net/can/dev.c
===================================================================
--- net-next-2.6.orig/drivers/net/can/dev.c
+++ net-next-2.6/drivers/net/can/dev.c
@@ -366,17 +366,12 @@ void can_restart(unsigned long data)
can_flush_echo_skb(dev);
/* send restart message upstream */
- skb = dev_alloc_skb(sizeof(struct can_frame));
+ skb = alloc_can_err_skb(dev, &cf);
if (skb == NULL) {
err = -ENOMEM;
goto restart;
}
- skb->dev = dev;
- skb->protocol = htons(ETH_P_CAN);
- cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
- memset(cf, 0, sizeof(struct can_frame));
- cf->can_id = CAN_ERR_FLAG | CAN_ERR_RESTARTED;
- cf->can_dlc = CAN_ERR_DLC;
+ cf->can_id |= CAN_ERR_RESTARTED;
netif_rx(skb);
@@ -449,6 +444,39 @@ static void can_setup(struct net_device
dev->features = NETIF_F_NO_CSUM;
}
+struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
+{
+ struct sk_buff *skb;
+
+ skb = netdev_alloc_skb(dev, sizeof(struct can_frame));
+ if (unlikely(!skb))
+ return NULL;
+
+ skb->protocol = htons(ETH_P_CAN);
+ skb->pkt_type = PACKET_BROADCAST;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+ memset(*cf, 0, sizeof(struct can_frame));
+
+ return skb;
+}
+EXPORT_SYMBOL_GPL(alloc_can_skb);
+
+struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf)
+{
+ struct sk_buff *skb;
+
+ skb = alloc_can_skb(dev, cf);
+ if (unlikely(!skb))
+ return NULL;
+
+ (*cf)->can_id = CAN_ERR_FLAG;
+ (*cf)->can_dlc = CAN_ERR_DLC;
+
+ return skb;
+}
+EXPORT_SYMBOL_GPL(alloc_can_err_skb);
+
/*
* Allocate and setup space for the CAN network device
*/
Index: net-next-2.6/drivers/net/can/at91_can.c
===================================================================
--- net-next-2.6.orig/drivers/net/can/at91_can.c
+++ net-next-2.6/drivers/net/can/at91_can.c
@@ -221,38 +221,6 @@ static inline void set_mb_mode(const str
set_mb_mode_prio(priv, mb, mode, 0);
}
-static struct sk_buff *alloc_can_skb(struct net_device *dev,
- struct can_frame **cf)
-{
- struct sk_buff *skb;
-
- skb = netdev_alloc_skb(dev, sizeof(struct can_frame));
- if (unlikely(!skb))
- return NULL;
-
- skb->protocol = htons(ETH_P_CAN);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
-
- return skb;
-}
-
-static struct sk_buff *alloc_can_err_skb(struct net_device *dev,
- struct can_frame **cf)
-{
- struct sk_buff *skb;
-
- skb = alloc_can_skb(dev, cf);
- if (unlikely(!skb))
- return NULL;
-
- memset(*cf, 0, sizeof(struct can_frame));
- (*cf)->can_id = CAN_ERR_FLAG;
- (*cf)->can_dlc = CAN_ERR_DLC;
-
- return skb;
-}
-
/*
* Swtich transceiver on or off
*/
Index: net-next-2.6/drivers/net/can/sja1000/sja1000.c
===================================================================
--- net-next-2.6.orig/drivers/net/can/sja1000/sja1000.c
+++ net-next-2.6/drivers/net/can/sja1000/sja1000.c
@@ -296,11 +296,9 @@ static void sja1000_rx(struct net_device
uint8_t dlc;
int i;
- skb = dev_alloc_skb(sizeof(struct can_frame));
+ skb = alloc_can_skb(dev, &cf);
if (skb == NULL)
return;
- skb->dev = dev;
- skb->protocol = htons(ETH_P_CAN);
fi = priv->read_reg(priv, REG_FI);
dlc = fi & 0x0F;
@@ -351,15 +349,9 @@ static int sja1000_err(struct net_device
enum can_state state = priv->can.state;
uint8_t ecc, alc;
- skb = dev_alloc_skb(sizeof(struct can_frame));
+ skb = alloc_can_err_skb(dev, &cf);
if (skb == NULL)
return -ENOMEM;
- skb->dev = dev;
- skb->protocol = htons(ETH_P_CAN);
- cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
- memset(cf, 0, sizeof(struct can_frame));
- cf->can_id = CAN_ERR_FLAG;
- cf->can_dlc = CAN_ERR_DLC;
if (isrc & IRQ_DOI) {
/* data overrun interrupt */
Index: net-next-2.6/drivers/net/can/usb/ems_usb.c
===================================================================
--- net-next-2.6.orig/drivers/net/can/usb/ems_usb.c
+++ net-next-2.6/drivers/net/can/usb/ems_usb.c
@@ -311,14 +311,10 @@ static void ems_usb_rx_can_msg(struct em
int i;
struct net_device_stats *stats = &dev->netdev->stats;
- skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+ skb = alloc_can_skb(dev->netdev, &cf);
if (skb == NULL)
return;
- skb->protocol = htons(ETH_P_CAN);
-
- cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
-
cf->can_id = msg->msg.can_msg.id;
cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8);
@@ -346,18 +342,10 @@ static void ems_usb_rx_err(struct ems_us
struct sk_buff *skb;
struct net_device_stats *stats = &dev->netdev->stats;
- skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame));
+ skb = alloc_can_err_skb(dev->netdev, &cf);
if (skb == NULL)
return;
- skb->protocol = htons(ETH_P_CAN);
-
- cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
- memset(cf, 0, sizeof(struct can_frame));
-
- cf->can_id = CAN_ERR_FLAG;
- cf->can_dlc = CAN_ERR_DLC;
-
if (msg->type == CPC_MSG_TYPE_CAN_STATE) {
u8 state = msg->msg.can_state;
Index: net-next-2.6/drivers/net/can/ti_hecc.c
===================================================================
--- net-next-2.6.orig/drivers/net/can/ti_hecc.c
+++ net-next-2.6/drivers/net/can/ti_hecc.c
@@ -535,18 +535,15 @@ static int ti_hecc_rx_pkt(struct ti_hecc
u32 data, mbx_mask;
unsigned long flags;
- skb = netdev_alloc_skb(priv->ndev, sizeof(struct can_frame));
+ skb = alloc_can_skb(priv->ndev, &cf);
if (!skb) {
if (printk_ratelimit())
dev_err(priv->ndev->dev.parent,
- "ti_hecc_rx_pkt: netdev_alloc_skb() failed\n");
+ "ti_hecc_rx_pkt: alloc_can_skb() failed\n");
return -ENOMEM;
}
- skb->protocol = __constant_htons(ETH_P_CAN);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
mbx_mask = BIT(mbxno);
- cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
data = hecc_read_mbx(priv, mbxno, HECC_CANMID);
if (data & HECC_CANMID_IDE)
cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG;
@@ -656,19 +653,13 @@ static int ti_hecc_error(struct net_devi
struct sk_buff *skb;
/* propogate the error condition to the can stack */
- skb = netdev_alloc_skb(ndev, sizeof(struct can_frame));
+ skb = alloc_can_err_skb(ndev, &cf);
if (!skb) {
if (printk_ratelimit())
dev_err(priv->ndev->dev.parent,
- "ti_hecc_error: netdev_alloc_skb() failed\n");
+ "ti_hecc_error: alloc_can_err_skb() failed\n");
return -ENOMEM;
}
- skb->protocol = __constant_htons(ETH_P_CAN);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
- memset(cf, 0, sizeof(struct can_frame));
- cf->can_id = CAN_ERR_FLAG;
- cf->can_dlc = CAN_ERR_DLC;
if (int_status & HECC_CANGIF_WLIF) { /* warning level int */
if ((int_status & HECC_CANGIF_BOIF) == 0) {
^ permalink raw reply
* Re: [PATCH] can: provide library functions for skb allocation
From: Wolfgang Grandegger @ 2009-10-18 18:46 UTC (permalink / raw)
To: David Miller; +Cc: netdev, socketcan-core, haas, anantgole, mkl
In-Reply-To: <20091017.235520.265660027.davem@davemloft.net>
David Miller wrote:
> From: Wolfgang Grandegger <wg@grandegger.com>
> Date: Thu, 15 Oct 2009 11:22:18 +0200
>
>> + skb->protocol = __constant_htons(ETH_P_CAN);
>
> Please don't use __constant_htonX() for runtime invocations.
> It's only for situation which must be compile time evaluations
> such as case statements and static initializations.
>
> GCC can figure out that it's constant if you just use
> plain htonX().
OK, I just sent out v2 of this patch.
Wolfgang.
^ permalink raw reply
* Re: [PATCH/RFC] make unregister_netdev() delete more than 4 interfaces per second
From: Eric Dumazet @ 2009-10-18 19:36 UTC (permalink / raw)
To: Benjamin LaHaise; +Cc: netdev
In-Reply-To: <20091018182144.GC23395@kvack.org>
Benjamin LaHaise a écrit :
>
> My goal is tearing down 100,000 interfaces in a few seconds, which really is
> necessary. Right now we're running about 40,000 interfaces on a not yet
> saturated 10Gbps link. Going to dual 10Gbps links means pushing more than
> 100,000 subscriber interfaces, and it looks like a modern dual socket system
> can handle that.
>
> A bigger concern is rtnl_lock(). It is a huge impediment to scaling up
> interface creation/deletion on multicore systems. That's going to be a
> lot more invasive to fix, though.
Don't forget synchronize_net() too (two calls per rollback_registered())
You need something to dismantle XXXXX interfaces at once, instead
of serializing one by one. Because in three years you'll want to dismantle
1.000.000 interfaces in one second.
Maybe defining an asynchronous unregister_netdev() function...
^ permalink raw reply
* Re: Kernel oops when clearing bgp neighbor info with TCP MD5SUM enabled
From: Anirban Sinha @ 2009-10-18 20:19 UTC (permalink / raw)
To: linux-kernel, Oleg Nesterov; +Cc: David Miller, netdev, Anirban Sinha
In-Reply-To: <4ADA7EDC.5010402@anirban.org>
Hi Oleg:
I have a question for you. The queue_work() routine which is called from schedule_work() does a put_cpu() which in turn does an enable_preempt(). Is this an attempt to trigger the scheduler? One of the side effects of this enable_preempt() is the crash that we see below. What is happening is that a timer callback routine, in this case inet_twdr_hangman(), tries a bunch of cleanup until a threshold is reached. If further cleanup needs to be done beyond the threshold, it queues a work function. Now when the timer callback is run in __run_timers(), the routine grabs the value of preempt_count before and after the callback function call. If the two counts do not match, it calls BUG() (line 1037 in kernel/timer.c). Is it illegal to schedule a work function from within a timer callback? What would be a good solution? I have already posted in netdev but since workqueues and timers are general kernel infrastructure, I thought I might as well post the question in the main linux mailing list and to you.
Here's the output from my instrumented BUG() call:
[02:15:15.941981] Kernel panic - not syncing: <3>huh, entered ffffffff803fbd60
(inet_twdr_hangman+0x0/0xe0)with preempt_count 00000102, exited with 00000101?
I was thinking of a hacky solution, to replace schedule_work() with schedule_delayed_work() just to get around the issue. But I am sure this is just too hacky and probably not the ideal solution ...
Cheers,
Ani
Once upon a time, like on 09-10-17 7:35 PM, Anirban Sinha wrote:
>
>
> Once upon a time, like on 09-10-17 10:57 AM, Anirban Sinha wrote:
>> On Thu, 8 Oct 2009, David Miller wrote:
>>
>>>>>> We are noticing a kernel OOPS on 2.6.26 kernel when we issue the command
>>>>>> "clear ip bgp <bgp-peer-ip>" on Quagga BGP routing software.
>
> and btw, this is the crash (on mips) we are talking about:
>
> # [23:10:35.108808] Kernel bug detected[#1]:
> [23:10:35.112527] Cpu 0
> [23:10:35.114676] $ 0 : 0000000000000000 0000000014001fe0
> 0000000000000066 0000000000000004
> [23:10:35.122845] $ 4 : ffffffff80516c10 0000000014001fe0
> ffffffff8050c010 0000000000000004
> [23:10:35.131015] $ 8 : 0000000000000000 0000000000000041
> ffffffff805142e8 0000000000000001
> [23:10:35.139184] $12 : ffffffff80600000 ffffffff805f0000
> 0000000000000064 0000000000000190
> [23:10:35.147354] $16 : 0000000000000102 ffffffff803afdf0
> ffffffff80539040 ffffffff80600780
> [23:10:35.155526] $20 : ffffffff80540000 0000000000200200
> ffffffff804c0000 000000000000000a
> [23:10:35.163695] $24 : a3d70a3d70a3d70b 8000000000000003
> [23:10:35.171865] $28 : ffffffff8050c000 ffffffff8050fd90
> 9000000010030000 ffffffff801487a8
> [23:10:35.180035] Hi : 0000000000000000
> [23:10:35.183819] Lo : 0000000000000000
> [23:10:35.187603] epc : ffffffff801487a8 run_timer_softirq+0x198/0x258
> Tainted: P
> [23:10:35.196032] ra : ffffffff801487a8 run_timer_softirq+0x198/0x258
> [23:10:35.202395] Status: 14001fe3 KX SX UX KERNEL EXL IE
> [23:10:35.207814] Cause : 00808024
> [23:10:35.210911] PrId : 01041100 (SiByte SB1A)
> [23:10:35.215209] Modules linked in: xt_state ipt_REJECT iptable_filter
> nf_conntrack_ftp ipt_MASQUERADE iptable_nat nf_nat nf_conntrack_ipv4
> ip_tables ebtable_filter ebtables bridge llc zeug_ipmcdrv(P) irqdisp(P)
> zvirt(P) zeugmod(P) softdog
> [23:10:35.236024] Process swapper (pid: 0, threadinfo=ffffffff8050c000,
> task=ffffffff805142e8, tls=0000000000000000)
> [23:10:35.246169] Stack : ffffffff8050fd90 ffffffff8050fd90
> 0000000014001fe0 ffffffff805ff3e0
> [23:10:35.254166] ffffffff806003c4 0000000000000001
> ffffffff8053f650 ffffffff805706d0
> [23:10:35.262337] ffffffff80572020 ffffffff80142280
> ffffffff806003c0 0000000000000000
> [23:10:35.270507] 0000000014001fe0 000000000000c5b0
> ffffffff8fefc520 ffffffff8feea52c
> [23:10:35.278676] 0000000000000015 0000000000004460
> 0000000000000940 ffffffff8fe1bf00
> [23:10:35.286846] ffffffff8fffdab0 ffffffff80142410
> 0000000000000000 ffffffff80142778
> [23:10:35.295017] ffffffff80103d20 ffffffff80103d20
> 0000000000000000 0000000014001fe1
> [23:10:35.303187] 0000000000040000 ffffffff8050c010
> 0000000000000000 a80000017f87c138
> [23:10:35.311357] 0000000014001fe0 ffffffffffff00fe
> 0000000000000004 a80000017e7e0680
> [23:10:35.319528] 0000000000000000 000000000000001d
> ffffffff8050ffe0 0000000000001f00
> [23:10:35.327696] ...
> [23:10:35.330536] Call Trace:
> [23:10:35.333201] [<ffffffff801487a8>] run_timer_softirq+0x198/0x258
> [23:10:35.339224] [<ffffffff80142280>] __do_softirq+0x198/0x288
> [23:10:35.344812] [<ffffffff80142410>] do_softirq+0xa0/0xa8
> [23:10:35.350057] [<ffffffff80142778>] irq_exit+0x70/0x88
> [23:10:35.355131] [<ffffffff80103d20>] ret_from_irq+0x0/0x4
> [23:10:35.360377] [<ffffffff801063f4>] cpu_idle+0x1c/0x88
> [23:10:35.365455]
> [23:10:35.367171]
> [23:10:35.367174] Code: 0040382d 0c04ef4c 00000000 <0200000d> 0c10ee9c
> 0260202d dfa60000 17a6ffe5 00000000
> [23:10:35.378822] Kernel panic - not syncing: Fatal exception in
> interrupt
>
^ permalink raw reply
* Re: [PATCH][RFC]: ingress socket filter by mark
From: jamal @ 2009-10-18 20:28 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, David Miller, Atis Elsts, Maciej Z.enczykowski
In-Reply-To: <4ADB5043.7070707@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 385 bytes --]
On Sun, 2009-10-18 at 19:28 +0200, Eric Dumazet wrote:
> I vote for extending BPF, and not adding the price of a compare
> for each packet. Only users wanting mark filtering should pay the price.
To be honest it nagged me as well;->
So here's a basic patch stolen from a patch i just saw that you
posted;-> I still havent tested. Let me know if it looks reasonable...
cheers,
jamal
[-- Attachment #2: filt-sock-m-2 --]
[-- Type: text/x-patch, Size: 778 bytes --]
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1354aaf..909193e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -123,7 +123,8 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */
#define SKF_AD_IFINDEX 8
#define SKF_AD_NLATTR 12
#define SKF_AD_NLATTR_NEST 16
-#define SKF_AD_MAX 20
+#define SKF_AD_MARK 20
+#define SKF_AD_MAX 24
#define SKF_NET_OFF (-0x100000)
#define SKF_LL_OFF (-0x200000)
diff --git a/net/core/filter.c b/net/core/filter.c
index d1d779c..e3987e1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -303,6 +303,9 @@ load_b:
case SKF_AD_IFINDEX:
A = skb->dev->ifindex;
continue;
+ case SKF_AD_MARK:
+ A = skb->mark;
+ continue;
case SKF_AD_NLATTR: {
struct nlattr *nla;
^ permalink raw reply related
* kernel panic in latest vanilla stable, while using nameif with "alive" pppoe interfaces
From: Denys Fedoryschenko @ 2009-10-18 21:02 UTC (permalink / raw)
To: netdev, linux-ppp, paulus, mostrows
I have server running as pppoe NAS.
Tried to rename customers without dropping pppd connections first, got panic
after few seconds.
Panic triggerable at 2.6.30.4 and 2.6.31.4
pppoe users running on eth2
pppoe flags:
1457 root /usr/sbin/pppoe-server -I eth2 -k -L 172.16.1.1 -R
172.16.1.2 -N 253 -C gpzone -S gpzone
Commands sequence that i think triggered that:
ip link set eth0 down
ip link set eth1 down
ip link set eth2 down
nameif etherx 00:16:76:8D:83:BA
nameif eth0 00:19:e0:72:4a:37
nameif eth1 00:19:e0:72:4a:4b
ip addr flush dev eth0
ip addr flush dev eth1
ip addr add X.X.X.X/29 dev eth0
ip addr add 192.168.2.177/24 dev eth0
ip addr add 192.168.0.1/32 dev eth1
ip addr add 127.0.0.0/8 dev lo
#ip link set eth0 up
ip link set eth0 up
ip link set eth1 up
ip link set lo up
ip route add 0.0.0.0/0 via X.X.X.X
[ 103.428591] r8169: eth0: link up
[ 103.430360] r8169: eth1: link up
[ 113.361528] BUG: unable to handle kernel
NULL pointer dereference
at 0000018f
[ 113.361717] IP:
[<f8868269>] pppoe_device_event+0x80/0x12c [pppoe]
[ 113.361853] *pdpt = 000000003411a001
*pde = 0000000000000000
Oct 18 23:59:40 194.146.153.93
[ 113.362012] Oops: 0000 [#1]
SMP
Oct 18 23:59:40 194.146.153.93
[ 113.362166] last sysfs file: /sys/devices/virtual/vc/vcs3/dev
[ 113.362246] Modules linked in:
netconsole
configfs
act_skbedit
sch_ingress
sch_prio
cls_flow
cls_u32
em_meta
cls_basic
xt_dscp
xt_DSCP
ipt_REJECT
ts_bm
xt_string
xt_hl
ifb
cls_fw
sch_tbf
sch_htb
act_ipt
act_mirred
xt_MARK
pppoe
pppox
ppp_generic
slhc
xt_TCPMSS
xt_mark
xt_tcpudp
iptable_mangle
iptable_nat
nf_nat
rtc_cmos
nf_conntrack_ipv4
rtc_core
nf_conntrack
rtc_lib
nf_defrag_ipv4
iptable_filter
ip_tables
x_tables
8021q
garp
stp
llc
loop
sata_sil
pata_atiixp
pata_acpi
ata_generic
libata
8139cp
usb_storage
mtdblock
mtd_blkdevs
mtd
sr_mod
cdrom
tulip
r8169
sky2
via_velocity
via_rhine
sis900
ne2k_pci
8390
skge
tg3
libphy
8139too
e1000
e100
usbhid
ohci_hcd
uhci_hcd
ehci_hcd
usbcore
nls_base
Oct 18 23:59:40 194.146.153.93
[ 113.362344]
[ 113.362344] Pid: 2858, comm: pppd Not tainted (2.6.31.4-build-0047 #7)
[ 113.362344] EIP: 0060:[<f8868269>] EFLAGS: 00010286 CPU: 0
[ 113.362344] EIP is at pppoe_device_event+0x80/0x12c [pppoe]
[ 113.362344] EAX: f4fbe000 EBX: ffffffff ECX: f6cea5a0 EDX: f7403680
[ 113.362344] ESI: 0000000f EDI: f6cea5e0 EBP: f4145e34 ESP: f4145e1c
[ 113.362344] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
[ 113.362344] Process pppd (pid: 2858, ti=f4145000 task=f4112ff0
task.ti=f4145000)
[ 113.362344] Stack:
[ 113.362344] f4fbe220
f4fbe000
f6cea5a0
f886a430
fffffff5
00000000
f4145e54
c01422b3
Oct 18 23:59:40 194.146.153.93
[ 113.362344] <0>
f4fbe000
00000009
f8a457d8
f4fbe000
f8850190
00001091
f4145e64
c0142361
Oct 18 23:59:40 194.146.153.93
[ 113.362344] <0>
ffffffff
00000000
f4145e74
c029ffbf
f4fbe000
000010d0
f4145e90
c029fa70
Oct 18 23:59:40 194.146.153.93
[ 113.362344] Call Trace:
[ 113.362344] [<c01422b3>] ? notifier_call_chain+0x2b/0x4a
[ 113.362344] [<c0142361>] ? raw_notifier_call_chain+0xc/0xe
[ 113.362344] [<c029ffbf>] ? dev_close+0x4c/0x8c
[ 113.362344] [<c029fa70>] ? dev_change_flags+0xa5/0x158
[ 113.362344] [<c02da633>] ? devinet_ioctl+0x21a/0x503
[ 113.362344] [<c02db693>] ? inet_ioctl+0x8d/0xa6
[ 113.362344] [<c0292b21>] ? sock_ioctl+0x1c8/0x1ec
[ 113.362344] [<c0292959>] ? sock_ioctl+0x0/0x1ec
[ 113.362344] [<c019af2b>] ? vfs_ioctl+0x22/0x69
[ 113.362344] [<c019b435>] ? do_vfs_ioctl+0x41f/0x459
[ 113.362344] [<c02934eb>] ? sys_send+0x18/0x1a
[ 113.362344] [<c011942f>] ? do_page_fault+0x242/0x26f
[ 113.362344] [<c019b49b>] ? sys_ioctl+0x2c/0x45
[ 113.362344] [<c0102975>] ? syscall_call+0x7/0xb
[ 113.362344] Code:
c9
00
00
00
89
c7
31
f6
83
c7
40
89
f8
e8
cc
60
a9
c7
8b
45
ec
05
20
02
00
00
89
45
e8
8b
4d
f0
8b
1c
b1
e9
8c
00
00
00
8b
45
ec
Oct 18 23:59:40 194.146.153.93
83
90
01
00
00
74
08
8b
9b
8c
01
00
00
eb
79
b8
c0
a6
86
f8
Oct 18 23:59:40 194.146.153.93
[ 113.362344] EIP: [<f8868269>]
pppoe_device_event+0x80/0x12c [pppoe]
SS:ESP 0068:f4145e1c
[ 113.362344] CR2: 000000000000018f
[ 113.373124] ---[ end trace f6fe64a307e97f3b ]---
[ 113.373203] Kernel panic - not syncing: Fatal exception in interrupt
[ 113.373286] Pid: 2858, comm: pppd Tainted: G D 2.6.31.4-build-0047
#7
[ 113.373379] Call Trace:
[ 113.373479] [<c02fc496>] ? printk+0xf/0x11
[ 113.373561] [<c02fc3e7>] panic+0x39/0xd9
[ 113.373656] [<c01059b7>] oops_end+0x8b/0x9a
[ 113.373727] [<c0118f6d>] no_context+0x13d/0x147
[ 113.373800] [<c011908a>] __bad_area_nosemaphore+0x113/0x11b
[ 113.373881] [<c02953b3>] ? sock_alloc_send_pskb+0x8b/0x24a
[ 113.373959] [<c0121801>] ? __wake_up_sync_key+0x3b/0x45
[ 113.374030] [<c0131967>] ? irq_exit+0x39/0x5c
[ 113.374107] [<c0104393>] ? do_IRQ+0x80/0x96
[ 113.374183] [<c0102f49>] ? common_interrupt+0x29/0x30
[ 113.374259] [<c011909f>] bad_area_nosemaphore+0xd/0x10
[ 113.374348] [<c0119301>] do_page_fault+0x114/0x26f
[ 113.374526] [<c01191ed>] ? do_page_fault+0x0/0x26f
[ 113.374605] [<c02fe506>] error_code+0x66/0x6c
[ 113.374683] [<c02d007b>] ? tcp_v4_send_ack+0xa3/0x10e
[ 113.374764] [<c01191ed>] ? do_page_fault+0x0/0x26f
[ 113.374850] [<f8868269>] ? pppoe_device_event+0x80/0x12c [pppoe]
[ 113.374928] [<c01422b3>] notifier_call_chain+0x2b/0x4a
[ 113.375012] [<c0142361>] raw_notifier_call_chain+0xc/0xe
[ 113.375097] [<c029ffbf>] dev_close+0x4c/0x8c
[ 113.375169] [<c029fa70>] dev_change_flags+0xa5/0x158
[ 113.375239] [<c02da633>] devinet_ioctl+0x21a/0x503
[ 113.375318] [<c02db693>] inet_ioctl+0x8d/0xa6
[ 113.375411] [<c0292b21>] sock_ioctl+0x1c8/0x1ec
[ 113.375491] [<c0292959>] ? sock_ioctl+0x0/0x1ec
[ 113.375574] [<c019af2b>] vfs_ioctl+0x22/0x69
[ 113.375653] [<c019b435>] do_vfs_ioctl+0x41f/0x459
[ 113.375734] [<c02934eb>] ? sys_send+0x18/0x1a
[ 113.375813] [<c011942f>] ? do_page_fault+0x242/0x26f
[ 113.375884] [<c019b49b>] sys_ioctl+0x2c/0x45
[ 113.375960] [<c0102975>] syscall_call+0x7/0xb
[ 113.376041] Rebooting in 5 seconds..
^ permalink raw reply
* possible circular locking dependency in ISDN PPP
From: Tilman Schmidt @ 2009-10-18 22:16 UTC (permalink / raw)
To: LKML, isdn4linux, Netdev
[-- Attachment #1: Type: text/plain, Size: 4958 bytes --]
A test of PPP over ISDN with ipppd, capidrv and the so far unmerged
CAPI port of the Gigaset driver produced the following lockdep
message:
=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.32-rc4-testing #7
-------------------------------------------------------
ipppd/28379 is trying to acquire lock:
(&netdev->queue_lock){......}, at: [<e62ad0fd>] isdn_net_device_busy+0x2c/0x74 [isdn]
but task is already holding lock:
(&netdev->local->xmit_lock){+.....}, at: [<e62aefc2>] isdn_net_write_super+0x3f/0x6e [isdn]
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&netdev->local->xmit_lock){+.....}:
[<c0157e9c>] __lock_acquire+0xa12/0xb99
[<c01580ac>] lock_acquire+0x89/0xa0
[<c0373249>] _spin_lock+0x1b/0x2a
[<e62b9d1c>] isdn_ppp_xmit+0xf0/0x5b0 [isdn]
[<e62b03f0>] isdn_net_start_xmit+0x4c6/0x66b [isdn]
[<c0307e75>] dev_hard_start_xmit+0x251/0x2e4
[<c0317bcc>] sch_direct_xmit+0x4f/0x122
[<c030829c>] dev_queue_xmit+0x2ae/0x412
[<c030d748>] neigh_resolve_output+0x1f2/0x23c
[<c0329b9c>] ip_finish_output2+0x1b1/0x1db
[<c0329c25>] ip_finish_output+0x5f/0x62
[<c0329cb5>] ip_output+0x8d/0x92
[<c03290c0>] ip_local_out+0x18/0x1b
[<c032932c>] ip_push_pending_frames+0x269/0x2c1
[<c033fd78>] raw_sendmsg+0x618/0x6b0
[<c0347ac9>] inet_sendmsg+0x3b/0x48
[<c02fa5b5>] __sock_sendmsg+0x45/0x4e
[<c02fad4b>] sock_sendmsg+0xb8/0xce
[<c02faea0>] sys_sendmsg+0x13f/0x192
[<c02fbeb2>] sys_socketcall+0x157/0x18e
[<c0102974>] sysenter_do_call+0x12/0x32
-> #0 (&netdev->queue_lock){......}:
[<c0157da9>] __lock_acquire+0x91f/0xb99
[<c01580ac>] lock_acquire+0x89/0xa0
[<c03732db>] _spin_lock_irqsave+0x24/0x34
[<e62ad0fd>] isdn_net_device_busy+0x2c/0x74 [isdn]
[<e62aeee3>] isdn_net_writebuf_skb+0x6e/0xc2 [isdn]
[<e62aefd4>] isdn_net_write_super+0x51/0x6e [isdn]
[<e62bc26f>] isdn_ppp_write+0x3a8/0x3bc [isdn]
[<e62b785a>] isdn_write+0x1d9/0x1f9 [isdn]
[<c01c42c5>] vfs_write+0x84/0xdf
[<c01c43b9>] sys_write+0x3b/0x60
[<c0102974>] sysenter_do_call+0x12/0x32
other info that might help us debug this:
1 lock held by ipppd/28379:
#0: (&netdev->local->xmit_lock){+.....}, at: [<e62aefc2>] isdn_net_write_super+0x3f/0x6e [isdn]
stack backtrace:
Pid: 28379, comm: ipppd Not tainted 2.6.32-rc4-testing #7
Call Trace:
[<c03710dc>] ? printk+0xf/0x13
[<c015714d>] print_circular_bug+0x90/0x9c
[<c0157da9>] __lock_acquire+0x91f/0xb99
[<c01580ac>] lock_acquire+0x89/0xa0
[<e62ad0fd>] ? isdn_net_device_busy+0x2c/0x74 [isdn]
[<c03732db>] _spin_lock_irqsave+0x24/0x34
[<e62ad0fd>] ? isdn_net_device_busy+0x2c/0x74 [isdn]
[<e62ad0fd>] isdn_net_device_busy+0x2c/0x74 [isdn]
[<e62aeee3>] isdn_net_writebuf_skb+0x6e/0xc2 [isdn]
[<e62aefd4>] isdn_net_write_super+0x51/0x6e [isdn]
[<e62bc26f>] isdn_ppp_write+0x3a8/0x3bc [isdn]
[<e62b785a>] isdn_write+0x1d9/0x1f9 [isdn]
[<c01c3b6c>] ? rw_verify_area+0x8a/0xad
[<e62b7681>] ? isdn_write+0x0/0x1f9 [isdn]
[<c01c42c5>] vfs_write+0x84/0xdf
[<c01c43b9>] sys_write+0x3b/0x60
[<c0102974>] sysenter_do_call+0x12/0x32
The message appeared shortly after initiating the connection,
during the PPP negotiation, just when the IP address was assigned.
The system continued to run normally, and the connection was
successfully established. Full log showing the entire connection
(with capidrv and Gigaset driver debugging output enabled, 70 kB),
available at http://www.phoenixsoftware.de/~ts/ppp-lockprob-full.log
in case someone's interested. It shows the messages from ipppd
about the IP address assignment arriving in the middle of the
lockdep message.
I cannot say whether this is a regression. My previous tests of
that scenario were done on a machine with an Nvidia graphics card
where the lockdep machinery would refuse to run because of the
kernel being tainted by the Nvidia driver, so I wouldn't have seen
anything one way or another.
Btw, one of those "NOHZ: local_softirq_pending 08" messages is also
present in the log, but that's 28 seconds later so I'd be surprised
if the two were related.
Any hints about the possible cause and seriousness of that
message would be welcome. I'm particularly interested, of course,
in finding out whether the Gigaset driver might somehow be causing
it, even though it doesn't appear anywhere in the backtraces.
aTdHvAaNnKcSe
Tilman
--
Tilman Schmidt E-Mail: tilman@imap.cc
Bonn, Germany
Diese Nachricht besteht zu 100% aus wiederverwerteten Bits.
Ungeöffnet mindestens haltbar bis: (siehe Rückseite)
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 254 bytes --]
^ permalink raw reply
* Re: [PATCH 0/2] Reduce number of GFP_ATOMIC allocation failures
From: Karol Lewandowski @ 2009-10-18 22:18 UTC (permalink / raw)
To: Karol Lewandowski
Cc: Mel Gorman, Andrew Morton, stable, Rafael J. Wysocki,
David Miller, Frans Pop, reinette chatre, Kalle Valo,
John W. Linville, Pekka Enberg, Bartlomiej Zolnierkiewicz, netdev,
linux-kernel, linux-mm@kvack.org
In-Reply-To: <20091017183421.GA3370@bizet.domek.prywatny>
On Sat, Oct 17, 2009 at 08:34:21PM +0200, Karol Lewandowski wrote:
> I'll go now for another round of bisecting... and hopefully this time
> I'll be able to trigger this problem on different/faster computer with
> e100-based card.
No luck with that either.
I've tried merging 'akpm' (517d08699b25) into clean 2.6.30 tree and
got suspend-breakage which makes it untestable for me. (I've tried
reverting drm, suspend, and other commits... all that failed.)
Is there mm-related git tree hidden somewhere? ... or broken out
mm-related patches that were sent to Andrew ... or maybe it's possible
to get "git log -p" from Mel's private repo? Anything?
Thanks.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
* [PATCH] fix section mismatch in fec.c
From: Steven King @ 2009-10-18 22:25 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev
fec_enet_init is called by both fec_probe and fec_resume, so it shouldn't
be marked as __init.
Signed-off-by: Steven King <sfking@fdwdc.com>
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 2923438..e8218a3 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -1654,7 +1654,8 @@ static const struct net_device_ops fec_netdev_ops = {
*
* index is only used in legacy code
*/
-int __init fec_enet_init(struct net_device *dev, int index)
+static int
+fec_enet_init(struct net_device *dev, int index)
{
struct fec_enet_private *fep = netdev_priv(dev);
struct bufdesc *cbd_base;
--
Steven King -- sfking at fdwdc dot com
^ permalink raw reply related
* Re: [PATCH 0/2] Reduce number of GFP_ATOMIC allocation failures
From: Frans Pop @ 2009-10-18 22:31 UTC (permalink / raw)
To: Karol Lewandowski
Cc: Mel Gorman, Andrew Morton, stable, Rafael J. Wysocki,
David Miller, reinette chatre, Kalle Valo, John W. Linville,
Pekka Enberg, Bartlomiej Zolnierkiewicz, netdev, linux-kernel,
linux-mm@kvack.org
In-Reply-To: <20091018221844.GA2061@bizet.domek.prywatny>
Hi Karol,
On Monday 19 October 2009, Karol Lewandowski wrote:
> On Sat, Oct 17, 2009 at 08:34:21PM +0200, Karol Lewandowski wrote:
> > I'll go now for another round of bisecting... and hopefully this time
> > I'll be able to trigger this problem on different/faster computer with
> > e100-based card.
>
> No luck with that either.
>
> I've tried merging 'akpm' (517d08699b25) into clean 2.6.30 tree and
> got suspend-breakage which makes it untestable for me. (I've tried
> reverting drm, suspend, and other commits... all that failed.)
>
> Is there mm-related git tree hidden somewhere? ... or broken out
> mm-related patches that were sent to Andrew ... or maybe it's possible
> to get "git log -p" from Mel's private repo? Anything?
Please try reverting 373c0a7e + 8aa7e847 [1] on top of 2.6.31. I've finally
been able to solidly trace the main regression to that. I'm doing some
final confirmation tests now and will mail detailed results afterwards.
It would be great if you could confirm if that fixes the issue for you too.
Cheers,
FJP
[1] The first commit is a build fix for the second.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
* Re: [PATCH] iputils: ping by mark
From: Maciej Żenczykowski @ 2009-10-18 22:57 UTC (permalink / raw)
To: hadi; +Cc: Rob.Townley, YOSHIFUJI Hideaki, netdev
In-Reply-To: <1255865866.4815.21.camel@dogo.mojatatu.com>
> It works fine with tcp and udp and to emphasize: i have never seen it
> broken.
Really? Ok, so we're doing something very differently...
My testing was done on a 2.6.26 kernel (but AFAICT from browsing the
code, the behaviour in question should not have changed till the last
few patches posted in the last 2-3 weeks).
^ permalink raw reply
* [PATCH 3/9] pcmcia: use pcmcia_loop_config in misc pcmcia drivers
From: Dominik Brodowski @ 2009-10-18 23:07 UTC (permalink / raw)
To: linux-pcmcia
Cc: Dominik Brodowski, David S. Miller, John W. Linville, Jiri Kosina,
David Sterba, netdev, linux-wireless
In-Reply-To: <1255907255-28297-2-git-send-email-linux@dominikbrodowski.net>
Use pcmcia_loop_config() in a few drivers missed during the first
round. On fmvj18x_cs.c it -- strangely -- only requires us to set
conf.ConfigIndex, which is done by the core, so include an empty
loop function which returns 0 unconditionally.
CC: David S. Miller <davem@davemloft.net>
CC: John W. Linville <linville@tuxdriver.com>
CC: Jiri Kosina <jkosina@suse.cz>
CC: David Sterba <dsterba@suse.cz>
CC: netdev@vger.kernel.org
CC: linux-wireless@vger.kernel.org
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
drivers/char/pcmcia/ipwireless/main.c | 103 +++++++--------------------------
drivers/char/pcmcia/synclink_cs.c | 64 ++++++++-------------
drivers/net/pcmcia/fmvj18x_cs.c | 22 +++++--
drivers/net/wireless/libertas/if_cs.c | 67 +++++++++-------------
4 files changed, 88 insertions(+), 168 deletions(-)
diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 5216fce..263a18f 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -79,14 +79,32 @@ static void signalled_reboot_callback(void *callback_data)
schedule_work(&ipw->work_reboot);
}
+static int ipwireless_ioprobe(struct pcmcia_device *p_dev,
+ cistpl_cftable_entry_t *cfg,
+ cistpl_cftable_entry_t *dflt,
+ unsigned int vcc,
+ void *priv_data)
+{
+ p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
+ p_dev->io.BasePort1 = cfg->io.win[0].base;
+ p_dev->io.NumPorts1 = cfg->io.win[0].len;
+ p_dev->io.IOAddrLines = 16;
+
+ p_dev->irq.IRQInfo1 = cfg->irq.IRQInfo1;
+
+ /* 0x40 causes it to generate level mode interrupts. */
+ /* 0x04 enables IREQ pin. */
+ p_dev->conf.ConfigIndex = cfg->index | 0x44;
+ return pcmcia_request_io(p_dev, &p_dev->io);
+}
+
static int config_ipwireless(struct ipw_dev *ipw)
{
struct pcmcia_device *link = ipw->link;
- int ret;
+ int ret = 0;
tuple_t tuple;
unsigned short buf[64];
cisparse_t parse;
- unsigned short cor_value;
memreq_t memreq_attr_memory;
memreq_t memreq_common_memory;
@@ -97,103 +115,26 @@ static int config_ipwireless(struct ipw_dev *ipw)
tuple.TupleDataMax = sizeof(buf);
tuple.TupleOffset = 0;
- tuple.DesiredTuple = RETURN_FIRST_TUPLE;
-
- ret = pcmcia_get_first_tuple(link, &tuple);
-
- while (ret == 0) {
- ret = pcmcia_get_tuple_data(link, &tuple);
-
- if (ret != 0) {
- cs_error(link, GetTupleData, ret);
- goto exit0;
- }
- ret = pcmcia_get_next_tuple(link, &tuple);
- }
-
- tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY;
-
- ret = pcmcia_get_first_tuple(link, &tuple);
-
- if (ret != 0) {
- cs_error(link, GetFirstTuple, ret);
- goto exit0;
- }
-
- ret = pcmcia_get_tuple_data(link, &tuple);
-
- if (ret != 0) {
- cs_error(link, GetTupleData, ret);
- goto exit0;
- }
-
- ret = pcmcia_parse_tuple(&tuple, &parse);
-
- if (ret != 0) {
- cs_error(link, ParseTuple, ret);
- goto exit0;
- }
-
- link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
- link->io.BasePort1 = parse.cftable_entry.io.win[0].base;
- link->io.NumPorts1 = parse.cftable_entry.io.win[0].len;
- link->io.IOAddrLines = 16;
-
- link->irq.IRQInfo1 = parse.cftable_entry.irq.IRQInfo1;
-
- /* 0x40 causes it to generate level mode interrupts. */
- /* 0x04 enables IREQ pin. */
- cor_value = parse.cftable_entry.index | 0x44;
- link->conf.ConfigIndex = cor_value;
-
- /* IRQ and I/O settings */
- tuple.DesiredTuple = CISTPL_CONFIG;
-
- ret = pcmcia_get_first_tuple(link, &tuple);
-
+ ret = pcmcia_loop_config(link, ipwireless_ioprobe, NULL);
if (ret != 0) {
- cs_error(link, GetFirstTuple, ret);
- goto exit0;
- }
-
- ret = pcmcia_get_tuple_data(link, &tuple);
-
- if (ret != 0) {
- cs_error(link, GetTupleData, ret);
+ cs_error(link, RequestIO, ret);
goto exit0;
}
- ret = pcmcia_parse_tuple(&tuple, &parse);
-
- if (ret != 0) {
- cs_error(link, GetTupleData, ret);
- goto exit0;
- }
link->conf.Attributes = CONF_ENABLE_IRQ;
- link->conf.ConfigBase = parse.config.base;
- link->conf.Present = parse.config.rmask[0];
link->conf.IntType = INT_MEMORY_AND_IO;
link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT;
link->irq.Handler = ipwireless_interrupt;
link->irq.Instance = ipw->hardware;
- ret = pcmcia_request_io(link, &link->io);
-
- if (ret != 0) {
- cs_error(link, RequestIO, ret);
- goto exit0;
- }
-
request_region(link->io.BasePort1, link->io.NumPorts1,
IPWIRELESS_PCCARD_NAME);
/* memory settings */
-
tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY;
ret = pcmcia_get_first_tuple(link, &tuple);
-
if (ret != 0) {
cs_error(link, GetFirstTuple, ret);
goto exit1;
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index caf6e4d..429b731 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -575,55 +575,39 @@ static int mgslpc_probe(struct pcmcia_device *link)
#define CS_CHECK(fn, ret) \
do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0)
+static int mgslpc_ioprobe(struct pcmcia_device *p_dev,
+ cistpl_cftable_entry_t *cfg,
+ cistpl_cftable_entry_t *dflt,
+ unsigned int vcc,
+ void *priv_data)
+{
+ if (cfg->io.nwin > 0) {
+ p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
+ if (!(cfg->io.flags & CISTPL_IO_8BIT))
+ p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
+ if (!(cfg->io.flags & CISTPL_IO_16BIT))
+ p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
+ p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK;
+ p_dev->io.BasePort1 = cfg->io.win[0].base;
+ p_dev->io.NumPorts1 = cfg->io.win[0].len;
+ return pcmcia_request_io(p_dev, &p_dev->io);
+ }
+ return -ENODEV;
+}
+
static int mgslpc_config(struct pcmcia_device *link)
{
MGSLPC_INFO *info = link->priv;
- tuple_t tuple;
- cisparse_t parse;
- int last_fn, last_ret;
- u_char buf[64];
- cistpl_cftable_entry_t dflt = { 0 };
- cistpl_cftable_entry_t *cfg;
+ int last_fn = RequestIO;
+ int last_ret;
if (debug_level >= DEBUG_LEVEL_INFO)
printk("mgslpc_config(0x%p)\n", link);
- tuple.Attributes = 0;
- tuple.TupleData = buf;
- tuple.TupleDataMax = sizeof(buf);
- tuple.TupleOffset = 0;
-
- /* get CIS configuration entry */
-
- tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY;
- CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple));
-
- cfg = &(parse.cftable_entry);
- CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple));
- CS_CHECK(ParseTuple, pcmcia_parse_tuple(&tuple, &parse));
-
- if (cfg->flags & CISTPL_CFTABLE_DEFAULT) dflt = *cfg;
- if (cfg->index == 0)
+ last_ret = pcmcia_loop_config(link, mgslpc_ioprobe, NULL);
+ if (last_ret != 0)
goto cs_failed;
- link->conf.ConfigIndex = cfg->index;
- link->conf.Attributes |= CONF_ENABLE_IRQ;
-
- /* IO window settings */
- link->io.NumPorts1 = 0;
- if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) {
- cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io;
- link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
- if (!(io->flags & CISTPL_IO_8BIT))
- link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
- if (!(io->flags & CISTPL_IO_16BIT))
- link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
- link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK;
- link->io.BasePort1 = io->win[0].base;
- link->io.NumPorts1 = io->win[0].len;
- CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io));
- }
-
link->conf.Attributes = CONF_ENABLE_IRQ;
link->conf.IntType = INT_MEMORY_AND_IO;
link->conf.ConfigIndex = 8;
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 7e01fbd..c7a2bbf 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -341,14 +341,23 @@ static int ungermann_try_io_port(struct pcmcia_device *link)
return ret; /* RequestIO failed */
}
+static int fmvj18x_ioprobe(struct pcmcia_device *p_dev,
+ cistpl_cftable_entry_t *cfg,
+ cistpl_cftable_entry_t *dflt,
+ unsigned int vcc,
+ void *priv_data)
+{
+ return 0; /* strange, but that's what the code did already before... */
+}
+
+
static int fmvj18x_config(struct pcmcia_device *link)
{
struct net_device *dev = link->priv;
local_info_t *lp = netdev_priv(dev);
tuple_t tuple;
- cisparse_t parse;
u_short buf[32];
- int i, last_fn = 0, last_ret = 0, ret;
+ int i, last_fn = RequestIO, last_ret = 0, ret;
unsigned int ioaddr;
cardtype_t cardtype;
char *card_name = "unknown";
@@ -362,12 +371,11 @@ static int fmvj18x_config(struct pcmcia_device *link)
tuple.DesiredTuple = CISTPL_FUNCE;
tuple.TupleOffset = 0;
if (pcmcia_get_first_tuple(link, &tuple) == 0) {
+ last_ret = pcmcia_loop_config(link, fmvj18x_ioprobe, NULL);
+ if (last_ret != 0)
+ goto cs_failed;
+
/* Yes, I have CISTPL_FUNCE. Let's check CISTPL_MANFID */
- tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY;
- CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple));
- CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple));
- CS_CHECK(ParseTuple, pcmcia_parse_tuple(&tuple, &parse));
- link->conf.ConfigIndex = parse.cftable_entry.index;
switch (link->manf_id) {
case MANFID_TDK:
cardtype = TDK;
diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c
index 6238176..cb40c38 100644
--- a/drivers/net/wireless/libertas/if_cs.c
+++ b/drivers/net/wireless/libertas/if_cs.c
@@ -793,18 +793,37 @@ static void if_cs_release(struct pcmcia_device *p_dev)
* configure the card at this point -- we wait until we receive a card
* insertion event.
*/
+
+static int if_cs_ioprobe(struct pcmcia_device *p_dev,
+ cistpl_cftable_entry_t *cfg,
+ cistpl_cftable_entry_t *dflt,
+ unsigned int vcc,
+ void *priv_data)
+{
+ p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
+ p_dev->io.BasePort1 = cfg->io.win[0].base;
+ p_dev->io.NumPorts1 = cfg->io.win[0].len;
+
+ /* Do we need to allocate an interrupt? */
+ if (cfg->irq.IRQInfo1)
+ p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
+
+ /* IO window settings */
+ if (cfg->io.nwin != 1) {
+ lbs_pr_err("wrong CIS (check number of IO windows)\n");
+ return -ENODEV;
+ }
+
+ /* This reserves IO space but doesn't actually enable it */
+ return pcmcia_request_io(p_dev, &p_dev->io);
+}
+
static int if_cs_probe(struct pcmcia_device *p_dev)
{
int ret = -ENOMEM;
unsigned int prod_id;
struct lbs_private *priv;
struct if_cs_card *card;
- /* CIS parsing */
- tuple_t tuple;
- cisparse_t parse;
- cistpl_cftable_entry_t *cfg = &parse.cftable_entry;
- cistpl_io_t *io = &cfg->io;
- u_char buf[64];
lbs_deb_enter(LBS_DEB_CS);
@@ -823,43 +842,11 @@ static int if_cs_probe(struct pcmcia_device *p_dev)
p_dev->conf.Attributes = 0;
p_dev->conf.IntType = INT_MEMORY_AND_IO;
- tuple.Attributes = 0;
- tuple.TupleData = buf;
- tuple.TupleDataMax = sizeof(buf);
- tuple.TupleOffset = 0;
-
- tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY;
- if ((ret = pcmcia_get_first_tuple(p_dev, &tuple)) != 0 ||
- (ret = pcmcia_get_tuple_data(p_dev, &tuple)) != 0 ||
- (ret = pcmcia_parse_tuple(&tuple, &parse)) != 0)
- {
- lbs_pr_err("error in pcmcia_get_first_tuple etc\n");
- goto out1;
- }
-
- p_dev->conf.ConfigIndex = cfg->index;
-
- /* Do we need to allocate an interrupt? */
- if (cfg->irq.IRQInfo1) {
- p_dev->conf.Attributes |= CONF_ENABLE_IRQ;
- }
-
- /* IO window settings */
- if (cfg->io.nwin != 1) {
- lbs_pr_err("wrong CIS (check number of IO windows)\n");
- ret = -ENODEV;
+ if (pcmcia_loop_config(p_dev, if_cs_ioprobe, NULL)) {
+ lbs_pr_err("error in pcmcia_loop_config\n");
goto out1;
}
- p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
- p_dev->io.BasePort1 = io->win[0].base;
- p_dev->io.NumPorts1 = io->win[0].len;
- /* This reserves IO space but doesn't actually enable it */
- ret = pcmcia_request_io(p_dev, &p_dev->io);
- if (ret) {
- lbs_pr_err("error in pcmcia_request_io\n");
- goto out1;
- }
/*
* Allocate an interrupt line. Note that this does not assign
--
1.6.0.4
^ permalink raw reply related
* [PATCH 2/9] pcmcia: use pre-determined values
From: Dominik Brodowski @ 2009-10-18 23:07 UTC (permalink / raw)
To: linux-pcmcia
Cc: Dominik Brodowski, David S. Miller, John W. Linville, netdev,
linux-wireless
In-Reply-To: <1255907255-28297-1-git-send-email-linux@dominikbrodowski.net>
A few PCMCIA network drivers can make use of values provided by the pcmcia
core, instead of tedious, independent CIS parsing.
xirc2ps_cs.c: manf_id
hostap_cs.c: multifunction count
b43/pcmcia.c: ConfigBase address and "Present"
smc91c92_cs.c: By default, mhz_setup() can use VERS_1 as it is stored
in struct pcmcia_device. Only some cards require workarounds, such as
reading out VERS_1 twice.
CC: David S. Miller <davem@davemloft.net>
CC: John W. Linville <linville@tuxdriver.com>
CC: netdev@vger.kernel.org
CC: linux-wireless@vger.kernel.org
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
drivers/net/pcmcia/smc91c92_cs.c | 11 +++++++++--
drivers/net/pcmcia/xirc2ps_cs.c | 5 ++---
drivers/net/wireless/b43/pcmcia.c | 20 --------------------
drivers/net/wireless/hostap/hostap_cs.c | 21 +--------------------
4 files changed, 12 insertions(+), 45 deletions(-)
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 7bde2cd..af03759 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -545,6 +545,14 @@ static int mhz_setup(struct pcmcia_device *link)
u_char *buf, *station_addr;
int rc;
+ /* Read the station address from the CIS. It is stored as the last
+ (fourth) string in the Version 1 Version/ID tuple. */
+ if ((link->prod_id[3]) &&
+ (cvt_ascii_address(dev, link->prod_id[3]) == 0))
+ return 0;
+
+ /* Workarounds for broken cards start here. */
+
cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL);
if (!cfg_mem)
return -1;
@@ -557,8 +565,7 @@ static int mhz_setup(struct pcmcia_device *link)
tuple->TupleData = (cisdata_t *)buf;
tuple->TupleDataMax = 255;
- /* Read the station address from the CIS. It is stored as the last
- (fourth) string in the Version 1 Version/ID tuple. */
+ /* Ugh -- the EM1144 card has two VERS_1 tuples!?! */
tuple->DesiredTuple = CISTPL_VERS_1;
if (first_tuple(link, tuple, parse) != 0) {
rc = -1;
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index cf84231..3dd6ba6 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -792,13 +792,12 @@ xirc2ps_config(struct pcmcia_device * link)
tuple.TupleOffset = 0;
/* Is this a valid card */
- tuple.DesiredTuple = CISTPL_MANFID;
- if ((err=first_tuple(link, &tuple, &parse))) {
+ if (link->has_manf_id == 0) {
printk(KNOT_XIRC "manfid not found in CIS\n");
goto failure;
}
- switch(parse.manfid.manf) {
+ switch (link->manf_id) {
case MANFID_XIRCOM:
local->manf_str = "Xircom";
break;
diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c
index 6c3a749..cd14b7e 100644
--- a/drivers/net/wireless/b43/pcmcia.c
+++ b/drivers/net/wireless/b43/pcmcia.c
@@ -65,35 +65,15 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev)
struct ssb_bus *ssb;
win_req_t win;
memreq_t mem;
- tuple_t tuple;
- cisparse_t parse;
int err = -ENOMEM;
int res = 0;
- unsigned char buf[64];
ssb = kzalloc(sizeof(*ssb), GFP_KERNEL);
if (!ssb)
goto out_error;
err = -ENODEV;
- tuple.DesiredTuple = CISTPL_CONFIG;
- tuple.Attributes = 0;
- tuple.TupleData = buf;
- tuple.TupleDataMax = sizeof(buf);
- tuple.TupleOffset = 0;
- res = pcmcia_get_first_tuple(dev, &tuple);
- if (res != 0)
- goto err_kfree_ssb;
- res = pcmcia_get_tuple_data(dev, &tuple);
- if (res != 0)
- goto err_kfree_ssb;
- res = pcmcia_parse_tuple(&tuple, &parse);
- if (res != 0)
- goto err_kfree_ssb;
-
- dev->conf.ConfigBase = parse.config.base;
- dev->conf.Present = parse.config.rmask[0];
dev->conf.Attributes = CONF_ENABLE_IRQ;
dev->conf.IntType = INT_MEMORY_AND_IO;
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c
index ad8eab4..31b60dd 100644
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c
@@ -274,9 +274,6 @@ static int sandisk_enable_wireless(struct net_device *dev)
conf_reg_t reg;
struct hostap_interface *iface = netdev_priv(dev);
local_info_t *local = iface->local;
- tuple_t tuple;
- cisparse_t *parse = NULL;
- u_char buf[64];
struct hostap_cs_priv *hw_priv = local->hw_priv;
if (hw_priv->link->io.NumPorts1 < 0x42) {
@@ -285,28 +282,13 @@ static int sandisk_enable_wireless(struct net_device *dev)
goto done;
}
- parse = kmalloc(sizeof(cisparse_t), GFP_KERNEL);
- if (parse == NULL) {
- ret = -ENOMEM;
- goto done;
- }
-
- tuple.Attributes = TUPLE_RETURN_COMMON;
- tuple.TupleData = buf;
- tuple.TupleDataMax = sizeof(buf);
- tuple.TupleOffset = 0;
-
if (hw_priv->link->manf_id != 0xd601 || hw_priv->link->card_id != 0x0101) {
/* No SanDisk manfid found */
ret = -ENODEV;
goto done;
}
- tuple.DesiredTuple = CISTPL_LONGLINK_MFC;
- if (pcmcia_get_first_tuple(hw_priv->link, &tuple) ||
- pcmcia_get_tuple_data(hw_priv->link, &tuple) ||
- pcmcia_parse_tuple(&tuple, parse) ||
- parse->longlink_mfc.nfn < 2) {
+ if (hw_priv->link->socket->functions < 2) {
/* No multi-function links found */
ret = -ENODEV;
goto done;
@@ -354,7 +336,6 @@ static int sandisk_enable_wireless(struct net_device *dev)
udelay(10);
done:
- kfree(parse);
return ret;
}
--
1.6.0.4
^ permalink raw reply related
* [PATCH 6/9] pcmcia: convert net pcmcia drivers to use new CIS helpers
From: Dominik Brodowski @ 2009-10-18 23:07 UTC (permalink / raw)
To: linux-pcmcia; +Cc: Dominik Brodowski, David S. Miller, netdev
In-Reply-To: <1255907255-28297-5-git-send-email-linux@dominikbrodowski.net>
Use the new CIS helpers in net pcmcia drivers, which allows for
a few code cleanups.
CC: David S. Miller <davem@davemloft.net>
CC: netdev@vger.kernel.org
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
drivers/net/pcmcia/3c574_cs.c | 18 ++--
drivers/net/pcmcia/3c589_cs.c | 24 ++---
drivers/net/pcmcia/fmvj18x_cs.c | 51 ++++-----
drivers/net/pcmcia/nmclan_cs.c | 19 ++--
drivers/net/pcmcia/smc91c92_cs.c | 238 +++++++++++---------------------------
drivers/net/pcmcia/xirc2ps_cs.c | 101 ++++++----------
6 files changed, 154 insertions(+), 297 deletions(-)
diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
index b58965a..6449290 100644
--- a/drivers/net/pcmcia/3c574_cs.c
+++ b/drivers/net/pcmcia/3c574_cs.c
@@ -344,13 +344,13 @@ static int tc574_config(struct pcmcia_device *link)
{
struct net_device *dev = link->priv;
struct el3_private *lp = netdev_priv(dev);
- tuple_t tuple;
- __le16 buf[32];
int last_fn, last_ret, i, j;
unsigned int ioaddr;
__be16 *phys_addr;
char *cardname;
__u32 config;
+ u8 *buf;
+ size_t len;
phys_addr = (__be16 *)dev->dev_addr;
@@ -378,16 +378,14 @@ static int tc574_config(struct pcmcia_device *link)
/* The 3c574 normally uses an EEPROM for configuration info, including
the hardware address. The future products may include a modem chip
and put the address in the CIS. */
- tuple.Attributes = 0;
- tuple.TupleData = (cisdata_t *)buf;
- tuple.TupleDataMax = 64;
- tuple.TupleOffset = 0;
- tuple.DesiredTuple = 0x88;
- if (pcmcia_get_first_tuple(link, &tuple) == 0) {
- pcmcia_get_tuple_data(link, &tuple);
+
+ len = pcmcia_get_tuple(link, 0x88, &buf);
+ if (buf && len >= 6) {
for (i = 0; i < 3; i++)
- phys_addr[i] = htons(le16_to_cpu(buf[i]));
+ phys_addr[i] = htons(le16_to_cpu(buf[i * 2]));
+ kfree(buf);
} else {
+ kfree(buf); /* 0 < len < 6 */
EL3WINDOW(0);
for (i = 0; i < 3; i++)
phys_addr[i] = htons(read_eeprom(ioaddr, i + 10));
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index 569fb06..ea04356 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -256,22 +256,15 @@ static int tc589_config(struct pcmcia_device *link)
{
struct net_device *dev = link->priv;
struct el3_private *lp = netdev_priv(dev);
- tuple_t tuple;
- __le16 buf[32];
- __be16 *phys_addr;
+ __be16 *phys_addr = NULL;
int last_fn, last_ret, i, j, multi = 0, fifo;
unsigned int ioaddr;
char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"};
+ u8 *buf;
+ size_t len;
DEBUG(0, "3c589_config(0x%p)\n", link);
- phys_addr = (__be16 *)dev->dev_addr;
- tuple.Attributes = 0;
- tuple.TupleData = (cisdata_t *)buf;
- tuple.TupleDataMax = sizeof(buf);
- tuple.TupleOffset = 0;
- tuple.Attributes = TUPLE_RETURN_COMMON;
-
/* Is this a 3c562? */
if (link->manf_id != MANFID_3COM)
printk(KERN_INFO "3c589_cs: hmmm, is this really a "
@@ -301,12 +294,13 @@ static int tc589_config(struct pcmcia_device *link)
/* The 3c589 has an extra EEPROM for configuration info, including
the hardware address. The 3c562 puts the address in the CIS. */
- tuple.DesiredTuple = 0x88;
- if (pcmcia_get_first_tuple(link, &tuple) == 0) {
- pcmcia_get_tuple_data(link, &tuple);
- for (i = 0; i < 3; i++)
- phys_addr[i] = htons(le16_to_cpu(buf[i]));
+ len = pcmcia_get_tuple(link, 0x88, &buf);
+ if (buf && len >= 6) {
+ for (i = 0; i < 3; i++)
+ phys_addr[i] = htons(le16_to_cpu(buf[i*2]));
+ kfree(buf);
} else {
+ kfree(buf); /* 0 < len < 6 */
for (i = 0; i < 3; i++)
phys_addr[i] = htons(read_eeprom(ioaddr, i));
if (phys_addr[0] == htons(0x6060)) {
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index c7a2bbf..58954a6 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -350,32 +350,30 @@ static int fmvj18x_ioprobe(struct pcmcia_device *p_dev,
return 0; /* strange, but that's what the code did already before... */
}
-
static int fmvj18x_config(struct pcmcia_device *link)
{
struct net_device *dev = link->priv;
local_info_t *lp = netdev_priv(dev);
- tuple_t tuple;
- u_short buf[32];
int i, last_fn = RequestIO, last_ret = 0, ret;
unsigned int ioaddr;
cardtype_t cardtype;
char *card_name = "unknown";
- u_char *node_id;
+ u8 *buf;
+ size_t len;
+ u_char buggybuf[32];
DEBUG(0, "fmvj18x_config(0x%p)\n", link);
- tuple.TupleData = (u_char *)buf;
- tuple.TupleDataMax = 64;
- tuple.TupleOffset = 0;
- tuple.DesiredTuple = CISTPL_FUNCE;
- tuple.TupleOffset = 0;
- if (pcmcia_get_first_tuple(link, &tuple) == 0) {
+ len = pcmcia_get_tuple(link, CISTPL_FUNCE, &buf);
+ if (buf)
+ kfree(buf);
+
+ if (len) {
+ /* Yes, I have CISTPL_FUNCE. Let's check CISTPL_MANFID */
last_ret = pcmcia_loop_config(link, fmvj18x_ioprobe, NULL);
if (last_ret != 0)
goto cs_failed;
- /* Yes, I have CISTPL_FUNCE. Let's check CISTPL_MANFID */
switch (link->manf_id) {
case MANFID_TDK:
cardtype = TDK;
@@ -482,21 +480,21 @@ static int fmvj18x_config(struct pcmcia_device *link)
case CONTEC:
case NEC:
case KME:
- tuple.DesiredTuple = CISTPL_FUNCE;
- tuple.TupleOffset = 0;
- CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple));
- tuple.TupleOffset = 0;
- CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple));
if (cardtype == MBH10304) {
- /* MBH10304's CIS_FUNCE is corrupted */
- node_id = &(tuple.TupleData[5]);
card_name = "FMV-J182";
- } else {
- while (tuple.TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID ) {
- CS_CHECK(GetNextTuple, pcmcia_get_next_tuple(link, &tuple));
- CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple));
+
+ len = pcmcia_get_tuple(link, CISTPL_FUNCE, &buf);
+ if (len < 11) {
+ kfree(buf);
+ goto failed;
}
- node_id = &(tuple.TupleData[2]);
+ /* Read MACID from CIS */
+ for (i = 5; i < 11; i++)
+ dev->dev_addr[i] = buf[i];
+ kfree(buf);
+ } else {
+ if (pcmcia_get_mac_from_cis(link, dev))
+ goto failed;
if( cardtype == TDK ) {
card_name = "TDK LAK-CD021";
} else if( cardtype == LA501 ) {
@@ -509,9 +507,6 @@ static int fmvj18x_config(struct pcmcia_device *link)
card_name = "C-NET(PC)C";
}
}
- /* Read MACID from CIS */
- for (i = 0; i < 6; i++)
- dev->dev_addr[i] = node_id[i];
break;
case UNGERMANN:
/* Read MACID from register */
@@ -521,12 +516,12 @@ static int fmvj18x_config(struct pcmcia_device *link)
break;
case XXX10304:
/* Read MACID from Buggy CIS */
- if (fmvj18x_get_hwinfo(link, tuple.TupleData) == -1) {
+ if (fmvj18x_get_hwinfo(link, buggybuf) == -1) {
printk(KERN_NOTICE "fmvj18x_cs: unable to read hardware net address.\n");
goto failed;
}
for (i = 0 ; i < 6; i++) {
- dev->dev_addr[i] = tuple.TupleData[i];
+ dev->dev_addr[i] = buggybuf[i];
}
card_name = "FMV-J182";
break;
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index 5ed6339..4b96b35 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -661,8 +661,8 @@ static int nmclan_config(struct pcmcia_device *link)
{
struct net_device *dev = link->priv;
mace_private *lp = netdev_priv(dev);
- tuple_t tuple;
- u_char buf[64];
+ u8 *buf;
+ size_t len;
int i, last_ret, last_fn;
unsigned int ioaddr;
@@ -677,14 +677,13 @@ static int nmclan_config(struct pcmcia_device *link)
ioaddr = dev->base_addr;
/* Read the ethernet address from the CIS. */
- tuple.DesiredTuple = 0x80 /* CISTPL_CFTABLE_ENTRY_MISC */;
- tuple.TupleData = buf;
- tuple.TupleDataMax = 64;
- tuple.TupleOffset = 0;
- tuple.Attributes = 0;
- CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple));
- CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple));
- memcpy(dev->dev_addr, tuple.TupleData, ETHER_ADDR_LEN);
+ len = pcmcia_get_tuple(link, 0x80, &buf);
+ if (!buf || len < ETHER_ADDR_LEN) {
+ kfree(buf);
+ goto failed;
+ }
+ memcpy(dev->dev_addr, buf, ETHER_ADDR_LEN);
+ kfree(buf);
/* Verify configuration by reading the MACE ID. */
{
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index af03759..df92bcd 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -126,12 +126,6 @@ struct smc_private {
int rx_ovrn;
};
-struct smc_cfg_mem {
- tuple_t tuple;
- cisparse_t parse;
- u_char buf[255];
-};
-
/* Special definitions for Megahertz multifunction cards */
#define MEGAHERTZ_ISR 0x0380
@@ -408,34 +402,7 @@ static int cvt_ascii_address(struct net_device *dev, char *s)
return 0;
}
-/*====================================================================*/
-
-static int first_tuple(struct pcmcia_device *handle, tuple_t *tuple,
- cisparse_t *parse)
-{
- int i;
-
- i = pcmcia_get_first_tuple(handle, tuple);
- if (i != 0)
- return i;
- i = pcmcia_get_tuple_data(handle, tuple);
- if (i != 0)
- return i;
- return pcmcia_parse_tuple(tuple, parse);
-}
-
-static int next_tuple(struct pcmcia_device *handle, tuple_t *tuple,
- cisparse_t *parse)
-{
- int i;
-
- if ((i = pcmcia_get_next_tuple(handle, tuple)) != 0 ||
- (i = pcmcia_get_tuple_data(handle, tuple)) != 0)
- return i;
- return pcmcia_parse_tuple(tuple, parse);
-}
-
-/*======================================================================
+/*====================================================================
Configuration stuff for Megahertz cards
@@ -490,15 +457,10 @@ static int mhz_mfc_config(struct pcmcia_device *link)
{
struct net_device *dev = link->priv;
struct smc_private *smc = netdev_priv(dev);
- struct smc_cfg_mem *cfg_mem;
win_req_t req;
memreq_t mem;
int i;
- cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL);
- if (!cfg_mem)
- return -ENOMEM;
-
link->conf.Attributes |= CONF_ENABLE_SPKR;
link->conf.Status = CCSR_AUDIO_ENA;
link->irq.Attributes =
@@ -510,7 +472,8 @@ static int mhz_mfc_config(struct pcmcia_device *link)
/* The Megahertz combo cards have modem-like CIS entries, so
we have to explicitly try a bunch of port combinations. */
if (pcmcia_loop_config(link, mhz_mfc_config_check, NULL))
- goto free_cfg_mem;
+ return -ENODEV;
+
dev->base_addr = link->io.BasePort1;
/* Allocate a memory window, for accessing the ISR */
@@ -519,7 +482,8 @@ static int mhz_mfc_config(struct pcmcia_device *link)
req.AccessSpeed = 0;
i = pcmcia_request_window(&link, &req, &link->win);
if (i != 0)
- goto free_cfg_mem;
+ return -ENODEV;
+
smc->base = ioremap(req.Base, req.Size);
mem.CardOffset = mem.Page = 0;
if (smc->manfid == MANFID_MOTOROLA)
@@ -531,18 +495,32 @@ static int mhz_mfc_config(struct pcmcia_device *link)
&& (smc->cardid == PRODID_MEGAHERTZ_EM3288))
mhz_3288_power(link);
-free_cfg_mem:
- kfree(cfg_mem);
- return -ENODEV;
+ return 0;
}
+static int pcmcia_get_versmac(struct pcmcia_device *p_dev,
+ tuple_t *tuple,
+ void *priv)
+{
+ struct net_device *dev = priv;
+ cisparse_t parse;
+
+ if (pcmcia_parse_tuple(tuple, &parse))
+ return -EINVAL;
+
+ if ((parse.version_1.ns > 3) &&
+ (cvt_ascii_address(dev,
+ (parse.version_1.str + parse.version_1.ofs[3]))))
+ return 0;
+
+ return -EINVAL;
+};
+
static int mhz_setup(struct pcmcia_device *link)
{
struct net_device *dev = link->priv;
- struct smc_cfg_mem *cfg_mem;
- tuple_t *tuple;
- cisparse_t *parse;
- u_char *buf, *station_addr;
+ size_t len;
+ u8 *buf;
int rc;
/* Read the station address from the CIS. It is stored as the last
@@ -552,56 +530,22 @@ static int mhz_setup(struct pcmcia_device *link)
return 0;
/* Workarounds for broken cards start here. */
-
- cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL);
- if (!cfg_mem)
- return -1;
-
- tuple = &cfg_mem->tuple;
- parse = &cfg_mem->parse;
- buf = cfg_mem->buf;
-
- tuple->Attributes = tuple->TupleOffset = 0;
- tuple->TupleData = (cisdata_t *)buf;
- tuple->TupleDataMax = 255;
-
/* Ugh -- the EM1144 card has two VERS_1 tuples!?! */
- tuple->DesiredTuple = CISTPL_VERS_1;
- if (first_tuple(link, tuple, parse) != 0) {
- rc = -1;
- goto free_cfg_mem;
- }
- /* Ugh -- the EM1144 card has two VERS_1 tuples!?! */
- if (next_tuple(link, tuple, parse) != 0)
- first_tuple(link, tuple, parse);
- if (parse->version_1.ns > 3) {
- station_addr = parse->version_1.str + parse->version_1.ofs[3];
- if (cvt_ascii_address(dev, station_addr) == 0) {
- rc = 0;
- goto free_cfg_mem;
- }
- }
+ if (!pcmcia_loop_tuple(link, CISTPL_VERS_1, pcmcia_get_versmac, dev))
+ return 0;
/* Another possibility: for the EM3288, in a special tuple */
- tuple->DesiredTuple = 0x81;
- if (pcmcia_get_first_tuple(link, tuple) != 0) {
- rc = -1;
- goto free_cfg_mem;
- }
- if (pcmcia_get_tuple_data(link, tuple) != 0) {
- rc = -1;
- goto free_cfg_mem;
- }
- buf[12] = '\0';
- if (cvt_ascii_address(dev, buf) == 0) {
- rc = 0;
- goto free_cfg_mem;
- }
rc = -1;
-free_cfg_mem:
- kfree(cfg_mem);
- return rc;
-}
+ len = pcmcia_get_tuple(link, 0x81, &buf);
+ if (buf && len >= 13) {
+ buf[12] = '\0';
+ if (cvt_ascii_address(dev, buf))
+ rc = 0;
+ }
+ kfree(buf);
+
+ return rc;
+};
/*======================================================================
@@ -691,58 +635,21 @@ static int smc_config(struct pcmcia_device *link)
return i;
}
+
static int smc_setup(struct pcmcia_device *link)
{
struct net_device *dev = link->priv;
- struct smc_cfg_mem *cfg_mem;
- tuple_t *tuple;
- cisparse_t *parse;
- cistpl_lan_node_id_t *node_id;
- u_char *buf, *station_addr;
- int i, rc;
-
- cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL);
- if (!cfg_mem)
- return -ENOMEM;
-
- tuple = &cfg_mem->tuple;
- parse = &cfg_mem->parse;
- buf = cfg_mem->buf;
-
- tuple->Attributes = tuple->TupleOffset = 0;
- tuple->TupleData = (cisdata_t *)buf;
- tuple->TupleDataMax = 255;
/* Check for a LAN function extension tuple */
- tuple->DesiredTuple = CISTPL_FUNCE;
- i = first_tuple(link, tuple, parse);
- while (i == 0) {
- if (parse->funce.type == CISTPL_FUNCE_LAN_NODE_ID)
- break;
- i = next_tuple(link, tuple, parse);
- }
- if (i == 0) {
- node_id = (cistpl_lan_node_id_t *)parse->funce.data;
- if (node_id->nb == 6) {
- for (i = 0; i < 6; i++)
- dev->dev_addr[i] = node_id->id[i];
- rc = 0;
- goto free_cfg_mem;
- }
- }
+ if (!pcmcia_get_mac_from_cis(link, dev))
+ return 0;
+
/* Try the third string in the Version 1 Version/ID tuple. */
if (link->prod_id[2]) {
- station_addr = link->prod_id[2];
- if (cvt_ascii_address(dev, station_addr) == 0) {
- rc = 0;
- goto free_cfg_mem;
- }
+ if (cvt_ascii_address(dev, link->prod_id[2]) == 0)
+ return 0;
}
-
- rc = -1;
-free_cfg_mem:
- kfree(cfg_mem);
- return rc;
+ return -1;
}
/*====================================================================*/
@@ -801,41 +708,31 @@ static int osi_load_firmware(struct pcmcia_device *link)
return err;
}
-static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid)
+static int pcmcia_osi_mac(struct pcmcia_device *p_dev,
+ tuple_t *tuple,
+ void *priv)
{
- struct net_device *dev = link->priv;
- struct smc_cfg_mem *cfg_mem;
- tuple_t *tuple;
- u_char *buf;
- int i, rc;
+ struct net_device *dev = priv;
+ int i;
- cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL);
- if (!cfg_mem)
- return -1;
+ if (tuple->TupleDataLen < 8)
+ return -EINVAL;
+ if (tuple->TupleData[0] != 0x04)
+ return -EINVAL;
+ for (i = 0; i < 6; i++)
+ dev->dev_addr[i] = tuple->TupleData[i+2];
+ return 0;
+};
- tuple = &cfg_mem->tuple;
- buf = cfg_mem->buf;
- tuple->Attributes = TUPLE_RETURN_COMMON;
- tuple->TupleData = (cisdata_t *)buf;
- tuple->TupleDataMax = 255;
- tuple->TupleOffset = 0;
+static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid)
+{
+ struct net_device *dev = link->priv;
+ int rc;
/* Read the station address from tuple 0x90, subtuple 0x04 */
- tuple->DesiredTuple = 0x90;
- i = pcmcia_get_first_tuple(link, tuple);
- while (i == 0) {
- i = pcmcia_get_tuple_data(link, tuple);
- if ((i != 0) || (buf[0] == 0x04))
- break;
- i = pcmcia_get_next_tuple(link, tuple);
- }
- if (i != 0) {
- rc = -1;
- goto free_cfg_mem;
- }
- for (i = 0; i < 6; i++)
- dev->dev_addr[i] = buf[i+2];
+ if (pcmcia_loop_tuple(link, 0x90, pcmcia_osi_mac, dev))
+ return -1;
if (((manfid == MANFID_OSITECH) &&
(cardid == PRODID_OSITECH_SEVEN)) ||
@@ -843,7 +740,7 @@ static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid)
(cardid == PRODID_PSION_NET100))) {
rc = osi_load_firmware(link);
if (rc)
- goto free_cfg_mem;
+ return rc;
} else if (manfid == MANFID_OSITECH) {
/* Make sure both functions are powered up */
set_bits(0x300, link->io.BasePort1 + OSITECH_AUI_PWR);
@@ -853,10 +750,7 @@ static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid)
inw(link->io.BasePort1 + OSITECH_AUI_PWR),
inw(link->io.BasePort1 + OSITECH_RESET_ISR));
}
- rc = 0;
-free_cfg_mem:
- kfree(cfg_mem);
- return rc;
+ return 0;
}
static int smc91c92_suspend(struct pcmcia_device *link)
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 3dd6ba6..8ed8449 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -371,28 +371,6 @@ static void do_powerdown(struct net_device *dev);
static int do_stop(struct net_device *dev);
/*=============== Helper functions =========================*/
-static int
-first_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t *parse)
-{
- int err;
-
- if ((err = pcmcia_get_first_tuple(handle, tuple)) == 0 &&
- (err = pcmcia_get_tuple_data(handle, tuple)) == 0)
- err = pcmcia_parse_tuple(tuple, parse);
- return err;
-}
-
-static int
-next_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t *parse)
-{
- int err;
-
- if ((err = pcmcia_get_next_tuple(handle, tuple)) == 0 &&
- (err = pcmcia_get_tuple_data(handle, tuple)) == 0)
- err = pcmcia_parse_tuple(tuple, parse);
- return err;
-}
-
#define SelectPage(pgnr) outb((pgnr), ioaddr + XIRCREG_PR)
#define GetByte(reg) ((unsigned)inb(ioaddr + (reg)))
#define GetWord(reg) ((unsigned)inw(ioaddr + (reg)))
@@ -761,6 +739,26 @@ xirc2ps_config_check(struct pcmcia_device *p_dev,
}
+
+/* another try (James Lehmer's CE2 version 4.1): 13-byte tuple, type 2 */
+static int pcmcia_get_mac_ce(struct pcmcia_device *p_dev, tuple_t *tuple,
+			     void *priv)
+{
+	struct net_device *dev = priv;
+	int i;
+
+	if (tuple->TupleDataLen != 13)
+		return -EINVAL;
+	if ((tuple->TupleData[0] != 2) || (tuple->TupleData[1] != 1) ||
+	    (tuple->TupleData[2] != 6))
+		return -EINVAL;
+	/* TupleData[2] is the length byte (6); the six address bytes follow. */
+	for (i = 0; i < 6; i++)
+		dev->dev_addr[i] = tuple->TupleData[i+3];
+	return 0;
+}
+
+
/****************
* xirc2ps_config() is scheduled to run after a CARD_INSERTION event
* is received, to configure the PCMCIA socket, and to make the
@@ -774,9 +772,9 @@ xirc2ps_config(struct pcmcia_device * link)
unsigned int ioaddr;
tuple_t tuple;
cisparse_t parse;
- int err, i;
- u_char buf[64];
- cistpl_lan_node_id_t *node_id = (cistpl_lan_node_id_t*)parse.funce.data;
+ int err;
+ u8 *buf;
+ size_t len;
local->dingo_ccr = NULL;
@@ -827,49 +825,28 @@ xirc2ps_config(struct pcmcia_device * link)
}
/* get the ethernet address from the CIS */
- tuple.DesiredTuple = CISTPL_FUNCE;
- for (err = first_tuple(link, &tuple, &parse); !err;
- err = next_tuple(link, &tuple, &parse)) {
- /* Once I saw two CISTPL_FUNCE_LAN_NODE_ID entries:
- * the first one with a length of zero the second correct -
- * so I skip all entries with length 0 */
- if (parse.funce.type == CISTPL_FUNCE_LAN_NODE_ID
- && ((cistpl_lan_node_id_t *)parse.funce.data)->nb)
- break;
- }
- if (err) { /* not found: try to get the node-id from tuple 0x89 */
- tuple.DesiredTuple = 0x89; /* data layout looks like tuple 0x22 */
- if ((err = pcmcia_get_first_tuple(link, &tuple)) == 0 &&
- (err = pcmcia_get_tuple_data(link, &tuple)) == 0) {
- if (tuple.TupleDataLen == 8 && *buf == CISTPL_FUNCE_LAN_NODE_ID)
- memcpy(&parse, buf, 8);
- else
- err = -1;
- }
- }
- if (err) { /* another try (James Lehmer's CE2 version 4.1)*/
- tuple.DesiredTuple = CISTPL_FUNCE;
- for (err = first_tuple(link, &tuple, &parse); !err;
- err = next_tuple(link, &tuple, &parse)) {
- if (parse.funce.type == 0x02 && parse.funce.data[0] == 1
- && parse.funce.data[1] == 6 && tuple.TupleDataLen == 13) {
- buf[1] = 4;
- memcpy(&parse, buf+1, 8);
- break;
+ err = pcmcia_get_mac_from_cis(link, dev);
+
+ /* not found: try to get the node-id from tuple 0x89 */
+ if (err) {
+ len = pcmcia_get_tuple(link, 0x89, &buf);
+ /* data layout looks like tuple 0x22 */
+ if (buf && len == 8) {
+ if (*buf == CISTPL_FUNCE_LAN_NODE_ID)
+ memcpy(&parse, buf, 8);
+ else
+ err = -1;
}
- }
+ kfree(buf);
}
+
+ if (err)
+ err = pcmcia_loop_tuple(link, CISTPL_FUNCE, pcmcia_get_mac_ce, dev);
+
if (err) {
printk(KNOT_XIRC "node-id not found in CIS\n");
goto failure;
}
- node_id = (cistpl_lan_node_id_t *)parse.funce.data;
- if (node_id->nb != 6) {
- printk(KNOT_XIRC "malformed node-id in CIS\n");
- goto failure;
- }
- for (i=0; i < 6; i++)
- dev->dev_addr[i] = node_id->id[i];
link->io.IOAddrLines =10;
link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
--
1.6.0.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox