* strange tcp behavior
@ 2007-08-02 6:19 john
2007-08-02 9:55 ` Evgeniy Polyakov
0 siblings, 1 reply; 28+ messages in thread
From: john @ 2007-08-02 6:19 UTC (permalink / raw)
To: netdev
1186035057.207629 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [SYN]
Seq=0 Len=0
1186035057.207632 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [SYN, ACK]
Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
1186035057.207666 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [ACK]
Seq=1 Ack=1 Win=1500 Len=0
1186035057.207699 127.0.0.1 -> 127.0.0.1 SMTP Command: EHLO localhost
1186035057.207718 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [ACK]
Seq=1 Ack=17 Win=32792 Len=0
1186035057.207736 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [RST]
Seq=17 Len=0
1186035057.223934 127.0.0.1 -> 127.0.0.1 TCP 33787 > 50000 [RST,
ACK] Seq=0 Ack=0 Win=32792 Len=0
Can someone please comment as to why, tcp stack sends rst packet from the
wrong source port in this situation.
This is the same problem that was described in my first two posts, which
unfortunately nobody seemed to notice.
Here is source code which can reproduce the behavior described; the client
side code is a complete mess but with a little bit it works.
Server:
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <poll.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
/*
 * Server side of the reproducer.
 *
 * Listens on TCP port 25 and, for every connection it accepts, waits 10 ms,
 * sends a short greeting and closes the socket without ever reading from it.
 * The order accept -> delay -> send -> close is kept exactly as in the
 * original program; only error handling has been added.
 */
void main(void) {
	const char *str = "HELLO FRIEND";
	struct sockaddr_in sa;
	struct pollfd fd;
	int flags;
	int ms;		/* listening ("master") socket */
	int ss;		/* accepted ("slave") socket */

	ms = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (ms < 0) {
		perror("socket");
		return;
	}

	/* Put the listening socket into non-blocking mode. */
	flags = fcntl(ms, F_GETFL, 0);
	if (flags < 0 || fcntl(ms, F_SETFL, flags | O_NONBLOCK) < 0) {
		perror("fcntl");
		close(ms);
		return;
	}

	memset(&sa, 0, sizeof(sa));
	sa.sin_family = AF_INET;
	sa.sin_addr.s_addr = htonl(INADDR_ANY);
	sa.sin_port = htons(25);
	if (bind(ms, (struct sockaddr *) &sa, sizeof(sa)) < 0) {
		/* Do not fall through to listen(): we would not be on port 25. */
		perror("bind");
		close(ms);
		return;
	}
	if (listen(ms, 0) < 0) {
		perror("listen");
		close(ms);
		return;
	}

	for (;;) {
		int ready;

		memset(&fd, 0, sizeof(fd));
		fd.fd = ms;
		fd.events = POLLIN;

		ready = poll(&fd, 1, -1);
		if (ready < 0) {
			perror("poll");
			break;
		}
		if (ready == 0)
			break;

		ss = accept(ms, NULL, NULL);
		if (ss < 0)
			continue;	/* nothing to accept after all; poll again */

		/* Give the client time to send its data and its RST. */
		usleep(10000);
		/*
		 * Result deliberately ignored: in the scenario discussed in
		 * this thread the peer has already reset the connection.
		 */
		send(ss, str, strlen(str), MSG_NOSIGNAL);
		close(ss);
	}
	close(ms);
}
Client:
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/if_ether.h>
//#include <arpa/inet.h>
//#include <linux/if_ether.h>
/* Not referenced by the code shown here. */
struct sockaddr_in localaddr;
/* Destination address passed to sendto() in mysend(); filled in by main(). */
struct sockaddr_in remoteaddr;
/* Address used to bind the packet socket; main() stores "lo" in sa_data. */
struct sockaddr rawaddr;
/* sdl: packet socket main() reads frames from; sdr: raw TCP socket mysend() writes to. */
int sdl, sdr;
/* TCP header of the segment to send; main() fills it in before each mysend(). */
struct tcphdr header;
/*
 * Pseudo-header that mysend() places in front of the TCP header in tbuf
 * while computing the checksum. It is not part of what gets transmitted.
 */
struct pheader_t {
/* Source address, as returned by inet_addr(src_addr). */
uint32_t saddr;
/* Destination address, as returned by inet_addr(dst_addr). */
uint32_t daddr;
/* Never set explicitly; left at zero by the bzero() in mysend(). */
uint8_t r;
/* Protocol number; mysend() sets it to 6. */
uint8_t protocol;
/* TCP header size plus payload length, stored with htons(). */
uint16_t length;
};
struct pheader_t pheader;
/* Scratch buffer: pseudo-header, TCP header and payload, summed as 16-bit words. */
unsigned short tbuf[2048];
/* Receive buffer for frames read from sdl. */
unsigned char buf[2048];
/* Payload sent when `data` is non-zero. */
char *msg = "EHLO localhost\r\n";
/* Points at the TCP header inside buf once main() has parsed a frame. */
unsigned char *p;
char *src_addr = "127.0.0.1";
char *dst_addr = "127.0.0.1";
/* Source and destination TCP ports (host byte order). */
unsigned short sprt = 50000;
unsigned short dprt = 25;
/* Time of day used to seed rand(). */
struct timeval tv;
/* Sequence and acknowledgment numbers (host byte order) for outgoing segments. */
unsigned seq, ack_seq;
/* Non-zero: mysend() appends msg as payload; zero: header only. */
int data;
/*
 * Send one hand-built TCP segment on the raw socket `sdr` to `remoteaddr`.
 *
 * The segment is the global `header`, followed by `msg` when the global
 * `data` flag is non-zero. The TCP checksum is computed over the
 * pseudo-header, the TCP header and the payload, and written into
 * header.check before sending.
 *
 * Differences from the original version:
 *  - header.check is cleared before summing, so a stale value left in the
 *    header by a previous call cannot corrupt the checksum;
 *  - an odd-length payload is padded with one zero byte for the sum instead
 *    of silently dropping its last byte;
 *  - the segment is rejected if it would not fit in tbuf;
 *  - a sendto() failure is reported instead of being ignored.
 */
void mysend(void) {
	size_t len = 0;
	size_t total;
	size_t words;
	size_t i;
	uint32_t sum = 0;

	if (data)
		len = strlen(msg);

	/* Bytes covered by the checksum: pseudo-header + TCP header + payload. */
	total = sizeof(pheader) + sizeof(header) + len;
	if (total >= sizeof(tbuf)) {
		/* ">=" leaves room for the padding byte of an odd-length payload. */
		fprintf(stderr, "mysend: segment too large\n");
		return;
	}

	bzero(&pheader, sizeof(pheader));
	pheader.saddr = (in_addr_t) inet_addr(src_addr);
	pheader.daddr = (in_addr_t) inet_addr(dst_addr);
	pheader.protocol = 6;	/* TCP */
	pheader.length = htons((uint16_t) (sizeof(header) + len));

	/* The checksum field itself must be zero while the sum is computed. */
	header.check = 0;

	memcpy(tbuf, &pheader, sizeof(pheader));
	memcpy((char *) tbuf + sizeof(pheader), &header, sizeof(header));
	if (len)
		memcpy((char *) tbuf + sizeof(pheader) + sizeof(header),
		       msg, len);
	if (total & 1)
		((char *) tbuf)[total] = 0;	/* pad to a whole 16-bit word */

	/* One's-complement sum of all 16-bit words, carries folded back in. */
	words = (total + 1) / 2;
	for (i = 0; i < words; i++)
		sum += tbuf[i];
	while (sum >> 16)
		sum = (sum & 0x0000ffff) + (sum >> 16);
	header.check = (uint16_t) ~sum;

	/* Refresh the header copy in tbuf now that it carries the checksum. */
	memcpy((char *) tbuf + sizeof(pheader), &header, sizeof(header));

	/* Only the TCP header and payload are transmitted, not the pseudo-header. */
	if (sendto(sdr, (char *) tbuf + sizeof(pheader), sizeof(header) + len,
		   0, (struct sockaddr *) &remoteaddr, sizeof(remoteaddr)) < 0)
		perror("sendto");
}
/*
 * Client side of the reproducer.
 *
 * Hand-crafts a TCP exchange from port sprt to port dprt using mysend():
 *   1. SYN with a random initial sequence number;
 *   2. wait for the matching SYN,ACK by reading frames from the "lo" device;
 *   3. ACK to complete the handshake;
 *   4. PSH,ACK carrying msg;
 *   5. RST with the sequence number advanced past msg.
 * No return value is checked anywhere in this function.
 */
void main(void)
{
/* Seed rand(), which supplies the initial sequence number below. */
gettimeofday(&tv, NULL);
srand(tv.tv_sec & tv.tv_usec);
remoteaddr.sin_family = AF_INET;
remoteaddr.sin_addr.s_addr = (in_addr_t) inet_addr(dst_addr);
/* sdl: packet socket bound to "lo"; used only to read frames back. */
sdl = socket(PF_INET, SOCK_PACKET, htons(ETH_P_ALL));
strcpy(rawaddr.sa_data, "lo");
bind(sdl, (struct sockaddr *) &rawaddr, sizeof(rawaddr));
/* sdr: raw TCP socket that mysend() transmits on. */
sdr = socket(AF_INET, SOCK_RAW, IPPROTO_TCP);
/* Step 1: SYN. */
bzero(&header, sizeof(header));
header.source = htons(sprt);
header.dest = htons(dprt);
seq = rand();
ack_seq = 0;
header.seq = htonl(seq);
header.ack_seq = htonl(ack_seq);
/* Data offset is expressed in 32-bit words. */
header.doff = sizeof(header) / 4;
header.syn = 1;
header.window = htons(1500);
mysend();
/* Step 2: read frames until one is a SYN,ACK from dprt to sprt. */
while(1) {
recvfrom(sdl, buf, sizeof(buf), 0, NULL, NULL);
// p = buf + (*buf & 0x0f) * 4;
/*
 * Skip 14 bytes (presumably the link-layer header), then the IP header,
 * whose length in 32-bit words is the low nibble of its first byte.
 * p then points at the TCP header. The received length is not checked.
 */
p = (buf + 14) + (*(buf + 14) & 0x0f) * 4;
if(ntohs(((struct tcphdr *)p)->source) == dprt &&
ntohs(((struct tcphdr *)p)->dest) == sprt && ((struct
tcphdr *)p)->syn == 1 && ((struct tcphdr *)p)->ack == 1)
break;
}
/* Step 3: ACK. Sequence numbers are taken from the SYN,ACK still in buf. */
bzero(&header, sizeof(header));
header.source = htons(sprt);
header.dest = htons(dprt);
seq = ntohl(((struct tcphdr *)p)->ack_seq);
ack_seq = ntohl(((struct tcphdr *)p)->seq) + 1;
header.seq = htonl(seq);
header.ack_seq = htonl(ack_seq);
header.doff = sizeof(header) / 4;
header.ack = 1;
header.window = htons(1500);
mysend();
/* Step 4: PSH,ACK with msg as payload (data = 1 makes mysend() append it). */
bzero(&header, sizeof(header));
header.source = htons(sprt);
header.dest = htons(dprt);
header.seq = htonl(seq);
header.ack_seq = htonl(ack_seq);
header.doff = sizeof(header) / 4;
header.ack = 1;
header.psh = 1;
header.window = htons(1500);
data = 1;
mysend();
data = 0;
// usleep(300);
/*
 * Step 5: RST, sent immediately after the data segment. seq is advanced by
 * the payload length; the ACK flag is not set although ack_seq is filled in.
 */
bzero(&header, sizeof(header));
header.source = htons(sprt);
header.dest = htons(dprt);
seq += strlen(msg);
header.seq = htonl(seq);
header.ack_seq = htonl(ack_seq);
header.doff = sizeof(header) / 4;
header.rst = 1;
header.window = htons(1500);
mysend();
}
I traced this behavior way back to 2.4.0-test9-pre3 kernel.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 6:19 strange tcp behavior john
@ 2007-08-02 9:55 ` Evgeniy Polyakov
2007-08-02 10:16 ` Evgeniy Polyakov
0 siblings, 1 reply; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-02 9:55 UTC (permalink / raw)
To: john; +Cc: netdev
On Thu, Aug 02, 2007 at 09:19:06AM +0300, john@screen.lv (john@screen.lv) wrote:
> 1186035057.207629 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [SYN]
> Seq=0 Len=0
> 1186035057.207632 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [SYN, ACK]
> Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
> 1186035057.207666 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [ACK]
> Seq=1 Ack=1 Win=1500 Len=0
> 1186035057.207699 127.0.0.1 -> 127.0.0.1 SMTP Command: EHLO localhost
> 1186035057.207718 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [ACK]
> Seq=1 Ack=17 Win=32792 Len=0
> 1186035057.207736 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [RST]
> Seq=17 Len=0
> 1186035057.223934 127.0.0.1 -> 127.0.0.1 TCP 33787 > 50000 [RST,
> ACK] Seq=0 Ack=0 Win=32792 Len=0
>
> Can someone please comment as to why, tcp stack sends rst packet from the
> wrong source port in this situation.
Besides the fact, that test applications do not run if started not as
root, I got this:
13:51:12.180241 IP localhost.localdomain.50000 > localhost.localdomain.10250: S 906222067:906222067(0) win 1500
13:51:12.180279 IP localhost.localdomain.10250 > localhost.localdomain.50000: S 2011233747:2011233747(0) ack 906222068
win 32792 <mss 16396>
13:51:12.180293 IP localhost.localdomain.50000 > localhost.localdomain.10250: R 906222068:906222068(0) win 0
13:51:12.180320 IP localhost.localdomain.50000 > localhost.localdomain.10250: . ack 1 win 1500
13:51:12.180329 IP localhost.localdomain.10250 > localhost.localdomain.50000: R 2011233748:2011233748(0) win 0
13:51:12.180341 IP localhost.localdomain.50000 > localhost.localdomain.10250: P 1:17(16) ack 1 win 1500
13:51:12.180349 IP localhost.localdomain.10250 > localhost.localdomain.50000: R 2011233748:2011233748(0) win 0
13:51:12.180361 IP localhost.localdomain.50000 > localhost.localdomain.10250: R 906222084:906222084(0) win 1500
I.e. there is no bug in this session.
FC7 2.6.22.1-27.fc7 kernel.
Here is vanilla (with my patches, unrelated to the problem though)
2.6.22-rc5:
09:33:37.650279 IP localhost.50000 > localhost.10250: S 1326688203:1326688203(0) win 1500
09:33:37.664391 IP localhost.10250 > localhost.50000: S 3637551175:3637551175(0) ack 1326688204 win 32792 <mss 16396>
09:33:37.664417 IP localhost.50000 > localhost.10250: R 1326688204:1326688204(0) win 0
09:33:37.650451 IP localhost.50000 > localhost.10250: . ack 1 win 1500
09:33:37.650467 IP localhost.10250 > localhost.50000: R 3637551176:3637551176(0) win 0
09:33:37.650481 IP localhost.50000 > localhost.10250: P 1:17(16) ack 1 win 1500
09:33:37.650493 IP localhost.10250 > localhost.50000: R 3637551176:3637551176(0) win 0
09:33:37.650507 IP localhost.50000 > localhost.10250: R 1326688220:1326688220(0) win 1500
Is it possible that your tcpdump is screwed?
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 9:55 ` Evgeniy Polyakov
@ 2007-08-02 10:16 ` Evgeniy Polyakov
2007-08-02 11:38 ` Simon Arlott
0 siblings, 1 reply; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-02 10:16 UTC (permalink / raw)
To: john; +Cc: netdev
On Thu, Aug 02, 2007 at 01:55:50PM +0400, Evgeniy Polyakov (johnpol@2ka.mipt.ru) wrote:
> On Thu, Aug 02, 2007 at 09:19:06AM +0300, john@screen.lv (john@screen.lv) wrote:
> > 1186035057.207629 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [SYN]
> > Seq=0 Len=0
> > 1186035057.207632 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [SYN, ACK]
> > Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
> > 1186035057.207666 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [ACK]
> > Seq=1 Ack=1 Win=1500 Len=0
> > 1186035057.207699 127.0.0.1 -> 127.0.0.1 SMTP Command: EHLO localhost
> > 1186035057.207718 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [ACK]
> > Seq=1 Ack=17 Win=32792 Len=0
> > 1186035057.207736 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [RST]
> > Seq=17 Len=0
> > 1186035057.223934 127.0.0.1 -> 127.0.0.1 TCP 33787 > 50000 [RST,
> > ACK] Seq=0 Ack=0 Win=32792 Len=0
> >
> > Can someone please comment as to why, tcp stack sends rst packet from the
> > wrong source port in this situation.
>
> Besides the fact, that test applications do not run if started not as
> root, I got this:
And it actually does not initialize a session, since the third line below
shows RST, but not ACK. The same with sendmail smtp server (i.e. 25 port
like in your server) and unmodified client.
Please provide application which can trigger the issue and I will help
to debug this issue. If it will help you to debug client, I can run
tcpdump on public server (say 194.85.82.65, please tell me your source
address) to collect dumps. Current code does not trigger the issue on my
machines (and works not like was intended by you). Ugh, and code really
looks horrible...
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 10:16 ` Evgeniy Polyakov
@ 2007-08-02 11:38 ` Simon Arlott
2007-08-02 12:04 ` Evgeniy Polyakov
[not found] ` <46860.212.93.96.73.1186055105.squirrel@mail.screen.lv>
0 siblings, 2 replies; 28+ messages in thread
From: Simon Arlott @ 2007-08-02 11:38 UTC (permalink / raw)
To: Evgeniy Polyakov; +Cc: john, netdev
On Thu, August 2, 2007 11:16, Evgeniy Polyakov wrote:
> On Thu, Aug 02, 2007 at 01:55:50PM +0400, Evgeniy Polyakov (johnpol@2ka.mipt.ru) wrote:
>> On Thu, Aug 02, 2007 at 09:19:06AM +0300, john@screen.lv (john@screen.lv) wrote:
>> > 1186035057.207629 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [SYN]
>> > Seq=0 Len=0
>> > 1186035057.207632 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [SYN, ACK]
>> > Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
>> > 1186035057.207666 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [ACK]
>> > Seq=1 Ack=1 Win=1500 Len=0
>> > 1186035057.207699 127.0.0.1 -> 127.0.0.1 SMTP Command: EHLO localhost
>> > 1186035057.207718 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [ACK]
>> > Seq=1 Ack=17 Win=32792 Len=0
>> > 1186035057.207736 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [RST]
>> > Seq=17 Len=0
>> > 1186035057.223934 127.0.0.1 -> 127.0.0.1 TCP 33787 > 50000 [RST,
>> > ACK] Seq=0 Ack=0 Win=32792 Len=0
>> >
>> > Can someone please comment as to why, tcp stack sends rst packet from the
>> > wrong source port in this situation.
>>
>> Besides the fact, that test applications do not run if started not as
>> root, I got this:
>
> And it actually does not initializes a session, since tird line below
> shows RST, but not ack. The same with sendmail smtp server (i.e. 25 port
> like in your server) and unmodified client.
> Please provide application which can trigger the issue and I will help
> to debug this issue. If it will help you to debug client, I can run
> tcpdump on public server (say 194.85.82.65, please tell me your source
> address) to collect dumps. Current code does not trigger the issue on my
> machines (and works not like was intended by you). Ugh, and code really
> looks horrible...
>
I just got multiple RSTs instead of a connection too. The second RST looks
like it's from another connection - and a RST for a RST is wrong...
--
Simon Arlott
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 11:38 ` Simon Arlott
@ 2007-08-02 12:04 ` Evgeniy Polyakov
2007-08-02 12:28 ` Evgeniy Polyakov
[not found] ` <46860.212.93.96.73.1186055105.squirrel@mail.screen.lv>
1 sibling, 1 reply; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-02 12:04 UTC (permalink / raw)
To: Simon Arlott; +Cc: john, netdev
On Thu, Aug 02, 2007 at 12:38:59PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> I just got multiple RSTs instead of a connection too. The second RST looks
> like it's from another connection - and a RST for a RST is wrong...
You should use iptables rule to block non-raw access:
iptables -I INPUT -p tcp --dport 50000 -j DROP
but even in that case I got valid session.
> --
> Simon Arlott
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
[not found] ` <46860.212.93.96.73.1186055105.squirrel@mail.screen.lv>
@ 2007-08-02 12:15 ` Simon Arlott
2007-08-02 17:15 ` Simon Arlott
0 siblings, 1 reply; 28+ messages in thread
From: Simon Arlott @ 2007-08-02 12:15 UTC (permalink / raw)
To: john; +Cc: johnpol, netdev
(Don't remove CC:s, don't top post)
>> On Thu, August 2, 2007 11:16, Evgeniy Polyakov wrote:
>>> On Thu, Aug 02, 2007 at 01:55:50PM +0400, Evgeniy Polyakov
>>> (johnpol@2ka.mipt.ru) wrote:
>>>> On Thu, Aug 02, 2007 at 09:19:06AM +0300, john@screen.lv
>>>> (john@screen.lv) wrote:
>>>> > 1186035057.207629 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [SYN]
>>>> > Seq=0 Len=0
>>>> > 1186035057.207632 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [SYN,
>>>> ACK]
>>>> > Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
>>>> > 1186035057.207666 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [ACK]
>>>> > Seq=1 Ack=1 Win=1500 Len=0
>>>> > 1186035057.207699 127.0.0.1 -> 127.0.0.1 SMTP Command: EHLO
>>>> localhost
>>>> > 1186035057.207718 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [ACK]
>>>> > Seq=1 Ack=17 Win=32792 Len=0
>>>> > 1186035057.207736 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [RST]
>>>> > Seq=17 Len=0
>>>> > 1186035057.223934 127.0.0.1 -> 127.0.0.1 TCP 33787 > 50000
>>>> [RST,
>>>> > ACK] Seq=0 Ack=0 Win=32792 Len=0
>>>> >
>>>> > Can someone please comment as to why, tcp stack sends rst packet
>>>> from the
>>>> > wrong source port in this situation.
>>>>
>>>> Besides the fact, that test applications do not run if started not as
>>>> root, I got this:
>>>
>>> And it actually does not initializes a session, since tird line below
>>> shows RST, but not ack. The same with sendmail smtp server (i.e. 25 port
>>> like in your server) and unmodified client.
>>> Please provide application which can trigger the issue and I will help
>>> to debug this issue. If it will help you to debug client, I can run
>>> tcpdump on public server (say 194.85.82.65, please tell me your source
>>> address) to collect dumps. Current code does not trigger the issue on my
>>> machines (and works not like was intended by you). Ugh, and code really
>>> looks horrible...
>>>
>>
>> I just got multiple RSTs instead of a connection too. The second RST looks
>> like it's from another connection - and a RST for a RST is wrong...
On Thu, August 2, 2007 12:45, john@screen.lv wrote:
> you need to add iptables rule for this to
> work, or else the tcp resets connection too early because it does not know
> that something is listening on 50000 port.
>
> iptables -I INPUT -p tcp --dport 50000 -j DROP should do the job.
You didn't mention this before.
Without the server running:
13:02:23.314352 IP 127.0.0.1.50000 > 127.0.0.1.2500: S 53123695:53123695(0) win 1500
13:02:23.314442 IP 127.0.0.1.2500 > 127.0.0.1.50000: R 0:0(0) ack 53123696 win 0
13:02:25.906975 IP 127.0.0.1.3315 > 127.0.0.1.49197: P 1285306902:1285307318(416) ack 1267361915 win 1024
<nop,nop,timestamp 3575709021 3575672670>
13:02:25.907060 IP 127.0.0.1.49197 > 127.0.0.1.3315: . ack 416 win 1541 <nop,nop,timestamp 3575709021
3575709021>
With the server running:
13:05:55.234696 IP 127.0.0.1.50000 > 127.0.0.1.2500: S 1960601450:1960601450(0) win 1500
13:05:55.234799 IP 127.0.0.1.2500 > 127.0.0.1.50000: S 2171862150:2171862150(0) ack 1960601451 win 32792
<mss 16396>
13:05:55.238271 IP 127.0.0.1.50000 > 127.0.0.1.2500: . ack 1 win 1500
13:05:55.240034 IP 127.0.0.1.50000 > 127.0.0.1.2500: P 1:17(16) ack 1 win 1500
13:05:55.240132 IP 127.0.0.1.2500 > 127.0.0.1.50000: . ack 17 win 32792
13:05:55.242251 IP 127.0.0.1.50000 > 127.0.0.1.2500: R 1960601467:1960601467(0) win 1500
13:05:55.253884 IP 127.0.0.1.56434 > 127.0.0.1.50000: R 2171862151:2171862151(0) ack 1960601467 win 32792
Weird. I resent your final RST a few times with a delay:
13:13:05.199275 IP 127.0.0.1.50000 > 127.0.0.1.2500: S 83018811:83018811(0) win 1500
13:13:05.199378 IP 127.0.0.1.2500 > 127.0.0.1.50000: S 2627922927:2627922927(0) ack 83018812 win 32792 <mss
16396>
13:13:05.203368 IP 127.0.0.1.50000 > 127.0.0.1.2500: . ack 1 win 1500
13:13:05.205049 IP 127.0.0.1.50000 > 127.0.0.1.2500: P 1:17(16) ack 1 win 1500
13:13:05.205173 IP 127.0.0.1.2500 > 127.0.0.1.50000: . ack 17 win 32792
13:13:05.206463 IP 127.0.0.1.50000 > 127.0.0.1.2500: R 83018828:83018828(0) win 1500
13:13:05.207656 IP 127.0.0.1.50000 > 127.0.0.1.2500: R 83018828:83018828(0) win 1500
13:13:05.217664 IP 127.0.0.1.55271 > 127.0.0.1.50000: R 2627922928:2627922928(0) ack 83018828 win 32792
13:13:05.510239 IP 127.0.0.1.50000 > 127.0.0.1.2500: R 83018828:83018828(0) win 1500
13:13:05.511644 IP 127.0.0.1.50000 > 127.0.0.1.2500: R 83018828:83018828(0) win 1500
13:13:05.512764 IP 127.0.0.1.50000 > 127.0.0.1.2500: R 83018828:83018828(0) win 1500
I don't know where that extra RST is coming from.
This test would be more convincing between two hosts, since your bizarre
client is using raw sockets as root and could be doing anything.
--
Simon Arlott
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 12:04 ` Evgeniy Polyakov
@ 2007-08-02 12:28 ` Evgeniy Polyakov
0 siblings, 0 replies; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-02 12:28 UTC (permalink / raw)
To: Simon Arlott; +Cc: john, netdev
On Thu, Aug 02, 2007 at 04:04:53PM +0400, Evgeniy Polyakov (johnpol@2ka.mipt.ru) wrote:
> On Thu, Aug 02, 2007 at 12:38:59PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> > I just got multiple RSTs instead of a connection too. The second RST looks
> > like it's from another connection - and a RST for a RST is wrong...
>
> You should use iptables rule to block non-raw access:
> iptables -I INPUT -p tcp --dport 50000 -j DROP
>
> but even in that case I got valid session.
Ok, I can now reproduce the problem.
I will try to debug it further.
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 12:15 ` Simon Arlott
@ 2007-08-02 17:15 ` Simon Arlott
2007-08-02 18:08 ` Evgeniy Polyakov
0 siblings, 1 reply; 28+ messages in thread
From: Simon Arlott @ 2007-08-02 17:15 UTC (permalink / raw)
To: john; +Cc: johnpol, netdev
On 02/08/07 13:15, Simon Arlott wrote:
> (Don't remove CC:s, don't top post)
>>> On Thu, August 2, 2007 11:16, Evgeniy Polyakov wrote:
>>>> On Thu, Aug 02, 2007 at 01:55:50PM +0400, Evgeniy Polyakov
>>>> (johnpol@2ka.mipt.ru) wrote:
>>>>> On Thu, Aug 02, 2007 at 09:19:06AM +0300, john@screen.lv
>>>>> (john@screen.lv) wrote:
>>>>> > 1186035057.207629 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [SYN]
>>>>> > Seq=0 Len=0
>>>>> > 1186035057.207632 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [SYN,
>>>>> ACK]
>>>>> > Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
>>>>> > 1186035057.207666 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [ACK]
>>>>> > Seq=1 Ack=1 Win=1500 Len=0
>>>>> > 1186035057.207699 127.0.0.1 -> 127.0.0.1 SMTP Command: EHLO
>>>>> localhost
>>>>> > 1186035057.207718 127.0.0.1 -> 127.0.0.1 TCP smtp > 50000 [ACK]
>>>>> > Seq=1 Ack=17 Win=32792 Len=0
>>>>> > 1186035057.207736 127.0.0.1 -> 127.0.0.1 TCP 50000 > smtp [RST]
>>>>> > Seq=17 Len=0
>>>>> > 1186035057.223934 127.0.0.1 -> 127.0.0.1 TCP 33787 > 50000
>>>>> [RST,
>>>>> > ACK] Seq=0 Ack=0 Win=32792 Len=0
>>>>> >
>>>>> > Can someone please comment as to why, tcp stack sends rst packet
>>>>> from the
>>>>> > wrong source port in this situation.
> I don't know where that extra RST is coming from.
> This test would be more convincing between two hosts, since your bizarre
> client is using raw sockets as root and could be doing anything.
Server 192.168.7.8 (2.6.23)
Client 192.168.7.4 (2.6.20)
17:33:45.326246 IP 192.168.7.4.50000 > 192.168.7.8.2500: S 1385353579:1385353579(0) win 1500
17:33:45.326418 IP 192.168.7.8.2500 > 192.168.7.4.50000: S 1388203102:1388203102(0) ack 1385353580 win 14360 <mss 7180>
17:33:45.348833 IP 192.168.7.4.50000 > 192.168.7.8.2500: . ack 1 win 1500
17:33:45.349977 IP 192.168.7.4.50000 > 192.168.7.8.2500: P 1:17(16) ack 1 win 1500
17:33:45.350117 IP 192.168.7.8.2500 > 192.168.7.4.50000: . ack 17 win 14360
17:33:45.351273 IP 192.168.7.4.50000 > 192.168.7.8.2500: R 1385353596:1385353596(0) win 1500
17:33:45.360878 IP 192.168.7.8.48186 > 192.168.7.4.50000: R 1388203103:1388203103(0) ack 1385353596 win 14360
Seems to be losing the source port information when it decides to send
that final RST|ACK. It's going through the "TCPAbortOnClose" path:
tcp_close:
-> tcp_set_state(sk, TCP_CLOSE)
-> inet_put_port(&tcp_hashinfo, sk)
Perhaps it's losing the port information here?
-> tcp_send_active_reset(sk, GFP_KERNEL)
"TCP_CLOSE socket is finished"
Should these two calls be the other way round?
Also, I don't think it should be sending a RST after the other side has
sent one - the connection no longer exists so there is nothing on the
other side to reset.
--
Simon Arlott
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 17:15 ` Simon Arlott
@ 2007-08-02 18:08 ` Evgeniy Polyakov
2007-08-02 18:48 ` Evgeniy Polyakov
2007-08-02 18:58 ` Simon Arlott
0 siblings, 2 replies; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-02 18:08 UTC (permalink / raw)
To: Simon Arlott; +Cc: john, netdev, David Miller
On Thu, Aug 02, 2007 at 06:15:52PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> 17:33:45.351273 IP 192.168.7.4.50000 > 192.168.7.8.2500: R 1385353596:1385353596(0) win 1500
> 17:33:45.360878 IP 192.168.7.8.48186 > 192.168.7.4.50000: R 1388203103:1388203103(0) ack 1385353596 win 14360
>
> Seems to be losing the source port information when it decides to send
> that final RST|ACK. It's going through the "TCPAbortOnClose" path:
>
> tcp_close:
> -> tcp_set_state(sk, TCP_CLOSE)
> -> inet_put_port(&tcp_hashinfo, sk)
> Perhaps it's losing the port information here?
> -> tcp_send_active_reset(sk, GFP_KERNEL)
>
> "TCP_CLOSE socket is finished"
> Should these two calls be the other way round?
>
>
> Also, I don't think it should be sending a RST after the other side has
> sent one - the connection no longer exists so there is nothing on the
> other side to reset.
Problem is not in tcp_send_active_reset(), when socket is being released
it is already damaged.
Problem is that inet_autobind() function is called for socket, which is
already dead, but not yet completely - it smells bad (since it has its
port freed), but still alive (accessible via send()), so for its last
word inet_sendmsg() tries to bind it again, and only after that time it
will be eventually closed and freed completely.
So, following patch fixes problem for me.
Another solution might not to release port until socket is being
released, but that can lead to performance degradation.
Correct me if sk_err can be reset.
Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 06c08e5..6790b23 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -168,8 +169,14 @@ void inet_sock_destruct(struct sock *sk)
static int inet_autobind(struct sock *sk)
{
struct inet_sock *inet;
+
/* We may need to bind the socket. */
lock_sock(sk);
+ if (sk->sk_err) {
+ release_sock(sk);
+ return sk->sk_err;
+ }
+
inet = inet_sk(sk);
if (!inet->num) {
if (sk->sk_prot->get_port(sk, 0)) {
@@ -686,8 +703,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
struct sock *sk = sock->sk;
/* We may need to bind the socket. */
- if (!inet_sk(sk)->num && inet_autobind(sk))
- return -EAGAIN;
+ if (!inet_sk(sk)->num) {
+ int err = inet_autobind(sk);
+ if (err)
+ return err;
+ }
return sk->sk_prot->sendmsg(iocb, sk, msg, size);
}
@@ -698,8 +718,11 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
struct sock *sk = sock->sk;
/* We may need to bind the socket. */
- if (!inet_sk(sk)->num && inet_autobind(sk))
- return -EAGAIN;
+ if (!inet_sk(sk)->num) {
+ int err = inet_autobind(sk);
+ if (err)
+ return err;
+ }
if (sk->sk_prot->sendpage)
return sk->sk_prot->sendpage(sk, page, offset, size, flags);
--
Evgeniy Polyakov
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 18:08 ` Evgeniy Polyakov
@ 2007-08-02 18:48 ` Evgeniy Polyakov
2007-08-02 22:02 ` David Miller
2007-08-03 2:21 ` David Miller
2007-08-02 18:58 ` Simon Arlott
1 sibling, 2 replies; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-02 18:48 UTC (permalink / raw)
To: Simon Arlott; +Cc: john, netdev, David Miller
On Thu, Aug 02, 2007 at 10:08:42PM +0400, Evgeniy Polyakov (johnpol@2ka.mipt.ru) wrote:
> So, following patch fixes problem for me.
Or this one. Essentially the same though.
Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 06c08e5..7c47ef5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -168,8 +168,14 @@ void inet_sock_destruct(struct sock *sk)
static int inet_autobind(struct sock *sk)
{
struct inet_sock *inet;
+
/* We may need to bind the socket. */
lock_sock(sk);
+ if (sk->sk_err || (sk->sk_state == TCP_CLOSE)) {
+ release_sock(sk);
+ return sk->sk_err;
+ }
+
inet = inet_sk(sk);
if (!inet->num) {
if (sk->sk_prot->get_port(sk, 0)) {
@@ -686,8 +692,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
struct sock *sk = sock->sk;
/* We may need to bind the socket. */
- if (!inet_sk(sk)->num && inet_autobind(sk))
- return -EAGAIN;
+ if (!inet_sk(sk)->num) {
+ int err = inet_autobind(sk);
+ if (err)
+ return err;
+ }
return sk->sk_prot->sendmsg(iocb, sk, msg, size);
}
@@ -698,8 +707,11 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
struct sock *sk = sock->sk;
/* We may need to bind the socket. */
- if (!inet_sk(sk)->num && inet_autobind(sk))
- return -EAGAIN;
+ if (!inet_sk(sk)->num) {
+ int err = inet_autobind(sk);
+ if (err)
+ return err;
+ }
if (sk->sk_prot->sendpage)
return sk->sk_prot->sendpage(sk, page, offset, size, flags);
--
Evgeniy Polyakov
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 18:08 ` Evgeniy Polyakov
2007-08-02 18:48 ` Evgeniy Polyakov
@ 2007-08-02 18:58 ` Simon Arlott
2007-08-03 8:25 ` Evgeniy Polyakov
1 sibling, 1 reply; 28+ messages in thread
From: Simon Arlott @ 2007-08-02 18:58 UTC (permalink / raw)
To: Evgeniy Polyakov; +Cc: john, netdev, David Miller
On 02/08/07 19:08, Evgeniy Polyakov wrote:
> On Thu, Aug 02, 2007 at 06:15:52PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
>> 17:33:45.351273 IP 192.168.7.4.50000 > 192.168.7.8.2500: R 1385353596:1385353596(0) win 1500
>> 17:33:45.360878 IP 192.168.7.8.48186 > 192.168.7.4.50000: R 1388203103:1388203103(0) ack 1385353596 win 14360
>
> Problem is not in tcp_send_active_reset(), when socket is being released
> it is already damaged.
> Problem is that inet_autobind() function is called for socket, which is
> already dead, but not yet completely - it smells bad (since it has its
> port freed), but stil alive (accessible via send()), so for its last
> word inet_sendmsg() tries to bind it again, and only after that time it
> will be eventually closed and freed completely.
>
> So, following patch fixes problem for me.
> Another solution might not to release port until socket is being
> released, but that can lead to performance degradation.
> Correct me if sk_err can be reset.
19:24:32.897071 IP 192.168.7.4.50000 > 192.168.7.8.2500: S 705362199:705362199(0) win 1500
19:24:32.897211 IP 192.168.7.8.2500 > 192.168.7.4.50000: S 4159455228:4159455228(0) ack 705362200 win 14360 <mss 7180>
19:24:32.920784 IP 192.168.7.4.50000 > 192.168.7.8.2500: . ack 1 win 1500
19:24:32.921732 IP 192.168.7.4.50000 > 192.168.7.8.2500: P 1:17(16) ack 1 win 1500
19:24:32.921795 IP 192.168.7.8.2500 > 192.168.7.4.50000: . ack 17 win 14360
19:24:32.922881 IP 192.168.7.4.50000 > 192.168.7.8.2500: R 705362216:705362216(0) win 1500
19:24:34.927717 IP 192.168.7.8.2500 > 192.168.7.4.50000: R 1:1(0) ack 17 win 14360
According to RFC 793, the RST from .4 means that the connection
is CLOSED.
Reset Processing
The receiver of a RST first validates it, then changes state. If the
receiver was in the LISTEN state, it ignores it. If the receiver was
in SYN-RECEIVED state and had previously been in the LISTEN state,
then the receiver returns to the LISTEN state, otherwise the receiver
aborts the connection and goes to the CLOSED state. If the receiver
was in any other state, it aborts the connection and advises the user
and goes to the CLOSED state.
So when the call to close() is made without reading:
Abort
Format: ABORT (local connection name)
This command causes all pending SENDs and RECEIVES to be
aborted, the TCB to be removed, and a special RESET message to
be sent to the TCP on the other side of the connection.
Isn't there no other side of the connection to send the RESET to?
> Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
>
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index 06c08e5..6790b23 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -168,8 +169,14 @@ void inet_sock_destruct(struct sock *sk)
> static int inet_autobind(struct sock *sk)
> {
> struct inet_sock *inet;
> +
> /* We may need to bind the socket. */
> lock_sock(sk);
> + if (sk->sk_err) {
> + release_sock(sk);
> + return sk->sk_err;
> + }
> +
> inet = inet_sk(sk);
> if (!inet->num) {
> if (sk->sk_prot->get_port(sk, 0)) {
> @@ -686,8 +703,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
> struct sock *sk = sock->sk;
>
> /* We may need to bind the socket. */
> - if (!inet_sk(sk)->num && inet_autobind(sk))
> - return -EAGAIN;
> + if (!inet_sk(sk)->num) {
> + int err = inet_autobind(sk);
> + if (err)
> + return err;
> + }
>
> return sk->sk_prot->sendmsg(iocb, sk, msg, size);
> }
> @@ -698,8 +718,11 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
> struct sock *sk = sock->sk;
>
> /* We may need to bind the socket. */
> - if (!inet_sk(sk)->num && inet_autobind(sk))
> - return -EAGAIN;
> + if (!inet_sk(sk)->num) {
> + int err = inet_autobind(sk);
> + if (err)
> + return err;
> + }
>
> if (sk->sk_prot->sendpage)
> return sk->sk_prot->sendpage(sk, page, offset, size, flags);
>
--
Simon Arlott
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 18:48 ` Evgeniy Polyakov
@ 2007-08-02 22:02 ` David Miller
2007-08-03 2:21 ` David Miller
1 sibling, 0 replies; 28+ messages in thread
From: David Miller @ 2007-08-02 22:02 UTC (permalink / raw)
To: johnpol; +Cc: simon, john, netdev
From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Date: Thu, 2 Aug 2007 22:48:42 +0400
> On Thu, Aug 02, 2007 at 10:08:42PM +0400, Evgeniy Polyakov (johnpol@2ka.mipt.ru) wrote:
> > So, following patch fixes problem for me.
>
> Or this one. Essentially the same though.
Thanks a lot for figuring out this bug Evgeniy, I'll look at
this later. I'm very surprised autobind isn't guarded properly
as this is a case that Alexey Kuznetsov and I used to audit from
time to time.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 18:48 ` Evgeniy Polyakov
2007-08-02 22:02 ` David Miller
@ 2007-08-03 2:21 ` David Miller
2007-08-03 8:22 ` Evgeniy Polyakov
1 sibling, 1 reply; 28+ messages in thread
From: David Miller @ 2007-08-03 2:21 UTC (permalink / raw)
To: johnpol; +Cc: simon, john, netdev
From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Date: Thu, 2 Aug 2007 22:48:42 +0400
> On Thu, Aug 02, 2007 at 10:08:42PM +0400, Evgeniy Polyakov (johnpol@2ka.mipt.ru) wrote:
> > So, following patch fixes problem for me.
>
> Or this one. Essentially the same though.
>
> Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
So, this bug got introduced partly in 2.3.15, which is when
we SMP threaded the networking stack.
The error check was present in inet_sendmsg() previously, it
looked like this:
int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
struct scm_cookie *scm)
{
struct sock *sk = sock->sk;
if (sk->shutdown & SEND_SHUTDOWN) {
if (!(msg->msg_flags&MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 1);
return(-EPIPE);
}
if (sk->prot->sendmsg == NULL)
return(-EOPNOTSUPP);
if(sk->err)
return sock_error(sk);
/* We may need to bind the socket. */
if (inet_autobind(sk) != 0)
return -EAGAIN;
return sk->prot->sendmsg(sk, msg, size);
}
I believe the idea was to move the sk->err check down into
tcp_sendmsg().
But this raises a major issue.
What in the world are we doing allowing stream sockets to autobind?
That is totally bogus. Even if we autobind, that won't make a connect
happen.
There is logic down in TCP to handle all of these details properly
as long as we don't do this bogus autobind stuff.
do_tcp_sendpages() and tcp_sendmsg() both invoke sk_stream_wait_connect()
if TCP is in a state where data sending is not possible. Inside of
sk_stream_wait_connect() it handles socket errors as first priority,
then if no socket errors are pending it checks if we are trying to
connect currently and if not returns -EPIPE. It is exactly what we
want under these circumstances.
So the bug is purely that autobind is attempted for TCP sockets at
all.
TCP's sendpage handles this correctly already, it calls directly down
into tcp_sendpage(), inet_sendpage() is not used at all.
So the fix is to make tcp_sendmsg() direct as well, that bypasses all
of this autobind madness. The error checking and state verification
in TCP's sendmsg() and sendpage() implementations will do the right
thing.
Comments?
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c209361..185c7ec 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -281,7 +281,7 @@ extern int tcp_v4_remember_stamp(struct sock *sk);
extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
-extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
+extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size);
extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 06c08e5..e681034 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -831,7 +831,7 @@ const struct proto_ops inet_stream_ops = {
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
- .sendmsg = inet_sendmsg,
+ .sendmsg = tcp_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = tcp_sendpage,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index da4c0b6..7e74011 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -658,9 +658,10 @@ static inline int select_size(struct sock *sk)
return tmp;
}
-int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
size_t size)
{
+ struct sock *sk = sock->sk;
struct iovec *iov;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3f5f742..9c94627 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2425,7 +2425,6 @@ struct proto tcp_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
- .sendmsg = tcp_sendmsg,
.recvmsg = tcp_recvmsg,
.backlog_rcv = tcp_v4_do_rcv,
.hash = tcp_v4_hash,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index eed0937..b5f9637 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -484,7 +484,7 @@ const struct proto_ops inet6_stream_ops = {
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
.getsockopt = sock_common_getsockopt, /* ok */
- .sendmsg = inet_sendmsg, /* ok */
+ .sendmsg = tcp_sendmsg, /* ok */
.recvmsg = sock_common_recvmsg, /* ok */
.mmap = sock_no_mmap,
.sendpage = tcp_sendpage,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f10f368..cbdb784 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2115,7 +2115,6 @@ struct proto tcpv6_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
- .sendmsg = tcp_sendmsg,
.recvmsg = tcp_recvmsg,
.backlog_rcv = tcp_v6_do_rcv,
.hash = tcp_v6_hash,
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 2:21 ` David Miller
@ 2007-08-03 8:22 ` Evgeniy Polyakov
2007-08-03 20:04 ` David Miller
2007-08-03 21:17 ` David Miller
0 siblings, 2 replies; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-03 8:22 UTC (permalink / raw)
To: David Miller; +Cc: simon, john, netdev
On Thu, Aug 02, 2007 at 07:21:34PM -0700, David Miller (davem@davemloft.net) wrote:
> > On Thu, Aug 02, 2007 at 10:08:42PM +0400, Evgeniy Polyakov (johnpol@2ka.mipt.ru) wrote:
> > > So, following patch fixes problem for me.
> >
> > Or this one. Essentially the same though.
> >
> > Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
>
> So, this bug got introduced partly in 2.3.15, which is when
> we SMP threaded the networking stack.
>
> The error check was present in inet_sendmsg() previously, it
> looked like this:
>
> int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
> struct scm_cookie *scm)
> {
> struct sock *sk = sock->sk;
>
> if (sk->shutdown & SEND_SHUTDOWN) {
> if (!(msg->msg_flags&MSG_NOSIGNAL))
> send_sig(SIGPIPE, current, 1);
> return(-EPIPE);
> }
This one would have caught our problem.
> if (sk->prot->sendmsg == NULL)
> return(-EOPNOTSUPP);
> if(sk->err)
> return sock_error(sk);
And this one too.
> /* We may need to bind the socket. */
> if (inet_autobind(sk) != 0)
> return -EAGAIN;
>
> return sk->prot->sendmsg(sk, msg, size);
> }
>
> I believe the idea was to move the sk->err check down into
> tcp_sendmsg().
>
> But this raises a major issue.
>
> What in the world are we doing allowing stream sockets to autobind?
> That is totally bogus. Even if we autobind, that won't make a connect
> happen.
For an accepted socket it is a perfectly valid assumption - we could autobind
it during the first send. Or we may bind it during accept. It's a matter of
taste I think. Autobinding during the first send can end up being a
protection against DoS in some obscure rare case...
> There is logic down in TCP to handle all of these details properly
> as long as we don't do this bogus autobind stuff.
Yes, the TCP sending function will catch these problems.
> do_tcp_sendpages() and tcp_sendmsg() both invoke sk_stream_wait_connect()
> if TCP is in a state where data sending is not possible. Inside of
> sk_stream_wait_connect() it handles socket errors as first priority,
> then if no socket errors are pending it checks if we are trying to
> connect currently and if not returns -EPIPE. It is exactly what we
> want under these circumstances.
>
> So the bug is purely that autobind is attempted for TCP sockets at
> all.
>
> TCP's sendpage handles this correctly already, it calls directly down
> into tcp_sendpage(), inet_sendpage() is not used at all.
>
> So the fix is to make tcp_sendmsg() direct as well, that bypasses all
> of this autobind madness. The error checking and state verification
> in TCP's sendmsg() and sendpage() implementations will do the right
> thing.
>
> Comments?
>
> Signed-off-by: David S. Miller <davem@davemloft.net>
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index c209361..185c7ec 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -281,7 +281,7 @@ extern int tcp_v4_remember_stamp(struct sock *sk);
>
> extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
>
> -extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
> +extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
> struct msghdr *msg, size_t size);
Maybe recvmsg should be changed too for symmetry?
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-02 18:58 ` Simon Arlott
@ 2007-08-03 8:25 ` Evgeniy Polyakov
2007-08-03 11:21 ` Simon Arlott
0 siblings, 1 reply; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-03 8:25 UTC (permalink / raw)
To: Simon Arlott; +Cc: john, netdev, David Miller
On Thu, Aug 02, 2007 at 07:58:03PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> 19:24:32.897071 IP 192.168.7.4.50000 > 192.168.7.8.2500: S 705362199:705362199(0) win 1500
> 19:24:32.897211 IP 192.168.7.8.2500 > 192.168.7.4.50000: S 4159455228:4159455228(0) ack 705362200 win 14360 <mss 7180>
> 19:24:32.920784 IP 192.168.7.4.50000 > 192.168.7.8.2500: . ack 1 win 1500
> 19:24:32.921732 IP 192.168.7.4.50000 > 192.168.7.8.2500: P 1:17(16) ack 1 win 1500
> 19:24:32.921795 IP 192.168.7.8.2500 > 192.168.7.4.50000: . ack 17 win 14360
> 19:24:32.922881 IP 192.168.7.4.50000 > 192.168.7.8.2500: R 705362216:705362216(0) win 1500
> 19:24:34.927717 IP 192.168.7.8.2500 > 192.168.7.4.50000: R 1:1(0) ack 17 win 14360
>
> According to RFC 793, the RST from .4 means that the connection
> is CLOSED.
RFC 2525 - common tcp problems, says we should send RST in this case,
although it does not specify whether we should send it if the socket is in
the CLOSED state or not. Well, we send :)
Even if tcp_send_active_reset() will check if socket is in CLOSED state
and will not send data, but is still there, it will not be easily
triggered though, but it can be possible.
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 8:25 ` Evgeniy Polyakov
@ 2007-08-03 11:21 ` Simon Arlott
2007-08-03 11:56 ` Evgeniy Polyakov
0 siblings, 1 reply; 28+ messages in thread
From: Simon Arlott @ 2007-08-03 11:21 UTC (permalink / raw)
To: Evgeniy Polyakov; +Cc: john, netdev, David Miller
On Fri, August 3, 2007 09:25, Evgeniy Polyakov wrote:
> On Thu, Aug 02, 2007 at 07:58:03PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
>> 19:24:32.897071 IP 192.168.7.4.50000 > 192.168.7.8.2500: S 705362199:705362199(0) win 1500
>> 19:24:32.897211 IP 192.168.7.8.2500 > 192.168.7.4.50000: S 4159455228:4159455228(0) ack 705362200 win
>> 14360 <mss 7180>
>> 19:24:32.920784 IP 192.168.7.4.50000 > 192.168.7.8.2500: . ack 1 win 1500
>> 19:24:32.921732 IP 192.168.7.4.50000 > 192.168.7.8.2500: P 1:17(16) ack 1 win 1500
>> 19:24:32.921795 IP 192.168.7.8.2500 > 192.168.7.4.50000: . ack 17 win 14360
>> 19:24:32.922881 IP 192.168.7.4.50000 > 192.168.7.8.2500: R 705362216:705362216(0) win 1500
>> 19:24:34.927717 IP 192.168.7.8.2500 > 192.168.7.4.50000: R 1:1(0) ack 17 win 14360
>>
>> According to RFC 793, the RST from .4 means that the connection
>> is CLOSED.
>
> RFC 2525 - common tcp problems, says we should send RST in this case,
> although it does not specify should we send it if socket is in CLOSED
> state or not. Well, we send :)
> Even if tcp_send_active_reset() will check if socket is in CLOSED state
> and will not send data, but is still there, it will not be easily
> triggered though, but it can be possible.
Since the connection is considered closed, couldn't another socket re-use it?
Socket A: Recv data (unread)
Socket A: Recv RST
Socket B: Reuses connection (same IPs/ports)
Socket A: Close
Wouldn't that disrupt socket B's use of the connection?
--
Simon Arlott
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 11:21 ` Simon Arlott
@ 2007-08-03 11:56 ` Evgeniy Polyakov
2007-08-03 12:03 ` Simon Arlott
0 siblings, 1 reply; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-03 11:56 UTC (permalink / raw)
To: Simon Arlott; +Cc: john, netdev, David Miller
On Fri, Aug 03, 2007 at 12:21:46PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> Since the connection is considered closed, couldn't another socket re-use it?
>
> Socket A: Recv data (unread)
> Socket A: Recv RST
> Socket B: Reuses connection (same IPs/ports)
> Socket A: Close
>
> Wouldn't that disrupt socket B's use of the connection?
Then it will drop our data, since there was no appropriate handshake.
> --
> Simon Arlott
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 11:56 ` Evgeniy Polyakov
@ 2007-08-03 12:03 ` Simon Arlott
2007-08-03 12:09 ` Evgeniy Polyakov
0 siblings, 1 reply; 28+ messages in thread
From: Simon Arlott @ 2007-08-03 12:03 UTC (permalink / raw)
To: Evgeniy Polyakov; +Cc: john, netdev, David Miller
On Fri, August 3, 2007 12:56, Evgeniy Polyakov wrote:
> On Fri, Aug 03, 2007 at 12:21:46PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
>> Since the connection is considered closed, couldn't another socket re-use it?
>>
>> Socket A: Recv data (unread)
>> Socket A: Recv RST
>> Socket B: Reuses connection (same IPs/ports)
>> Socket A: Close
>>
>> Wouldn't that disrupt socket B's use of the connection?
>
> Then it will drop our data, since there were no appropriate handhsake.
Couldn't the sequence numbers be close enough to make the RST valid?
--
Simon Arlott
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 12:03 ` Simon Arlott
@ 2007-08-03 12:09 ` Evgeniy Polyakov
2007-08-03 16:51 ` Simon Arlott
0 siblings, 1 reply; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-03 12:09 UTC (permalink / raw)
To: Simon Arlott; +Cc: john, netdev, David Miller
On Fri, Aug 03, 2007 at 01:03:46PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> On Fri, August 3, 2007 12:56, Evgeniy Polyakov wrote:
> > On Fri, Aug 03, 2007 at 12:21:46PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> >> Since the connection is considered closed, couldn't another socket re-use it?
> >>
> >> Socket A: Recv data (unread)
> >> Socket A: Recv RST
> >> Socket B: Reuses connection (same IPs/ports)
> >> Socket A: Close
> >>
> >> Wouldn't that disrupt socket B's use of the connection?
> >
> > Then it will drop our data, since there were no appropriate handhsake.
>
> Couldn't the sequence numbers be close enough to make the RST valid?
It does not matter - if the connection is not in a synchronized state all
unrelated data is dropped, so the remote side is allowed to receive the syn
flag only, anything else must be dropped. If the remote side does not do
that, it violates the RFC.
> --
> Simon Arlott
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 12:09 ` Evgeniy Polyakov
@ 2007-08-03 16:51 ` Simon Arlott
2007-08-03 17:39 ` Evgeniy Polyakov
0 siblings, 1 reply; 28+ messages in thread
From: Simon Arlott @ 2007-08-03 16:51 UTC (permalink / raw)
To: Evgeniy Polyakov; +Cc: john, netdev, David Miller
[-- Attachment #1: Type: text/plain, Size: 3937 bytes --]
On 03/08/07 13:09, Evgeniy Polyakov wrote:
> On Fri, Aug 03, 2007 at 01:03:46PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
>> On Fri, August 3, 2007 12:56, Evgeniy Polyakov wrote:
>> > On Fri, Aug 03, 2007 at 12:21:46PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
>> >> Since the connection is considered closed, couldn't another socket re-use it?
>> >>
>> >> Socket A: Recv data (unread)
>> >> Socket A: Recv RST
>> >> Socket B: Reuses connection (same IPs/ports)
>> >> Socket A: Close
>> >>
>> >> Wouldn't that disrupt socket B's use of the connection?
>> >
>> > Then it will drop our data, since there were no appropriate handhsake.
>>
>> Couldn't the sequence numbers be close enough to make the RST valid?
>
> It does not matter - if connection is not in synchronized state all
> unrelated data is dropped, so remote side is only allowed to receive syn
> flag only, anything else must be dropped. If remote side does not do
> that, it violates RFC.
Except the remote side has a connection, because another one can be made
before the existing connection is closed:
17:37:37.377571 IP 192.168.7.4.50550 > 192.168.7.8.2500: S 134077329:134077329(0) win 1500 (raw)
17:37:37.382352 IP 192.168.7.8.2500 > 192.168.7.4.50550: S 3460060233:3460060233(0) ack 134077330 win 14360 <mss 7180> (accept)
17:37:37.377966 IP 192.168.7.4.50550 > 192.168.7.8.2500: . ack 1 win 1500 (raw)
17:37:37.378128 IP 192.168.7.4.50550 > 192.168.7.8.2500: P 1:17(16) ack 1 win 1500 (raw)
17:37:37.378162 IP 192.168.7.8.2500 > 192.168.7.4.50550: . ack 17 win 14360
17:37:37.378131 IP 192.168.7.4.50550 > 192.168.7.8.2500: R 134077346:134077346(0) win 1500 (raw)
17:37:37.412709 IP 192.168.7.4.50550 > 192.168.7.8.2500: SWE 3257207813:3257207813(0) win 14280 <mss 7140,sackOK,timestamp 3601441543 0,nop,wscale 5> (connect)
17:37:37.412785 IP 192.168.7.8.2500 > 192.168.7.4.50550: SE 3495384256:3495384256(0) ack 3257207814 win 14336 <mss 7180,sackOK,timestamp 4294812905 3601441543,nop,wscale 6> (accept)
17:37:37.412960 IP 192.168.7.4.50550 > 192.168.7.8.2500: . ack 1 win 447 <nop,nop,timestamp 3601441543 4294812905>
17:37:38.383085 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 4259643274:4259643274(0) ack 1171836829 win 14360 (close (previous connection))
17:37:47.417649 IP 192.168.7.8.2500 > 192.168.7.4.50550: F 1:1(0) ack 1 win 224 <nop,nop,timestamp 4294822910 3601441543> (close)
17:37:47.417993 IP 192.168.7.4.50550 > 192.168.7.8.2500: F 1:1(0) ack 2 win 447 <nop,nop,timestamp 3601444045 4294822910> (read returned)
17:37:47.418466 IP 192.168.7.8.2500 > 192.168.7.4.50550: . ack 2 win 224 <nop,nop,timestamp 4294822911 3601444045>
The second connection also modified the RST|ACK that was sent compared to no second connection:
17:38:03.532703 IP 192.168.7.4.50550 > 192.168.7.8.2500: S 82517575:82517575(0) win 1500 (raw)
17:38:03.532832 IP 192.168.7.8.2500 > 192.168.7.4.50550: S 3495449795:3495449795(0) ack 82517576 win 14360 <mss 7180> (accept)
17:38:03.533388 IP 192.168.7.4.50550 > 192.168.7.8.2500: . ack 1 win 1500 (raw)
17:38:03.533457 IP 192.168.7.4.50550 > 192.168.7.8.2500: P 1:17(16) ack 1 win 1500 (raw)
17:38:03.533597 IP 192.168.7.8.2500 > 192.168.7.4.50550: . ack 17 win 14360
17:38:03.533589 IP 192.168.7.4.50550 > 192.168.7.8.2500: R 82517592:82517592(0) win 1500 (raw)
17:38:04.536277 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 1:1(0) ack 17 win 14360 (close)
17:38:04.536277 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 1:1(0) ack 17 win 14360
vs
17:37:38.383085 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 4259643274:4259643274(0) ack 1171836829 win 14360
What happened there ?
On the server, run tcptest-server.c, which waits for 1s on the first connection then 10s on the second connection.
On the client, run:
iptables -I INPUT -i eth0 -p tcp --dport 50550 -j DROP; ./client; iptables -D INPUT -i eth0 -p tcp --dport 50550 -j DROP; ./tcptest-client
(client.c from john's original email)
--
Simon Arlott
[-- Attachment #2: tcptest-server.c --]
[-- Type: text/x-csrc, Size: 1712 bytes --]
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <poll.h>
#include <fcntl.h>
#define PORT 2500
#define xerror(str) do { perror(str); exit(1); } while (0)
/*
 * Test server: listens on PORT, accepts two connections and hands each
 * one to a forked child. The child for the first connection sleeps for
 * 1 second and the child for the second sleeps for 10 seconds; each then
 * closes its socket without ever reading from it. The parent closes its
 * own copy of the accepted socket and finally reaps both children.
 *
 * Returns 0 on success; any failing system call terminates the process
 * through xerror().
 */
int main(void) {
	struct sockaddr_in sa;
	int l, s, tmp;
	int t = 0;

	memset(&sa, 0, sizeof(sa));

	l = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	/* socket() signals failure with -1; 0 is a valid descriptor. */
	if (l < 0)
		xerror("socket");

	sa.sin_family = AF_INET;
	sa.sin_addr.s_addr = htonl(INADDR_ANY);
	sa.sin_port = htons(PORT);

	/* Allow the test to be re-run immediately on the same port. */
	tmp = 1;
	if (setsockopt(l, SOL_SOCKET, SO_REUSEADDR, (char*)&tmp, sizeof(tmp)) != 0)
		xerror("setsockopt");

	if (bind(l, (struct sockaddr*)&sa, sizeof(sa)) != 0)
		xerror("bind");

	if (listen(l, 0) != 0)
		xerror("listen");

	printf("server %d ready...\n", getpid());

	for (t = 1; t <= 2; t++) {
		s = accept(l, NULL, NULL);
		/* Do not fork a child around an invalid descriptor. */
		if (s < 0)
			xerror("accept");

		switch (fork()) {
		case -1:
			xerror("fork");
			break;

		case 0:
			/* Child: t selects how long the connection is held open. */
			switch (t) {
			case 1:
				printf("server %d accepted connection\n", getpid());
#if 0
				tmp = fcntl(s, F_GETFL, 0);
				if (fcntl(s, F_SETFL, tmp | O_NONBLOCK) != 0)
					xerror("fcntl");
				if (send(s, "AAAAAAA", 7, 0) != 7)
					xerror("send");
#endif
				printf("server %d waiting for 1 second...\n", getpid());
				sleep(1);
				printf("server %d closing connection\n", getpid());
				close(s);
				return 0;

			case 2:
				printf("server %d accepted connection\n", getpid());
				printf("server %d waiting for 10 seconds...\n", getpid());
				sleep(10);
				printf("server %d closing connection\n", getpid());
				close(s);
				return 0;

			default:
				break;
			}
			break;

		default:
			/* Parent: the child owns the connection from here on. */
			close(s);
			break;
		}
	}

	/* Reap both children before exiting. */
	wait(NULL);
	wait(NULL);
	return 0;
}
[-- Attachment #3: tcptest-client.c --]
[-- Type: text/x-csrc, Size: 1408 bytes --]
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define SERVER_IP "192.168.7.8"
#define SRC_PORT 50550
#define DST_PORT 2500
#define xerror(str) do { perror(str); exit(1); } while (0)
/*
 * Test client: forks one child that binds a TCP socket to SRC_PORT
 * (with SO_REUSEADDR), connects to SERVER_IP:DST_PORT, blocks in read()
 * for up to 32 bytes, prints what read() returned and closes the socket.
 * The parent waits for the child.
 *
 * Returns 0 on success; any failing system call terminates the process
 * through xerror().
 */
int main(void) {
	struct sockaddr_in sa;
	int s;
	int t = 0;
	char buf[32];
	int ret, tmp;

	memset(&sa, 0, sizeof(sa));

	for (t = 1; t <= 1; t++) {
		switch (fork()) {
		case -1:
			xerror("fork");
			break;

		case 0:
			printf("client %d binding port...\n", getpid());
			s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
			/* socket() signals failure with -1; 0 is a valid descriptor. */
			if (s < 0)
				xerror("socket");

			/* Local endpoint: fixed source port on any local address. */
			sa.sin_family = AF_INET;
			sa.sin_addr.s_addr = htonl(INADDR_ANY);
			sa.sin_port = htons(SRC_PORT);

			tmp = 1;
			if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char*)&tmp, sizeof(tmp)) != 0)
				xerror("setsockopt");

			if (bind(s, (struct sockaddr*)&sa, sizeof(sa)) != 0)
				xerror("bind");

			printf("client %d connecting...\n", getpid());

			/* Remote endpoint: sa is reused for the server address. */
			sa.sin_family = AF_INET;
			sa.sin_addr.s_addr = inet_addr(SERVER_IP);
			sa.sin_port = htons(DST_PORT);

			if (connect(s, (struct sockaddr*)&sa, sizeof(sa)) != 0)
				xerror("connect");

			printf("client %d waiting in read()...\n", getpid());
			ret = read(s, buf, 32);
			printf("client %d read() returned %d\n", getpid(), ret);
			close(s);
			return 0;

		default:
			/* Parent: nothing to do here, the child is reaped below. */
			break;
		}

		wait(NULL);
	}

	return 0;
}
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 16:51 ` Simon Arlott
@ 2007-08-03 17:39 ` Evgeniy Polyakov
2007-08-03 18:29 ` Simon Arlott
0 siblings, 1 reply; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-03 17:39 UTC (permalink / raw)
To: Simon Arlott; +Cc: john, netdev, David Miller
On Fri, Aug 03, 2007 at 05:51:42PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> 17:38:03.533589 IP 192.168.7.4.50550 > 192.168.7.8.2500: R 82517592:82517592(0) win 1500 (raw)
> vs
> 17:37:38.383085 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 4259643274:4259643274(0) ack 1171836829 win 14360
> What happened there ?
You mean what will happen if the second rst (4259643274) is close enough to
the first (82517592) to reset the connection? That would be the session hijacking
attack first (known) implemented by Kevin Mitnick. Things have moved forward
since then, and the sequence number generation algorithm has changed a lot.
It is the same situation, which would happen if you will spam remote
side with RST packets with arbitrary sequence number in hope that it
will reset some connection.
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 17:39 ` Evgeniy Polyakov
@ 2007-08-03 18:29 ` Simon Arlott
2007-08-04 16:03 ` Evgeniy Polyakov
0 siblings, 1 reply; 28+ messages in thread
From: Simon Arlott @ 2007-08-03 18:29 UTC (permalink / raw)
To: Evgeniy Polyakov; +Cc: john, netdev, David Miller
On 03/08/07 18:39, Evgeniy Polyakov wrote:
> On Fri, Aug 03, 2007 at 05:51:42PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
>
>> 17:38:03.533589 IP 192.168.7.4.50550 > 192.168.7.8.2500: R 82517592:82517592(0) win 1500 (raw)
>> vs
>> 17:37:38.383085 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 4259643274:4259643274(0) ack 1171836829 win 14360
>> What happened there ?
Erm... you seem to have removed parts of my message in a way that doesn't
make sense...
On Fri, Aug 03, 2007 at 05:51:42PM +0100, Simon Arlott wrote:
> 17:38:04.536277 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 1:1(0) ack 17 win 14360
> vs
> 17:37:38.383085 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 4259643274:4259643274(0) ack 1171836829 win 14360
> What happened there ?
The first one is the RST sent when the connection is close()d without
reading, and the second one is the same RST but after other connection
has been made on the same ports using a different socket.
> It is the same situation, which would happen if you will spam remote
> side with RST packets with arbitrary sequence number in hope that it
> will reset some connection.
Isn't it still possible that the connection that got reset is left open
(possibly for days) until another connection using the same ports is
using roughly the same sequence numbers?
--
Simon Arlott
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 8:22 ` Evgeniy Polyakov
@ 2007-08-03 20:04 ` David Miller
2007-08-04 16:49 ` Evgeniy Polyakov
2007-08-03 21:17 ` David Miller
1 sibling, 1 reply; 28+ messages in thread
From: David Miller @ 2007-08-03 20:04 UTC (permalink / raw)
To: johnpol; +Cc: simon, john, netdev
From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Date: Fri, 3 Aug 2007 12:22:42 +0400
> On Thu, Aug 02, 2007 at 07:21:34PM -0700, David Miller (davem@davemloft.net) wrote:
> > What in the world are we doing allowing stream sockets to autobind?
> > That is totally bogus. Even if we autobind, that won't make a connect
> > happen.
>
> For accepted socket it is perfectly valid assumption - we could autobind
> it during the first send. Or may bind it during accept. Its a matter of
> taste I think. Autobinding during first sending can end up being a
> protection against DoS in some obscure rare case...
accept()ed socket is by definition fully bound and already in
established state.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 8:22 ` Evgeniy Polyakov
2007-08-03 20:04 ` David Miller
@ 2007-08-03 21:17 ` David Miller
2007-08-04 16:51 ` Evgeniy Polyakov
1 sibling, 1 reply; 28+ messages in thread
From: David Miller @ 2007-08-03 21:17 UTC (permalink / raw)
To: johnpol; +Cc: simon, john, netdev
From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Date: Fri, 3 Aug 2007 12:22:42 +0400
> Maybe recvmsg should be changed too for symmetry?
I took a look at this, and it's not 100% trivial.
Let's do this later, and only sendmsg for now in order to
fix the bug in the stable branches.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 18:29 ` Simon Arlott
@ 2007-08-04 16:03 ` Evgeniy Polyakov
0 siblings, 0 replies; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-04 16:03 UTC (permalink / raw)
To: Simon Arlott; +Cc: john, netdev, David Miller
On Fri, Aug 03, 2007 at 07:29:58PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> On 03/08/07 18:39, Evgeniy Polyakov wrote:
> > On Fri, Aug 03, 2007 at 05:51:42PM +0100, Simon Arlott (simon@fire.lp0.eu) wrote:
> >
> >> 17:38:03.533589 IP 192.168.7.4.50550 > 192.168.7.8.2500: R 82517592:82517592(0) win 1500 (raw)
> >> vs
> >> 17:37:38.383085 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 4259643274:4259643274(0) ack 1171836829 win 14360
> >> What happened there ?
>
> Erm... you seem to have removed parts of my message in a way that doesn't
> make sense...
Sorry, I left the lines I thought were enough to understand your point.
> On Fri, Aug 03, 2007 at 05:51:42PM +0100, Simon Arlott wrote:
> > 17:38:04.536277 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 1:1(0) ack 17 win 14360
> > vs
> > 17:37:38.383085 IP 192.168.7.8.2500 > 192.168.7.4.50550: R 4259643274:4259643274(0) ack 1171836829 win 14360
> > What happened there ?
>
> The first one is the RST sent when the connection is close()d without
> reading, and the second one is the same RST but after other connection
> has been made on the same ports using a different socket.
I understood it, and your question is about possibility for those
numbers to be roughly the same. Answer is 'no', it is not possible
(possible, but with extremely low probability).
If it is - this is a bug in ISN generation algo and must be fixed.
> > It is the same situation, which would happen if you will spam remote
> > side with RST packets with arbitrary sequence number in hope that it
> > will reset some connection.
>
> Isn't it still possible that the connection that got reset is left open
> (possibly for days) until another connection using the same ports is
> using roughly the same sequence numbers?
Of course it is possible, but it is very unlikely. Practically it is
impossible in modern OSes - ISN generation algos are designed to prevent
this from happening.
> --
> Simon Arlott
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 20:04 ` David Miller
@ 2007-08-04 16:49 ` Evgeniy Polyakov
0 siblings, 0 replies; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-04 16:49 UTC (permalink / raw)
To: David Miller; +Cc: simon, john, netdev
On Fri, Aug 03, 2007 at 01:04:51PM -0700, David Miller (davem@davemloft.net) wrote:
> From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
> Date: Fri, 3 Aug 2007 12:22:42 +0400
>
> > On Thu, Aug 02, 2007 at 07:21:34PM -0700, David Miller (davem@davemloft.net) wrote:
> > > What in the world are we doing allowing stream sockets to autobind?
> > > That is totally bogus. Even if we autobind, that won't make a connect
> > > happen.
> >
> > For accepted socket it is perfectly valid assumption - we could autobind
> > it during the first send. Or may bind it during accept. Its a matter of
> > taste I think. Autobinding during first sending can end up being a
> > protection against DoS in some obscure rare case...
>
> accept()ed socket is by definition fully bound and already in
> established state.
That's what I meant - it binds during accept (well, it cannot be called
real binding), but could be autobound during the first send to the needed port.
Maybe that was one of the intentions, don't know.
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-03 21:17 ` David Miller
@ 2007-08-04 16:51 ` Evgeniy Polyakov
2007-08-05 3:21 ` David Miller
0 siblings, 1 reply; 28+ messages in thread
From: Evgeniy Polyakov @ 2007-08-04 16:51 UTC (permalink / raw)
To: David Miller; +Cc: simon, john, netdev
On Fri, Aug 03, 2007 at 02:17:17PM -0700, David Miller (davem@davemloft.net) wrote:
> From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
> Date: Fri, 3 Aug 2007 12:22:42 +0400
>
> > Maybe recvmsg should be changed too for symmetry?
>
> I took a look at this, and it's not %100 trivial.
>
> Let's do this later, and only sendmsg for now in order to
> fix the bug in the stable branches.
I've tested your patch, besides there was an offset in one of hooks,
it works perfectly ok.
Feel free to add my ack, tested-by or whatever is needed for this :)
Your patch fixes the problem.
Actually inet_sendmsg() can be renamed to something less misleading,
since it is not used by TCP now.
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: strange tcp behavior
2007-08-04 16:51 ` Evgeniy Polyakov
@ 2007-08-05 3:21 ` David Miller
0 siblings, 0 replies; 28+ messages in thread
From: David Miller @ 2007-08-05 3:21 UTC (permalink / raw)
To: johnpol; +Cc: simon, john, netdev
From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Date: Sat, 4 Aug 2007 20:51:51 +0400
> On Fri, Aug 03, 2007 at 02:17:17PM -0700, David Miller (davem@davemloft.net) wrote:
> > From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
> > Date: Fri, 3 Aug 2007 12:22:42 +0400
> >
> > > Maybe recvmsg should be changed too for symmetry?
> >
> > I took a look at this, and it's not %100 trivial.
> >
> > Let's do this later, and only sendmsg for now in order to
> > fix the bug in the stable branches.
>
> I've tested your patch, besides there was an offset in one of hooks,
> it works perfectly ok.
>
> Feel free to add my ack, tested-by or whatever is needed for this :)
> Your patch fixes the problem.
It was already merged into Linus's tree long before you found a chance to
test it :-) So it would be difficult for me to do so.
^ permalink raw reply [flat|nested] 28+ messages in thread
end of thread, other threads:[~2007-08-05 3:21 UTC | newest]
Thread overview: 28+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2007-08-02 6:19 strange tcp behavior john
2007-08-02 9:55 ` Evgeniy Polyakov
2007-08-02 10:16 ` Evgeniy Polyakov
2007-08-02 11:38 ` Simon Arlott
2007-08-02 12:04 ` Evgeniy Polyakov
2007-08-02 12:28 ` Evgeniy Polyakov
[not found] ` <46860.212.93.96.73.1186055105.squirrel@mail.screen.lv>
2007-08-02 12:15 ` Simon Arlott
2007-08-02 17:15 ` Simon Arlott
2007-08-02 18:08 ` Evgeniy Polyakov
2007-08-02 18:48 ` Evgeniy Polyakov
2007-08-02 22:02 ` David Miller
2007-08-03 2:21 ` David Miller
2007-08-03 8:22 ` Evgeniy Polyakov
2007-08-03 20:04 ` David Miller
2007-08-04 16:49 ` Evgeniy Polyakov
2007-08-03 21:17 ` David Miller
2007-08-04 16:51 ` Evgeniy Polyakov
2007-08-05 3:21 ` David Miller
2007-08-02 18:58 ` Simon Arlott
2007-08-03 8:25 ` Evgeniy Polyakov
2007-08-03 11:21 ` Simon Arlott
2007-08-03 11:56 ` Evgeniy Polyakov
2007-08-03 12:03 ` Simon Arlott
2007-08-03 12:09 ` Evgeniy Polyakov
2007-08-03 16:51 ` Simon Arlott
2007-08-03 17:39 ` Evgeniy Polyakov
2007-08-03 18:29 ` Simon Arlott
2007-08-04 16:03 ` Evgeniy Polyakov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).