* [PATCH net-next 1/7] net: bluetooth: compact struct bt_skb_cb by inlining struct hci_req_ctrl
2015-02-26 2:10 [PATCH net-next 0/7] net: move skb->dropcount to skb->cb[] Eyal Birger
@ 2015-02-26 2:10 ` Eyal Birger
2015-02-26 7:02 ` Shmulik Ladkani
2015-02-26 16:15 ` Marcel Holtmann
2015-02-26 2:10 ` [PATCH net-next 2/7] net: bluetooth: compact struct bt_skb_cb by converting boolean fields to bit fields Eyal Birger
` (5 subsequent siblings)
6 siblings, 2 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 2:10 UTC (permalink / raw)
To: davem; +Cc: willemb, edumazet, shmulik.ladkani, marcel, netdev, Eyal Birger
struct hci_req_ctrl is never used outside of struct bt_skb_cb;
Inlining it frees 8 bytes on a 64 bit system in skb->cb[] allowing
the addition of more ancillary data.
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
include/net/bluetooth/bluetooth.h | 10 +++-------
net/bluetooth/hci_core.c | 12 ++++++------
net/bluetooth/hci_event.c | 4 ++--
net/bluetooth/hci_request.c | 6 +++---
net/bluetooth/hci_sock.c | 2 +-
5 files changed, 15 insertions(+), 19 deletions(-)
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index e00455a..0989366 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -275,21 +275,17 @@ struct hci_dev;
typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
-struct hci_req_ctrl {
- bool start;
- u8 event;
- hci_req_complete_t complete;
-};
-
struct bt_skb_cb {
__u8 pkt_type;
__u8 incoming;
__u16 opcode;
__u16 expect;
__u8 force_active;
+ bool req_start;
+ u8 req_event;
+ hci_req_complete_t req_complete;
struct l2cap_chan *chan;
struct l2cap_ctrl control;
- struct hci_req_ctrl req;
bdaddr_t bdaddr;
__le16 psm;
};
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3322d3f..85a0655 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3517,7 +3517,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->req.start = true;
+ bt_cb(skb)->req_start = true;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -4195,7 +4195,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev)
if (!skb)
return true;
- return bt_cb(skb)->req.start;
+ return bt_cb(skb)->req_start;
}
static void hci_resend_last(struct hci_dev *hdev)
@@ -4255,14 +4255,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
* command queue (hdev->cmd_q).
*/
if (hdev->sent_cmd) {
- req_complete = bt_cb(hdev->sent_cmd)->req.complete;
+ req_complete = bt_cb(hdev->sent_cmd)->req_complete;
if (req_complete) {
/* We must set the complete callback to NULL to
* avoid calling the callback more than once if
* this function gets called again.
*/
- bt_cb(hdev->sent_cmd)->req.complete = NULL;
+ bt_cb(hdev->sent_cmd)->req_complete = NULL;
goto call_complete;
}
@@ -4271,12 +4271,12 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
/* Remove all pending commands belonging to this request */
spin_lock_irqsave(&hdev->cmd_q.lock, flags);
while ((skb = __skb_dequeue(&hdev->cmd_q))) {
- if (bt_cb(skb)->req.start) {
+ if (bt_cb(skb)->req_start) {
__skb_queue_head(&hdev->cmd_q, skb);
break;
}
- req_complete = bt_cb(skb)->req.complete;
+ req_complete = bt_cb(skb)->req_complete;
kfree_skb(skb);
}
spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a3fb094..8e8c433 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3106,7 +3106,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
cancel_delayed_work(&hdev->cmd_timer);
if (ev->status ||
- (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
+ (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req_event))
hci_req_cmd_complete(hdev, opcode, ev->status);
if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
@@ -5039,7 +5039,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
skb_pull(skb, HCI_EVENT_HDR_SIZE);
- if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
+ if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req_event == event) {
struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
u16 opcode = __le16_to_cpu(cmd_hdr->opcode);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b59f92c..db2f45a 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -55,7 +55,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
return -ENODATA;
skb = skb_peek_tail(&req->cmd_q);
- bt_cb(skb)->req.complete = complete;
+ bt_cb(skb)->req_complete = complete;
spin_lock_irqsave(&hdev->cmd_q.lock, flags);
skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
@@ -116,9 +116,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
}
if (skb_queue_empty(&req->cmd_q))
- bt_cb(skb)->req.start = true;
+ bt_cb(skb)->req_start = true;
- bt_cb(skb)->req.event = event;
+ bt_cb(skb)->req_event = event;
skb_queue_tail(&req->cmd_q, skb);
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d65c5b..f003818 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -965,7 +965,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->req.start = true;
+ bt_cb(skb)->req_start = true;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
--
2.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH net-next 1/7] net: bluetooth: compact struct bt_skb_cb by inlining struct hci_req_ctrl
2015-02-26 2:10 ` [PATCH net-next 1/7] net: bluetooth: compact struct bt_skb_cb by inlining struct hci_req_ctrl Eyal Birger
@ 2015-02-26 7:02 ` Shmulik Ladkani
2015-02-26 16:15 ` Marcel Holtmann
1 sibling, 0 replies; 30+ messages in thread
From: Shmulik Ladkani @ 2015-02-26 7:02 UTC (permalink / raw)
To: Eyal Birger; +Cc: davem, willemb, edumazet, marcel, netdev
On Thu, 26 Feb 2015 04:10:06 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
> struct hci_req_ctrl is never used outside of struct bt_skb_cb;
> Inlining it frees 8 bytes on a 64 bit system in skb->cb[] allowing
> the addition of more ancillary data.
>
> Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH net-next 1/7] net: bluetooth: compact struct bt_skb_cb by inlining struct hci_req_ctrl
2015-02-26 2:10 ` [PATCH net-next 1/7] net: bluetooth: compact struct bt_skb_cb by inlining struct hci_req_ctrl Eyal Birger
2015-02-26 7:02 ` Shmulik Ladkani
@ 2015-02-26 16:15 ` Marcel Holtmann
2015-02-26 21:22 ` Eyal Birger
1 sibling, 1 reply; 30+ messages in thread
From: Marcel Holtmann @ 2015-02-26 16:15 UTC (permalink / raw)
To: Eyal Birger; +Cc: David S. Miller, willemb, edumazet, shmulik.ladkani, netdev
Hi Eyal,
> struct hci_req_ctrl is never used outside of struct bt_skb_cb;
> Inlining it frees 8 bytes on a 64 bit system in skb->cb[] allowing
> the addition of more ancillary data.
>
> Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
> ---
> include/net/bluetooth/bluetooth.h | 10 +++-------
> net/bluetooth/hci_core.c | 12 ++++++------
> net/bluetooth/hci_event.c | 4 ++--
> net/bluetooth/hci_request.c | 6 +++---
> net/bluetooth/hci_sock.c | 2 +-
> 5 files changed, 15 insertions(+), 19 deletions(-)
it would be a really good idea to actually send this to linux-bluetooth@vger.kernel.org for review.
Regards
Marcel
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH net-next 1/7] net: bluetooth: compact struct bt_skb_cb by inlining struct hci_req_ctrl
2015-02-26 16:15 ` Marcel Holtmann
@ 2015-02-26 21:22 ` Eyal Birger
0 siblings, 0 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 21:22 UTC (permalink / raw)
To: Marcel Holtmann; +Cc: netdev@vger.kernel.org
Hi Marcel,
On Thu, Feb 26, 2015 at 6:15 PM, Marcel Holtmann <marcel@holtmann.org> wrote:
> Hi Eyal,
>
>> struct hci_req_ctrl is never used outside of struct bt_skb_cb;
>> Inlining it frees 8 bytes on a 64 bit system in skb->cb[] allowing
>> the addition of more ancillary data.
>>
>> Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
>> ---
>> include/net/bluetooth/bluetooth.h | 10 +++-------
>> net/bluetooth/hci_core.c | 12 ++++++------
>> net/bluetooth/hci_event.c | 4 ++--
>> net/bluetooth/hci_request.c | 6 +++---
>> net/bluetooth/hci_sock.c | 2 +-
>> 5 files changed, 15 insertions(+), 19 deletions(-)
>
> it would be a really good idea to actually send this to linux-bluetooth@vger.kernel.org for review.
>
Thanks! I have done that in v2.
Eyal.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH net-next 2/7] net: bluetooth: compact struct bt_skb_cb by converting boolean fields to bit fields
2015-02-26 2:10 [PATCH net-next 0/7] net: move skb->dropcount to skb->cb[] Eyal Birger
2015-02-26 2:10 ` [PATCH net-next 1/7] net: bluetooth: compact struct bt_skb_cb by inlining struct hci_req_ctrl Eyal Birger
@ 2015-02-26 2:10 ` Eyal Birger
2015-02-26 7:30 ` Shmulik Ladkani
2015-02-26 11:31 ` David Laight
2015-02-26 2:10 ` [PATCH net-next 3/7] net: rxrpc: change call to sock_recv_ts_and_drops() on rxrpc recvmsg to sock_recv_timestamp() Eyal Birger
` (4 subsequent siblings)
6 siblings, 2 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 2:10 UTC (permalink / raw)
To: davem; +Cc: willemb, edumazet, shmulik.ladkani, marcel, netdev, Eyal Birger
On 64 bit systems, struct bt_skb_cb size is padded by 6 bytes in order
to reach 8 byte alignment.
Convert boolean fields force_active, incoming and req_start to bit fields
in order to eliminate the padding.
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
include/net/bluetooth/bluetooth.h | 6 +++---
net/bluetooth/hci_core.c | 2 +-
net/bluetooth/hci_request.c | 2 +-
net/bluetooth/hci_sock.c | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 0989366..bb1851e 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -277,11 +277,11 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
struct bt_skb_cb {
__u8 pkt_type;
- __u8 incoming;
__u16 opcode;
__u16 expect;
- __u8 force_active;
- bool req_start;
+ __u8 force_active:1;
+ __u8 incoming:1;
+ __u8 req_start:1;
u8 req_event;
hci_req_complete_t req_complete;
struct l2cap_chan *chan;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 85a0655..80f40e8 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3517,7 +3517,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->req_start = true;
+ bt_cb(skb)->req_start = 1;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index db2f45a..f857e76 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -116,7 +116,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
}
if (skb_queue_empty(&req->cmd_q))
- bt_cb(skb)->req_start = true;
+ bt_cb(skb)->req_start = 1;
bt_cb(skb)->req_event = event;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f003818..37198fb 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -965,7 +965,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->req_start = true;
+ bt_cb(skb)->req_start = 1;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
--
2.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH net-next 2/7] net: bluetooth: compact struct bt_skb_cb by converting boolean fields to bit fields
2015-02-26 2:10 ` [PATCH net-next 2/7] net: bluetooth: compact struct bt_skb_cb by converting boolean fields to bit fields Eyal Birger
@ 2015-02-26 7:30 ` Shmulik Ladkani
2015-02-26 13:22 ` Eyal Birger
2015-02-26 11:31 ` David Laight
1 sibling, 1 reply; 30+ messages in thread
From: Shmulik Ladkani @ 2015-02-26 7:30 UTC (permalink / raw)
To: Eyal Birger; +Cc: davem, willemb, edumazet, marcel, netdev
On Thu, 26 Feb 2015 04:10:07 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
> @@ -277,11 +277,11 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
>
> struct bt_skb_cb {
> __u8 pkt_type;
> - __u8 incoming;
> __u16 opcode;
> __u16 expect;
> - __u8 force_active;
> - bool req_start;
> + __u8 force_active:1;
> + __u8 incoming:1;
> + __u8 req_start:1;
> u8 req_event;
> hci_req_complete_t req_complete;
> struct l2cap_chan *chan;
There's a:
bt_cb(skb)->force_active = BT_POWER_FORCE_ACTIVE_ON;
assignment in l2cap_core.c.
No bluetooth expert, no idea if BT_POWER_FORCE_ACTIVE_ON is 1 by definition.
If not, maybe prefer:
bt_cb(skb)->force_active = !!BT_POWER_FORCE_ACTIVE_ON
Besides that,
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 2/7] net: bluetooth: compact struct bt_skb_cb by converting boolean fields to bit fields
2015-02-26 7:30 ` Shmulik Ladkani
@ 2015-02-26 13:22 ` Eyal Birger
0 siblings, 0 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 13:22 UTC (permalink / raw)
To: Shmulik Ladkani
Cc: David Miller, Willem de Bruijn, Eric Dumazet, Marcel Holtmann,
netdev@vger.kernel.org
On Thu, Feb 26, 2015 at 9:30 AM, Shmulik Ladkani
<shmulik.ladkani@gmail.com> wrote:
> On Thu, 26 Feb 2015 04:10:07 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
>> @@ -277,11 +277,11 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
>>
>> struct bt_skb_cb {
>> __u8 pkt_type;
>> - __u8 incoming;
>> __u16 opcode;
>> __u16 expect;
>> - __u8 force_active;
>> - bool req_start;
>> + __u8 force_active:1;
>> + __u8 incoming:1;
>> + __u8 req_start:1;
>> u8 req_event;
>> hci_req_complete_t req_complete;
>> struct l2cap_chan *chan;
>
> There's a:
> bt_cb(skb)->force_active = BT_POWER_FORCE_ACTIVE_ON;
> assignment in l2cap_core.c.
>
> No bluetooth expert, no idea if BT_POWER_FORCE_ACTIVE_ON is 1 by definition.
> If not, maybe prefer:
> bt_cb(skb)->force_active = !!BT_POWER_FORCE_ACTIVE_ON
>
I think I can keep force_active as __u8.
Will update on v2.
Thanks!
> Besides that,
> Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
^ permalink raw reply [flat|nested] 30+ messages in thread
* RE: [PATCH net-next 2/7] net: bluetooth: compact struct bt_skb_cb by converting boolean fields to bit fields
2015-02-26 2:10 ` [PATCH net-next 2/7] net: bluetooth: compact struct bt_skb_cb by converting boolean fields to bit fields Eyal Birger
2015-02-26 7:30 ` Shmulik Ladkani
@ 2015-02-26 11:31 ` David Laight
2015-02-26 13:21 ` Eyal Birger
1 sibling, 1 reply; 30+ messages in thread
From: David Laight @ 2015-02-26 11:31 UTC (permalink / raw)
To: 'Eyal Birger', davem@davemloft.net
Cc: willemb@google.com, edumazet@google.com,
shmulik.ladkani@gmail.com, marcel@holtmann.org,
netdev@vger.kernel.org
From: Eyal Birger
> On 64 bit systems, struct bt_skb_cb size is padded by 6 bytes in order
> to reach 8 byte alignment.
> Convert boolean fields force_active, incoming and req_start to bit fields
> in order to eliminate the padding.
...
> +++ b/include/net/bluetooth/bluetooth.h
...
> struct bt_skb_cb {
> __u8 pkt_type;
> - __u8 incoming;
> __u16 opcode;
> __u16 expect;
> - __u8 force_active;
> - bool req_start;
> + __u8 force_active:1;
> + __u8 incoming:1;
> + __u8 req_start:1;
> u8 req_event;
> hci_req_complete_t req_complete;
> struct l2cap_chan *chan;
You've generated some pad bytes, best to put everything on its natural alignment.
The old version had req_complete at offset 9, the new one at offset 8.
Not sure how this removes 6 bytes of pad.
David
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 2/7] net: bluetooth: compact struct bt_skb_cb by converting boolean fields to bit fields
2015-02-26 11:31 ` David Laight
@ 2015-02-26 13:21 ` Eyal Birger
0 siblings, 0 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 13:21 UTC (permalink / raw)
To: David Laight
Cc: davem@davemloft.net, willemb@google.com, edumazet@google.com,
shmulik.ladkani@gmail.com, marcel@holtmann.org,
netdev@vger.kernel.org
Hi,
On Thu, Feb 26, 2015 at 1:31 PM, David Laight <David.Laight@aculab.com> wrote:
> From: Eyal Birger
>> On 64 bit systems, struct bt_skb_cb size is padded by 6 bytes in order
>> to reach 8 byte alignment.
>> Convert boolean fields force_active, incoming and req_start to bit fields
>> in order to eliminate the padding.
> ...
>> +++ b/include/net/bluetooth/bluetooth.h
> ...
>> struct bt_skb_cb {
>> __u8 pkt_type;
>> - __u8 incoming;
>> __u16 opcode;
>> __u16 expect;
>> - __u8 force_active;
>> - bool req_start;
>> + __u8 force_active:1;
>> + __u8 incoming:1;
>> + __u8 req_start:1;
>> u8 req_event;
>> hci_req_complete_t req_complete;
>> struct l2cap_chan *chan;
>
> You've generated some pad bytes, best to put everything on its natural alignment.
> The old version had req_complete at offset 9, the new one at offset 8.
> Not sure how this removes 6 bytes of pad.
>
Hmm, I tried different variations here. At a certain point, I had 6
bytes of padding.
Seems pahole is playing peekaboo on my holes :)
Anyway, since req_complete has to be pointer aligned, it does not
reside at offset 9,
but at offset 16, which creates a hole.
Thanks. I will update appropriately in v2.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH net-next 3/7] net: rxrpc: change call to sock_recv_ts_and_drops() on rxrpc recvmsg to sock_recv_timestamp()
2015-02-26 2:10 [PATCH net-next 0/7] net: move skb->dropcount to skb->cb[] Eyal Birger
2015-02-26 2:10 ` [PATCH net-next 1/7] net: bluetooth: compact struct bt_skb_cb by inlining struct hci_req_ctrl Eyal Birger
2015-02-26 2:10 ` [PATCH net-next 2/7] net: bluetooth: compact struct bt_skb_cb by converting boolean fields to bit fields Eyal Birger
@ 2015-02-26 2:10 ` Eyal Birger
2015-02-26 4:40 ` Eric Dumazet
2015-02-26 2:10 ` [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[] Eyal Birger
` (3 subsequent siblings)
6 siblings, 1 reply; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 2:10 UTC (permalink / raw)
To: davem; +Cc: willemb, edumazet, shmulik.ladkani, marcel, netdev, Eyal Birger
Commit 3b885787ea4112 ("net: Generalize socket rx gap / receive queue overflow cmsg")
allowed receiving packet dropcount information as a socket level option.
RXRPC sockets recvmsg function was changed to support this by calling
sock_recv_ts_and_drops() instead of sock_recv_timestamp().
However, protocol families wishing to receive dropcount should call
sock_queue_rcv_skb() or set the dropcount specifically (as done
in packet_rcv()). This was not done for rxrpc and thus this feature
never worked on these sockets.
Formalizing this by not calling sock_recv_ts_and_drops() in rxrpc as
part of an effort to move skb->dropcount into skb->cb[]
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
net/rxrpc/ar-recvmsg.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 4575485..d58ba70 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -150,7 +150,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
&call->conn->trans->peer->srx, len);
msg->msg_namelen = len;
}
- sock_recv_ts_and_drops(msg, &rx->sk, skb);
+ sock_recv_timestamp(msg, &rx->sk, skb);
}
/* receive the message */
--
2.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH net-next 3/7] net: rxrpc: change call to sock_recv_ts_and_drops() on rxrpc recvmsg to sock_recv_timestamp()
2015-02-26 2:10 ` [PATCH net-next 3/7] net: rxrpc: change call to sock_recv_ts_and_drops() on rxrpc recvmsg to sock_recv_timestamp() Eyal Birger
@ 2015-02-26 4:40 ` Eric Dumazet
2015-02-26 7:30 ` Eyal Birger
0 siblings, 1 reply; 30+ messages in thread
From: Eric Dumazet @ 2015-02-26 4:40 UTC (permalink / raw)
To: Eyal Birger; +Cc: davem, willemb, edumazet, shmulik.ladkani, marcel, netdev
On Thu, 2015-02-26 at 04:10 +0200, Eyal Birger wrote:
> Commit 3b885787ea4112 ("net: Generalize socket rx gap / receive queue overflow cmsg")
> allowed receiving packet dropcount information as a socket level option.
> RXRPC sockets recvmsg function was changed to support this by calling
> sock_recv_ts_and_drops() instead of sock_recv_timestamp().
>
> However, protocol families wishing to receive dropcount should call
> sock_queue_rcv_skb() or set the dropcount specifically (as done
> in packet_rcv()). This was not done for rxrpc and thus this feature
> never worked on these sockets.
>
> Formalizing this by not calling sock_recv_ts_and_drops() in rxrpc as
> part of an effort to move skb->dropcount into skb->cb[]
>
> Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
> ---
> net/rxrpc/ar-recvmsg.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
> index 4575485..d58ba70 100644
> --- a/net/rxrpc/ar-recvmsg.c
> +++ b/net/rxrpc/ar-recvmsg.c
> @@ -150,7 +150,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
> &call->conn->trans->peer->srx, len);
> msg->msg_namelen = len;
> }
> - sock_recv_ts_and_drops(msg, &rx->sk, skb);
> + sock_recv_timestamp(msg, &rx->sk, skb);
> }
I guess this should be the last patch of the serie, as this will
overwrite skb->mark, and rxrpc seems to care about skb->mark
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 3/7] net: rxrpc: change call to sock_recv_ts_and_drops() on rxrpc recvmsg to sock_recv_timestamp()
2015-02-26 4:40 ` Eric Dumazet
@ 2015-02-26 7:30 ` Eyal Birger
2015-02-26 14:31 ` Eric Dumazet
0 siblings, 1 reply; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 7:30 UTC (permalink / raw)
To: Eric Dumazet
Cc: David Miller, Willem de Bruijn, Eric Dumazet, Shmulik Ladkani,
Marcel Holtmann, netdev@vger.kernel.org
>> However, protocol families wishing to receive dropcount should call
>> sock_queue_rcv_skb() or set the dropcount specifically (as done
>> in packet_rcv()). This was not done for rxrpc and thus this feature
>> never worked on these sockets.
>>
>> Formalizing this by not calling sock_recv_ts_and_drops() in rxrpc as
>> part of an effort to move skb->dropcount into skb->cb[]
>>
>> Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
>> ---
>> net/rxrpc/ar-recvmsg.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
>> index 4575485..d58ba70 100644
>> --- a/net/rxrpc/ar-recvmsg.c
>> +++ b/net/rxrpc/ar-recvmsg.c
>> @@ -150,7 +150,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
>> &call->conn->trans->peer->srx, len);
>> msg->msg_namelen = len;
>> }
>> - sock_recv_ts_and_drops(msg, &rx->sk, skb);
>> + sock_recv_timestamp(msg, &rx->sk, skb);
>> }
>
> I guess this should be the last patch of the serie, as this will
> overwrite skb->mark, and rxrpc seems to care about skb->mark
>
Maybe I didn't understand your comment. Note this patch does not fix
SO_RXQ_OVFL on rxrpc sockets.
I essentially reverted 3b885787ea4112 for these sockets.
It may be possible to compact rxrpc skb->cb[] usage by tricking the size
of the resend_at variable as you suggested.
However, since SO_RXQ_OVFL never really worked on these sockets,
I limited the patch series scope to moving skb->dropcount; This patch only
signals that rxrpc can't support this feature unless some space is freed in its
skb->cb[] use.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH net-next 3/7] net: rxrpc: change call to sock_recv_ts_and_drops() on rxrpc recvmsg to sock_recv_timestamp()
2015-02-26 7:30 ` Eyal Birger
@ 2015-02-26 14:31 ` Eric Dumazet
2015-02-26 14:55 ` Eyal Birger
0 siblings, 1 reply; 30+ messages in thread
From: Eric Dumazet @ 2015-02-26 14:31 UTC (permalink / raw)
To: Eyal Birger
Cc: David Miller, Willem de Bruijn, Eric Dumazet, Shmulik Ladkani,
Marcel Holtmann, netdev@vger.kernel.org
On Thu, 2015-02-26 at 09:30 +0200, Eyal Birger wrote:
> Maybe I didn't understand your comment. Note this patch does not fix
> SO_RXQ_OVFL on rxrpc sockets.
>
> I essentially reverted 3b885787ea4112 for these sockets.
>
> It may be possible to compact rxrpc skb->cb[] usage by tricking the size
> of the resend_at variable as you suggested.
>
> However, since SO_RXQ_OVFL never really worked on these sockets,
> I limited the patch series scope to moving skb->dropcount; This patch only
> signals that rxrpc can't support this feature unless some space is freed in its
> skb->cb[] use.
> --
My point is : If you stick this patch early in the serie, then skb->mark
will be overwritten by sock_recv_timestamp() (to store skb->dropcount)
and rxrpc breaks as it actively relies on skb->mark (apparently)
We dont care if SO_RXQ_OVFL is broken for rxprpc : Nobody noticed yet.
But breaking rxrpc skb->mark early in the serie does not help bisection
if needed later.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH net-next 3/7] net: rxrpc: change call to sock_recv_ts_and_drops() on rxrpc recvmsg to sock_recv_timestamp()
2015-02-26 14:31 ` Eric Dumazet
@ 2015-02-26 14:55 ` Eyal Birger
0 siblings, 0 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 14:55 UTC (permalink / raw)
To: Eric Dumazet
Cc: David Miller, Willem de Bruijn, Eric Dumazet, Shmulik Ladkani,
Marcel Holtmann, netdev@vger.kernel.org
On Thu, Feb 26, 2015 at 4:31 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2015-02-26 at 09:30 +0200, Eyal Birger wrote:
>
>> Maybe I didn't understand your comment. Note this patch does not fix
>> SO_RXQ_OVFL on rxrpc sockets.
>>
>> I essentially reverted 3b885787ea4112 for these sockets.
>>
>> It may be possible to compact rxrpc skb->cb[] usage by tricking the size
>> of the resend_at variable as you suggested.
>>
>> However, since SO_RXQ_OVFL never really worked on these sockets,
>> I limited the patch series scope to moving skb->dropcount; This patch only
>> signals that rxrpc can't support this feature unless some space is freed in its
>> skb->cb[] use.
>> --
>
> My point is : If you stick this patch early in the serie, then skb->mark
> will be overwritten by sock_recv_timestamp() (to store skb->dropcount)
> and rxrpc breaks as it actively relies on skb->mark (apparently)
>
skb->dropcount is stored prior to enqueue in sock_queue_rcv_skb().
This function is not called in rxrpc.
sock_recv_timestamp() is called from the recvmsg code, and does not alter
skb->dropcount.
What am I missing?
> We dont care if SO_RXQ_OVFL is broken for rxprpc : Nobody noticed yet.
>
This was my point in this patch - to make it explicit. When rxrpc called
sock_recv_ts_and_drops() it allegedly supported receiving drops, when
dropcount was never set.
> But breaking rxrpc skb->mark early in the serie does not help bisection
> if needed later.
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[]
2015-02-26 2:10 [PATCH net-next 0/7] net: move skb->dropcount to skb->cb[] Eyal Birger
` (2 preceding siblings ...)
2015-02-26 2:10 ` [PATCH net-next 3/7] net: rxrpc: change call to sock_recv_ts_and_drops() on rxrpc recvmsg to sock_recv_timestamp() Eyal Birger
@ 2015-02-26 2:10 ` Eyal Birger
2015-02-26 4:03 ` Eyal Birger
2015-02-26 17:26 ` Willem de Bruijn
2015-02-26 2:10 ` [PATCH net-next 5/7] net: use common macro for assering skb->cb[] available size in protocol families Eyal Birger
` (2 subsequent siblings)
6 siblings, 2 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 2:10 UTC (permalink / raw)
To: davem; +Cc: willemb, edumazet, shmulik.ladkani, marcel, netdev, Eyal Birger
As part of an effort to move skb->dropcount to skb->cb[], 4 bytes
of additional room are needed in skb->cb[] in packet sockets.
Store the skb original length in skb->dev instead of skb->cb[] for
this purpose.
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
net/packet/af_packet.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9c28cec..26889fd 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -216,7 +216,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
static void packet_flush_mclist(struct sock *sk);
struct packet_skb_cb {
- unsigned int origlen;
union {
struct sockaddr_pkt pkt;
struct sockaddr_ll ll;
@@ -224,6 +223,7 @@ struct packet_skb_cb {
};
#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
+#define PACKET_SKB_ORIGLEN(__skb) ((unsigned long *)&(__skb)->dev)
#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
#define GET_PBLOCK_DESC(x, bid) \
@@ -1825,13 +1825,12 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
- PACKET_SKB_CB(skb)->origlen = skb->len;
+ *PACKET_SKB_ORIGLEN(skb) = skb->len;
if (pskb_trim(skb, snaplen))
goto drop_n_acct;
skb_set_owner_r(skb, sk);
- skb->dev = NULL;
skb_dst_drop(skb);
/* drop conntrack reference */
@@ -3006,7 +3005,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
aux.tp_status = TP_STATUS_USER;
if (skb->ip_summed == CHECKSUM_PARTIAL)
aux.tp_status |= TP_STATUS_CSUMNOTREADY;
- aux.tp_len = PACKET_SKB_CB(skb)->origlen;
+ aux.tp_len = *PACKET_SKB_ORIGLEN(skb);
aux.tp_snaplen = skb->len;
aux.tp_mac = 0;
aux.tp_net = skb_network_offset(skb);
--
2.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[]
2015-02-26 2:10 ` [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[] Eyal Birger
@ 2015-02-26 4:03 ` Eyal Birger
2015-02-26 17:26 ` Willem de Bruijn
1 sibling, 0 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 4:03 UTC (permalink / raw)
To: David Miller
Cc: Willem de Bruijn, Eric Dumazet, Shmulik Ladkani, marcel,
netdev@vger.kernel.org, Eyal Birger
>
> sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
>
> - PACKET_SKB_CB(skb)->origlen = skb->len;
> + *PACKET_SKB_ORIGLEN(skb) = skb->len;
>
> if (pskb_trim(skb, snaplen))
> goto drop_n_acct;
>
> skb_set_owner_r(skb, sk);
> - skb->dev = NULL;
> skb_dst_drop(skb);
There is a problem here. skb->dev must still be valid on the call to
skb_set_owner_r().
Sorry about that. I'll spin a second version along with any other
comments raised on this series.
Eyal.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[]
2015-02-26 2:10 ` [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[] Eyal Birger
2015-02-26 4:03 ` Eyal Birger
@ 2015-02-26 17:26 ` Willem de Bruijn
2015-02-26 18:38 ` Eyal Birger
1 sibling, 1 reply; 30+ messages in thread
From: Willem de Bruijn @ 2015-02-26 17:26 UTC (permalink / raw)
To: Eyal Birger
Cc: David Miller, Eric Dumazet, shmulik.ladkani, marcel,
Network Development
On Wed, Feb 25, 2015 at 9:10 PM, Eyal Birger <eyal.birger@gmail.com> wrote:
> As part of an effort to move skb->dropcount to skb->cb[], 4 bytes
> of additional room are needed in skb->cb[] in packet sockets.
>
> Store the skb original length in skb->dev instead of skb->cb[] for
> this purpose.
Another option is to delay preparation of the full sockaddr_ll struct until
when it's needed. It often is not used at all (if msg_name == NULL).
sll_family, sll_protocol and sll_pkttype can be derived when needed
in packet_recvmsg. The first two are the head of sockaddr_ll, so
a shorter struct filled in in packet_rcv only from sll_ifindex onwards
would save the 4B needed for origlen. Actually, a union is simpler:
struct __packet_cb_sockaddr_ll {
union {
struct {
unsigned short sll_family;
__be16 sll_protocol;
}
unsigned int sll_origlen;
}
/* etc.. */
};
The family and protocol can just overwrite sll_origlen in packet_recvmsg
before the memcpy into msg->msg_name.
Also interesting would be to avoid copying the address in the fast path
as it may never be used, especially for long addresses (INFINIBAND_ALEN
and FWNET_ALEN). From what I can tell, all but one header_ops.parse
implementations return bytes from inside the packet, so could take an
unsigned char ** as argument, store that pointer and postpone the
memcpy to packet_recvmsg (handling skb_trim correctly). The exception is
fwnet_header_parse, which would require some workaround.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[]
2015-02-26 17:26 ` Willem de Bruijn
@ 2015-02-26 18:38 ` Eyal Birger
2015-02-26 18:51 ` Willem de Bruijn
0 siblings, 1 reply; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 18:38 UTC (permalink / raw)
To: Willem de Bruijn
Cc: David Miller, Eric Dumazet, Shmulik Ladkani, Marcel Holtmann,
Network Development
Hi,
On Thu, Feb 26, 2015 at 7:26 PM, Willem de Bruijn <willemb@google.com> wrote:
> On Wed, Feb 25, 2015 at 9:10 PM, Eyal Birger <eyal.birger@gmail.com> wrote:
>> As part of an effort to move skb->dropcount to skb->cb[], 4 bytes
>> of additional room are needed in skb->cb[] in packet sockets.
>>
>> Store the skb original length in skb->dev instead of skb->cb[] for
>> this purpose.
>
> Another option is to delay preparation of the full sockaddr_ll struct until
> when it's needed. It often is not used at all (if msg_name == NULL).
>
> sll_family, sll_protocol and sll_pkttype can be derived when needed
> in packet_recvmsg. The first two are the head of sockaddr_ll, so
> a shorter struct filled in in packet_rcv only from sll_ifindex onwards
> would save the 4B needed for origlen. Actually, a union is simpler:
>
> struct __packet_cb_sockaddr_ll {
> union {
> struct {
> unsigned short sll_family;
> __be16 sll_protocol;
> }
> unsigned int sll_origlen;
> }
> /* etc.. */
> };
>
> The family and protocol can just overwrite sll_origlen in packet_recvmsg
> before the memcpy into msg->msg_name.
>
That's an interesting option.
My personal inclination is not to inline sockaddr_ll in the packet cb structure.
IMHO it would probably make sense to do it as part of your below suggestion.
> Also interesting would be to avoid copying the address in the fast path
> as it may never be used, especially for long addresses (INFINIBAND_ALEN
> and FWNET_ALEN). From what I can tell, all but one header_ops.parse
> implementations return bytes from inside the packet, so could take an
> unsigned char ** as argument, store that pointer and postpone the
> memcpy to packet_recvmsg (handling skb_trim correctly). The exception is
> fwnet_header_parse, which would require some workaround.
I think it may be interesting to pursue this option, though I don't
think it should
be part of this specific effort.
Thanks!
Eyal.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[]
2015-02-26 18:38 ` Eyal Birger
@ 2015-02-26 18:51 ` Willem de Bruijn
2015-02-26 18:55 ` Eyal Birger
0 siblings, 1 reply; 30+ messages in thread
From: Willem de Bruijn @ 2015-02-26 18:51 UTC (permalink / raw)
To: Eyal Birger
Cc: David Miller, Eric Dumazet, Shmulik Ladkani, Marcel Holtmann,
Network Development
On Thu, Feb 26, 2015 at 1:38 PM, Eyal Birger <eyal.birger@gmail.com> wrote:
> Hi,
>
> On Thu, Feb 26, 2015 at 7:26 PM, Willem de Bruijn <willemb@google.com> wrote:
>> On Wed, Feb 25, 2015 at 9:10 PM, Eyal Birger <eyal.birger@gmail.com> wrote:
>>> As part of an effort to move skb->dropcount to skb->cb[], 4 bytes
>>> of additional room are needed in skb->cb[] in packet sockets.
>>>
>>> Store the skb original length in skb->dev instead of skb->cb[] for
>>> this purpose.
>>
>> Another option is to delay preparation of the full sockaddr_ll struct until
>> when it's needed. It often is not used at all (if msg_name == NULL).
>>
>> sll_family, sll_protocol and sll_pkttype can be derived when needed
>> in packet_recvmsg. The first two are the head of sockaddr_ll, so
>> a shorter struct filled in in packet_rcv only from sll_ifindex onwards
>> would save the 4B needed for origlen. Actually, a union is simpler:
>>
>> struct __packet_cb_sockaddr_ll {
>> union {
>> struct {
>> unsigned short sll_family;
>> __be16 sll_protocol;
>> }
>> unsigned int sll_origlen;
>> }
>> /* etc.. */
>> };
>>
>> The family and protocol can just overwrite sll_origlen in packet_recvmsg
>> before the memcpy into msg->msg_name.
>>
>
> That's an interesting option.
> My personal inclination is not to inline sockaddr_ll in the packet cb structure.
It already is, isn't it?
> IMHO it would probably make sense to do it as part of your below suggestion.
>
>> Also interesting would be to avoid copying the address in the fast path
>> as it may never be used, especially for long addresses (INFINIBAND_ALEN
>> and FWNET_ALEN). From what I can tell, all but one header_ops.parse
>> implementations return bytes from inside the packet, so could take an
>> unsigned char ** as argument, store that pointer and postpone the
>> memcpy to packet_recvmsg (handling skb_trim correctly). The exception is
>> fwnet_header_parse, which would require some workaround.
>
> I think it may be interesting to pursue this option, though I don't
> think it should
> be part of this specific effort.
Absolutely. This is a lot more work. Too much to fold into this patchset.
I was more speculating in general. I may look into this at some point.
>
> Thanks!
> Eyal.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[]
2015-02-26 18:51 ` Willem de Bruijn
@ 2015-02-26 18:55 ` Eyal Birger
0 siblings, 0 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 18:55 UTC (permalink / raw)
To: Willem de Bruijn
Cc: David Miller, Eric Dumazet, Shmulik Ladkani, Marcel Holtmann,
Network Development
On Thu, Feb 26, 2015 at 8:51 PM, Willem de Bruijn <willemb@google.com> wrote:
> On Thu, Feb 26, 2015 at 1:38 PM, Eyal Birger <eyal.birger@gmail.com> wrote:
>> Hi,
>>
>> On Thu, Feb 26, 2015 at 7:26 PM, Willem de Bruijn <willemb@google.com> wrote:
>>>
>>> Another option is to delay preparation of the full sockaddr_ll struct until
>>> when it's needed. It often is not used at all (if msg_name == NULL).
>>>
>>> sll_family, sll_protocol and sll_pkttype can be derived when needed
>>> in packet_recvmsg. The first two are the head of sockaddr_ll, so
>>> a shorter struct filled in in packet_rcv only from sll_ifindex onwards
>>> would save the 4B needed for origlen. Actually, a union is simpler:
>>>
>>> struct __packet_cb_sockaddr_ll {
>>> union {
>>> struct {
>>> unsigned short sll_family;
>>> __be16 sll_protocol;
>>> }
>>> unsigned int sll_origlen;
>>> }
>>> /* etc.. */
>>> };
>>>
>>> The family and protocol can just overwrite sll_origlen in packet_recvmsg
>>> before the memcpy into msg->msg_name.
>>>
>>
>> That's an interesting option.
>> My personal inclination is not to inline sockaddr_ll in the packet cb structure.
>
> It already is, isn't it?
>
I meant I prefer not to break out sockaddr_ll's individual fields in
packet_skb_cb.
Sorry for being ambiguous.
>> IMHO it would probably make sense to do it as part of your below suggestion.
>>
>>> Also interesting would be to avoid copying the address in the fast path
>>> as it may never be used, especially for long addresses (INFINIBAND_ALEN
>>> and FWNET_ALEN). From what I can tell, all but one header_ops.parse
>>> implementations return bytes from inside the packet, so could take an
>>> unsigned char ** as argument, store that pointer and postpone the
>>> memcpy to packet_recvmsg (handling skb_trim correctly). The exception is
>>> fwnet_header_parse, which would require some workaround.
>>
>> I think it may be interesting to pursue this option, though I don't
>> think it should
>> be part of this specific effort.
>
> Absolutely. This is a lot more work. Too much to fold into this patchset.
> I was more speculating in general. I may look into this at some point.
>
>>
>> Thanks!
>> Eyal.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH net-next 5/7] net: use common macro for assering skb->cb[] available size in protocol families
2015-02-26 2:10 [PATCH net-next 0/7] net: move skb->dropcount to skb->cb[] Eyal Birger
` (3 preceding siblings ...)
2015-02-26 2:10 ` [PATCH net-next 4/7] net: packet: use skb->dev as storage for skb orig len instead of skb->cb[] Eyal Birger
@ 2015-02-26 2:10 ` Eyal Birger
2015-02-26 2:10 ` [PATCH net-next 6/7] net: add common accessor for setting dropcount on packets Eyal Birger
2015-02-26 2:10 ` [PATCH net-next 7/7] net: move skb->dropcount to skb->cb[] Eyal Birger
6 siblings, 0 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 2:10 UTC (permalink / raw)
To: davem; +Cc: willemb, edumazet, shmulik.ladkani, marcel, netdev, Eyal Birger
As part of an effort to move skb->dropcount to skb->cb[] use a common
macro in protocol families using skb->cb[] for ancillary data to
validate available room in skb->cb[].
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
include/net/sock.h | 3 +++
net/bluetooth/af_bluetooth.c | 3 +--
net/can/bcm.c | 2 +-
net/can/raw.c | 6 +++---
net/ipv4/af_inet.c | 2 +-
net/ipv4/tcp.c | 3 +--
net/ipv6/af_inet6.c | 2 +-
net/packet/af_packet.c | 3 +--
net/sctp/protocol.c | 3 +--
9 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index ab186b1..a2502d2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2078,6 +2078,9 @@ static inline int sock_intr_errno(long timeo)
return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
}
+#define sock_skb_cb_check_size(size) \
+ BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
+
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index ce22e0c..4b904c9 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -711,10 +711,9 @@ EXPORT_SYMBOL_GPL(bt_debugfs);
static int __init bt_init(void)
{
- struct sk_buff *skb;
int err;
- BUILD_BUG_ON(sizeof(struct bt_skb_cb) > sizeof(skb->cb));
+ sock_skb_cb_check_size(sizeof(struct bt_skb_cb));
BT_INFO("Core ver %s", VERSION);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ee9ffd9..d559f92 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -328,7 +328,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
* containing the interface index.
*/
- BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+ sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
diff --git a/net/can/raw.c b/net/can/raw.c
index 00c13ef..94601b7 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -95,8 +95,8 @@ struct raw_sock {
*/
static inline unsigned int *raw_flags(struct sk_buff *skb)
{
- BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(struct sockaddr_can) +
- sizeof(unsigned int)));
+ sock_skb_cb_check_size(sizeof(struct sockaddr_can) +
+ sizeof(unsigned int));
/* return pointer after struct sockaddr_can */
return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
@@ -135,7 +135,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
* containing the interface index.
*/
- BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+ sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d2e49ba..4ce954c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1675,7 +1675,7 @@ static int __init inet_init(void)
struct list_head *r;
int rc = -EINVAL;
- BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
+ sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
rc = proto_register(&tcp_prot, 1);
if (rc)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9d72a0f..4b57ea8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3005,12 +3005,11 @@ static void __init tcp_init_mem(void)
void __init tcp_init(void)
{
- struct sk_buff *skb = NULL;
unsigned long limit;
int max_rshare, max_wshare, cnt;
unsigned int i;
- BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
+ sock_skb_cb_check_size(sizeof(struct tcp_skb_cb));
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e8c4400..6bafcc2 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -824,7 +824,7 @@ static int __init inet6_init(void)
struct list_head *r;
int err = 0;
- BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
+ sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));
/* Register the socket-side information for inet6_create. */
for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 26889fd..6ce6e00 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1810,8 +1810,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
skb = nskb;
}
- BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
- sizeof(skb->cb));
+ sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
sll = &PACKET_SKB_CB(skb)->sa.ll;
sll->sll_family = AF_PACKET;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 8f34b27..53b7acd 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1322,8 +1322,7 @@ static __init int sctp_init(void)
int max_share;
int order;
- BUILD_BUG_ON(sizeof(struct sctp_ulpevent) >
- sizeof(((struct sk_buff *) 0)->cb));
+ sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
/* Allocate bind_bucket and chunk caches. */
status = -ENOBUFS;
--
2.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH net-next 6/7] net: add common accessor for setting dropcount on packets
2015-02-26 2:10 [PATCH net-next 0/7] net: move skb->dropcount to skb->cb[] Eyal Birger
` (4 preceding siblings ...)
2015-02-26 2:10 ` [PATCH net-next 5/7] net: use common macro for assering skb->cb[] available size in protocol families Eyal Birger
@ 2015-02-26 2:10 ` Eyal Birger
2015-02-26 4:43 ` Eric Dumazet
2015-02-26 2:10 ` [PATCH net-next 7/7] net: move skb->dropcount to skb->cb[] Eyal Birger
6 siblings, 1 reply; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 2:10 UTC (permalink / raw)
To: davem; +Cc: willemb, edumazet, shmulik.ladkani, marcel, netdev, Eyal Birger
As part of an effort to move skb->dropcount to skb->cb[], use
a common function in order to set dropcount in struct sk_buff.
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
include/net/sock.h | 5 +++++
net/core/sock.c | 2 +-
net/packet/af_packet.c | 2 +-
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index a2502d2..d462f5e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2081,6 +2081,11 @@ static inline int sock_intr_errno(long timeo)
#define sock_skb_cb_check_size(size) \
BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
+static inline void sock_skb_set_dropcount(struct sock *sk, struct sk_buff *skb)
+{
+ skb->dropcount = atomic_read(&sk->sk_drops);
+}
+
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
diff --git a/net/core/sock.c b/net/core/sock.c
index 93c8b20..9c74fc8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -466,7 +466,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
skb_dst_force(skb);
spin_lock_irqsave(&list->lock, flags);
- skb->dropcount = atomic_read(&sk->sk_drops);
+ sock_skb_set_dropcount(sk, skb);
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6ce6e00..cc51b87 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1837,7 +1837,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
spin_lock(&sk->sk_receive_queue.lock);
po->stats.stats1.tp_packets++;
- skb->dropcount = atomic_read(&sk->sk_drops);
+ sock_skb_set_dropcount(sk, skb);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
--
2.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH net-next 6/7] net: add common accessor for setting dropcount on packets
2015-02-26 2:10 ` [PATCH net-next 6/7] net: add common accessor for setting dropcount on packets Eyal Birger
@ 2015-02-26 4:43 ` Eric Dumazet
0 siblings, 0 replies; 30+ messages in thread
From: Eric Dumazet @ 2015-02-26 4:43 UTC (permalink / raw)
To: Eyal Birger; +Cc: davem, willemb, edumazet, shmulik.ladkani, marcel, netdev
On Thu, 2015-02-26 at 04:10 +0200, Eyal Birger wrote:
> As part of an effort to move skb->dropcount to skb->cb[], use
> a common function in order to set dropcount in struct sk_buff.
>
> Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
> ---
> include/net/sock.h | 5 +++++
> net/core/sock.c | 2 +-
> net/packet/af_packet.c | 2 +-
> 3 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index a2502d2..d462f5e 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2081,6 +2081,11 @@ static inline int sock_intr_errno(long timeo)
> #define sock_skb_cb_check_size(size) \
> BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
>
> +static inline void sock_skb_set_dropcount(struct sock *sk, struct sk_buff *skb)
const struct sock *sk
> +{
> + skb->dropcount = atomic_read(&sk->sk_drops);
> +}
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH net-next 7/7] net: move skb->dropcount to skb->cb[]
2015-02-26 2:10 [PATCH net-next 0/7] net: move skb->dropcount to skb->cb[] Eyal Birger
` (5 preceding siblings ...)
2015-02-26 2:10 ` [PATCH net-next 6/7] net: add common accessor for setting dropcount on packets Eyal Birger
@ 2015-02-26 2:10 ` Eyal Birger
2015-02-26 8:49 ` Shmulik Ladkani
6 siblings, 1 reply; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 2:10 UTC (permalink / raw)
To: davem; +Cc: willemb, edumazet, shmulik.ladkani, marcel, netdev, Eyal Birger
Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)")
unionized skb->mark and skb->dropcount in order to allow recording
of the socket drop count while maintaining struct sk_buff size.
skb->dropcount was introduced since there was no available room
in skb->cb[] in packet sockets. However, its introduction led to
the inability to export skb->mark, or any other aliased field to
userspace if so desired.
Moving the dropcount metric to skb->cb[] eliminates this problem
at the expense of 4 bytes less in skb->cb[] for protocol families
using it.
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
include/linux/skbuff.h | 2 --
include/net/sock.h | 18 ++++++++++++++++--
net/socket.c | 4 ++--
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d898b32..bba1330 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -492,7 +492,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @napi_id: id of the NAPI struct this skb came from
* @secmark: security marking
* @mark: Generic packet mark
- * @dropcount: total number of sk_receive_queue overflows
* @vlan_proto: vlan encapsulation protocol
* @vlan_tci: vlan tag control information
* @inner_protocol: Protocol (encapsulation)
@@ -641,7 +640,6 @@ struct sk_buff {
#endif
union {
__u32 mark;
- __u32 dropcount;
__u32 reserved_tailroom;
};
diff --git a/include/net/sock.h b/include/net/sock.h
index d462f5e..8807586 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2078,12 +2078,26 @@ static inline int sock_intr_errno(long timeo)
return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
}
+struct sock_skb_cb {
+ u32 dropcount;
+};
+
+/* Store sock_skb_cb at the end of skb->cb[] so protocol families
+ * using skb->cb[] would keep using it directly and utilize its
+ * alignement guarantee.
+ */
+#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
+ sizeof(struct sock_skb_cb)))
+
+#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
+ SOCK_SKB_CB_OFFSET))
+
#define sock_skb_cb_check_size(size) \
- BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
+ BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
static inline void sock_skb_set_dropcount(struct sock *sk, struct sk_buff *skb)
{
- skb->dropcount = atomic_read(&sk->sk_drops);
+ SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
}
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
diff --git a/net/socket.c b/net/socket.c
index bbedbfc..b78cf60 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -731,9 +731,9 @@ EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
- if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
+ if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
- sizeof(__u32), &skb->dropcount);
+ sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
}
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
--
2.1.4
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PATCH net-next 7/7] net: move skb->dropcount to skb->cb[]
2015-02-26 2:10 ` [PATCH net-next 7/7] net: move skb->dropcount to skb->cb[] Eyal Birger
@ 2015-02-26 8:49 ` Shmulik Ladkani
2015-02-26 10:17 ` Eyal Birger
0 siblings, 1 reply; 30+ messages in thread
From: Shmulik Ladkani @ 2015-02-26 8:49 UTC (permalink / raw)
To: Eyal Birger; +Cc: davem, willemb, edumazet, marcel, netdev
On Thu, 26 Feb 2015 04:10:12 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
> diff --git a/include/net/sock.h b/include/net/sock.h
> index d462f5e..8807586 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2078,12 +2078,26 @@ static inline int sock_intr_errno(long timeo)
> return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
> }
>
> +struct sock_skb_cb {
> + u32 dropcount;
> +};
> +
> +/* Store sock_skb_cb at the end of skb->cb[] so protocol families
> + * using skb->cb[] would keep using it directly and utilize its
> + * alignement guarantee.
> + */
> +#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
> + sizeof(struct sock_skb_cb)))
> +
> +#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
> + SOCK_SKB_CB_OFFSET))
> +
> #define sock_skb_cb_check_size(size) \
> - BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
> + BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
>
You didn't take care of aligning the 'sock_skb_cb'.
(Althogh in practive, dropcount is 4 and cb[] is 48 so you're golden).
How about:
struct sock_skb_cb {
/* protocol families specifc CBs */
u8 reserved[__SOCK_SKB_CB_OFFSET];
struct __sock_skb_cb x; // name me better!
};
Where '__SOCK_SKB_CB_OFFSET' and '__sock_skb_cb' are as follows:
struct __sock_skb_cb { // name me better!
u32 dropcount;
};
#define __SOCK_SKB_CB_OFFSET_UNALIGNED \
(FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct __sock_skb_cb))
#define __SOCK_SKB_CB_OFFSET \
(__SOCK_SKB_CB_OFFSET_UNALIGNED - \
(__SOCK_SKB_CB_OFFSET_UNALIGNED % __alignof__(struct __sock_skb_cb)))
This also takes care for alignement of '__sock_skb_cb x' within the CB.
Then,
#define sock_skb_cb_check_size(size) \
BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sock_skb_cb, reserved))
Regards,
Shmulik
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 7/7] net: move skb->dropcount to skb->cb[]
2015-02-26 8:49 ` Shmulik Ladkani
@ 2015-02-26 10:17 ` Eyal Birger
2015-02-26 10:48 ` Eyal Birger
0 siblings, 1 reply; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 10:17 UTC (permalink / raw)
To: Shmulik Ladkani
Cc: David Miller, Willem de Bruijn, Eric Dumazet, marcel,
netdev@vger.kernel.org
On Thu, Feb 26, 2015 at 10:49 AM, Shmulik Ladkani
<shmulik.ladkani@gmail.com> wrote:
> On Thu, 26 Feb 2015 04:10:12 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
>> diff --git a/include/net/sock.h b/include/net/sock.h
>> index d462f5e..8807586 100644
>> --- a/include/net/sock.h
>> +++ b/include/net/sock.h
>> @@ -2078,12 +2078,26 @@ static inline int sock_intr_errno(long timeo)
>> return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
>> }
>>
>> +struct sock_skb_cb {
>> + u32 dropcount;
>> +};
>> +
>> +/* Store sock_skb_cb at the end of skb->cb[] so protocol families
>> + * using skb->cb[] would keep using it directly and utilize its
>> + * alignement guarantee.
>> + */
>> +#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
>> + sizeof(struct sock_skb_cb)))
>> +
>> +#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
>> + SOCK_SKB_CB_OFFSET))
>> +
>> #define sock_skb_cb_check_size(size) \
>> - BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
>> + BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
>>
>
> You didn't take care of aligning the 'sock_skb_cb'.
> (Althogh in practive, dropcount is 4 and cb[] is 48 so you're golden).
>
> How about:
>
> struct sock_skb_cb {
> /* protocol families specifc CBs */
> u8 reserved[__SOCK_SKB_CB_OFFSET];
> struct __sock_skb_cb x; // name me better!
> };
>
> Where '__SOCK_SKB_CB_OFFSET' and '__sock_skb_cb' are as follows:
>
> struct __sock_skb_cb { // name me better!
> u32 dropcount;
> };
>
> #define __SOCK_SKB_CB_OFFSET_UNALIGNED \
> (FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct __sock_skb_cb))
>
> #define __SOCK_SKB_CB_OFFSET \
> (__SOCK_SKB_CB_OFFSET_UNALIGNED - \
> (__SOCK_SKB_CB_OFFSET_UNALIGNED % __alignof__(struct __sock_skb_cb)))
>
> This also takes care for alignement of '__sock_skb_cb x' within the CB.
>
> Then,
>
> #define sock_skb_cb_check_size(size) \
> BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sock_skb_cb, reserved))
>
Agreed. Will add to v2.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 7/7] net: move skb->dropcount to skb->cb[]
2015-02-26 10:17 ` Eyal Birger
@ 2015-02-26 10:48 ` Eyal Birger
2015-02-26 15:32 ` Shmulik Ladkani
0 siblings, 1 reply; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 10:48 UTC (permalink / raw)
To: Shmulik Ladkani
Cc: David Miller, Willem de Bruijn, Eric Dumazet, Marcel Holtmann,
netdev@vger.kernel.org
On Thu, Feb 26, 2015 at 12:17 PM, Eyal Birger <eyal.birger@gmail.com> wrote:
> On Thu, Feb 26, 2015 at 10:49 AM, Shmulik Ladkani
> <shmulik.ladkani@gmail.com> wrote:
>> On Thu, 26 Feb 2015 04:10:12 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
>>> diff --git a/include/net/sock.h b/include/net/sock.h
>>> index d462f5e..8807586 100644
>>> --- a/include/net/sock.h
>>> +++ b/include/net/sock.h
>>> @@ -2078,12 +2078,26 @@ static inline int sock_intr_errno(long timeo)
>>> return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
>>> }
>>>
>>> +struct sock_skb_cb {
>>> + u32 dropcount;
>>> +};
>>> +
>>> +/* Store sock_skb_cb at the end of skb->cb[] so protocol families
>>> + * using skb->cb[] would keep using it directly and utilize its
>>> + * alignement guarantee.
>>> + */
>>> +#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
>>> + sizeof(struct sock_skb_cb)))
>>> +
>>> +#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
>>> + SOCK_SKB_CB_OFFSET))
>>> +
>>> #define sock_skb_cb_check_size(size) \
>>> - BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
>>> + BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
>>>
>>
>> You didn't take care of aligning the 'sock_skb_cb'.
>> (Althogh in practive, dropcount is 4 and cb[] is 48 so you're golden).
>>
>> How about:
>>
>> struct sock_skb_cb {
>> /* protocol families specifc CBs */
>> u8 reserved[__SOCK_SKB_CB_OFFSET];
>> struct __sock_skb_cb x; // name me better!
>> };
>>
>> Where '__SOCK_SKB_CB_OFFSET' and '__sock_skb_cb' are as follows:
>>
>> struct __sock_skb_cb { // name me better!
>> u32 dropcount;
>> };
>>
>> #define __SOCK_SKB_CB_OFFSET_UNALIGNED \
>> (FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct __sock_skb_cb))
>>
>> #define __SOCK_SKB_CB_OFFSET \
>> (__SOCK_SKB_CB_OFFSET_UNALIGNED - \
>> (__SOCK_SKB_CB_OFFSET_UNALIGNED % __alignof__(struct __sock_skb_cb)))
>>
>> This also takes care for alignement of '__sock_skb_cb x' within the CB.
>>
>> Then,
>>
>> #define sock_skb_cb_check_size(size) \
>> BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sock_skb_cb, reserved))
>>
>
> Agreed. Will add to v2.
After giving it some additional thought and research, I don't think
this is necessary.
The sizeof operator on struct sock_skb_cb would give a padded result
which would take care of the structure alignment.
This is under the assumption that skb->cb[] size is of a proper multiple (which
I think is rather safe).
Objections?
Eyal.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 7/7] net: move skb->dropcount to skb->cb[]
2015-02-26 10:48 ` Eyal Birger
@ 2015-02-26 15:32 ` Shmulik Ladkani
2015-02-26 18:52 ` Eyal Birger
0 siblings, 1 reply; 30+ messages in thread
From: Shmulik Ladkani @ 2015-02-26 15:32 UTC (permalink / raw)
To: Eyal Birger
Cc: David Miller, Willem de Bruijn, Eric Dumazet, Marcel Holtmann,
netdev@vger.kernel.org
On Thu, 26 Feb 2015 12:48:14 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
> After giving it some additional thought and research, I don't think
> this is necessary.
>
> The sizeof operator on struct sock_skb_cb would give a padded result
> which would take care of the structure alignment.
>
> This is under the assumption that skb->cb[] size is of a proper multiple (which
> I think is rather safe).
Yes, you are correct.
With the assumption that skb->cb and 'cb' size are properly aligned and
sized (compile time asserted, btw?), you can simply:
struct your_thing { // name it
u32 dropcount;
};
struct sock_skb_cb {
/* protocol families specifc CBs */
u8 pf_reserved[PAD];
struct your_thing sock_cb;
};
where:
#define PAD (FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct your_thing))
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH net-next 7/7] net: move skb->dropcount to skb->cb[]
2015-02-26 15:32 ` Shmulik Ladkani
@ 2015-02-26 18:52 ` Eyal Birger
0 siblings, 0 replies; 30+ messages in thread
From: Eyal Birger @ 2015-02-26 18:52 UTC (permalink / raw)
To: Shmulik Ladkani
Cc: David Miller, Willem de Bruijn, Eric Dumazet, Marcel Holtmann,
netdev@vger.kernel.org
On Thu, Feb 26, 2015 at 5:32 PM, Shmulik Ladkani
<shmulik.ladkani@gmail.com> wrote:
> On Thu, 26 Feb 2015 12:48:14 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
>> After giving it some additional thought and research, I don't think
>> this is necessary.
>>
>> The sizeof operator on struct sock_skb_cb would give a padded result
>> which would take care of the structure alignment.
>>
>> This is under the assumption that skb->cb[] size is of a proper multiple (which
>> I think is rather safe).
>
> Yes, you are correct.
>
> With the assumption that skb->cb and 'cb' size are properly aligned and
> sized (compile time asserted, btw?), you can simply:
>
> struct your_thing { // name it
> u32 dropcount;
> };
>
> struct sock_skb_cb {
> /* protocol families specifc CBs */
> u8 pf_reserved[PAD];
> struct your_thing sock_cb;
> };
>
> where:
>
> #define PAD (FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct your_thing))
>
struct sock_skb_cb as I see it represents the sock generic metadata
required in the skb->cb[] and its placement at the end of skb->cb[]
is an implementation detail - so it, in my view, is what you referred to as
"struct your_thing".
Coding-wise I would rather avoid the additional enclosing struct and stick
with the proposed implementation.
Eyal.
^ permalink raw reply [flat|nested] 30+ messages in thread