* [net-next 3/7] bna: TX Intr Coalescing Fix
From: Rasesh Mody @ 2012-12-11 22:24 UTC (permalink / raw)
To: davem, netdev
Cc: bhutchings, David.Laight, adapter_linux_open_src_team,
Rasesh Mody
In-Reply-To: <1355264696-8927-1-git-send-email-rmody@brocade.com>
Change Details:
For Tx IB, IPM was enabled with inter_pkt_timeo of 0. This caused the
Tx IB not to generate interrupt till inter_pkt_count of packets have been
received. Correct definition for BFI_TX_INTERPKT_TIMEO & BFI_TX_INTERPKT_COUNT
Signed-off-by: Rasesh Mody <rmody@brocade.com>
---
drivers/net/ethernet/brocade/bna/bna_hw_defs.h | 3 ++-
drivers/net/ethernet/brocade/bna/bna_tx_rx.c | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/brocade/bna/bna_hw_defs.h b/drivers/net/ethernet/brocade/bna/bna_hw_defs.h
index b8c4e21..af3f7bb 100644
--- a/drivers/net/ethernet/brocade/bna/bna_hw_defs.h
+++ b/drivers/net/ethernet/brocade/bna/bna_hw_defs.h
@@ -46,7 +46,8 @@
#define BFI_MAX_INTERPKT_COUNT 0xFF
#define BFI_MAX_INTERPKT_TIMEO 0xF /* in 0.5us units */
#define BFI_TX_COALESCING_TIMEO 20 /* 20 * 5 = 100us */
-#define BFI_TX_INTERPKT_COUNT 32
+#define BFI_TX_INTERPKT_COUNT 12 /* Pkt Cnt = 12 */
+#define BFI_TX_INTERPKT_TIMEO 15 /* 15 * 0.5 = 7.5us */
#define BFI_RX_COALESCING_TIMEO 12 /* 12 * 5 = 60us */
#define BFI_RX_INTERPKT_COUNT 6 /* Pkt Cnt = 6 */
#define BFI_RX_INTERPKT_TIMEO 3 /* 3 * 0.5 = 1.5us */
diff --git a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
index bb5467b..4df6d4b 100644
--- a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
+++ b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
@@ -3569,7 +3569,7 @@ bna_tx_create(struct bna *bna, struct bnad *bnad,
if (intr_info->intr_type == BNA_INTR_T_INTX)
txq->ib.intr_vector = (1 << txq->ib.intr_vector);
txq->ib.coalescing_timeo = tx_cfg->coalescing_timeo;
- txq->ib.interpkt_timeo = 0; /* Not used */
+ txq->ib.interpkt_timeo = BFI_TX_INTERPKT_TIMEO;
txq->ib.interpkt_count = BFI_TX_INTERPKT_COUNT;
/* TCB */
--
1.7.1
^ permalink raw reply related
* [net-next 4/7] bna: Rx Page Based Allocation
From: Rasesh Mody @ 2012-12-11 22:24 UTC (permalink / raw)
To: davem, netdev
Cc: bhutchings, David.Laight, adapter_linux_open_src_team,
Rasesh Mody
In-Reply-To: <1355264696-8927-1-git-send-email-rmody@brocade.com>
Change Details:
Enhanced support for GRO. Page-base allocation method for Rx buffers is
used in GRO. Skb allocation has been removed in Rx path to use always warm-cache
skbs provided by napi_get_frags.
Signed-off-by: Rasesh Mody <rmody@brocade.com>
---
drivers/net/ethernet/brocade/bna/bnad.c | 318 ++++++++++++++++++++++++------
drivers/net/ethernet/brocade/bna/bnad.h | 19 ++
2 files changed, 273 insertions(+), 64 deletions(-)
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index da5470a..22d52ef 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -266,53 +266,181 @@ bnad_msix_tx(int irq, void *data)
return IRQ_HANDLED;
}
+static inline void
+bnad_rxq_alloc_uninit(struct bnad *bnad, struct bna_rcb *rcb)
+{
+ struct bnad_rx_unmap_q *unmap_q = rcb->unmap_q;
+
+ unmap_q->reuse_pi = -1;
+ unmap_q->alloc_order = -1;
+ unmap_q->map_size = 0;
+ unmap_q->type = BNAD_RXBUF_NONE;
+}
+
+/* Default is page-based allocation. Multi-buffer support - TBD */
+static int
+bnad_rxq_alloc_init(struct bnad *bnad, struct bna_rcb *rcb)
+{
+ struct bnad_rx_unmap_q *unmap_q = rcb->unmap_q;
+ int mtu, order;
+
+ bnad_rxq_alloc_uninit(bnad, rcb);
+
+ mtu = bna_enet_mtu_get(&bnad->bna.enet);
+ order = get_order(mtu);
+
+ if (bna_is_small_rxq(rcb->id)) {
+ unmap_q->alloc_order = 0;
+ unmap_q->map_size = rcb->rxq->buffer_size;
+ } else {
+ unmap_q->alloc_order = order;
+ unmap_q->map_size =
+ (rcb->rxq->buffer_size > 2048) ?
+ PAGE_SIZE << order : 2048;
+ }
+
+ BUG_ON(((PAGE_SIZE << order) % unmap_q->map_size));
+
+ unmap_q->type = BNAD_RXBUF_PAGE;
+
+ return 0;
+}
+
+static inline void
+bnad_rxq_cleanup_page(struct bnad *bnad, struct bnad_rx_unmap *unmap)
+{
+ if (!unmap->page)
+ return;
+
+ dma_unmap_page(&bnad->pcidev->dev,
+ dma_unmap_addr(&unmap->vector, dma_addr),
+ unmap->vector.len, DMA_FROM_DEVICE);
+ put_page(unmap->page);
+ unmap->page = NULL;
+ dma_unmap_addr_set(&unmap->vector, dma_addr, 0);
+ unmap->vector.len = 0;
+}
+
+static inline void
+bnad_rxq_cleanup_skb(struct bnad *bnad, struct bnad_rx_unmap *unmap)
+{
+ if (!unmap->skb)
+ return;
+
+ dma_unmap_single(&bnad->pcidev->dev,
+ dma_unmap_addr(&unmap->vector, dma_addr),
+ unmap->vector.len, DMA_FROM_DEVICE);
+ dev_kfree_skb_any(unmap->skb);
+ unmap->skb = NULL;
+ dma_unmap_addr_set(&unmap->vector, dma_addr, 0);
+ unmap->vector.len = 0;
+}
+
static void
bnad_rxq_cleanup(struct bnad *bnad, struct bna_rcb *rcb)
{
- struct bnad_rx_unmap *unmap_q = rcb->unmap_q;
- struct sk_buff *skb;
+ struct bnad_rx_unmap_q *unmap_q = rcb->unmap_q;
int i;
for (i = 0; i < rcb->q_depth; i++) {
- struct bnad_rx_unmap *unmap = &unmap_q[i];
+ struct bnad_rx_unmap *unmap = &unmap_q->unmap[i];
- skb = unmap->skb;
- if (!skb)
- continue;
+ if (BNAD_RXBUF_IS_PAGE(unmap_q->type))
+ bnad_rxq_cleanup_page(bnad, unmap);
+ else
+ bnad_rxq_cleanup_skb(bnad, unmap);
+ }
+ bnad_rxq_alloc_uninit(bnad, rcb);
+}
- unmap->skb = NULL;
- dma_unmap_single(&bnad->pcidev->dev,
- dma_unmap_addr(&unmap->vector, dma_addr),
- unmap->vector.len, DMA_FROM_DEVICE);
- dma_unmap_addr_set(&unmap->vector, dma_addr, 0);
- unmap->vector.len = 0;
- dev_kfree_skb_any(skb);
+static u32
+bnad_rxq_refill_page(struct bnad *bnad, struct bna_rcb *rcb, u32 nalloc)
+{
+ u32 alloced, prod, q_depth;
+ struct bnad_rx_unmap_q *unmap_q = rcb->unmap_q;
+ struct bnad_rx_unmap *unmap, *prev;
+ struct bna_rxq_entry *rxent;
+ struct page *page;
+ u32 page_offset, alloc_size;
+ dma_addr_t dma_addr;
+
+ prod = rcb->producer_index;
+ q_depth = rcb->q_depth;
+
+ alloc_size = PAGE_SIZE << unmap_q->alloc_order;
+ alloced = 0;
+
+ while (nalloc--) {
+ unmap = &unmap_q->unmap[prod];
+
+ if (unmap_q->reuse_pi < 0) {
+ page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
+ unmap_q->alloc_order);
+ page_offset = 0;
+ } else {
+ prev = &unmap_q->unmap[unmap_q->reuse_pi];
+ page = prev->page;
+ page_offset = prev->page_offset + unmap_q->map_size;
+ get_page(page);
+ }
+
+ if (unlikely(!page)) {
+ BNAD_UPDATE_CTR(bnad, rxbuf_alloc_failed);
+ rcb->rxq->rxbuf_alloc_failed++;
+ goto finishing;
+ }
+
+ dma_addr = dma_map_page(&bnad->pcidev->dev, page, page_offset,
+ unmap_q->map_size, DMA_FROM_DEVICE);
+
+ unmap->page = page;
+ unmap->page_offset = page_offset;
+ dma_unmap_addr_set(&unmap->vector, dma_addr, dma_addr);
+ unmap->vector.len = unmap_q->map_size;
+ page_offset += unmap_q->map_size;
+
+ if (page_offset < alloc_size)
+ unmap_q->reuse_pi = prod;
+ else
+ unmap_q->reuse_pi = -1;
+
+ rxent = &((struct bna_rxq_entry *)rcb->sw_q)[prod];
+ BNA_SET_DMA_ADDR(dma_addr, &rxent->host_addr);
+ BNA_QE_INDX_INC(prod, q_depth);
+ alloced++;
}
+
+finishing:
+ if (likely(alloced)) {
+ rcb->producer_index = prod;
+ smp_mb();
+ if (likely(test_bit(BNAD_RXQ_POST_OK, &rcb->flags)))
+ bna_rxq_prod_indx_doorbell(rcb);
+ }
+
+ return alloced;
}
-/* Allocate and post BNAD_RXQ_REFILL_THRESHOLD_SHIFT buffers at a time */
-static void
-bnad_rxq_post(struct bnad *bnad, struct bna_rcb *rcb)
+static u32
+bnad_rxq_refill_skb(struct bnad *bnad, struct bna_rcb *rcb, u32 nalloc)
{
- u32 to_alloc, alloced, prod, q_depth, buff_sz;
- struct bnad_rx_unmap *unmap_q = rcb->unmap_q;
+ u32 alloced, prod, q_depth, buff_sz;
+ struct bnad_rx_unmap_q *unmap_q = rcb->unmap_q;
struct bnad_rx_unmap *unmap;
struct bna_rxq_entry *rxent;
struct sk_buff *skb;
dma_addr_t dma_addr;
buff_sz = rcb->rxq->buffer_size;
- alloced = 0;
- to_alloc = BNA_QE_FREE_CNT(rcb, rcb->q_depth);
- if (!(to_alloc >> BNAD_RXQ_REFILL_THRESHOLD_SHIFT))
- return;
-
prod = rcb->producer_index;
q_depth = rcb->q_depth;
- while (to_alloc--) {
- skb = netdev_alloc_skb_ip_align(bnad->netdev,
- buff_sz);
+ alloced = 0;
+ while (nalloc--) {
+ unmap = &unmap_q->unmap[prod];
+
+ skb = netdev_alloc_skb_ip_align(bnad->netdev, buff_sz);
+
if (unlikely(!skb)) {
BNAD_UPDATE_CTR(bnad, rxbuf_alloc_failed);
rcb->rxq->rxbuf_alloc_failed++;
@@ -320,13 +448,13 @@ bnad_rxq_post(struct bnad *bnad, struct bna_rcb *rcb)
}
dma_addr = dma_map_single(&bnad->pcidev->dev, skb->data,
buff_sz, DMA_FROM_DEVICE);
- rxent = &((struct bna_rxq_entry *)rcb->sw_q)[prod];
- BNA_SET_DMA_ADDR(dma_addr, &rxent->host_addr);
- unmap = &unmap_q[prod];
unmap->skb = skb;
dma_unmap_addr_set(&unmap->vector, dma_addr, dma_addr);
unmap->vector.len = buff_sz;
+
+ rxent = &((struct bna_rxq_entry *)rcb->sw_q)[prod];
+ BNA_SET_DMA_ADDR(dma_addr, &rxent->host_addr);
BNA_QE_INDX_INC(prod, q_depth);
alloced++;
}
@@ -338,6 +466,24 @@ finishing:
if (likely(test_bit(BNAD_RXQ_POST_OK, &rcb->flags)))
bna_rxq_prod_indx_doorbell(rcb);
}
+
+ return alloced;
+}
+
+static inline void
+bnad_rxq_post(struct bnad *bnad, struct bna_rcb *rcb)
+{
+ struct bnad_rx_unmap_q *unmap_q = rcb->unmap_q;
+ u32 to_alloc;
+
+ to_alloc = BNA_QE_FREE_CNT(rcb, rcb->q_depth);
+ if (!(to_alloc >> BNAD_RXQ_REFILL_THRESHOLD_SHIFT))
+ return;
+
+ if (BNAD_RXBUF_IS_PAGE(unmap_q->type))
+ bnad_rxq_refill_page(bnad, rcb, to_alloc);
+ else
+ bnad_rxq_refill_skb(bnad, rcb, to_alloc);
}
#define flags_cksum_prot_mask (BNA_CQ_EF_IPV4 | BNA_CQ_EF_L3_CKSUM_OK | \
@@ -354,17 +500,62 @@ finishing:
#define flags_udp6 (BNA_CQ_EF_IPV6 | \
BNA_CQ_EF_UDP | BNA_CQ_EF_L4_CKSUM_OK)
+static inline struct sk_buff *
+bnad_cq_prepare_skb(struct bnad_rx_ctrl *rx_ctrl,
+ struct bnad_rx_unmap_q *unmap_q,
+ struct bnad_rx_unmap *unmap,
+ u32 length, u32 flags)
+{
+ struct bnad *bnad = rx_ctrl->bnad;
+ struct sk_buff *skb;
+
+ if (BNAD_RXBUF_IS_PAGE(unmap_q->type)) {
+ skb = napi_get_frags(&rx_ctrl->napi);
+ if (unlikely(!skb))
+ return NULL;
+
+ dma_unmap_page(&bnad->pcidev->dev,
+ dma_unmap_addr(&unmap->vector, dma_addr),
+ unmap->vector.len, DMA_FROM_DEVICE);
+ skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+ unmap->page, unmap->page_offset, length);
+ skb->len += length;
+ skb->data_len += length;
+ skb->truesize += length;
+
+ unmap->page = NULL;
+ unmap->vector.len = 0;
+
+ return skb;
+ }
+
+ skb = unmap->skb;
+ BUG_ON(!skb);
+
+ dma_unmap_single(&bnad->pcidev->dev,
+ dma_unmap_addr(&unmap->vector, dma_addr),
+ unmap->vector.len, DMA_FROM_DEVICE);
+
+ skb_put(skb, length);
+
+ skb->protocol = eth_type_trans(skb, bnad->netdev);
+
+ unmap->skb = NULL;
+ unmap->vector.len = 0;
+ return skb;
+}
+
static u32
bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget)
{
- struct bna_cq_entry *cq, *cmpl, *next_cmpl;
+ struct bna_cq_entry *cq, *cmpl;
struct bna_rcb *rcb = NULL;
- struct bnad_rx_unmap *unmap_q, *unmap;
- unsigned int packets = 0;
+ struct bnad_rx_unmap_q *unmap_q;
+ struct bnad_rx_unmap *unmap;
struct sk_buff *skb;
- u32 flags, masked_flags;
struct bna_pkt_rate *pkt_rt = &ccb->pkt_rate;
- struct bnad_rx_ctrl *rx_ctrl = (struct bnad_rx_ctrl *)(ccb->ctrl);
+ struct bnad_rx_ctrl *rx_ctrl = ccb->ctrl;
+ u32 packets = 0, length = 0, flags, masked_flags;
prefetch(bnad->netdev);
@@ -373,6 +564,8 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget)
while (cmpl->valid && (packets < budget)) {
packets++;
+ flags = ntohl(cmpl->flags);
+ length = ntohs(cmpl->length);
BNA_UPDATE_PKT_CNT(pkt_rt, ntohs(cmpl->length));
if (bna_is_small_rxq(cmpl->rxq_id))
@@ -381,32 +574,25 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget)
rcb = ccb->rcb[0];
unmap_q = rcb->unmap_q;
- unmap = &unmap_q[rcb->consumer_index];
+ unmap = &unmap_q->unmap[rcb->consumer_index];
- skb = unmap->skb;
- BUG_ON(!(skb));
- unmap->skb = NULL;
- dma_unmap_single(&bnad->pcidev->dev,
- dma_unmap_addr(&unmap->vector, dma_addr),
- unmap->vector.len, DMA_FROM_DEVICE);
- unmap->vector.len = 0;
- BNA_QE_INDX_INC(rcb->consumer_index, rcb->q_depth);
- BNA_QE_INDX_INC(ccb->producer_index, ccb->q_depth);
- next_cmpl = &cq[ccb->producer_index];
+ if (unlikely(flags & (BNA_CQ_EF_MAC_ERROR |
+ BNA_CQ_EF_FCS_ERROR |
+ BNA_CQ_EF_TOO_LONG))) {
+ if (BNAD_RXBUF_IS_PAGE(unmap_q->type))
+ bnad_rxq_cleanup_page(bnad, unmap);
+ else
+ bnad_rxq_cleanup_skb(bnad, unmap);
- prefetch(next_cmpl);
-
- flags = ntohl(cmpl->flags);
- if (unlikely
- (flags &
- (BNA_CQ_EF_MAC_ERROR | BNA_CQ_EF_FCS_ERROR |
- BNA_CQ_EF_TOO_LONG))) {
- dev_kfree_skb_any(skb);
rcb->rxq->rx_packets_with_error++;
goto next;
}
- skb_put(skb, ntohs(cmpl->length));
+ skb = bnad_cq_prepare_skb(ccb->ctrl, unmap_q, unmap,
+ length, flags);
+
+ if (unlikely(!skb))
+ break;
masked_flags = flags & flags_cksum_prot_mask;
@@ -421,22 +607,24 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget)
skb_checksum_none_assert(skb);
rcb->rxq->rx_packets++;
- rcb->rxq->rx_bytes += skb->len;
- skb->protocol = eth_type_trans(skb, bnad->netdev);
+ rcb->rxq->rx_bytes += length;
if (flags & BNA_CQ_EF_VLAN)
__vlan_hwaccel_put_tag(skb, ntohs(cmpl->vlan_tag));
- if (skb->ip_summed == CHECKSUM_UNNECESSARY)
- napi_gro_receive(&rx_ctrl->napi, skb);
+ if (BNAD_RXBUF_IS_PAGE(unmap_q->type))
+ napi_gro_frags(&rx_ctrl->napi);
else
netif_receive_skb(skb);
next:
cmpl->valid = 0;
- cmpl = next_cmpl;
+ BNA_QE_INDX_INC(rcb->consumer_index, rcb->q_depth);
+ BNA_QE_INDX_INC(ccb->producer_index, ccb->q_depth);
+ cmpl = &cq[ccb->producer_index];
}
+ napi_gro_flush(&rx_ctrl->napi, false);
if (likely(test_bit(BNAD_RXQ_STARTED, &ccb->rcb[0]->flags)))
bna_ib_ack_disable_irq(ccb->i_dbell, packets);
@@ -956,8 +1144,7 @@ bnad_cb_rx_post(struct bnad *bnad, struct bna_rx *rx)
struct bna_ccb *ccb;
struct bna_rcb *rcb;
struct bnad_rx_ctrl *rx_ctrl;
- int i;
- int j;
+ int i, j;
for (i = 0; i < BNAD_MAX_RXP_PER_RX; i++) {
rx_ctrl = &rx_info->rx_ctrl[i];
@@ -972,6 +1159,7 @@ bnad_cb_rx_post(struct bnad *bnad, struct bna_rx *rx)
if (!rcb)
continue;
+ bnad_rxq_alloc_init(bnad, rcb);
set_bit(BNAD_RXQ_STARTED, &rcb->flags);
set_bit(BNAD_RXQ_POST_OK, &rcb->flags);
bnad_rxq_post(bnad, rcb);
@@ -1861,9 +2049,11 @@ bnad_setup_rx(struct bnad *bnad, u32 rx_id)
/* Fill Unmap Q memory requirements */
BNAD_FILL_UNMAPQ_MEM_REQ(&res_info[BNA_RX_RES_MEM_T_UNMAPQ],
- rx_config->num_paths + ((rx_config->rxp_type == BNA_RXP_SINGLE)
- ? 0 : rx_config->num_paths), (bnad->rxq_depth *
- sizeof(struct bnad_rx_unmap)));
+ rx_config->num_paths +
+ ((rx_config->rxp_type == BNA_RXP_SINGLE) ?
+ 0 : rx_config->num_paths),
+ ((bnad->rxq_depth * sizeof(struct bnad_rx_unmap)) +
+ sizeof(struct bnad_rx_unmap_q)));
/* Allocate resource */
err = bnad_rx_res_alloc(bnad, res_info, rx_id);
diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h
index db132c9..134d534 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.h
+++ b/drivers/net/ethernet/brocade/bna/bnad.h
@@ -233,10 +233,29 @@ struct bnad_rx_vector {
};
struct bnad_rx_unmap {
+ struct page *page;
+ u32 page_offset;
struct sk_buff *skb;
struct bnad_rx_vector vector;
};
+enum bnad_rxbuf_type {
+ BNAD_RXBUF_NONE = 0,
+ BNAD_RXBUF_SKB = 1,
+ BNAD_RXBUF_PAGE = 2,
+ BNAD_RXBUF_MULTI = 3
+};
+
+#define BNAD_RXBUF_IS_PAGE(_type) ((_type) == BNAD_RXBUF_PAGE)
+
+struct bnad_rx_unmap_q {
+ int reuse_pi;
+ int alloc_order;
+ u32 map_size;
+ enum bnad_rxbuf_type type;
+ struct bnad_rx_unmap unmap[0];
+};
+
/* Bit mask values for bnad->cfg_flags */
#define BNAD_CF_DIM_ENABLED 0x01 /* DIM */
#define BNAD_CF_PROMISC 0x02
--
1.7.1
^ permalink raw reply related
* [net-next 5/7] bna: Add RX State
From: Rasesh Mody @ 2012-12-11 22:24 UTC (permalink / raw)
To: davem, netdev
Cc: bhutchings, David.Laight, adapter_linux_open_src_team,
Rasesh Mody
In-Reply-To: <1355264696-8927-1-git-send-email-rmody@brocade.com>
Change Details:
- BNA state machine for Rx in start_wait state moves it to stop_wait on
receipt of RX_E_STOP. In Rx stop_wait state, on receipt of
RX_E_STARTED event does enet stop
RX_E_STOPPED event does rx_cleanup_cbfn
rx_cleanup_cbfn in this case is called without post_cbfn. post_cbfn
happens only after RX_E_STARTED event is received in start_wait. Without
doing post_cbfn, NAPI remains disabled and in cleanup we try to disable
again causing endless wait. ifconfig process and other workers can thus
get stuck.
- Introducing start_stop_wait state for Rx. This state handles the case of
if post_cbfn is not done simply do stop without the cleanup.
Signed-off-by: Rasesh Mody <rmody@brocade.com>
---
drivers/net/ethernet/brocade/bna/bna_tx_rx.c | 27 +++++++++++++++++++++++++-
1 files changed, 26 insertions(+), 1 deletions(-)
diff --git a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
index 4df6d4b..ea6f4a0 100644
--- a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
+++ b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
@@ -1355,6 +1355,8 @@ bfa_fsm_state_decl(bna_rx, stopped,
struct bna_rx, enum bna_rx_event);
bfa_fsm_state_decl(bna_rx, start_wait,
struct bna_rx, enum bna_rx_event);
+bfa_fsm_state_decl(bna_rx, start_stop_wait,
+ struct bna_rx, enum bna_rx_event);
bfa_fsm_state_decl(bna_rx, rxf_start_wait,
struct bna_rx, enum bna_rx_event);
bfa_fsm_state_decl(bna_rx, started,
@@ -1432,7 +1434,7 @@ static void bna_rx_sm_start_wait(struct bna_rx *rx,
{
switch (event) {
case RX_E_STOP:
- bfa_fsm_set_state(rx, bna_rx_sm_stop_wait);
+ bfa_fsm_set_state(rx, bna_rx_sm_start_stop_wait);
break;
case RX_E_FAIL:
@@ -1488,6 +1490,29 @@ bna_rx_sm_rxf_stop_wait(struct bna_rx *rx, enum bna_rx_event event)
}
+static void
+bna_rx_sm_start_stop_wait_entry(struct bna_rx *rx)
+{
+}
+
+static void
+bna_rx_sm_start_stop_wait(struct bna_rx *rx, enum bna_rx_event event)
+{
+ switch (event) {
+ case RX_E_FAIL:
+ case RX_E_STOPPED:
+ bfa_fsm_set_state(rx, bna_rx_sm_stopped);
+ break;
+
+ case RX_E_STARTED:
+ bna_rx_enet_stop(rx);
+ break;
+
+ default:
+ bfa_sm_fault(event);
+ }
+}
+
void
bna_rx_sm_started_entry(struct bna_rx *rx)
{
--
1.7.1
^ permalink raw reply related
* [net-next 7/7] bna: Driver Version Updated to 3.1.2.1
From: Rasesh Mody @ 2012-12-11 22:24 UTC (permalink / raw)
To: davem, netdev
Cc: bhutchings, David.Laight, adapter_linux_open_src_team,
Rasesh Mody
In-Reply-To: <1355264696-8927-1-git-send-email-rmody@brocade.com>
Signed-off-by: Rasesh Mody <rmody@brocade.com>
---
drivers/net/ethernet/brocade/bna/bnad.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h
index 134d534..72ba586 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.h
+++ b/drivers/net/ethernet/brocade/bna/bnad.h
@@ -71,7 +71,7 @@ struct bnad_rx_ctrl {
#define BNAD_NAME "bna"
#define BNAD_NAME_LEN 64
-#define BNAD_VERSION "3.0.23.0"
+#define BNAD_VERSION "3.1.2.1"
#define BNAD_MAILBOX_MSIX_INDEX 0
#define BNAD_MAILBOX_MSIX_VECTORS 1
--
1.7.1
^ permalink raw reply related
* [net-next 6/7] bna: Firmware update
From: Rasesh Mody @ 2012-12-11 22:24 UTC (permalink / raw)
To: davem, netdev
Cc: bhutchings, David.Laight, adapter_linux_open_src_team,
Rasesh Mody
In-Reply-To: <1355264696-8927-1-git-send-email-rmody@brocade.com>
Change Details:
- Added Stats clear counter to the bfi_enet_stats_mac structure and
ethtool stats
- Modified the firmware naming convention to contain the firmware image
version (3.1.0.0). The new convention is
<firmware-image>-<firmware-version>.bin The change will enforce loading
only compatible firmware with this driver and also avoid over-writing
the old firmware image in-order to load new version driver as the
firmware names used to be the same.
Signed-off-by: Rasesh Mody <rmody@brocade.com>
---
drivers/net/ethernet/brocade/bna/bfi_enet.h | 1 +
drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 1 +
drivers/net/ethernet/brocade/bna/cna.h | 4 ++--
3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/brocade/bna/bfi_enet.h b/drivers/net/ethernet/brocade/bna/bfi_enet.h
index eef6e1f..7d10e33 100644
--- a/drivers/net/ethernet/brocade/bna/bfi_enet.h
+++ b/drivers/net/ethernet/brocade/bna/bfi_enet.h
@@ -787,6 +787,7 @@ struct bfi_enet_stats_bpc {
/* MAC Rx Statistics */
struct bfi_enet_stats_mac {
+ u64 stats_clr_cnt; /* times this stats cleared */
u64 frame_64; /* both rx and tx counter */
u64 frame_65_127; /* both rx and tx counter */
u64 frame_128_255; /* both rx and tx counter */
diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index 40e1e84..455b5a2 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -102,6 +102,7 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = {
"rx_unmap_q_alloc_failed",
"rxbuf_alloc_failed",
+ "mac_stats_clr_cnt",
"mac_frame_64",
"mac_frame_65_127",
"mac_frame_128_255",
diff --git a/drivers/net/ethernet/brocade/bna/cna.h b/drivers/net/ethernet/brocade/bna/cna.h
index 32e8f17..14ca931 100644
--- a/drivers/net/ethernet/brocade/bna/cna.h
+++ b/drivers/net/ethernet/brocade/bna/cna.h
@@ -37,8 +37,8 @@
extern char bfa_version[];
-#define CNA_FW_FILE_CT "ctfw.bin"
-#define CNA_FW_FILE_CT2 "ct2fw.bin"
+#define CNA_FW_FILE_CT "ctfw-3.1.0.0.bin"
+#define CNA_FW_FILE_CT2 "ct2fw-3.1.0.0.bin"
#define FC_SYMNAME_MAX 256 /*!< max name server symbolic name size */
#pragma pack(1)
--
1.7.1
^ permalink raw reply related
* Re: [tcpdump-workers] vlan tagged packets and libpcap breakage
From: Ani Sinha @ 2012-12-11 22:36 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Michael Richardson, netdev, tcpdump-workers, Francesco Ruggeri
In-Reply-To: <87mwyi9h1x.fsf@xmission.com>
>
> It is possible to test for the presence of support of the new vlan bpf
> extensions by attempting to load a filter that uses them. As only valid
> filters can be loaded, old kernels that do not support filtering of vlan
> tags will fail to load the a test filter with uses them.
Unfortunately I do not see this. The sk_chk_filter() does not have a
default in the case statement and the check will not detect an unknown
instruction. It will fail when the filter is run and as far as I can see,
the packet will be dropped. Something like this might help?
diff --git a/net/core/filter.c b/net/core/filter.c
index c23543c..96338aa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -548,6 +548,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
return -EINVAL;
/* Some instructions need special checks */
switch (code) {
+ /* for unknown instruction, return EINVAL */
+ default : return -EINVAL;
case BPF_S_ALU_DIV_K:
/* check for division by zero */
if (ftest->k == 0)
^ permalink raw reply related
* Re: [tcpdump-workers] vlan tagged packets and libpcap breakage
From: Eric Dumazet @ 2012-12-11 23:04 UTC (permalink / raw)
To: ani
Cc: Eric W. Biederman, Michael Richardson, netdev, tcpdump-workers,
Francesco Ruggeri
In-Reply-To: <alpine.OSX.2.00.1212111432430.78903@animac.local>
On Tue, 2012-12-11 at 14:36 -0800, Ani Sinha wrote:
> >
> > It is possible to test for the presence of support of the new vlan bpf
> > extensions by attempting to load a filter that uses them. As only valid
> > filters can be loaded, old kernels that do not support filtering of vlan
> > tags will fail to load the a test filter with uses them.
>
> Unfortunately I do not see this. The sk_chk_filter() does not have a
> default in the case statement and the check will not detect an unknown
> instruction. It will fail when the filter is run and as far as I can see,
> the packet will be dropped. Something like this might help?
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c23543c..96338aa 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -548,6 +548,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
> return -EINVAL;
> /* Some instructions need special checks */
> switch (code) {
> + /* for unknown instruction, return EINVAL */
> + default : return -EINVAL;
> case BPF_S_ALU_DIV_K:
> /* check for division by zero */
> if (ftest->k == 0)
This patch is wrong.
Check lines 546, 547, 548 where we do the check for unknown instructions
code = codes[code];
if (!code)
return -EINVAL;
If you want to test ANCILLARY possible values, its already too late, as
old kernels wont use any patch anyway.
^ permalink raw reply
* Re: [tcpdump-workers] vlan tagged packets and libpcap breakage
From: Stephen Hemminger @ 2012-12-11 23:12 UTC (permalink / raw)
To: ani
Cc: Eric W. Biederman, Michael Richardson, netdev, tcpdump-workers,
Francesco Ruggeri
In-Reply-To: <alpine.OSX.2.00.1212111432430.78903@animac.local>
On Tue, 11 Dec 2012 14:36:33 -0800 (PST)
Ani Sinha <ani@aristanetworks.com> wrote:
> >
> > It is possible to test for the presence of support of the new vlan bpf
> > extensions by attempting to load a filter that uses them. As only valid
> > filters can be loaded, old kernels that do not support filtering of vlan
> > tags will fail to load the a test filter with uses them.
>
> Unfortunately I do not see this. The sk_chk_filter() does not have a
> default in the case statement and the check will not detect an unknown
> instruction. It will fail when the filter is run and as far as I can see,
> the packet will be dropped. Something like this might help?
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c23543c..96338aa 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -548,6 +548,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
> return -EINVAL;
> /* Some instructions need special checks */
> switch (code) {
> + /* for unknown instruction, return EINVAL */
> + default : return -EINVAL;
> case BPF_S_ALU_DIV_K:
> /* check for division by zero */
> if (ftest->k == 0)
Did you test this? I think it will blow up for some existing instructions
like BPF_S_ALU_XOR_X or any of the other non-special instructions.
Also it is not formatted correctly for the kernel programming style.
ERROR: space prohibited before that ':' (ctx:WxW)
#86: FILE: net/core/filter.c:552:
+ default : return -EINVAL;
^
^ permalink raw reply
* Re: linux-next: some merging notes
From: Benjamin Herrenschmidt @ 2012-12-11 23:19 UTC (permalink / raw)
To: Stephen Rothwell
Cc: Linus, linux-next, LKML, Paul Mackerras, linuxppc-dev,
David Miller, netdev, Rusty Russell, Greg KH, Steven Rostedt,
Olof Johansson, Arnd Bergmann, linux-arm-kernel, Tomi Valkeinen,
N, Mugunthan V, Nathan Fontenot, Bill Pemberton
In-Reply-To: <20121212091552.02c72c8926f9f9147b080d68@canb.auug.org.au>
On Wed, 2012-12-12 at 09:15 +1100, Stephen Rothwell wrote:
> The powerpc tree
> (git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git#next)
> contains a commit that breaks the building of
> lib/pSeries-reconfig-notifier-error-inject.c. I applied a patch to
> linux-next to disable CONFIG_PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT.
I will put a fix in before I send the pull request.
Cheers,
Ben.
^ permalink raw reply
* Re: pull request: wireless-next 2012-12-11
From: David Miller @ 2012-12-11 23:27 UTC (permalink / raw)
To: linville; +Cc: linux-wireless, netdev
In-Reply-To: <20121211215546.GA3566@tuxdriver.com>
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 11 Dec 2012 16:55:46 -0500
> The following changes since commit 75be437230b06fca87908a787f70de0ce7fbab8c:
>
> net: gro: avoid double copy in skb_gro_receive() (2012-12-11 13:44:09 -0500)
>
> are available in the git repository at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next.git for-davem
Pulled, thanks John.
^ permalink raw reply
* Re: [net-next 0/7] bna: Driver Version Updated to 3.1.2.1
From: David Miller @ 2012-12-11 23:28 UTC (permalink / raw)
To: rmody; +Cc: netdev, bhutchings, David.Laight, adapter_linux_open_src_team
In-Reply-To: <1355264696-8927-1-git-send-email-rmody@brocade.com>
From: Rasesh Mody <rmody@brocade.com>
Date: Tue, 11 Dec 2012 14:24:49 -0800
> Hello Dave,
>
> Resubmitting the patch set with review feedback addressed.
>
> The following patch-set includes Tx Rx changes, bug fixes, firmware
> update, code cleanup and enhancements.
>
> This also updates the BNA driver to v3.1.2.1.
>
> The patches have been compiled and tested against 3.7.0-rc3.
Series applied, thanks.
^ permalink raw reply
* [PATCH 4/5] net: sfc: fix return value check in efx_ptp_probe_channel().
From: Cyril Roelandt @ 2012-12-12 0:24 UTC (permalink / raw)
To: linux-kernel
Cc: kernel-janitors, Cyril Roelandt, linux-net-drivers, bhutchings,
netdev
In-Reply-To: <1355271894-5284-1-git-send-email-tipecaml@gmail.com>
The ptp_clock_register() returns ERR_PTR() and never returns NULL. Replace the
NULL check by a call to IS_ERR().
Signed-off-by: Cyril Roelandt <tipecaml@gmail.com>
---
drivers/net/ethernet/sfc/ptp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 0767043f..9bcc38c 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -930,7 +930,7 @@ static int efx_ptp_probe_channel(struct efx_channel *channel)
ptp->phc_clock = ptp_clock_register(&ptp->phc_clock_info,
&efx->pci_dev->dev);
- if (!ptp->phc_clock)
+ if (IS_ERR(ptp->phc_clock))
goto fail3;
INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker);
--
1.7.10.4
^ permalink raw reply related
* Re: vlan tagged packets and libpcap breakage
From: Ani Sinha @ 2012-12-12 0:46 UTC (permalink / raw)
To: Eric Dumazet
Cc: netdev, Francesco Ruggeri, Eric W. Biederman, tcpdump-workers,
Michael Richardson
In-Reply-To: <1355267060.27891.139.camel@edumazet-glaptop>
On Tue, Dec 11, 2012 at 3:04 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2012-12-11 at 14:36 -0800, Ani Sinha wrote:
>> >
>> > It is possible to test for the presence of support of the new vlan bpf
>> > extensions by attempting to load a filter that uses them. As only valid
>> > filters can be loaded, old kernels that do not support filtering of vlan
>> > tags will fail to load the a test filter with uses them.
>>
>> Unfortunately I do not see this. The sk_chk_filter() does not have a
>> default in the case statement and the check will not detect an unknown
>> instruction. It will fail when the filter is run and as far as I can see,
>> the packet will be dropped. Something like this might help?
>>
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index c23543c..96338aa 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -548,6 +548,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
>> return -EINVAL;
>> /* Some instructions need special checks */
>> switch (code) {
>> + /* for unknown instruction, return EINVAL */
>> + default : return -EINVAL;
>> case BPF_S_ALU_DIV_K:
>> /* check for division by zero */
>> if (ftest->k == 0)
>
> This patch is wrong.
yes I generated this patch wrong.
>
> Check lines 546, 547, 548 where we do the check for unknown instructions
>
> code = codes[code];
> if (!code)
> return -EINVAL;
yepph it's OK here.
>
> If you want to test ANCILLARY possible values, its already too late, as
> old kernels wont use any patch anyway.
yepph, I was looking at possible ancilliary values. Basically this
case statement :
#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
code = BPF_S_ANC_##CODE; \
break
switch (ftest->k) {
ANCILLARY(PROTOCOL);
ANCILLARY(PKTTYPE);
ANCILLARY(IFINDEX);
ANCILLARY(NLATTR);
ANCILLARY(NLATTR_NEST);
ANCILLARY(MARK);
ANCILLARY(QUEUE);
ANCILLARY(HATYPE);
ANCILLARY(RXHASH);
ANCILLARY(CPU);
ANCILLARY(ALU_XOR_X);
ANCILLARY(VLAN_TAG);
ANCILLARY(VLAN_TAG_PRESENT);
}
_______________________________________________
tcpdump-workers mailing list
tcpdump-workers@lists.tcpdump.org
https://lists.sandelman.ca/mailman/listinfo/tcpdump-workers
^ permalink raw reply
* Re: [PATCH net-next v5] bridge: export multicast database via netlink
From: Stephen Hemminger @ 2012-12-12 0:48 UTC (permalink / raw)
To: Cong Wang
Cc: netdev, bridge, Herbert Xu, Jesper Dangaard Brouer, Thomas Graf,
David S. Miller
In-Reply-To: <1354874688-24564-1-git-send-email-amwang@redhat.com>
On Fri, 7 Dec 2012 18:04:48 +0800
Cong Wang <amwang@redhat.com> wrote:
> From: Cong Wang <amwang@redhat.com>
>
> V5: fix two bugs pointed out by Thomas
> remove seq check for now, mark it as TODO
>
> V4: remove some useless #include
> some coding style fix
>
> V3: drop debugging printk's
> update selinux perm table as well
>
> V2: drop patch 1/2, export ifindex directly
> Redesign netlink attributes
> Improve netlink seq check
> Handle IPv6 addr as well
>
> This patch exports bridge multicast database via netlink
> message type RTM_GETMDB. Similar to fdb, but currently bridge-specific.
> We may need to support modify multicast database too (RTM_{ADD,DEL}MDB).
>
> (Thanks to Thomas for patient reviews)
>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: Stephen Hemminger <shemminger@vyatta.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Thomas Graf <tgraf@suug.ch>
> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> Signed-off-by: Cong Wang <amwang@redhat.com>
>
Applied, but required some manual fixing. It required adding if_bridge.h
to include/linux in iproute2 exported headers. Also patch still had some fuzz
against current version.
^ permalink raw reply
* Re: [tcpdump-workers] vlan tagged packets and libpcap breakage
From: Ani Sinha @ 2012-12-12 0:50 UTC (permalink / raw)
To: Eric Dumazet
Cc: Eric W. Biederman, Michael Richardson, netdev, tcpdump-workers,
Francesco Ruggeri
In-Reply-To: <1355267060.27891.139.camel@edumazet-glaptop>
On Tue, Dec 11, 2012 at 3:04 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2012-12-11 at 14:36 -0800, Ani Sinha wrote:
>> >
>> > It is possible to test for the presence of support of the new vlan bpf
> If you want to test ANCILLARY possible values, its already too late, as
> old kernels wont use any patch anyway.
>
So basically this means that if we generate a filter with these
special negative offset values and expect that the kernel will
complain if it does not recognize the newer values then we would be
wrong. And you are right. Old kernels never knew about them and the
code wasn't written in a way to return EINVAL if it didn't recognize a
special negative anciliary offset value.
^ permalink raw reply
* [PATCH] solos-pci: fix double-free of TX skb in DMA mode
From: David Woodhouse @ 2012-12-12 0:57 UTC (permalink / raw)
To: netdev; +Cc: nathan
[-- Attachment #1: Type: text/plain, Size: 1234 bytes --]
We weren't clearing card->tx_skb[port] when processing the TX done interrupt.
If there wasn't another skb ready to transmit immediately, this led to a
double-free because we'd free it *again* next time we did have a packet to
send.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Cc: stable@kernel.org
---
drivers/atm/solos-pci.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 6619a8a..c909b7b 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -945,10 +945,11 @@ static uint32_t fpga_tx(struct solos_card *card)
for (port = 0; tx_pending; tx_pending >>= 1, port++) {
if (tx_pending & 1) {
struct sk_buff *oldskb = card->tx_skb[port];
- if (oldskb)
+ if (oldskb) {
pci_unmap_single(card->dev, SKB_CB(oldskb)->dma_addr,
oldskb->len, PCI_DMA_TODEVICE);
-
+ card->tx_skb[port] = NULL;
+ }
spin_lock(&card->tx_queue_lock);
skb = skb_dequeue(&card->tx_queue[port]);
if (!skb)
--
1.8.0.1
--
David Woodhouse Open Source Technology Centre
David.Woodhouse@intel.com Intel Corporation
[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 6171 bytes --]
^ permalink raw reply related
* Goodluck
From: Allen and Violet Large @ 2012-12-11 19:12 UTC (permalink / raw)
This is a personal email directed to you. My wife and I won a Jackpot
Lottery of $11.3 million in July and have voluntarily decided to donate
the sum of $500,000.00 USD to you as part of our own charity project to
improve the lot of 10 lucky individuals all over the world. If you have
received this email then you are one of the lucky recipients and all you
have to do is get back with us so that we can send your details to the
payout bank.
Please note that you have to contact my private email for more
informations (allen_violetlarge03@yahoo.co.jp)
You can verify this by visiting the web pages below.
http://www.dailymail.co.uk/news/article-1326473/Canadian-couple-Allen-Violet-Large-away-entire-11-2m-lottery-win.html
http://www.cbc.ca/news/canada/nova-scotia/story/2010/11/04/ns-allen-violet-large-lottery-winning.html
Goodluck,
Allen and Violet Large
Email: allen_violetlarge03@yahoo.co.jp
^ permalink raw reply
* Re: netdevice wanrouter: Convert directly reference of netdev->priv
From: Paul Gortmaker @ 2012-12-12 0:58 UTC (permalink / raw)
To: Dan Carpenter; +Cc: wangchen, netdev, David Miller
In-Reply-To: <20121203090405.GA12089@elgon.mountain>
On Mon, Dec 3, 2012 at 4:04 AM, Dan Carpenter <dan.carpenter@oracle.com> wrote:
> Hello Wang Chen,
>
> The patch 7be6065b39c3: "netdevice wanrouter: Convert directly
> reference of netdev->priv" from Nov 20, 2008, leads to the following
> Smatch warning:
> net/wanrouter/wanmain.c:610 wanrouter_device_new_if()
> error: potential NULL dereference 'dev'.
>
> This is an old patch from 2008. It removed the allocation in
> wanrouter_device_new_if() so it looks like wanrouter has been completely
> broken for four years.
Hi Dan,
Crap -- wishing I'd seen this earlier. There was an RFC patch for
sending wanrouter to the bitbucket from Joe Perches, but aside
from the obvious build failures in it that Dave found (and I fixed)
there wasn't any real feedback (either positive or negative) to it:
http://patchwork.ozlabs.org/patch/198830/
Knowing it has been non-functional for ~4 years is I think a key
bit of information in justifying a removal, so folks like yourself
and JuliaL don't waste cycles fixing/auditing dead code. But it
will need to be 3.9 material now, it seems.
Paul.
--
>
> @@ -589,10 +591,6 @@ static int wanrouter_device_new_if(struct wan_device *wandev,
> err = -EPROTONOSUPPORT;
> goto out;
> } else {
> - dev = kzalloc(sizeof(struct net_device), GFP_KERNEL);
> - err = -ENOBUFS;
> - if (dev == NULL)
> - goto out;
> err = wandev->new_if(wandev, dev, cnf);
>
> "dev" is still NULL after the call to ->new_if().
>
> }
>
> Here is what the code looks like now:
>
> net/wanrouter/wanmain.c
> 590 if (cnf->config_id == WANCONFIG_MPPP) {
> 591 printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n",
> 592 wandev->name);
> 593 err = -EPROTONOSUPPORT;
> 594 goto out;
> 595 } else {
>
> We were supposed to allocate "dev" here.
>
> 596 err = wandev->new_if(wandev, dev, cnf);
> 597 }
> 598
> 599 if (!err) {
> 600 /* Register network interface. This will invoke init()
> 601 * function supplied by the driver. If device registered
> 602 * successfully, add it to the interface list.
> 603 */
> 604
> 605 #ifdef WANDEBUG
> 606 printk(KERN_INFO "%s: registering interface %s...\n",
> 607 wanrouter_modname, dev->name);
> 608 #endif
> 609
> 610 err = register_netdev(dev);
> ^^^^^^^^^^^^^^^^^^^^
>
> The kernel will always oops inside the call to register_netdev() because
> "dev" is still NULL.
>
> I suspect we should just revert the patch?
>
> regards,
> dan carpenter
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH net-next] bnx2: Fix accidental reversions.
From: Michael Chan @ 2012-12-12 2:24 UTC (permalink / raw)
To: davem; +Cc: netdev
Commit 4ce45e02469c382699f4c5f6df727aea9dd2e1ca
"bnx2: Add BNX2 prefix to CHIP ID and name macros"
accidentally reverted 2 commits to use pci_ioumap() and to make
pci_error_handlers const. This fixes those mistakes.
Signed-off-by: Michael Chan <mchan@broadcom.com>
---
drivers/net/ethernet/broadcom/bnx2.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index c16526d..a1adfaf 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8572,7 +8572,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
error:
- iounmap(bp->regview);
+ pci_iounmap(pdev, bp->regview);
pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
@@ -8750,7 +8750,7 @@ static void bnx2_io_resume(struct pci_dev *pdev)
rtnl_unlock();
}
-static struct pci_error_handlers bnx2_err_handler = {
+static const struct pci_error_handlers bnx2_err_handler = {
.error_detected = bnx2_io_error_detected,
.slot_reset = bnx2_io_slot_reset,
.resume = bnx2_io_resume,
--
1.6.4.GIT
^ permalink raw reply related
* [PATCH net-next] pkt_sched: avoid requeues if possible
From: Eric Dumazet @ 2012-12-12 1:54 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Jamal Hadi Salim, John Fastabend
From: Eric Dumazet <edumazet@google.com>
With BQL being deployed, we can more likely have following behavior :
We dequeue a packet from qdisc in dequeue_skb(), then we realize target
tx queue is in XOFF state in sch_direct_xmit(), and we have to hold the
skb into gso_skb for later.
This shows in stats (tc -s qdisc dev eth0) as requeues.
Problem of these requeues is that high priority packets can not be
dequeued as long as this (possibly low prio and big TSO packet) is not
removed from gso_skb.
At 1Gbps speed, a full size TSO packet is 500 us of extra latency.
In some cases, we know that all packets dequeued from a qdisc are
for a particular and known txq :
- If device is non multi queue
- For all MQ/MQPRIO slave qdiscs
This patch introduces a new qdisc flag, TCQ_F_ONETXQUEUE to mark
this capability, so that dequeue_skb() is allowed to dequeue a packet
only if the associated txq is not stopped.
This indeed reduce latencies for high prio packets (or improve fairness
with sfq/fq_codel), and almost remove qdisc 'requeues'.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
---
include/net/sch_generic.h | 7 +++++++
net/sched/sch_api.c | 2 ++
net/sched/sch_generic.c | 11 ++++++-----
net/sched/sch_mq.c | 4 +++-
net/sched/sch_mqprio.c | 4 ++++
5 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4616f46..1540f9c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -50,6 +50,13 @@ struct Qdisc {
#define TCQ_F_INGRESS 2
#define TCQ_F_CAN_BYPASS 4
#define TCQ_F_MQROOT 8
+#define TCQ_F_ONETXQUEUE 0x10 /* dequeue_skb() can assume all skbs are for
+ * q->dev_queue : It can test
+ * netif_xmit_frozen_or_stopped() before
+ * dequeueing next packet.
+ * Its true for MQ/MQPRIO slaves, or non
+ * multiqueue device.
+ */
#define TCQ_F_WARN_NONWC (1 << 16)
int padded;
const struct Qdisc_ops *ops;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4799c48..d84f7e7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -833,6 +833,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
goto err_out3;
}
lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
+ if (!netif_is_multiqueue(dev))
+ sch->flags |= TCQ_F_ONETXQUEUE;
}
sch->handle = handle;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index aefc150..5d81a44 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -53,20 +53,19 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
struct sk_buff *skb = q->gso_skb;
+ const struct netdev_queue *txq = q->dev_queue;
if (unlikely(skb)) {
- struct net_device *dev = qdisc_dev(q);
- struct netdev_queue *txq;
-
/* check the reason of requeuing without tx lock first */
- txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb));
if (!netif_xmit_frozen_or_stopped(txq)) {
q->gso_skb = NULL;
q->q.qlen--;
} else
skb = NULL;
} else {
- skb = q->dequeue(q);
+ if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq))
+ skb = q->dequeue(q);
}
return skb;
@@ -686,6 +685,8 @@ static void attach_one_default_qdisc(struct net_device *dev,
netdev_info(dev, "activation failed\n");
return;
}
+ if (!netif_is_multiqueue(dev))
+ qdisc->flags |= TCQ_F_ONETXQUEUE;
}
dev_queue->qdisc_sleeping = qdisc;
}
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 0a4b2f9..5da78a1 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -63,6 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
if (qdisc == NULL)
goto err;
priv->qdiscs[ntx] = qdisc;
+ qdisc->flags |= TCQ_F_ONETXQUEUE;
}
sch->flags |= TCQ_F_MQROOT;
@@ -150,7 +151,8 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
dev_deactivate(dev);
*old = dev_graft_qdisc(dev_queue, new);
-
+ if (new)
+ new->flags |= TCQ_F_ONETXQUEUE;
if (dev->flags & IFF_UP)
dev_activate(dev);
return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index d1831ca..accec33 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,6 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
goto err;
}
priv->qdiscs[i] = qdisc;
+ qdisc->flags |= TCQ_F_ONETXQUEUE;
}
/* If the mqprio options indicate that hardware should own
@@ -205,6 +206,9 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
*old = dev_graft_qdisc(dev_queue, new);
+ if (new)
+ new->flags |= TCQ_F_ONETXQUEUE;
+
if (dev->flags & IFF_UP)
dev_activate(dev);
^ permalink raw reply related
* Re: [PATCH net-next] bnx2: Fix accidental reversions.
From: David Miller @ 2012-12-12 2:28 UTC (permalink / raw)
To: mchan; +Cc: netdev
In-Reply-To: <1355279060-24192-1-git-send-email-mchan@broadcom.com>
From: "Michael Chan" <mchan@broadcom.com>
Date: Tue, 11 Dec 2012 18:24:20 -0800
> Commit 4ce45e02469c382699f4c5f6df727aea9dd2e1ca
> "bnx2: Add BNX2 prefix to CHIP ID and name macros"
>
> accidentally reverted 2 commits to use pci_ioumap() and to make
> pci_error_handlers const. This fixes those mistakes.
>
> Signed-off-by: Michael Chan <mchan@broadcom.com>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH net-next 4/7] openvswitch: add ipv6 'set' action
From: Tom Herbert @ 2012-12-12 3:14 UTC (permalink / raw)
To: Jesse Gross
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
David Miller
In-Reply-To: <1354214149-33651-5-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
> This patch adds ipv6 set action functionality. It allows to change
> traffic class, flow label, hop-limit, ipv6 source and destination
> address fields.
>
I have to wonder about these patches and the underlying design
direction. Aren't these sort of things and more already implemented
by IPtables but in a modular and extensible fashion? Has there been
any thought into hooking OVS to IP tables to leverage all the existing
functionality?
Thanks,
Tom
> Signed-off-by: Ansis Atteka <aatteka-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
> ---
> net/openvswitch/actions.c | 93 ++++++++++++++++++++++++++++++++++++++++++++
> net/openvswitch/datapath.c | 20 ++++++++++
> 2 files changed, 113 insertions(+)
>
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index 0811447..a58ed27 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -28,6 +28,7 @@
> #include <linux/if_arp.h>
> #include <linux/if_vlan.h>
> #include <net/ip.h>
> +#include <net/ipv6.h>
> #include <net/checksum.h>
> #include <net/dsfield.h>
>
> @@ -162,6 +163,53 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
> *addr = new_addr;
> }
>
> +static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
> + __be32 addr[4], const __be32 new_addr[4])
> +{
> + int transport_len = skb->len - skb_transport_offset(skb);
> +
> + if (l4_proto == IPPROTO_TCP) {
> + if (likely(transport_len >= sizeof(struct tcphdr)))
> + inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
> + addr, new_addr, 1);
> + } else if (l4_proto == IPPROTO_UDP) {
> + if (likely(transport_len >= sizeof(struct udphdr))) {
> + struct udphdr *uh = udp_hdr(skb);
> +
> + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
> + inet_proto_csum_replace16(&uh->check, skb,
> + addr, new_addr, 1);
> + if (!uh->check)
> + uh->check = CSUM_MANGLED_0;
> + }
> + }
> + }
> +}
> +
> +static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
> + __be32 addr[4], const __be32 new_addr[4],
> + bool recalculate_csum)
> +{
> + if (recalculate_csum)
> + update_ipv6_checksum(skb, l4_proto, addr, new_addr);
> +
> + skb->rxhash = 0;
> + memcpy(addr, new_addr, sizeof(__be32[4]));
> +}
> +
> +static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
> +{
> + nh->priority = tc >> 4;
> + nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
> +}
> +
> +static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
> +{
> + nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
> + nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
> + nh->flow_lbl[2] = fl & 0x000000FF;
> +}
> +
> static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
> {
> csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
> @@ -195,6 +243,47 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
> return 0;
> }
>
> +static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
> +{
> + struct ipv6hdr *nh;
> + int err;
> + __be32 *saddr;
> + __be32 *daddr;
> +
> + err = make_writable(skb, skb_network_offset(skb) +
> + sizeof(struct ipv6hdr));
> + if (unlikely(err))
> + return err;
> +
> + nh = ipv6_hdr(skb);
> + saddr = (__be32 *)&nh->saddr;
> + daddr = (__be32 *)&nh->daddr;
> +
> + if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
> + set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
> + ipv6_key->ipv6_src, true);
> +
> + if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
> + unsigned int offset = 0;
> + int flags = IP6_FH_F_SKIP_RH;
> + bool recalc_csum = true;
> +
> + if (ipv6_ext_hdr(nh->nexthdr))
> + recalc_csum = ipv6_find_hdr(skb, &offset,
> + NEXTHDR_ROUTING, NULL,
> + &flags) != NEXTHDR_ROUTING;
> +
> + set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
> + ipv6_key->ipv6_dst, recalc_csum);
> + }
> +
> + set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
> + set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
> + nh->hop_limit = ipv6_key->ipv6_hlimit;
> +
> + return 0;
> +}
> +
> /* Must follow make_writable() since that can move the skb data. */
> static void set_tp_port(struct sk_buff *skb, __be16 *port,
> __be16 new_port, __sum16 *check)
> @@ -347,6 +436,10 @@ static int execute_set_action(struct sk_buff *skb,
> err = set_ipv4(skb, nla_data(nested_attr));
> break;
>
> + case OVS_KEY_ATTR_IPV6:
> + err = set_ipv6(skb, nla_data(nested_attr));
> + break;
> +
> case OVS_KEY_ATTR_TCP:
> err = set_tcp(skb, nla_data(nested_attr));
> break;
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index 4c4b62c..fd4a6a4 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -479,6 +479,7 @@ static int validate_set(const struct nlattr *a,
>
> switch (key_type) {
> const struct ovs_key_ipv4 *ipv4_key;
> + const struct ovs_key_ipv6 *ipv6_key;
>
> case OVS_KEY_ATTR_PRIORITY:
> case OVS_KEY_ATTR_ETHERNET:
> @@ -500,6 +501,25 @@ static int validate_set(const struct nlattr *a,
>
> break;
>
> + case OVS_KEY_ATTR_IPV6:
> + if (flow_key->eth.type != htons(ETH_P_IPV6))
> + return -EINVAL;
> +
> + if (!flow_key->ip.proto)
> + return -EINVAL;
> +
> + ipv6_key = nla_data(ovs_key);
> + if (ipv6_key->ipv6_proto != flow_key->ip.proto)
> + return -EINVAL;
> +
> + if (ipv6_key->ipv6_frag != flow_key->ip.frag)
> + return -EINVAL;
> +
> + if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
> + return -EINVAL;
> +
> + break;
> +
> case OVS_KEY_ATTR_TCP:
> if (flow_key->ip.proto != IPPROTO_TCP)
> return -EINVAL;
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH] tun: allow setting ethernet addresss while running
From: Jan Engelhardt @ 2012-12-12 3:27 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: davem, netdev, jasowang
In-Reply-To: <1355188560-8388-1-git-send-email-shemminger@vyatta.com>
On Tuesday 2012-12-11 02:16, Stephen Hemminger wrote:
>This is a pure software device, and ok with live address change.
>--- a/drivers/net/tun.c
>+++ b/drivers/net/tun.c
>@@ -849,6 +849,7 @@ static void tun_net_init(struct net_device *dev)
> /* Ethernet TAP Device */
> ether_setup(dev);
> dev->priv_flags &= ~IFF_TX_SKB_SHARING;
>+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
>
> eth_hw_addr_random(dev);
Would this possibly apply to L2TP devices as well?
^ permalink raw reply
* Re: [PATCH net-next rfc 0/2] Allow unpriveledge user to disable tuntap queue
From: Jason Wang @ 2012-12-12 3:29 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: pmoore, netdev, linux-kernel, mprivozn
In-Reply-To: <20121211124616.GC15435@redhat.com>
On 12/11/2012 08:46 PM, Michael S. Tsirkin wrote:
> On Tue, Dec 11, 2012 at 07:03:45PM +0800, Jason Wang wrote:
>> This series is an rfc that tries to solve the issue that the queues of tuntap
>> could not be disabled/enabled by unpriveledged user. This is needed for
>> unpriveledge userspace such as qemu since guest may change the number of queues
>> at any time, qemu needs to configure the tuntap to disable/enable a specific
>> queue.
>>
>> Instead of introducting new flag/ioctls, this series tries to re-use the current
>> TUNSETQUEUE and IFF_ATTACH_QUEUE/IFF_DETACH_QUEUE. After this change,
>> IFF_DETACH_QUEUE is used to disable a specific queue instead of detaching all
>> its state from tuntap. IFF_ATTACH_QUEUE is used to do: 1) creating new queue to
>> a tuntap device, in this situation, previous DAC check is still done. 2)
>> re-enable the queue previously disabled by IFF_DETACH_QUEUE, in this situation,
>> we can bypass some checking when we do during queue creating (the check need to
>> be done here needs discussion.
>>
>> Management software (such as libvirt) then can do:
>> - TUNSETIFF to creating device and queue 0
>> - TUNSETQUEUE to create the rest of queues
>> - Passing them to unpriveledge userspace (such as qemu)
> Sorry I find this somewhat confusing.
> Why doesn't management call TUNSETIFF to create all queues -
> seems cleaner, no? Also has the advantage that it works
> without selinux changes.
The issue is how to return those fds through TUNSETIFF. Looks like
there's no space in ifreq for TUNSETIFF, we need another new ioctls to
do this.
>
> So why don't we simply fix TUNSETQUEUE such that
> 1. It only works if already attached to device by TUNSETIFF
> 2. It does not attach/detach, instead simply enables/disables the queue
This is just what this patch does, the only different is when calling
TUNSETQUEUE through a fd without attaching to the device, it is used to
create the queue.
> This way no new flags, just tweak the semantics of the
> existing ones. Need to do this before 3.8 is out though
> otherwise we'll end up maintaining the old semantics forever.
>
Yes, I will try to solve this issue soon.
>> Then the unpriveledge userspace can enable and disable a specific queue through
>> IFF_ATTACH_QUEUE and IFF_DETACH_QUEUE.
>>
>> This is done by introducing a enabled flags were used to notify whether the
>> queue is enabled, and tuntap only send/receive packets when it was enabled.
>>
>> Please comment, thanks!
>>
>> Jason Wang (2):
>> tuntap: forbid calling TUNSETQUEUE for a persistent device with no
>> queues
>> tuntap: allow unpriveledge user to enable and disable queues
>>
>> drivers/net/tun.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++---
>> 1 files changed, 73 insertions(+), 5 deletions(-)
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH net-next rfc 2/2] tuntap: allow unpriveledge user to enable and disable queues
From: Jason Wang @ 2012-12-12 3:34 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: pmoore, netdev, linux-kernel, mprivozn
In-Reply-To: <20121211123012.GB15435@redhat.com>
On 12/11/2012 08:30 PM, Michael S. Tsirkin wrote:
> On Tue, Dec 11, 2012 at 07:03:47PM +0800, Jason Wang wrote:
>> Currently, when a file is attached to tuntap through TUNSETQUEUE, the uid/gid
>> and CAP_NET_ADMIN were checked, and we use this ioctl to create and destroy
>> queues. Sometimes, userspace such as qemu need to the ability to enable and
>> disable a specific queue without priveledge since guest operating system may
>> change the number of queues it want use.
>>
>> To support this kind of ability, this patch introduce a flag enabled which is
>> used to track whether the queue is enabled by userspace. And also restrict that
>> only one deivce could be used for a queue to attach. With this patch, the DAC
>> checking when adding queues through IFF_ATTACH_QUEUE is still done and after
>> this, IFF_DETACH_QUEUE/IFF_ATTACH_QUEUE could be used to disable/enable this
>> queue.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>> drivers/net/tun.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++-----
>> 1 files changed, 73 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index d593f56..43831a7 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -138,6 +138,7 @@ struct tun_file {
>> /* only used for fasnyc */
>> unsigned int flags;
>> u16 queue_index;
>> + bool enabled;
>> };
>>
>> struct tun_flow_entry {
>> @@ -345,9 +346,11 @@ unlock:
>> static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>> {
>> struct tun_struct *tun = netdev_priv(dev);
>> + struct tun_file *tfile;
>> struct tun_flow_entry *e;
>> u32 txq = 0;
>> u32 numqueues = 0;
>> + int i;
>>
>> rcu_read_lock();
>> numqueues = tun->numqueues;
>> @@ -366,6 +369,19 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
>> txq -= numqueues;
>> }
>>
>> + tfile = rcu_dereference(tun->tfiles[txq]);
>> + if (unlikely(!tfile->enabled))
> This unlikely tag is suspicious. It should be perfectly
> legal to use less queues than created.
Ok. will remove this check.
>
>> + /* tun_detach() should make sure there's at least one queue
>> + * could be used to do the tranmission.
>> + */
>> + for (i = 0; i < numqueues; i++) {
>> + tfile = rcu_dereference(tun->tfiles[i]);
>> + if (tfile->enabled) {
>> + txq = i;
>> + break;
>> + }
>> + }
>> +
> Worst case this will do a linear scan over all queueus on each packet.
> Instead, I think we need a list of all queues and only install
> the active ones in the array.
Another method is using another variable e.g. active_queues to track how
many queues were enabled. And re-shuffle the pointers during
detaching/attaching to make sure [0, active_queues) to be enabled
queues, and [active_queues, num_queues) to be disabled queues. Then we
could avoid this issue.
>
>> rcu_read_unlock();
>> return txq;
>> }
>> @@ -386,6 +402,36 @@ static void tun_set_real_num_queues(struct tun_struct *tun)
>> netif_set_real_num_rx_queues(tun->dev, tun->numqueues);
>> }
>>
>> +static int tun_enable(struct tun_file *tfile)
>> +{
>> + if (tfile->enabled == true)
> simply if (tfile->enabled)
Right.
>> + return -EINVAL;
> Actually it's better to have operations be
> idempotent. If it's enabled, enabling should
> be a NOP not an error.
Ok.
>> +
>> + tfile->enabled = true;
>> + return 0;
>> +}
>> +
>> +static int tun_disable(struct tun_file *tfile)
>> +{
>> + struct tun_struct *tun = rcu_dereference_protected(tfile->tun,
>> + lockdep_rtnl_is_held());
>> + u16 index = tfile->queue_index;
>> +
>> + if (!tun)
>> + return -EINVAL;
>> +
>> + if (tun->numqueues == 1)
>> + return -EINVAL;
> So if there's a single queue we can't disable it,
> but if there are > 1 we can disable them all.
> This seems arbitrary.
>
The question is whether we can allow the userspace to disable all queues
which looks useless to me. So I try to forbid this.
>> +
>> + BUG_ON(index >= tun->numqueues);
>> + tfile->enabled = false;
>> +
>> + synchronize_net();
>> + tun_flow_delete_by_queue(tun, index);
>> +
>> + return 0;
>> +}
>> +
>> static void __tun_detach(struct tun_file *tfile, bool clean)
>> {
>> struct tun_file *ntfile;
>> @@ -446,6 +492,7 @@ static void tun_detach_all(struct net_device *dev)
>> BUG_ON(!tfile);
>> wake_up_all(&tfile->wq.wait);
>> rcu_assign_pointer(tfile->tun, NULL);
>> + tfile->enabled = false;
>> --tun->numqueues;
>> }
>> BUG_ON(tun->numqueues != 0);
>> @@ -490,6 +537,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
>> rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
>> sock_hold(&tfile->sk);
>> tun->numqueues++;
>> + tfile->enabled = true;
>>
>> tun_set_real_num_queues(tun);
>>
>> @@ -672,6 +720,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>> if (txq >= tun->numqueues)
>> goto drop;
>>
>> + /* Drop packet if the queue was not enabled */
>> + if (!tfile->enabled)
>> + goto drop;
>> +
>> tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
>>
>> BUG_ON(!tfile);
>> @@ -1010,6 +1062,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
>> bool zerocopy = false;
>> int err;
>>
>> + if (!tfile->enabled)
>> + return -EINVAL;
>> +
>> if (!(tun->flags & TUN_NO_PI)) {
>> if ((len -= sizeof(pi)) > total_len)
>> return -EINVAL;
>> @@ -1199,6 +1254,9 @@ static ssize_t tun_put_user(struct tun_struct *tun,
>> struct tun_pi pi = { 0, skb->protocol };
>> ssize_t total = 0;
>>
>> + if (!tfile->enabled)
>> + return -EINVAL;
>> +
>> if (!(tun->flags & TUN_NO_PI)) {
>> if ((len -= sizeof(pi)) < 0)
>> return -EINVAL;
>> @@ -1769,15 +1827,21 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
>> if (dev->netdev_ops != &tap_netdev_ops &&
>> dev->netdev_ops != &tun_netdev_ops)
>> ret = -EINVAL;
>> - else if (tun_not_capable(tun))
>> - ret = -EPERM;
>> - /* TUNSETIFF is needed to do permission checking */
>> - else if (tun->numqueues == 0)
>> - ret = -EPERM;
>> - else
>> - ret = tun_attach(tun, file);
>> + else {
>> + if (!rcu_dereference(tfile->tun)) {
> Should be rcu_dereference_protected.
True.
>
>> + if (tun_not_capable(tun) ||
>> + tun->numqueues == 0)
>> + ret = -EPERM;
>> + else
>> + ret = tun_attach(tun, file);
>> + }
>> + else {
>> + /* FIXME: permission check? */
>> + ret = tun_enable(tfile);
>> + }
>> + }
>> } else if (ifr->ifr_flags & IFF_DETACH_QUEUE)
>> - __tun_detach(tfile, false);
>> + tun_disable(tfile);
>> else
>> ret = -EINVAL;
>>
>> @@ -2085,6 +2149,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
>> tfile->socket.file = file;
>> tfile->socket.ops = &tun_socket_ops;
>>
>> + tfile->enabled = false;
>> sock_init_data(&tfile->socket, &tfile->sk);
>> sk_change_net(&tfile->sk, tfile->net);
>>
>> --
>> 1.7.1
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox