* [PATCH net-next v4 2/7] ibmvnic: Update and clean up reset TX pool routine
From: Thomas Falcon @ 2018-03-16 1:20 UTC (permalink / raw)
To: netdev; +Cc: jallen, nfont, Thomas Falcon
In-Reply-To: <1521163223-11478-1-git-send-email-tlfalcon@linux.vnet.ibm.com>
Update TX pool reset routine to accommodate new TSO pool array. Introduce
a function that resets one TX pool, and use that function to initialize
each pool in both pool arrays.
Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 45 +++++++++++++++++++++-----------------
1 file changed, 25 insertions(+), 20 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 9c7d19c926f9..4dc304422ece 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -557,36 +557,41 @@ static int init_rx_pools(struct net_device *netdev)
return 0;
}
+static int reset_one_tx_pool(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_tx_pool *tx_pool)
+{
+ int rc, i;
+
+ rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+ if (rc)
+ return rc;
+
+ memset(tx_pool->tx_buff, 0,
+ tx_pool->num_buffers *
+ sizeof(struct ibmvnic_tx_buff));
+
+ for (i = 0; i < tx_pool->num_buffers; i++)
+ tx_pool->free_map[i] = i;
+
+ tx_pool->consumer_index = 0;
+ tx_pool->producer_index = 0;
+
+ return 0;
+}
+
static int reset_tx_pools(struct ibmvnic_adapter *adapter)
{
- struct ibmvnic_tx_pool *tx_pool;
int tx_scrqs;
- int i, j, rc;
+ int i, rc;
tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
for (i = 0; i < tx_scrqs; i++) {
- netdev_dbg(adapter->netdev, "Re-setting tx_pool[%d]\n", i);
-
- tx_pool = &adapter->tx_pool[i];
-
- rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+ rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
if (rc)
return rc;
-
- rc = reset_long_term_buff(adapter, &tx_pool->tso_ltb);
+ rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]);
if (rc)
return rc;
-
- memset(tx_pool->tx_buff, 0,
- adapter->req_tx_entries_per_subcrq *
- sizeof(struct ibmvnic_tx_buff));
-
- for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
- tx_pool->free_map[j] = j;
-
- tx_pool->consumer_index = 0;
- tx_pool->producer_index = 0;
- tx_pool->tso_index = 0;
}
return 0;
--
2.15.0
^ permalink raw reply related
* [PATCH net-next v4 3/7] ibmvnic: Update release TX pool routine
From: Thomas Falcon @ 2018-03-16 1:20 UTC (permalink / raw)
To: netdev; +Cc: jallen, nfont, Thomas Falcon
In-Reply-To: <1521163223-11478-1-git-send-email-tlfalcon@linux.vnet.ibm.com>
Introduce function that frees one TX pool. Use that to release
each pool in both the standard TX pool and TSO pool arrays.
Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4dc304422ece..258d54e3a616 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -608,25 +608,30 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter)
adapter->vpd = NULL;
}
+static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_tx_pool *tx_pool)
+{
+ kfree(tx_pool->tx_buff);
+ kfree(tx_pool->free_map);
+ free_long_term_buff(adapter, &tx_pool->long_term_buff);
+}
+
static void release_tx_pools(struct ibmvnic_adapter *adapter)
{
- struct ibmvnic_tx_pool *tx_pool;
int i;
if (!adapter->tx_pool)
return;
for (i = 0; i < adapter->num_active_tx_pools; i++) {
- netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
- tx_pool = &adapter->tx_pool[i];
- kfree(tx_pool->tx_buff);
- free_long_term_buff(adapter, &tx_pool->long_term_buff);
- free_long_term_buff(adapter, &tx_pool->tso_ltb);
- kfree(tx_pool->free_map);
+ release_one_tx_pool(adapter, &adapter->tx_pool[i]);
+ release_one_tx_pool(adapter, &adapter->tso_pool[i]);
}
kfree(adapter->tx_pool);
adapter->tx_pool = NULL;
+ kfree(adapter->tso_pool);
+ adapter->tso_pool = NULL;
adapter->num_active_tx_pools = 0;
}
--
2.15.0
^ permalink raw reply related
* [PATCH net-next v4 4/7] ibmvnic: Update TX pool initialization routine
From: Thomas Falcon @ 2018-03-16 1:20 UTC (permalink / raw)
To: netdev; +Cc: jallen, nfont, Thomas Falcon
In-Reply-To: <1521163223-11478-1-git-send-email-tlfalcon@linux.vnet.ibm.com>
Introduce function that initializes one TX pool. Use that to
create each pool entry in both the standard TX pool and TSO
pool arrays.
Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 90 ++++++++++++++++++++------------------
1 file changed, 48 insertions(+), 42 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 258d54e3a616..2bb5d562dde1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -635,13 +635,43 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
adapter->num_active_tx_pools = 0;
}
+static int init_one_tx_pool(struct net_device *netdev,
+ struct ibmvnic_tx_pool *tx_pool,
+ int num_entries, int buf_size)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ int i;
+
+ tx_pool->tx_buff = kcalloc(num_entries,
+ sizeof(struct ibmvnic_tx_buff),
+ GFP_KERNEL);
+ if (!tx_pool->tx_buff)
+ return -1;
+
+ if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
+ num_entries * buf_size))
+ return -1;
+
+ tx_pool->free_map = kcalloc(num_entries, sizeof(int), GFP_KERNEL);
+ if (!tx_pool->free_map)
+ return -1;
+
+ for (i = 0; i < num_entries; i++)
+ tx_pool->free_map[i] = i;
+
+ tx_pool->consumer_index = 0;
+ tx_pool->producer_index = 0;
+ tx_pool->num_buffers = num_entries;
+ tx_pool->buf_size = buf_size;
+
+ return 0;
+}
+
static int init_tx_pools(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- struct device *dev = &adapter->vdev->dev;
- struct ibmvnic_tx_pool *tx_pool;
int tx_subcrqs;
- int i, j;
+ int i, rc;
tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
adapter->tx_pool = kcalloc(tx_subcrqs,
@@ -649,53 +679,29 @@ static int init_tx_pools(struct net_device *netdev)
if (!adapter->tx_pool)
return -1;
+ adapter->tso_pool = kcalloc(tx_subcrqs,
+ sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
+ if (!adapter->tso_pool)
+ return -1;
+
adapter->num_active_tx_pools = tx_subcrqs;
for (i = 0; i < tx_subcrqs; i++) {
- tx_pool = &adapter->tx_pool[i];
-
- netdev_dbg(adapter->netdev,
- "Initializing tx_pool[%d], %lld buffs\n",
- i, adapter->req_tx_entries_per_subcrq);
-
- tx_pool->tx_buff = kcalloc(adapter->req_tx_entries_per_subcrq,
- sizeof(struct ibmvnic_tx_buff),
- GFP_KERNEL);
- if (!tx_pool->tx_buff) {
- dev_err(dev, "tx pool buffer allocation failed\n");
- release_tx_pools(adapter);
- return -1;
- }
-
- if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
- adapter->req_tx_entries_per_subcrq *
- (adapter->req_mtu + VLAN_HLEN))) {
- release_tx_pools(adapter);
- return -1;
- }
-
- /* alloc TSO ltb */
- if (alloc_long_term_buff(adapter, &tx_pool->tso_ltb,
- IBMVNIC_TSO_BUFS *
- IBMVNIC_TSO_BUF_SZ)) {
+ rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
+ adapter->req_tx_entries_per_subcrq,
+ adapter->req_mtu + VLAN_HLEN);
+ if (rc) {
release_tx_pools(adapter);
- return -1;
+ return rc;
}
- tx_pool->tso_index = 0;
-
- tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq,
- sizeof(int), GFP_KERNEL);
- if (!tx_pool->free_map) {
+ init_one_tx_pool(netdev, &adapter->tso_pool[i],
+ IBMVNIC_TSO_BUFS,
+ IBMVNIC_TSO_BUF_SZ);
+ if (rc) {
release_tx_pools(adapter);
- return -1;
+ return rc;
}
-
- for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
- tx_pool->free_map[j] = j;
-
- tx_pool->consumer_index = 0;
- tx_pool->producer_index = 0;
}
return 0;
--
2.15.0
^ permalink raw reply related
* [PATCH net-next v4 5/7] ibmvnic: Update TX and TX completion routines
From: Thomas Falcon @ 2018-03-16 1:20 UTC (permalink / raw)
To: netdev; +Cc: jallen, nfont, Thomas Falcon
In-Reply-To: <1521163223-11478-1-git-send-email-tlfalcon@linux.vnet.ibm.com>
Update TX and TX completion routines to account for TX pool
restructuring. TX routine first chooses the pool depending
on whether a packet is GSO or not, then uses it accordingly.
For the completion routine to know which pool it needs to use,
set the most significant bit of the correlator index to one
if the packet uses the TSO pool. On completion, unset the bit
and use the correlator index to release the buffer pool entry.
Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 55 +++++++++++++++++++-------------------
drivers/net/ethernet/ibm/ibmvnic.h | 1 +
2 files changed, 29 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2bb5d562dde1..672e9221d4a5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1414,8 +1414,11 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
ret = NETDEV_TX_OK;
goto out;
}
+ if (skb_is_gso(skb))
+ tx_pool = &adapter->tso_pool[queue_num];
+ else
+ tx_pool = &adapter->tx_pool[queue_num];
- tx_pool = &adapter->tx_pool[queue_num];
tx_scrq = adapter->tx_scrq[queue_num];
txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
@@ -1423,20 +1426,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
index = tx_pool->free_map[tx_pool->consumer_index];
- if (skb_is_gso(skb)) {
- offset = tx_pool->tso_index * IBMVNIC_TSO_BUF_SZ;
- dst = tx_pool->tso_ltb.buff + offset;
- memset(dst, 0, IBMVNIC_TSO_BUF_SZ);
- data_dma_addr = tx_pool->tso_ltb.addr + offset;
- tx_pool->tso_index++;
- if (tx_pool->tso_index == IBMVNIC_TSO_BUFS)
- tx_pool->tso_index = 0;
- } else {
- offset = index * (adapter->req_mtu + VLAN_HLEN);
- dst = tx_pool->long_term_buff.buff + offset;
- memset(dst, 0, adapter->req_mtu + VLAN_HLEN);
- data_dma_addr = tx_pool->long_term_buff.addr + offset;
- }
+ offset = index * tx_pool->buf_size;
+ dst = tx_pool->long_term_buff.buff + offset;
+ memset(dst, 0, tx_pool->buf_size);
+ data_dma_addr = tx_pool->long_term_buff.addr + offset;
if (skb_shinfo(skb)->nr_frags) {
int cur, i;
@@ -1459,8 +1452,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
}
tx_pool->consumer_index =
- (tx_pool->consumer_index + 1) %
- adapter->req_tx_entries_per_subcrq;
+ (tx_pool->consumer_index + 1) % tx_pool->num_buffers;
tx_buff = &tx_pool->tx_buff[index];
tx_buff->skb = skb;
@@ -1476,11 +1468,13 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_crq.v1.n_crq_elem = 1;
tx_crq.v1.n_sge = 1;
tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED;
- tx_crq.v1.correlator = cpu_to_be32(index);
+
if (skb_is_gso(skb))
- tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->tso_ltb.map_id);
+ tx_crq.v1.correlator =
+ cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK);
else
- tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
+ tx_crq.v1.correlator = cpu_to_be32(index);
+ tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
tx_crq.v1.sge_len = cpu_to_be32(skb->len);
tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
@@ -1543,7 +1537,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if (tx_pool->consumer_index == 0)
tx_pool->consumer_index =
- adapter->req_tx_entries_per_subcrq - 1;
+ tx_pool->num_buffers - 1;
else
tx_pool->consumer_index--;
@@ -2547,6 +2541,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
struct ibmvnic_sub_crq_queue *scrq)
{
struct device *dev = &adapter->vdev->dev;
+ struct ibmvnic_tx_pool *tx_pool;
struct ibmvnic_tx_buff *txbuff;
union sub_crq *next;
int index;
@@ -2566,7 +2561,14 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
continue;
}
index = be32_to_cpu(next->tx_comp.correlators[i]);
- txbuff = &adapter->tx_pool[pool].tx_buff[index];
+ if (index & IBMVNIC_TSO_POOL_MASK) {
+ tx_pool = &adapter->tso_pool[pool];
+ index &= ~IBMVNIC_TSO_POOL_MASK;
+ } else {
+ tx_pool = &adapter->tx_pool[pool];
+ }
+
+ txbuff = &tx_pool->tx_buff[index];
for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) {
if (!txbuff->data_dma[j])
@@ -2589,11 +2591,10 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
num_entries += txbuff->num_entries;
- adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
- producer_index] = index;
- adapter->tx_pool[pool].producer_index =
- (adapter->tx_pool[pool].producer_index + 1) %
- adapter->req_tx_entries_per_subcrq;
+ tx_pool->free_map[tx_pool->producer_index] = index;
+ tx_pool->producer_index =
+ (tx_pool->producer_index + 1) %
+ tx_pool->num_buffers;
}
/* remove tx_comp scrq*/
next->tx_comp.first = 0;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index a2e21b39074f..89efe700eafe 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -43,6 +43,7 @@
#define IBMVNIC_TSO_BUF_SZ 65536
#define IBMVNIC_TSO_BUFS 64
+#define IBMVNIC_TSO_POOL_MASK 0x80000000
#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
#define IBMVNIC_BUFFER_HLEN 500
--
2.15.0
^ permalink raw reply related
* [PATCH net-next v4 6/7] ibmvnic: Improve TX buffer accounting
From: Thomas Falcon @ 2018-03-16 1:20 UTC (permalink / raw)
To: netdev; +Cc: jallen, nfont, Thomas Falcon
In-Reply-To: <1521163223-11478-1-git-send-email-tlfalcon@linux.vnet.ibm.com>
Improve TX pool buffer accounting to prevent the producer
index from overruning the consumer. First, set the next free
index to an invalid value if it is in use. If next buffer
to be consumed is in use, drop the packet.
Finally, if the transmit fails for some other reason, roll
back the consumer index and set the free map entry to its original
value. This should also be done if the DMA map fails.
Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
---
drivers/net/ethernet/ibm/ibmvnic.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 672e9221d4a5..af6f8193cb67 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1426,6 +1426,16 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
index = tx_pool->free_map[tx_pool->consumer_index];
+ if (index == IBMVNIC_INVALID_MAP) {
+ dev_kfree_skb_any(skb);
+ tx_send_failed++;
+ tx_dropped++;
+ ret = NETDEV_TX_OK;
+ goto out;
+ }
+
+ tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
+
offset = index * tx_pool->buf_size;
dst = tx_pool->long_term_buff.buff + offset;
memset(dst, 0, tx_pool->buf_size);
@@ -1522,7 +1532,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_map_failed++;
tx_dropped++;
ret = NETDEV_TX_OK;
- goto out;
+ goto tx_err_out;
}
lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
(u64)tx_buff->indir_dma,
@@ -1534,13 +1544,6 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
}
if (lpar_rc != H_SUCCESS) {
dev_err(dev, "tx failed with code %ld\n", lpar_rc);
-
- if (tx_pool->consumer_index == 0)
- tx_pool->consumer_index =
- tx_pool->num_buffers - 1;
- else
- tx_pool->consumer_index--;
-
dev_kfree_skb_any(skb);
tx_buff->skb = NULL;
@@ -1556,7 +1559,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_send_failed++;
tx_dropped++;
ret = NETDEV_TX_OK;
- goto out;
+ goto tx_err_out;
}
if (atomic_add_return(num_entries, &tx_scrq->used)
@@ -1569,7 +1572,16 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_bytes += skb->len;
txq->trans_start = jiffies;
ret = NETDEV_TX_OK;
+ goto out;
+tx_err_out:
+ /* roll back consumer index and map array*/
+ if (tx_pool->consumer_index == 0)
+ tx_pool->consumer_index =
+ tx_pool->num_buffers - 1;
+ else
+ tx_pool->consumer_index--;
+ tx_pool->free_map[tx_pool->consumer_index] = index;
out:
netdev->stats.tx_dropped += tx_dropped;
netdev->stats.tx_bytes += tx_bytes;
--
2.15.0
^ permalink raw reply related
* [PATCH net-next v4 7/7] ibmvnic: Update TX pool cleaning routine
From: Thomas Falcon @ 2018-03-16 1:20 UTC (permalink / raw)
To: netdev; +Cc: jallen, nfont, Thomas Falcon
In-Reply-To: <1521163223-11478-1-git-send-email-tlfalcon@linux.vnet.ibm.com>
Update routine that cleans up any outstanding transmits that
have not received completions when the device needs to close.
Introduces a helper function that cleans one TX pool to make
code more readable.
Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
---
v4: Update to use the number of buffers in the TX pool struct
instead of a fixed value saved in the adapter struct. Earlier
implementation resulted in a crash.
---
drivers/net/ethernet/ibm/ibmvnic.c | 40 +++++++++++++++++++++++---------------
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index af6f8193cb67..5632c030811b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1128,34 +1128,42 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
}
}
-static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+static void clean_one_tx_pool(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_tx_pool *tx_pool)
{
- struct ibmvnic_tx_pool *tx_pool;
struct ibmvnic_tx_buff *tx_buff;
u64 tx_entries;
+ int i;
+
+ if (!tx_pool && !tx_pool->tx_buff)
+ return;
+
+ tx_entries = tx_pool->num_buffers;
+
+ for (i = 0; i < tx_entries; i++) {
+ tx_buff = &tx_pool->tx_buff[i];
+ if (tx_buff && tx_buff->skb) {
+ dev_kfree_skb_any(tx_buff->skb);
+ tx_buff->skb = NULL;
+ }
+ }
+}
+
+static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+{
int tx_scrqs;
- int i, j;
+ int i;
- if (!adapter->tx_pool)
+ if (!adapter->tx_pool || !adapter->tso_pool)
return;
tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
- tx_entries = adapter->req_tx_entries_per_subcrq;
/* Free any remaining skbs in the tx buffer pools */
for (i = 0; i < tx_scrqs; i++) {
- tx_pool = &adapter->tx_pool[i];
- if (!tx_pool && !tx_pool->tx_buff)
- continue;
-
netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
- for (j = 0; j < tx_entries; j++) {
- tx_buff = &tx_pool->tx_buff[j];
- if (tx_buff && tx_buff->skb) {
- dev_kfree_skb_any(tx_buff->skb);
- tx_buff->skb = NULL;
- }
- }
+ clean_one_tx_pool(adapter, &adapter->tx_pool[i]);
+ clean_one_tx_pool(adapter, &adapter->tso_pool[i]);
}
}
--
2.15.0
^ permalink raw reply related
* Re: [PATCH] mlx5: Remove call to ida_pre_get
From: Matthew Wilcox @ 2018-03-16 1:30 UTC (permalink / raw)
To: Saeed Mahameed
Cc: Matan Barak, Maor Gottlieb, leon@kernel.org,
netdev@vger.kernel.org, linux-rdma@vger.kernel.org
In-Reply-To: <1521158282.3219.53.camel@mellanox.com>
On Thu, Mar 15, 2018 at 11:58:07PM +0000, Saeed Mahameed wrote:
> On Wed, 2018-03-14 at 19:57 -0700, Matthew Wilcox wrote:
> > From: Matthew Wilcox <mawilcox@microsoft.com>
> >
> > The mlx5 driver calls ida_pre_get() in a loop for no readily apparent
> > reason. The driver uses ida_simple_get() which will call
> > ida_pre_get()
> > by itself and there's no need to use ida_pre_get() unless using
> > ida_get_new().
> >
>
> Hi Matthew,
>
> Is this is causing any issues ? or just a simple cleanup ?
I'm removing the API. At the end of this cleanup, there will be no more
preallocation; instead we will rely on the slab allocator not sucking.
> Adding Maor, the author of this change,
>
> I believe the idea is to speed up insert_fte (which calls
> ida_simple_get) since insert_fte runs under the FTE write semaphore,
> in this case if ida_pre_get was successful before taking the semaphore
> for all the FTE nodes in the loop, this will be a huge win for
> ida_simple_get which will immediately return success without even
> trying to allocate.
I think that's misguided. The IDA allocator is only going to allocate
memory once in every 1024 allocations. Also, it does try to allocate,
even if there are preallocated nodes. So you're just wasting time,
unfortunately.
^ permalink raw reply
* Re: linux-next: manual merge of the net-next tree with the rdma-fixes tree
From: Jason Gunthorpe @ 2018-03-16 2:05 UTC (permalink / raw)
To: Doug Ledford
Cc: Stephen Rothwell, David Miller, Networking,
Linux-Next Mailing List, Linux Kernel Mailing List, Mark Bloch,
Leon Romanovsky
In-Reply-To: <1521163082.18703.191.camel@redhat.com>
On Thu, Mar 15, 2018 at 09:18:02PM -0400, Doug Ledford wrote:
> Here's the commit (from the rdma git repo) with the proper merge fix
> (although it also has other minor merge stuff that needs to be ignored):
>
> 2d873449a202 (Merge branch 'k.o/wip/dl-for-rc' into k.o/wip/dl-for-next)
Stephen,
If you merge the branches in the order:
rdma for-next, rdma for-rc, then net-next
you should not see a merge conflict as rdma for-next already has the
correct resolution.
Jason
^ permalink raw reply
* Re: [PATCH] xfrm: fix rcu_read_unlock usage in xfrm_local_error
From: Taehee Yoo @ 2018-03-16 2:30 UTC (permalink / raw)
To: Steffen Klassert; +Cc: davem, netdev
In-Reply-To: <20180315104817.ygpjsm4jc6cumroz@gauss3.secunet.de>
2018-03-15 19:48 GMT+09:00 Steffen Klassert <steffen.klassert@secunet.com>:
> On Tue, Mar 13, 2018 at 05:26:07PM +0900, Taehee Yoo wrote:
>> In the xfrm_local_error, rcu_read_unlock should be called when afinfo
>> is not NULL. because xfrm_state_get_afinfo calls rcu_read_unlock
>> if afinfo is NULL.
>>
>> Signed-off-by: Taehee Yoo <ap420073@gmail.com>
>
> Can you please add a 'Fixes:' tag, so that it can be
> correctly backported to the stable trees?
>
> Thanks!
Thank you for your review!
I will send V2 patch.
^ permalink raw reply
* [PATCH V2] xfrm: fix rcu_read_unlock usage in xfrm_local_error
From: Taehee Yoo @ 2018-03-16 2:35 UTC (permalink / raw)
To: davem, steffen.klassert; +Cc: netdev, Taehee Yoo
In the xfrm_local_error, rcu_read_unlock should be called when afinfo
is not NULL. because xfrm_state_get_afinfo calls rcu_read_unlock
if afinfo is NULL.
Fixes: af5d27c4e12b ("xfrm: remove xfrm_state_put_afinfo")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---
V2 :
- Add Fixes tag
V1 :
- Initial patch
net/xfrm/xfrm_output.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 2346867..89b178a7 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -285,8 +285,9 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
return;
afinfo = xfrm_state_get_afinfo(proto);
- if (afinfo)
+ if (afinfo) {
afinfo->local_error(skb, mtu);
- rcu_read_unlock();
+ rcu_read_unlock();
+ }
}
EXPORT_SYMBOL_GPL(xfrm_local_error);
--
2.9.3
^ permalink raw reply related
* Re: [PATCH v4 1/2] kernel.h: Introduce const_max() for VLA removal
From: Miguel Ojeda @ 2018-03-16 3:05 UTC (permalink / raw)
To: Kees Cook
Cc: Linus Torvalds, Andrew Morton, Josh Poimboeuf, Rasmus Villemoes,
Randy Dunlap, Ingo Molnar, David Laight, Ian Abbott, linux-input,
linux-btrfs, Network Development, Linux Kernel Mailing List,
Kernel Hardening
In-Reply-To: <CAGXu5j+Dv_wQgHWgT=9eSoKX7AcqnumuRsNFUoZ765WWizKytw@mail.gmail.com>
On Fri, Mar 16, 2018 at 12:49 AM, Kees Cook <keescook@chromium.org> wrote:
> On Thu, Mar 15, 2018 at 4:46 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
>> What I'm *not* so much ok with is "const_max(5,sizeof(x))" erroring
>> out, or silently causing insane behavior due to hidden subtle type
>> casts..
>
> Yup! I like it as an explicit argument. Thanks!
>
What about something like this?
#define INTMAXT_MAX LLONG_MAX
typedef int64_t intmax_t;
#define const_max(x, y) \
__builtin_choose_expr( \
!__builtin_constant_p(x) || !__builtin_constant_p(y), \
__error_not_const_arg(), \
__builtin_choose_expr( \
(x) > INTMAXT_MAX || (y) > INTMAXT_MAX, \
__error_too_big(), \
__builtin_choose_expr( \
(intmax_t)(x) >= (intmax_t)(y), \
(x), \
(y) \
) \
) \
)
Works for different types, allows to mix negatives and positives and
returns the original type, e.g.:
const_max(-1, sizeof(char));
is of type 'long unsigned int', but:
const_max(2, sizeof(char));
is of type 'int'. While I am not a fan that the return type depends on
the arguments, it is useful if you are going to use the expression in
something that needs expects a precise (a printk() for instance?).
The check against the INTMAXT_MAX is there to avoid complexity (if we
do not handle those cases, it is safe to use intmax_t for the
comparison; otherwise you have to have another compile time branch for
the case positive-positive using uintmax_t) and also avoids odd
warnings for some cases above LLONG_MAX about comparisons with 0 for
unsigned expressions being always true. On the positive side, it
prevents using the macro for thing like "(size_t)-1".
Cheers,
Miguel
^ permalink raw reply
* Re: [RFC PATCH V1 01/12] audit: add container id
From: Richard Guy Briggs @ 2018-03-16 3:58 UTC (permalink / raw)
To: Stefan Berger
Cc: cgroups, containers, linux-api, Linux-Audit Mailing List,
linux-fsdevel, LKML, netdev, mszeredi, luto, jlayton, carlos,
viro, dhowells, simo, trondmy, eparis, serge, ebiederm, madzcar
In-Reply-To: <216d1ab1-531b-9185-2e31-34f162f08aad@linux.vnet.ibm.com>
On 2018-03-15 16:27, Stefan Berger wrote:
> On 03/01/2018 02:41 PM, Richard Guy Briggs wrote:
> > Implement the proc fs write to set the audit container ID of a process,
> > emitting an AUDIT_CONTAINER record to document the event.
> >
> > This is a write from the container orchestrator task to a proc entry of
> > the form /proc/PID/containerid where PID is the process ID of the newly
> > created task that is to become the first task in a container, or an
> > additional task added to a container.
> >
> > The write expects up to a u64 value (unset: 18446744073709551615).
> >
> > This will produce a record such as this:
> > type=UNKNOWN[1333] msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0
> >
> > The "op" field indicates an initial set. The "pid" to "ses" fields are
> > the orchestrator while the "opid" field is the object's PID, the process
> > being "contained". Old and new container ID values are given in the
> > "contid" fields, while res indicates its success.
> >
> > It is not permitted to self-set, unset or re-set the container ID. A
> > child inherits its parent's container ID, but then can be set only once
> > after.
> >
> > See: https://github.com/linux-audit/audit-kernel/issues/32
> >
> >
> > /* audit_rule_data supports filter rules with both integer and string
> > * fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
> > diff --git a/kernel/auditsc.c b/kernel/auditsc.c
> > index 4e0a4ac..0ee1e59 100644
> > --- a/kernel/auditsc.c
> > +++ b/kernel/auditsc.c
> > @@ -2073,6 +2073,92 @@ int audit_set_loginuid(kuid_t loginuid)
> > return rc;
> > }
> >
> > +static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
> > +{
> > + struct task_struct *parent;
> > + u64 pcontainerid, ccontainerid;
> > + pid_t ppid;
> > +
> > + /* Don't allow to set our own containerid */
> > + if (current == task)
> > + return -EPERM;
> > + /* Don't allow the containerid to be unset */
> > + if (!cid_valid(containerid))
> > + return -EINVAL;
> > + /* if we don't have caps, reject */
> > + if (!capable(CAP_AUDIT_CONTROL))
> > + return -EPERM;
> > + /* if containerid is unset, allow */
> > + if (!audit_containerid_set(task))
> > + return 0;
>
> I am wondering whether there should be a check for the target process that
> will receive the containerid to not have CAP_SYS_ADMIN that would otherwise
> allow it to arbitrarily unshare()/clone() and leave the set of namespaces
> that may make up the container whose containerid we assign here?
This is a reasonable question. This has been debated and I understood
the conclusion was that without a clear definition of a "container", the
task still remains in that container that just now has more
sub-namespaces (in the case of hierarchical namespaces), we don't want
to restrict it in such a way and that allows it to create nested
containers. I see setns being more problematic if it could switch to
another existing namespace that was set up by the orchestrator for a
different container. The coming v2 patchset acknowledges this situation
with the network namespace being potentially shared by multiple
containers.
This is the motivation for the code below that allows to set the
containerid even if it is already inherited from its parent.
> > + /* it is already set, and not inherited from the parent, reject */
> > + ccontainerid = audit_get_containerid(task);
> > + rcu_read_lock();
> > + parent = rcu_dereference(task->real_parent);
> > + rcu_read_unlock();
> > + task_lock(parent);
> > + pcontainerid = audit_get_containerid(parent);
> > + ppid = task_tgid_nr(parent);
>
> ppid not needed...
Thanks for catching this. It was the vestige of a failed devel
experiment that didn't flush that useless appendage. :-)
> > + task_unlock(parent);
> > + if (ccontainerid != pcontainerid)
> > + return -EPERM;
> > + return 0;
> > +}
> > +
> > +static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
> > + u64 containerid, int rc)
> > +{
> > + struct audit_buffer *ab;
> > + uid_t uid;
> > + struct tty_struct *tty;
> > +
> > + if (!audit_enabled)
> > + return;
> > +
> > + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
> > + if (!ab)
> > + return;
> > +
> > + uid = from_kuid(&init_user_ns, task_uid(current));
> > + tty = audit_get_tty(current);
> > +
> > + audit_log_format(ab, "op=set pid=%d uid=%u", task_tgid_nr(current), uid);
> > + audit_log_task_context(ab);
> > + audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d old-contid=%llu contid=%llu res=%d",
> > + from_kuid(&init_user_ns, audit_get_loginuid(current)),
> > + tty ? tty_name(tty) : "(none)", audit_get_sessionid(current),
> > + task_tgid_nr(task), oldcontainerid, containerid, !rc);
> > +
> > + audit_put_tty(tty);
> > + audit_log_end(ab);
> > +}
> > +
> > +/**
> > + * audit_set_containerid - set current task's audit_context containerid
> > + * @containerid: containerid value
> > + *
> > + * Returns 0 on success, -EPERM on permission failure.
> > + *
> > + * Called (set) from fs/proc/base.c::proc_containerid_write().
> > + */
> > +int audit_set_containerid(struct task_struct *task, u64 containerid)
> > +{
> > + u64 oldcontainerid;
> > + int rc;
> > +
> > + oldcontainerid = audit_get_containerid(task);
> > +
> > + rc = audit_set_containerid_perm(task, containerid);
> > + if (!rc) {
> > + task_lock(task);
> > + task->containerid = containerid;
> > + task_unlock(task);
> > + }
> > +
> > + audit_log_set_containerid(task, oldcontainerid, containerid, rc);
> > + return rc;
> > +}
> > +
> > /**
> > * __audit_mq_open - record audit data for a POSIX MQ open
> > * @oflag: open flag
>
>
- RGB
--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
^ permalink raw reply
* Re: [PATCH RFC 2/2] virtio_ring: support packed ring
From: Jason Wang @ 2018-03-16 4:03 UTC (permalink / raw)
To: Tiwei Bie, mst, virtualization, linux-kernel, netdev; +Cc: wexu
In-Reply-To: <20180223111801.15240-3-tiwei.bie@intel.com>
On 2018年02月23日 19:18, Tiwei Bie wrote:
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> ---
> drivers/virtio/virtio_ring.c | 699 +++++++++++++++++++++++++++++++++++++------
> include/linux/virtio_ring.h | 8 +-
> 2 files changed, 618 insertions(+), 89 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index eb30f3e09a47..393778a2f809 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -58,14 +58,14 @@
>
> struct vring_desc_state {
> void *data; /* Data for callback. */
> - struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
> + void *indir_desc; /* Indirect descriptor, if any. */
> + int num; /* Descriptor list length. */
> };
>
> struct vring_virtqueue {
> struct virtqueue vq;
>
> - /* Actual memory layout for this queue */
> - struct vring vring;
> + bool packed;
>
> /* Can we use weak barriers? */
> bool weak_barriers;
> @@ -87,11 +87,28 @@ struct vring_virtqueue {
> /* Last used index we've seen. */
> u16 last_used_idx;
>
> - /* Last written value to avail->flags */
> - u16 avail_flags_shadow;
> -
> - /* Last written value to avail->idx in guest byte order */
> - u16 avail_idx_shadow;
> + union {
> + /* Available for split ring */
> + struct {
> + /* Actual memory layout for this queue */
> + struct vring vring;
> +
> + /* Last written value to avail->flags */
> + u16 avail_flags_shadow;
> +
> + /* Last written value to avail->idx in
> + * guest byte order */
> + u16 avail_idx_shadow;
> + };
> +
> + /* Available for packed ring */
> + struct {
> + /* Actual memory layout for this queue */
> + struct vring_packed vring_packed;
> + u8 wrap_counter : 1;
> + bool chaining;
> + };
> + };
>
> /* How to notify other side. FIXME: commonalize hcalls! */
> bool (*notify)(struct virtqueue *vq);
> @@ -201,26 +218,37 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
> cpu_addr, size, direction);
> }
>
> -static void vring_unmap_one(const struct vring_virtqueue *vq,
> - struct vring_desc *desc)
> +static void vring_unmap_one(const struct vring_virtqueue *vq, void *_desc)
> {
Let's split the helpers to packed/split version like other helpers?
(Consider the caller has already known the type of vq).
> + u64 addr;
> + u32 len;
> u16 flags;
>
> if (!vring_use_dma_api(vq->vq.vdev))
> return;
>
> - flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> + if (vq->packed) {
> + struct vring_packed_desc *desc = _desc;
> +
> + addr = virtio64_to_cpu(vq->vq.vdev, desc->addr);
> + len = virtio32_to_cpu(vq->vq.vdev, desc->len);
> + flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> + } else {
> + struct vring_desc *desc = _desc;
> +
> + addr = virtio64_to_cpu(vq->vq.vdev, desc->addr);
> + len = virtio32_to_cpu(vq->vq.vdev, desc->len);
> + flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> + }
>
> if (flags & VRING_DESC_F_INDIRECT) {
> dma_unmap_single(vring_dma_dev(vq),
> - virtio64_to_cpu(vq->vq.vdev, desc->addr),
> - virtio32_to_cpu(vq->vq.vdev, desc->len),
> + addr, len,
> (flags & VRING_DESC_F_WRITE) ?
> DMA_FROM_DEVICE : DMA_TO_DEVICE);
> } else {
> dma_unmap_page(vring_dma_dev(vq),
> - virtio64_to_cpu(vq->vq.vdev, desc->addr),
> - virtio32_to_cpu(vq->vq.vdev, desc->len),
> + addr, len,
> (flags & VRING_DESC_F_WRITE) ?
> DMA_FROM_DEVICE : DMA_TO_DEVICE);
> }
> @@ -235,8 +263,9 @@ static int vring_mapping_error(const struct vring_virtqueue *vq,
> return dma_mapping_error(vring_dma_dev(vq), addr);
> }
>
> -static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
> - unsigned int total_sg, gfp_t gfp)
> +static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
> + unsigned int total_sg,
> + gfp_t gfp)
> {
> struct vring_desc *desc;
> unsigned int i;
> @@ -257,14 +286,32 @@ static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
> return desc;
> }
>
> -static inline int virtqueue_add(struct virtqueue *_vq,
> - struct scatterlist *sgs[],
> - unsigned int total_sg,
> - unsigned int out_sgs,
> - unsigned int in_sgs,
> - void *data,
> - void *ctx,
> - gfp_t gfp)
> +static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
> + unsigned int total_sg,
> + gfp_t gfp)
> +{
> + struct vring_packed_desc *desc;
> +
> + /*
> + * We require lowmem mappings for the descriptors because
> + * otherwise virt_to_phys will give us bogus addresses in the
> + * virtqueue.
> + */
> + gfp &= ~__GFP_HIGHMEM;
> +
> + desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
> +
> + return desc;
> +}
> +
> +static inline int virtqueue_add_split(struct virtqueue *_vq,
> + struct scatterlist *sgs[],
> + unsigned int total_sg,
> + unsigned int out_sgs,
> + unsigned int in_sgs,
> + void *data,
> + void *ctx,
> + gfp_t gfp)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> struct scatterlist *sg;
> @@ -303,7 +350,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> /* If the host supports indirect descriptor tables, and we have multiple
> * buffers, then go indirect. FIXME: tune this threshold */
> if (vq->indirect && total_sg > 1 && vq->vq.num_free)
> - desc = alloc_indirect(_vq, total_sg, gfp);
> + desc = alloc_indirect_split(_vq, total_sg, gfp);
> else {
> desc = NULL;
> WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
> @@ -437,6 +484,243 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> return -EIO;
> }
>
> +static inline int virtqueue_add_packed(struct virtqueue *_vq,
> + struct scatterlist *sgs[],
> + unsigned int total_sg,
> + unsigned int out_sgs,
> + unsigned int in_sgs,
> + void *data,
> + void *ctx,
> + gfp_t gfp)
> +{
> + struct vring_virtqueue *vq = to_vvq(_vq);
> + struct vring_packed_desc *desc;
> + struct scatterlist *sg;
> + unsigned int i, n, descs_used, uninitialized_var(prev), err_idx;
> + __virtio16 uninitialized_var(head_flags), flags;
> + int head, wrap_counter;
> + bool indirect;
> +
> + START_USE(vq);
> +
> + BUG_ON(data == NULL);
> + BUG_ON(ctx && vq->indirect);
> +
> + if (unlikely(vq->broken)) {
> + END_USE(vq);
> + return -EIO;
> + }
> +
> + if (total_sg > 1 && !vq->chaining && !vq->indirect) {
> + END_USE(vq);
> + return -ENOTSUPP;
> + }
> +
> +#ifdef DEBUG
> + {
> + ktime_t now = ktime_get();
> +
> + /* No kick or get, with .1 second between? Warn. */
> + if (vq->last_add_time_valid)
> + WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
> + > 100);
> + vq->last_add_time = now;
> + vq->last_add_time_valid = true;
> + }
> +#endif
> +
> + BUG_ON(total_sg == 0);
> +
> + head = vq->free_head;
> + wrap_counter = vq->wrap_counter;
> +
> + /* If the host supports indirect descriptor tables, and we have multiple
> + * buffers, then go indirect. FIXME: tune this threshold */
> + if (vq->indirect && total_sg > 1 && vq->vq.num_free)
> + desc = alloc_indirect_packed(_vq, total_sg, gfp);
> + else {
> + desc = NULL;
> + WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
> + }
> +
> + if (desc) {
> + /* Use a single buffer which doesn't continue */
> + indirect = true;
> + /* Set up rest to use this indirect table. */
> + i = 0;
> + descs_used = 1;
> + } else {
> + indirect = false;
> + desc = vq->vring_packed.desc;
> + i = head;
> + descs_used = total_sg;
> +
> + if (total_sg > 1 && !vq->chaining) {
> + END_USE(vq);
> + return -ENOTSUPP;
> + }
> + }
> +
> + if (vq->vq.num_free < descs_used) {
> + pr_debug("Can't add buf len %i - avail = %i\n",
> + descs_used, vq->vq.num_free);
> + /* FIXME: for historical reasons, we force a notify here if
> + * there are outgoing parts to the buffer. Presumably the
> + * host should service the ring ASAP. */
> + if (out_sgs)
> + vq->notify(&vq->vq);
> + if (indirect)
> + kfree(desc);
> + END_USE(vq);
> + return -ENOSPC;
> + }
> +
> + for (n = 0; n < out_sgs; n++) {
> + for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
> + if (vring_mapping_error(vq, addr))
> + goto unmap_release;
> +
> + flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
> + VRING_DESC_F_AVAIL(vq->wrap_counter) |
> + VRING_DESC_F_USED(!vq->wrap_counter));
> + if (!indirect && i == head)
> + head_flags = flags;
> + else
> + desc[i].flags = flags;
> +
> + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
> + desc[i].id = cpu_to_virtio32(_vq->vdev, head);
If it's a part of chain, we only need to do this for last buffer I think.
> + prev = i;
> + i++;
It looks to me prev is always i - 1?
> + if (!indirect && i >= vq->vring_packed.num) {
> + i = 0;
> + vq->wrap_counter ^= 1;
> + }
> + }
> + }
> + for (; n < (out_sgs + in_sgs); n++) {
> + for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
> + if (vring_mapping_error(vq, addr))
> + goto unmap_release;
> +
> + flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
> + VRING_DESC_F_WRITE |
> + VRING_DESC_F_AVAIL(vq->wrap_counter) |
> + VRING_DESC_F_USED(!vq->wrap_counter));
> + if (!indirect && i == head)
> + head_flags = flags;
> + else
> + desc[i].flags = flags;
> +
> + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
> + desc[i].id = cpu_to_virtio32(_vq->vdev, head);
> + prev = i;
> + i++;
> + if (!indirect && i >= vq->vring_packed.num) {
> + i = 0;
> + vq->wrap_counter ^= 1;
> + }
> + }
> + }
> + /* Last one doesn't continue. */
> + if (!indirect && (head + 1) % vq->vring_packed.num == i)
> + head_flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
I can't get the why we need this here.
> + else
> + desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
> +
> + if (indirect) {
> + /* FIXME: to be implemented */
> +
> + /* Now that the indirect table is filled in, map it. */
> + dma_addr_t addr = vring_map_single(
> + vq, desc, total_sg * sizeof(struct vring_packed_desc),
> + DMA_TO_DEVICE);
> + if (vring_mapping_error(vq, addr))
> + goto unmap_release;
> +
> + head_flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT |
> + VRING_DESC_F_AVAIL(wrap_counter) |
> + VRING_DESC_F_USED(!wrap_counter));
> + vq->vring_packed.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
> + vq->vring_packed.desc[head].len = cpu_to_virtio32(_vq->vdev,
> + total_sg * sizeof(struct vring_packed_desc));
> + vq->vring_packed.desc[head].id = cpu_to_virtio32(_vq->vdev, head);
> + }
> +
> + /* We're using some buffers from the free list. */
> + vq->vq.num_free -= descs_used;
> +
> + /* Update free pointer */
> + if (indirect) {
> + n = head + 1;
> + if (n >= vq->vring_packed.num) {
> + n = 0;
> + vq->wrap_counter ^= 1;
> + }
> + vq->free_head = n;
detach_buf_packed() does not even touch free_head here, so need to
explain its meaning for packed ring.
> + } else
> + vq->free_head = i;
ID is only valid in the last descriptor in the list, so head + 1 should
be ok too?
> +
> + /* Store token and indirect buffer state. */
> + vq->desc_state[head].num = descs_used;
> + vq->desc_state[head].data = data;
> + if (indirect)
> + vq->desc_state[head].indir_desc = desc;
> + else
> + vq->desc_state[head].indir_desc = ctx;
> +
> + virtio_wmb(vq->weak_barriers);
Let's add a comment to explain the barrier here.
> + vq->vring_packed.desc[head].flags = head_flags;
> + vq->num_added++;
> +
> + pr_debug("Added buffer head %i to %p\n", head, vq);
> + END_USE(vq);
> +
> + return 0;
> +
> +unmap_release:
> + err_idx = i;
> + i = head;
> +
> + for (n = 0; n < total_sg; n++) {
> + if (i == err_idx)
> + break;
> + vring_unmap_one(vq, &desc[i]);
> + i++;
> + if (!indirect && i >= vq->vring_packed.num)
> + i = 0;
> + }
> +
> + vq->wrap_counter = wrap_counter;
> +
> + if (indirect)
> + kfree(desc);
> +
> + END_USE(vq);
> + return -EIO;
> +}
> +
> +static inline int virtqueue_add(struct virtqueue *_vq,
> + struct scatterlist *sgs[],
> + unsigned int total_sg,
> + unsigned int out_sgs,
> + unsigned int in_sgs,
> + void *data,
> + void *ctx,
> + gfp_t gfp)
> +{
> + struct vring_virtqueue *vq = to_vvq(_vq);
> +
> + return vq->packed ? virtqueue_add_packed(_vq, sgs, total_sg, out_sgs,
> + in_sgs, data, ctx, gfp) :
> + virtqueue_add_split(_vq, sgs, total_sg, out_sgs,
> + in_sgs, data, ctx, gfp);
> +}
> +
> /**
> * virtqueue_add_sgs - expose buffers to other end
> * @vq: the struct virtqueue we're talking about.
> @@ -561,6 +845,12 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
> * event. */
> virtio_mb(vq->weak_barriers);
>
> + if (vq->packed) {
> + /* FIXME: to be implemented */
> + needs_kick = true;
> + goto out;
> + }
> +
> old = vq->avail_idx_shadow - vq->num_added;
> new = vq->avail_idx_shadow;
> vq->num_added = 0;
> @@ -579,6 +869,8 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
> } else {
> needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
> }
> +
> +out:
> END_USE(vq);
> return needs_kick;
> }
> @@ -628,8 +920,8 @@ bool virtqueue_kick(struct virtqueue *vq)
> }
> EXPORT_SYMBOL_GPL(virtqueue_kick);
>
> -static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
> - void **ctx)
> +static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> + void **ctx)
> {
> unsigned int i, j;
> __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> @@ -677,29 +969,81 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
> }
> }
>
> -static inline bool more_used(const struct vring_virtqueue *vq)
> +static void detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> + void **ctx)
> +{
> + struct vring_packed_desc *desc;
> + unsigned int i, j;
> +
> + /* Clear data ptr. */
> + vq->desc_state[head].data = NULL;
> +
> + i = head;
> +
> + for (j = 0; j < vq->desc_state[head].num; j++) {
> + desc = &vq->vring_packed.desc[i];
> + vring_unmap_one(vq, desc);
> + i++;
> + if (i >= vq->vring_packed.num)
> + i = 0;
> + }
> +
> + vq->vq.num_free += vq->desc_state[head].num;
It looks to me vq->free_head grows always, how can we make sure it does
not exceeds vq.num here?
> +
> + if (vq->indirect) {
> + u32 len;
> +
> + desc = vq->desc_state[head].indir_desc;
> + /* Free the indirect table, if any, now that it's unmapped. */
> + if (!desc)
> + return;
> +
> + len = virtio32_to_cpu(vq->vq.vdev,
> + vq->vring_packed.desc[head].len);
> +
> + BUG_ON(!(vq->vring_packed.desc[head].flags &
> + cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
> + BUG_ON(len == 0 || len % sizeof(struct vring_packed_desc));
> +
> + for (j = 0; j < len / sizeof(struct vring_packed_desc); j++)
> + vring_unmap_one(vq, &desc[j]);
> +
> + kfree(desc);
> + vq->desc_state[head].indir_desc = NULL;
> + } else if (ctx) {
> + *ctx = vq->desc_state[head].indir_desc;
> + }
> +}
> +
> +static inline bool more_used_split(const struct vring_virtqueue *vq)
> {
> return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
> }
>
> -/**
> - * virtqueue_get_buf - get the next used buffer
> - * @vq: the struct virtqueue we're talking about.
> - * @len: the length written into the buffer
> - *
> - * If the device wrote data into the buffer, @len will be set to the
> - * amount written. This means you don't need to clear the buffer
> - * beforehand to ensure there's no data leakage in the case of short
> - * writes.
> - *
> - * Caller must ensure we don't call this with other virtqueue
> - * operations at the same time (except where noted).
> - *
> - * Returns NULL if there are no used buffers, or the "data" token
> - * handed to virtqueue_add_*().
> - */
> -void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> - void **ctx)
> +static inline bool more_used_packed(const struct vring_virtqueue *vq)
> +{
> + u16 last_used, flags;
> + bool avail, used;
> +
> + if (vq->vq.num_free == vq->vring.num)
> + return false;
> +
> + last_used = vq->last_used_idx;
> + flags = virtio16_to_cpu(vq->vq.vdev,
> + vq->vring_packed.desc[last_used].flags);
> + avail = flags & VRING_DESC_F_AVAIL(1);
> + used = flags & VRING_DESC_F_USED(1);
> +
> + return avail == used;
> +}
> +
> +static inline bool more_used(const struct vring_virtqueue *vq)
> +{
> + return vq->packed ? more_used_packed(vq) : more_used_split(vq);
> +}
> +
> +void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, unsigned int *len,
> + void **ctx)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> void *ret;
> @@ -735,9 +1079,9 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> return NULL;
> }
>
> - /* detach_buf clears data, so grab it now. */
> + /* detach_buf_split clears data, so grab it now. */
> ret = vq->desc_state[i].data;
> - detach_buf(vq, i, ctx);
> + detach_buf_split(vq, i, ctx);
> vq->last_used_idx++;
> /* If we expect an interrupt for the next entry, tell host
> * by writing event index and flush out the write before
> @@ -754,6 +1098,87 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> END_USE(vq);
> return ret;
> }
> +
> +void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, unsigned int *len,
> + void **ctx)
> +{
> + struct vring_virtqueue *vq = to_vvq(_vq);
> + void *ret;
> + unsigned int i;
> + u16 last_used;
> +
> + START_USE(vq);
> +
> + if (unlikely(vq->broken)) {
> + END_USE(vq);
> + return NULL;
> + }
> +
> + if (!more_used(vq)) {
> + pr_debug("No more buffers in queue\n");
> + END_USE(vq);
> + return NULL;
> + }
> +
> + /* Only get used array entries after they have been exposed by host. */
> + virtio_rmb(vq->weak_barriers);
> +
> + last_used = vq->last_used_idx;
> +
> + i = virtio32_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].id);
> + *len = virtio32_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].len);
> +
> + if (unlikely(i >= vq->vring_packed.num)) {
> + BAD_RING(vq, "id %u out of range\n", i);
> + return NULL;
> + }
> + if (unlikely(!vq->desc_state[i].data)) {
> + BAD_RING(vq, "id %u is not a head!\n", i);
> + return NULL;
> + }
> +
> + /* detach_buf_packed clears data, so grab it now. */
> + ret = vq->desc_state[i].data;
> + detach_buf_packed(vq, i, ctx);
> +
> + vq->last_used_idx += vq->desc_state[i].num;
> + if (vq->last_used_idx >= vq->vring_packed.num)
> + vq->last_used_idx %= vq->vring_packed.num;
'-=' should be sufficient here?
> +
> + // FIXME: implement the desc event support
> +
> +#ifdef DEBUG
> + vq->last_add_time_valid = false;
> +#endif
> +
> + END_USE(vq);
> + return ret;
> +}
> +
> +/**
> + * virtqueue_get_buf - get the next used buffer
> + * @vq: the struct virtqueue we're talking about.
> + * @len: the length written into the buffer
> + *
> + * If the device wrote data into the buffer, @len will be set to the
> + * amount written. This means you don't need to clear the buffer
> + * beforehand to ensure there's no data leakage in the case of short
> + * writes.
> + *
> + * Caller must ensure we don't call this with other virtqueue
> + * operations at the same time (except where noted).
> + *
> + * Returns NULL if there are no used buffers, or the "data" token
> + * handed to virtqueue_add_*().
> + */
> +void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> + void **ctx)
> +{
> + struct vring_virtqueue *vq = to_vvq(_vq);
> +
> + return vq->packed ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
> + virtqueue_get_buf_ctx_split(_vq, len, ctx);
> +}
> EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
>
> void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
> @@ -761,6 +1186,24 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
> return virtqueue_get_buf_ctx(_vq, len, NULL);
> }
> EXPORT_SYMBOL_GPL(virtqueue_get_buf);
> +
> +static void virtqueue_disable_cb_split(struct virtqueue *_vq)
> +{
> + struct vring_virtqueue *vq = to_vvq(_vq);
> +
> + if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
> + vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> + if (!vq->event)
> + vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
> + vq->avail_flags_shadow);
> + }
> +}
> +
> +static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
> +{
> + // FIXME: to be implemented
> +}
> +
> /**
> * virtqueue_disable_cb - disable callbacks
> * @vq: the struct virtqueue we're talking about.
> @@ -774,12 +1217,10 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
>
> - if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
> - vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> - if (!vq->event)
> - vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
> - }
> -
> + if (vq->packed)
> + virtqueue_disable_cb_packed(_vq);
> + else
> + virtqueue_disable_cb_split(_vq);
> }
> EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
>
> @@ -802,6 +1243,12 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
>
> START_USE(vq);
>
> + if (vq->packed) {
> + // FIXME: to be implemented
> + last_used_idx = vq->last_used_idx;
> + goto out;
> + }
> +
> /* We optimistically turn back on interrupts, then check if there was
> * more to do. */
> /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
> @@ -813,6 +1260,7 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
> vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
> }
> vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
> +out:
> END_USE(vq);
> return last_used_idx;
> }
> @@ -832,6 +1280,12 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
> struct vring_virtqueue *vq = to_vvq(_vq);
>
> virtio_mb(vq->weak_barriers);
> + if (vq->packed) {
> + u16 flags = virtio16_to_cpu(vq->vq.vdev,
> + vq->vring_packed.desc[last_used_idx].flags);
> + return !(flags & VRING_DESC_F_AVAIL(1)) ==
> + !(flags & VRING_DESC_F_USED(1));
> + }
> return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
> }
> EXPORT_SYMBOL_GPL(virtqueue_poll);
> @@ -874,6 +1328,11 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
>
> START_USE(vq);
>
> + if (vq->packed) {
> + // FIXME: to be implemented
> + goto out;
> + }
> +
> /* We optimistically turn back on interrupts, then check if there was
> * more to do. */
> /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> @@ -896,6 +1355,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
> return false;
> }
>
> +out:
> END_USE(vq);
> return true;
> }
> @@ -922,14 +1382,20 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
> continue;
> /* detach_buf clears data, so grab it now. */
> buf = vq->desc_state[i].data;
> - detach_buf(vq, i, NULL);
> - vq->avail_idx_shadow--;
> - vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
> + if (vq->packed)
> + detach_buf_packed(vq, i, NULL);
> + else {
> + detach_buf_split(vq, i, NULL);
> + vq->avail_idx_shadow--;
> + vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev,
> + vq->avail_idx_shadow);
> + }
> END_USE(vq);
> return buf;
> }
> /* That should have freed everything. */
> - BUG_ON(vq->vq.num_free != vq->vring.num);
> + BUG_ON(vq->vq.num_free != (vq->packed ? vq->vring_packed.num :
> + vq->vring.num));
>
> END_USE(vq);
> return NULL;
> @@ -957,7 +1423,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
> EXPORT_SYMBOL_GPL(vring_interrupt);
>
> struct virtqueue *__vring_new_virtqueue(unsigned int index,
> - struct vring vring,
> + union vring_union vring,
> + bool packed,
> struct virtio_device *vdev,
> bool weak_barriers,
> bool context,
> @@ -965,19 +1432,20 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> void (*callback)(struct virtqueue *),
> const char *name)
> {
> - unsigned int i;
> + unsigned int num, i;
> struct vring_virtqueue *vq;
>
> - vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
> + num = packed ? vring.vring_packed.num : vring.vring_split.num;
> +
> + vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
> GFP_KERNEL);
> if (!vq)
> return NULL;
>
> - vq->vring = vring;
> vq->vq.callback = callback;
> vq->vq.vdev = vdev;
> vq->vq.name = name;
> - vq->vq.num_free = vring.num;
> + vq->vq.num_free = num;
> vq->vq.index = index;
> vq->we_own_ring = false;
> vq->queue_dma_addr = 0;
> @@ -986,9 +1454,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> vq->weak_barriers = weak_barriers;
> vq->broken = false;
> vq->last_used_idx = 0;
> - vq->avail_flags_shadow = 0;
> - vq->avail_idx_shadow = 0;
> vq->num_added = 0;
> + vq->packed = packed;
> list_add_tail(&vq->vq.list, &vdev->vqs);
> #ifdef DEBUG
> vq->in_use = false;
> @@ -999,18 +1466,41 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> !context;
> vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>
> + if (vq->packed) {
> + vq->vring_packed = vring.vring_packed;
> + vq->free_head = 0;
> + vq->wrap_counter = 1;
> +
> +#if 0
> + vq->chaining = virtio_has_feature(vdev,
> + VIRTIO_RING_F_LIST_DESC);
> +#else
> + vq->chaining = true;
Looks like in V10 there's no F_LIST_DESC.
> +#endif
> + } else {
> + vq->vring = vring.vring_split;
> + vq->avail_flags_shadow = 0;
> + vq->avail_idx_shadow = 0;
> +
> + /* Put everything in free lists. */
> + vq->free_head = 0;
> + for (i = 0; i < num-1; i++)
> + vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
> + }
> +
> /* No callback? Tell other side not to bother us. */
> if (!callback) {
> - vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> - if (!vq->event)
> - vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
> + if (packed) {
> + // FIXME: to be implemented
> + } else {
> + vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> + if (!vq->event)
> + vq->vring.avail->flags = cpu_to_virtio16(vdev,
> + vq->avail_flags_shadow);
> + }
> }
>
> - /* Put everything in free lists. */
> - vq->free_head = 0;
> - for (i = 0; i < vring.num-1; i++)
> - vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
> - memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
> + memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
>
> return &vq->vq;
> }
> @@ -1058,6 +1548,14 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size,
> }
> }
>
> +static inline int
> +__vring_size(unsigned int num, unsigned long align, bool packed)
> +{
> + if (packed)
> + return vring_packed_size(num, align);
> + return vring_size(num, align);
> +}
> +
> struct virtqueue *vring_create_virtqueue(
> unsigned int index,
> unsigned int num,
> @@ -1074,7 +1572,8 @@ struct virtqueue *vring_create_virtqueue(
> void *queue = NULL;
> dma_addr_t dma_addr;
> size_t queue_size_in_bytes;
> - struct vring vring;
> + union vring_union vring;
> + bool packed;
>
> /* We assume num is a power of 2. */
> if (num & (num - 1)) {
> @@ -1082,9 +1581,13 @@ struct virtqueue *vring_create_virtqueue(
> return NULL;
> }
>
> + packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
> +
> /* TODO: allocate each queue chunk individually */
> - for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
> - queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
> + for (; num && __vring_size(num, vring_align, packed) > PAGE_SIZE;
> + num /= 2) {
> + queue = vring_alloc_queue(vdev, __vring_size(num, vring_align,
> + packed),
> &dma_addr,
> GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
> if (queue)
> @@ -1096,17 +1599,21 @@ struct virtqueue *vring_create_virtqueue(
>
> if (!queue) {
> /* Try to get a single page. You are my only hope! */
> - queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
> + queue = vring_alloc_queue(vdev, __vring_size(num, vring_align,
> + packed),
> &dma_addr, GFP_KERNEL|__GFP_ZERO);
> }
> if (!queue)
> return NULL;
>
> - queue_size_in_bytes = vring_size(num, vring_align);
> - vring_init(&vring, num, queue, vring_align);
> + queue_size_in_bytes = __vring_size(num, vring_align, packed);
> + if (packed)
> + vring_packed_init(&vring.vring_packed, num, queue, vring_align);
> + else
> + vring_init(&vring.vring_split, num, queue, vring_align);
Let's rename vring_init to vring_init_split() like other helpers?
>
> - vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
> - notify, callback, name);
> + vq = __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
> + context, notify, callback, name);
> if (!vq) {
> vring_free_queue(vdev, queue_size_in_bytes, queue,
> dma_addr);
> @@ -1132,10 +1639,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
> void (*callback)(struct virtqueue *vq),
> const char *name)
> {
> - struct vring vring;
> - vring_init(&vring, num, pages, vring_align);
> - return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
> - notify, callback, name);
> + union vring_union vring;
> + bool packed;
> +
> + packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
> + if (packed)
> + vring_packed_init(&vring.vring_packed, num, pages, vring_align);
> + else
> + vring_init(&vring.vring_split, num, pages, vring_align);
> +
> + return __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
> + context, notify, callback, name);
> }
> EXPORT_SYMBOL_GPL(vring_new_virtqueue);
>
> @@ -1145,7 +1659,9 @@ void vring_del_virtqueue(struct virtqueue *_vq)
>
> if (vq->we_own_ring) {
> vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
> - vq->vring.desc, vq->queue_dma_addr);
> + vq->packed ? (void *)vq->vring_packed.desc :
> + (void *)vq->vring.desc,
> + vq->queue_dma_addr);
> }
> list_del(&_vq->list);
> kfree(vq);
> @@ -1159,14 +1675,18 @@ void vring_transport_features(struct virtio_device *vdev)
>
> for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
> switch (i) {
> +#if 0 // FIXME: to be implemented
> case VIRTIO_RING_F_INDIRECT_DESC:
> break;
> +#endif
> case VIRTIO_RING_F_EVENT_IDX:
> break;
> case VIRTIO_F_VERSION_1:
> break;
> case VIRTIO_F_IOMMU_PLATFORM:
> break;
> + case VIRTIO_F_RING_PACKED:
> + break;
> default:
> /* We don't understand this bit. */
> __virtio_clear_bit(vdev, i);
> @@ -1187,7 +1707,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
>
> struct vring_virtqueue *vq = to_vvq(_vq);
>
> - return vq->vring.num;
> + return vq->packed ? vq->vring_packed.num : vq->vring.num;
> }
> EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
>
> @@ -1224,6 +1744,7 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
> }
> EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
>
> +/* Only available for split ring */
Interesting, I think we need this for correctly configure pci. e.g in
setup_vq()?
> dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> @@ -1235,6 +1756,7 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
> }
> EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
>
> +/* Only available for split ring */
> dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
Maybe it's better to rename this to get_device_addr().
Thanks
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> @@ -1246,6 +1768,7 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
> }
> EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
>
> +/* Only available for split ring */
> const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> {
> return &to_vvq(vq)->vring;
> diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
> index bbf32524ab27..a0075894ad16 100644
> --- a/include/linux/virtio_ring.h
> +++ b/include/linux/virtio_ring.h
> @@ -60,6 +60,11 @@ static inline void virtio_store_mb(bool weak_barriers,
> struct virtio_device;
> struct virtqueue;
>
> +union vring_union {
> + struct vring vring_split;
> + struct vring_packed vring_packed;
> +};
> +
> /*
> * Creates a virtqueue and allocates the descriptor ring. If
> * may_reduce_num is set, then this may allocate a smaller ring than
> @@ -79,7 +84,8 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
>
> /* Creates a virtqueue with a custom layout. */
> struct virtqueue *__vring_new_virtqueue(unsigned int index,
> - struct vring vring,
> + union vring_union vring,
> + bool packed,
> struct virtio_device *vdev,
> bool weak_barriers,
> bool ctx,
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* [PATCH v5 0/2] Remove false-positive VLAs when using max()
From: Kees Cook @ 2018-03-16 4:25 UTC (permalink / raw)
To: Andrew Morton
Cc: Kees Cook, Linus Torvalds, Josh Poimboeuf, Rasmus Villemoes,
Randy Dunlap, Miguel Ojeda, Ingo Molnar, David Laight, Ian Abbott,
linux-input, linux-btrfs, netdev, linux-kernel, kernel-hardening
Patch 1 adds const_max_t(), patch 2 uses it in all the places max()
was used for stack arrays. Commit log from patch 1:
---snip---
kernel.h: Introduce const_max_t() for VLA removal
In the effort to remove all VLAs from the kernel[1], it is desirable to
build with -Wvla. However, this warning is overly pessimistic, in that
it is only happy with stack array sizes that are declared as constant
expressions, and not constant values. One case of this is the evaluation
of the max() macro which, due to its construction, ends up converting
constant expression arguments into a constant value result. Attempts
to adjust the behavior of max() ran afoul of version-dependent compiler
behavior[2].
To work around this and still gain -Wvla coverage, this patch introduces
a new macro, const_max_t(), for use in these cases of stack array size
declaration, where the constant expressions are retained. Since this means
losing the double-evaluation protections of the max() macro, this macro is
designed to explicitly fail if used on non-constant arguments.
Older compilers will fail with the unhelpful message:
error: first argument to ‘__builtin_choose_expr’ not a constant
Newer compilers will fail with a hopefully more helpful message:
error: call to ‘__error_non_const_arg’ declared with attribute error: const_max_t() used with non-constant expression
To gain the ability to compare differing types, the desired type must
be explicitly declared, as with the existing max_t() macro. This is
needed when comparing different enum types and to allow things like:
int foo[const_max_t(size_t, 6, sizeof(something))];
[1] https://lkml.org/lkml/2018/3/7/621
[2] https://lkml.org/lkml/2018/3/10/170
---eol---
Hopefully this reads well as a summary from all the things that got tried.
I've tested this on allmodconfig builds with gcc 4.4.4 and 6.3.0, with and
without -Wvla.
-Kees
v5: explicit type argument
v4: forced size_t type
^ permalink raw reply
* [PATCH v5 1/2] kernel.h: Introduce const_max_t() for VLA removal
From: Kees Cook @ 2018-03-16 4:25 UTC (permalink / raw)
To: Andrew Morton
Cc: Kees Cook, Linus Torvalds, Josh Poimboeuf, Rasmus Villemoes,
Randy Dunlap, Miguel Ojeda, Ingo Molnar, David Laight, Ian Abbott,
linux-input, linux-btrfs, netdev, linux-kernel, kernel-hardening
In-Reply-To: <1521174359-46392-1-git-send-email-keescook@chromium.org>
In the effort to remove all VLAs from the kernel[1], it is desirable to
build with -Wvla. However, this warning is overly pessimistic, in that
it is only happy with stack array sizes that are declared as constant
expressions, and not constant values. One case of this is the evaluation
of the max() macro which, due to its construction, ends up converting
constant expression arguments into a constant value result. Attempts
to adjust the behavior of max() ran afoul of version-dependent compiler
behavior[2].
To work around this and still gain -Wvla coverage, this patch introduces
a new macro, const_max_t(), for use in these cases of stack array size
declaration, where the constant expressions are retained. Since this means
losing the double-evaluation protections of the max() macro, this macro is
designed to explicitly fail if used on non-constant arguments.
Older compilers will fail with the unhelpful message:
error: first argument to ‘__builtin_choose_expr’ not a constant
Newer compilers will fail with a hopefully more helpful message:
error: call to ‘__error_non_const_arg’ declared with attribute error: const_max_t() used with non-constant expression
To gain the ability to compare differing types, the desired type must
be explicitly declared, as with the existing max_t() macro. This is
needed when comparing different enum types and to allow things like:
int foo[const_max_t(size_t, 6, sizeof(something))];
[1] https://lkml.org/lkml/2018/3/7/621
[2] https://lkml.org/lkml/2018/3/10/170
Signed-off-by: Kees Cook <keescook@chromium.org>
---
include/linux/kernel.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fd291503576..e14531781568 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -820,6 +820,25 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
x, y)
/**
+ * const_max_t - return maximum of two compile-time constant expressions
+ * @type: type used for evaluation
+ * @x: first compile-time constant expression
+ * @y: second compile-time constant expression
+ *
+ * This has no multi-evaluation defenses, and must only ever be used with
+ * compile-time constant expressions (for example when calculating a stack
+ * array size).
+ */
+size_t __error_non_const_arg(void) \
+__compiletime_error("const_max_t() used with non-constant expression");
+#define const_max_t(type, x, y) \
+ __builtin_choose_expr(__builtin_constant_p(x) && \
+ __builtin_constant_p(y), \
+ (type)(x) > (type)(y) ? \
+ (type)(x) : (type)(y), \
+ __error_non_const_arg())
+
+/**
* min3 - return minimum of three values
* @x: first value
* @y: second value
--
2.7.4
^ permalink raw reply related
* [PATCH v5 2/2] Remove false-positive VLAs when using max()
From: Kees Cook @ 2018-03-16 4:25 UTC (permalink / raw)
To: Andrew Morton
Cc: Kees Cook, Linus Torvalds, Josh Poimboeuf, Rasmus Villemoes,
Randy Dunlap, Miguel Ojeda, Ingo Molnar, David Laight, Ian Abbott,
linux-input, linux-btrfs, netdev, linux-kernel, kernel-hardening
In-Reply-To: <1521174359-46392-1-git-send-email-keescook@chromium.org>
As part of removing VLAs from the kernel[1], we want to build with -Wvla,
but it is overly pessimistic and only accepts constant expressions for
stack array sizes, instead of also constant values. The max() macro
triggers the warning, so this refactors these uses of max() to use the
new const_max() instead.
[1] https://lkml.org/lkml/2018/3/7/621
Signed-off-by: Kees Cook <keescook@chromium.org>
---
drivers/input/touchscreen/cyttsp4_core.c | 2 +-
fs/btrfs/tree-checker.c | 3 ++-
lib/vsprintf.c | 5 +++--
net/ipv4/proc.c | 8 ++++----
net/ipv6/proc.c | 11 +++++------
5 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c
index 727c3232517c..7fb9bd48e41c 100644
--- a/drivers/input/touchscreen/cyttsp4_core.c
+++ b/drivers/input/touchscreen/cyttsp4_core.c
@@ -868,7 +868,7 @@ static void cyttsp4_get_mt_touches(struct cyttsp4_mt_data *md, int num_cur_tch)
struct cyttsp4_touch tch;
int sig;
int i, j, t = 0;
- int ids[max(CY_TMA1036_MAX_TCH, CY_TMA4XX_MAX_TCH)];
+ int ids[const_max_t(size_t, CY_TMA1036_MAX_TCH, CY_TMA4XX_MAX_TCH)];
memset(ids, 0, si->si_ofs.tch_abs[CY_TCH_T].max * sizeof(int));
for (i = 0; i < num_cur_tch; i++) {
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index c3c8d48f6618..d83244e3821f 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -341,7 +341,8 @@ static int check_dir_item(struct btrfs_root *root,
*/
if (key->type == BTRFS_DIR_ITEM_KEY ||
key->type == BTRFS_XATTR_ITEM_KEY) {
- char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
+ char namebuf[const_max_t(size_t, BTRFS_NAME_LEN,
+ XATTR_NAME_MAX)];
read_extent_buffer(leaf, namebuf,
(unsigned long)(di + 1), name_len);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d7a708f82559..12ff57a36171 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -744,8 +744,9 @@ char *resource_string(char *buf, char *end, struct resource *res,
#define FLAG_BUF_SIZE (2 * sizeof(res->flags))
#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]")
#define RAW_BUF_SIZE sizeof("[mem - flags 0x]")
- char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
- 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
+ char sym[const_max_t(size_t,
+ 2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
+ 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
char *p = sym, *pend = sym + sizeof(sym);
int decode = (fmt[0] == 'R') ? 1 : 0;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index dc5edc8f7564..7f5c3b40dac9 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -46,7 +46,7 @@
#include <net/sock.h>
#include <net/raw.h>
-#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX)
+#define TCPUDP_MIB_MAX const_max_t(size_t, UDP_MIB_MAX, TCP_MIB_MAX)
/*
* Report socket allocation statistics [mea@utu.fi]
@@ -404,7 +404,7 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
struct net *net = seq->private;
int i;
- memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+ memset(buff, 0, sizeof(buff));
seq_puts(seq, "\nTcp:");
for (i = 0; snmp4_tcp_list[i].name; i++)
@@ -421,7 +421,7 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
seq_printf(seq, " %lu", buff[i]);
}
- memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+ memset(buff, 0, sizeof(buff));
snmp_get_cpu_field_batch(buff, snmp4_udp_list,
net->mib.udp_statistics);
@@ -432,7 +432,7 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
for (i = 0; snmp4_udp_list[i].name; i++)
seq_printf(seq, " %lu", buff[i]);
- memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+ memset(buff, 0, sizeof(buff));
/* the UDP and UDP-Lite MIBs are the same */
seq_puts(seq, "\nUdpLite:");
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b67814242f78..b68c233de296 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -30,10 +30,9 @@
#include <net/transp_v6.h>
#include <net/ipv6.h>
-#define MAX4(a, b, c, d) \
- max_t(u32, max_t(u32, a, b), max_t(u32, c, d))
-#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \
- IPSTATS_MIB_MAX, ICMP_MIB_MAX)
+#define SNMP_MIB_MAX const_max_t(u32, \
+ const_max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX), \
+ const_max_t(u32, IPSTATS_MIB_MAX, ICMP_MIB_MAX))
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
@@ -199,7 +198,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
int i;
if (pcpumib) {
- memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+ memset(buff, 0, sizeof(buff));
snmp_get_cpu_field_batch(buff, itemlist, pcpumib);
for (i = 0; itemlist[i].name; i++)
@@ -218,7 +217,7 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
u64 buff64[SNMP_MIB_MAX];
int i;
- memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX);
+ memset(buff64, 0, sizeof(buff64));
snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
for (i = 0; itemlist[i].name; i++)
--
2.7.4
^ permalink raw reply related
* Re: [Intel-wired-lan] [PATCH v2 12/15] ice: Add stats and ethtool support
From: Stephen Hemminger @ 2018-03-16 4:41 UTC (permalink / raw)
To: Alexander Duyck
Cc: Anirudh Venkataramanan, Jakub Kicinski, Netdev, intel-wired-lan,
Andrew Lunn
In-Reply-To: <CAKgT0UcpfznPj5S5sBwMnC0WPhDaMnFJnQqS3_EMb5tZr10wAA@mail.gmail.com>
On Thu, 15 Mar 2018 17:50:10 -0700
Alexander Duyck <alexander.duyck@gmail.com> wrote:
> On Thu, Mar 15, 2018 at 4:52 PM, Stephen Hemminger
> <stephen@networkplumber.org> wrote:
> > On Thu, 15 Mar 2018 16:47:59 -0700
> > Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> wrote:
> >
> >> +
> >> +static const struct ice_stats ice_gstrings_vsi_stats[] = {
> >> + ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
> >> + ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
> >> + ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
> >> + ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
> >> + ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
> >> + ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
> >> + ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
> >> + ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
> >> + ICE_VSI_STAT("rx_discards", eth_stats.rx_discards),
> >> + ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
> >> + ICE_VSI_STAT("tx_linearize", tx_linearize),
> >> + ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
> >> + ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
> >> + ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
> >> +};
> >> +
> >
> > Ignoring feedback from maintainers is unlikely to help get your driver adopted.
>
> Your feedback wasn't ignored, the netdev stats are gone. I double
> checked and there was this in addition to the netdev stats before so I
> think the suggestion to remove the netdev stats was just taken
> literally.
>
> The VSI is a slightly different entity from the netdev itself. A
> netdev can be backed by a VSI in the case of the PF, but the VSI can
> be used in other ways such as what we did in i40e where we were using
> it to spawn queue groups to work with mqprio as a filter target and in
> that case the queue groups wouldn't have a netdev directly associated
> with them so in that case it might make sense to leave these as
> separate stats.
>
> - Alex
Sorry I was confused because eth_stats and thought is was a copy
of net_stats. This looks good.
After reading ice_stat_update40 I can see why you needed
to know starting point.
Since ice_fetch_u64_stats_per_ring is called only by ice_update_vsi_ring_stats,
and the update is only done by watchdog and when stats are fetched;
it doesn't look like you need to use the _irq variant of u64_stats_fetch_begin.
That would save having to disable local irq's while handling stats.
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
^ permalink raw reply
* Re: [PATCH 00/16] remove eight obsolete architectures
From: afzal mohammed @ 2018-03-16 4:50 UTC (permalink / raw)
To: Arnd Bergmann
Cc: David Howells, linux-arch, Linux Kernel Mailing List,
open list:DOCUMENTATION, linux-block, IDE-ML,
open list:HID CORE LAYER, Networking, linux-wireless, linux-pwm,
linux-rtc, linux-spi, linux-usb, dri-devel, linux-fbdev,
linux-watchdog, Linux FS-devel Mailing List, Linux-MM
In-Reply-To: <CAK8P3a10hBz7QYk5v5MfhVMPOwFnWYTn95WZp1HtHrd7-GQpRg@mail.gmail.com>
Hi,
On Thu, Mar 15, 2018 at 10:56:48AM +0100, Arnd Bergmann wrote:
> On Thu, Mar 15, 2018 at 10:42 AM, David Howells <dhowells@redhat.com> wrote:
> > Do we have anything left that still implements NOMMU?
Please don't kill !MMU.
> Yes, plenty.
> I've made an overview of the remaining architectures for my own reference[1].
> The remaining NOMMU architectures are:
>
> - arch/arm has ARMv7-M (Cortex-M microcontroller), which is actually
> gaining traction
ARMv7-R as well, also seems ARM is coming up with more !MMU's - v8-M,
v8-R. In addition, though only of academic interest, ARM MMU capable
platform's can run !MMU Linux.
afzal
> - arch/sh has an open-source J2 core that was added not that long ago,
> it seems to
> be the only SH compatible core that anyone is working on.
> - arch/microblaze supports both MMU/NOMMU modes (most use an MMU)
> - arch/m68k supports several NOMMU targets, both the coldfire SoCs and the
> classic processors
> - c6x has no MMU
^ permalink raw reply
* Re: [PATCH] [v2] Bluetooth: btrsi: rework dependencies
From: Kalle Valo @ 2018-03-16 4:54 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Marcel Holtmann, Johan Hedberg, Sebastian Reichel,
Amitkumar Karwar, Siva Rebbagondla, Linux Bluetooth mailing list,
LKML, linux-wireless, Networking
In-Reply-To: <CAK8P3a1DTbbRUoCoYsmaSAQBQhaePZ0Tx-g2+xuab3yRcGhLpw@mail.gmail.com>
Arnd Bergmann <arnd@arndb.de> writes:
> On Thu, Mar 15, 2018 at 7:30 PM, Marcel Holtmann <marcel@holtmann.org> wrote:
>> Hi Arnd,
>>
>>> The linkage between the bluetooth driver and the wireless
>>> driver is not defined properly, leading to build problems
>>> such as:
>>>
>>> warning: (BT_HCIRSI) selects RSI_COEX which has unmet direct
>>> dependencies (NETDEVICES && WLAN && WLAN_VENDOR_RSI && BT_HCIRSI &&
>>> RSI_91X)
>>> drivers/net/wireless/rsi/rsi_91x_main.o: In function `rsi_read_pkt':
>>> (.text+0x205): undefined reference to `rsi_bt_ops'
>>>
>>> As the dependency is actually the reverse (RSI_91X uses
>>> the BT_RSI driver, not the other way round), this changes
>>> the dependency to match, and enables the bluetooth driver
>>> from the RSI_COEX symbol.
>>>
>>> Fixes: 38aa4da50483 ("Bluetooth: btrsi: add new rsi bluetooth driver")
>>> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
>>> ---
>>> v2: Pick a different from v1
>>> ---
>>> drivers/bluetooth/Kconfig | 4 +---
>>> drivers/net/wireless/rsi/Kconfig | 4 +++-
>>> 2 files changed, 4 insertions(+), 4 deletions(-)
>>
>> Acked-by: Marcel Holtmann <marcel@holtmann.org>
>>
>> Since I think Kalle still has to take it through his tree until the
>> btrsi driver makes it into net-next.
Yes, I have to take this as I haven't sent the original patch to Dave
yet.
> Kalle, please wait for v3 though, I just ran into another build
> failure caused by a typo in v2.
Ok, I saw it.
--
Kalle Valo
^ permalink raw reply
* Re: [PATCH RFC 2/2] virtio_ring: support packed ring
From: Tiwei Bie @ 2018-03-16 6:10 UTC (permalink / raw)
To: Jason Wang; +Cc: mst, netdev, linux-kernel, virtualization, wexu
In-Reply-To: <8f73267a-e20e-de64-d8e0-3fd608dbf368@redhat.com>
On Fri, Mar 16, 2018 at 12:03:25PM +0800, Jason Wang wrote:
> On 2018年02月23日 19:18, Tiwei Bie wrote:
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > ---
> > drivers/virtio/virtio_ring.c | 699 +++++++++++++++++++++++++++++++++++++------
> > include/linux/virtio_ring.h | 8 +-
> > 2 files changed, 618 insertions(+), 89 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index eb30f3e09a47..393778a2f809 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -58,14 +58,14 @@
> > struct vring_desc_state {
> > void *data; /* Data for callback. */
> > - struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
> > + void *indir_desc; /* Indirect descriptor, if any. */
> > + int num; /* Descriptor list length. */
> > };
> > struct vring_virtqueue {
> > struct virtqueue vq;
> > - /* Actual memory layout for this queue */
> > - struct vring vring;
> > + bool packed;
> > /* Can we use weak barriers? */
> > bool weak_barriers;
> > @@ -87,11 +87,28 @@ struct vring_virtqueue {
> > /* Last used index we've seen. */
> > u16 last_used_idx;
> > - /* Last written value to avail->flags */
> > - u16 avail_flags_shadow;
> > -
> > - /* Last written value to avail->idx in guest byte order */
> > - u16 avail_idx_shadow;
> > + union {
> > + /* Available for split ring */
> > + struct {
> > + /* Actual memory layout for this queue */
> > + struct vring vring;
> > +
> > + /* Last written value to avail->flags */
> > + u16 avail_flags_shadow;
> > +
> > + /* Last written value to avail->idx in
> > + * guest byte order */
> > + u16 avail_idx_shadow;
> > + };
> > +
> > + /* Available for packed ring */
> > + struct {
> > + /* Actual memory layout for this queue */
> > + struct vring_packed vring_packed;
> > + u8 wrap_counter : 1;
> > + bool chaining;
> > + };
> > + };
> > /* How to notify other side. FIXME: commonalize hcalls! */
> > bool (*notify)(struct virtqueue *vq);
> > @@ -201,26 +218,37 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
> > cpu_addr, size, direction);
> > }
> > -static void vring_unmap_one(const struct vring_virtqueue *vq,
> > - struct vring_desc *desc)
> > +static void vring_unmap_one(const struct vring_virtqueue *vq, void *_desc)
> > {
>
> Let's split the helpers to packed/split version like other helpers?
> (Consider the caller has already known the type of vq).
Okay.
>
> > + u64 addr;
> > + u32 len;
> > u16 flags;
> > if (!vring_use_dma_api(vq->vq.vdev))
> > return;
> > - flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > + if (vq->packed) {
> > + struct vring_packed_desc *desc = _desc;
> > +
> > + addr = virtio64_to_cpu(vq->vq.vdev, desc->addr);
> > + len = virtio32_to_cpu(vq->vq.vdev, desc->len);
> > + flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > + } else {
> > + struct vring_desc *desc = _desc;
> > +
> > + addr = virtio64_to_cpu(vq->vq.vdev, desc->addr);
> > + len = virtio32_to_cpu(vq->vq.vdev, desc->len);
> > + flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
> > + }
> > if (flags & VRING_DESC_F_INDIRECT) {
> > dma_unmap_single(vring_dma_dev(vq),
> > - virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > - virtio32_to_cpu(vq->vq.vdev, desc->len),
> > + addr, len,
> > (flags & VRING_DESC_F_WRITE) ?
> > DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > } else {
> > dma_unmap_page(vring_dma_dev(vq),
> > - virtio64_to_cpu(vq->vq.vdev, desc->addr),
> > - virtio32_to_cpu(vq->vq.vdev, desc->len),
> > + addr, len,
> > (flags & VRING_DESC_F_WRITE) ?
> > DMA_FROM_DEVICE : DMA_TO_DEVICE);
> > }
> > @@ -235,8 +263,9 @@ static int vring_mapping_error(const struct vring_virtqueue *vq,
> > return dma_mapping_error(vring_dma_dev(vq), addr);
> > }
> > -static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
> > - unsigned int total_sg, gfp_t gfp)
> > +static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
> > + unsigned int total_sg,
> > + gfp_t gfp)
> > {
> > struct vring_desc *desc;
> > unsigned int i;
> > @@ -257,14 +286,32 @@ static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
> > return desc;
> > }
> > -static inline int virtqueue_add(struct virtqueue *_vq,
> > - struct scatterlist *sgs[],
> > - unsigned int total_sg,
> > - unsigned int out_sgs,
> > - unsigned int in_sgs,
> > - void *data,
> > - void *ctx,
> > - gfp_t gfp)
> > +static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
> > + unsigned int total_sg,
> > + gfp_t gfp)
> > +{
> > + struct vring_packed_desc *desc;
> > +
> > + /*
> > + * We require lowmem mappings for the descriptors because
> > + * otherwise virt_to_phys will give us bogus addresses in the
> > + * virtqueue.
> > + */
> > + gfp &= ~__GFP_HIGHMEM;
> > +
> > + desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
> > +
> > + return desc;
> > +}
> > +
> > +static inline int virtqueue_add_split(struct virtqueue *_vq,
> > + struct scatterlist *sgs[],
> > + unsigned int total_sg,
> > + unsigned int out_sgs,
> > + unsigned int in_sgs,
> > + void *data,
> > + void *ctx,
> > + gfp_t gfp)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > struct scatterlist *sg;
> > @@ -303,7 +350,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> > /* If the host supports indirect descriptor tables, and we have multiple
> > * buffers, then go indirect. FIXME: tune this threshold */
> > if (vq->indirect && total_sg > 1 && vq->vq.num_free)
> > - desc = alloc_indirect(_vq, total_sg, gfp);
> > + desc = alloc_indirect_split(_vq, total_sg, gfp);
> > else {
> > desc = NULL;
> > WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
> > @@ -437,6 +484,243 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> > return -EIO;
> > }
> > +static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > + struct scatterlist *sgs[],
> > + unsigned int total_sg,
> > + unsigned int out_sgs,
> > + unsigned int in_sgs,
> > + void *data,
> > + void *ctx,
> > + gfp_t gfp)
> > +{
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > + struct vring_packed_desc *desc;
> > + struct scatterlist *sg;
> > + unsigned int i, n, descs_used, uninitialized_var(prev), err_idx;
> > + __virtio16 uninitialized_var(head_flags), flags;
> > + int head, wrap_counter;
> > + bool indirect;
> > +
> > + START_USE(vq);
> > +
> > + BUG_ON(data == NULL);
> > + BUG_ON(ctx && vq->indirect);
> > +
> > + if (unlikely(vq->broken)) {
> > + END_USE(vq);
> > + return -EIO;
> > + }
> > +
> > + if (total_sg > 1 && !vq->chaining && !vq->indirect) {
> > + END_USE(vq);
> > + return -ENOTSUPP;
> > + }
> > +
> > +#ifdef DEBUG
> > + {
> > + ktime_t now = ktime_get();
> > +
> > + /* No kick or get, with .1 second between? Warn. */
> > + if (vq->last_add_time_valid)
> > + WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
> > + > 100);
> > + vq->last_add_time = now;
> > + vq->last_add_time_valid = true;
> > + }
> > +#endif
> > +
> > + BUG_ON(total_sg == 0);
> > +
> > + head = vq->free_head;
> > + wrap_counter = vq->wrap_counter;
> > +
> > + /* If the host supports indirect descriptor tables, and we have multiple
> > + * buffers, then go indirect. FIXME: tune this threshold */
> > + if (vq->indirect && total_sg > 1 && vq->vq.num_free)
> > + desc = alloc_indirect_packed(_vq, total_sg, gfp);
> > + else {
> > + desc = NULL;
> > + WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
> > + }
> > +
> > + if (desc) {
> > + /* Use a single buffer which doesn't continue */
> > + indirect = true;
> > + /* Set up rest to use this indirect table. */
> > + i = 0;
> > + descs_used = 1;
> > + } else {
> > + indirect = false;
> > + desc = vq->vring_packed.desc;
> > + i = head;
> > + descs_used = total_sg;
> > +
> > + if (total_sg > 1 && !vq->chaining) {
> > + END_USE(vq);
> > + return -ENOTSUPP;
> > + }
> > + }
> > +
> > + if (vq->vq.num_free < descs_used) {
> > + pr_debug("Can't add buf len %i - avail = %i\n",
> > + descs_used, vq->vq.num_free);
> > + /* FIXME: for historical reasons, we force a notify here if
> > + * there are outgoing parts to the buffer. Presumably the
> > + * host should service the ring ASAP. */
> > + if (out_sgs)
> > + vq->notify(&vq->vq);
> > + if (indirect)
> > + kfree(desc);
> > + END_USE(vq);
> > + return -ENOSPC;
> > + }
> > +
> > + for (n = 0; n < out_sgs; n++) {
> > + for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
> > + if (vring_mapping_error(vq, addr))
> > + goto unmap_release;
> > +
> > + flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
> > + VRING_DESC_F_AVAIL(vq->wrap_counter) |
> > + VRING_DESC_F_USED(!vq->wrap_counter));
> > + if (!indirect && i == head)
> > + head_flags = flags;
> > + else
> > + desc[i].flags = flags;
> > +
> > + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> > + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
> > + desc[i].id = cpu_to_virtio32(_vq->vdev, head);
>
> If it's a part of chain, we only need to do this for last buffer I think.
I'm not sure I've got your point about the "last buffer".
But, yes, id just needs to be set for the last desc.
>
> > + prev = i;
> > + i++;
>
> It looks to me prev is always i - 1?
No. prev will be (vq->vring_packed.num - 1) when i becomes 0.
>
> > + if (!indirect && i >= vq->vring_packed.num) {
> > + i = 0;
> > + vq->wrap_counter ^= 1;
> > + }
> > + }
> > + }
> > + for (; n < (out_sgs + in_sgs); n++) {
> > + for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
> > + if (vring_mapping_error(vq, addr))
> > + goto unmap_release;
> > +
> > + flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
> > + VRING_DESC_F_WRITE |
> > + VRING_DESC_F_AVAIL(vq->wrap_counter) |
> > + VRING_DESC_F_USED(!vq->wrap_counter));
> > + if (!indirect && i == head)
> > + head_flags = flags;
> > + else
> > + desc[i].flags = flags;
> > +
> > + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> > + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
> > + desc[i].id = cpu_to_virtio32(_vq->vdev, head);
> > + prev = i;
> > + i++;
> > + if (!indirect && i >= vq->vring_packed.num) {
> > + i = 0;
> > + vq->wrap_counter ^= 1;
> > + }
> > + }
> > + }
> > + /* Last one doesn't continue. */
> > + if (!indirect && (head + 1) % vq->vring_packed.num == i)
> > + head_flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
>
> I can't get the why we need this here.
If only one desc is used, we will need to clear the
VRING_DESC_F_NEXT flag from the head_flags.
>
> > + else
> > + desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
> > +
> > + if (indirect) {
> > + /* FIXME: to be implemented */
> > +
> > + /* Now that the indirect table is filled in, map it. */
> > + dma_addr_t addr = vring_map_single(
> > + vq, desc, total_sg * sizeof(struct vring_packed_desc),
> > + DMA_TO_DEVICE);
> > + if (vring_mapping_error(vq, addr))
> > + goto unmap_release;
> > +
> > + head_flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT |
> > + VRING_DESC_F_AVAIL(wrap_counter) |
> > + VRING_DESC_F_USED(!wrap_counter));
> > + vq->vring_packed.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
> > + vq->vring_packed.desc[head].len = cpu_to_virtio32(_vq->vdev,
> > + total_sg * sizeof(struct vring_packed_desc));
> > + vq->vring_packed.desc[head].id = cpu_to_virtio32(_vq->vdev, head);
> > + }
> > +
> > + /* We're using some buffers from the free list. */
> > + vq->vq.num_free -= descs_used;
> > +
> > + /* Update free pointer */
> > + if (indirect) {
> > + n = head + 1;
> > + if (n >= vq->vring_packed.num) {
> > + n = 0;
> > + vq->wrap_counter ^= 1;
> > + }
> > + vq->free_head = n;
>
> detach_buf_packed() does not even touch free_head here, so need to explain
> its meaning for packed ring.
Above code is for indirect support which isn't really
implemented in this patch yet.
For your question, free_head stores the index of the
next avail desc. I'll add a comment for it or move it
to union and give it a better name in next version.
>
> > + } else
> > + vq->free_head = i;
>
> ID is only valid in the last descriptor in the list, so head + 1 should be
> ok too?
I don't really get your point. The vq->free_head stores
the index of the next avail desc.
>
> > +
> > + /* Store token and indirect buffer state. */
> > + vq->desc_state[head].num = descs_used;
> > + vq->desc_state[head].data = data;
> > + if (indirect)
> > + vq->desc_state[head].indir_desc = desc;
> > + else
> > + vq->desc_state[head].indir_desc = ctx;
> > +
> > + virtio_wmb(vq->weak_barriers);
>
> Let's add a comment to explain the barrier here.
Okay.
>
> > + vq->vring_packed.desc[head].flags = head_flags;
> > + vq->num_added++;
> > +
> > + pr_debug("Added buffer head %i to %p\n", head, vq);
> > + END_USE(vq);
> > +
> > + return 0;
> > +
> > +unmap_release:
> > + err_idx = i;
> > + i = head;
> > +
> > + for (n = 0; n < total_sg; n++) {
> > + if (i == err_idx)
> > + break;
> > + vring_unmap_one(vq, &desc[i]);
> > + i++;
> > + if (!indirect && i >= vq->vring_packed.num)
> > + i = 0;
> > + }
> > +
> > + vq->wrap_counter = wrap_counter;
> > +
> > + if (indirect)
> > + kfree(desc);
> > +
> > + END_USE(vq);
> > + return -EIO;
> > +}
> > +
> > +static inline int virtqueue_add(struct virtqueue *_vq,
> > + struct scatterlist *sgs[],
> > + unsigned int total_sg,
> > + unsigned int out_sgs,
> > + unsigned int in_sgs,
> > + void *data,
> > + void *ctx,
> > + gfp_t gfp)
> > +{
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > + return vq->packed ? virtqueue_add_packed(_vq, sgs, total_sg, out_sgs,
> > + in_sgs, data, ctx, gfp) :
> > + virtqueue_add_split(_vq, sgs, total_sg, out_sgs,
> > + in_sgs, data, ctx, gfp);
> > +}
> > +
> > /**
> > * virtqueue_add_sgs - expose buffers to other end
> > * @vq: the struct virtqueue we're talking about.
> > @@ -561,6 +845,12 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
> > * event. */
> > virtio_mb(vq->weak_barriers);
> > + if (vq->packed) {
> > + /* FIXME: to be implemented */
> > + needs_kick = true;
> > + goto out;
> > + }
> > +
> > old = vq->avail_idx_shadow - vq->num_added;
> > new = vq->avail_idx_shadow;
> > vq->num_added = 0;
> > @@ -579,6 +869,8 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
> > } else {
> > needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
> > }
> > +
> > +out:
> > END_USE(vq);
> > return needs_kick;
> > }
> > @@ -628,8 +920,8 @@ bool virtqueue_kick(struct virtqueue *vq)
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_kick);
> > -static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
> > - void **ctx)
> > +static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > + void **ctx)
> > {
> > unsigned int i, j;
> > __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> > @@ -677,29 +969,81 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
> > }
> > }
> > -static inline bool more_used(const struct vring_virtqueue *vq)
> > +static void detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > + void **ctx)
> > +{
> > + struct vring_packed_desc *desc;
> > + unsigned int i, j;
> > +
> > + /* Clear data ptr. */
> > + vq->desc_state[head].data = NULL;
> > +
> > + i = head;
> > +
> > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > + desc = &vq->vring_packed.desc[i];
> > + vring_unmap_one(vq, desc);
> > + i++;
> > + if (i >= vq->vring_packed.num)
> > + i = 0;
> > + }
> > +
> > + vq->vq.num_free += vq->desc_state[head].num;
>
> It looks to me vq->free_head grows always, how can we make sure it does not
> exceeds vq.num here?
The vq->free_head stores the index of the next avail
desc. You can find it wraps together with vq->wrap_counter
in virtqueue_add_packed().
>
> > +
> > + if (vq->indirect) {
> > + u32 len;
> > +
> > + desc = vq->desc_state[head].indir_desc;
> > + /* Free the indirect table, if any, now that it's unmapped. */
> > + if (!desc)
> > + return;
> > +
> > + len = virtio32_to_cpu(vq->vq.vdev,
> > + vq->vring_packed.desc[head].len);
> > +
> > + BUG_ON(!(vq->vring_packed.desc[head].flags &
> > + cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
> > + BUG_ON(len == 0 || len % sizeof(struct vring_packed_desc));
> > +
> > + for (j = 0; j < len / sizeof(struct vring_packed_desc); j++)
> > + vring_unmap_one(vq, &desc[j]);
> > +
> > + kfree(desc);
> > + vq->desc_state[head].indir_desc = NULL;
> > + } else if (ctx) {
> > + *ctx = vq->desc_state[head].indir_desc;
> > + }
> > +}
> > +
> > +static inline bool more_used_split(const struct vring_virtqueue *vq)
> > {
> > return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
> > }
> > -/**
> > - * virtqueue_get_buf - get the next used buffer
> > - * @vq: the struct virtqueue we're talking about.
> > - * @len: the length written into the buffer
> > - *
> > - * If the device wrote data into the buffer, @len will be set to the
> > - * amount written. This means you don't need to clear the buffer
> > - * beforehand to ensure there's no data leakage in the case of short
> > - * writes.
> > - *
> > - * Caller must ensure we don't call this with other virtqueue
> > - * operations at the same time (except where noted).
> > - *
> > - * Returns NULL if there are no used buffers, or the "data" token
> > - * handed to virtqueue_add_*().
> > - */
> > -void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> > - void **ctx)
> > +static inline bool more_used_packed(const struct vring_virtqueue *vq)
> > +{
> > + u16 last_used, flags;
> > + bool avail, used;
> > +
> > + if (vq->vq.num_free == vq->vring.num)
> > + return false;
> > +
> > + last_used = vq->last_used_idx;
> > + flags = virtio16_to_cpu(vq->vq.vdev,
> > + vq->vring_packed.desc[last_used].flags);
> > + avail = flags & VRING_DESC_F_AVAIL(1);
> > + used = flags & VRING_DESC_F_USED(1);
> > +
> > + return avail == used;
> > +}
> > +
> > +static inline bool more_used(const struct vring_virtqueue *vq)
> > +{
> > + return vq->packed ? more_used_packed(vq) : more_used_split(vq);
> > +}
> > +
> > +void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, unsigned int *len,
> > + void **ctx)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > void *ret;
> > @@ -735,9 +1079,9 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> > return NULL;
> > }
> > - /* detach_buf clears data, so grab it now. */
> > + /* detach_buf_split clears data, so grab it now. */
> > ret = vq->desc_state[i].data;
> > - detach_buf(vq, i, ctx);
> > + detach_buf_split(vq, i, ctx);
> > vq->last_used_idx++;
> > /* If we expect an interrupt for the next entry, tell host
> > * by writing event index and flush out the write before
> > @@ -754,6 +1098,87 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> > END_USE(vq);
> > return ret;
> > }
> > +
> > +void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, unsigned int *len,
> > + void **ctx)
> > +{
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > + void *ret;
> > + unsigned int i;
> > + u16 last_used;
> > +
> > + START_USE(vq);
> > +
> > + if (unlikely(vq->broken)) {
> > + END_USE(vq);
> > + return NULL;
> > + }
> > +
> > + if (!more_used(vq)) {
> > + pr_debug("No more buffers in queue\n");
> > + END_USE(vq);
> > + return NULL;
> > + }
> > +
> > + /* Only get used array entries after they have been exposed by host. */
> > + virtio_rmb(vq->weak_barriers);
> > +
> > + last_used = vq->last_used_idx;
> > +
> > + i = virtio32_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].id);
> > + *len = virtio32_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].len);
> > +
> > + if (unlikely(i >= vq->vring_packed.num)) {
> > + BAD_RING(vq, "id %u out of range\n", i);
> > + return NULL;
> > + }
> > + if (unlikely(!vq->desc_state[i].data)) {
> > + BAD_RING(vq, "id %u is not a head!\n", i);
> > + return NULL;
> > + }
> > +
> > + /* detach_buf_packed clears data, so grab it now. */
> > + ret = vq->desc_state[i].data;
> > + detach_buf_packed(vq, i, ctx);
> > +
> > + vq->last_used_idx += vq->desc_state[i].num;
> > + if (vq->last_used_idx >= vq->vring_packed.num)
> > + vq->last_used_idx %= vq->vring_packed.num;
>
> '-=' should be sufficient here?
Good suggestion. I think so.
>
> > +
> > + // FIXME: implement the desc event support
> > +
> > +#ifdef DEBUG
> > + vq->last_add_time_valid = false;
> > +#endif
> > +
> > + END_USE(vq);
> > + return ret;
> > +}
> > +
> > +/**
> > + * virtqueue_get_buf - get the next used buffer
> > + * @vq: the struct virtqueue we're talking about.
> > + * @len: the length written into the buffer
> > + *
> > + * If the device wrote data into the buffer, @len will be set to the
> > + * amount written. This means you don't need to clear the buffer
> > + * beforehand to ensure there's no data leakage in the case of short
> > + * writes.
> > + *
> > + * Caller must ensure we don't call this with other virtqueue
> > + * operations at the same time (except where noted).
> > + *
> > + * Returns NULL if there are no used buffers, or the "data" token
> > + * handed to virtqueue_add_*().
> > + */
> > +void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> > + void **ctx)
> > +{
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > + return vq->packed ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
> > + virtqueue_get_buf_ctx_split(_vq, len, ctx);
> > +}
> > EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
> > void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
> > @@ -761,6 +1186,24 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
> > return virtqueue_get_buf_ctx(_vq, len, NULL);
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_get_buf);
> > +
> > +static void virtqueue_disable_cb_split(struct virtqueue *_vq)
> > +{
> > + struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > + if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
> > + vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> > + if (!vq->event)
> > + vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
> > + vq->avail_flags_shadow);
> > + }
> > +}
> > +
> > +static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
> > +{
> > + // FIXME: to be implemented
> > +}
> > +
> > /**
> > * virtqueue_disable_cb - disable callbacks
> > * @vq: the struct virtqueue we're talking about.
> > @@ -774,12 +1217,10 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > - if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
> > - vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> > - if (!vq->event)
> > - vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
> > - }
> > -
> > + if (vq->packed)
> > + virtqueue_disable_cb_packed(_vq);
> > + else
> > + virtqueue_disable_cb_split(_vq);
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
> > @@ -802,6 +1243,12 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
> > START_USE(vq);
> > + if (vq->packed) {
> > + // FIXME: to be implemented
> > + last_used_idx = vq->last_used_idx;
> > + goto out;
> > + }
> > +
> > /* We optimistically turn back on interrupts, then check if there was
> > * more to do. */
> > /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
> > @@ -813,6 +1260,7 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
> > vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
> > }
> > vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
> > +out:
> > END_USE(vq);
> > return last_used_idx;
> > }
> > @@ -832,6 +1280,12 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > virtio_mb(vq->weak_barriers);
> > + if (vq->packed) {
> > + u16 flags = virtio16_to_cpu(vq->vq.vdev,
> > + vq->vring_packed.desc[last_used_idx].flags);
> > + return !(flags & VRING_DESC_F_AVAIL(1)) ==
> > + !(flags & VRING_DESC_F_USED(1));
> > + }
> > return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_poll);
> > @@ -874,6 +1328,11 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
> > START_USE(vq);
> > + if (vq->packed) {
> > + // FIXME: to be implemented
> > + goto out;
> > + }
> > +
> > /* We optimistically turn back on interrupts, then check if there was
> > * more to do. */
> > /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > @@ -896,6 +1355,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
> > return false;
> > }
> > +out:
> > END_USE(vq);
> > return true;
> > }
> > @@ -922,14 +1382,20 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
> > continue;
> > /* detach_buf clears data, so grab it now. */
> > buf = vq->desc_state[i].data;
> > - detach_buf(vq, i, NULL);
> > - vq->avail_idx_shadow--;
> > - vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
> > + if (vq->packed)
> > + detach_buf_packed(vq, i, NULL);
> > + else {
> > + detach_buf_split(vq, i, NULL);
> > + vq->avail_idx_shadow--;
> > + vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev,
> > + vq->avail_idx_shadow);
> > + }
> > END_USE(vq);
> > return buf;
> > }
> > /* That should have freed everything. */
> > - BUG_ON(vq->vq.num_free != vq->vring.num);
> > + BUG_ON(vq->vq.num_free != (vq->packed ? vq->vring_packed.num :
> > + vq->vring.num));
> > END_USE(vq);
> > return NULL;
> > @@ -957,7 +1423,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
> > EXPORT_SYMBOL_GPL(vring_interrupt);
> > struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > - struct vring vring,
> > + union vring_union vring,
> > + bool packed,
> > struct virtio_device *vdev,
> > bool weak_barriers,
> > bool context,
> > @@ -965,19 +1432,20 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > void (*callback)(struct virtqueue *),
> > const char *name)
> > {
> > - unsigned int i;
> > + unsigned int num, i;
> > struct vring_virtqueue *vq;
> > - vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
> > + num = packed ? vring.vring_packed.num : vring.vring_split.num;
> > +
> > + vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
> > GFP_KERNEL);
> > if (!vq)
> > return NULL;
> > - vq->vring = vring;
> > vq->vq.callback = callback;
> > vq->vq.vdev = vdev;
> > vq->vq.name = name;
> > - vq->vq.num_free = vring.num;
> > + vq->vq.num_free = num;
> > vq->vq.index = index;
> > vq->we_own_ring = false;
> > vq->queue_dma_addr = 0;
> > @@ -986,9 +1454,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > vq->weak_barriers = weak_barriers;
> > vq->broken = false;
> > vq->last_used_idx = 0;
> > - vq->avail_flags_shadow = 0;
> > - vq->avail_idx_shadow = 0;
> > vq->num_added = 0;
> > + vq->packed = packed;
> > list_add_tail(&vq->vq.list, &vdev->vqs);
> > #ifdef DEBUG
> > vq->in_use = false;
> > @@ -999,18 +1466,41 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > !context;
> > vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
> > + if (vq->packed) {
> > + vq->vring_packed = vring.vring_packed;
> > + vq->free_head = 0;
> > + vq->wrap_counter = 1;
> > +
> > +#if 0
> > + vq->chaining = virtio_has_feature(vdev,
> > + VIRTIO_RING_F_LIST_DESC);
> > +#else
> > + vq->chaining = true;
>
> Looks like in V10 there's no F_LIST_DESC.
Yes. I kept this in this patch just because the
desc chaining is optional in the old spec draft
when sending out this patch set. I'll remove it
in next version.
>
> > +#endif
> > + } else {
> > + vq->vring = vring.vring_split;
> > + vq->avail_flags_shadow = 0;
> > + vq->avail_idx_shadow = 0;
> > +
> > + /* Put everything in free lists. */
> > + vq->free_head = 0;
> > + for (i = 0; i < num-1; i++)
> > + vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
> > + }
> > +
> > /* No callback? Tell other side not to bother us. */
> > if (!callback) {
> > - vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> > - if (!vq->event)
> > - vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
> > + if (packed) {
> > + // FIXME: to be implemented
> > + } else {
> > + vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> > + if (!vq->event)
> > + vq->vring.avail->flags = cpu_to_virtio16(vdev,
> > + vq->avail_flags_shadow);
> > + }
> > }
> > - /* Put everything in free lists. */
> > - vq->free_head = 0;
> > - for (i = 0; i < vring.num-1; i++)
> > - vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
> > - memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
> > + memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
> > return &vq->vq;
> > }
> > @@ -1058,6 +1548,14 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size,
> > }
> > }
> > +static inline int
> > +__vring_size(unsigned int num, unsigned long align, bool packed)
> > +{
> > + if (packed)
> > + return vring_packed_size(num, align);
> > + return vring_size(num, align);
> > +}
> > +
> > struct virtqueue *vring_create_virtqueue(
> > unsigned int index,
> > unsigned int num,
> > @@ -1074,7 +1572,8 @@ struct virtqueue *vring_create_virtqueue(
> > void *queue = NULL;
> > dma_addr_t dma_addr;
> > size_t queue_size_in_bytes;
> > - struct vring vring;
> > + union vring_union vring;
> > + bool packed;
> > /* We assume num is a power of 2. */
> > if (num & (num - 1)) {
> > @@ -1082,9 +1581,13 @@ struct virtqueue *vring_create_virtqueue(
> > return NULL;
> > }
> > + packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
> > +
> > /* TODO: allocate each queue chunk individually */
> > - for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
> > - queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
> > + for (; num && __vring_size(num, vring_align, packed) > PAGE_SIZE;
> > + num /= 2) {
> > + queue = vring_alloc_queue(vdev, __vring_size(num, vring_align,
> > + packed),
> > &dma_addr,
> > GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
> > if (queue)
> > @@ -1096,17 +1599,21 @@ struct virtqueue *vring_create_virtqueue(
> > if (!queue) {
> > /* Try to get a single page. You are my only hope! */
> > - queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
> > + queue = vring_alloc_queue(vdev, __vring_size(num, vring_align,
> > + packed),
> > &dma_addr, GFP_KERNEL|__GFP_ZERO);
> > }
> > if (!queue)
> > return NULL;
> > - queue_size_in_bytes = vring_size(num, vring_align);
> > - vring_init(&vring, num, queue, vring_align);
> > + queue_size_in_bytes = __vring_size(num, vring_align, packed);
> > + if (packed)
> > + vring_packed_init(&vring.vring_packed, num, queue, vring_align);
> > + else
> > + vring_init(&vring.vring_split, num, queue, vring_align);
>
> Let's rename vring_init to vring_init_split() like other helpers?
The vring_init() is a public API in include/uapi/linux/virtio_ring.h.
I don't think we can rename it.
>
> > - vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
> > - notify, callback, name);
> > + vq = __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
> > + context, notify, callback, name);
> > if (!vq) {
> > vring_free_queue(vdev, queue_size_in_bytes, queue,
> > dma_addr);
> > @@ -1132,10 +1639,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
> > void (*callback)(struct virtqueue *vq),
> > const char *name)
> > {
> > - struct vring vring;
> > - vring_init(&vring, num, pages, vring_align);
> > - return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
> > - notify, callback, name);
> > + union vring_union vring;
> > + bool packed;
> > +
> > + packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
> > + if (packed)
> > + vring_packed_init(&vring.vring_packed, num, pages, vring_align);
> > + else
> > + vring_init(&vring.vring_split, num, pages, vring_align);
> > +
> > + return __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
> > + context, notify, callback, name);
> > }
> > EXPORT_SYMBOL_GPL(vring_new_virtqueue);
> > @@ -1145,7 +1659,9 @@ void vring_del_virtqueue(struct virtqueue *_vq)
> > if (vq->we_own_ring) {
> > vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
> > - vq->vring.desc, vq->queue_dma_addr);
> > + vq->packed ? (void *)vq->vring_packed.desc :
> > + (void *)vq->vring.desc,
> > + vq->queue_dma_addr);
> > }
> > list_del(&_vq->list);
> > kfree(vq);
> > @@ -1159,14 +1675,18 @@ void vring_transport_features(struct virtio_device *vdev)
> > for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
> > switch (i) {
> > +#if 0 // FIXME: to be implemented
> > case VIRTIO_RING_F_INDIRECT_DESC:
> > break;
> > +#endif
> > case VIRTIO_RING_F_EVENT_IDX:
> > break;
> > case VIRTIO_F_VERSION_1:
> > break;
> > case VIRTIO_F_IOMMU_PLATFORM:
> > break;
> > + case VIRTIO_F_RING_PACKED:
> > + break;
> > default:
> > /* We don't understand this bit. */
> > __virtio_clear_bit(vdev, i);
> > @@ -1187,7 +1707,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > - return vq->vring.num;
> > + return vq->packed ? vq->vring_packed.num : vq->vring.num;
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
> > @@ -1224,6 +1744,7 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
> > +/* Only available for split ring */
>
> Interesting, I think we need this for correctly configure pci. e.g in
> setup_vq()?
Yes. The setup_vq() should be updated. But it requires
QEMU change, so I just kept it as is in this RFC patch.
>
> > dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > @@ -1235,6 +1756,7 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
> > +/* Only available for split ring */
> > dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
>
> Maybe it's better to rename this to get_device_addr().
It's a kernel API which has been exported by EXPORT_SYMBOL_GPL(),
I'm not sure whether it's a good idea to rename it.
Best regards,
Tiwei Bie
>
> Thanks
>
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > @@ -1246,6 +1768,7 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
> > +/* Only available for split ring */
> > const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > {
> > return &to_vvq(vq)->vring;
> > diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
> > index bbf32524ab27..a0075894ad16 100644
> > --- a/include/linux/virtio_ring.h
> > +++ b/include/linux/virtio_ring.h
> > @@ -60,6 +60,11 @@ static inline void virtio_store_mb(bool weak_barriers,
> > struct virtio_device;
> > struct virtqueue;
> > +union vring_union {
> > + struct vring vring_split;
> > + struct vring_packed vring_packed;
> > +};
> > +
> > /*
> > * Creates a virtqueue and allocates the descriptor ring. If
> > * may_reduce_num is set, then this may allocate a smaller ring than
> > @@ -79,7 +84,8 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
> > /* Creates a virtqueue with a custom layout. */
> > struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > - struct vring vring,
> > + union vring_union vring,
> > + bool packed,
> > struct virtio_device *vdev,
> > bool weak_barriers,
> > bool ctx,
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* [PATCH bpf-next 0/4] tools: bpf: minor build improvements
From: Jakub Kicinski @ 2018-03-16 6:26 UTC (permalink / raw)
To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
Hi!
As promised this series addresses nits and minor issues in tools/bpf
build infra. One GCC-7 warning which is nice to get rid of. Dependencies
when built with OUTPUT are fixed. make clean will now remove the
FEATURE-DUMP.* files. PHONY target is also updated to match reality.
Jakub Kicinski (4):
tools: bpftool: fix dependency file path
tools: bpftool: fix potential format truncation
tools: bpf: cleanup PHONY target
tools: bpf: remove feature detection output
tools/bpf/Makefile | 4 +++-
tools/bpf/bpftool/Makefile | 4 +++-
tools/bpf/bpftool/xlated_dumper.h | 2 +-
3 files changed, 7 insertions(+), 3 deletions(-)
--
2.15.1
^ permalink raw reply
* [PATCH bpf-next 1/4] tools: bpftool: fix dependency file path
From: Jakub Kicinski @ 2018-03-16 6:26 UTC (permalink / raw)
To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
In-Reply-To: <20180316062617.10254-1-jakub.kicinski@netronome.com>
Auto-generated dependency files are in the OUTPUT directory,
we need to include them from there. This fixes object files
not being rebuilt after header changes.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
tools/bpf/bpftool/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 4c2867481f5c..50a715ac9e8b 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -70,7 +70,7 @@ ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
-include $(wildcard *.d)
+include $(wildcard $(OUTPUT)*.d)
all: $(OUTPUT)bpftool
--
2.15.1
^ permalink raw reply related
* [PATCH bpf-next 4/4] tools: bpf: remove feature detection output
From: Jakub Kicinski @ 2018-03-16 6:26 UTC (permalink / raw)
To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
In-Reply-To: <20180316062617.10254-1-jakub.kicinski@netronome.com>
bpf tools use feature detection for libbfd dependency, clean up
the output files on make clean.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
tools/bpf/Makefile | 2 ++
tools/bpf/bpftool/Makefile | 2 ++
2 files changed, 4 insertions(+)
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index acfaf4c8cc32..1ea545965ee3 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -81,6 +81,8 @@ clean: bpftool_clean
$(call QUIET_CLEAN, bpf-progs)
$(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
$(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
install: $(PROGS) bpftool_install
$(call QUIET_INSTALL, bpf_jit_disasm)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 50a715ac9e8b..4e69782c4a79 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -89,6 +89,8 @@ $(OUTPUT)%.o: %.c
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
install: $(OUTPUT)bpftool
$(call QUIET_INSTALL, bpftool)
--
2.15.1
^ permalink raw reply related
* [PATCH bpf-next 2/4] tools: bpftool: fix potential format truncation
From: Jakub Kicinski @ 2018-03-16 6:26 UTC (permalink / raw)
To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
In-Reply-To: <20180316062617.10254-1-jakub.kicinski@netronome.com>
GCC 7 complains:
xlated_dumper.c: In function ‘print_call’:
xlated_dumper.c:179:10: warning: ‘%s’ directive output may be truncated writing up to 255 bytes into a region of size between 249 and 253 [-Wformat-truncation=]
"%+d#%s", insn->off, sym->name);
Add a bit more space to the buffer so it can handle the entire
string and integer without truncation.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
tools/bpf/bpftool/xlated_dumper.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index 51c935d38ae2..b34affa7ef2d 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -49,7 +49,7 @@ struct dump_data {
unsigned long address_call_base;
struct kernel_sym *sym_mapping;
__u32 sym_count;
- char scratch_buff[SYM_MAX_NAME];
+ char scratch_buff[SYM_MAX_NAME + 8];
};
void kernel_syms_load(struct dump_data *dd);
--
2.15.1
^ permalink raw reply related
* [PATCH bpf-next 3/4] tools: bpf: cleanup PHONY target
From: Jakub Kicinski @ 2018-03-16 6:26 UTC (permalink / raw)
To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
In-Reply-To: <20180316062617.10254-1-jakub.kicinski@netronome.com>
There is no FORCE target in the Makefile and some of the PHONY
targets are missing, update the list.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
tools/bpf/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index c07b4e718eeb..acfaf4c8cc32 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -100,4 +100,4 @@ install: $(PROGS) bpftool_install
bpftool_clean:
$(call descend,bpftool,clean)
-.PHONY: bpftool FORCE
+.PHONY: all install clean bpftool bpftool_install bpftool_clean
--
2.15.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox