* [PATCH net-next 1/5] netvsc: fix return value for set_channels
2017-07-25 20:04 [PATCH net-next 0/5] netvsc: fixes and performance related changes Stephen Hemminger
@ 2017-07-25 20:04 ` Stephen Hemminger
2017-07-25 20:04 ` [PATCH net-next 2/5] netvsc: fix warnings reported by lockdep Stephen Hemminger
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Stephen Hemminger @ 2017-07-25 20:04 UTC (permalink / raw)
To: kys, haiyangz, sthemmin; +Cc: devel, netdev
The error and normal cases got swapped.
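For reference, rndis_filter_device_add() returns an ERR_PTR()-encoded pointer rather than NULL on failure, so PTR_ERR() only yields a meaningful error code in the failure branch. A minimal sketch of the convention (illustrative only; example_set_channels() is a hypothetical helper, not driver code):

    #include <linux/err.h>

    static int example_set_channels(struct hv_device *dev,
                                    struct netvsc_device_info *info)
    {
            struct netvsc_device *nvdev;

            nvdev = rndis_filter_device_add(dev, info);
            if (IS_ERR(nvdev))
                    return PTR_ERR(nvdev);  /* failure: extract -errno */

            return 0;                       /* success: nvdev is usable */
    }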
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
drivers/net/hyperv/netvsc_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 262486ce8e2a..f1eaf675d2e9 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -758,8 +758,8 @@ static int netvsc_set_channels(struct net_device *net,
if (!IS_ERR(nvdev)) {
netif_set_real_num_tx_queues(net, nvdev->num_chn);
netif_set_real_num_rx_queues(net, nvdev->num_chn);
- ret = PTR_ERR(nvdev);
} else {
+ ret = PTR_ERR(nvdev);
device_info.num_chn = orig;
rndis_filter_device_add(dev, &device_info);
}
--
2.11.0
* [PATCH net-next 2/5] netvsc: fix warnings reported by lockdep
2017-07-25 20:04 [PATCH net-next 0/5] netvsc: fixes and performance related changes Stephen Hemminger
2017-07-25 20:04 ` [PATCH net-next 1/5] netvsc: fix return value for set_channels Stephen Hemminger
@ 2017-07-25 20:04 ` Stephen Hemminger
2017-07-25 20:04 ` [PATCH net-next 3/5] netvsc: optimize receive completions Stephen Hemminger
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Stephen Hemminger @ 2017-07-25 20:04 UTC (permalink / raw)
To: kys, haiyangz, sthemmin; +Cc: devel, netdev
This includes a bunch of fixups for issues reported by sparse and
lockdep.
* ethtool routines can assume RTNL
* send is done with the RCU read lock held (and BH disabled); see the sketch below
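For reference, those two rules reduce to the following dereference pattern. This is an illustrative sketch only, not part of the patch; the helper names are hypothetical, while the context and field names follow the driver code in the diff below:

    /* ethtool ops are always invoked with RTNL held */
    static struct netvsc_device *get_nvdev_ethtool(struct net_device *dev)
    {
            struct net_device_context *ndc = netdev_priv(dev);

            return rtnl_dereference(ndc->nvdev);
    }

    /* the transmit path runs with bottom halves disabled, so the
     * RCU-BH variant is the correct annotation there
     */
    static struct netvsc_device *get_nvdev_tx(struct net_device *dev)
    {
            struct net_device_context *ndc = netdev_priv(dev);

            return rcu_dereference_bh(ndc->nvdev);
    }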
Most of the changes involve passing the internal device struct (netvsc_device)
as a parameter, so that code called both during initialization and on changes
to MTU or channels doesn't have to worry about whether the value
is protected by RTNL, by RCU, or by the fact that it isn't registered yet.
One special case is the callback that happens when a new VMBUS
channel is added. The setup code is waiting for completion with
RTNL held, so it is already safe.
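For reference, that special case ends up using a raw dereference with a comment documenting why it is safe; a minimal sketch of the idea (hypothetical helper name, same pattern as the netvsc_sc_open() change in the diff below):

    /* Called only while device setup is blocked on channel_init_wait
     * with RTNL held, so the pointer cannot change underneath us;
     * rcu_dereference_raw() records that the protection comes from
     * the caller rather than from an RCU read-side section.
     */
    static struct netvsc_device *get_nvdev_sc_open(struct net_device *ndev)
    {
            struct net_device_context *ndev_ctx = netdev_priv(ndev);

            return rcu_dereference_raw(ndev_ctx->nvdev);
    }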
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
drivers/net/hyperv/hyperv_net.h | 3 +-
drivers/net/hyperv/netvsc.c | 2 +-
drivers/net/hyperv/netvsc_drv.c | 15 ++++---
drivers/net/hyperv/rndis_filter.c | 84 +++++++++++++++++++--------------------
4 files changed, 54 insertions(+), 50 deletions(-)
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 4e7ff348327e..fb62ea632914 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -217,7 +217,8 @@ int rndis_filter_receive(struct net_device *ndev,
struct vmbus_channel *channel,
void *data, u32 buflen);
-int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
+int rndis_filter_set_device_mac(struct netvsc_device *ndev,
+ const char *mac);
void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 06f39a99da7c..94c00acac58a 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -832,7 +832,7 @@ int netvsc_send(struct net_device_context *ndev_ctx,
struct sk_buff *skb)
{
struct netvsc_device *net_device
- = rcu_dereference_rtnl(ndev_ctx->nvdev);
+ = rcu_dereference_bh(ndev_ctx->nvdev);
struct hv_device *device = ndev_ctx->device_ctx;
int ret = 0;
struct netvsc_channel *nvchan;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index f1eaf675d2e9..a04f2efbbc25 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -923,6 +923,8 @@ static void netvsc_get_stats64(struct net_device *net,
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
+ struct net_device_context *ndc = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
struct sockaddr *addr = p;
char save_adr[ETH_ALEN];
unsigned char save_aatype;
@@ -935,7 +937,10 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
if (err != 0)
return err;
- err = rndis_filter_set_device_mac(ndev, addr->sa_data);
+ if (!nvdev)
+ return -ENODEV;
+
+ err = rndis_filter_set_device_mac(nvdev, addr->sa_data);
if (err != 0) {
/* roll back to saved MAC */
memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
@@ -981,7 +986,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
const void *nds = &ndc->eth_stats;
const struct netvsc_stats *qstats;
unsigned int start;
@@ -1019,7 +1024,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
u8 *p = data;
int i;
@@ -1077,7 +1082,7 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
u32 *rules)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
if (!nvdev)
return -ENODEV;
@@ -1127,7 +1132,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
struct rndis_device *rndis_dev;
int i;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index eaa3f0d5682a..bf21ea92c743 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -85,14 +85,6 @@ static struct rndis_device *get_rndis_device(void)
return device;
}
-static struct netvsc_device *
-net_device_to_netvsc_device(struct net_device *ndev)
-{
- struct net_device_context *net_device_ctx = netdev_priv(ndev);
-
- return rtnl_dereference(net_device_ctx->nvdev);
-}
-
static struct rndis_request *get_rndis_request(struct rndis_device *dev,
u32 msg_type,
u32 msg_len)
@@ -252,7 +244,10 @@ static int rndis_filter_send_request(struct rndis_device *dev,
pb[0].len;
}
+ rcu_read_lock_bh();
ret = netvsc_send(net_device_ctx, packet, NULL, &pb, NULL);
+ rcu_read_unlock_bh();
+
return ret;
}
@@ -452,8 +447,9 @@ int rndis_filter_receive(struct net_device *ndev,
return 0;
}
-static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
- void *result, u32 *result_size)
+static int rndis_filter_query_device(struct rndis_device *dev,
+ struct netvsc_device *nvdev,
+ u32 oid, void *result, u32 *result_size)
{
struct rndis_request *request;
u32 inresult_size = *result_size;
@@ -480,8 +476,6 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
query->dev_vc_handle = 0;
if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) {
- struct net_device_context *ndevctx = netdev_priv(dev->ndev);
- struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct ndis_offload *hwcaps;
u32 nvsp_version = nvdev->nvsp_version;
u8 ndis_rev;
@@ -550,14 +544,15 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
/* Get the hardware offload capabilities */
static int
-rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
+rndis_query_hwcaps(struct rndis_device *dev, struct netvsc_device *net_device,
+ struct ndis_offload *caps)
{
u32 caps_len = sizeof(*caps);
int ret;
memset(caps, 0, sizeof(*caps));
- ret = rndis_filter_query_device(dev,
+ ret = rndis_filter_query_device(dev, net_device,
OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
caps, &caps_len);
if (ret)
@@ -586,11 +581,12 @@ rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
return 0;
}
-static int rndis_filter_query_device_mac(struct rndis_device *dev)
+static int rndis_filter_query_device_mac(struct rndis_device *dev,
+ struct netvsc_device *net_device)
{
u32 size = ETH_ALEN;
- return rndis_filter_query_device(dev,
+ return rndis_filter_query_device(dev, net_device,
RNDIS_OID_802_3_PERMANENT_ADDRESS,
dev->hw_mac_adr, &size);
}
@@ -598,9 +594,9 @@ static int rndis_filter_query_device_mac(struct rndis_device *dev)
#define NWADR_STR "NetworkAddress"
#define NWADR_STRLEN 14
-int rndis_filter_set_device_mac(struct net_device *ndev, char *mac)
+int rndis_filter_set_device_mac(struct netvsc_device *nvdev,
+ const char *mac)
{
- struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
struct rndis_device *rdev = nvdev->extension;
struct rndis_request *request;
struct rndis_set_request *set;
@@ -654,11 +650,8 @@ int rndis_filter_set_device_mac(struct net_device *ndev, char *mac)
wait_for_completion(&request->wait_event);
set_complete = &request->response_msg.msg.set_complete;
- if (set_complete->status != RNDIS_STATUS_SUCCESS) {
- netdev_err(ndev, "Fail to set MAC on host side:0x%x\n",
- set_complete->status);
- ret = -EINVAL;
- }
+ if (set_complete->status != RNDIS_STATUS_SUCCESS)
+ ret = -EIO;
cleanup:
put_rndis_request(rdev, request);
@@ -791,27 +784,27 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev,
return ret;
}
-static int rndis_filter_query_device_link_status(struct rndis_device *dev)
+static int rndis_filter_query_device_link_status(struct rndis_device *dev,
+ struct netvsc_device *net_device)
{
u32 size = sizeof(u32);
u32 link_status;
- int ret;
- ret = rndis_filter_query_device(dev,
- RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
- &link_status, &size);
-
- return ret;
+ return rndis_filter_query_device(dev, net_device,
+ RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
+ &link_status, &size);
}
-static int rndis_filter_query_link_speed(struct rndis_device *dev)
+static int rndis_filter_query_link_speed(struct rndis_device *dev,
+ struct netvsc_device *net_device)
{
u32 size = sizeof(u32);
u32 link_speed;
struct net_device_context *ndc;
int ret;
- ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED,
+ ret = rndis_filter_query_device(dev, net_device,
+ RNDIS_OID_GEN_LINK_SPEED,
&link_speed, &size);
if (!ret) {
@@ -880,14 +873,14 @@ void rndis_filter_update(struct netvsc_device *nvdev)
schedule_work(&rdev->mcast_work);
}
-static int rndis_filter_init_device(struct rndis_device *dev)
+static int rndis_filter_init_device(struct rndis_device *dev,
+ struct netvsc_device *nvdev)
{
struct rndis_request *request;
struct rndis_initialize_request *init;
struct rndis_initialize_complete *init_complete;
u32 status;
int ret;
- struct netvsc_device *nvdev = net_device_to_netvsc_device(dev->ndev);
request = get_rndis_request(dev, RNDIS_MSG_INIT,
RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
@@ -1024,12 +1017,17 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
{
struct net_device *ndev =
hv_get_drvdata(new_sc->primary_channel->device_obj);
- struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct netvsc_device *nvscdev;
u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
struct netvsc_channel *nvchan;
int ret;
- if (chn_index >= nvscdev->num_chn)
+ /* This is safe because this callback only happens when
+ * new device is being setup and waiting on the channel_init_wait.
+ */
+ nvscdev = rcu_dereference_raw(ndev_ctx->nvdev);
+ if (!nvscdev || chn_index >= nvscdev->num_chn)
return;
nvchan = nvscdev->chan_table + chn_index;
@@ -1104,27 +1102,27 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
rndis_device->ndev = net;
/* Send the rndis initialization message */
- ret = rndis_filter_init_device(rndis_device);
+ ret = rndis_filter_init_device(rndis_device, net_device);
if (ret != 0)
goto err_dev_remv;
/* Get the MTU from the host */
size = sizeof(u32);
- ret = rndis_filter_query_device(rndis_device,
+ ret = rndis_filter_query_device(rndis_device, net_device,
RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
&mtu, &size);
if (ret == 0 && size == sizeof(u32) && mtu < net->mtu)
net->mtu = mtu;
/* Get the mac address */
- ret = rndis_filter_query_device_mac(rndis_device);
+ ret = rndis_filter_query_device_mac(rndis_device, net_device);
if (ret != 0)
goto err_dev_remv;
memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
/* Find HW offload capabilities */
- ret = rndis_query_hwcaps(rndis_device, &hwcaps);
+ ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps);
if (ret != 0)
goto err_dev_remv;
@@ -1185,7 +1183,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
if (ret)
goto err_dev_remv;
- rndis_filter_query_device_link_status(rndis_device);
+ rndis_filter_query_device_link_status(rndis_device, net_device);
netdev_dbg(net, "Device MAC %pM link state %s\n",
rndis_device->hw_mac_adr,
@@ -1194,11 +1192,11 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
return net_device;
- rndis_filter_query_link_speed(rndis_device);
+ rndis_filter_query_link_speed(rndis_device, net_device);
/* vRSS setup */
memset(&rsscap, 0, rsscap_size);
- ret = rndis_filter_query_device(rndis_device,
+ ret = rndis_filter_query_device(rndis_device, net_device,
OID_GEN_RECEIVE_SCALE_CAPABILITIES,
&rsscap, &rsscap_size);
if (ret || rsscap.num_recv_que < 2)
--
2.11.0
* [PATCH net-next 3/5] netvsc: optimize receive completions
2017-07-25 20:04 [PATCH net-next 0/5] netvsc: fixes and performance related changes Stephen Hemminger
2017-07-25 20:04 ` [PATCH net-next 1/5] netvsc: fix return value for set_channels Stephen Hemminger
2017-07-25 20:04 ` [PATCH net-next 2/5] netvsc: fix warnings reported by lockdep Stephen Hemminger
@ 2017-07-25 20:04 ` Stephen Hemminger
2017-07-25 21:07 ` Stephen Hemminger
2017-07-25 20:04 ` [PATCH net-next 4/5] netvsc: signal host if receive ring is emptied Stephen Hemminger
2017-07-25 20:04 ` [PATCH net-next 5/5] netvsc: allow smaller send/recv buffer size Stephen Hemminger
4 siblings, 1 reply; 7+ messages in thread
From: Stephen Hemminger @ 2017-07-25 20:04 UTC (permalink / raw)
To: kys, haiyangz, sthemmin; +Cc: devel, netdev
Optimize how the receive completion ring is managed.
* Allocate only as many slots as needed for all buffers from host
* Allocate before setting up sub channel for better error detection
* Don't need to keep copy of initial receive section message
* Only need to keep the transaction id, status doesn't matter
* Precompute the watermark for when receive flushing is needed
* Replace division with conditional test
* Replace atomic per-device variable with per-channel check.
* Handle corner case where receive completion send
fails if ring buffer to host is full.
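For reference, the division-free wrap mentioned above replaces a modulo on every insertion with a compare-and-reset; a minimal sketch (mrc_advance() is a hypothetical helper, the field names match struct multi_recv_comp in the diff below):

    /* advance a completion-ring index without the modulo operation */
    static void mrc_advance(struct multi_recv_comp *mrc, u32 ring_size)
    {
            /* old: mrc->next = (mrc->next + 1) % NETVSC_RECVSLOT_MAX; */
            if (++mrc->next == ring_size)
                    mrc->next = 0;
    }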
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
drivers/net/hyperv/hyperv_net.h | 19 +--
drivers/net/hyperv/netvsc.c | 270 +++++++++++++++-----------------------
drivers/net/hyperv/rndis_filter.c | 20 +--
3 files changed, 125 insertions(+), 184 deletions(-)
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index fb62ea632914..b0259b12c5ee 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -186,6 +186,7 @@ struct net_device_context;
struct netvsc_device *netvsc_device_add(struct hv_device *device,
const struct netvsc_device_info *info);
+int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx);
void netvsc_device_remove(struct hv_device *device);
int netvsc_send(struct net_device_context *ndc,
struct hv_netvsc_packet *packet,
@@ -652,18 +653,10 @@ struct multi_send_data {
u32 count; /* counter of batched packets */
};
-struct recv_comp_data {
- u64 tid; /* transaction id */
- u32 status;
-};
-
-/* Netvsc Receive Slots Max */
-#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1)
-
struct multi_recv_comp {
- void *buf; /* queued receive completions */
- u32 first; /* first data entry */
- u32 next; /* next entry for writing */
+ u64 *slots; /* pending completion transactions */
+ u32 first; /* first data entry */
+ u32 next; /* next entry for writing */
};
struct netvsc_stats {
@@ -750,7 +743,7 @@ struct netvsc_device {
u32 recv_buf_size;
u32 recv_buf_gpadl_handle;
u32 recv_section_cnt;
- struct nvsp_1_receive_buffer_section *recv_section;
+ u32 recv_completion_cnt;
/* Send buffer allocated by us */
void *send_buf;
@@ -778,8 +771,6 @@ struct netvsc_device {
u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
u32 pkt_align; /* alignment bytes, e.g. 8 */
- atomic_t num_outstanding_recvs;
-
atomic_t open_cnt;
struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 94c00acac58a..03c1fec762c7 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -72,9 +72,6 @@ static struct netvsc_device *alloc_net_device(void)
if (!net_device)
return NULL;
- net_device->chan_table[0].mrc.buf
- = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
atomic_set(&net_device->open_cnt, 0);
@@ -92,7 +89,7 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
- vfree(nvdev->chan_table[i].mrc.buf);
+ vfree(nvdev->chan_table[i].mrc.slots);
kfree(nvdev);
}
@@ -171,12 +168,6 @@ static void netvsc_destroy_buf(struct hv_device *device)
net_device->recv_buf = NULL;
}
- if (net_device->recv_section) {
- net_device->recv_section_cnt = 0;
- kfree(net_device->recv_section);
- net_device->recv_section = NULL;
- }
-
/* Deal with the send buffer we may have setup.
* If we got a send section size, it means we received a
* NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
@@ -239,11 +230,26 @@ static void netvsc_destroy_buf(struct hv_device *device)
kfree(net_device->send_section_map);
}
+int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
+{
+ struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
+ int node = cpu_to_node(nvchan->channel->target_cpu);
+ size_t size;
+
+ size = net_device->recv_completion_cnt * sizeof(u64);
+ nvchan->mrc.slots = vzalloc_node(size, node);
+ if (!nvchan->mrc.slots)
+ nvchan->mrc.slots = vzalloc(size);
+
+ return nvchan->mrc.slots ? 0 : -ENOMEM;
+}
+
static int netvsc_init_buf(struct hv_device *device,
struct netvsc_device *net_device)
{
int ret = 0;
struct nvsp_message *init_packet;
+ struct nvsp_1_message_send_receive_buffer_complete *resp;
struct net_device *ndev;
size_t map_words;
int node;
@@ -300,43 +306,41 @@ static int netvsc_init_buf(struct hv_device *device,
wait_for_completion(&net_device->channel_init_wait);
/* Check the response */
- if (init_packet->msg.v1_msg.
- send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
- netdev_err(ndev, "Unable to complete receive buffer "
- "initialization with NetVsp - status %d\n",
- init_packet->msg.v1_msg.
- send_recv_buf_complete.status);
+ resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
+ if (resp->status != NVSP_STAT_SUCCESS) {
+ netdev_err(ndev,
+ "Unable to complete receive buffer initialization with NetVsp - status %d\n",
+ resp->status);
ret = -EINVAL;
goto cleanup;
}
/* Parse the response */
+ netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
+ resp->num_sections, resp->sections[0].sub_alloc_size,
+ resp->sections[0].num_sub_allocs);
- net_device->recv_section_cnt = init_packet->msg.
- v1_msg.send_recv_buf_complete.num_sections;
-
- net_device->recv_section = kmemdup(
- init_packet->msg.v1_msg.send_recv_buf_complete.sections,
- net_device->recv_section_cnt *
- sizeof(struct nvsp_1_receive_buffer_section),
- GFP_KERNEL);
- if (net_device->recv_section == NULL) {
- ret = -EINVAL;
- goto cleanup;
- }
+ net_device->recv_section_cnt = resp->num_sections;
/*
* For 1st release, there should only be 1 section that represents the
* entire receive buffer
*/
if (net_device->recv_section_cnt != 1 ||
- net_device->recv_section->offset != 0) {
+ resp->sections[0].offset != 0) {
ret = -EINVAL;
goto cleanup;
}
- /* Now setup the send buffer.
- */
+ /* Setup receive completion ring */
+ net_device->recv_completion_cnt
+ = round_up(resp->sections[0].num_sub_allocs + 1,
+ PAGE_SIZE / sizeof(u64));
+ ret = netvsc_alloc_recv_comp_ring(net_device, 0);
+ if (ret)
+ goto cleanup;
+
+ /* Now setup the send buffer. */
net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
if (!net_device->send_buf)
net_device->send_buf = vzalloc(net_device->send_buf_size);
@@ -950,139 +954,96 @@ int netvsc_send(struct net_device_context *ndev_ctx,
return ret;
}
-static int netvsc_send_recv_completion(struct vmbus_channel *channel,
- u64 transaction_id, u32 status)
-{
- struct nvsp_message recvcompMessage;
- int ret;
-
- recvcompMessage.hdr.msg_type =
- NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
-
- recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
-
- /* Send the completion */
- ret = vmbus_sendpacket(channel, &recvcompMessage,
- sizeof(struct nvsp_message_header) + sizeof(u32),
- transaction_id, VM_PKT_COMP, 0);
-
- return ret;
-}
-
-static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
- u32 *filled, u32 *avail)
+/* Send pending recv completions */
+static int send_recv_completions(struct netvsc_device *nvdev,
+ struct vmbus_channel *channel, u16 q_idx)
{
struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 first = mrc->first;
- u32 next = mrc->next;
+ struct recv_comp_msg {
+ struct nvsp_message_header hdr;
+ u32 status;
+ } __packed;
+ struct recv_comp_msg msg = {
+ .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
+ .status = NVSP_STAT_SUCCESS,
+ };
+ int ret;
- *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
- next - first;
+ while (mrc->first != mrc->next) {
+ u64 tid = mrc->slots[mrc->first];
- *avail = NETVSC_RECVSLOT_MAX - *filled - 1;
-}
+ ret = vmbus_sendpacket(channel, &msg, sizeof(msg),
+ tid, VM_PKT_COMP, 0);
+ if (unlikely(ret))
+ return ret;
-/* Read the first filled slot, no change to index */
-static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
- *nvdev, u16 q_idx)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 filled, avail;
-
- if (unlikely(!mrc->buf))
- return NULL;
+ if (++mrc->first == nvdev->recv_completion_cnt)
+ mrc->first = 0;
+ }
- count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
- if (!filled)
- return NULL;
+ /* receive completion ring has been emptied */
+ if (unlikely(nvdev->destroy))
+ wake_up(&nvdev->wait_drain);
- return mrc->buf + mrc->first * sizeof(struct recv_comp_data);
+ return 0;
}
-/* Put the first filled slot back to available pool */
-static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
+/* Count how many receive completions are outstanding */
+static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
+ const struct multi_recv_comp *mrc,
+ u32 *filled, u32 *avail)
{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- int num_recv;
+ u32 count = nvdev->recv_completion_cnt;
- mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX;
-
- num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
+ if (mrc->next >= mrc->first)
+ *filled = mrc->next - mrc->first;
+ else
+ *filled = (count - mrc->first) + mrc->next;
- if (nvdev->destroy && num_recv == 0)
- wake_up(&nvdev->wait_drain);
+ *avail = count - *filled - 1;
}
-/* Check and send pending recv completions */
-static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
- struct vmbus_channel *channel, u16 q_idx)
+/* Add receive complete to ring to send to host. */
+static void enq_receive_complete(struct net_device *ndev,
+ struct netvsc_device *nvdev, u16 q_idx,
+ u64 tid)
{
- struct recv_comp_data *rcd;
- int ret;
-
- while (true) {
- rcd = read_recv_comp_slot(nvdev, q_idx);
- if (!rcd)
- break;
+ struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
+ struct multi_recv_comp *mrc = &nvchan->mrc;
+ u32 filled, avail;
- ret = netvsc_send_recv_completion(channel, rcd->tid,
- rcd->status);
- if (ret)
- break;
+ recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
- put_recv_comp_slot(nvdev, q_idx);
+ if (unlikely(filled > NAPI_POLL_WEIGHT)) {
+ send_recv_completions(nvdev, nvchan->channel, q_idx);
+ recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
}
-}
-
-#define NETVSC_RCD_WATERMARK 80
-
-/* Get next available slot */
-static inline struct recv_comp_data *get_recv_comp_slot(
- struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 filled, avail, next;
- struct recv_comp_data *rcd;
-
- if (unlikely(!nvdev->recv_section))
- return NULL;
-
- if (unlikely(!mrc->buf))
- return NULL;
-
- if (atomic_read(&nvdev->num_outstanding_recvs) >
- nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
- netvsc_chk_recv_comp(nvdev, channel, q_idx);
-
- count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
- if (!avail)
- return NULL;
-
- next = mrc->next;
- rcd = mrc->buf + next * sizeof(struct recv_comp_data);
- mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX;
- atomic_inc(&nvdev->num_outstanding_recvs);
+ if (unlikely(!avail)) {
+ netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+ q_idx, tid);
+ return;
+ }
- return rcd;
+ mrc->slots[mrc->next] = tid;
+ if (++mrc->next == nvdev->recv_completion_cnt)
+ mrc->next = 0;
}
static int netvsc_receive(struct net_device *ndev,
- struct netvsc_device *net_device,
- struct net_device_context *net_device_ctx,
- struct hv_device *device,
- struct vmbus_channel *channel,
- const struct vmpacket_descriptor *desc,
- struct nvsp_message *nvsp)
+ struct netvsc_device *net_device,
+ struct net_device_context *net_device_ctx,
+ struct hv_device *device,
+ struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *desc,
+ struct nvsp_message *nvsp)
{
const struct vmtransfer_page_packet_header *vmxferpage_packet
= container_of(desc, const struct vmtransfer_page_packet_header, d);
u16 q_idx = channel->offermsg.offer.sub_channel_index;
char *recv_buf = net_device->recv_buf;
- u32 status = NVSP_STAT_SUCCESS;
int i;
int count = 0;
- int ret;
/* Make sure this is a valid nvsp packet */
if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
@@ -1109,29 +1070,13 @@ static int netvsc_receive(struct net_device *ndev,
u32 buflen = vmxferpage_packet->ranges[i].byte_count;
/* Pass it to the upper layer */
- status = rndis_filter_receive(ndev, net_device, device,
- channel, data, buflen);
+ rndis_filter_receive(ndev, net_device, device,
+ channel, data, buflen);
}
- if (net_device->chan_table[q_idx].mrc.buf) {
- struct recv_comp_data *rcd;
+ enq_receive_complete(ndev, net_device, q_idx,
+ vmxferpage_packet->d.trans_id);
- rcd = get_recv_comp_slot(net_device, channel, q_idx);
- if (rcd) {
- rcd->tid = vmxferpage_packet->d.trans_id;
- rcd->status = status;
- } else {
- netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
- q_idx, vmxferpage_packet->d.trans_id);
- }
- } else {
- ret = netvsc_send_recv_completion(channel,
- vmxferpage_packet->d.trans_id,
- status);
- if (ret)
- netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
- q_idx, vmxferpage_packet->d.trans_id, ret);
- }
return count;
}
@@ -1244,17 +1189,18 @@ int netvsc_poll(struct napi_struct *napi, int budget)
nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
}
- /* If receive ring was exhausted
- * and not doing busy poll
- * then re-enable host interrupts
- * and reschedule if ring is not empty.
+ /* If send of pending receive completions succeeded
+ * and did not exhaust NAPI budget
+ * and not doing busy poll
+ * then reschedule if more data has arrived from host
*/
- if (work_done < budget &&
+ if (send_recv_completions(net_device, channel, q_idx) == 0 &&
+ work_done < budget &&
napi_complete_done(napi, work_done) &&
- hv_end_read(&channel->inbound) != 0)
+ hv_end_read(&channel->inbound)) {
+ hv_begin_read(&channel->inbound);
napi_reschedule(napi);
-
- netvsc_chk_recv_comp(net_device, channel, q_idx);
+ }
/* Driver may overshoot since multiple packets per descriptor */
return min(work_done, budget);
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index bf21ea92c743..db9bac31dd09 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -928,12 +928,12 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev)
{
int i;
- if (atomic_read(&nvdev->num_outstanding_recvs) > 0)
- return false;
-
for (i = 0; i < nvdev->num_chn; i++) {
const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+ if (nvchan->mrc.first != nvchan->mrc.next)
+ return false;
+
if (atomic_read(&nvchan->queue_sends) > 0)
return false;
}
@@ -1031,11 +1031,6 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
return;
nvchan = nvscdev->chan_table + chn_index;
- nvchan->mrc.buf
- = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
- if (!nvchan->mrc.buf)
- return;
/* Because the device uses NAPI, all the interrupt batching and
* control is done via Net softirq, not the channel handling
@@ -1225,6 +1220,15 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
if (num_rss_qs == 0)
return net_device;
+ for (i = 1; i < net_device->num_chn; i++) {
+ ret = netvsc_alloc_recv_comp_ring(net_device, i);
+ if (ret) {
+ while (--i != 0)
+ vfree(net_device->chan_table[i].mrc.slots);
+ goto out;
+ }
+ }
+
refcount_set(&net_device->sc_offered, num_rss_qs);
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
--
2.11.0
* Re: [PATCH net-next 3/5] netvsc: optimize receive completions
2017-07-25 20:04 ` [PATCH net-next 3/5] netvsc: optimize receive completions Stephen Hemminger
@ 2017-07-25 21:07 ` Stephen Hemminger
0 siblings, 0 replies; 7+ messages in thread
From: Stephen Hemminger @ 2017-07-25 21:07 UTC (permalink / raw)
To: kys, haiyangz, sthemmin; +Cc: devel, netdev
On Tue, 25 Jul 2017 13:04:20 -0700
Stephen Hemminger <stephen@networkplumber.org> wrote:
> Optimize how the receive completion ring is managed.
> * Allocate only as many slots as needed for all buffers from host
> * Allocate before setting up sub channel for better error detection
> * Don't need to keep copy of initial receive section message
> * Only need to keep the transaction id, status doesn't matter
> * Precompute the watermark for when receive flushing is needed
> * Replace division with conditional test
> * Replace atomic per-device variable with per-channel check.
> * Handle corner case where receive completion send
> fails if ring buffer to host is full.
>
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
I probably pushed the shrinkage too far for now. Haiyang raised the
point that we probably need to keep the status value. I dropped storing
it since it was always SUCCESS unless the host sent a bad packet
or the system was out of skbs.
Will send V2 that keeps the status value for now.
* [PATCH net-next 4/5] netvsc: signal host if receive ring is emptied
2017-07-25 20:04 [PATCH net-next 0/5] netvsc: fixes and performance related changes Stephen Hemminger
` (2 preceding siblings ...)
2017-07-25 20:04 ` [PATCH net-next 3/5] netvsc: optimize receive completions Stephen Hemminger
@ 2017-07-25 20:04 ` Stephen Hemminger
2017-07-25 20:04 ` [PATCH net-next 5/5] netvsc: allow smaller send/recv buffer size Stephen Hemminger
4 siblings, 0 replies; 7+ messages in thread
From: Stephen Hemminger @ 2017-07-25 20:04 UTC (permalink / raw)
To: kys, haiyangz, sthemmin; +Cc: devel, netdev
Latency improvement related to the NAPI conversion.
If all packets have been processed from the receive ring,
then the host needs to be signaled.
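For reference, combined with the previous patch the tail of netvsc_poll() ends up with the usual NAPI shape: drain the ring, close the packet iterator to signal the host, and only stay idle if the completion flush and napi_complete_done() both succeed. A simplified sketch of that shape (condensed from the diff below, not literal driver code):

    if (!nvchan->desc)                     /* ring fully drained */
            hv_pkt_iter_close(channel);    /* signal the host */

    if (send_recv_completions(net_device, channel, q_idx) == 0 &&
        work_done < budget &&
        napi_complete_done(napi, work_done) &&
        hv_end_read(&channel->inbound)) {
            /* more data arrived while re-arming: poll again */
            hv_begin_read(&channel->inbound);
            napi_reschedule(napi);
    }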
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
drivers/net/hyperv/netvsc.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 03c1fec762c7..f5d7cae6ba70 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1189,10 +1189,15 @@ int netvsc_poll(struct napi_struct *napi, int budget)
nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
}
- /* If send of pending receive completions succeeded
- * and did not exhaust NAPI budget
+ /* if ring is empty, signal host */
+ if (!nvchan->desc)
+ hv_pkt_iter_close(channel);
+
+ /* If send of pending receive completions succeeded
+ * and did not exhaust NAPI budget this time
* and not doing busy poll
- * then reschedule if more data has arrived from host
+ * then re-enable host interrupts
+ * and reschedule if ring is not empty.
*/
if (send_recv_completions(net_device, channel, q_idx) == 0 &&
work_done < budget &&
--
2.11.0
* [PATCH net-next 5/5] netvsc: allow smaller send/recv buffer size
2017-07-25 20:04 [PATCH net-next 0/5] netvsc: fixes and performance related changes Stephen Hemminger
` (3 preceding siblings ...)
2017-07-25 20:04 ` [PATCH net-next 4/5] netvsc: signal host if receive ring is emptied Stephen Hemminger
@ 2017-07-25 20:04 ` Stephen Hemminger
4 siblings, 0 replies; 7+ messages in thread
From: Stephen Hemminger @ 2017-07-25 20:04 UTC (permalink / raw)
To: kys, haiyangz, sthemmin; +Cc: devel, netdev
The default size of the send and receive buffer areas used for host DMA
is much larger than it needs to be. Experimentation shows that
a 4M receive buffer and a 1M send buffer are sufficient.
Make the sizes module parameters so that they can be adjusted
as needed for testing or special needs.
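For reference, the knobs are read-only module parameters expressed in pages, so on a 4K-page system something like "modprobe hv_netvsc recv_buffer_size=512 send_buffer_size=128" would request a 2M receive buffer and a 512K send buffer; the requested sizes are then clamped against the host-negotiated maximum. A condensed sketch of the flow (taken from the diff below):

    /* pages -> bytes when filling in device_info (as in netvsc_probe) */
    device_info.recv_buf_size = recv_buffer_size * PAGE_SIZE;
    device_info.send_buf_size = send_buffer_size * PAGE_SIZE;

    /* later bounded by the protocol/host maximum in netvsc_connect_vsp */
    net_device->recv_buf_size = min(max_recv_buf_size,
                                    device_info->recv_buf_size);
    net_device->send_buf_size = min_t(u32, NETVSC_SEND_BUFFER_SIZE,
                                      device_info->send_buf_size);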
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
drivers/net/hyperv/hyperv_net.h | 2 ++
drivers/net/hyperv/netvsc.c | 18 ++++++++++++------
drivers/net/hyperv/netvsc_drv.c | 31 ++++++++++++++++++++++++++++++-
3 files changed, 44 insertions(+), 7 deletions(-)
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index b0259b12c5ee..40677de411b7 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -148,6 +148,8 @@ struct netvsc_device_info {
unsigned char mac_adr[ETH_ALEN];
int ring_size;
u32 num_chn;
+ u32 recv_buf_size;
+ u32 send_buf_size;
};
enum rndis_device_state {
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index f5d7cae6ba70..0b03ace42ceb 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -482,14 +482,16 @@ static int negotiate_nvsp_ver(struct hv_device *device,
}
static int netvsc_connect_vsp(struct hv_device *device,
- struct netvsc_device *net_device)
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
{
const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
};
struct nvsp_message *init_packet;
- int ndis_version, i, ret;
+ u32 max_recv_buf_size, ndis_version;
+ int i, ret;
init_packet = &net_device->channel_init_pkt;
@@ -534,10 +536,14 @@ static int netvsc_connect_vsp(struct hv_device *device,
/* Post the big receive buffer to NetVSP */
if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+ max_recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
else
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
- net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
+ max_recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+
+ net_device->recv_buf_size = min(max_recv_buf_size,
+ device_info->recv_buf_size);
+ net_device->send_buf_size = min_t(u32, NETVSC_SEND_BUFFER_SIZE,
+ device_info->send_buf_size);
ret = netvsc_init_buf(device, net_device);
@@ -1296,7 +1302,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
rcu_assign_pointer(net_device_ctx->nvdev, net_device);
/* Connect with the NetVsp */
- ret = netvsc_connect_vsp(device, net_device);
+ ret = netvsc_connect_vsp(device, net_device, device_info);
if (ret != 0) {
netdev_err(ndev,
"unable to connect to NetVSP - %d\n", ret);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index a04f2efbbc25..4a9562c7db01 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -44,13 +44,23 @@
#include "hyperv_net.h"
-#define RING_SIZE_MIN 64
+#define RING_SIZE_MIN 64
+#define RECV_BUFFER_MIN 16
+#define SEND_BUFFER_MIN 4
#define LINKCHANGE_INT (2 * HZ)
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
+static unsigned int recv_buffer_size = (4 * 1024 * 1024) / PAGE_SIZE;
+module_param(recv_buffer_size, uint, S_IRUGO);
+MODULE_PARM_DESC(recv_buffer_size, "Receive buffer size (# of pages)");
+
+static unsigned int send_buffer_size = (1024 * 1024) / PAGE_SIZE;
+module_param(send_buffer_size, uint, S_IRUGO);
+MODULE_PARM_DESC(send_buffer_size, "Send buffer size (# of pages)");
+
static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFUP |
NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR |
@@ -753,6 +763,8 @@ static int netvsc_set_channels(struct net_device *net,
memset(&device_info, 0, sizeof(device_info));
device_info.num_chn = count;
device_info.ring_size = ring_size;
+ device_info.send_buf_size = nvdev->send_buf_size;
+ device_info.recv_buf_size = nvdev->recv_buf_size;
nvdev = rndis_filter_device_add(dev, &device_info);
if (!IS_ERR(nvdev)) {
@@ -850,6 +862,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = nvdev->num_chn;
+ device_info.send_buf_size = nvdev->send_buf_size;
+ device_info.recv_buf_size = nvdev->recv_buf_size;
rndis_filter_device_remove(hdev, nvdev);
@@ -1520,6 +1534,8 @@ static int netvsc_probe(struct hv_device *dev,
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = VRSS_CHANNEL_DEFAULT;
+ device_info.send_buf_size = send_buffer_size * PAGE_SIZE;
+ device_info.recv_buf_size = recv_buffer_size * PAGE_SIZE;
nvdev = rndis_filter_device_add(dev, &device_info);
if (IS_ERR(nvdev)) {
@@ -1671,6 +1687,19 @@ static int __init netvsc_drv_init(void)
pr_info("Increased ring_size to %d (min allowed)\n",
ring_size);
}
+
+ if (recv_buffer_size < RECV_BUFFER_MIN) {
+ recv_buffer_size = RECV_BUFFER_MIN;
+ pr_notice("Increased receive buffer size to %u (min allowed)\n",
+ recv_buffer_size);
+ }
+
+ if (send_buffer_size < SEND_BUFFER_MIN) {
+ send_buffer_size = SEND_BUFFER_MIN;
+ pr_notice("Increased receive buffer size to %u (min allowed)\n",
+ send_buffer_size);
+ }
+
ret = vmbus_driver_register(&netvsc_drv);
if (ret)
--
2.11.0