* [PATCH net-next, v3] hyperv: Add support for virtual Receive Side Scaling (vRSS)
@ 2014-03-18 22:15 Haiyang Zhang
2014-03-19 20:40 ` [PATCH net-next,v3] " David Miller
0 siblings, 1 reply; 6+ messages in thread
From: Haiyang Zhang @ 2014-03-18 22:15 UTC (permalink / raw)
To: davem, netdev; +Cc: olaf, jasowang, driverdev-devel, linux-kernel, haiyangz
This feature allows multiple channels to be used by each virtual NIC.
It is available on Hyper-V host 2012 R2.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
---
drivers/net/hyperv/hyperv_net.h | 112 ++++++++++++++++++++++-
drivers/net/hyperv/netvsc.c | 134 +++++++++++++++++++++-----
drivers/net/hyperv/netvsc_drv.c | 102 +++++++++++++++++++-
drivers/net/hyperv/rndis_filter.c | 187 ++++++++++++++++++++++++++++++++++++-
4 files changed, 503 insertions(+), 32 deletions(-)
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 13010b4..c2d72f6 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -28,6 +28,98 @@
#include <linux/hyperv.h>
#include <linux/rndis.h>
+/* RSS related */
+#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
+#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */
+
+#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
+#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
+
+#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
+#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
+
+struct ndis_obj_header {
+ u8 type;
+ u8 rev;
+ u16 size;
+} __packed;
+
+
+/* ndis_recv_scale_cap/cap_flag */
+#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
+#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000
+#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000
+#define NDIS_RSS_CAPS_USING_MSI_X 0x08000000
+#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000
+#define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000
+#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100
+#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200
+#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400
+
+struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
+ struct ndis_obj_header hdr;
+ u32 cap_flag;
+ u32 num_int_msg;
+ u32 num_recv_que;
+ u16 num_indirect_tabent;
+} __packed;
+
+
+/* ndis_recv_scale_param flags */
+#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001
+#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002
+#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004
+#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008
+#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010
+
+/* Hash info bits */
+#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001
+#define NDIS_HASH_IPV4 0x00000100
+#define NDIS_HASH_TCP_IPV4 0x00000200
+#define NDIS_HASH_IPV6 0x00000400
+#define NDIS_HASH_IPV6_EX 0x00000800
+#define NDIS_HASH_TCP_IPV6 0x00001000
+#define NDIS_HASH_TCP_IPV6_EX 0x00002000
+
+#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
+#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
+
+#define ITAB_NUM 128
+#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
+extern u8 netvsc_hash_key[];
+
+struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
+ struct ndis_obj_header hdr;
+
+ /* Qualifies the rest of the information */
+ u16 flag;
+
+ /* The base CPU number to do receive processing. not used */
+ u16 base_cpu_number;
+
+ /* This describes the hash function and type being enabled */
+ u32 hashinfo;
+
+ /* The size of indirection table array */
+ u16 indirect_tabsize;
+
+ /* The offset of the indirection table from the beginning of this
+ * structure
+ */
+ u32 indirect_taboffset;
+
+ /* The size of the hash secret key */
+ u16 hashkey_size;
+
+ /* The offset of the secret key from the beginning of this structure */
+ u32 kashkey_offset;
+
+ u32 processor_masks_offset;
+ u32 num_processor_masks;
+ u32 processor_masks_entry_size;
+};
+
+
/* Fwd declaration */
struct hv_netvsc_packet;
struct ndis_tcp_ip_checksum_info;
@@ -39,6 +131,8 @@ struct xferpage_packet {
/* # of netvsc packets this xfer packet contains */
u32 count;
+
+ struct vmbus_channel *channel;
};
/*
@@ -54,6 +148,9 @@ struct hv_netvsc_packet {
bool is_data_pkt;
u16 vlan_tci;
+ u16 q_idx;
+ struct vmbus_channel *channel;
+
/*
* Valid only for receives when we break a xfer page packet
* into multiple netvsc packets
@@ -120,6 +217,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
int netvsc_recv_callback(struct hv_device *device_obj,
struct hv_netvsc_packet *packet,
struct ndis_tcp_ip_checksum_info *csum_info);
+extern void netvsc_channel_cb(void *context);
int rndis_filter_open(struct hv_device *dev);
int rndis_filter_close(struct hv_device *dev);
int rndis_filter_device_add(struct hv_device *dev,
@@ -522,6 +620,8 @@ struct nvsp_message {
#define NETVSC_PACKET_SIZE 2048
+#define VRSS_SEND_TAB_SIZE 16
+
/* Per netvsc channel-specific */
struct netvsc_device {
struct hv_device *dev;
@@ -555,10 +655,20 @@ struct netvsc_device {
struct net_device *ndev;
+ struct vmbus_channel *chn_table[NR_CPUS];
+ u32 send_table[VRSS_SEND_TAB_SIZE];
+ u32 num_chn;
+ atomic_t queue_sends[NR_CPUS];
+
/* Holds rndis device info */
void *extension;
- /* The recive buffer for this device */
+
+ int ring_size;
+
+ /* The primary channel callback buffer */
unsigned char cb_buffer[NETVSC_PACKET_SIZE];
+ /* The sub channel callback buffer */
+ unsigned char *sub_cb_buf;
};
/* NdisInitialize message */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index daddea2..990f9c2 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -422,6 +422,9 @@ int netvsc_device_remove(struct hv_device *device)
kfree(netvsc_packet);
}
+ if (net_device->sub_cb_buf)
+ vfree(net_device->sub_cb_buf);
+
kfree(net_device);
return 0;
}
@@ -461,7 +464,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
(nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
(nvsp_packet->hdr.msg_type ==
- NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
+ NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
+ (nvsp_packet->hdr.msg_type ==
+ NVSP_MSG5_TYPE_SUBCHANNEL)) {
/* Copy the response back */
memcpy(&net_device->channel_init_pkt, nvsp_packet,
sizeof(struct nvsp_message));
@@ -469,28 +474,37 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
} else if (nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
int num_outstanding_sends;
+ u16 q_idx = 0;
+ struct vmbus_channel *channel = device->channel;
+ int queue_sends;
/* Get the send context */
nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
packet->trans_id;
/* Notify the layer above us */
- if (nvsc_packet)
+ if (nvsc_packet) {
+ q_idx = nvsc_packet->q_idx;
+ channel = nvsc_packet->channel;
nvsc_packet->completion.send.send_completion(
nvsc_packet->completion.send.
send_completion_ctx);
+ }
num_outstanding_sends =
atomic_dec_return(&net_device->num_outstanding_sends);
+ queue_sends = atomic_dec_return(
+ &net_device->queue_sends[q_idx]);
if (net_device->destroy && num_outstanding_sends == 0)
wake_up(&net_device->wait_drain);
- if (netif_queue_stopped(ndev) && !net_device->start_remove &&
- (hv_ringbuf_avail_percent(&device->channel->outbound)
- > RING_AVAIL_PERCENT_HIWATER ||
- num_outstanding_sends < 1))
- netif_wake_queue(ndev);
+ if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
+ !net_device->start_remove &&
+ (hv_ringbuf_avail_percent(&channel->outbound) >
+ RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
+ netif_tx_wake_queue(netdev_get_tx_queue(
+ ndev, q_idx));
} else {
netdev_err(ndev, "Unknown send completion packet type- "
"%d received!!\n", nvsp_packet->hdr.msg_type);
@@ -505,6 +519,7 @@ int netvsc_send(struct hv_device *device,
int ret = 0;
struct nvsp_message sendMessage;
struct net_device *ndev;
+ struct vmbus_channel *out_channel = NULL;
u64 req_id;
net_device = get_outbound_net_device(device);
@@ -531,15 +546,20 @@ int netvsc_send(struct hv_device *device,
else
req_id = 0;
+ out_channel = net_device->chn_table[packet->q_idx];
+ if (out_channel == NULL)
+ out_channel = device->channel;
+ packet->channel = out_channel;
+
if (packet->page_buf_cnt) {
- ret = vmbus_sendpacket_pagebuffer(device->channel,
+ ret = vmbus_sendpacket_pagebuffer(out_channel,
packet->page_buf,
packet->page_buf_cnt,
&sendMessage,
sizeof(struct nvsp_message),
req_id);
} else {
- ret = vmbus_sendpacket(device->channel, &sendMessage,
+ ret = vmbus_sendpacket(out_channel, &sendMessage,
sizeof(struct nvsp_message),
req_id,
VM_PKT_DATA_INBAND,
@@ -548,17 +568,24 @@ int netvsc_send(struct hv_device *device,
if (ret == 0) {
atomic_inc(&net_device->num_outstanding_sends);
- if (hv_ringbuf_avail_percent(&device->channel->outbound) <
+ atomic_inc(&net_device->queue_sends[packet->q_idx]);
+
+ if (hv_ringbuf_avail_percent(&out_channel->outbound) <
RING_AVAIL_PERCENT_LOWATER) {
- netif_stop_queue(ndev);
+ netif_tx_stop_queue(netdev_get_tx_queue(
+ ndev, packet->q_idx));
+
if (atomic_read(&net_device->
- num_outstanding_sends) < 1)
- netif_wake_queue(ndev);
+ queue_sends[packet->q_idx]) < 1)
+ netif_tx_wake_queue(netdev_get_tx_queue(
+ ndev, packet->q_idx));
}
} else if (ret == -EAGAIN) {
- netif_stop_queue(ndev);
- if (atomic_read(&net_device->num_outstanding_sends) < 1) {
- netif_wake_queue(ndev);
+ netif_tx_stop_queue(netdev_get_tx_queue(
+ ndev, packet->q_idx));
+ if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
+ netif_tx_wake_queue(netdev_get_tx_queue(
+ ndev, packet->q_idx));
ret = -ENOSPC;
}
} else {
@@ -570,6 +597,7 @@ int netvsc_send(struct hv_device *device,
}
static void netvsc_send_recv_completion(struct hv_device *device,
+ struct vmbus_channel *channel,
struct netvsc_device *net_device,
u64 transaction_id, u32 status)
{
@@ -587,7 +615,7 @@ static void netvsc_send_recv_completion(struct hv_device *device,
retry_send_cmplt:
/* Send the completion */
- ret = vmbus_sendpacket(device->channel, &recvcompMessage,
+ ret = vmbus_sendpacket(channel, &recvcompMessage,
sizeof(struct nvsp_message), transaction_id,
VM_PKT_COMP, 0);
if (ret == 0) {
@@ -618,6 +646,7 @@ static void netvsc_receive_completion(void *context)
{
struct hv_netvsc_packet *packet = context;
struct hv_device *device = packet->device;
+ struct vmbus_channel *channel;
struct netvsc_device *net_device;
u64 transaction_id = 0;
bool fsend_receive_comp = false;
@@ -649,6 +678,7 @@ static void netvsc_receive_completion(void *context)
*/
if (packet->xfer_page_pkt->count == 0) {
fsend_receive_comp = true;
+ channel = packet->xfer_page_pkt->channel;
transaction_id = packet->completion.recv.recv_completion_tid;
status = packet->xfer_page_pkt->status;
list_add_tail(&packet->xfer_page_pkt->list_ent,
@@ -662,12 +692,13 @@ static void netvsc_receive_completion(void *context)
/* Send a receive completion for the xfer page packet */
if (fsend_receive_comp)
- netvsc_send_recv_completion(device, net_device, transaction_id,
- status);
+ netvsc_send_recv_completion(device, channel, net_device,
+ transaction_id, status);
}
static void netvsc_receive(struct netvsc_device *net_device,
+ struct vmbus_channel *channel,
struct hv_device *device,
struct vmpacket_descriptor *packet)
{
@@ -748,7 +779,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
flags);
- netvsc_send_recv_completion(device, net_device,
+ netvsc_send_recv_completion(device, channel, net_device,
vmxferpage_packet->d.trans_id,
NVSP_STAT_FAIL);
@@ -759,6 +790,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
xferpage_packet = (struct xferpage_packet *)listHead.next;
list_del(&xferpage_packet->list_ent);
xferpage_packet->status = NVSP_STAT_SUCCESS;
+ xferpage_packet->channel = channel;
/* This is how much we can satisfy */
xferpage_packet->count = count - 1;
@@ -800,10 +832,45 @@ static void netvsc_receive(struct netvsc_device *net_device,
}
-static void netvsc_channel_cb(void *context)
+
+static void netvsc_send_table(struct hv_device *hdev,
+ struct vmpacket_descriptor *vmpkt)
+{
+ struct netvsc_device *nvscdev;
+ struct net_device *ndev;
+ struct nvsp_message *nvmsg;
+ int i;
+ u32 count, *tab;
+
+ nvscdev = get_outbound_net_device(hdev);
+ if (!nvscdev)
+ return;
+ ndev = nvscdev->ndev;
+
+ nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
+ (vmpkt->offset8 << 3));
+
+ if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
+ return;
+
+ count = nvmsg->msg.v5_msg.send_table.count;
+ if (count != VRSS_SEND_TAB_SIZE) {
+ netdev_err(ndev, "Received wrong send-table size:%u\n", count);
+ return;
+ }
+
+ tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
+ nvmsg->msg.v5_msg.send_table.offset);
+
+ for (i = 0; i < count; i++)
+ nvscdev->send_table[i] = tab[i];
+}
+
+void netvsc_channel_cb(void *context)
{
int ret;
- struct hv_device *device = context;
+ struct vmbus_channel *channel = (struct vmbus_channel *)context;
+ struct hv_device *device;
struct netvsc_device *net_device;
u32 bytes_recvd;
u64 request_id;
@@ -812,14 +879,19 @@ static void netvsc_channel_cb(void *context)
int bufferlen = NETVSC_PACKET_SIZE;
struct net_device *ndev;
+ if (channel->primary_channel != NULL)
+ device = channel->primary_channel->device_obj;
+ else
+ device = channel->device_obj;
+
net_device = get_inbound_net_device(device);
if (!net_device)
return;
ndev = net_device->ndev;
- buffer = net_device->cb_buffer;
+ buffer = get_per_channel_state(channel);
do {
- ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
+ ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
&bytes_recvd, &request_id);
if (ret == 0) {
if (bytes_recvd > 0) {
@@ -831,10 +903,14 @@ static void netvsc_channel_cb(void *context)
break;
case VM_PKT_DATA_USING_XFER_PAGES:
- netvsc_receive(net_device,
+ netvsc_receive(net_device, channel,
device, desc);
break;
+ case VM_PKT_DATA_INBAND:
+ netvsc_send_table(device, desc);
+ break;
+
default:
netdev_err(ndev,
"unhandled packet type %d, "
@@ -893,6 +969,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
goto cleanup;
}
+ net_device->ring_size = ring_size;
+
/*
* Coming into this function, struct net_device * is
* registered as the driver private data.
@@ -917,10 +995,12 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
}
init_completion(&net_device->channel_init_wait);
+ set_per_channel_state(device->channel, net_device->cb_buffer);
+
/* Open the channel */
ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
ring_size * PAGE_SIZE, NULL, 0,
- netvsc_channel_cb, device);
+ netvsc_channel_cb, device->channel);
if (ret != 0) {
netdev_err(ndev, "unable to open channel: %d\n", ret);
@@ -930,6 +1010,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
/* Channel is opened */
pr_info("hv_netvsc channel opened successfully\n");
+ net_device->chn_table[0] = device->channel;
+
/* Connect with the NetVsp */
ret = netvsc_connect_vsp(device);
if (ret != 0) {
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 4e4cf9e..980aa88 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net)
return ret;
}
- netif_start_queue(net);
+ netif_tx_start_all_queues(net);
nvdev = hv_get_drvdata(device_obj);
rdev = nvdev->extension;
@@ -149,6 +149,88 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
return ppi;
}
+union sub_key {
+ u64 k;
+ struct {
+ u8 pad[3];
+ u8 kb;
+ u32 ka;
+ };
+};
+
+/* Toeplitz hash function
+ * data: network byte order
+ * return: host byte order
+ */
+static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen)
+{
+ union sub_key subk;
+ int k_next = 4;
+ u8 dt;
+ int i, j;
+ u32 ret = 0;
+
+ subk.k = 0;
+ subk.ka = ntohl(*(u32 *)key);
+
+ for (i = 0; i < dlen; i++) {
+ subk.kb = key[k_next];
+ k_next = (k_next + 1) % klen;
+ dt = data[i];
+ for (j = 0; j < 8; j++) {
+ if (dt & 0x80)
+ ret ^= subk.ka;
+ dt <<= 1;
+ subk.k <<= 1;
+ }
+ }
+
+ return ret;
+}
+
+static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
+{
+ struct iphdr *iphdr;
+ int data_len;
+ bool ret = false;
+
+ if (eth_hdr(skb)->h_proto != htons(ETH_P_IP))
+ return false;
+
+ iphdr = ip_hdr(skb);
+
+ if (iphdr->version == 4) {
+ if (iphdr->protocol == IPPROTO_TCP)
+ data_len = 12;
+ else
+ data_len = 8;
+ *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN,
+ (u8 *)&iphdr->saddr, data_len);
+ ret = true;
+ }
+
+ return ret;
+}
+
+static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+ void *accel_priv, select_queue_fallback_t fallback)
+{
+ struct net_device_context *net_device_ctx = netdev_priv(ndev);
+ struct hv_device *hdev = net_device_ctx->device_ctx;
+ struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev);
+ u32 hash;
+ u16 q_idx = 0;
+
+ if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
+ return 0;
+
+ if (netvsc_set_hash(&hash, skb))
+ q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
+ ndev->real_num_tx_queues;
+
+ return q_idx;
+}
+
static void netvsc_xmit_completion(void *context)
{
struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
@@ -331,6 +413,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
packet->vlan_tci = skb->vlan_tci;
+ packet->q_idx = skb_get_queue_mapping(skb);
+
packet->is_data_pkt = true;
packet->total_data_buflen = skb->len;
@@ -528,6 +612,9 @@ int netvsc_recv_callback(struct hv_device *device_obj,
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
packet->vlan_tci);
+ skb_record_rx_queue(skb, packet->xfer_page_pkt->channel->
+ offermsg.offer.sub_channel_index % net->real_num_rx_queues);
+
net->stats.rx_packets++;
net->stats.rx_bytes += packet->total_data_buflen;
@@ -576,7 +663,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
hv_set_drvdata(hdev, ndev);
device_info.ring_size = ring_size;
rndis_filter_device_add(hdev, &device_info);
- netif_wake_queue(ndev);
+ netif_tx_wake_all_queues(ndev);
return 0;
}
@@ -622,6 +709,7 @@ static const struct net_device_ops device_ops = {
.ndo_change_mtu = netvsc_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = netvsc_set_mac_addr,
+ .ndo_select_queue = netvsc_select_queue,
};
/*
@@ -668,9 +756,11 @@ static int netvsc_probe(struct hv_device *dev,
struct net_device *net = NULL;
struct net_device_context *net_device_ctx;
struct netvsc_device_info device_info;
+ struct netvsc_device *nvdev;
int ret;
- net = alloc_etherdev(sizeof(struct net_device_context));
+ net = alloc_etherdev_mq(sizeof(struct net_device_context),
+ num_online_cpus());
if (!net)
return -ENOMEM;
@@ -703,6 +793,12 @@ static int netvsc_probe(struct hv_device *dev,
}
memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
+ nvdev = hv_get_drvdata(dev);
+ netif_set_real_num_tx_queues(net, nvdev->num_chn);
+ netif_set_real_num_rx_queues(net, nvdev->num_chn);
+ dev_info(&dev->device, "real num tx,rx queues:%u, %u\n",
+ net->real_num_tx_queues, net->real_num_rx_queues);
+
ret = register_netdev(net);
if (ret != 0) {
pr_err("Unable to register netdev.\n");
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 4a37e3d..29fcbd7 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -31,7 +31,7 @@
#include "hyperv_net.h"
-#define RNDIS_EXT_LEN 100
+#define RNDIS_EXT_LEN PAGE_SIZE
struct rndis_request {
struct list_head list_ent;
struct completion wait_event;
@@ -94,6 +94,8 @@ static struct rndis_request *get_rndis_request(struct rndis_device *dev,
rndis_msg->ndis_msg_type = msg_type;
rndis_msg->msg_len = msg_len;
+ request->pkt.q_idx = 0;
+
/*
* Set the request id. This field is always after the rndis header for
* request/response packet types so we just used the SetRequest as a
@@ -509,6 +511,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
query->info_buflen = 0;
query->dev_vc_handle = 0;
+ if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
+ struct ndis_recv_scale_cap *cap;
+
+ request->request_msg.msg_len +=
+ sizeof(struct ndis_recv_scale_cap);
+ query->info_buflen = sizeof(struct ndis_recv_scale_cap);
+ cap = (struct ndis_recv_scale_cap *)((unsigned long)query +
+ query->info_buf_offset);
+ cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES;
+ cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
+ cap->hdr.size = sizeof(struct ndis_recv_scale_cap);
+ }
+
ret = rndis_filter_send_request(dev, request);
if (ret != 0)
goto cleanup;
@@ -685,6 +700,88 @@ cleanup:
return ret;
}
+u8 netvsc_hash_key[HASH_KEYLEN] = {
+ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+ 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+ 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+ 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+ 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+};
+
+int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
+{
+ struct net_device *ndev = rdev->net_dev->ndev;
+ struct rndis_request *request;
+ struct rndis_set_request *set;
+ struct rndis_set_complete *set_complete;
+ u32 extlen = sizeof(struct ndis_recv_scale_param) + 4*ITAB_NUM
+ + HASH_KEYLEN;
+ struct ndis_recv_scale_param *rssp;
+ u32 *itab;
+ u8 *keyp;
+ int i, t, ret;
+
+ request = get_rndis_request(rdev, RNDIS_MSG_SET,
+ RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
+ if (!request)
+ return -ENOMEM;
+
+ set = &request->request_msg.msg.set_req;
+ set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS;
+ set->info_buflen = extlen;
+ set->info_buf_offset = sizeof(struct rndis_set_request);
+ set->dev_vc_handle = 0;
+
+ rssp = (struct ndis_recv_scale_param *)(set + 1);
+ rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS;
+ rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
+ rssp->hdr.size = sizeof(struct ndis_recv_scale_param);
+ rssp->flag = 0;
+ rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
+ NDIS_HASH_TCP_IPV4;
+ rssp->indirect_tabsize = 4*ITAB_NUM;
+ rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
+ rssp->hashkey_size = HASH_KEYLEN;
+ rssp->kashkey_offset = rssp->indirect_taboffset
+ + rssp->indirect_tabsize;
+
+ /* Set indirection table entries */
+ itab = (u32 *)(rssp + 1);
+ for (i = 0; i < ITAB_NUM; i++)
+ itab[i] = i % num_queue;
+
+ /* Set hask key values */
+ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
+ for (i = 0; i < HASH_KEYLEN; i++)
+ keyp[i] = netvsc_hash_key[i];
+
+
+ ret = rndis_filter_send_request(rdev, request);
+ if (ret != 0)
+ goto cleanup;
+
+ t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
+ if (t == 0) {
+ netdev_err(ndev, "timeout before we got a set response...\n");
+ /* can't put_rndis_request, since we may still receive a
+ * send-completion.
+ */
+ return -ETIMEDOUT;
+ } else {
+ set_complete = &request->response_msg.msg.set_complete;
+ if (set_complete->status != RNDIS_STATUS_SUCCESS) {
+ netdev_err(ndev, "Fail to set RSS parameters:0x%x\n",
+ set_complete->status);
+ ret = -EINVAL;
+ }
+ }
+
+cleanup:
+ put_rndis_request(rdev, request);
+ return ret;
+}
+
+
static int rndis_filter_query_device_link_status(struct rndis_device *dev)
{
u32 size = sizeof(u32);
@@ -876,6 +973,29 @@ static int rndis_filter_close_device(struct rndis_device *dev)
return ret;
}
+
+static void netvsc_sc_open(struct vmbus_channel *new_sc)
+{
+ struct netvsc_device *nvscdev;
+ u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
+ int ret;
+
+ nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);
+
+ if (chn_index >= nvscdev->num_chn)
+ return;
+
+ set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
+ NETVSC_PACKET_SIZE);
+
+ ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
+ nvscdev->ring_size * PAGE_SIZE, NULL, 0,
+ netvsc_channel_cb, new_sc);
+
+ if (ret == 0)
+ nvscdev->chn_table[chn_index] = new_sc;
+}
+
int rndis_filter_device_add(struct hv_device *dev,
void *additional_info)
{
@@ -884,6 +1004,10 @@ int rndis_filter_device_add(struct hv_device *dev,
struct rndis_device *rndis_device;
struct netvsc_device_info *device_info = additional_info;
struct ndis_offload_params offloads;
+ struct nvsp_message *init_packet;
+ int t;
+ struct ndis_recv_scale_cap rsscap;
+ u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
rndis_device = get_rndis_device();
if (!rndis_device)
@@ -903,6 +1027,7 @@ int rndis_filter_device_add(struct hv_device *dev,
/* Initialize the rndis device */
net_device = hv_get_drvdata(dev);
+ net_device->num_chn = 1;
net_device->extension = rndis_device;
rndis_device->net_dev = net_device;
@@ -951,7 +1076,65 @@ int rndis_filter_device_add(struct hv_device *dev,
rndis_device->hw_mac_adr,
device_info->link_state ? "down" : "up");
- return ret;
+ if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
+ return 0;
+
+ /* vRSS setup */
+ memset(&rsscap, 0, rsscap_size);
+ ret = rndis_filter_query_device(rndis_device,
+ OID_GEN_RECEIVE_SCALE_CAPABILITIES, &rsscap, &rsscap_size);
+ if (ret || rsscap.num_recv_que < 2)
+ goto out;
+
+ net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ?
+ num_online_cpus() : rsscap.num_recv_que;
+ if (net_device->num_chn == 1)
+ goto out;
+
+ net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) *
+ NETVSC_PACKET_SIZE);
+ if (!net_device->sub_cb_buf) {
+ net_device->num_chn = 1;
+ dev_info(&dev->device, "No memory for subchannels.\n");
+ goto out;
+ }
+
+ vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
+
+ init_packet = &net_device->channel_init_pkt;
+ memset(init_packet, 0, sizeof(struct nvsp_message));
+ init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
+ init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
+ init_packet->msg.v5_msg.subchn_req.num_subchannels =
+ net_device->num_chn - 1;
+ ret = vmbus_sendpacket(dev->channel, init_packet,
+ sizeof(struct nvsp_message),
+ (unsigned long)init_packet,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ goto out;
+ t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
+ if (t == 0) {
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+ if (init_packet->msg.v5_msg.subchn_comp.status !=
+ NVSP_STAT_SUCCESS) {
+ ret = -ENODEV;
+ goto out;
+ }
+ net_device->num_chn = 1 +
+ init_packet->msg.v5_msg.subchn_comp.num_subchannels;
+
+ vmbus_are_subchannels_present(dev->channel);
+
+ ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
+
+out:
+ if (ret)
+ net_device->num_chn = 1;
+ return 0; /* return 0 because primary channel can be used alone */
err_dev_remv:
rndis_filter_device_remove(dev);
--
1.7.4.1
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH net-next,v3] hyperv: Add support for virtual Receive Side Scaling (vRSS)
2014-03-18 22:15 [PATCH net-next, v3] hyperv: Add support for virtual Receive Side Scaling (vRSS) Haiyang Zhang
@ 2014-03-19 20:40 ` David Miller
2014-03-19 21:36 ` Haiyang Zhang
2014-03-19 21:56 ` Sergei Shtylyov
0 siblings, 2 replies; 6+ messages in thread
From: David Miller @ 2014-03-19 20:40 UTC (permalink / raw)
To: haiyangz; +Cc: olaf, netdev, jasowang, driverdev-devel, linux-kernel
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Tue, 18 Mar 2014 15:15:22 -0700
> +struct ndis_obj_header {
> + u8 type;
> + u8 rev;
> + u16 size;
> +} __packed;
> +
> +
> +/* ndis_recv_scale_cap/cap_flag */
One empty line is sufficient, we don't need two of them here.
> +extern u8 netvsc_hash_key[];
We no longer use extern in header file function declarations.
> + u32 processor_masks_offset;
> + u32 num_processor_masks;
> + u32 processor_masks_entry_size;
> +};
> +
> +
> /* Fwd declaration */
Again, one empty line is enough.
> @@ -120,6 +217,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
> int netvsc_recv_callback(struct hv_device *device_obj,
> struct hv_netvsc_packet *packet,
> struct ndis_tcp_ip_checksum_info *csum_info);
> +extern void netvsc_channel_cb(void *context);
Again, please remove this extern in function declarations.
> + if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
> + !net_device->start_remove &&
> + (hv_ringbuf_avail_percent(&channel->outbound) >
> + RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
For the second time, this is not properly indented. Multi-line
conditionals should be formatted like this:
if (CONDITION1 OPERATOR
CONDITION2 OPERATOR
...)
etc. For example, this specifically should be:
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
!net_device->start_remove &&
(hv_ringbuf_avail_percent(&channel->outbound) >
RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
Generally speaking, if you are only using TAB characters to indent,
you are probably doing it wrong.
You should use the appropriate number of TAB _and_ SPACE characters
necessary to start the line at exactly the first column after the
openning parenthesis of the appropriate construct.
> @@ -668,9 +756,11 @@ static int netvsc_probe(struct hv_device *dev,
> struct net_device *net = NULL;
> struct net_device_context *net_device_ctx;
> struct netvsc_device_info device_info;
> + struct netvsc_device *nvdev;
> int ret;
>
> - net = alloc_etherdev(sizeof(struct net_device_context));
> + net = alloc_etherdev_mq(sizeof(struct net_device_context),
> + num_online_cpus());
> if (!net)
> return -ENOMEM;
>
num_online_cpus() can change, will your driver accomodate this?
> +}
> +
> +
> static int rndis_filter_query_device_link_status(struct rndis_device *dev)
One empty line is sufficient.
> }
>
> +
> +static void netvsc_sc_open(struct vmbus_channel *new_sc)
Likewise.
> + ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
> + nvscdev->ring_size * PAGE_SIZE, NULL, 0,
> + netvsc_channel_cb, new_sc);
When function arguments span multiple lines the function call
shall be indented as:
func(A, B, C,
D, E, F);
That is, the second and subsequent lines are indented, using
the appropriate number of TAB _and_ SPACE characters, necessary
to reach exactly the first column after the openning parenthesis.
There are several other cases of this, please audit your entire
submission for this problem.
If proper indentation causes lines to significantly exceed 80
columns, consider helper functions or local variables to
allieve the problem.
> + ret = rndis_filter_query_device(rndis_device,
> + OID_GEN_RECEIVE_SCALE_CAPABILITIES, &rsscap, &rsscap_size);
Same problem.
^ permalink raw reply [flat|nested] 6+ messages in thread* RE: [PATCH net-next,v3] hyperv: Add support for virtual Receive Side Scaling (vRSS)
2014-03-19 20:40 ` [PATCH net-next,v3] " David Miller
@ 2014-03-19 21:36 ` Haiyang Zhang
2014-03-19 21:56 ` Sergei Shtylyov
1 sibling, 0 replies; 6+ messages in thread
From: Haiyang Zhang @ 2014-03-19 21:36 UTC (permalink / raw)
To: David Miller
Cc: olaf@aepfle.de, netdev@vger.kernel.org, jasowang@redhat.com,
driverdev-devel@linuxdriverproject.org,
linux-kernel@vger.kernel.org
> -----Original Message-----
> > +
> > +
> > +/* ndis_recv_scale_cap/cap_flag */
>
> One empty line is sufficient, we don't need two of them here.
I will fix this and other formats you pointed out below, also check the whole patch
for similar issues.
> > - net = alloc_etherdev(sizeof(struct net_device_context));
> > + net = alloc_etherdev_mq(sizeof(struct net_device_context),
> > + num_online_cpus());
> > if (!net)
> > return -ENOMEM;
> >
>
> num_online_cpus() can change, will your driver accomodate this?
HyperV currently doesn't support hot-add/remove cpu, so this is constant for
this driver.
Thanks,
- Haiyang
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next,v3] hyperv: Add support for virtual Receive Side Scaling (vRSS)
2014-03-19 20:40 ` [PATCH net-next,v3] " David Miller
2014-03-19 21:36 ` Haiyang Zhang
@ 2014-03-19 21:56 ` Sergei Shtylyov
2014-03-19 21:00 ` Stephen Rothwell
2014-03-20 3:41 ` David Miller
1 sibling, 2 replies; 6+ messages in thread
From: Sergei Shtylyov @ 2014-03-19 21:56 UTC (permalink / raw)
To: David Miller, haiyangz
Cc: netdev, kys, olaf, jasowang, linux-kernel, driverdev-devel
Hello.
On 03/19/2014 11:40 PM, David Miller wrote:
>> +extern u8 netvsc_hash_key[];
> We no longer use extern in header file function declarations.
Wait, this is a variable declaration. :-)
WBR, Sergei
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH net-next,v3] hyperv: Add support for virtual Receive Side Scaling (vRSS)
2014-03-19 21:56 ` Sergei Shtylyov
@ 2014-03-19 21:00 ` Stephen Rothwell
2014-03-20 3:41 ` David Miller
1 sibling, 0 replies; 6+ messages in thread
From: Stephen Rothwell @ 2014-03-19 21:00 UTC (permalink / raw)
To: Sergei Shtylyov
Cc: olaf, netdev, jasowang, driverdev-devel, linux-kernel,
David Miller, haiyangz
[-- Attachment #1.1: Type: text/plain, Size: 542 bytes --]
On Thu, 20 Mar 2014 00:56:05 +0300 Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> wrote:
>
> On 03/19/2014 11:40 PM, David Miller wrote:
>
> >> +extern u8 netvsc_hash_key[];
>
> > We no longer use extern in header file function declarations.
>
> Wait, this is a variable declaration. :-)
And the need for the extern on variable declarations was one reason I
gave for not removing it from the function declarations (consistency is
good :-)).
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
[-- Attachment #1.2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #2: Type: text/plain, Size: 169 bytes --]
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next,v3] hyperv: Add support for virtual Receive Side Scaling (vRSS)
2014-03-19 21:56 ` Sergei Shtylyov
2014-03-19 21:00 ` Stephen Rothwell
@ 2014-03-20 3:41 ` David Miller
1 sibling, 0 replies; 6+ messages in thread
From: David Miller @ 2014-03-20 3:41 UTC (permalink / raw)
To: sergei.shtylyov
Cc: olaf, netdev, jasowang, driverdev-devel, linux-kernel, haiyangz
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Thu, 20 Mar 2014 00:56:05 +0300
> Hello.
>
> On 03/19/2014 11:40 PM, David Miller wrote:
>
>>> +extern u8 netvsc_hash_key[];
>
>> We no longer use extern in header file function declarations.
>
> Wait, this is a variable declaration. :-)
My bad :)
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2014-03-20 3:41 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-03-18 22:15 [PATCH net-next, v3] hyperv: Add support for virtual Receive Side Scaling (vRSS) Haiyang Zhang
2014-03-19 20:40 ` [PATCH net-next,v3] " David Miller
2014-03-19 21:36 ` Haiyang Zhang
2014-03-19 21:56 ` Sergei Shtylyov
2014-03-19 21:00 ` Stephen Rothwell
2014-03-20 3:41 ` David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).