All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-02 15:23           ` santosh nayak
  0 siblings, 0 replies; 73+ messages in thread
From: santosh nayak @ 2012-03-02 15:11 UTC (permalink / raw)
  To: sony.chacko
  Cc: rajesh.borundia, netdev, linux-kernel, kernel-janitors,
	Santosh Nayak

From: Santosh Nayak <santoshprasadnayak@gmail.com>

Fix endian bug.
Add a default case in 'netxen_list_config_vlan_ip'

Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
---
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8dc4a134..971b286 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter *adapter)
 			adapter->driver_mismatch = 1;
 			return;
 		}
-		ptr32[i] = cpu_to_le32(val);
+		ptr32[i] = val;
 		offset += sizeof(u32);
 	}
 
@@ -3028,7 +3028,7 @@ netxen_list_config_vlan_ip(struct netxen_adapter *adapter,
 		list_for_each(head, &adapter->vlan_ip_list) {
 			cur = list_entry(head, struct nx_vlan_ip_list, list);
 
-			if (cur->ip_addr == ifa->ifa_address)
+			if (cur->ip_addr == be32_to_cpu(ifa->ifa_address))
 				return;
 		}
 
@@ -3039,18 +3039,22 @@ netxen_list_config_vlan_ip(struct netxen_adapter *adapter,
 			return;
 		}
 
-		cur->ip_addr = ifa->ifa_address;
+		cur->ip_addr = be32_to_cpu(ifa->ifa_address);
 		list_add_tail(&cur->list, &adapter->vlan_ip_list);
 		break;
 	case NX_IP_DOWN:
 		list_for_each_entry_safe(cur, tmp_cur,
 					&adapter->vlan_ip_list, list) {
-			if (cur->ip_addr == ifa->ifa_address) {
+			if (cur->ip_addr == be32_to_cpu(ifa->ifa_address)) {
 				list_del(&cur->list);
 				kfree(cur);
 				break;
 			}
 		}
+		break;
+	default:
+		printk(KERN_ERR "%ld: Wrong event id \n", event);
+		break;
 	}
 }
 static void
@@ -3070,12 +3074,12 @@ netxen_config_indev_addr(struct netxen_adapter *adapter,
 		switch (event) {
 		case NETDEV_UP:
 			netxen_config_ipaddr(adapter,
-					ifa->ifa_address, NX_IP_UP);
+					be32_to_cpu(ifa->ifa_address), NX_IP_UP);
 			netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP);
 			break;
 		case NETDEV_DOWN:
 			netxen_config_ipaddr(adapter,
-					ifa->ifa_address, NX_IP_DOWN);
+					be32_to_cpu(ifa->ifa_address), NX_IP_DOWN);
 			netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN);
 			break;
 		default:
@@ -3167,11 +3171,11 @@ recheck:
 
 	switch (event) {
 	case NETDEV_UP:
-		netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP);
+		netxen_config_ipaddr(adapter, be32_to_cpu(ifa->ifa_address), NX_IP_UP);
 		netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP);
 		break;
 	case NETDEV_DOWN:
-		netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_DOWN);
+		netxen_config_ipaddr(adapter, be32_to_cpu(ifa->ifa_address), NX_IP_DOWN);
 		netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN);
 		break;
 	default:
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-02 15:23           ` santosh nayak
  0 siblings, 0 replies; 73+ messages in thread
From: santosh nayak @ 2012-03-02 15:23 UTC (permalink / raw)
  To: sony.chacko
  Cc: rajesh.borundia, netdev, linux-kernel, kernel-janitors,
	Santosh Nayak

From: Santosh Nayak <santoshprasadnayak@gmail.com>

Fix endian bug.
Add a default case in 'netxen_list_config_vlan_ip'

Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
---
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8dc4a134..971b286 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter *adapter)
 			adapter->driver_mismatch = 1;
 			return;
 		}
-		ptr32[i] = cpu_to_le32(val);
+		ptr32[i] = val;
 		offset += sizeof(u32);
 	}
 
@@ -3028,7 +3028,7 @@ netxen_list_config_vlan_ip(struct netxen_adapter *adapter,
 		list_for_each(head, &adapter->vlan_ip_list) {
 			cur = list_entry(head, struct nx_vlan_ip_list, list);
 
-			if (cur->ip_addr == ifa->ifa_address)
+			if (cur->ip_addr == be32_to_cpu(ifa->ifa_address))
 				return;
 		}
 
@@ -3039,18 +3039,22 @@ netxen_list_config_vlan_ip(struct netxen_adapter *adapter,
 			return;
 		}
 
-		cur->ip_addr = ifa->ifa_address;
+		cur->ip_addr = be32_to_cpu(ifa->ifa_address);
 		list_add_tail(&cur->list, &adapter->vlan_ip_list);
 		break;
 	case NX_IP_DOWN:
 		list_for_each_entry_safe(cur, tmp_cur,
 					&adapter->vlan_ip_list, list) {
-			if (cur->ip_addr == ifa->ifa_address) {
+			if (cur->ip_addr == be32_to_cpu(ifa->ifa_address)) {
 				list_del(&cur->list);
 				kfree(cur);
 				break;
 			}
 		}
+		break;
+	default:
+		printk(KERN_ERR "%ld: Wrong event id \n", event);
+		break;
 	}
 }
 static void
@@ -3070,12 +3074,12 @@ netxen_config_indev_addr(struct netxen_adapter *adapter,
 		switch (event) {
 		case NETDEV_UP:
 			netxen_config_ipaddr(adapter,
-					ifa->ifa_address, NX_IP_UP);
+					be32_to_cpu(ifa->ifa_address), NX_IP_UP);
 			netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP);
 			break;
 		case NETDEV_DOWN:
 			netxen_config_ipaddr(adapter,
-					ifa->ifa_address, NX_IP_DOWN);
+					be32_to_cpu(ifa->ifa_address), NX_IP_DOWN);
 			netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN);
 			break;
 		default:
@@ -3167,11 +3171,11 @@ recheck:
 
 	switch (event) {
 	case NETDEV_UP:
-		netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP);
+		netxen_config_ipaddr(adapter, be32_to_cpu(ifa->ifa_address), NX_IP_UP);
 		netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP);
 		break;
 	case NETDEV_DOWN:
-		netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_DOWN);
+		netxen_config_ipaddr(adapter, be32_to_cpu(ifa->ifa_address), NX_IP_DOWN);
 		netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN);
 		break;
 	default:
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-02 15:23           ` santosh nayak
@ 2012-03-02 17:22             ` Rajesh Borundia
  -1 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-02 17:22 UTC (permalink / raw)
  To: santosh nayak, Sony Chacko
  Cc: netdev, linux-kernel, kernel-janitors@vger.kernel.org

Santosh,

Thanks for pointing out the bug. However, the adapter takes the IP address in big-endian format,
so there is no need to use be32_to_cpu(); instead, the data type of ip should be changed to __be32.
Also, in netxen_config_ipaddr():
-      req.words[1] = cpu_to_le64(ip);
+      req.words[1] = ip;

Rest looks fine.

Rajesh
________________________________________
From: santosh nayak [santoshprasadnayak@gmail.com]
Sent: Friday, March 02, 2012 8:41 PM
To: Sony Chacko
Cc: Rajesh Borundia; netdev; linux-kernel; kernel-janitors@vger.kernel.org; Santosh Nayak
Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.

From: Santosh Nayak <santoshprasadnayak@gmail.com>

Fix endian bug.
Add a default case in 'netxen_list_config_vlan_ip'

Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
---
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8dc4a134..971b286 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter *adapter)
                        adapter->driver_mismatch = 1;
                        return;
                }
-               ptr32[i] = cpu_to_le32(val);
+               ptr32[i] = val;
                offset += sizeof(u32);
        }

@@ -3028,7 +3028,7 @@ netxen_list_config_vlan_ip(struct netxen_adapter *adapter,
                list_for_each(head, &adapter->vlan_ip_list) {
                        cur = list_entry(head, struct nx_vlan_ip_list, list);

-                       if (cur->ip_addr == ifa->ifa_address)
+                       if (cur->ip_addr == be32_to_cpu(ifa->ifa_address))
                                return;
                }

@@ -3039,18 +3039,22 @@ netxen_list_config_vlan_ip(struct netxen_adapter *adapter,
                        return;
                }

-               cur->ip_addr = ifa->ifa_address;
+               cur->ip_addr = be32_to_cpu(ifa->ifa_address);
                list_add_tail(&cur->list, &adapter->vlan_ip_list);
                break;
        case NX_IP_DOWN:
                list_for_each_entry_safe(cur, tmp_cur,
                                        &adapter->vlan_ip_list, list) {
-                       if (cur->ip_addr == ifa->ifa_address) {
+                       if (cur->ip_addr == be32_to_cpu(ifa->ifa_address)) {
                                list_del(&cur->list);
                                kfree(cur);
                                break;
                        }
                }
+               break;
+       default:
+               printk(KERN_ERR "%ld: Wrong event id \n", event);
+               break;
        }
 }
 static void
@@ -3070,12 +3074,12 @@ netxen_config_indev_addr(struct netxen_adapter *adapter,
                switch (event) {
                case NETDEV_UP:
                        netxen_config_ipaddr(adapter,
-                                       ifa->ifa_address, NX_IP_UP);
+                                       be32_to_cpu(ifa->ifa_address), NX_IP_UP);
                        netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP);
                        break;
                case NETDEV_DOWN:
                        netxen_config_ipaddr(adapter,
-                                       ifa->ifa_address, NX_IP_DOWN);
+                                       be32_to_cpu(ifa->ifa_address), NX_IP_DOWN);
                        netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN);
                        break;
                default:
@@ -3167,11 +3171,11 @@ recheck:

        switch (event) {
        case NETDEV_UP:
-               netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP);
+               netxen_config_ipaddr(adapter, be32_to_cpu(ifa->ifa_address), NX_IP_UP);
                netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP);
                break;
        case NETDEV_DOWN:
-               netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_DOWN);
+               netxen_config_ipaddr(adapter, be32_to_cpu(ifa->ifa_address), NX_IP_DOWN);
                netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN);
                break;
        default:
--
1.7.4.4




^ permalink raw reply related	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-02 17:22             ` Rajesh Borundia
  0 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-02 17:22 UTC (permalink / raw)
  To: santosh nayak, Sony Chacko
  Cc: netdev, linux-kernel, kernel-janitors@vger.kernel.org

Santosh,

Thanks for pointing out the bug. However, the adapter takes the IP address in big-endian format,
so there is no need to use be32_to_cpu(); instead, the data type of ip should be changed to __be32.
Also, in netxen_config_ipaddr():
-      req.words[1] = cpu_to_le64(ip);
+      req.words[1] = ip;

Rest looks fine.

Rajesh
________________________________________
From: santosh nayak [santoshprasadnayak@gmail.com]
Sent: Friday, March 02, 2012 8:41 PM
To: Sony Chacko
Cc: Rajesh Borundia; netdev; linux-kernel; kernel-janitors@vger.kernel.org; Santosh Nayak
Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.

From: Santosh Nayak <santoshprasadnayak@gmail.com>

Fix endian bug.
Add a default case in 'netxen_list_config_vlan_ip'

Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
---
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8dc4a134..971b286 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter *adapter)
                        adapter->driver_mismatch = 1;
                        return;
                }
-               ptr32[i] = cpu_to_le32(val);
+               ptr32[i] = val;
                offset += sizeof(u32);
        }

@@ -3028,7 +3028,7 @@ netxen_list_config_vlan_ip(struct netxen_adapter *adapter,
                list_for_each(head, &adapter->vlan_ip_list) {
                        cur = list_entry(head, struct nx_vlan_ip_list, list);

-                       if (cur->ip_addr == ifa->ifa_address)
+                       if (cur->ip_addr == be32_to_cpu(ifa->ifa_address))
                                return;
                }

@@ -3039,18 +3039,22 @@ netxen_list_config_vlan_ip(struct netxen_adapter *adapter,
                        return;
                }

-               cur->ip_addr = ifa->ifa_address;
+               cur->ip_addr = be32_to_cpu(ifa->ifa_address);
                list_add_tail(&cur->list, &adapter->vlan_ip_list);
                break;
        case NX_IP_DOWN:
                list_for_each_entry_safe(cur, tmp_cur,
                                        &adapter->vlan_ip_list, list) {
-                       if (cur->ip_addr == ifa->ifa_address) {
+                       if (cur->ip_addr == be32_to_cpu(ifa->ifa_address)) {
                                list_del(&cur->list);
                                kfree(cur);
                                break;
                        }
                }
+               break;
+       default:
+               printk(KERN_ERR "%ld: Wrong event id \n", event);
+               break;
        }
 }
 static void
@@ -3070,12 +3074,12 @@ netxen_config_indev_addr(struct netxen_adapter *adapter,
                switch (event) {
                case NETDEV_UP:
                        netxen_config_ipaddr(adapter,
-                                       ifa->ifa_address, NX_IP_UP);
+                                       be32_to_cpu(ifa->ifa_address), NX_IP_UP);
                        netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP);
                        break;
                case NETDEV_DOWN:
                        netxen_config_ipaddr(adapter,
-                                       ifa->ifa_address, NX_IP_DOWN);
+                                       be32_to_cpu(ifa->ifa_address), NX_IP_DOWN);
                        netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN);
                        break;
                default:
@@ -3167,11 +3171,11 @@ recheck:

        switch (event) {
        case NETDEV_UP:
-               netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP);
+               netxen_config_ipaddr(adapter, be32_to_cpu(ifa->ifa_address), NX_IP_UP);
                netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP);
                break;
        case NETDEV_DOWN:
-               netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_DOWN);
+               netxen_config_ipaddr(adapter, be32_to_cpu(ifa->ifa_address), NX_IP_DOWN);
                netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN);
                break;
        default:
--
1.7.4.4




^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-02 15:23           ` santosh nayak
@ 2012-03-03 15:59 ` santosh nayak
  -1 siblings, 0 replies; 73+ messages in thread
From: santosh nayak @ 2012-03-03 15:47 UTC (permalink / raw)
  To: sony.chacko
  Cc: rajesh.borundia, netdev, linux-kernel, kernel-janitors,
	Santosh Nayak

From: Santosh Nayak <santoshprasadnayak@gmail.com>

Fix endian bug.

Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
 drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++-----
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index 2eeac32..b5de8a7 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
 
 struct nx_vlan_ip_list {
 	struct list_head list;
-	u32 ip_addr;
+	__be32 ip_addr;
 };
 
 /*
@@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct nx_host_sds_ring *sds_ring, int max);
 void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
 int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
 int netxen_config_rss(struct netxen_adapter *adapter, int enable);
-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd);
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd);
 int netxen_linkevent_request(struct netxen_adapter *adapter, int enable);
 void netxen_advert_link_change(struct netxen_adapter *adapter, int linkup);
 void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
index 6f37470..0f81287 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
@@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter *adapter, int enable)
 	return rv;
 }
 
-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd)
 {
 	nx_nic_req_t req;
 	u64 word;
+	u64 ip_addr;
 	int rv;
 
 	memset(&req, 0, sizeof(nx_nic_req_t));
@@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
 	req.req_hdr = cpu_to_le64(word);
 
 	req.words[0] = cpu_to_le64(cmd);
-	req.words[1] = cpu_to_le64(ip);
+	ip_addr = be32_to_cpu(ip);
+	*(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
 
 	rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1);
 	if (rv != 0) {
@@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac)
 	if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) == -1)
 		return -1;
 
-	if (*mac == cpu_to_le64(~0ULL)) {
+	if (*mac == ~0ULL) {
 
 		offset = NX_OLD_MAC_ADDR_OFFSET +
 			(adapter->portnum * sizeof(u64));
@@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac)
 					offset, sizeof(u64), pmac) == -1)
 			return -1;
 
-		if (*mac == cpu_to_le64(~0ULL))
+		if (*mac == ~0ULL)
 			return -1;
 	}
 	return 0;
@@ -2178,7 +2180,7 @@ lock_try:
 		NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter->ahw.pci_base0, waddr);
 		raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
 		NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
-		*data_buff++ = cpu_to_le32(val);
+		*data_buff++ = val;
 		fl_addr += sizeof(val);
 	}
 	readl((void __iomem *)(adapter->ahw.pci_base0 + NX_FLASH_SEM2_ULK));
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8dc4a134..70783b4 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter *adapter)
 			adapter->driver_mismatch = 1;
 			return;
 		}
-		ptr32[i] = cpu_to_le32(val);
+		ptr32[i] = val;
 		offset += sizeof(u32);
 	}
 
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-03 15:59 ` santosh nayak
  0 siblings, 0 replies; 73+ messages in thread
From: santosh nayak @ 2012-03-03 15:59 UTC (permalink / raw)
  To: sony.chacko
  Cc: rajesh.borundia, netdev, linux-kernel, kernel-janitors,
	Santosh Nayak

From: Santosh Nayak <santoshprasadnayak@gmail.com>

Fix endian bug.

Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
 drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++-----
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index 2eeac32..b5de8a7 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
 
 struct nx_vlan_ip_list {
 	struct list_head list;
-	u32 ip_addr;
+	__be32 ip_addr;
 };
 
 /*
@@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct nx_host_sds_ring *sds_ring, int max);
 void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
 int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
 int netxen_config_rss(struct netxen_adapter *adapter, int enable);
-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd);
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd);
 int netxen_linkevent_request(struct netxen_adapter *adapter, int enable);
 void netxen_advert_link_change(struct netxen_adapter *adapter, int linkup);
 void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
index 6f37470..0f81287 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
@@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter *adapter, int enable)
 	return rv;
 }
 
-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd)
 {
 	nx_nic_req_t req;
 	u64 word;
+	u64 ip_addr;
 	int rv;
 
 	memset(&req, 0, sizeof(nx_nic_req_t));
@@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
 	req.req_hdr = cpu_to_le64(word);
 
 	req.words[0] = cpu_to_le64(cmd);
-	req.words[1] = cpu_to_le64(ip);
+	ip_addr = be32_to_cpu(ip);
+	*(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
 
 	rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1);
 	if (rv != 0) {
@@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac)
 	if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) == -1)
 		return -1;
 
-	if (*mac == cpu_to_le64(~0ULL)) {
+	if (*mac == ~0ULL) {
 
 		offset = NX_OLD_MAC_ADDR_OFFSET +
 			(adapter->portnum * sizeof(u64));
@@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac)
 					offset, sizeof(u64), pmac) == -1)
 			return -1;
 
-		if (*mac == cpu_to_le64(~0ULL))
+		if (*mac == ~0ULL)
 			return -1;
 	}
 	return 0;
@@ -2178,7 +2180,7 @@ lock_try:
 		NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter->ahw.pci_base0, waddr);
 		raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
 		NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
-		*data_buff++ = cpu_to_le32(val);
+		*data_buff++ = val;
 		fl_addr += sizeof(val);
 	}
 	readl((void __iomem *)(adapter->ahw.pci_base0 + NX_FLASH_SEM2_ULK));
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8dc4a134..70783b4 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter *adapter)
 			adapter->driver_mismatch = 1;
 			return;
 		}
-		ptr32[i] = cpu_to_le32(val);
+		ptr32[i] = val;
 		offset += sizeof(u32);
 	}
 
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-03 15:59 ` santosh nayak
@ 2012-03-05 11:43   ` Rajesh Borundia
  -1 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-05 11:43 UTC (permalink / raw)
  To: santosh nayak, Sony Chacko
  Cc: netdev, linux-kernel, kernel-janitors@vger.kernel.org

I will review this and get back to you.

Rajesh
________________________________________
From: santosh nayak [santoshprasadnayak@gmail.com]
Sent: Saturday, March 03, 2012 9:17 PM
To: Sony Chacko
Cc: Rajesh Borundia; netdev; linux-kernel; kernel-janitors@vger.kernel.org; Santosh Nayak
Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.

From: Santosh Nayak <santoshprasadnayak@gmail.com>

Fix endian bug.

Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
 drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++-----
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index 2eeac32..b5de8a7 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {

 struct nx_vlan_ip_list {
        struct list_head list;
-       u32 ip_addr;
+       __be32 ip_addr;
 };

 /*
@@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct nx_host_sds_ring *sds_ring, int max);
 void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
 int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
 int netxen_config_rss(struct netxen_adapter *adapter, int enable);
-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd);
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd);
 int netxen_linkevent_request(struct netxen_adapter *adapter, int enable);
 void netxen_advert_link_change(struct netxen_adapter *adapter, int linkup);
 void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
index 6f37470..0f81287 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
@@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter *adapter, int enable)
        return rv;
 }

-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd)
 {
        nx_nic_req_t req;
        u64 word;
+       u64 ip_addr;
        int rv;

        memset(&req, 0, sizeof(nx_nic_req_t));
@@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
        req.req_hdr = cpu_to_le64(word);

        req.words[0] = cpu_to_le64(cmd);
-       req.words[1] = cpu_to_le64(ip);
+       ip_addr = be32_to_cpu(ip);
+       *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);

        rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1);
        if (rv != 0) {
@@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac)
        if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) == -1)
                return -1;

-       if (*mac == cpu_to_le64(~0ULL)) {
+       if (*mac == ~0ULL) {

                offset = NX_OLD_MAC_ADDR_OFFSET +
                        (adapter->portnum * sizeof(u64));
@@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac)
                                        offset, sizeof(u64), pmac) == -1)
                        return -1;

-               if (*mac == cpu_to_le64(~0ULL))
+               if (*mac == ~0ULL)
                        return -1;
        }
        return 0;
@@ -2178,7 +2180,7 @@ lock_try:
                NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter->ahw.pci_base0, waddr);
                raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
                NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
-               *data_buff++ = cpu_to_le32(val);
+               *data_buff++ = val;
                fl_addr += sizeof(val);
        }
        readl((void __iomem *)(adapter->ahw.pci_base0 + NX_FLASH_SEM2_ULK));
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8dc4a134..70783b4 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter *adapter)
                        adapter->driver_mismatch = 1;
                        return;
                }
-               ptr32[i] = cpu_to_le32(val);
+               ptr32[i] = val;
                offset += sizeof(u32);
        }

--
1.7.4.4




^ permalink raw reply related	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-05 11:43   ` Rajesh Borundia
  0 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-05 11:43 UTC (permalink / raw)
  To: santosh nayak, Sony Chacko
  Cc: netdev, linux-kernel, kernel-janitors@vger.kernel.org

I will review this and get back to you.

Rajesh
________________________________________
From: santosh nayak [santoshprasadnayak@gmail.com]
Sent: Saturday, March 03, 2012 9:17 PM
To: Sony Chacko
Cc: Rajesh Borundia; netdev; linux-kernel; kernel-janitors@vger.kernel.org; Santosh Nayak
Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.

From: Santosh Nayak <santoshprasadnayak@gmail.com>

Fix endian bug.

Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
 drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++-----
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index 2eeac32..b5de8a7 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {

 struct nx_vlan_ip_list {
        struct list_head list;
-       u32 ip_addr;
+       __be32 ip_addr;
 };

 /*
@@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct nx_host_sds_ring *sds_ring, int max);
 void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
 int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
 int netxen_config_rss(struct netxen_adapter *adapter, int enable);
-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd);
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd);
 int netxen_linkevent_request(struct netxen_adapter *adapter, int enable);
 void netxen_advert_link_change(struct netxen_adapter *adapter, int linkup);
 void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
index 6f37470..0f81287 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
@@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter *adapter, int enable)
        return rv;
 }

-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd)
 {
        nx_nic_req_t req;
        u64 word;
+       u64 ip_addr;
        int rv;

        memset(&req, 0, sizeof(nx_nic_req_t));
@@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
        req.req_hdr = cpu_to_le64(word);

        req.words[0] = cpu_to_le64(cmd);
-       req.words[1] = cpu_to_le64(ip);
+       ip_addr = be32_to_cpu(ip);
+       *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);

        rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1);
        if (rv != 0) {
@@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac)
        if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) == -1)
                return -1;

-       if (*mac == cpu_to_le64(~0ULL)) {
+       if (*mac == ~0ULL) {

                offset = NX_OLD_MAC_ADDR_OFFSET +
                        (adapter->portnum * sizeof(u64));
@@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac)
                                        offset, sizeof(u64), pmac) == -1)
                        return -1;

-               if (*mac == cpu_to_le64(~0ULL))
+               if (*mac == ~0ULL)
                        return -1;
        }
        return 0;
@@ -2178,7 +2180,7 @@ lock_try:
                NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter->ahw.pci_base0, waddr);
                raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
                NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
-               *data_buff++ = cpu_to_le32(val);
+               *data_buff++ = val;
                fl_addr += sizeof(val);
        }
        readl((void __iomem *)(adapter->ahw.pci_base0 + NX_FLASH_SEM2_ULK));
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8dc4a134..70783b4 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter *adapter)
                        adapter->driver_mismatch = 1;
                        return;
                }
-               ptr32[i] = cpu_to_le32(val);
+               ptr32[i] = val;
                offset += sizeof(u32);
        }

--
1.7.4.4




^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-05 11:43   ` Rajesh Borundia
@ 2012-03-05 20:35     ` David Miller
  -1 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2012-03-05 20:35 UTC (permalink / raw)
  To: rajesh.borundia
  Cc: santoshprasadnayak, sony.chacko, netdev, linux-kernel,
	kernel-janitors

From: Rajesh Borundia <rajesh.borundia@qlogic.com>
Date: Mon, 5 Mar 2012 05:43:56 -0600

> I will review this and get back to you.

Please don't quote patch postings this way.

They look like new patch postings, and therefore get added to our patch
tracking site.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-05 20:35     ` David Miller
  0 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2012-03-05 20:35 UTC (permalink / raw)
  To: rajesh.borundia
  Cc: santoshprasadnayak, sony.chacko, netdev, linux-kernel,
	kernel-janitors

From: Rajesh Borundia <rajesh.borundia@qlogic.com>
Date: Mon, 5 Mar 2012 05:43:56 -0600

> I will review this and get back to you.

Please don't quote patch postings this way.

They look like new patch postings, and therefore get added to our patch
tracking site.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-02 15:23           ` santosh nayak
@ 2012-03-05 21:49             ` Santosh Jodh
  -1 siblings, 0 replies; 73+ messages in thread
From: Santosh Jodh @ 2012-03-05 21:49 UTC (permalink / raw)
  To: konrad.wilk@oracle.com, jeremy@goop.org, Ian Campbell,
	jbarnes@virtuousgeek.org, jbeulich@novell.com, joe.jin@oracle.com,
	lersek@redhat.com, weiyi.huang@gmail.com, rusty@rustcorp.com.au,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org,
	xen-devel@lists.xen.org
  Cc: Santosh Jodh, Paul Durrant

From: Santosh Jodh <santosh.jodh@citrix.com>

Add support for multi-page rings for block devices.
The number of pages is configurable for blkback via a module parameter.
blkback reports max-ring-page-order to blkfront via xenstore.
blkfront reports its supported ring-page-order to blkback via xenstore.
blkfront reports multi page ring references via ring-refNN in xenstore.
The change allows newer blkfront to work with older blkback and
vice-versa.
Based on original patch by Paul Durrant.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>
---
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf6..72f2e18 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

+/* Order of maximum shared ring size advertised to the front end. */
+int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
+
+#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+
+static int set_max_ring_order(const char *buf, struct kernel_param *kp)
+{
+       int err;
+       unsigned long order;
+
+       err = kstrtol(buf, 0, &order);
+       if (err ||
+           order < 0 ||
+           order > XENBUS_MAX_RING_ORDER)
+               return -EINVAL;
+
+       if (xen_blkif_reqs < BLK_RING_SIZE(order))
+               printk(KERN_WARNING "WARNING: "
+                      "I/O request space (%d reqs) < ring order %ld, "
+                      "consider increasing %s.reqs to >= %ld.",
+                      xen_blkif_reqs, order, KBUILD_MODNAME,
+                      roundup_pow_of_two(BLK_RING_SIZE(order)));
+
+       xen_blkif_max_ring_order = order;
+
+       return 0;
+}
+
+module_param_call(max_ring_order,
+                 set_max_ring_order, param_get_int,
+                 &xen_blkif_max_ring_order, 0644);
+MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7ed..5f33a1a 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -126,6 +126,8 @@ struct blkif_x86_64_response {
        int16_t         status;          /* BLKIF_RSP_???       */
 };

+extern int xen_blkif_max_ring_order;
+
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_common_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb5..7a9d71d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        return blkif;
 }

-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
-                        unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
+                        unsigned int ring_order, unsigned int evtchn)
 {
        int err;

@@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        if (blkif->irq)
                return 0;

-       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
+                                    &blkif->blk_ring);
        if (err < 0)
                return err;

@@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        {
                struct blkif_sring *sring;
                sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.native, sring,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+                              PAGE_SIZE << ring_order);
                break;
        }
        default:
@@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        if (err)
                goto fail;

+       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
+                           "%u", xen_blkif_max_ring_order);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -744,22 +753,80 @@ again:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
-       unsigned long ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int ring_order;
        unsigned int evtchn;
        char protocol[64] = "";
        int err;

        DPRINTK("%s", dev->otherend);

-       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
-       if (err) {
-               xenbus_dev_fatal(dev, err,
-                                "reading %s/ring-ref and event-channel",
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                          &evtchn);
+       if (err != 1) {
+               err = -EINVAL;
+
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                return err;
        }

+       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                          &ring_order);
+       if (err != 1) {
+               DPRINTK("%s: using single page handshake", dev->otherend);
+
+               ring_order = 0;
+
+               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                  "%d", &ring_ref[0]);
+               if (err != 1) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                        dev->otherend);
+                       return err;
+               }
+
+               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
+       } else {
+               unsigned int i;
+
+               if (ring_order > xen_blkif_max_ring_order) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err,
+                                        "%s/ring-page-order too big",
+                                        dev->otherend);
+                       return err;
+               }
+
+               for (i = 0; i < (1u << ring_order); i++) {
+                       char ring_ref_name[10];
+
+                       snprintf(ring_ref_name, sizeof(ring_ref_name),
+                                "ring-ref%u", i);
+
+                       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                                          ring_ref_name, "%d",
+                                          &ring_ref[i]);
+                       if (err != 1) {
+                               err = -EINVAL;
+
+                               xenbus_dev_fatal(dev, err,
+                                                "reading %s/%s",
+                                                dev->otherend,
+                                                ring_ref_name);
+                               return err;
+                       }
+
+                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
+                              ring_ref[i]);
+               }
+       }
+
        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
                            "%63s", protocol, NULL);
@@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -1;
        }
-       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol);

        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
        if (err) {
-               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                ring_ref, evtchn);
+               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
                return err;
        }

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874..485813a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -57,6 +57,10 @@

 #include <asm/xen/hypervisor.h>

+static int xen_blkif_ring_order;
+module_param_named(reqs, xen_blkif_ring_order, int, 0);
+MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages.");
+
 enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
@@ -72,7 +76,8 @@ struct blk_shadow {
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;

-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)

 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -87,14 +92,15 @@ struct blkfront_info
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       int ring_order;
        struct blkif_front_ring ring;
        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -111,9 +117,7 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);

-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-#define GRANT_INVALID_REF      0
+#define GRANT_INVALID_REF      0

 #define PARTS_PER_DISK         16
 #define PARTS_PER_EXT_DISK      256
@@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE);
+       BUG_ON(free >= BLK_MAX_RING_SIZE);
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
@@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)

 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+       int i;
+
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
@@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        flush_work_sync(&info->work);

        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
+       for (i = 0; i < (1 << info->ring_order); i++) {
+               if (info->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+                       info->ring_ref[i] = GRANT_INVALID_REF;
+               }
        }
+
+       free_pages((unsigned long)info->ring.sring, info->ring_order);
+       info->ring.sring = NULL;
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
-
 }

 static void blkif_completion(struct blk_shadow *s)
@@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
        struct blkif_sring *sring;
        int err;

-       info->ring_ref = GRANT_INVALID_REF;
-
-       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+                                                      info->ring_order);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);

        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

-       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
+                               info->ring_ref);
        if (err < 0) {
-               free_page((unsigned long)sring);
+               free_pages((unsigned long)sring, info->ring_order);
                info->ring.sring = NULL;
                goto fail;
        }
-       info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
@@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
+       unsigned int ring_order;
+       int legacy_backend;
+       int i;
        int err;

+       for (i = 0; i < (1 << info->ring_order); i++)
+               info->ring_ref[i] = GRANT_INVALID_REF;
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
+                          &ring_order);
+
+       legacy_backend = !(err == 1);
+
+       if (legacy_backend) {
+               info->ring_order = 0;
+       } else {
+               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
+                                  ring_order :
+                                  xen_blkif_ring_order;
+       }
+
        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
@@ -889,12 +916,35 @@ again:
                goto destroy_blkring;
        }

-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
+       if (legacy_backend) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%d", info->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               for (i = 0; i < (1 << info->ring_order); i++) {
+                       char key[sizeof("ring-ref") + 2];
+
+                       sprintf(key, "ring-ref%d", i);
+
+                       err = xenbus_printf(xbt, dev->nodename,
+                                           key, "%d", info->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
+
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-page-order", "%u", info->ring_order);
+               if (err) {
+                       message = "writing ring-order";
+                       goto abort_transaction;
+               }
        }
+
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
@@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);

-       err = talk_to_blkback(dev, info);
-       if (err) {
-               kfree(info);
-               dev_set_drvdata(&dev->dev, NULL);
-               return err;
-       }
-
        return 0;
 }

@@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
@@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,

        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
@@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateClosed:
                break;

+       case XenbusStateInitWait:
+               talk_to_blkback(dev, info);
+               break;
+
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 94b79c3..f93b59a 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
 /* (Un)Map communication rings. */
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref);
+                                int tx_ring_ref,
+                                int rx_ring_ref);

 /* (De)Register a xenvif with the netback backend. */
 void xen_netbk_add_xenvif(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 59effac..0b014cf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 }

 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref)
+                                int tx_ring_ref,
+                                int rx_ring_ref)
 {
        void *addr;
        struct xen_netif_tx_sring *txs;
@@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    tx_ring_ref, &addr);
+                                    &tx_ring_ref, 1, &addr);
        if (err)
                goto err;

@@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    rx_ring_ref, &addr);
+                                    &rx_ring_ref, 1, &addr);
        if (err)
                goto err;

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 698b905..521a595 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)txs);
                goto fail;
        }

-       info->tx_ring_ref = err;
        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
        if (!rxs) {
                err = -ENOMEM;
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(rxs);
        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)rxs);
                goto fail;
        }
-       info->rx_ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 1620088..95109d8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
        int err = 0;
        struct xenbus_transaction trans;

-       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
        if (err < 0)
                goto out;

-       pdev->gnt_ref = err;
-
        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
        if (err)
                goto out;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f9..e0834cd 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
                gnt_ref, remote_evtchn);

-       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                "Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 566d2ad..3a14524 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -53,14 +53,16 @@ struct xenbus_map_node {
                struct vm_struct *area; /* PV */
                struct page *page;     /* HVM */
        };
-       grant_handle_t handle;
+       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
+       unsigned int   nr_handles;
 };

 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);

 struct xenbus_ring_ops {
-       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
+                  void **vaddr);
        int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };

@@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages to be granted
+ * @grefs: grant reference array to be filled in
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references.  Return 0 on success, or
+ * -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the first error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[])
 {
-       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-       if (err < 0)
-               xenbus_dev_fatal(dev, err, "granting access to ring page");
+       int i;
+       int err;
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               err = gnttab_grant_foreign_access(dev->otherend_id,
+                                                 virt_to_mfn(addr), 0);
+               if (err < 0) {
+                       xenbus_dev_fatal(dev, err,
+                                        "granting access to ring page");
+                       goto fail;
+               }
+               grefs[i] = err;
+       }
+
+       return 0;
+
+fail:
+       for ( ; i >= 0; i--)
+               gnttab_end_foreign_access_ref(grefs[i], 0);
        return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
  * @vaddr: pointer to address to be filled out by mapping
  *
  * Based on Rusty Russell's skeleton driver's map_page.
@@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  * or -ENOMEM on error. If an error is returned, device will switch to
  * XenbusStateClosing and the error message will be saved in XenStore.
  */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr)
 {
-       return ring_ops->map(dev, gnt_ref, vaddr);
+       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node);
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-                                    int gnt_ref, void **vaddr)
+                                    int gnt_ref[], int nr_grefs, void **vaddr)
 {
-       struct gnttab_map_grant_ref op = {
-               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
-               .ref   = gnt_ref,
-               .dom   = dev->otherend_id,
-       };
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
        struct xenbus_map_node *node;
        struct vm_struct *area;
-       pte_t *pte;
+       pte_t *pte[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = 0;
+
+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;

        *vaddr = NULL;

@@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
        if (!node)
                return -ENOMEM;

-       area = alloc_vm_area(PAGE_SIZE, &pte);
+       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
        if (!area) {
                kfree(node);
                return -ENOMEM;
        }

-       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
+       for (i = 0; i < nr_grefs; i++) {
+               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte,
+               op[i].ref   = gnt_ref[i],
+               op[i].dom   = dev->otherend_id,
+               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
+       };

        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
                BUG();

-       if (op.status != GNTST_okay) {
-               free_vm_area(area);
-               kfree(node);
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-               return op.status;
+       node->nr_handles = nr_grefs;
+       node->area = area;
+
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+                       continue;
+               }
+               node->handle[i] = op[i].handle;
        }

-       node->handle = op.handle;
-       node->area = area;
+       if (err != 0) {
+               for (i = 0; i < nr_grefs; i++)
+                       xenbus_dev_fatal(dev, op[i].status,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+
+                __xenbus_unmap_ring_vfree_pv(dev, node);
+
+               return err;
+       }

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
@@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 }

 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-                                     int gnt_ref, void **vaddr)
+                                     int gnt_ref[], int nr_grefs, void **vaddr)
 {
        struct xenbus_map_node *node;
        int err;
        void *addr;

+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;
+
        *vaddr = NULL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

-       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+       err = alloc_xenballooned_pages(nr_grefs, &node->page,
+                                      false /* lowmem */);
        if (err)
                goto out_err;

        addr = pfn_to_kaddr(page_to_pfn(node->page));

-       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
        if (err)
                goto out_err;

@@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
        return 0;

  out_err:
-       free_xenballooned_pages(1, &node->page);
+       free_xenballooned_pages(nr_grefs, &node->page);
        kfree(node);
        return err;
 }
@@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
+ * @handle: grant handle array to be filled; must hold at least @nr_grefs entries
  * @vaddr: address to be mapped to
  *
- * Map a page of memory into this domain from another domain's grant table.
+ * Map pages of memory into this domain from another domain's grant table.
  * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
  * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * XenbusStateClosing and the last error message will be saved in XenStore.
  */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                   grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr)
 {
-       struct gnttab_map_grant_ref op;
-
-       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
-                         dev->otherend_id);
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = GNTST_okay;   /* 0 */
+
+       for (i = 0; i < nr_grefs; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               gnttab_set_map_op(&op[i], (phys_addr_t)addr,
+                                 GNTMAP_host_map, gnt_ref[i],
+                                 dev->otherend_id);
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
                BUG();

-       if (op.status != GNTST_okay) {
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-       } else
-               *handle = op.handle;
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_fatal(dev, err,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               } else
+                       handle[i] = op[i].handle;
+       }

-       return op.status;
+       if (err != GNTST_okay)
+               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);

@@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node)
+{
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       unsigned int level;
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < node->nr_handles; i++) {
+               unsigned long vaddr = (unsigned long)node->area->addr +
+                       (PAGE_SIZE * i);
+               if (node->handle[i] != INVALID_GRANT_HANDLE) {
+                       memset(&op[j], 0, sizeof(op[0]));
+                       op[j].host_addr = arbitrary_virt_to_machine(
+                                       lookup_address(vaddr, &level)).maddr;
+                       op[j].handle = node->handle[i];
+                       j++;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }
+
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
+               BUG();
+
+       node->nr_handles = 0;
+
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page %d at handle %d error %d",
+                               i, op[i].handle, err);
+               }
+       }
+
+       if (err == GNTST_okay)
+               free_vm_area(node->area);
+
+       kfree(node);
+
+       return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
        struct xenbus_map_node *node;
-       struct gnttab_unmap_grant_ref op = {
-               .host_addr = (unsigned long)vaddr,
-       };
-       unsigned int level;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       op.handle = node->handle;
-       op.host_addr = arbitrary_virt_to_machine(
-               lookup_address((unsigned long)vaddr, &level)).maddr;
-
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-               BUG();
-
-       if (op.status == GNTST_okay)
-               free_vm_area(node->area);
-       else
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                node->handle, op.status);
-
-       kfree(node);
-       return op.status;
+       return __xenbus_unmap_ring_vfree_pv(dev, node);
 }

 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 {
        int rv;
        struct xenbus_map_node *node;
-       void *addr;
+       void *addr = NULL;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       rv = xenbus_unmap_ring(dev, node->handle, addr);
+       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);

        if (!rv)
-               free_xenballooned_pages(1, &node->page);
+               free_xenballooned_pages(node->nr_handles, &node->page);
        else
                WARN(1, "Leaking %p\n", vaddr);

@@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * xenbus_unmap_ring
  * @dev: xenbus device
  * @handle: grant handle
+ * @nr_handles: number of grant handles
  * @vaddr: addr to unmap
  *
  * Unmap a page of memory in this domain that was imported from another domain.
@@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * (see xen/include/interface/grant_table.h).
  */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr)
+                       grant_handle_t handle[], int nr_handles,
+                       void *vaddr)
 {
-       struct gnttab_unmap_grant_ref op;
-
-       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < nr_handles; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               if (handle[i] != INVALID_GRANT_HANDLE) {
+                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
+                                           GNTMAP_host_map, handle[i]);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
                BUG();

-       if (op.status != GNTST_okay)
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                handle, op.status);
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page at handle %d error %d",
+                               handle[i], err);
+               }
+       }

-       return op.status;
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3864967..62b92d2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
        return err;
 }

+extern void xenbus_ring_ops_init(void);
 static int __init xenbus_init(void)
 {
        int err = 0;
@@ -767,6 +768,8 @@ static int __init xenbus_init(void)
        proc_mkdir("xen", NULL);
 #endif

+       xenbus_ring_ops_init();
+
 out_error:
        return err;
 }
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index e8c599b..cdbd948 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
                         const char *pathfmt, ...);

 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-                          int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                          grant_handle_t *handle, void *vaddr);
+
+#define        XENBUS_MAX_RING_ORDER   2
+#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
+
+#define INVALID_GRANT_HANDLE           (~0U)
+
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[]);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr);

 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr);
+                     grant_handle_t handle[], int nr_handles,
+                     void *vaddr);

 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-02 15:23           ` santosh nayak
                             ` (2 preceding siblings ...)
  (?)
@ 2012-03-05 21:49           ` Santosh Jodh
  -1 siblings, 0 replies; 73+ messages in thread
From: Santosh Jodh @ 2012-03-05 21:49 UTC (permalink / raw)
  To: konrad.wilk@oracle.com, jeremy@goop.org, Ian Campbell,
	jbarnes@virtuousgeek.org, jbeulich@novell.com, joe.jin@oracle.com,
	lersek@redhat.com, weiyi.huang@gmail.com, rusty@rustcorp.com.au,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, linux-kernel
  Cc: Paul Durrant, Santosh Jodh

From: Santosh Jodh <santosh.jodh@citrix.com>

Add support for multi-page rings for block devices.
The number of pages is configurable for blkback via a module parameter.
blkback reports max-ring-page-order to blkfront via xenstore.
blkfront reports its supported ring-page-order to blkback via xenstore.
blkfront reports multi-page ring references via ring-refNN in xenstore.
The change allows a newer blkfront to work with an older blkback and
vice versa.
Based on original patch by Paul Durrant.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>
---
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf6..72f2e18 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

+/* Order of maximum shared ring size advertised to the front end. */
+int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
+
+#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+
+static int set_max_ring_order(const char *buf, struct kernel_param *kp)
+{
+       int err;
+       unsigned long order;
+
+       err = kstrtol(buf, 0, &order);
+       if (err ||
+           order < 0 ||
+           order > XENBUS_MAX_RING_ORDER)
+               return -EINVAL;
+
+       if (xen_blkif_reqs < BLK_RING_SIZE(order))
+               printk(KERN_WARNING "WARNING: "
+                      "I/O request space (%d reqs) < ring order %ld, "
+                      "consider increasing %s.reqs to >= %ld.",
+                      xen_blkif_reqs, order, KBUILD_MODNAME,
+                      roundup_pow_of_two(BLK_RING_SIZE(order)));
+
+       xen_blkif_max_ring_order = order;
+
+       return 0;
+}
+
+module_param_call(max_ring_order,
+                 set_max_ring_order, param_get_int,
+                 &xen_blkif_max_ring_order, 0644);
+MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7ed..5f33a1a 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -126,6 +126,8 @@ struct blkif_x86_64_response {
        int16_t         status;          /* BLKIF_RSP_???       */
 };

+extern int xen_blkif_max_ring_order;
+
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_common_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb5..7a9d71d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        return blkif;
 }

-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
-                        unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
+                        unsigned int ring_order, unsigned int evtchn)
 {
        int err;

@@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        if (blkif->irq)
                return 0;

-       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
+                                    &blkif->blk_ring);
        if (err < 0)
                return err;

@@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        {
                struct blkif_sring *sring;
                sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.native, sring,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+                              PAGE_SIZE << ring_order);
                break;
        }
        default:
@@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        if (err)
                goto fail;

+       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
+                           "%u", xen_blkif_max_ring_order);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -744,22 +753,80 @@ again:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
-       unsigned long ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int ring_order;
        unsigned int evtchn;
        char protocol[64] = "";
        int err;

        DPRINTK("%s", dev->otherend);

-       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
-       if (err) {
-               xenbus_dev_fatal(dev, err,
-                                "reading %s/ring-ref and event-channel",
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                          &evtchn);
+       if (err != 1) {
+               err = -EINVAL;
+
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                return err;
        }

+       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                          &ring_order);
+       if (err != 1) {
+               DPRINTK("%s: using single page handshake", dev->otherend);
+
+               ring_order = 0;
+
+               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                  "%d", &ring_ref[0]);
+               if (err != 1) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                        dev->otherend);
+                       return err;
+               }
+
+               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
+       } else {
+               unsigned int i;
+
+               if (ring_order > xen_blkif_max_ring_order) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err,
+                                        "%s/ring-page-order too big",
+                                        dev->otherend);
+                       return err;
+               }
+
+               for (i = 0; i < (1u << ring_order); i++) {
+                       char ring_ref_name[10];
+
+                       snprintf(ring_ref_name, sizeof(ring_ref_name),
+                                "ring-ref%u", i);
+
+                       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                                          ring_ref_name, "%d",
+                                          &ring_ref[i]);
+                       if (err != 1) {
+                               err = -EINVAL;
+
+                               xenbus_dev_fatal(dev, err,
+                                                "reading %s/%s",
+                                                dev->otherend,
+                                                ring_ref_name);
+                               return err;
+                       }
+
+                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
+                              ring_ref[i]);
+               }
+       }
+
        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
                            "%63s", protocol, NULL);
@@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -1;
        }
-       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol);

        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
        if (err) {
-               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                ring_ref, evtchn);
+               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
                return err;
        }

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874..485813a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -57,6 +57,10 @@

 #include <asm/xen/hypervisor.h>

+static int xen_blkif_ring_order;
+module_param_named(reqs, xen_blkif_ring_order, int, 0);
+MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages.");
+
 enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
@@ -72,7 +76,8 @@ struct blk_shadow {
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;

-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)

 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -87,14 +92,15 @@ struct blkfront_info
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       int ring_order;
        struct blkif_front_ring ring;
        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -111,9 +117,7 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);

-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-#define GRANT_INVALID_REF      0
+#define GRANT_INVALID_REF      0

 #define PARTS_PER_DISK         16
 #define PARTS_PER_EXT_DISK      256
@@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE);
+       BUG_ON(free >= BLK_MAX_RING_SIZE);
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
@@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)

 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+       int i;
+
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
@@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        flush_work_sync(&info->work);

        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
+       for (i = 0; i < (1 << info->ring_order); i++) {
+               if (info->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+                       info->ring_ref[i] = GRANT_INVALID_REF;
+               }
        }
+
+       free_pages((unsigned long)info->ring.sring, info->ring_order);
+       info->ring.sring = NULL;
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
-
 }

 static void blkif_completion(struct blk_shadow *s)
@@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
        struct blkif_sring *sring;
        int err;

-       info->ring_ref = GRANT_INVALID_REF;
-
-       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+                                                      info->ring_order);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);

        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

-       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
+                               info->ring_ref);
        if (err < 0) {
-               free_page((unsigned long)sring);
+               free_pages((unsigned long)sring, info->ring_order);
                info->ring.sring = NULL;
                goto fail;
        }
-       info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
@@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
+       unsigned int ring_order;
+       int legacy_backend;
+       int i;
        int err;

+       for (i = 0; i < (1 << info->ring_order); i++)
+               info->ring_ref[i] = GRANT_INVALID_REF;
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
+                          &ring_order);
+
+       legacy_backend = !(err == 1);
+
+       if (legacy_backend) {
+               info->ring_order = 0;
+       } else {
+               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
+                                  ring_order :
+                                  xen_blkif_ring_order;
+       }
+
        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
@@ -889,12 +916,35 @@ again:
                goto destroy_blkring;
        }

-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
+       if (legacy_backend) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%d", info->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               for (i = 0; i < (1 << info->ring_order); i++) {
+                       char key[sizeof("ring-ref") + 2];
+
+                       sprintf(key, "ring-ref%d", i);
+
+                       err = xenbus_printf(xbt, dev->nodename,
+                                           key, "%d", info->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
+
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-page-order", "%u", info->ring_order);
+               if (err) {
+                       message = "writing ring-order";
+                       goto abort_transaction;
+               }
        }
+
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
@@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);

-       err = talk_to_blkback(dev, info);
-       if (err) {
-               kfree(info);
-               dev_set_drvdata(&dev->dev, NULL);
-               return err;
-       }
-
        return 0;
 }

@@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
@@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,

        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
@@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateClosed:
                break;

+       case XenbusStateInitWait:
+               talk_to_blkback(dev, info);
+               break;
+
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 94b79c3..f93b59a 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
 /* (Un)Map communication rings. */
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref);
+                                int tx_ring_ref,
+                                int rx_ring_ref);

 /* (De)Register a xenvif with the netback backend. */
 void xen_netbk_add_xenvif(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 59effac..0b014cf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 }

 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref)
+                                int tx_ring_ref,
+                                int rx_ring_ref)
 {
        void *addr;
        struct xen_netif_tx_sring *txs;
@@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    tx_ring_ref, &addr);
+                                    &tx_ring_ref, 1, &addr);
        if (err)
                goto err;

@@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    rx_ring_ref, &addr);
+                                    &rx_ring_ref, 1, &addr);
        if (err)
                goto err;

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 698b905..521a595 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)txs);
                goto fail;
        }

-       info->tx_ring_ref = err;
        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
        if (!rxs) {
                err = -ENOMEM;
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(rxs);
        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)rxs);
                goto fail;
        }
-       info->rx_ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 1620088..95109d8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
        int err = 0;
        struct xenbus_transaction trans;

-       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
        if (err < 0)
                goto out;

-       pdev->gnt_ref = err;
-
        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
        if (err)
                goto out;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f9..e0834cd 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
                gnt_ref, remote_evtchn);

-       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                "Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 566d2ad..3a14524 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -53,14 +53,16 @@ struct xenbus_map_node {
                struct vm_struct *area; /* PV */
                struct page *page;     /* HVM */
        };
-       grant_handle_t handle;
+       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
+       unsigned int   nr_handles;
 };

 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);

 struct xenbus_ring_ops {
-       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
+                  void **vaddr);
        int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };

@@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages to be granted
+ * @grefs: grant reference array to be filled in
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references.  Return 0 on success, or
+ * -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the first error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[])
 {
-       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-       if (err < 0)
-               xenbus_dev_fatal(dev, err, "granting access to ring page");
+       int i;
+       int err;
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               err = gnttab_grant_foreign_access(dev->otherend_id,
+                                                 virt_to_mfn(addr), 0);
+               if (err < 0) {
+                       xenbus_dev_fatal(dev, err,
+                                        "granting access to ring page");
+                       goto fail;
+               }
+               grefs[i] = err;
+       }
+
+       return 0;
+
+fail:
+       for ( ; i >= 0; i--)
+               gnttab_end_foreign_access_ref(grefs[i], 0);
        return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
  * @vaddr: pointer to address to be filled out by mapping
  *
  * Based on Rusty Russell's skeleton driver's map_page.
@@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  * or -ENOMEM on error. If an error is returned, device will switch to
  * XenbusStateClosing and the error message will be saved in XenStore.
  */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr)
 {
-       return ring_ops->map(dev, gnt_ref, vaddr);
+       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node);
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-                                    int gnt_ref, void **vaddr)
+                                    int gnt_ref[], int nr_grefs, void **vaddr)
 {
-       struct gnttab_map_grant_ref op = {
-               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
-               .ref   = gnt_ref,
-               .dom   = dev->otherend_id,
-       };
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
        struct xenbus_map_node *node;
        struct vm_struct *area;
-       pte_t *pte;
+       pte_t *pte[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = 0;
+
+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;

        *vaddr = NULL;

@@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
        if (!node)
                return -ENOMEM;

-       area = alloc_vm_area(PAGE_SIZE, &pte);
+       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
        if (!area) {
                kfree(node);
                return -ENOMEM;
        }

-       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
+       for (i = 0; i < nr_grefs; i++) {
+               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte,
+               op[i].ref   = gnt_ref[i],
+               op[i].dom   = dev->otherend_id,
+               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
+       };

        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
                BUG();

-       if (op.status != GNTST_okay) {
-               free_vm_area(area);
-               kfree(node);
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-               return op.status;
+       node->nr_handles = nr_grefs;
+       node->area = area;
+
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+                       continue;
+               }
+               node->handle[i] = op[i].handle;
        }

-       node->handle = op.handle;
-       node->area = area;
+       if (err != 0) {
+               for (i = 0; i < nr_grefs; i++)
+                       xenbus_dev_fatal(dev, op[i].status,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+
+                __xenbus_unmap_ring_vfree_pv(dev, node);
+
+               return err;
+       }

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
@@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 }

 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-                                     int gnt_ref, void **vaddr)
+                                     int gnt_ref[], int nr_grefs, void **vaddr)
 {
        struct xenbus_map_node *node;
        int err;
        void *addr;

+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;
+
        *vaddr = NULL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

-       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+       err = alloc_xenballooned_pages(nr_grefs, &node->page,
+                                      false /* lowmem */);
        if (err)
                goto out_err;

        addr = pfn_to_kaddr(page_to_pfn(node->page));

-       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
        if (err)
                goto out_err;

@@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
        return 0;

  out_err:
-       free_xenballooned_pages(1, &node->page);
+       free_xenballooned_pages(nr_grefs, &node->page);
        kfree(node);
        return err;
 }
@@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
+ * @handle: pointer to grant handle array to be filled, mind the size
  * @vaddr: address to be mapped to
  *
- * Map a page of memory into this domain from another domain's grant table.
+ * Map pages of memory into this domain from another domain's grant table.
  * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
  * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * XenbusStateClosing and the last error message will be saved in XenStore.
  */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                   grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr)
 {
-       struct gnttab_map_grant_ref op;
-
-       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
-                         dev->otherend_id);
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = GNTST_okay;   /* 0 */
+
+       for (i = 0; i < nr_grefs; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               gnttab_set_map_op(&op[i], (phys_addr_t)addr,
+                                 GNTMAP_host_map, gnt_ref[i],
+                                 dev->otherend_id);
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
                BUG();

-       if (op.status != GNTST_okay) {
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-       } else
-               *handle = op.handle;
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_fatal(dev, err,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               } else
+                       handle[i] = op[i].handle;
+       }

-       return op.status;
+       if (err != GNTST_okay)
+               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);

@@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node)
+{
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       unsigned int level;
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < node->nr_handles; i++) {
+               unsigned long vaddr = (unsigned long)node->area->addr +
+                       (PAGE_SIZE * i);
+               if (node->handle[i] != INVALID_GRANT_HANDLE) {
+                       memset(&op[j], 0, sizeof(op[0]));
+                       op[j].host_addr = arbitrary_virt_to_machine(
+                                       lookup_address(vaddr, &level)).maddr;
+                       op[j].handle = node->handle[i];
+                       j++;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }
+
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
+               BUG();
+
+       node->nr_handles = 0;
+
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page %d at handle %d error %d",
+                               i, op[i].handle, err);
+               }
+       }
+
+       if (err == GNTST_okay)
+               free_vm_area(node->area);
+
+       kfree(node);
+
+       return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
        struct xenbus_map_node *node;
-       struct gnttab_unmap_grant_ref op = {
-               .host_addr = (unsigned long)vaddr,
-       };
-       unsigned int level;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       op.handle = node->handle;
-       op.host_addr = arbitrary_virt_to_machine(
-               lookup_address((unsigned long)vaddr, &level)).maddr;
-
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-               BUG();
-
-       if (op.status == GNTST_okay)
-               free_vm_area(node->area);
-       else
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                node->handle, op.status);
-
-       kfree(node);
-       return op.status;
+       return __xenbus_unmap_ring_vfree_pv(dev, node);
 }

 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 {
        int rv;
        struct xenbus_map_node *node;
-       void *addr;
+       void *addr = NULL;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       rv = xenbus_unmap_ring(dev, node->handle, addr);
+       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);

        if (!rv)
-               free_xenballooned_pages(1, &node->page);
+               free_xenballooned_pages(node->nr_handles, &node->page);
        else
                WARN(1, "Leaking %p\n", vaddr);

@@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * xenbus_unmap_ring
  * @dev: xenbus device
  * @handle: grant handle
+ * @nr_handles: number of grant handles
  * @vaddr: addr to unmap
  *
  * Unmap a page of memory in this domain that was imported from another domain.
@@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * (see xen/include/interface/grant_table.h).
  */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr)
+                       grant_handle_t handle[], int nr_handles,
+                       void *vaddr)
 {
-       struct gnttab_unmap_grant_ref op;
-
-       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < nr_handles; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               if (handle[i] != INVALID_GRANT_HANDLE) {
+                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
+                                           GNTMAP_host_map, handle[i]);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
                BUG();

-       if (op.status != GNTST_okay)
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                handle, op.status);
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page at handle %d error %d",
+                               handle[i], err);
+               }
+       }

-       return op.status;
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3864967..62b92d2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
        return err;
 }

+extern void xenbus_ring_ops_init(void);
 static int __init xenbus_init(void)
 {
        int err = 0;
@@ -767,6 +768,8 @@ static int __init xenbus_init(void)
        proc_mkdir("xen", NULL);
 #endif

+       xenbus_ring_ops_init();
+
 out_error:
        return err;
 }
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index e8c599b..cdbd948 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
                         const char *pathfmt, ...);

 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-                          int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                          grant_handle_t *handle, void *vaddr);
+
+#define        XENBUS_MAX_RING_ORDER   2
+#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
+
+#define INVALID_GRANT_HANDLE           (~0U)
+
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[]);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr);

 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr);
+                     grant_handle_t handle[], int nr_handles,
+                     void *vaddr);

 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 0001/001] xen: multi page ring support for block devices
@ 2012-03-05 21:49             ` Santosh Jodh
  0 siblings, 0 replies; 73+ messages in thread
From: Santosh Jodh @ 2012-03-05 21:49 UTC (permalink / raw)
  To: konrad.wilk@oracle.com, jeremy@goop.org, Ian Campbell,
	jbarnes@virtuousgeek.org, jbeulich@novell.com, joe.jin@oracle.com,
	lersek@redhat.com, weiyi.huang@gmail.com, rusty@rustcorp.com.au,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, "linux-kernel@vger.ker
  Cc: Santosh Jodh, Paul Durrant

From: Santosh Jodh <santosh.jodh@citrix.com>

Add support for multi-page rings for block devices.
The maximum ring size is configurable for blkback via the max_ring_order
module parameter (log2 of the ring size, in pages).
blkback reports max-ring-page-order to blkfront via xenstore.
blkfront reports its supported ring-page-order to blkback via xenstore.
blkfront reports multi-page ring references via ring-refNN in xenstore.
The change allows a newer blkfront to work with an older blkback and
vice versa.
Based on original patch by Paul Durrant.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>
---
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf6..72f2e18 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

+/* Order of maximum shared ring size advertised to the front end. */
+int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
+
+#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+
+static int set_max_ring_order(const char *buf, struct kernel_param *kp)
+{
+       int err;
+       unsigned long order;
+
+       err = kstrtol(buf, 0, &order);
+       if (err ||
+           order < 0 ||
+           order > XENBUS_MAX_RING_ORDER)
+               return -EINVAL;
+
+       if (xen_blkif_reqs < BLK_RING_SIZE(order))
+               printk(KERN_WARNING "WARNING: "
+                      "I/O request space (%d reqs) < ring order %ld, "
+                      "consider increasing %s.reqs to >= %ld.",
+                      xen_blkif_reqs, order, KBUILD_MODNAME,
+                      roundup_pow_of_two(BLK_RING_SIZE(order)));
+
+       xen_blkif_max_ring_order = order;
+
+       return 0;
+}
+
+module_param_call(max_ring_order,
+                 set_max_ring_order, param_get_int,
+                 &xen_blkif_max_ring_order, 0644);
+MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7ed..5f33a1a 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -126,6 +126,8 @@ struct blkif_x86_64_response {
        int16_t         status;          /* BLKIF_RSP_???       */
 };

+extern int xen_blkif_max_ring_order;
+
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_common_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb5..7a9d71d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        return blkif;
 }

-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
-                        unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
+                        unsigned int ring_order, unsigned int evtchn)
 {
        int err;

@@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        if (blkif->irq)
                return 0;

-       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
+                                    &blkif->blk_ring);
        if (err < 0)
                return err;

@@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        {
                struct blkif_sring *sring;
                sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.native, sring,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+                              PAGE_SIZE << ring_order);
                break;
        }
        default:
@@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        if (err)
                goto fail;

+       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
+                           "%u", xen_blkif_max_ring_order);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -744,22 +753,80 @@ again:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
-       unsigned long ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int ring_order;
        unsigned int evtchn;
        char protocol[64] = "";
        int err;

        DPRINTK("%s", dev->otherend);

-       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
-       if (err) {
-               xenbus_dev_fatal(dev, err,
-                                "reading %s/ring-ref and event-channel",
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                          &evtchn);
+       if (err != 1) {
+               err = -EINVAL;
+
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                return err;
        }

+       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                          &ring_order);
+       if (err != 1) {
+               DPRINTK("%s: using single page handshake", dev->otherend);
+
+               ring_order = 0;
+
+               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                  "%d", &ring_ref[0]);
+               if (err != 1) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                        dev->otherend);
+                       return err;
+               }
+
+               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
+       } else {
+               unsigned int i;
+
+               if (ring_order > xen_blkif_max_ring_order) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err,
+                                        "%s/ring-page-order too big",
+                                        dev->otherend);
+                       return err;
+               }
+
+               for (i = 0; i < (1u << ring_order); i++) {
+                       char ring_ref_name[10];
+
+                       snprintf(ring_ref_name, sizeof(ring_ref_name),
+                                "ring-ref%u", i);
+
+                       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                                          ring_ref_name, "%d",
+                                          &ring_ref[i]);
+                       if (err != 1) {
+                               err = -EINVAL;
+
+                               xenbus_dev_fatal(dev, err,
+                                                "reading %s/%s",
+                                                dev->otherend,
+                                                ring_ref_name);
+                               return err;
+                       }
+
+                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
+                              ring_ref[i]);
+               }
+       }
+
        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
                            "%63s", protocol, NULL);
@@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -1;
        }
-       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol);

        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
        if (err) {
-               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                ring_ref, evtchn);
+               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
                return err;
        }

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874..485813a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -57,6 +57,10 @@

 #include <asm/xen/hypervisor.h>

+static int xen_blkif_ring_order;
+module_param_named(reqs, xen_blkif_ring_order, int, 0);
+MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages.");
+
 enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
@@ -72,7 +76,8 @@ struct blk_shadow {
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;

-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)

 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -87,14 +92,15 @@ struct blkfront_info
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       int ring_order;
        struct blkif_front_ring ring;
        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -111,9 +117,7 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);

-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-#define GRANT_INVALID_REF      0
+#define GRANT_INVALID_REF      0

 #define PARTS_PER_DISK         16
 #define PARTS_PER_EXT_DISK      256
@@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE);
+       BUG_ON(free >= BLK_MAX_RING_SIZE);
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
@@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)

 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+       int i;
+
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
@@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        flush_work_sync(&info->work);

        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
+       for (i = 0; i < (1 << info->ring_order); i++) {
+               if (info->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+                       info->ring_ref[i] = GRANT_INVALID_REF;
+               }
        }
+
+       free_pages((unsigned long)info->ring.sring, info->ring_order);
+       info->ring.sring = NULL;
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
-
 }

 static void blkif_completion(struct blk_shadow *s)
@@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
        struct blkif_sring *sring;
        int err;

-       info->ring_ref = GRANT_INVALID_REF;
-
-       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+                                                      info->ring_order);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);

        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

-       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
+                               info->ring_ref);
        if (err < 0) {
-               free_page((unsigned long)sring);
+               free_pages((unsigned long)sring, info->ring_order);
                info->ring.sring = NULL;
                goto fail;
        }
-       info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
@@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
+       unsigned int ring_order;
+       int legacy_backend;
+       int i;
        int err;

+       for (i = 0; i < (1 << info->ring_order); i++)
+               info->ring_ref[i] = GRANT_INVALID_REF;
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
+                          &ring_order);
+
+       legacy_backend = !(err == 1);
+
+       if (legacy_backend) {
+               info->ring_order = 0;
+       } else {
+               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
+                                  ring_order :
+                                  xen_blkif_ring_order;
+       }
+
        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
@@ -889,12 +916,35 @@ again:
                goto destroy_blkring;
        }

-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
+       if (legacy_backend) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%d", info->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               for (i = 0; i < (1 << info->ring_order); i++) {
+                       char key[sizeof("ring-ref") + 2];
+
+                       sprintf(key, "ring-ref%d", i);
+
+                       err = xenbus_printf(xbt, dev->nodename,
+                                           key, "%d", info->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
+
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-page-order", "%u", info->ring_order);
+               if (err) {
+                       message = "writing ring-order";
+                       goto abort_transaction;
+               }
        }
+
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
@@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);

-       err = talk_to_blkback(dev, info);
-       if (err) {
-               kfree(info);
-               dev_set_drvdata(&dev->dev, NULL);
-               return err;
-       }
-
        return 0;
 }

@@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
@@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,

        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
@@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateClosed:
                break;

+       case XenbusStateInitWait:
+               talk_to_blkback(dev, info);
+               break;
+
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 94b79c3..f93b59a 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
 /* (Un)Map communication rings. */
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref);
+                                int tx_ring_ref,
+                                int rx_ring_ref);

 /* (De)Register a xenvif with the netback backend. */
 void xen_netbk_add_xenvif(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 59effac..0b014cf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 }

 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref)
+                                int tx_ring_ref,
+                                int rx_ring_ref)
 {
        void *addr;
        struct xen_netif_tx_sring *txs;
@@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    tx_ring_ref, &addr);
+                                    &tx_ring_ref, 1, &addr);
        if (err)
                goto err;

@@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    rx_ring_ref, &addr);
+                                    &rx_ring_ref, 1, &addr);
        if (err)
                goto err;

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 698b905..521a595 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)txs);
                goto fail;
        }

-       info->tx_ring_ref = err;
        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
        if (!rxs) {
                err = -ENOMEM;
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(rxs);
        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)rxs);
                goto fail;
        }
-       info->rx_ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 1620088..95109d8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
        int err = 0;
        struct xenbus_transaction trans;

-       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
        if (err < 0)
                goto out;

-       pdev->gnt_ref = err;
-
        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
        if (err)
                goto out;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f9..e0834cd 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
                gnt_ref, remote_evtchn);

-       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                "Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 566d2ad..3a14524 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -53,14 +53,16 @@ struct xenbus_map_node {
                struct vm_struct *area; /* PV */
                struct page *page;     /* HVM */
        };
-       grant_handle_t handle;
+       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
+       unsigned int   nr_handles;
 };

 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);

 struct xenbus_ring_ops {
-       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
+                  void **vaddr);
        int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };

@@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages to be granted
+ * @grefs: grant reference array to be filled in
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references.  Return 0 on success, or
+ * -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the first error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[])
 {
-       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-       if (err < 0)
-               xenbus_dev_fatal(dev, err, "granting access to ring page");
+       int i;
+       int err;
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned long addr = (unsigned long)vaddr + PAGE_SIZE * i;
+               err = gnttab_grant_foreign_access(dev->otherend_id,
+                                                 virt_to_mfn(addr), 0);
+               if (err < 0) {
+                       xenbus_dev_fatal(dev, err,
+                                        "granting access to ring page");
+                       goto fail;
+               }
+               grefs[i] = err; /* non-negative return is the grant ref */
+       }
+
+       return 0;
+
+fail:
+       /* Page i got no grant, so revoke only grefs[0..i-1]. */
+       while (--i >= 0)
+               gnttab_end_foreign_access_ref(grefs[i], 0);
        return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
  * @vaddr: pointer to address to be filled out by mapping
  *
  * Based on Rusty Russell's skeleton driver's map_page.
@@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  * or -ENOMEM on error. If an error is returned, device will switch to
  * XenbusStateClosing and the error message will be saved in XenStore.
  */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr)
 {
-       return ring_ops->map(dev, gnt_ref, vaddr);
+       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node);
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-                                    int gnt_ref, void **vaddr)
+                                    int gnt_ref[], int nr_grefs, void **vaddr)
 {
-       struct gnttab_map_grant_ref op = {
-               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
-               .ref   = gnt_ref,
-               .dom   = dev->otherend_id,
-       };
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
        struct xenbus_map_node *node;
        struct vm_struct *area;
-       pte_t *pte;
+       pte_t *pte[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = 0;
+
+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;

        *vaddr = NULL;

@@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
        if (!node)
                return -ENOMEM;

-       area = alloc_vm_area(PAGE_SIZE, &pte);
+       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
        if (!area) {
                kfree(node);
                return -ENOMEM;
        }

-       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
+       for (i = 0; i < nr_grefs; i++) {
+               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte,
+               op[i].ref   = gnt_ref[i],
+               op[i].dom   = dev->otherend_id,
+               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
+       };

        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
                BUG();

-       if (op.status != GNTST_okay) {
-               free_vm_area(area);
-               kfree(node);
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-               return op.status;
+       node->nr_handles = nr_grefs;
+       node->area = area;
+
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+                       continue;
+               }
+               node->handle[i] = op[i].handle;
        }

-       node->handle = op.handle;
-       node->area = area;
+       if (err != 0) {
+               for (i = 0; i < nr_grefs; i++)
+                       xenbus_dev_fatal(dev, op[i].status,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+
+                __xenbus_unmap_ring_vfree_pv(dev, node);
+
+               return err;
+       }

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
@@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 }

 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-                                     int gnt_ref, void **vaddr)
+                                     int gnt_ref[], int nr_grefs, void **vaddr)
 {
        struct xenbus_map_node *node;
        int err;
        void *addr;

+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;
+
        *vaddr = NULL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

-       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+       err = alloc_xenballooned_pages(nr_grefs, &node->page,
+                                      false /* lowmem */);
        if (err)
                goto out_err;

        addr = pfn_to_kaddr(page_to_pfn(node->page));

-       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
        if (err)
                goto out_err;

@@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
        return 0;

  out_err:
-       free_xenballooned_pages(1, &node->page);
+       free_xenballooned_pages(nr_grefs, &node->page);
        kfree(node);
        return err;
 }
@@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
+ * @handle: pointer to grant handle array to be filled, mind the size
  * @vaddr: address to be mapped to
  *
- * Map a page of memory into this domain from another domain's grant table.
+ * Map pages of memory into this domain from another domain's grant table.
  * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
  * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * XenbusStateClosing and the last error message will be saved in XenStore.
  */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                   grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr)
 {
-       struct gnttab_map_grant_ref op;
-
-       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
-                         dev->otherend_id);
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i, err = GNTST_okay;        /* 0 */
+
+       /* op[] is a fixed-size stack array: reject counts that overrun it. */
+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;
+       for (i = 0; i < nr_grefs; i++) {
+               unsigned long addr = (unsigned long)vaddr + PAGE_SIZE * i;
+               gnttab_set_map_op(&op[i], (phys_addr_t)addr, GNTMAP_host_map,
+                                 gnt_ref[i], dev->otherend_id);
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
                BUG();

-       if (op.status != GNTST_okay) {
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-       } else
-               *handle = op.handle;
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_fatal(dev, err,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               } else
+                       handle[i] = op[i].handle;
+       }

-       return op.status;
+       if (err != GNTST_okay)
+               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);

@@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node)
+{
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       unsigned int level;
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < node->nr_handles; i++) {
+               unsigned long vaddr = (unsigned long)node->area->addr +
+                       (PAGE_SIZE * i);
+               if (node->handle[i] != INVALID_GRANT_HANDLE) {
+                       memset(&op[j], 0, sizeof(op[0]));
+                       op[j].host_addr = arbitrary_virt_to_machine(
+                                       lookup_address(vaddr, &level)).maddr;
+                       op[j].handle = node->handle[i];
+                       j++;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }
+
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
+               BUG();
+
+       node->nr_handles = 0;
+
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page %d at handle %d error %d",
+                               i, op[i].handle, err);
+               }
+       }
+
+       if (err == GNTST_okay)
+               free_vm_area(node->area);
+
+       kfree(node);
+
+       return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
        struct xenbus_map_node *node;
-       struct gnttab_unmap_grant_ref op = {
-               .host_addr = (unsigned long)vaddr,
-       };
-       unsigned int level;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       op.handle = node->handle;
-       op.host_addr = arbitrary_virt_to_machine(
-               lookup_address((unsigned long)vaddr, &level)).maddr;
-
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-               BUG();
-
-       if (op.status == GNTST_okay)
-               free_vm_area(node->area);
-       else
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                node->handle, op.status);
-
-       kfree(node);
-       return op.status;
+       return __xenbus_unmap_ring_vfree_pv(dev, node);
 }

 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 {
        int rv;
        struct xenbus_map_node *node;
-       void *addr;
+       void *addr = NULL;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       rv = xenbus_unmap_ring(dev, node->handle, addr);
+       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);

        if (!rv)
-               free_xenballooned_pages(1, &node->page);
+               free_xenballooned_pages(node->nr_handles, &node->page);
        else
                WARN(1, "Leaking %p\n", vaddr);

@@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * xenbus_unmap_ring
  * @dev: xenbus device
  * @handle: grant handle
+ * @nr_handles: number of grant handles in @handle
  * @vaddr: addr to unmap
  *
  * Unmap a page of memory in this domain that was imported from another domain.
@@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * (see xen/include/interface/grant_table.h).
  */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr)
+                       grant_handle_t handle[], int nr_handles,
+                       void *vaddr)
 {
-       struct gnttab_unmap_grant_ref op;
-
-       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < nr_handles; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               if (handle[i] != INVALID_GRANT_HANDLE) {
+                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
+                                           GNTMAP_host_map, handle[i]);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
                BUG();

-       if (op.status != GNTST_okay)
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                handle, op.status);
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page at handle %d error %d",
+                               handle[i], err);
+               }
+       }

-       return op.status;
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3864967..62b92d2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
        return err;
 }

+extern void xenbus_ring_ops_init(void);
 static int __init xenbus_init(void)
 {
        int err = 0;
@@ -767,6 +768,8 @@ static int __init xenbus_init(void)
        proc_mkdir("xen", NULL);
 #endif

+       xenbus_ring_ops_init();
+
 out_error:
        return err;
 }
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index e8c599b..cdbd948 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
                         const char *pathfmt, ...);

 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-                          int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                          grant_handle_t *handle, void *vaddr);
+
+#define        XENBUS_MAX_RING_ORDER   2
+#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
+
+#define INVALID_GRANT_HANDLE           (~0U)
+
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[]);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr);

 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr);
+                     grant_handle_t handle[], int nr_handles,
+                     void *vaddr);

 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-02 15:23           ` santosh nayak
  (?)
  (?)
@ 2012-03-05 21:49           ` Santosh Jodh
  -1 siblings, 0 replies; 73+ messages in thread
From: Santosh Jodh @ 2012-03-05 21:49 UTC (permalink / raw)
  To: konrad.wilk@oracle.com, jeremy@goop.org, Ian Campbell,
	jbarnes@virtuousgeek.org, jbeulich@novell.com, joe.jin@oracle.com,
	lersek@redhat.com, weiyi.huang@gmail.com, rusty@rustcorp.com.au,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, linux-kernel
  Cc: Paul Durrant, Santosh Jodh

From: Santosh Jodh <santosh.jodh@citrix.com>

Add support for multi-page rings for block devices.
The maximum ring size is configurable for blkback via the max_ring_order
module parameter (log2 of the maximum ring size, in pages).
blkback reports max-ring-page-order to blkfront via xenstore.
blkfront reports its supported ring-page-order to blkback via xenstore.
blkfront reports multi-page ring references via ring-refNN in xenstore.
The change allows newer blkfront to work with older blkback and
vice-versa.
Based on original patch by Paul Durrant.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>
---
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf6..72f2e18 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

+/* Order of maximum shared ring size advertised to the front end. */
+int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
+
+#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+
+static int set_max_ring_order(const char *buf, struct kernel_param *kp)
+{
+       int err;
+       unsigned long order;
+
+       err = kstrtol(buf, 0, &order);
+       if (err ||
+           order < 0 ||
+           order > XENBUS_MAX_RING_ORDER)
+               return -EINVAL;
+
+       if (xen_blkif_reqs < BLK_RING_SIZE(order))
+               printk(KERN_WARNING "WARNING: "
+                      "I/O request space (%d reqs) < ring order %ld, "
+                      "consider increasing %s.reqs to >= %ld.",
+                      xen_blkif_reqs, order, KBUILD_MODNAME,
+                      roundup_pow_of_two(BLK_RING_SIZE(order)));
+
+       xen_blkif_max_ring_order = order;
+
+       return 0;
+}
+
+module_param_call(max_ring_order,
+                 set_max_ring_order, param_get_int,
+                 &xen_blkif_max_ring_order, 0644);
+MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7ed..5f33a1a 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -126,6 +126,8 @@ struct blkif_x86_64_response {
        int16_t         status;          /* BLKIF_RSP_???       */
 };

+extern int xen_blkif_max_ring_order;
+
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_common_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb5..7a9d71d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        return blkif;
 }

-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
-                        unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
+                        unsigned int ring_order, unsigned int evtchn)
 {
        int err;

@@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        if (blkif->irq)
                return 0;

-       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
+                                    &blkif->blk_ring);
        if (err < 0)
                return err;

@@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        {
                struct blkif_sring *sring;
                sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.native, sring,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+                              PAGE_SIZE << ring_order);
                break;
        }
        default:
@@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        if (err)
                goto fail;

+       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
+                           "%u", xen_blkif_max_ring_order);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -744,22 +753,80 @@ again:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
-       unsigned long ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int ring_order;
        unsigned int evtchn;
        char protocol[64] = "";
        int err;

        DPRINTK("%s", dev->otherend);

-       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
-       if (err) {
-               xenbus_dev_fatal(dev, err,
-                                "reading %s/ring-ref and event-channel",
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                          &evtchn);
+       if (err != 1) {
+               err = -EINVAL;
+
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                return err;
        }

+       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                          &ring_order);
+       if (err != 1) {
+               DPRINTK("%s: using single page handshake", dev->otherend);
+
+               ring_order = 0;
+
+               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                  "%d", &ring_ref[0]);
+               if (err != 1) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                        dev->otherend);
+                       return err;
+               }
+
+               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
+       } else {
+               unsigned int i;
+
+               if (ring_order > xen_blkif_max_ring_order) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err,
+                                        "%s/ring-page-order too big",
+                                        dev->otherend);
+                       return err;
+               }
+
+               for (i = 0; i < (1u << ring_order); i++) {
+                       char ring_ref_name[10];
+
+                       snprintf(ring_ref_name, sizeof(ring_ref_name),
+                                "ring-ref%u", i);
+
+                       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                                          ring_ref_name, "%d",
+                                          &ring_ref[i]);
+                       if (err != 1) {
+                               err = -EINVAL;
+
+                               xenbus_dev_fatal(dev, err,
+                                                "reading %s/%s",
+                                                dev->otherend,
+                                                ring_ref_name);
+                               return err;
+                       }
+
+                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
+                              ring_ref[i]);
+               }
+       }
+
        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
                            "%63s", protocol, NULL);
@@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -1;
        }
-       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol);

        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
        if (err) {
-               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                ring_ref, evtchn);
+               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
                return err;
        }

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874..485813a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -57,6 +57,10 @@

 #include <asm/xen/hypervisor.h>

+static int xen_blkif_ring_order;
+module_param_named(reqs, xen_blkif_ring_order, int, 0);
+MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages.");
+
 enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
@@ -72,7 +76,8 @@ struct blk_shadow {
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;

-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)

 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -87,14 +92,15 @@ struct blkfront_info
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       int ring_order;
        struct blkif_front_ring ring;
        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -111,9 +117,7 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);

-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-#define GRANT_INVALID_REF      0
+#define GRANT_INVALID_REF      0

 #define PARTS_PER_DISK         16
 #define PARTS_PER_EXT_DISK      256
@@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE);
+       BUG_ON(free >= BLK_MAX_RING_SIZE);
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
@@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)

 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+       int i;
+
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
@@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        flush_work_sync(&info->work);

        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
+       for (i = 0; i < (1 << info->ring_order); i++) {
+               if (info->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+                       info->ring_ref[i] = GRANT_INVALID_REF;
+               }
        }
+
+       free_pages((unsigned long)info->ring.sring, info->ring_order);
+       info->ring.sring = NULL;
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
-
 }

 static void blkif_completion(struct blk_shadow *s)
@@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
        struct blkif_sring *sring;
        int err;

-       info->ring_ref = GRANT_INVALID_REF;
-
-       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+                                                      info->ring_order);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);

        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

-       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
+                               info->ring_ref);
        if (err < 0) {
-               free_page((unsigned long)sring);
+               free_pages((unsigned long)sring, info->ring_order);
                info->ring.sring = NULL;
                goto fail;
        }
-       info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
@@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
+       unsigned int ring_order;
+       int legacy_backend;
+       int i;
        int err;

+       for (i = 0; i < (1 << info->ring_order); i++)
+               info->ring_ref[i] = GRANT_INVALID_REF;
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
+                          &ring_order);
+
+       legacy_backend = !(err == 1);
+
+       if (legacy_backend) {
+               info->ring_order = 0;
+       } else {
+               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
+                                  ring_order :
+                                  xen_blkif_ring_order;
+       }
+
        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
@@ -889,12 +916,35 @@ again:
                goto destroy_blkring;
        }

-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
+       if (legacy_backend) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%d", info->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               for (i = 0; i < (1 << info->ring_order); i++) {
+                       char key[sizeof("ring-ref") + 2];
+
+                       sprintf(key, "ring-ref%d", i);
+
+                       err = xenbus_printf(xbt, dev->nodename,
+                                           key, "%d", info->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
+
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-page-order", "%u", info->ring_order);
+               if (err) {
+                       message = "writing ring-order";
+                       goto abort_transaction;
+               }
        }
+
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
@@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);

-       err = talk_to_blkback(dev, info);
-       if (err) {
-               kfree(info);
-               dev_set_drvdata(&dev->dev, NULL);
-               return err;
-       }
-
        return 0;
 }

@@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
@@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,

        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
@@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateClosed:
                break;

+       case XenbusStateInitWait:
+               talk_to_blkback(dev, info);
+               break;
+
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 94b79c3..f93b59a 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
 /* (Un)Map communication rings. */
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref);
+                                int tx_ring_ref,
+                                int rx_ring_ref);

 /* (De)Register a xenvif with the netback backend. */
 void xen_netbk_add_xenvif(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 59effac..0b014cf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 }

 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref)
+                                int tx_ring_ref,
+                                int rx_ring_ref)
 {
        void *addr;
        struct xen_netif_tx_sring *txs;
@@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    tx_ring_ref, &addr);
+                                    &tx_ring_ref, 1, &addr);
        if (err)
                goto err;

@@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    rx_ring_ref, &addr);
+                                    &rx_ring_ref, 1, &addr);
        if (err)
                goto err;

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 698b905..521a595 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)txs);
                goto fail;
        }

-       info->tx_ring_ref = err;
        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
        if (!rxs) {
                err = -ENOMEM;
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(rxs);
        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)rxs);
                goto fail;
        }
-       info->rx_ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 1620088..95109d8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
        int err = 0;
        struct xenbus_transaction trans;

-       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
        if (err < 0)
                goto out;

-       pdev->gnt_ref = err;
-
        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
        if (err)
                goto out;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f9..e0834cd 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
                gnt_ref, remote_evtchn);

-       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                "Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 566d2ad..3a14524 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -53,14 +53,16 @@ struct xenbus_map_node {
                struct vm_struct *area; /* PV */
                struct page *page;     /* HVM */
        };
-       grant_handle_t handle;
+       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
+       unsigned int   nr_handles;
 };

 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);

 struct xenbus_ring_ops {
-       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
+                  void **vaddr);
        int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };

@@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages to be granted
+ * @grefs: grant reference array to be filled in
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references.  Return 0 on success, or
+ * -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the first error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[])
 {
-       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-       if (err < 0)
-               xenbus_dev_fatal(dev, err, "granting access to ring page");
+       int i;
+       int err;
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               err = gnttab_grant_foreign_access(dev->otherend_id,
+                                                 virt_to_mfn(addr), 0);
+               if (err < 0) {
+                       xenbus_dev_fatal(dev, err,
+                                        "granting access to ring page");
+                       goto fail;
+               }
+               grefs[i] = err;
+       }
+
+       return 0;
+
+fail:
+       for ( ; i >= 0; i--)
+               gnttab_end_foreign_access_ref(grefs[i], 0);
        return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
  * @vaddr: pointer to address to be filled out by mapping
  *
  * Based on Rusty Russell's skeleton driver's map_page.
@@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  * or -ENOMEM on error. If an error is returned, device will switch to
  * XenbusStateClosing and the error message will be saved in XenStore.
  */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr)
 {
-       return ring_ops->map(dev, gnt_ref, vaddr);
+       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node);
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-                                    int gnt_ref, void **vaddr)
+                                    int gnt_ref[], int nr_grefs, void **vaddr)
 {
-       struct gnttab_map_grant_ref op = {
-               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
-               .ref   = gnt_ref,
-               .dom   = dev->otherend_id,
-       };
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
        struct xenbus_map_node *node;
        struct vm_struct *area;
-       pte_t *pte;
+       pte_t *pte[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = 0;
+
+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;

        *vaddr = NULL;

@@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
        if (!node)
                return -ENOMEM;

-       area = alloc_vm_area(PAGE_SIZE, &pte);
+       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
        if (!area) {
                kfree(node);
                return -ENOMEM;
        }

-       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
+       for (i = 0; i < nr_grefs; i++) {
+               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte,
+               op[i].ref   = gnt_ref[i],
+               op[i].dom   = dev->otherend_id,
+               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
+       };

        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
                BUG();

-       if (op.status != GNTST_okay) {
-               free_vm_area(area);
-               kfree(node);
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-               return op.status;
+       node->nr_handles = nr_grefs;
+       node->area = area;
+
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+                       continue;
+               }
+               node->handle[i] = op[i].handle;
        }

-       node->handle = op.handle;
-       node->area = area;
+       if (err != 0) {
+               for (i = 0; i < nr_grefs; i++)
+                       xenbus_dev_fatal(dev, op[i].status,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+
+                __xenbus_unmap_ring_vfree_pv(dev, node);
+
+               return err;
+       }

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
@@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 }

 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-                                     int gnt_ref, void **vaddr)
+                                     int gnt_ref[], int nr_grefs, void **vaddr)
 {
        struct xenbus_map_node *node;
        int err;
        void *addr;

+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;
+
        *vaddr = NULL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

-       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+       err = alloc_xenballooned_pages(nr_grefs, &node->page,
+                                      false /* lowmem */);
        if (err)
                goto out_err;

        addr = pfn_to_kaddr(page_to_pfn(node->page));

-       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
        if (err)
                goto out_err;

@@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
        return 0;

  out_err:
-       free_xenballooned_pages(1, &node->page);
+       free_xenballooned_pages(nr_grefs, &node->page);
        kfree(node);
        return err;
 }
@@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
+ * @handle: pointer to grant handle array to be filled, mind the size
  * @vaddr: address to be mapped to
  *
- * Map a page of memory into this domain from another domain's grant table.
+ * Map pages of memory into this domain from another domain's grant table.
  * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
  * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * XenbusStateClosing and the last error message will be saved in XenStore.
  */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                   grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr)
 {
-       struct gnttab_map_grant_ref op;
-
-       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
-                         dev->otherend_id);
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = GNTST_okay;   /* 0 */
+
+       for (i = 0; i < nr_grefs; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               gnttab_set_map_op(&op[i], (phys_addr_t)addr,
+                                 GNTMAP_host_map, gnt_ref[i],
+                                 dev->otherend_id);
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
                BUG();

-       if (op.status != GNTST_okay) {
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-       } else
-               *handle = op.handle;
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_fatal(dev, err,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               } else
+                       handle[i] = op[i].handle;
+       }

-       return op.status;
+       if (err != GNTST_okay)
+               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);

@@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node)
+{
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       unsigned int level;
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < node->nr_handles; i++) {
+               unsigned long vaddr = (unsigned long)node->area->addr +
+                       (PAGE_SIZE * i);
+               if (node->handle[i] != INVALID_GRANT_HANDLE) {
+                       memset(&op[j], 0, sizeof(op[0]));
+                       op[j].host_addr = arbitrary_virt_to_machine(
+                                       lookup_address(vaddr, &level)).maddr;
+                       op[j].handle = node->handle[i];
+                       j++;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }
+
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
+               BUG();
+
+       node->nr_handles = 0;
+
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page %d at handle %d error %d",
+                               i, op[i].handle, err);
+               }
+       }
+
+       if (err == GNTST_okay)
+               free_vm_area(node->area);
+
+       kfree(node);
+
+       return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
        struct xenbus_map_node *node;
-       struct gnttab_unmap_grant_ref op = {
-               .host_addr = (unsigned long)vaddr,
-       };
-       unsigned int level;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       op.handle = node->handle;
-       op.host_addr = arbitrary_virt_to_machine(
-               lookup_address((unsigned long)vaddr, &level)).maddr;
-
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-               BUG();
-
-       if (op.status == GNTST_okay)
-               free_vm_area(node->area);
-       else
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                node->handle, op.status);
-
-       kfree(node);
-       return op.status;
+       return __xenbus_unmap_ring_vfree_pv(dev, node);
 }

 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 {
        int rv;
        struct xenbus_map_node *node;
-       void *addr;
+       void *addr = NULL;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       rv = xenbus_unmap_ring(dev, node->handle, addr);
+       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);

        if (!rv)
-               free_xenballooned_pages(1, &node->page);
+               free_xenballooned_pages(node->nr_handles, &node->page);
        else
                WARN(1, "Leaking %p\n", vaddr);

@@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * xenbus_unmap_ring
  * @dev: xenbus device
  * @handle: grant handle
+ * @nr_handles: number of grant handles
  * @vaddr: addr to unmap
  *
  * Unmap a page of memory in this domain that was imported from another domain.
@@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * (see xen/include/interface/grant_table.h).
  */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr)
+                       grant_handle_t handle[], int nr_handles,
+                       void *vaddr)
 {
-       struct gnttab_unmap_grant_ref op;
-
-       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < nr_handles; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               if (handle[i] != INVALID_GRANT_HANDLE) {
+                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
+                                           GNTMAP_host_map, handle[i]);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
                BUG();

-       if (op.status != GNTST_okay)
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                handle, op.status);
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page at handle %d error %d",
+                               handle[i], err);
+               }
+       }

-       return op.status;
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3864967..62b92d2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
        return err;
 }

+extern void xenbus_ring_ops_init(void);
 static int __init xenbus_init(void)
 {
        int err = 0;
@@ -767,6 +768,8 @@ static int __init xenbus_init(void)
        proc_mkdir("xen", NULL);
 #endif

+       xenbus_ring_ops_init();
+
 out_error:
        return err;
 }
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index e8c599b..cdbd948 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
                         const char *pathfmt, ...);

 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-                          int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                          grant_handle_t *handle, void *vaddr);
+
+#define        XENBUS_MAX_RING_ORDER   2
+#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
+
+#define INVALID_GRANT_HANDLE           (~0U)
+
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[]);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr);

 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr);
+                     grant_handle_t handle[], int nr_handles,
+                     void *vaddr);

 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
@ 2012-03-06  2:42               ` Rusty Russell
  -1 siblings, 0 replies; 73+ messages in thread
From: Rusty Russell @ 2012-03-06  2:42 UTC (permalink / raw)
  To: Santosh Jodh, konrad.wilk@oracle.com, jeremy@goop.org,
	Ian Campbell, jbarnes@virtuousgeek.org, jbeulich@novell.com,
	joe.jin@oracle.com, lersek@redhat.com, weiyi.huang@gmail.com,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org,
	xen-devel@lists.xen.org
  Cc: Santosh Jodh, Paul Durrant

On Mon, 5 Mar 2012 13:49:07 -0800, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:
> +/* Order of maximum shared ring size advertised to the front end. */
> +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
> +
> +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
> +
> +static int set_max_ring_order(const char *buf, struct kernel_param *kp)
> +{
> +       int err;
> +       unsigned long order;
> +
> +       err = kstrtol(buf, 0, &order);
> +       if (err ||
> +           order < 0 ||
> +           order > XENBUS_MAX_RING_ORDER)
> +               return -EINVAL;

Hmm, order can't be < 0, since it's unsigned.  So did you mean
kstrtoull?

And I think returning err is cleaner (it's -EINVAL for malformed
strings, -ERANGE for ones too big).

> +       if (xen_blkif_reqs < BLK_RING_SIZE(order))
> +               printk(KERN_WARNING "WARNING: "
> +                      "I/O request space (%d reqs) < ring order %ld, "
> +                      "consider increasing %s.reqs to >= %ld.",
> +                      xen_blkif_reqs, order, KBUILD_MODNAME,
> +                      roundup_pow_of_two(BLK_RING_SIZE(order)));

This message doesn't mention the module name or parameter name
anywhere.  Think of the poor sysadmins!

Thanks,
Rusty.
-- 
  How could I marry someone with more hair than me?  http://baldalex.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
  (?)
@ 2012-03-06  2:42             ` Rusty Russell
  -1 siblings, 0 replies; 73+ messages in thread
From: Rusty Russell @ 2012-03-06  2:42 UTC (permalink / raw)
  To: konrad.wilk@oracle.com, jeremy@goop.org, Ian Campbell,
	jbarnes@virtuousgeek.org, jbeulich@novell.com, joe.jin@oracle.com,
	lersek@redhat.com, weiyi.huang@gmail.com, dgdegra@tycho.nsa.gov,
	David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, linux-kernel
  Cc: Paul Durrant, Santosh Jodh

On Mon, 5 Mar 2012 13:49:07 -0800, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:
> +/* Order of maximum shared ring size advertised to the front end. */
> +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
> +
> +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
> +
> +static int set_max_ring_order(const char *buf, struct kernel_param *kp)
> +{
> +       int err;
> +       unsigned long order;
> +
> +       err = kstrtol(buf, 0, &order);
> +       if (err ||
> +           order < 0 ||
> +           order > XENBUS_MAX_RING_ORDER)
> +               return -EINVAL;

Hmm, order can't be < 0, since it's unsigned.  So did you mean
kstrtoull?

And I think returning err is cleaner (it's -EINVAL for malformed
strings, -ERANGE for ones too big).

> +       if (xen_blkif_reqs < BLK_RING_SIZE(order))
> +               printk(KERN_WARNING "WARNING: "
> +                      "I/O request space (%d reqs) < ring order %ld, "
> +                      "consider increasing %s.reqs to >= %ld.",
> +                      xen_blkif_reqs, order, KBUILD_MODNAME,
> +                      roundup_pow_of_two(BLK_RING_SIZE(order)));

This message doesn't mention the module name or parameter name
anywhere.  Think of the poor sysadmins!

Thanks,
Rusty.
-- 
  How could I marry someone with more hair than me?  http://baldalex.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
@ 2012-03-06  2:42               ` Rusty Russell
  0 siblings, 0 replies; 73+ messages in thread
From: Rusty Russell @ 2012-03-06  2:42 UTC (permalink / raw)
  To: Santosh Jodh, konrad.wilk@oracle.com, jeremy@goop.org,
	Ian Campbell, jbarnes@virtuousgeek.org, jbeulich@novell.com,
	joe.jin@oracle.com, lersek@redhat.com, weiyi.huang@gmail.com,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, "linux-kernel\@vg
  Cc: Santosh Jodh, Paul Durrant

On Mon, 5 Mar 2012 13:49:07 -0800, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:
> +/* Order of maximum shared ring size advertised to the front end. */
> +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
> +
> +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
> +
> +static int set_max_ring_order(const char *buf, struct kernel_param *kp)
> +{
> +       int err;
> +       unsigned long order;
> +
> +       err = kstrtol(buf, 0, &order);
> +       if (err ||
> +           order < 0 ||
> +           order > XENBUS_MAX_RING_ORDER)
> +               return -EINVAL;

Hmm, order can't be < 0, since it's unsigned.  So did you mean
kstrtoull?

And I think returning err is cleaner (it's -EINVAL for malformed
strings, -ERANGE for ones too big).

> +       if (xen_blkif_reqs < BLK_RING_SIZE(order))
> +               printk(KERN_WARNING "WARNING: "
> +                      "I/O request space (%d reqs) < ring order %ld, "
> +                      "consider increasing %s.reqs to >= %ld.",
> +                      xen_blkif_reqs, order, KBUILD_MODNAME,
> +                      roundup_pow_of_two(BLK_RING_SIZE(order)));

This message doesn't mention the module name or parameter name
anywhere.  Think of the poor sysadmins!

Thanks,
Rusty.
-- 
  How could I marry someone with more hair than me?  http://baldalex.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
                               ` (2 preceding siblings ...)
  (?)
@ 2012-03-06  2:42             ` Rusty Russell
  -1 siblings, 0 replies; 73+ messages in thread
From: Rusty Russell @ 2012-03-06  2:42 UTC (permalink / raw)
  To: konrad.wilk@oracle.com, jeremy@goop.org, Ian Campbell,
	jbarnes@virtuousgeek.org, jbeulich@novell.com, joe.jin@oracle.com,
	lersek@redhat.com, weiyi.huang@gmail.com, dgdegra@tycho.nsa.gov,
	David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, linux-kernel
  Cc: Paul Durrant, Santosh Jodh

On Mon, 5 Mar 2012 13:49:07 -0800, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:
> +/* Order of maximum shared ring size advertised to the front end. */
> +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
> +
> +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
> +
> +static int set_max_ring_order(const char *buf, struct kernel_param *kp)
> +{
> +       int err;
> +       unsigned long order;
> +
> +       err = kstrtol(buf, 0, &order);
> +       if (err ||
> +           order < 0 ||
> +           order > XENBUS_MAX_RING_ORDER)
> +               return -EINVAL;

Hmm, order can't be < 0, since it's unsigned.  So did you mean
kstrtoull?

And I think returning err is cleaner (it's -EINVAL for malformed
strings, -ERANGE for ones too big).

> +       if (xen_blkif_reqs < BLK_RING_SIZE(order))
> +               printk(KERN_WARNING "WARNING: "
> +                      "I/O request space (%d reqs) < ring order %ld, "
> +                      "consider increasing %s.reqs to >= %ld.",
> +                      xen_blkif_reqs, order, KBUILD_MODNAME,
> +                      roundup_pow_of_two(BLK_RING_SIZE(order)));

This message doesn't mention the module name or parameter name
anywhere.  Think of the poor sysadmins!

Thanks,
Rusty.
-- 
  How could I marry someone with more hair than me?  http://baldalex.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-06  2:42               ` Rusty Russell
@ 2012-03-06  6:21                 ` Santosh Jodh
  -1 siblings, 0 replies; 73+ messages in thread
From: Santosh Jodh @ 2012-03-06  6:21 UTC (permalink / raw)
  To: Rusty Russell, konrad.wilk@oracle.com, jeremy@goop.org,
	Ian Campbell, jbarnes@virtuousgeek.org, jbeulich@novell.com,
	joe.jin@oracle.com, lersek@redhat.com, weiyi.huang@gmail.com,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org,
	xen-devel@lists.xen.org
  Cc: Paul Durrant

Great feedback. I removed unsigned for the first, changed the error code and added module param name in the printk.

Please see latest patch:
---

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf6..cc238e7 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -60,6 +60,40 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

+/* Order of maximum shared ring size advertised to the front end. */
+int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
+
+#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+
+static int set_max_ring_order(const char *buf, struct kernel_param *kp)
+{
+       int err;
+       long order;
+
+       err = kstrtol(buf, 0, &order);
+       if (err ||
+           order < 0 ||
+           order > XENBUS_MAX_RING_ORDER)
+               return -ERANGE;
+
+       if (xen_blkif_reqs < BLK_RING_SIZE(order))
+               printk(KERN_WARNING "WARNING: "
+                      "I/O request space (%d reqs) < ring order %ld "
+                       "set by module parameter %s.max_ring_order, "
+                      "consider increasing %s.reqs to >= %ld.",
+                      xen_blkif_reqs, order, KBUILD_MODNAME, KBUILD_MODNAME,
+                      roundup_pow_of_two(BLK_RING_SIZE(order)));
+
+       xen_blkif_max_ring_order = order;
+
+       return 0;
+}
+
+module_param_call(max_ring_order,
+                 set_max_ring_order, param_get_int,
+                 &xen_blkif_max_ring_order, 0644);
+MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7ed..5f33a1a 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -126,6 +126,8 @@ struct blkif_x86_64_response {
        int16_t         status;          /* BLKIF_RSP_???       */
 };

+extern int xen_blkif_max_ring_order;
+
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_common_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb5..7a9d71d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        return blkif;
 }

-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
-                        unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
+                        unsigned int ring_order, unsigned int evtchn)
 {
        int err;

@@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        if (blkif->irq)
                return 0;

-       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
+                                    &blkif->blk_ring);
        if (err < 0)
                return err;

@@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        {
                struct blkif_sring *sring;
                sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.native, sring,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+                              PAGE_SIZE << ring_order);
                break;
        }
        default:
@@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        if (err)
                goto fail;

+       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
+                           "%u", xen_blkif_max_ring_order);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -744,22 +753,80 @@ again:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
-       unsigned long ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int ring_order;
        unsigned int evtchn;
        char protocol[64] = "";
        int err;

        DPRINTK("%s", dev->otherend);

-       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
-       if (err) {
-               xenbus_dev_fatal(dev, err,
-                                "reading %s/ring-ref and event-channel",
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                          &evtchn);
+       if (err != 1) {
+               err = -EINVAL;
+
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                return err;
        }

+       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                          &ring_order);
+       if (err != 1) {
+               DPRINTK("%s: using single page handshake", dev->otherend);
+
+               ring_order = 0;
+
+               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                  "%d", &ring_ref[0]);
+               if (err != 1) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                        dev->otherend);
+                       return err;
+               }
+
+               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
+       } else {
+               unsigned int i;
+
+               if (ring_order > xen_blkif_max_ring_order) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err,
+                                        "%s/ring-page-order too big",
+                                        dev->otherend);
+                       return err;
+               }
+
+               for (i = 0; i < (1u << ring_order); i++) {
+                       char ring_ref_name[10];
+
+                       snprintf(ring_ref_name, sizeof(ring_ref_name),
+                                "ring-ref%u", i);
+
+                       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                                          ring_ref_name, "%d",
+                                          &ring_ref[i]);
+                       if (err != 1) {
+                               err = -EINVAL;
+
+                               xenbus_dev_fatal(dev, err,
+                                                "reading %s/%s",
+                                                dev->otherend,
+                                                ring_ref_name);
+                               return err;
+                       }
+
+                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
+                              ring_ref[i]);
+               }
+       }
+
        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
                            "%63s", protocol, NULL);
@@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -1;
        }
-       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol);

        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
        if (err) {
-               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                ring_ref, evtchn);
+               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
                return err;
        }

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874..485813a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -57,6 +57,10 @@

 #include <asm/xen/hypervisor.h>

+static int xen_blkif_ring_order;
+module_param_named(reqs, xen_blkif_ring_order, int, 0);
+MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages.");
+
 enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
@@ -72,7 +76,8 @@ struct blk_shadow {
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;

-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)

 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -87,14 +92,15 @@ struct blkfront_info
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       int ring_order;
        struct blkif_front_ring ring;
        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -111,9 +117,7 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);

-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-#define GRANT_INVALID_REF      0
+#define GRANT_INVALID_REF      0

 #define PARTS_PER_DISK         16
 #define PARTS_PER_EXT_DISK      256
@@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE);
+       BUG_ON(free >= BLK_MAX_RING_SIZE);
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
@@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)

 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+       int i;
+
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
@@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        flush_work_sync(&info->work);

        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
+       for (i = 0; i < (1 << info->ring_order); i++) {
+               if (info->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+                       info->ring_ref[i] = GRANT_INVALID_REF;
+               }
        }
+
+       free_pages((unsigned long)info->ring.sring, info->ring_order);
+       info->ring.sring = NULL;
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
-
 }

 static void blkif_completion(struct blk_shadow *s)
@@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
        struct blkif_sring *sring;
        int err;

-       info->ring_ref = GRANT_INVALID_REF;
-
-       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+                                                      info->ring_order);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);

        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

-       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
+                               info->ring_ref);
        if (err < 0) {
-               free_page((unsigned long)sring);
+               free_pages((unsigned long)sring, info->ring_order);
                info->ring.sring = NULL;
                goto fail;
        }
-       info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
@@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
+       unsigned int ring_order;
+       int legacy_backend;
+       int i;
        int err;

+       for (i = 0; i < (1 << info->ring_order); i++)
+               info->ring_ref[i] = GRANT_INVALID_REF;
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
+                          &ring_order);
+
+       legacy_backend = !(err == 1);
+
+       if (legacy_backend) {
+               info->ring_order = 0;
+       } else {
+               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
+                                  ring_order :
+                                  xen_blkif_ring_order;
+       }
+
        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
@@ -889,12 +916,35 @@ again:
                goto destroy_blkring;
        }

-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
+       if (legacy_backend) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%d", info->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               for (i = 0; i < (1 << info->ring_order); i++) {
+                       char key[sizeof("ring-ref") + 2];
+
+                       sprintf(key, "ring-ref%d", i);
+
+                       err = xenbus_printf(xbt, dev->nodename,
+                                           key, "%d", info->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
+
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-page-order", "%u", info->ring_order);
+               if (err) {
+                       message = "writing ring-order";
+                       goto abort_transaction;
+               }
        }
+
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
@@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);

-       err = talk_to_blkback(dev, info);
-       if (err) {
-               kfree(info);
-               dev_set_drvdata(&dev->dev, NULL);
-               return err;
-       }
-
        return 0;
 }

@@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
@@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,

        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
@@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateClosed:
                break;

+       case XenbusStateInitWait:
+               talk_to_blkback(dev, info);
+               break;
+
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 94b79c3..f93b59a 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
 /* (Un)Map communication rings. */
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref);
+                                int tx_ring_ref,
+                                int rx_ring_ref);

 /* (De)Register a xenvif with the netback backend. */
 void xen_netbk_add_xenvif(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 59effac..0b014cf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 }

 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref)
+                                int tx_ring_ref,
+                                int rx_ring_ref)
 {
        void *addr;
        struct xen_netif_tx_sring *txs;
@@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    tx_ring_ref, &addr);
+                                    &tx_ring_ref, 1, &addr);
        if (err)
                goto err;

@@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    rx_ring_ref, &addr);
+                                    &rx_ring_ref, 1, &addr);
        if (err)
                goto err;

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 698b905..521a595 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)txs);
                goto fail;
        }

-       info->tx_ring_ref = err;
        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
        if (!rxs) {
                err = -ENOMEM;
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(rxs);
        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)rxs);
                goto fail;
        }
-       info->rx_ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 1620088..95109d8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
        int err = 0;
        struct xenbus_transaction trans;

-       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
        if (err < 0)
                goto out;

-       pdev->gnt_ref = err;
-
        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
        if (err)
                goto out;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f9..e0834cd 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
                gnt_ref, remote_evtchn);

-       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                "Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 566d2ad..3a14524 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -53,14 +53,16 @@ struct xenbus_map_node {
                struct vm_struct *area; /* PV */
                struct page *page;     /* HVM */
        };
-       grant_handle_t handle;
+       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
+       unsigned int   nr_handles;
 };

 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);

 struct xenbus_ring_ops {
-       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
+                  void **vaddr);
        int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };

@@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of page to be granted
+ * @grefs: grant reference array to be filled in
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references.  Return 0 on success, or
+ * -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the first error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[])
 {
-       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-       if (err < 0)
-               xenbus_dev_fatal(dev, err, "granting access to ring page");
+       int i;
+       int err;
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               err = gnttab_grant_foreign_access(dev->otherend_id,
+                                                 virt_to_mfn(addr), 0);
+               if (err < 0) {
+                       xenbus_dev_fatal(dev, err,
+                                        "granting access to ring page");
+                       goto fail;
+               }
+               grefs[i] = err;
+       }
+
+       return 0;
+
+fail:
+       for ( ; i >= 0; i--)
+               gnttab_end_foreign_access_ref(grefs[i], 0);
        return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant reference
  * @vaddr: pointer to address to be filled out by mapping
  *
  * Based on Rusty Russell's skeleton driver's map_page.
@@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  * or -ENOMEM on error. If an error is returned, device will switch to
  * XenbusStateClosing and the error message will be saved in XenStore.
  */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr)
 {
-       return ring_ops->map(dev, gnt_ref, vaddr);
+       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node);
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-                                    int gnt_ref, void **vaddr)
+                                    int gnt_ref[], int nr_grefs, void **vaddr)
 {
-       struct gnttab_map_grant_ref op = {
-               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
-               .ref   = gnt_ref,
-               .dom   = dev->otherend_id,
-       };
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
        struct xenbus_map_node *node;
        struct vm_struct *area;
-       pte_t *pte;
+       pte_t *pte[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = 0;
+
+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;

        *vaddr = NULL;

@@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
        if (!node)
                return -ENOMEM;

-       area = alloc_vm_area(PAGE_SIZE, &pte);
+       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
        if (!area) {
                kfree(node);
                return -ENOMEM;
        }

-       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
+       for (i = 0; i < nr_grefs; i++) {
+               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte,
+               op[i].ref   = gnt_ref[i],
+               op[i].dom   = dev->otherend_id,
+               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
+       };

        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
                BUG();

-       if (op.status != GNTST_okay) {
-               free_vm_area(area);
-               kfree(node);
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-               return op.status;
+       node->nr_handles = nr_grefs;
+       node->area = area;
+
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+                       continue;
+               }
+               node->handle[i] = op[i].handle;
        }

-       node->handle = op.handle;
-       node->area = area;
+       if (err != 0) {
+               for (i = 0; i < nr_grefs; i++)
+                       xenbus_dev_fatal(dev, op[i].status,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+
+                __xenbus_unmap_ring_vfree_pv(dev, node);
+
+               return err;
+       }

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
@@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 }

 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-                                     int gnt_ref, void **vaddr)
+                                     int gnt_ref[], int nr_grefs, void **vaddr)
 {
        struct xenbus_map_node *node;
        int err;
        void *addr;

+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;
+
        *vaddr = NULL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

-       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+       err = alloc_xenballooned_pages(nr_grefs, &node->page,
+                                      false /* lowmem */);
        if (err)
                goto out_err;

        addr = pfn_to_kaddr(page_to_pfn(node->page));

-       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
        if (err)
                goto out_err;

@@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
        return 0;

  out_err:
-       free_xenballooned_pages(1, &node->page);
+       free_xenballooned_pages(nr_grefs, &node->page);
        kfree(node);
        return err;
 }
@@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
+ * @handle: pointer to grant handle array to be filled, mind the size
  * @vaddr: address to be mapped to
  *
- * Map a page of memory into this domain from another domain's grant table.
+ * Map pages of memory into this domain from another domain's grant table.
  * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
  * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * XenbusStateClosing and the last error message will be saved in XenStore.
  */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                   grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr)
 {
-       struct gnttab_map_grant_ref op;
-
-       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
-                         dev->otherend_id);
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = GNTST_okay;   /* 0 */
+
+       for (i = 0; i < nr_grefs; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               gnttab_set_map_op(&op[i], (phys_addr_t)addr,
+                                 GNTMAP_host_map, gnt_ref[i],
+                                 dev->otherend_id);
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
                BUG();

-       if (op.status != GNTST_okay) {
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-       } else
-               *handle = op.handle;
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_fatal(dev, err,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               } else
+                       handle[i] = op[i].handle;
+       }

-       return op.status;
+       if (err != GNTST_okay)
+               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);

@@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node)
+{
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       unsigned int level;
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < node->nr_handles; i++) {
+               unsigned long vaddr = (unsigned long)node->area->addr +
+                       (PAGE_SIZE * i);
+               if (node->handle[i] != INVALID_GRANT_HANDLE) {
+                       memset(&op[j], 0, sizeof(op[0]));
+                       op[j].host_addr = arbitrary_virt_to_machine(
+                                       lookup_address(vaddr, &level)).maddr;
+                       op[j].handle = node->handle[i];
+                       j++;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }
+
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
+               BUG();
+
+       node->nr_handles = 0;
+
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page %d at handle %d error %d",
+                               i, op[i].handle, err);
+               }
+       }
+
+       if (err == GNTST_okay)
+               free_vm_area(node->area);
+
+       kfree(node);
+
+       return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
        struct xenbus_map_node *node;
-       struct gnttab_unmap_grant_ref op = {
-               .host_addr = (unsigned long)vaddr,
-       };
-       unsigned int level;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       op.handle = node->handle;
-       op.host_addr = arbitrary_virt_to_machine(
-               lookup_address((unsigned long)vaddr, &level)).maddr;
-
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-               BUG();
-
-       if (op.status == GNTST_okay)
-               free_vm_area(node->area);
-       else
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                node->handle, op.status);
-
-       kfree(node);
-       return op.status;
+       return __xenbus_unmap_ring_vfree_pv(dev, node);
 }

 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 {
        int rv;
        struct xenbus_map_node *node;
-       void *addr;
+       void *addr = NULL;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       rv = xenbus_unmap_ring(dev, node->handle, addr);
+       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);

        if (!rv)
-               free_xenballooned_pages(1, &node->page);
+               free_xenballooned_pages(node->nr_handles, &node->page);
        else
                WARN(1, "Leaking %p\n", vaddr);

@@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * xenbus_unmap_ring
  * @dev: xenbus device
  * @handle: grant handle
+ * @nr_handles: number of grant handles
  * @vaddr: addr to unmap
  *
  * Unmap a page of memory in this domain that was imported from another domain.
@@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * (see xen/include/interface/grant_table.h).
  */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr)
+                       grant_handle_t handle[], int nr_handles,
+                       void *vaddr)
 {
-       struct gnttab_unmap_grant_ref op;
-
-       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < nr_handles; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               if (handle[i] != INVALID_GRANT_HANDLE) {
+                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
+                                           GNTMAP_host_map, handle[i]);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
                BUG();

-       if (op.status != GNTST_okay)
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                handle, op.status);
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page at handle %d error %d",
+                               handle[i], err);
+               }
+       }

-       return op.status;
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3864967..62b92d2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
        return err;
 }

+extern void xenbus_ring_ops_init(void);
 static int __init xenbus_init(void)
 {
        int err = 0;
@@ -767,6 +768,8 @@ static int __init xenbus_init(void)
        proc_mkdir("xen", NULL);
 #endif

+       xenbus_ring_ops_init();
+
 out_error:
        return err;
 }
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index e8c599b..cdbd948 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
                         const char *pathfmt, ...);

 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-                          int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                          grant_handle_t *handle, void *vaddr);
+
+#define        XENBUS_MAX_RING_ORDER   2
+#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
+
+#define INVALID_GRANT_HANDLE           (~0U)
+
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[]);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr);

 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr);
+                     grant_handle_t handle[], int nr_handles,
+                     void *vaddr);

 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* RE: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-06  2:42               ` Rusty Russell
  (?)
  (?)
@ 2012-03-06  6:21               ` Santosh Jodh
  -1 siblings, 0 replies; 73+ messages in thread
From: Santosh Jodh @ 2012-03-06  6:21 UTC (permalink / raw)
  To: Rusty Russell, konrad.wilk@oracle.com, jeremy@goop.org,
	Ian Campbell, jbarnes@virtuousgeek.org, jbeulich@novell.com,
	joe.jin@oracle.com, lersek@redhat.com, weiyi.huang@gmail.com,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org
  Cc: Paul Durrant

Great feedback. I removed unsigned for the first, changed the error code and added module param name in the printk.

Please see latest patch:
---

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf6..cc238e7 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -60,6 +60,40 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

+/* Order of maximum shared ring size advertised to the front end. */
+int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
+
+#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+
+static int set_max_ring_order(const char *buf, struct kernel_param *kp)
+{
+       int err;
+       long order;
+
+       err = kstrtol(buf, 0, &order);
+       if (err ||
+           order < 0 ||
+           order > XENBUS_MAX_RING_ORDER)
+               return -ERANGE;
+
+       if (xen_blkif_reqs < BLK_RING_SIZE(order))
+               printk(KERN_WARNING "WARNING: "
+                      "I/O request space (%d reqs) < ring order %ld "
+                       "set by module parameter %s.max_ring_order, "
+                      "consider increasing %s.reqs to >= %ld.",
+                      xen_blkif_reqs, order, KBUILD_MODNAME, KBUILD_MODNAME,
+                      roundup_pow_of_two(BLK_RING_SIZE(order)));
+
+       xen_blkif_max_ring_order = order;
+
+       return 0;
+}
+
+module_param_call(max_ring_order,
+                 set_max_ring_order, param_get_int,
+                 &xen_blkif_max_ring_order, 0644);
+MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7ed..5f33a1a 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -126,6 +126,8 @@ struct blkif_x86_64_response {
        int16_t         status;          /* BLKIF_RSP_???       */
 };

+extern int xen_blkif_max_ring_order;
+
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_common_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb5..7a9d71d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        return blkif;
 }

-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
-                        unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
+                        unsigned int ring_order, unsigned int evtchn)
 {
        int err;

@@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        if (blkif->irq)
                return 0;

-       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
+                                    &blkif->blk_ring);
        if (err < 0)
                return err;

@@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        {
                struct blkif_sring *sring;
                sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.native, sring,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+                              PAGE_SIZE << ring_order);
                break;
        }
        default:
@@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        if (err)
                goto fail;

+       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
+                           "%u", xen_blkif_max_ring_order);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -744,22 +753,80 @@ again:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
-       unsigned long ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int ring_order;
        unsigned int evtchn;
        char protocol[64] = "";
        int err;

        DPRINTK("%s", dev->otherend);

-       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
-       if (err) {
-               xenbus_dev_fatal(dev, err,
-                                "reading %s/ring-ref and event-channel",
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                          &evtchn);
+       if (err != 1) {
+               err = -EINVAL;
+
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                return err;
        }

+       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                          &ring_order);
+       if (err != 1) {
+               DPRINTK("%s: using single page handshake", dev->otherend);
+
+               ring_order = 0;
+
+               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                  "%d", &ring_ref[0]);
+               if (err != 1) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                        dev->otherend);
+                       return err;
+               }
+
+               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
+       } else {
+               unsigned int i;
+
+               if (ring_order > xen_blkif_max_ring_order) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err,
+                                        "%s/ring-page-order too big",
+                                        dev->otherend);
+                       return err;
+               }
+
+               for (i = 0; i < (1u << ring_order); i++) {
+                       char ring_ref_name[10];
+
+                       snprintf(ring_ref_name, sizeof(ring_ref_name),
+                                "ring-ref%u", i);
+
+                       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                                          ring_ref_name, "%d",
+                                          &ring_ref[i]);
+                       if (err != 1) {
+                               err = -EINVAL;
+
+                               xenbus_dev_fatal(dev, err,
+                                                "reading %s/%s",
+                                                dev->otherend,
+                                                ring_ref_name);
+                               return err;
+                       }
+
+                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
+                              ring_ref[i]);
+               }
+       }
+
        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
                            "%63s", protocol, NULL);
@@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -1;
        }
-       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol);

        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
        if (err) {
-               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                ring_ref, evtchn);
+               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
                return err;
        }

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874..485813a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -57,6 +57,10 @@

 #include <asm/xen/hypervisor.h>

+static int xen_blkif_ring_order;
+module_param_named(reqs, xen_blkif_ring_order, int, 0);
+MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages.");
+
 enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
@@ -72,7 +76,8 @@ struct blk_shadow {
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;

-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)

 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -87,14 +92,15 @@ struct blkfront_info
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       int ring_order;
        struct blkif_front_ring ring;
        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -111,9 +117,7 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);

-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-#define GRANT_INVALID_REF      0
+#define GRANT_INVALID_REF      0

 #define PARTS_PER_DISK         16
 #define PARTS_PER_EXT_DISK      256
@@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE);
+       BUG_ON(free >= BLK_MAX_RING_SIZE);
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
@@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)

 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+       int i;
+
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
@@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        flush_work_sync(&info->work);

        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
+       for (i = 0; i < (1 << info->ring_order); i++) {
+               if (info->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+                       info->ring_ref[i] = GRANT_INVALID_REF;
+               }
        }
+
+       free_pages((unsigned long)info->ring.sring, info->ring_order);
+       info->ring.sring = NULL;
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
-
 }

 static void blkif_completion(struct blk_shadow *s)
@@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
        struct blkif_sring *sring;
        int err;

-       info->ring_ref = GRANT_INVALID_REF;
-
-       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+                                                      info->ring_order);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);

        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

-       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
+                               info->ring_ref);
        if (err < 0) {
-               free_page((unsigned long)sring);
+               free_pages((unsigned long)sring, info->ring_order);
                info->ring.sring = NULL;
                goto fail;
        }
-       info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
@@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
+       unsigned int ring_order;
+       int legacy_backend;
+       int i;
        int err;

+       for (i = 0; i < (1 << info->ring_order); i++)
+               info->ring_ref[i] = GRANT_INVALID_REF;
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
+                          &ring_order);
+
+       legacy_backend = !(err == 1);
+
+       if (legacy_backend) {
+               info->ring_order = 0;
+       } else {
+               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
+                                  ring_order :
+                                  xen_blkif_ring_order;
+       }
+
        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
@@ -889,12 +916,35 @@ again:
                goto destroy_blkring;
        }

-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
+       if (legacy_backend) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%d", info->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               for (i = 0; i < (1 << info->ring_order); i++) {
+                       char key[sizeof("ring-ref") + 2];
+
+                       sprintf(key, "ring-ref%d", i);
+
+                       err = xenbus_printf(xbt, dev->nodename,
+                                           key, "%d", info->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
+
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-page-order", "%u", info->ring_order);
+               if (err) {
+                       message = "writing ring-order";
+                       goto abort_transaction;
+               }
        }
+
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
@@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);

-       err = talk_to_blkback(dev, info);
-       if (err) {
-               kfree(info);
-               dev_set_drvdata(&dev->dev, NULL);
-               return err;
-       }
-
        return 0;
 }

@@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
@@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,

        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
@@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateClosed:
                break;

+       case XenbusStateInitWait:
+               talk_to_blkback(dev, info);
+               break;
+
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 94b79c3..f93b59a 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
 /* (Un)Map communication rings. */
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref);
+                                int tx_ring_ref,
+                                int rx_ring_ref);

 /* (De)Register a xenvif with the netback backend. */
 void xen_netbk_add_xenvif(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 59effac..0b014cf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 }

 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref)
+                                int tx_ring_ref,
+                                int rx_ring_ref)
 {
        void *addr;
        struct xen_netif_tx_sring *txs;
@@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    tx_ring_ref, &addr);
+                                    &tx_ring_ref, 1, &addr);
        if (err)
                goto err;

@@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    rx_ring_ref, &addr);
+                                    &rx_ring_ref, 1, &addr);
        if (err)
                goto err;

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 698b905..521a595 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)txs);
                goto fail;
        }

-       info->tx_ring_ref = err;
        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
        if (!rxs) {
                err = -ENOMEM;
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(rxs);
        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)rxs);
                goto fail;
        }
-       info->rx_ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 1620088..95109d8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
        int err = 0;
        struct xenbus_transaction trans;

-       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
        if (err < 0)
                goto out;

-       pdev->gnt_ref = err;
-
        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
        if (err)
                goto out;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f9..e0834cd 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
                gnt_ref, remote_evtchn);

-       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                "Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 566d2ad..3a14524 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -53,14 +53,16 @@ struct xenbus_map_node {
                struct vm_struct *area; /* PV */
                struct page *page;     /* HVM */
        };
-       grant_handle_t handle;
+       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
+       unsigned int   nr_handles;
 };

 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);

 struct xenbus_ring_ops {
-       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
+                  void **vaddr);
        int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };

@@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages to be granted
+ * @grefs: grant reference array to be filled in
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references.  Return 0 on success, or
+ * -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the first error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[])
 {
-       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-       if (err < 0)
-               xenbus_dev_fatal(dev, err, "granting access to ring page");
+       int i;
+       int err;
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               err = gnttab_grant_foreign_access(dev->otherend_id,
+                                                 virt_to_mfn(addr), 0);
+               if (err < 0) {
+                       xenbus_dev_fatal(dev, err,
+                                        "granting access to ring page");
+                       goto fail;
+               }
+               grefs[i] = err;
+       }
+
+       return 0;
+
+fail:
+       for ( ; i >= 0; i--)
+               gnttab_end_foreign_access_ref(grefs[i], 0);
        return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
  * @vaddr: pointer to address to be filled out by mapping
  *
  * Based on Rusty Russell's skeleton driver's map_page.
@@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  * or -ENOMEM on error. If an error is returned, device will switch to
  * XenbusStateClosing and the error message will be saved in XenStore.
  */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr)
 {
-       return ring_ops->map(dev, gnt_ref, vaddr);
+       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node);
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-                                    int gnt_ref, void **vaddr)
+                                    int gnt_ref[], int nr_grefs, void **vaddr)
 {
-       struct gnttab_map_grant_ref op = {
-               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
-               .ref   = gnt_ref,
-               .dom   = dev->otherend_id,
-       };
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
        struct xenbus_map_node *node;
        struct vm_struct *area;
-       pte_t *pte;
+       pte_t *pte[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = 0;
+
+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;

        *vaddr = NULL;

@@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
        if (!node)
                return -ENOMEM;

-       area = alloc_vm_area(PAGE_SIZE, &pte);
+       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
        if (!area) {
                kfree(node);
                return -ENOMEM;
        }

-       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
+       for (i = 0; i < nr_grefs; i++) {
+               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte,
+               op[i].ref   = gnt_ref[i],
+               op[i].dom   = dev->otherend_id,
+               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
+       };

        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
                BUG();

-       if (op.status != GNTST_okay) {
-               free_vm_area(area);
-               kfree(node);
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-               return op.status;
+       node->nr_handles = nr_grefs;
+       node->area = area;
+
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+                       continue;
+               }
+               node->handle[i] = op[i].handle;
        }

-       node->handle = op.handle;
-       node->area = area;
+       if (err != 0) {
+               for (i = 0; i < nr_grefs; i++)
+                       xenbus_dev_fatal(dev, op[i].status,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+
+                __xenbus_unmap_ring_vfree_pv(dev, node);
+
+               return err;
+       }

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
@@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 }

 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-                                     int gnt_ref, void **vaddr)
+                                     int gnt_ref[], int nr_grefs, void **vaddr)
 {
        struct xenbus_map_node *node;
        int err;
        void *addr;

+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;
+
        *vaddr = NULL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

-       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+       err = alloc_xenballooned_pages(nr_grefs, &node->page,
+                                      false /* lowmem */);
        if (err)
                goto out_err;

        addr = pfn_to_kaddr(page_to_pfn(node->page));

-       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
        if (err)
                goto out_err;

@@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
        return 0;

  out_err:
-       free_xenballooned_pages(1, &node->page);
+       free_xenballooned_pages(nr_grefs, &node->page);
        kfree(node);
        return err;
 }
@@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
+ * @handle: pointer to grant handle array to be filled, mind the size
  * @vaddr: address to be mapped to
  *
- * Map a page of memory into this domain from another domain's grant table.
+ * Map pages of memory into this domain from another domain's grant table.
  * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
  * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * XenbusStateClosing and the last error message will be saved in XenStore.
  */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                   grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr)
 {
-       struct gnttab_map_grant_ref op;
-
-       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
-                         dev->otherend_id);
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = GNTST_okay;   /* 0 */
+
+       for (i = 0; i < nr_grefs; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               gnttab_set_map_op(&op[i], (phys_addr_t)addr,
+                                 GNTMAP_host_map, gnt_ref[i],
+                                 dev->otherend_id);
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
                BUG();

-       if (op.status != GNTST_okay) {
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-       } else
-               *handle = op.handle;
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_fatal(dev, err,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               } else
+                       handle[i] = op[i].handle;
+       }

-       return op.status;
+       if (err != GNTST_okay)
+               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);

@@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node)
+{
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       unsigned int level;
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < node->nr_handles; i++) {
+               unsigned long vaddr = (unsigned long)node->area->addr +
+                       (PAGE_SIZE * i);
+               if (node->handle[i] != INVALID_GRANT_HANDLE) {
+                       memset(&op[j], 0, sizeof(op[0]));
+                       op[j].host_addr = arbitrary_virt_to_machine(
+                                       lookup_address(vaddr, &level)).maddr;
+                       op[j].handle = node->handle[i];
+                       j++;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }
+
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
+               BUG();
+
+       node->nr_handles = 0;
+
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page %d at handle %d error %d",
+                               i, op[i].handle, err);
+               }
+       }
+
+       if (err == GNTST_okay)
+               free_vm_area(node->area);
+
+       kfree(node);
+
+       return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
        struct xenbus_map_node *node;
-       struct gnttab_unmap_grant_ref op = {
-               .host_addr = (unsigned long)vaddr,
-       };
-       unsigned int level;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       op.handle = node->handle;
-       op.host_addr = arbitrary_virt_to_machine(
-               lookup_address((unsigned long)vaddr, &level)).maddr;
-
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-               BUG();
-
-       if (op.status == GNTST_okay)
-               free_vm_area(node->area);
-       else
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                node->handle, op.status);
-
-       kfree(node);
-       return op.status;
+       return __xenbus_unmap_ring_vfree_pv(dev, node);
 }

 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 {
        int rv;
        struct xenbus_map_node *node;
-       void *addr;
+       void *addr = NULL;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       rv = xenbus_unmap_ring(dev, node->handle, addr);
+       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);

        if (!rv)
-               free_xenballooned_pages(1, &node->page);
+               free_xenballooned_pages(node->nr_handles, &node->page);
        else
                WARN(1, "Leaking %p\n", vaddr);

@@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * xenbus_unmap_ring
  * @dev: xenbus device
  * @handle: grant handle
+ * @nr_handles: number of grant handles
  * @vaddr: addr to unmap
  *
  * Unmap a page of memory in this domain that was imported from another domain.
@@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * (see xen/include/interface/grant_table.h).
  */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr)
+                       grant_handle_t handle[], int nr_handles,
+                       void *vaddr)
 {
-       struct gnttab_unmap_grant_ref op;
-
-       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < nr_handles; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               if (handle[i] != INVALID_GRANT_HANDLE) {
+                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
+                                           GNTMAP_host_map, handle[i]);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
                BUG();

-       if (op.status != GNTST_okay)
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                handle, op.status);
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page at handle %d error %d",
+                               handle[i], err);
+               }
+       }

-       return op.status;
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3864967..62b92d2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
        return err;
 }

+extern void xenbus_ring_ops_init(void);
 static int __init xenbus_init(void)
 {
        int err = 0;
@@ -767,6 +768,8 @@ static int __init xenbus_init(void)
        proc_mkdir("xen", NULL);
 #endif

+       xenbus_ring_ops_init();
+
 out_error:
        return err;
 }
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index e8c599b..cdbd948 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
                         const char *pathfmt, ...);

 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-                          int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                          grant_handle_t *handle, void *vaddr);
+
+#define        XENBUS_MAX_RING_ORDER   2
+#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
+
+#define INVALID_GRANT_HANDLE           (~0U)
+
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[]);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr);

 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr);
+                     grant_handle_t handle[], int nr_handles,
+                     void *vaddr);

 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* RE: [PATCH 0001/001] xen: multi page ring support for block devices
@ 2012-03-06  6:21                 ` Santosh Jodh
  0 siblings, 0 replies; 73+ messages in thread
From: Santosh Jodh @ 2012-03-06  6:21 UTC (permalink / raw)
  To: Rusty Russell, konrad.wilk@oracle.com, jeremy@goop.org,
	Ian Campbell, jbarnes@virtuousgeek.org, jbeulich@novell.com,
	joe.jin@oracle.com, lersek@redhat.com, weiyi.huang@gmail.com,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org,
	"linux-kernel@vger.kernel.org" <
  Cc: Paul Durrant

Great feedback. I removed unsigned for the first, changed the error code and added module param name in the printk.

Please see latest patch:
---

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf6..cc238e7 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -60,6 +60,40 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

+/* Order of maximum shared ring size advertised to the front end. */
+int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
+
+#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+
+static int set_max_ring_order(const char *buf, struct kernel_param *kp)
+{
+       int err;
+       long order;
+
+       err = kstrtol(buf, 0, &order);
+       if (err ||
+           order < 0 ||
+           order > XENBUS_MAX_RING_ORDER)
+               return -ERANGE;
+
+       if (xen_blkif_reqs < BLK_RING_SIZE(order))
+               printk(KERN_WARNING "WARNING: "
+                      "I/O request space (%d reqs) < ring order %ld "
+                       "set by module parameter %s.max_ring_order, "
+                      "consider increasing %s.reqs to >= %ld.",
+                      xen_blkif_reqs, order, KBUILD_MODNAME, KBUILD_MODNAME,
+                      roundup_pow_of_two(BLK_RING_SIZE(order)));
+
+       xen_blkif_max_ring_order = order;
+
+       return 0;
+}
+
+module_param_call(max_ring_order,
+                 set_max_ring_order, param_get_int,
+                 &xen_blkif_max_ring_order, 0644);
+MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7ed..5f33a1a 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -126,6 +126,8 @@ struct blkif_x86_64_response {
        int16_t         status;          /* BLKIF_RSP_???       */
 };

+extern int xen_blkif_max_ring_order;
+
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_common_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb5..7a9d71d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        return blkif;
 }

-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
-                        unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
+                        unsigned int ring_order, unsigned int evtchn)
 {
        int err;

@@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        if (blkif->irq)
                return 0;

-       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
+                                    &blkif->blk_ring);
        if (err < 0)
                return err;

@@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        {
                struct blkif_sring *sring;
                sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.native, sring,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+                              PAGE_SIZE << ring_order);
                break;
        }
        default:
@@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        if (err)
                goto fail;

+       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
+                           "%u", xen_blkif_max_ring_order);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -744,22 +753,80 @@ again:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
-       unsigned long ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int ring_order;
        unsigned int evtchn;
        char protocol[64] = "";
        int err;

        DPRINTK("%s", dev->otherend);

-       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
-       if (err) {
-               xenbus_dev_fatal(dev, err,
-                                "reading %s/ring-ref and event-channel",
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                          &evtchn);
+       if (err != 1) {
+               err = -EINVAL;
+
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                return err;
        }

+       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                          &ring_order);
+       if (err != 1) {
+               DPRINTK("%s: using single page handshake", dev->otherend);
+
+               ring_order = 0;
+
+               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                  "%d", &ring_ref[0]);
+               if (err != 1) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                        dev->otherend);
+                       return err;
+               }
+
+               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
+       } else {
+               unsigned int i;
+
+               if (ring_order > xen_blkif_max_ring_order) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err,
+                                        "%s/ring-page-order too big",
+                                        dev->otherend);
+                       return err;
+               }
+
+               for (i = 0; i < (1u << ring_order); i++) {
+                       char ring_ref_name[10];
+
+                       snprintf(ring_ref_name, sizeof(ring_ref_name),
+                                "ring-ref%u", i);
+
+                       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                                          ring_ref_name, "%d",
+                                          &ring_ref[i]);
+                       if (err != 1) {
+                               err = -EINVAL;
+
+                               xenbus_dev_fatal(dev, err,
+                                                "reading %s/%s",
+                                                dev->otherend,
+                                                ring_ref_name);
+                               return err;
+                       }
+
+                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
+                              ring_ref[i]);
+               }
+       }
+
        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
                            "%63s", protocol, NULL);
@@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -1;
        }
-       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol);

        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
        if (err) {
-               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                ring_ref, evtchn);
+               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
                return err;
        }

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874..485813a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -57,6 +57,10 @@

 #include <asm/xen/hypervisor.h>

+static int xen_blkif_ring_order;
+module_param_named(reqs, xen_blkif_ring_order, int, 0);
+MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages.");
+
 enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
@@ -72,7 +76,8 @@ struct blk_shadow {
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;

-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)

 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -87,14 +92,15 @@ struct blkfront_info
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       int ring_order;
        struct blkif_front_ring ring;
        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -111,9 +117,7 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);

-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-#define GRANT_INVALID_REF      0
+#define GRANT_INVALID_REF      0

 #define PARTS_PER_DISK         16
 #define PARTS_PER_EXT_DISK      256
@@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE);
+       BUG_ON(free >= BLK_MAX_RING_SIZE);
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
@@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)

 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+       int i;
+
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
@@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        flush_work_sync(&info->work);

        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
+       for (i = 0; i < (1 << info->ring_order); i++) {
+               if (info->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+                       info->ring_ref[i] = GRANT_INVALID_REF;
+               }
        }
+
+       free_pages((unsigned long)info->ring.sring, info->ring_order);
+       info->ring.sring = NULL;
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
-
 }

 static void blkif_completion(struct blk_shadow *s)
@@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
        struct blkif_sring *sring;
        int err;

-       info->ring_ref = GRANT_INVALID_REF;
-
-       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+                                                      info->ring_order);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);

        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

-       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
+                               info->ring_ref);
        if (err < 0) {
-               free_page((unsigned long)sring);
+               free_pages((unsigned long)sring, info->ring_order);
                info->ring.sring = NULL;
                goto fail;
        }
-       info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
@@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
+       unsigned int ring_order;
+       int legacy_backend;
+       int i;
        int err;

+       for (i = 0; i < (1 << info->ring_order); i++)
+               info->ring_ref[i] = GRANT_INVALID_REF;
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
+                          &ring_order);
+
+       legacy_backend = !(err == 1);
+
+       if (legacy_backend) {
+               info->ring_order = 0;
+       } else {
+               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
+                                  ring_order :
+                                  xen_blkif_ring_order;
+       }
+
        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
@@ -889,12 +916,35 @@ again:
                goto destroy_blkring;
        }

-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
+       if (legacy_backend) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%d", info->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               for (i = 0; i < (1 << info->ring_order); i++) {
+                       char key[sizeof("ring-ref") + 2];
+
+                       sprintf(key, "ring-ref%d", i);
+
+                       err = xenbus_printf(xbt, dev->nodename,
+                                           key, "%d", info->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
+
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-page-order", "%u", info->ring_order);
+               if (err) {
+                       message = "writing ring-order";
+                       goto abort_transaction;
+               }
        }
+
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
@@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);

-       err = talk_to_blkback(dev, info);
-       if (err) {
-               kfree(info);
-               dev_set_drvdata(&dev->dev, NULL);
-               return err;
-       }
-
        return 0;
 }

@@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
@@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,

        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
@@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateClosed:
                break;

+       case XenbusStateInitWait:
+               talk_to_blkback(dev, info);
+               break;
+
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 94b79c3..f93b59a 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
 /* (Un)Map communication rings. */
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref);
+                                int tx_ring_ref,
+                                int rx_ring_ref);

 /* (De)Register a xenvif with the netback backend. */
 void xen_netbk_add_xenvif(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 59effac..0b014cf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 }

 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref)
+                                int tx_ring_ref,
+                                int rx_ring_ref)
 {
        void *addr;
        struct xen_netif_tx_sring *txs;
@@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    tx_ring_ref, &addr);
+                                    &tx_ring_ref, 1, &addr);
        if (err)
                goto err;

@@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    rx_ring_ref, &addr);
+                                    &rx_ring_ref, 1, &addr);
        if (err)
                goto err;

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 698b905..521a595 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)txs);
                goto fail;
        }

-       info->tx_ring_ref = err;
        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
        if (!rxs) {
                err = -ENOMEM;
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(rxs);
        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)rxs);
                goto fail;
        }
-       info->rx_ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 1620088..95109d8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
        int err = 0;
        struct xenbus_transaction trans;

-       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
        if (err < 0)
                goto out;

-       pdev->gnt_ref = err;
-
        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
        if (err)
                goto out;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f9..e0834cd 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
                gnt_ref, remote_evtchn);

-       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                "Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 566d2ad..3a14524 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -53,14 +53,16 @@ struct xenbus_map_node {
                struct vm_struct *area; /* PV */
                struct page *page;     /* HVM */
        };
-       grant_handle_t handle;
+       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
+       unsigned int   nr_handles;
 };

 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);

 struct xenbus_ring_ops {
-       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
+                  void **vaddr);
        int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };

@@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages to be granted
+ * @grefs: grant reference array to be filled in
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references.  Return 0 on success, or
+ * -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the first error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[])
 {
-       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-       if (err < 0)
-               xenbus_dev_fatal(dev, err, "granting access to ring page");
+       int i;
+       int err;
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               err = gnttab_grant_foreign_access(dev->otherend_id,
+                                                 virt_to_mfn(addr), 0);
+               if (err < 0) {
+                       xenbus_dev_fatal(dev, err,
+                                        "granting access to ring page");
+                       goto fail;
+               }
+               grefs[i] = err;
+       }
+
+       return 0;
+
+fail:
+       for ( ; i >= 0; i--)
+               gnttab_end_foreign_access_ref(grefs[i], 0);
        return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
  * @vaddr: pointer to address to be filled out by mapping
  *
  * Based on Rusty Russell's skeleton driver's map_page.
@@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  * or -ENOMEM on error. If an error is returned, device will switch to
  * XenbusStateClosing and the error message will be saved in XenStore.
  */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr)
 {
-       return ring_ops->map(dev, gnt_ref, vaddr);
+       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node);
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-                                    int gnt_ref, void **vaddr)
+                                    int gnt_ref[], int nr_grefs, void **vaddr)
 {
-       struct gnttab_map_grant_ref op = {
-               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
-               .ref   = gnt_ref,
-               .dom   = dev->otherend_id,
-       };
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
        struct xenbus_map_node *node;
        struct vm_struct *area;
-       pte_t *pte;
+       pte_t *pte[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = 0;
+
+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;

        *vaddr = NULL;

@@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
        if (!node)
                return -ENOMEM;

-       area = alloc_vm_area(PAGE_SIZE, &pte);
+       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
        if (!area) {
                kfree(node);
                return -ENOMEM;
        }

-       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
+       for (i = 0; i < nr_grefs; i++) {
+               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte,
+               op[i].ref   = gnt_ref[i],
+               op[i].dom   = dev->otherend_id,
+               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
+       };

        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
                BUG();

-       if (op.status != GNTST_okay) {
-               free_vm_area(area);
-               kfree(node);
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-               return op.status;
+       node->nr_handles = nr_grefs;
+       node->area = area;
+
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+                       continue;
+               }
+               node->handle[i] = op[i].handle;
        }

-       node->handle = op.handle;
-       node->area = area;
+       if (err != 0) {
+               for (i = 0; i < nr_grefs; i++)
+                       xenbus_dev_fatal(dev, op[i].status,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+
+                __xenbus_unmap_ring_vfree_pv(dev, node);
+
+               return err;
+       }

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
@@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 }

 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-                                     int gnt_ref, void **vaddr)
+                                     int gnt_ref[], int nr_grefs, void **vaddr)
 {
        struct xenbus_map_node *node;
        int err;
        void *addr;

+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;
+
        *vaddr = NULL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

-       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+       err = alloc_xenballooned_pages(nr_grefs, &node->page,
+                                      false /* lowmem */);
        if (err)
                goto out_err;

        addr = pfn_to_kaddr(page_to_pfn(node->page));

-       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
        if (err)
                goto out_err;

@@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
        return 0;

  out_err:
-       free_xenballooned_pages(1, &node->page);
+       free_xenballooned_pages(nr_grefs, &node->page);
        kfree(node);
        return err;
 }
@@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
+ * @handle: pointer to grant handle array to be filled, mind the size
  * @vaddr: address to be mapped to
  *
- * Map a page of memory into this domain from another domain's grant table.
+ * Map pages of memory into this domain from another domain's grant table.
  * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
  * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * XenbusStateClosing and the last error message will be saved in XenStore.
  */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                   grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr)
 {
-       struct gnttab_map_grant_ref op;
-
-       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
-                         dev->otherend_id);
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = GNTST_okay;   /* 0 */
+
+       for (i = 0; i < nr_grefs; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               gnttab_set_map_op(&op[i], (phys_addr_t)addr,
+                                 GNTMAP_host_map, gnt_ref[i],
+                                 dev->otherend_id);
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
                BUG();

-       if (op.status != GNTST_okay) {
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-       } else
-               *handle = op.handle;
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_fatal(dev, err,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               } else
+                       handle[i] = op[i].handle;
+       }

-       return op.status;
+       if (err != GNTST_okay)
+               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);

@@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node)
+{
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       unsigned int level;
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < node->nr_handles; i++) {
+               unsigned long vaddr = (unsigned long)node->area->addr +
+                       (PAGE_SIZE * i);
+               if (node->handle[i] != INVALID_GRANT_HANDLE) {
+                       memset(&op[j], 0, sizeof(op[0]));
+                       op[j].host_addr = arbitrary_virt_to_machine(
+                                       lookup_address(vaddr, &level)).maddr;
+                       op[j].handle = node->handle[i];
+                       j++;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }
+
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
+               BUG();
+
+       node->nr_handles = 0;
+
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page %d at handle %d error %d",
+                               i, op[i].handle, err);
+               }
+       }
+
+       if (err == GNTST_okay)
+               free_vm_area(node->area);
+
+       kfree(node);
+
+       return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
        struct xenbus_map_node *node;
-       struct gnttab_unmap_grant_ref op = {
-               .host_addr = (unsigned long)vaddr,
-       };
-       unsigned int level;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       op.handle = node->handle;
-       op.host_addr = arbitrary_virt_to_machine(
-               lookup_address((unsigned long)vaddr, &level)).maddr;
-
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-               BUG();
-
-       if (op.status == GNTST_okay)
-               free_vm_area(node->area);
-       else
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                node->handle, op.status);
-
-       kfree(node);
-       return op.status;
+       return __xenbus_unmap_ring_vfree_pv(dev, node);
 }

 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 {
        int rv;
        struct xenbus_map_node *node;
-       void *addr;
+       void *addr = NULL;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       rv = xenbus_unmap_ring(dev, node->handle, addr);
+       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);

        if (!rv)
-               free_xenballooned_pages(1, &node->page);
+               free_xenballooned_pages(node->nr_handles, &node->page);
        else
                WARN(1, "Leaking %p\n", vaddr);

@@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * xenbus_unmap_ring
  * @dev: xenbus device
  * @handle: grant handle
+ * @nr_handles: number of grant handles
  * @vaddr: addr to unmap
  *
  * Unmap a page of memory in this domain that was imported from another domain.
@@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * (see xen/include/interface/grant_table.h).
  */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr)
+                       grant_handle_t handle[], int nr_handles,
+                       void *vaddr)
 {
-       struct gnttab_unmap_grant_ref op;
-
-       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < nr_handles; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               if (handle[i] != INVALID_GRANT_HANDLE) {
+                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
+                                           GNTMAP_host_map, handle[i]);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
                BUG();

-       if (op.status != GNTST_okay)
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                handle, op.status);
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page at handle %d error %d",
+                               handle[i], err);
+               }
+       }

-       return op.status;
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3864967..62b92d2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
        return err;
 }

+extern void xenbus_ring_ops_init(void);
 static int __init xenbus_init(void)
 {
        int err = 0;
@@ -767,6 +768,8 @@ static int __init xenbus_init(void)
        proc_mkdir("xen", NULL);
 #endif

+       xenbus_ring_ops_init();
+
 out_error:
        return err;
 }
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index e8c599b..cdbd948 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
                         const char *pathfmt, ...);

 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-                          int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                          grant_handle_t *handle, void *vaddr);
+
+#define        XENBUS_MAX_RING_ORDER   2
+#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
+
+#define INVALID_GRANT_HANDLE           (~0U)
+
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[]);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr);

 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr);
+                     grant_handle_t handle[], int nr_handles,
+                     void *vaddr);

 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-06  2:42               ` Rusty Russell
  (?)
@ 2012-03-06  6:21               ` Santosh Jodh
  -1 siblings, 0 replies; 73+ messages in thread
From: Santosh Jodh @ 2012-03-06  6:21 UTC (permalink / raw)
  To: Rusty Russell, konrad.wilk@oracle.com, jeremy@goop.org,
	Ian Campbell, jbarnes@virtuousgeek.org, jbeulich@novell.com,
	joe.jin@oracle.com, lersek@redhat.com, weiyi.huang@gmail.com,
	dgdegra@tycho.nsa.gov, David Vrabel, paul.gortmaker@windriver.com,
	akpm@linux-foundation.org, waldi@debian.org,
	virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org
  Cc: Paul Durrant

Great feedback. I removed the unsigned for the first point, changed the error code, and added the module param name to the printk.

Please see latest patch:
---

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0088bf6..cc238e7 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -60,6 +60,40 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

+/* Order of maximum shared ring size advertised to the front end. */
+int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
+
+#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+
+static int set_max_ring_order(const char *buf, struct kernel_param *kp)
+{
+       int err;
+       long order;
+
+       err = kstrtol(buf, 0, &order);
+       if (err ||
+           order < 0 ||
+           order > XENBUS_MAX_RING_ORDER)
+               return -ERANGE;
+       /* Accepted; warn if the request pool is smaller than the new ring. */
+       if (xen_blkif_reqs < BLK_RING_SIZE(order))
+               printk(KERN_WARNING "WARNING: "
+                      "I/O request space (%d reqs) < ring order %ld "
+                      "set by module parameter %s.max_ring_order, "
+                      "consider increasing %s.reqs to >= %lu.\n",
+                      xen_blkif_reqs, order, KBUILD_MODNAME, KBUILD_MODNAME,
+                      roundup_pow_of_two(BLK_RING_SIZE(order)));
+
+       xen_blkif_max_ring_order = order;
+
+       return 0;
+}
+
+module_param_call(max_ring_order,
+                 set_max_ring_order, param_get_int,
+                 &xen_blkif_max_ring_order, 0644);
+MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index d0ee7ed..5f33a1a 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -126,6 +126,8 @@ struct blkif_x86_64_response {
        int16_t         status;          /* BLKIF_RSP_???       */
 };

+extern int xen_blkif_max_ring_order;
+
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_common_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24a2fb5..7a9d71d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        return blkif;
 }

-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
-                        unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
+                        unsigned int ring_order, unsigned int evtchn)
 {
        int err;

@@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        if (blkif->irq)
                return 0;

-       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
+                                    &blkif->blk_ring);
        if (err < 0)
                return err;

@@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
        {
                struct blkif_sring *sring;
                sring = (struct blkif_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.native, sring,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_32:
        {
                struct blkif_x86_32_sring *sring_x86_32;
                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+                              PAGE_SIZE << ring_order);
                break;
        }
        case BLKIF_PROTOCOL_X86_64:
        {
                struct blkif_x86_64_sring *sring_x86_64;
                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+                              PAGE_SIZE << ring_order);
                break;
        }
        default:
@@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
        if (err)
                goto fail;

+       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
+                           "%u", xen_blkif_max_ring_order);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -744,22 +753,80 @@ again:
 static int connect_ring(struct backend_info *be)
 {
        struct xenbus_device *dev = be->dev;
-       unsigned long ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       unsigned int ring_order;
        unsigned int evtchn;
        char protocol[64] = "";
        int err;

        DPRINTK("%s", dev->otherend);

-       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
-       if (err) {
-               xenbus_dev_fatal(dev, err,
-                                "reading %s/ring-ref and event-channel",
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                          &evtchn);
+       if (err != 1) {
+               err = -EINVAL;
+
+               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                return err;
        }

+       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                          &ring_order);
+       if (err != 1) {
+               DPRINTK("%s: using single page handshake", dev->otherend);
+
+               ring_order = 0;
+
+               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                  "%d", &ring_ref[0]);
+               if (err != 1) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                        dev->otherend);
+                       return err;
+               }
+
+               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
+       } else {
+               unsigned int i;
+
+               if (ring_order > xen_blkif_max_ring_order) {
+                       err = -EINVAL;
+
+                       xenbus_dev_fatal(dev, err,
+                                        "%s/ring-page-order too big",
+                                        dev->otherend);
+                       return err;
+               }
+
+               for (i = 0; i < (1u << ring_order); i++) {
+                       char ring_ref_name[10];
+
+                       snprintf(ring_ref_name, sizeof(ring_ref_name),
+                                "ring-ref%u", i);
+
+                       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                                          ring_ref_name, "%d",
+                                          &ring_ref[i]);
+                       if (err != 1) {
+                               err = -EINVAL;
+
+                               xenbus_dev_fatal(dev, err,
+                                                "reading %s/%s",
+                                                dev->otherend,
+                                                ring_ref_name);
+                               return err;
+                       }
+
+                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
+                              ring_ref[i]);
+               }
+       }
+
        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
                            "%63s", protocol, NULL);
@@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -1;
        }
-       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-               ring_ref, evtchn, be->blkif->blk_protocol, protocol);

        /* Map the shared frame, irq etc. */
-       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
        if (err) {
-               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                ring_ref, evtchn);
+               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
                return err;
        }

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2f22874..485813a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -57,6 +57,10 @@

 #include <asm/xen/hypervisor.h>

+static int xen_blkif_ring_order;       /* log2 of ring pages to request */
+module_param_named(ring_order, xen_blkif_ring_order, int, 0);
+MODULE_PARM_DESC(ring_order, "log2 of requested ring size, in pages.");
+
 enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
@@ -72,7 +76,8 @@ struct blk_shadow {
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;

-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
+#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)

 /*
  * We have one of these per vbd, whether ide, scsi or 'other'.  They
@@ -87,14 +92,15 @@ struct blkfront_info
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
-       int ring_ref;
+       int ring_ref[XENBUS_MAX_RING_PAGES];
+       int ring_order;
        struct blkif_front_ring ring;
        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
-       struct blk_shadow shadow[BLK_RING_SIZE];
+       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
@@ -111,9 +117,7 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);

-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-#define GRANT_INVALID_REF      0
+#define GRANT_INVALID_REF      0

 #define PARTS_PER_DISK         16
 #define PARTS_PER_EXT_DISK      256
@@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
-       BUG_ON(free >= BLK_RING_SIZE);
+       BUG_ON(free >= BLK_MAX_RING_SIZE);
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
@@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)

 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+       int i;
+
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
@@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        flush_work_sync(&info->work);

        /* Free resources associated with old device channel. */
-       if (info->ring_ref != GRANT_INVALID_REF) {
-               gnttab_end_foreign_access(info->ring_ref, 0,
-                                         (unsigned long)info->ring.sring);
-               info->ring_ref = GRANT_INVALID_REF;
-               info->ring.sring = NULL;
+       for (i = 0; i < (1 << info->ring_order); i++) {
+               if (info->ring_ref[i] != GRANT_INVALID_REF) {
+                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+                       info->ring_ref[i] = GRANT_INVALID_REF;
+               }
        }
+
+       free_pages((unsigned long)info->ring.sring, info->ring_order);
+       info->ring.sring = NULL;
+
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
-
 }

 static void blkif_completion(struct blk_shadow *s)
@@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
        struct blkif_sring *sring;
        int err;

-       info->ring_ref = GRANT_INVALID_REF;
-
-       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+                                                      info->ring_order);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);

        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

-       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
+                               info->ring_ref);
        if (err < 0) {
-               free_page((unsigned long)sring);
+               free_pages((unsigned long)sring, info->ring_order);
                info->ring.sring = NULL;
                goto fail;
        }
-       info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
@@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
        const char *message = NULL;
        struct xenbus_transaction xbt;
+       unsigned int ring_order;
+       int legacy_backend;
+       int i;
        int err;

+       for (i = 0; i < XENBUS_MAX_RING_PAGES; i++)
+               info->ring_ref[i] = GRANT_INVALID_REF;
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
+                          &ring_order);
+
+       legacy_backend = !(err == 1);
+
+       /* Clamp to what both ends want and to what ring_ref[] can hold. */
+       if (legacy_backend || xen_blkif_ring_order <= 0)
+               info->ring_order = 0;
+       else
+               info->ring_order = min3(ring_order,
+                                       (unsigned int)xen_blkif_ring_order,
+                                       (unsigned int)XENBUS_MAX_RING_ORDER);
+
        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
@@ -889,12 +916,35 @@ again:
                goto destroy_blkring;
        }

-       err = xenbus_printf(xbt, dev->nodename,
-                           "ring-ref", "%u", info->ring_ref);
-       if (err) {
-               message = "writing ring-ref";
-               goto abort_transaction;
+       if (legacy_backend) {
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-ref", "%d", info->ring_ref[0]);
+               if (err) {
+                       message = "writing ring-ref";
+                       goto abort_transaction;
+               }
+       } else {
+               for (i = 0; i < (1 << info->ring_order); i++) {
+                       char key[sizeof("ring-ref") + 2];
+
+                       sprintf(key, "ring-ref%d", i);
+
+                       err = xenbus_printf(xbt, dev->nodename,
+                                           key, "%d", info->ring_ref[i]);
+                       if (err) {
+                               message = "writing ring-ref";
+                               goto abort_transaction;
+                       }
+               }
+
+               err = xenbus_printf(xbt, dev->nodename,
+                                   "ring-page-order", "%u", info->ring_order);
+               if (err) {
+                       message = "writing ring-order";
+                       goto abort_transaction;
+               }
        }
+
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
@@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);

-       err = talk_to_blkback(dev, info);
-       if (err) {
-               kfree(info);
-               dev_set_drvdata(&dev->dev, NULL);
-               return err;
-       }
-
        return 0;
 }

@@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
-       for (i = 0; i < BLK_RING_SIZE; i++)
+       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
-       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
-       for (i = 0; i < BLK_RING_SIZE; i++) {
+       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
@@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,

        switch (backend_state) {
        case XenbusStateInitialising:
-       case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
@@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateClosed:
                break;

+       case XenbusStateInitWait:
+               talk_to_blkback(dev, info);
+               break;
+
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 94b79c3..f93b59a 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
 /* (Un)Map communication rings. */
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref);
+                                int tx_ring_ref,
+                                int rx_ring_ref);

 /* (De)Register a xenvif with the netback backend. */
 void xen_netbk_add_xenvif(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 59effac..0b014cf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 }

 int xen_netbk_map_frontend_rings(struct xenvif *vif,
-                                grant_ref_t tx_ring_ref,
-                                grant_ref_t rx_ring_ref)
+                                int tx_ring_ref,
+                                int rx_ring_ref)
 {
        void *addr;
        struct xen_netif_tx_sring *txs;
@@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    tx_ring_ref, &addr);
+                                    &tx_ring_ref, 1, &addr);
        if (err)
                goto err;

@@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-                                    rx_ring_ref, &addr);
+                                    &rx_ring_ref, 1, &addr);
        if (err)
                goto err;

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 698b905..521a595 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)txs);
                goto fail;
        }

-       info->tx_ring_ref = err;
        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
        if (!rxs) {
                err = -ENOMEM;
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
        SHARED_RING_INIT(rxs);
        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);

-       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
        if (err < 0) {
                free_page((unsigned long)rxs);
                goto fail;
        }
-       info->rx_ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 1620088..95109d8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
        int err = 0;
        struct xenbus_transaction trans;

-       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
        if (err < 0)
                goto out;

-       pdev->gnt_ref = err;
-
        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
        if (err)
                goto out;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f9..e0834cd 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
                gnt_ref, remote_evtchn);

-       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
        if (err < 0) {
                xenbus_dev_fatal(pdev->xdev, err,
                                "Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 566d2ad..3a14524 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -53,14 +53,16 @@ struct xenbus_map_node {
                struct vm_struct *area; /* PV */
                struct page *page;     /* HVM */
        };
-       grant_handle_t handle;
+       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
+       unsigned int   nr_handles;
 };

 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);

 struct xenbus_ring_ops {
-       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
+                  void **vaddr);
        int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };

@@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages to be granted
+ * @grefs: grant reference array to be filled in
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references.  Return 0 on success, or
+ * -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the first error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[])
 {
-       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-       if (err < 0)
-               xenbus_dev_fatal(dev, err, "granting access to ring page");
+       int i;
+       int err;
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               err = gnttab_grant_foreign_access(dev->otherend_id,
+                                                 virt_to_mfn(addr), 0);
+               if (err < 0) {
+                       xenbus_dev_fatal(dev, err,
+                                        "granting access to ring page");
+                       goto fail;
+               }
+               grefs[i] = err;
+       }
+
+       return 0;
+
+fail:
+       while (--i >= 0)        /* grefs[i] of the failed page was never set */
+               gnttab_end_foreign_access_ref(grefs[i], 0);
        return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
  * @vaddr: pointer to address to be filled out by mapping
  *
  * Based on Rusty Russell's skeleton driver's map_page.
@@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  * or -ENOMEM on error. If an error is returned, device will switch to
  * XenbusStateClosing and the error message will be saved in XenStore.
  */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr)
 {
-       return ring_ops->map(dev, gnt_ref, vaddr);
+       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node);
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-                                    int gnt_ref, void **vaddr)
+                                    int gnt_ref[], int nr_grefs, void **vaddr)
 {
-       struct gnttab_map_grant_ref op = {
-               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
-               .ref   = gnt_ref,
-               .dom   = dev->otherend_id,
-       };
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
        struct xenbus_map_node *node;
        struct vm_struct *area;
-       pte_t *pte;
+       pte_t *pte[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = 0;
+
+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;

        *vaddr = NULL;

@@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
        if (!node)
                return -ENOMEM;

-       area = alloc_vm_area(PAGE_SIZE, &pte);
+       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
        if (!area) {
                kfree(node);
                return -ENOMEM;
        }

-       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
+       for (i = 0; i < nr_grefs; i++) {
+               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte;
+               op[i].ref   = gnt_ref[i];
+               op[i].dom   = dev->otherend_id;
+               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
                BUG();

-       if (op.status != GNTST_okay) {
-               free_vm_area(area);
-               kfree(node);
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-               return op.status;
+       node->nr_handles = nr_grefs;
+       node->area = area;
+
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+                       continue;
+               }
+               node->handle[i] = op[i].handle;
        }

-       node->handle = op.handle;
-       node->area = area;
+       if (err != 0) {
+               for (i = 0; i < nr_grefs; i++)
+                       xenbus_dev_fatal(dev, op[i].status,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+
+                __xenbus_unmap_ring_vfree_pv(dev, node);
+
+               return err;
+       }

        spin_lock(&xenbus_valloc_lock);
        list_add(&node->next, &xenbus_valloc_pages);
@@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 }

 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-                                     int gnt_ref, void **vaddr)
+                                     int gnt_ref[], int nr_grefs, void **vaddr)
 {
        struct xenbus_map_node *node;
        int err;
        void *addr;

+       if (nr_grefs > XENBUS_MAX_RING_PAGES)
+               return -EINVAL;
+
        *vaddr = NULL;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return -ENOMEM;

-       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+       err = alloc_xenballooned_pages(nr_grefs, &node->page,
+                                      false /* lowmem */);
        if (err)
                goto out_err;

        addr = pfn_to_kaddr(page_to_pfn(node->page));

-       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
        if (err)
                goto out_err;

@@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
        return 0;

  out_err:
-       free_xenballooned_pages(1, &node->page);
+       free_xenballooned_pages(nr_grefs, &node->page);
        kfree(node);
        return err;
 }
@@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
+ * @gnt_ref: grant reference array
+ * @nr_grefs: number of grant references
+ * @handle: pointer to grant handle array to be filled, mind the size
  * @vaddr: address to be mapped to
  *
- * Map a page of memory into this domain from another domain's grant table.
+ * Map pages of memory into this domain from another domain's grant table.
  * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
  * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * XenbusStateClosing and the last error message will be saved in XenStore.
  */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                   grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr)
 {
-       struct gnttab_map_grant_ref op;
-
-       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
-                         dev->otherend_id);
+       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i;
+       int err = GNTST_okay;   /* 0 */
+
+       for (i = 0; i < nr_grefs; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               gnttab_set_map_op(&op[i], (phys_addr_t)addr,
+                                 GNTMAP_host_map, gnt_ref[i],
+                                 dev->otherend_id);
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
                BUG();

-       if (op.status != GNTST_okay) {
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-       } else
-               *handle = op.handle;
+       for (i = 0; i < nr_grefs; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_fatal(dev, err,
+                               "mapping in shared page %d from domain %d",
+                               gnt_ref[i], dev->otherend_id);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               } else
+                       handle[i] = op[i].handle;
+       }

-       return op.status;
+       if (err != GNTST_okay)
+               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);

@@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);

+static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
+                                       struct xenbus_map_node *node)
+{
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       unsigned int level;
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < node->nr_handles; i++) {
+               unsigned long vaddr = (unsigned long)node->area->addr +
+                       (PAGE_SIZE * i);
+               if (node->handle[i] != INVALID_GRANT_HANDLE) {
+                       memset(&op[j], 0, sizeof(op[0]));
+                       op[j].host_addr = arbitrary_virt_to_machine(
+                                       lookup_address(vaddr, &level)).maddr;
+                       op[j].handle = node->handle[i];
+                       j++;
+                       node->handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }
+
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
+               BUG();
+
+       node->nr_handles = 0;
+
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page %d at handle %d error %d",
+                               i, op[i].handle, err);
+               }
+       }
+
+       if (err == GNTST_okay)
+               free_vm_area(node->area);
+
+       kfree(node);
+
+       return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
        struct xenbus_map_node *node;
-       struct gnttab_unmap_grant_ref op = {
-               .host_addr = (unsigned long)vaddr,
-       };
-       unsigned int level;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       op.handle = node->handle;
-       op.host_addr = arbitrary_virt_to_machine(
-               lookup_address((unsigned long)vaddr, &level)).maddr;
-
-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-               BUG();
-
-       if (op.status == GNTST_okay)
-               free_vm_area(node->area);
-       else
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                node->handle, op.status);
-
-       kfree(node);
-       return op.status;
+       return __xenbus_unmap_ring_vfree_pv(dev, node);
 }

 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 {
        int rv;
        struct xenbus_map_node *node;
-       void *addr;
+       void *addr = NULL;

        spin_lock(&xenbus_valloc_lock);
        list_for_each_entry(node, &xenbus_valloc_pages, next) {
@@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)

        if (!node) {
                xenbus_dev_error(dev, -ENOENT,
-                                "can't find mapped virtual address %p", vaddr);
+                               "can't find mapped virtual address %p", vaddr);
                return GNTST_bad_virt_addr;
        }

-       rv = xenbus_unmap_ring(dev, node->handle, addr);
+       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);

        if (!rv)
-               free_xenballooned_pages(1, &node->page);
+               free_xenballooned_pages(node->nr_handles, &node->page);
        else
                WARN(1, "Leaking %p\n", vaddr);

@@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * xenbus_unmap_ring
  * @dev: xenbus device
  * @handle: grant handle
+ * @nr_handles: number of grant handles
  * @vaddr: addr to unmap
  *
  * Unmap a page of memory in this domain that was imported from another domain.
@@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
  * (see xen/include/interface/grant_table.h).
  */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr)
+                       grant_handle_t handle[], int nr_handles,
+                       void *vaddr)
 {
-       struct gnttab_unmap_grant_ref op;
-
-       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
+       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
+       int i, j;
+       int err = GNTST_okay;
+
+       j = 0;
+       for (i = 0; i < nr_handles; i++) {
+               unsigned long addr = (unsigned long)vaddr +
+                       (PAGE_SIZE * i);
+               if (handle[i] != INVALID_GRANT_HANDLE) {
+                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
+                                           GNTMAP_host_map, handle[i]);
+                       handle[i] = INVALID_GRANT_HANDLE;
+               }
+       }

-       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
                BUG();

-       if (op.status != GNTST_okay)
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
-                                handle, op.status);
+       for (i = 0; i < j; i++) {
+               if (op[i].status != GNTST_okay) {
+                       err = op[i].status;
+                       xenbus_dev_error(dev, err,
+                               "unmapping page at handle %d error %d",
+                               handle[i], err);
+               }
+       }

-       return op.status;
+       return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3864967..62b92d2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
        return err;
 }

+extern void xenbus_ring_ops_init(void);
 static int __init xenbus_init(void)
 {
        int err = 0;
@@ -767,6 +768,8 @@ static int __init xenbus_init(void)
        proc_mkdir("xen", NULL);
 #endif

+       xenbus_ring_ops_init();
+
 out_error:
        return err;
 }
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index e8c599b..cdbd948 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
                         const char *pathfmt, ...);

 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-                          int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-                          grant_handle_t *handle, void *vaddr);
+
+#define        XENBUS_MAX_RING_ORDER   2
+#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
+
+#define INVALID_GRANT_HANDLE           (~0U)
+
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+                     int nr_pages, int grefs[]);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
+                          int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
+                   grant_handle_t handle[], void *vaddr);

 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-                     grant_handle_t handle, void *vaddr);
+                     grant_handle_t handle[], int nr_handles,
+                     void *vaddr);

 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
@ 2012-03-06  8:34               ` Jan Beulich
  -1 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-06  8:34 UTC (permalink / raw)
  To: Santosh Jodh
  Cc: David Vrabel, Ian Campbell, Paul Durrant, waldi@debian.org,
	weiyi.huang@gmail.com, jeremy@goop.org, akpm@linux-foundation.org,
	virtualization@lists.linux-foundation.org,
	xen-devel@lists.xen.org, joe.jin@oracle.com,
	konrad.wilk@oracle.com, lersek@redhat.com, rusty@rustcorp.com.au,
	dgdegra@tycho.nsa.gov, linux-kernel@vger.kernel.org,
	linux-pci@vger.kernel.org, netdev@vger.kernel.org,
	jbarnes@virtuousgeek.org, paul.gortmaker@windriver.com

>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:

Could this be split up into 3 patches, for easier reviewing:
- one adjusting the xenbus interface to allow for multiple ring pages (and
  maybe even that one should be split into the backend and frontend
  related parts), syncing with the similar netback effort?
- one for the blkback changes
- one for the blkfront changes?

> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>         return blkif;
>  }
> 
> -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
> -                        unsigned int evtchn)
> +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],

As you need to touch this anyway, can you please switch this to the
proper type (grant_ref_t) rather than using plain "int" (not just here)?

> +                        unsigned int ring_order, unsigned int evtchn)
>  {
>         int err;
> 
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
>  static int get_id_from_freelist(struct blkfront_info *info)
>  {
>         unsigned long free = info->shadow_free;
> -       BUG_ON(free >= BLK_RING_SIZE);
> +       BUG_ON(free >= BLK_MAX_RING_SIZE);

Wouldn't it be better to check against the actual limit here?

>         info->shadow_free = info->shadow[free].req.u.rw.id;
>         info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
>         return free;
> @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>         flush_work_sync(&info->work);
> 
>         /* Free resources associated with old device channel. */
> -       if (info->ring_ref != GRANT_INVALID_REF) {
> -               gnttab_end_foreign_access(info->ring_ref, 0,
> -                                         (unsigned long)info->ring.sring);
> -               info->ring_ref = GRANT_INVALID_REF;
> -               info->ring.sring = NULL;
> +       for (i = 0; i < (1 << info->ring_order); i++) {
> +               if (info->ring_ref[i] != GRANT_INVALID_REF) {
> +                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
> +                       info->ring_ref[i] = GRANT_INVALID_REF;
> +               }
>         }
> +
> +       free_pages((unsigned long)info->ring.sring, info->ring_order);

No. The freeing must continue to happen in gnttab_end_foreign_access()
(with the sole exception when a page was allocated but the grant
didn't get established), since it must be suppressed/delayed when the
grant is still in use (otherwise the kernel will die on the first re-use of
the page). I just happened to fix that problem at the end of last week
in the variant of the patch that we pulled into our tree.

Further, rather than doing a non-zero order allocation here, I'd
suggest allocating individual pages and vmap()-ing them.

> +       info->ring.sring = NULL;
> +
>         if (info->irq)
>                 unbind_from_irqhandler(info->irq, info);
>         info->evtchn = info->irq = 0;
> -
>  }
> 
>  static void blkif_completion(struct blk_shadow *s)
> @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
>  {
>         const char *message = NULL;
>         struct xenbus_transaction xbt;
> +       unsigned int ring_order;
> +       int legacy_backend;
> +       int i;
>         int err;
> 
> +       for (i = 0; i < (1 << info->ring_order); i++)
> +               info->ring_ref[i] = GRANT_INVALID_REF;
> +
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
> +                          &ring_order);

At least the frontend should imo also support the alternative interface
(using "max-ring-pages" etc).

> +
> +       legacy_backend = !(err == 1);
> +
> +       if (legacy_backend) {
> +               info->ring_order = 0;
> +       } else {
> +               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
> +                                  ring_order :
> +                                  xen_blkif_ring_order;

min()?

> +       }
> +
>         /* Create shared ring, alloc event channel. */
>         err = setup_blkring(dev, info);
>         if (err)
> @@ -889,12 +916,35 @@ again:
>                 goto destroy_blkring;
>         }
> 
> -       err = xenbus_printf(xbt, dev->nodename,
> -                           "ring-ref", "%u", info->ring_ref);
> -       if (err) {
> -               message = "writing ring-ref";
> -               goto abort_transaction;
> +       if (legacy_backend) {

Why not use the simpler interface always when info->ring_order == 0?

> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-ref", "%d", info->ring_ref[0]);
> +               if (err) {
> +                       message = "writing ring-ref";
> +                       goto abort_transaction;
> +               }
> +       } else {
> +               for (i = 0; i < (1 << info->ring_order); i++) {
> +                       char key[sizeof("ring-ref") + 2];
> +
> +                       sprintf(key, "ring-ref%d", i);
> +
> +                       err = xenbus_printf(xbt, dev->nodename,
> +                                           key, "%d", info->ring_ref[i]);
> +                       if (err) {
> +                               message = "writing ring-ref";
> +                               goto abort_transaction;
> +                       }
> +               }
> +
> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-page-order", "%u", info->ring_order);
> +               if (err) {
> +                       message = "writing ring-order";
> +                       goto abort_transaction;
> +               }
>         }
> +
>         err = xenbus_printf(xbt, dev->nodename,
>                             "event-channel", "%u", info->evtchn);
>         if (err) {
> @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
>         info->connected = BLKIF_STATE_DISCONNECTED;
>         INIT_WORK(&info->work, blkif_restart_queue);
> 
> -       for (i = 0; i < BLK_RING_SIZE; i++)
> +       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
>                 info->shadow[i].req.u.rw.id = i+1;
> -       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
> +       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

A proper terminator must also be written in talk_to_blkback() once
the actual ring size is known.

Further, blkif_recover() must be able to deal with a change of the
allowed upper bound.

>         /* Front end dir is a number, which is used as the id. */
>         info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
>         dev_set_drvdata(&dev->dev, info);
> 
> -       err = talk_to_blkback(dev, info);

Completely removing this here is wrong afaict - what if the backend
already is in InitWait when the frontend starts?

Further, whatever is done to this call here also needs to be done in
blkfront_resume().

> -       if (err) {
> -               kfree(info);
> -               dev_set_drvdata(&dev->dev, NULL);
> -               return err;
> -       }
> -
>         return 0;
>  }
> 
> @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
>         case XenbusStateClosed:
>                 break;
> 
> +       case XenbusStateInitWait:
> +               talk_to_blkback(dev, info);

This call can return an error.

> +               break;
> +
>         case XenbusStateConnected:
>                 blkfront_connect(info);
>                 break;
> --- a/include/xen/xenbus.h
> +++ b/include/xen/xenbus.h
> @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
>                          const char *pathfmt, ...);
> 
>  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> -                          int gnt_ref, void **vaddr);
> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> -                          grant_handle_t *handle, void *vaddr);
> +
> +#define        XENBUS_MAX_RING_ORDER   2
> +#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)

Why do you need an artificial global limit here? Each driver can decide
individually what its limit should be.

Jan



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
                               ` (5 preceding siblings ...)
  (?)
@ 2012-03-06  8:34             ` Jan Beulich
  -1 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-06  8:34 UTC (permalink / raw)
  To: Santosh Jodh
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	weiyi.huang@gmail.com, linux-kernel@vger.kernel.org,
	jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, David Vrabel,
	linux-pci@vger.kernel.org, akpm@linux-foundation.org,
	xen-devel@lists.xen.org, lersek@redhat.com, dgdegra

>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:

Could this be split up into 3 patches, for easier reviewing:
- one adjusting the xenbus interface to allow for multiple ring pages (and
  maybe even that one should be split into the backend and frontend
  related parts), syncing with the similar netback effort?
- one for the blkback changes
- one for the blkfront changes?

> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>         return blkif;
>  }
> 
> -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
> -                        unsigned int evtchn)
> +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],

As you need to touch this anyway, can you please switch this to the
proper type (grant_ref_t) rather than using plain "int" (not just here)?

> +                        unsigned int ring_order, unsigned int evtchn)
>  {
>         int err;
> 
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
>  static int get_id_from_freelist(struct blkfront_info *info)
>  {
>         unsigned long free = info->shadow_free;
> -       BUG_ON(free >= BLK_RING_SIZE);
> +       BUG_ON(free >= BLK_MAX_RING_SIZE);

Wouldn't it be better to check against the actual limit here?

>         info->shadow_free = info->shadow[free].req.u.rw.id;
>         info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
>         return free;
> @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>         flush_work_sync(&info->work);
> 
>         /* Free resources associated with old device channel. */
> -       if (info->ring_ref != GRANT_INVALID_REF) {
> -               gnttab_end_foreign_access(info->ring_ref, 0,
> -                                         (unsigned long)info->ring.sring);
> -               info->ring_ref = GRANT_INVALID_REF;
> -               info->ring.sring = NULL;
> +       for (i = 0; i < (1 << info->ring_order); i++) {
> +               if (info->ring_ref[i] != GRANT_INVALID_REF) {
> +                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
> +                       info->ring_ref[i] = GRANT_INVALID_REF;
> +               }
>         }
> +
> +       free_pages((unsigned long)info->ring.sring, info->ring_order);

No. The freeing must continue to happen in gnttab_end_foreign_access()
(with the sole exception when a page was allocated but the grant
didn't get established), since it must be suppressed/delayed when the
grant is still in use (otherwise the kernel will die on the first re-use of
the page). I just happened to fix that problem at the end of last week
in the variant of the patch that we pulled into our tree.

Further, rather than doing a non-zero order allocation here, I'd
suggest allocating individual pages and vmap()-ing them.

> +       info->ring.sring = NULL;
> +
>         if (info->irq)
>                 unbind_from_irqhandler(info->irq, info);
>         info->evtchn = info->irq = 0;
> -
>  }
> 
>  static void blkif_completion(struct blk_shadow *s)
> @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
>  {
>         const char *message = NULL;
>         struct xenbus_transaction xbt;
> +       unsigned int ring_order;
> +       int legacy_backend;
> +       int i;
>         int err;
> 
> +       for (i = 0; i < (1 << info->ring_order); i++)
> +               info->ring_ref[i] = GRANT_INVALID_REF;
> +
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
> +                          &ring_order);

At least the frontend should imo also support the alternative interface
(using "max-ring-pages" etc).

> +
> +       legacy_backend = !(err == 1);
> +
> +       if (legacy_backend) {
> +               info->ring_order = 0;
> +       } else {
> +               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
> +                                  ring_order :
> +                                  xen_blkif_ring_order;

min()?

> +       }
> +
>         /* Create shared ring, alloc event channel. */
>         err = setup_blkring(dev, info);
>         if (err)
> @@ -889,12 +916,35 @@ again:
>                 goto destroy_blkring;
>         }
> 
> -       err = xenbus_printf(xbt, dev->nodename,
> -                           "ring-ref", "%u", info->ring_ref);
> -       if (err) {
> -               message = "writing ring-ref";
> -               goto abort_transaction;
> +       if (legacy_backend) {

Why not use the simpler interface always when info->ring_order == 0?

> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-ref", "%d", info->ring_ref[0]);
> +               if (err) {
> +                       message = "writing ring-ref";
> +                       goto abort_transaction;
> +               }
> +       } else {
> +               for (i = 0; i < (1 << info->ring_order); i++) {
> +                       char key[sizeof("ring-ref") + 2];
> +
> +                       sprintf(key, "ring-ref%d", i);
> +
> +                       err = xenbus_printf(xbt, dev->nodename,
> +                                           key, "%d", info->ring_ref[i]);
> +                       if (err) {
> +                               message = "writing ring-ref";
> +                               goto abort_transaction;
> +                       }
> +               }
> +
> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-page-order", "%u", info->ring_order);
> +               if (err) {
> +                       message = "writing ring-order";
> +                       goto abort_transaction;
> +               }
>         }
> +
>         err = xenbus_printf(xbt, dev->nodename,
>                             "event-channel", "%u", info->evtchn);
>         if (err) {
> @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
>         info->connected = BLKIF_STATE_DISCONNECTED;
>         INIT_WORK(&info->work, blkif_restart_queue);
> 
> -       for (i = 0; i < BLK_RING_SIZE; i++)
> +       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
>                 info->shadow[i].req.u.rw.id = i+1;
> -       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
> +       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

A proper terminator must also be written in talk_to_blkback() once
the actual ring size is known.

Further, blkif_recover() must be able to deal with a change of the
allowed upper bound.

>         /* Front end dir is a number, which is used as the id. */
>         info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
>         dev_set_drvdata(&dev->dev, info);
> 
> -       err = talk_to_blkback(dev, info);

Completely removing this here is wrong afaict - what if the backend
already is in InitWait when the frontend starts?

Further, whatever is done to this call here also needs to be done in
blkfront_resume().

> -       if (err) {
> -               kfree(info);
> -               dev_set_drvdata(&dev->dev, NULL);
> -               return err;
> -       }
> -
>         return 0;
>  }
> 
> @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
>         case XenbusStateClosed:
>                 break;
> 
> +       case XenbusStateInitWait:
> +               talk_to_blkback(dev, info);

This call can return an error.

> +               break;
> +
>         case XenbusStateConnected:
>                 blkfront_connect(info);
>                 break;
> --- a/include/xen/xenbus.h
> +++ b/include/xen/xenbus.h
> @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
>                          const char *pathfmt, ...);
> 
>  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> -                          int gnt_ref, void **vaddr);
> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> -                          grant_handle_t *handle, void *vaddr);
> +
> +#define        XENBUS_MAX_RING_ORDER   2
> +#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)

Why do you need an artificial global limit here? Each driver can decide
individually what its limit should be.

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
@ 2012-03-06  8:34               ` Jan Beulich
  0 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-06  8:34 UTC (permalink / raw)
  To: Santosh Jodh
  Cc: David Vrabel, Ian Campbell, Paul Durrant, waldi@debian.org,
	weiyi.huang@gmail.com, jeremy@goop.org, akpm@linux-foundation.org,
	virtualization@lists.linux-foundation.org,
	xen-devel@lists.xen.org, joe.jin@oracle.com,
	konrad.wilk@oracle.com, lersek@redhat.com, rusty@rustcorp.com.au,
	dgdegra@tycho.nsa.gov, linux-kernel@vger.kernel.org,
	linux-pci@vger.kernel.org, netdev@vger.kernel.org,
	"jbarnes@virtuousgeek.o

>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:

Could this be split up into 3 patches, for easier reviewing:
- one adjusting the xenbus interface to allow for multiple ring pages (and
  maybe even that one should be split into the backend and frontend
  related parts), syncing with the similar netback effort?
- one for the blkback changes
- one for the blkfront changes?

> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>         return blkif;
>  }
> 
> -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
> -                        unsigned int evtchn)
> +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],

As you need to touch this anyway, can you please switch this to the
proper type (grant_ref_t) rather than using plain "int" (not just here)?

> +                        unsigned int ring_order, unsigned int evtchn)
>  {
>         int err;
> 
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
>  static int get_id_from_freelist(struct blkfront_info *info)
>  {
>         unsigned long free = info->shadow_free;
> -       BUG_ON(free >= BLK_RING_SIZE);
> +       BUG_ON(free >= BLK_MAX_RING_SIZE);

Wouldn't it be better to check against the actual limit here?

>         info->shadow_free = info->shadow[free].req.u.rw.id;
>         info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
>         return free;
> @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>         flush_work_sync(&info->work);
> 
>         /* Free resources associated with old device channel. */
> -       if (info->ring_ref != GRANT_INVALID_REF) {
> -               gnttab_end_foreign_access(info->ring_ref, 0,
> -                                         (unsigned long)info->ring.sring);
> -               info->ring_ref = GRANT_INVALID_REF;
> -               info->ring.sring = NULL;
> +       for (i = 0; i < (1 << info->ring_order); i++) {
> +               if (info->ring_ref[i] != GRANT_INVALID_REF) {
> +                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
> +                       info->ring_ref[i] = GRANT_INVALID_REF;
> +               }
>         }
> +
> +       free_pages((unsigned long)info->ring.sring, info->ring_order);

No. The freeing must continue to happen in gnttab_end_foreign_access()
(with the sole exception when a page was allocated but the grant
didn't get established), since it must be suppressed/delayed when the
grant is still in use (otherwise the kernel will die on the first re-use of
the page). I just happened to fix that problem at the end of last week
in the variant of the patch that we pulled into our tree.

Further, rather than doing a non-zero order allocation here, I'd
suggest allocating individual pages and vmap()-ing them.

> +       info->ring.sring = NULL;
> +
>         if (info->irq)
>                 unbind_from_irqhandler(info->irq, info);
>         info->evtchn = info->irq = 0;
> -
>  }
> 
>  static void blkif_completion(struct blk_shadow *s)
> @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
>  {
>         const char *message = NULL;
>         struct xenbus_transaction xbt;
> +       unsigned int ring_order;
> +       int legacy_backend;
> +       int i;
>         int err;
> 
> +       for (i = 0; i < (1 << info->ring_order); i++)
> +               info->ring_ref[i] = GRANT_INVALID_REF;
> +
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
> +                          &ring_order);

At least the frontend should imo also support the alternative interface
(using "max-ring-pages" etc).

> +
> +       legacy_backend = !(err == 1);
> +
> +       if (legacy_backend) {
> +               info->ring_order = 0;
> +       } else {
> +               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
> +                                  ring_order :
> +                                  xen_blkif_ring_order;

min()?

> +       }
> +
>         /* Create shared ring, alloc event channel. */
>         err = setup_blkring(dev, info);
>         if (err)
> @@ -889,12 +916,35 @@ again:
>                 goto destroy_blkring;
>         }
> 
> -       err = xenbus_printf(xbt, dev->nodename,
> -                           "ring-ref", "%u", info->ring_ref);
> -       if (err) {
> -               message = "writing ring-ref";
> -               goto abort_transaction;
> +       if (legacy_backend) {

Why not use the simpler interface always when info->ring_order == 0?

> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-ref", "%d", info->ring_ref[0]);
> +               if (err) {
> +                       message = "writing ring-ref";
> +                       goto abort_transaction;
> +               }
> +       } else {
> +               for (i = 0; i < (1 << info->ring_order); i++) {
> +                       char key[sizeof("ring-ref") + 2];
> +
> +                       sprintf(key, "ring-ref%d", i);
> +
> +                       err = xenbus_printf(xbt, dev->nodename,
> +                                           key, "%d", info->ring_ref[i]);
> +                       if (err) {
> +                               message = "writing ring-ref";
> +                               goto abort_transaction;
> +                       }
> +               }
> +
> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-page-order", "%u", info->ring_order);
> +               if (err) {
> +                       message = "writing ring-order";
> +                       goto abort_transaction;
> +               }
>         }
> +
>         err = xenbus_printf(xbt, dev->nodename,
>                             "event-channel", "%u", info->evtchn);
>         if (err) {
> @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
>         info->connected = BLKIF_STATE_DISCONNECTED;
>         INIT_WORK(&info->work, blkif_restart_queue);
> 
> -       for (i = 0; i < BLK_RING_SIZE; i++)
> +       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
>                 info->shadow[i].req.u.rw.id = i+1;
> -       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
> +       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

A proper terminator must also be written in talk_to_blkback() once
the actual ring size is known.

Further, blkif_recover() must be able to deal with a change of the
allowed upper bound.

>         /* Front end dir is a number, which is used as the id. */
>         info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
>         dev_set_drvdata(&dev->dev, info);
> 
> -       err = talk_to_blkback(dev, info);

Completely removing this here is wrong afaict - what if the backend
already is in InitWait when the frontend starts?

Further, whatever is done to this call here also needs to be done in
blkfront_resume().

> -       if (err) {
> -               kfree(info);
> -               dev_set_drvdata(&dev->dev, NULL);
> -               return err;
> -       }
> -
>         return 0;
>  }
> 
> @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
>         case XenbusStateClosed:
>                 break;
> 
> +       case XenbusStateInitWait:
> +               talk_to_blkback(dev, info);

This call can return an error.

> +               break;
> +
>         case XenbusStateConnected:
>                 blkfront_connect(info);
>                 break;
> --- a/include/xen/xenbus.h
> +++ b/include/xen/xenbus.h
> @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
>                          const char *pathfmt, ...);
> 
>  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> -                          int gnt_ref, void **vaddr);
> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> -                          grant_handle_t *handle, void *vaddr);
> +
> +#define        XENBUS_MAX_RING_ORDER   2
> +#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)

Why do you need an artificial global limit here? Each driver can decide
individually what its limit should be.

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
                               ` (3 preceding siblings ...)
  (?)
@ 2012-03-06  8:34             ` Jan Beulich
  -1 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-06  8:34 UTC (permalink / raw)
  To: Santosh Jodh
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	rusty@rustcorp.com.au, weiyi.huang@gmail.com,
	linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, David Vrabel,
	linux-pci@vger.kernel.org, akpm@linux-foundation.org,
	xen-devel@lists.xen.org

>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:

Could this be split up into 3 patches, for easier reviewing:
- one adjusting the xenbus interface to allow for multiple ring pages (and
  maybe even that one should be split into the backend and frontend
  related parts), syncing with the similar netback effort?
- one for the blkback changes
- one for the blkfront changes?

> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>         return blkif;
>  }
> 
> -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
> -                        unsigned int evtchn)
> +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],

As you need to touch this anyway, can you please switch this to the
proper type (grant_ref_t) rather than using plain "int" (not just here)?

> +                        unsigned int ring_order, unsigned int evtchn)
>  {
>         int err;
> 
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
>  static int get_id_from_freelist(struct blkfront_info *info)
>  {
>         unsigned long free = info->shadow_free;
> -       BUG_ON(free >= BLK_RING_SIZE);
> +       BUG_ON(free >= BLK_MAX_RING_SIZE);

Wouldn't it be better to check against the actual limit here?

>         info->shadow_free = info->shadow[free].req.u.rw.id;
>         info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
>         return free;
> @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>         flush_work_sync(&info->work);
> 
>         /* Free resources associated with old device channel. */
> -       if (info->ring_ref != GRANT_INVALID_REF) {
> -               gnttab_end_foreign_access(info->ring_ref, 0,
> -                                         (unsigned long)info->ring.sring);
> -               info->ring_ref = GRANT_INVALID_REF;
> -               info->ring.sring = NULL;
> +       for (i = 0; i < (1 << info->ring_order); i++) {
> +               if (info->ring_ref[i] != GRANT_INVALID_REF) {
> +                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
> +                       info->ring_ref[i] = GRANT_INVALID_REF;
> +               }
>         }
> +
> +       free_pages((unsigned long)info->ring.sring, info->ring_order);

No. The freeing must continue to happen in gnttab_end_foreign_access()
(with the sole exception when a page was allocated but the grant
didn't get established), since it must be suppressed/delayed when the
grant is still in use (otherwise the kernel will die on the first re-use of
the page). I just happened to fix that problem at the end of last week
in the variant of the patch that we pulled into our tree.

Further, rather than doing a non-zero order allocation here, I'd
suggest allocating individual pages and vmap()-ing them.

> +       info->ring.sring = NULL;
> +
>         if (info->irq)
>                 unbind_from_irqhandler(info->irq, info);
>         info->evtchn = info->irq = 0;
> -
>  }
> 
>  static void blkif_completion(struct blk_shadow *s)
> @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
>  {
>         const char *message = NULL;
>         struct xenbus_transaction xbt;
> +       unsigned int ring_order;
> +       int legacy_backend;
> +       int i;
>         int err;
> 
> +       for (i = 0; i < (1 << info->ring_order); i++)
> +               info->ring_ref[i] = GRANT_INVALID_REF;
> +
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
> +                          &ring_order);

At least the frontend should imo also support the alternative interface
(using "max-ring-pages" etc).

> +
> +       legacy_backend = !(err == 1);
> +
> +       if (legacy_backend) {
> +               info->ring_order = 0;
> +       } else {
> +               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
> +                                  ring_order :
> +                                  xen_blkif_ring_order;

min()?

> +       }
> +
>         /* Create shared ring, alloc event channel. */
>         err = setup_blkring(dev, info);
>         if (err)
> @@ -889,12 +916,35 @@ again:
>                 goto destroy_blkring;
>         }
> 
> -       err = xenbus_printf(xbt, dev->nodename,
> -                           "ring-ref", "%u", info->ring_ref);
> -       if (err) {
> -               message = "writing ring-ref";
> -               goto abort_transaction;
> +       if (legacy_backend) {

Why not use the simpler interface always when info->ring_order == 0?

> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-ref", "%d", info->ring_ref[0]);
> +               if (err) {
> +                       message = "writing ring-ref";
> +                       goto abort_transaction;
> +               }
> +       } else {
> +               for (i = 0; i < (1 << info->ring_order); i++) {
> +                       char key[sizeof("ring-ref") + 2];
> +
> +                       sprintf(key, "ring-ref%d", i);
> +
> +                       err = xenbus_printf(xbt, dev->nodename,
> +                                           key, "%d", info->ring_ref[i]);
> +                       if (err) {
> +                               message = "writing ring-ref";
> +                               goto abort_transaction;
> +                       }
> +               }
> +
> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-page-order", "%u", info->ring_order);
> +               if (err) {
> +                       message = "writing ring-order";
> +                       goto abort_transaction;
> +               }
>         }
> +
>         err = xenbus_printf(xbt, dev->nodename,
>                             "event-channel", "%u", info->evtchn);
>         if (err) {
> @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
>         info->connected = BLKIF_STATE_DISCONNECTED;
>         INIT_WORK(&info->work, blkif_restart_queue);
> 
> -       for (i = 0; i < BLK_RING_SIZE; i++)
> +       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
>                 info->shadow[i].req.u.rw.id = i+1;
> -       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
> +       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

A proper terminator must also be written in talk_to_blkback() once
the actual ring size is known.

Further, blkif_recover() must be able to deal with a change of the
allowed upper bound.

>         /* Front end dir is a number, which is used as the id. */
>         info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
>         dev_set_drvdata(&dev->dev, info);
> 
> -       err = talk_to_blkback(dev, info);

Completely removing this here is wrong afaict - what if the backend
already is in InitWait when the frontend starts?

Further, whatever is done to this call here also needs to be done in
blkfront_resume().

> -       if (err) {
> -               kfree(info);
> -               dev_set_drvdata(&dev->dev, NULL);
> -               return err;
> -       }
> -
>         return 0;
>  }
> 
> @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
>         case XenbusStateClosed:
>                 break;
> 
> +       case XenbusStateInitWait:
> +               talk_to_blkback(dev, info);

This call can return an error.

> +               break;
> +
>         case XenbusStateConnected:
>                 blkfront_connect(info);
>                 break;
> --- a/include/xen/xenbus.h
> +++ b/include/xen/xenbus.h
> @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
>                          const char *pathfmt, ...);
> 
>  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> -                          int gnt_ref, void **vaddr);
> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> -                          grant_handle_t *handle, void *vaddr);
> +
> +#define        XENBUS_MAX_RING_ORDER   2
> +#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)

Why do you need an artificial global limit here? Each driver can decide
individually what its limit should be.

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
                               ` (7 preceding siblings ...)
  (?)
@ 2012-03-06 11:16             ` Wei Liu
  -1 siblings, 0 replies; 73+ messages in thread
From: Wei Liu @ 2012-03-06 11:16 UTC (permalink / raw)
  To: Santosh Jodh
  Cc: jeremy@goop.org, wei.liu2, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, weiyi.huang@gmail.com, joe.jin@oracle.com,
	linux-kernel@vger.kernel.org, jbeulich@novell.com,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, linux-pci@vger.kernel.org,
	Paul Durrant, jbarnes@virtuousgeek.org, netdev@vger.kernel.org,
	dgdegra@tycho.nsa.gov, xen-devel@lists.xen.org, lersek@redhat.com

On Mon, 2012-03-05 at 21:49 +0000, Santosh Jodh wrote:
> From: Santosh Jodh <santosh.jodh@citrix.com>
> 
> Add support for multi page ring for block devices.
> The number of pages is configurable for blkback via module parameter.
> blkback reports max-ring-page-order to blkfront via xenstore.
> blkfront reports its supported ring-page-order to blkback via xenstore.
> blkfront reports multi page ring references via ring-refNN in xenstore.
> The change allows newer blkfront to work with older blkback and
> vice-versa.
> Based on original patch by Paul Durrant.
> 
> Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>


Doesn't the xenbus interface change deserve another patch (as
prerequisite for block devices change)? Or at least please mention the
change in commit message?


Wei.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
@ 2012-03-06 11:16               ` Wei Liu
  -1 siblings, 0 replies; 73+ messages in thread
From: Wei Liu @ 2012-03-06 11:16 UTC (permalink / raw)
  To: Santosh Jodh
  Cc: jeremy@goop.org, wei.liu2, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, weiyi.huang@gmail.com, rusty@rustcorp.com.au,
	joe.jin@oracle.com, linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, linux-pci@vger.kernel.org,
	Paul Durrant, jbarnes@virtuousgeek.org, netdev@vger.kernel.org,
	dgdegra@tycho.nsa.gov, xen-devel@lists.xen.org, lersek@redhat.com

On Mon, 2012-03-05 at 21:49 +0000, Santosh Jodh wrote:
> From: Santosh Jodh <santosh.jodh@citrix.com>
> 
> Add support for multi page ring for block devices.
> The number of pages is configurable for blkback via module parameter.
> blkback reports max-ring-page-order to blkfront via xenstore.
> blkfront reports its supported ring-page-order to blkback via xenstore.
> blkfront reports multi page ring references via ring-refNN in xenstore.
> The change allows newer blkfront to work with older blkback and
> vice-versa.
> Based on original patch by Paul Durrant.
> 
> Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>


Doesn't the xenbus interface change deserve another patch (as
prerequisite for block devices change)? Or at least please mention the
change in commit message?


Wei.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
@ 2012-03-06 11:16               ` Wei Liu
  0 siblings, 0 replies; 73+ messages in thread
From: Wei Liu @ 2012-03-06 11:16 UTC (permalink / raw)
  To: Santosh Jodh
  Cc: jeremy@goop.org, wei.liu2, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, weiyi.huang@gmail.com, rusty@rustcorp.com.au,
	joe.jin@oracle.com, linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, linux-pci@vger.kernel.org,
	Paul Durrant, jbarnes@virtuousgeek.org, netdev@vger.kernel.org,
	dgdegra@tycho.nsa.gov, xen-devel@lists.xen.org, lersek@redhat.com

On Mon, 2012-03-05 at 21:49 +0000, Santosh Jodh wrote:
> From: Santosh Jodh <santosh.jodh@citrix.com>
> 
> Add support for multi page ring for block devices.
> The number of pages is configurable for blkback via module parameter.
> blkback reports max-ring-page-order to blkfront via xenstore.
> blkfront reports its supported ring-page-order to blkback via xenstore.
> blkfront reports multi page ring references via ring-refNN in xenstore.
> The change allows newer blkfront to work with older blkback and
> vice-versa.
> Based on original patch by Paul Durrant.
> 
> Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>


Doesn't the xenbus interface change deserve another patch (as
prerequisite for block devices change)? Or at least please mention the
change in commit message?


Wei.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
                               ` (8 preceding siblings ...)
  (?)
@ 2012-03-06 17:20             ` Konrad Rzeszutek Wilk
  2012-03-07  9:33                 ` Jan Beulich
  2012-03-07  9:33               ` Jan Beulich
  -1 siblings, 2 replies; 73+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-03-06 17:20 UTC (permalink / raw)
  To: Santosh Jodh
  Cc: jeremy@goop.org, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, weiyi.huang@gmail.com, joe.jin@oracle.com,
	linux-kernel@vger.kernel.org, jbeulich@novell.com,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, linux-pci@vger.kernel.org,
	Paul Durrant, jbarnes@virtuousgeek.org, netdev@vger.kernel.org,
	dgdegra@tycho.nsa.gov, xen-devel@lists.xen.org, lersek@redhat.com,
	akpm

On Mon, Mar 5, 2012 at 4:49 PM, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:
> From: Santosh Jodh <santosh.jodh@citrix.com>
>
> Add support for multi page ring for block devices.
> The number of pages is configurable for blkback via module parameter.
> blkback reports max-ring-page-order to blkfront via xenstore.
> blkfront reports its supported ring-page-order to blkback via xenstore.
> blkfront reports multi page ring references via ring-refNN in xenstore.
> The change allows newer blkfront to work with older blkback and
> vice-versa.
> Based on original patch by Paul Durrant.

you should include his SoB in this patch.

The patch overall looks Ok, though I do have some comments:

 -> the call to "xenbus_ring_ops_init();" looks like a bug-fix? If so,
it should be a separate patch.
 -> the usage of XenbusStateInitWait? Why do we introduce that? Looks
like a fix to something.
-> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
default size for SSD usage? 16?
 -> don't do sprintf, use snprintf
 -> don't use printk(KERN_..), use pr_info or the variant of
pr_err,pr_debug, etc.
 -> don't split the printk contents. It is Ok for them to be more than
80 columns.
 -> check that xen_blkif_ring_order is under XENBUS_MAX_RING_PAGES.
Otherwise a joker could do = 9999999999999999999 for ring size and we
would try to use that.
 -> Separate out the patch that introduces the changes to the XenBus
infrastructure; the changes to net* and blk* to use the extra
arguments would then be folded into that patch. The patch that
implements the multi-page ring in blkback then becomes a patch that
depends on the XenBus modifications patch. Also make sure you CC David
Miller and Jens Axboe on the XenBus patch as it modifies the net-* side
which requires Ian's and David's Ack.
 -> Have you done a sanity/test check where the backend and frontend
have different size rings? Just to make sure nothing explodes.

>
> Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>
> ---
> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
> index 0088bf6..72f2e18 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64;
>  module_param_named(reqs, xen_blkif_reqs, int, 0);
>  MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
>
> +/* Order of maximum shared ring size advertised to the front end. */
> +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
> +
> +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
> +
> +static int set_max_ring_order(const char *buf, struct kernel_param *kp)
> +{
> +       int err;
> +       unsigned long order;
> +
> +       err = kstrtol(buf, 0, &order);
> +       if (err ||
> +           order < 0 ||
> +           order > XENBUS_MAX_RING_ORDER)
> +               return -EINVAL;
> +
> +       if (xen_blkif_reqs < BLK_RING_SIZE(order))
> +               printk(KERN_WARNING "WARNING: "
> +                      "I/O request space (%d reqs) < ring order %ld, "
> +                      "consider increasing %s.reqs to >= %ld.",
> +                      xen_blkif_reqs, order, KBUILD_MODNAME,
> +                      roundup_pow_of_two(BLK_RING_SIZE(order)));
> +
> +       xen_blkif_max_ring_order = order;
> +
> +       return 0;
> +}
> +
> +module_param_call(max_ring_order,
> +                 set_max_ring_order, param_get_int,
> +                 &xen_blkif_max_ring_order, 0644);
> +MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
> +
>  /* Run-time switchable: /sys/module/blkback/parameters/ */
>  static unsigned int log_stats;
>  module_param(log_stats, int, 0644);
> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
> index d0ee7ed..5f33a1a 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -126,6 +126,8 @@ struct blkif_x86_64_response {
>        int16_t         status;          /* BLKIF_RSP_???       */
>  };
>
> +extern int xen_blkif_max_ring_order;
> +
>  DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
>                  struct blkif_common_response);
>  DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
> index 24a2fb5..7a9d71d 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>        return blkif;
>  }
>
> -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
> -                        unsigned int evtchn)
> +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
> +                        unsigned int ring_order, unsigned int evtchn)
>  {
>        int err;
>
> @@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
>        if (blkif->irq)
>                return 0;
>
> -       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
> +       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
> +                                    &blkif->blk_ring);
>        if (err < 0)
>                return err;
>
> @@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
>        {
>                struct blkif_sring *sring;
>                sring = (struct blkif_sring *)blkif->blk_ring;
> -               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
> +               BACK_RING_INIT(&blkif->blk_rings.native, sring,
> +                              PAGE_SIZE << ring_order);
>                break;
>        }
>        case BLKIF_PROTOCOL_X86_32:
>        {
>                struct blkif_x86_32_sring *sring_x86_32;
>                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
> -               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
> +               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
> +                              PAGE_SIZE << ring_order);
>                break;
>        }
>        case BLKIF_PROTOCOL_X86_64:
>        {
>                struct blkif_x86_64_sring *sring_x86_64;
>                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
> -               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
> +               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
> +                              PAGE_SIZE << ring_order);
>                break;
>        }
>        default:
> @@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
>        if (err)
>                goto fail;
>
> +       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
> +                           "%u", xen_blkif_max_ring_order);
> +       if (err)
> +               goto fail;
> +
>        err = xenbus_switch_state(dev, XenbusStateInitWait);
>        if (err)
>                goto fail;
> @@ -744,22 +753,80 @@ again:
>  static int connect_ring(struct backend_info *be)
>  {
>        struct xenbus_device *dev = be->dev;
> -       unsigned long ring_ref;
> +       int ring_ref[XENBUS_MAX_RING_PAGES];
> +       unsigned int ring_order;
>        unsigned int evtchn;
>        char protocol[64] = "";
>        int err;
>
>        DPRINTK("%s", dev->otherend);
>
> -       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
> -                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
> -       if (err) {
> -               xenbus_dev_fatal(dev, err,
> -                                "reading %s/ring-ref and event-channel",
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
> +                          &evtchn);
> +       if (err != 1) {
> +               err = -EINVAL;
> +
> +               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
>                                 dev->otherend);
>                return err;
>        }
>
> +       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
> +
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
> +                          &ring_order);
> +       if (err != 1) {
> +               DPRINTK("%s: using single page handshake", dev->otherend);
> +
> +               ring_order = 0;
> +
> +               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
> +                                  "%d", &ring_ref[0]);
> +               if (err != 1) {
> +                       err = -EINVAL;
> +
> +                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
> +                                        dev->otherend);
> +                       return err;
> +               }
> +
> +               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
> +       } else {
> +               unsigned int i;
> +
> +               if (ring_order > xen_blkif_max_ring_order) {
> +                       err = -EINVAL;
> +
> +                       xenbus_dev_fatal(dev, err,
> +                                        "%s/ring-page-order too big",
> +                                        dev->otherend);
> +                       return err;
> +               }
> +
> +               for (i = 0; i < (1u << ring_order); i++) {
> +                       char ring_ref_name[10];
> +
> +                       snprintf(ring_ref_name, sizeof(ring_ref_name),
> +                                "ring-ref%u", i);
> +
> +                       err = xenbus_scanf(XBT_NIL, dev->otherend,
> +                                          ring_ref_name, "%d",
> +                                          &ring_ref[i]);
> +                       if (err != 1) {
> +                               err = -EINVAL;
> +
> +                               xenbus_dev_fatal(dev, err,
> +                                                "reading %s/%s",
> +                                                dev->otherend,
> +                                                ring_ref_name);
> +                               return err;
> +                       }
> +
> +                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
> +                              ring_ref[i]);
> +               }
> +       }
> +
>        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
>        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
>                            "%63s", protocol, NULL);
> @@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
>                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
>                return -1;
>        }
> -       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
> -               ring_ref, evtchn, be->blkif->blk_protocol, protocol);
>
>        /* Map the shared frame, irq etc. */
> -       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
> +       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
>        if (err) {
> -               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
> -                                ring_ref, evtchn);
> +               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
>                return err;
>        }
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 2f22874..485813a 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -57,6 +57,10 @@
>
>  #include <asm/xen/hypervisor.h>
>
> +static int xen_blkif_ring_order;
> +module_param_named(reqs, xen_blkif_ring_order, int, 0);
> +MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages.");
> +
>  enum blkif_state {
>        BLKIF_STATE_DISCONNECTED,
>        BLKIF_STATE_CONNECTED,
> @@ -72,7 +76,8 @@ struct blk_shadow {
>  static DEFINE_MUTEX(blkfront_mutex);
>  static const struct block_device_operations xlvbd_block_fops;
>
> -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
> +#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
> +#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)
>
>  /*
>  * We have one of these per vbd, whether ide, scsi or 'other'.  They
> @@ -87,14 +92,15 @@ struct blkfront_info
>        int vdevice;
>        blkif_vdev_t handle;
>        enum blkif_state connected;
> -       int ring_ref;
> +       int ring_ref[XENBUS_MAX_RING_PAGES];
> +       int ring_order;
>        struct blkif_front_ring ring;
>        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>        unsigned int evtchn, irq;
>        struct request_queue *rq;
>        struct work_struct work;
>        struct gnttab_free_callback callback;
> -       struct blk_shadow shadow[BLK_RING_SIZE];
> +       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
>        unsigned long shadow_free;
>        unsigned int feature_flush;
>        unsigned int flush_op;
> @@ -111,9 +117,7 @@ static unsigned int nr_minors;
>  static unsigned long *minors;
>  static DEFINE_SPINLOCK(minor_lock);
>
> -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
> -       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
> -#define GRANT_INVALID_REF      0
> +#define GRANT_INVALID_REF      0
>
>  #define PARTS_PER_DISK         16
>  #define PARTS_PER_EXT_DISK      256
> @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
>  static int get_id_from_freelist(struct blkfront_info *info)
>  {
>        unsigned long free = info->shadow_free;
> -       BUG_ON(free >= BLK_RING_SIZE);
> +       BUG_ON(free >= BLK_MAX_RING_SIZE);
>        info->shadow_free = info->shadow[free].req.u.rw.id;
>        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
>        return free;
> @@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)
>
>  static void blkif_free(struct blkfront_info *info, int suspend)
>  {
> +       int i;
> +
>        /* Prevent new requests being issued until we fix things up. */
>        spin_lock_irq(&blkif_io_lock);
>        info->connected = suspend ?
> @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>        flush_work_sync(&info->work);
>
>        /* Free resources associated with old device channel. */
> -       if (info->ring_ref != GRANT_INVALID_REF) {
> -               gnttab_end_foreign_access(info->ring_ref, 0,
> -                                         (unsigned long)info->ring.sring);
> -               info->ring_ref = GRANT_INVALID_REF;
> -               info->ring.sring = NULL;
> +       for (i = 0; i < (1 << info->ring_order); i++) {
> +               if (info->ring_ref[i] != GRANT_INVALID_REF) {
> +                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
> +                       info->ring_ref[i] = GRANT_INVALID_REF;
> +               }
>        }
> +
> +       free_pages((unsigned long)info->ring.sring, info->ring_order);
> +       info->ring.sring = NULL;
> +
>        if (info->irq)
>                unbind_from_irqhandler(info->irq, info);
>        info->evtchn = info->irq = 0;
> -
>  }
>
>  static void blkif_completion(struct blk_shadow *s)
> @@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
>        struct blkif_sring *sring;
>        int err;
>
> -       info->ring_ref = GRANT_INVALID_REF;
> -
> -       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
> +       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
> +                                                      info->ring_order);
>        if (!sring) {
>                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
>                return -ENOMEM;
>        }
>        SHARED_RING_INIT(sring);
> -       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
> +       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);
>
>        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
>
> -       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
> +       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
> +                               info->ring_ref);
>        if (err < 0) {
> -               free_page((unsigned long)sring);
> +               free_pages((unsigned long)sring, info->ring_order);
>                info->ring.sring = NULL;
>                goto fail;
>        }
> -       info->ring_ref = err;
>
>        err = xenbus_alloc_evtchn(dev, &info->evtchn);
>        if (err)
> @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
>  {
>        const char *message = NULL;
>        struct xenbus_transaction xbt;
> +       unsigned int ring_order;
> +       int legacy_backend;
> +       int i;
>        int err;
>
> +       for (i = 0; i < (1 << info->ring_order); i++)
> +               info->ring_ref[i] = GRANT_INVALID_REF;
> +
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
> +                          &ring_order);
> +
> +       legacy_backend = !(err == 1);
> +
> +       if (legacy_backend) {
> +               info->ring_order = 0;
> +       } else {
> +               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
> +                                  ring_order :
> +                                  xen_blkif_ring_order;
> +       }
> +
>        /* Create shared ring, alloc event channel. */
>        err = setup_blkring(dev, info);
>        if (err)
> @@ -889,12 +916,35 @@ again:
>                goto destroy_blkring;
>        }
>
> -       err = xenbus_printf(xbt, dev->nodename,
> -                           "ring-ref", "%u", info->ring_ref);
> -       if (err) {
> -               message = "writing ring-ref";
> -               goto abort_transaction;
> +       if (legacy_backend) {
> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-ref", "%d", info->ring_ref[0]);
> +               if (err) {
> +                       message = "writing ring-ref";
> +                       goto abort_transaction;
> +               }
> +       } else {
> +               for (i = 0; i < (1 << info->ring_order); i++) {
> +                       char key[sizeof("ring-ref") + 2];
> +
> +                       sprintf(key, "ring-ref%d", i);
> +
> +                       err = xenbus_printf(xbt, dev->nodename,
> +                                           key, "%d", info->ring_ref[i]);
> +                       if (err) {
> +                               message = "writing ring-ref";
> +                               goto abort_transaction;
> +                       }
> +               }
> +
> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-page-order", "%u", info->ring_order);
> +               if (err) {
> +                       message = "writing ring-order";
> +                       goto abort_transaction;
> +               }
>        }
> +
>        err = xenbus_printf(xbt, dev->nodename,
>                            "event-channel", "%u", info->evtchn);
>        if (err) {
> @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
>        info->connected = BLKIF_STATE_DISCONNECTED;
>        INIT_WORK(&info->work, blkif_restart_queue);
>
> -       for (i = 0; i < BLK_RING_SIZE; i++)
> +       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
>                info->shadow[i].req.u.rw.id = i+1;
> -       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
> +       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
>
>        /* Front end dir is a number, which is used as the id. */
>        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
>        dev_set_drvdata(&dev->dev, info);
>
> -       err = talk_to_blkback(dev, info);
> -       if (err) {
> -               kfree(info);
> -               dev_set_drvdata(&dev->dev, NULL);
> -               return err;
> -       }
> -
>        return 0;
>  }
>
> @@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)
>
>        /* Stage 2: Set up free list. */
>        memset(&info->shadow, 0, sizeof(info->shadow));
> -       for (i = 0; i < BLK_RING_SIZE; i++)
> +       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
>                info->shadow[i].req.u.rw.id = i+1;
>        info->shadow_free = info->ring.req_prod_pvt;
> -       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
> +       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
>
>        /* Stage 3: Find pending requests and requeue them. */
> -       for (i = 0; i < BLK_RING_SIZE; i++) {
> +       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
>                /* Not in use? */
>                if (!copy[i].request)
>                        continue;
> @@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,
>
>        switch (backend_state) {
>        case XenbusStateInitialising:
> -       case XenbusStateInitWait:
>        case XenbusStateInitialised:
>        case XenbusStateReconfiguring:
>        case XenbusStateReconfigured:
> @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
>        case XenbusStateClosed:
>                break;
>
> +       case XenbusStateInitWait:
> +               talk_to_blkback(dev, info);
> +               break;
> +
>        case XenbusStateConnected:
>                blkfront_connect(info);
>                break;
> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
> index 94b79c3..f93b59a 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
>  /* (Un)Map communication rings. */
>  void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
>  int xen_netbk_map_frontend_rings(struct xenvif *vif,
> -                                grant_ref_t tx_ring_ref,
> -                                grant_ref_t rx_ring_ref);
> +                                int tx_ring_ref,
> +                                int rx_ring_ref);
>
>  /* (De)Register a xenvif with the netback backend. */
>  void xen_netbk_add_xenvif(struct xenvif *vif);
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 59effac..0b014cf 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
>  }
>
>  int xen_netbk_map_frontend_rings(struct xenvif *vif,
> -                                grant_ref_t tx_ring_ref,
> -                                grant_ref_t rx_ring_ref)
> +                                int tx_ring_ref,
> +                                int rx_ring_ref)
>  {
>        void *addr;
>        struct xen_netif_tx_sring *txs;
> @@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
>        int err = -ENOMEM;
>
>        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
> -                                    tx_ring_ref, &addr);
> +                                    &tx_ring_ref, 1, &addr);
>        if (err)
>                goto err;
>
> @@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
>        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
>
>        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
> -                                    rx_ring_ref, &addr);
> +                                    &rx_ring_ref, 1, &addr);
>        if (err)
>                goto err;
>
> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> index 698b905..521a595 100644
> --- a/drivers/net/xen-netfront.c
> +++ b/drivers/net/xen-netfront.c
> @@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
>        SHARED_RING_INIT(txs);
>        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
>
> -       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
> +       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
>        if (err < 0) {
>                free_page((unsigned long)txs);
>                goto fail;
>        }
>
> -       info->tx_ring_ref = err;
>        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
>        if (!rxs) {
>                err = -ENOMEM;
> @@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
>        SHARED_RING_INIT(rxs);
>        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
>
> -       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
> +       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
>        if (err < 0) {
>                free_page((unsigned long)rxs);
>                goto fail;
>        }
> -       info->rx_ring_ref = err;
>
>        err = xenbus_alloc_evtchn(dev, &info->evtchn);
>        if (err)
> diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
> index 1620088..95109d8 100644
> --- a/drivers/pci/xen-pcifront.c
> +++ b/drivers/pci/xen-pcifront.c
> @@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
>        int err = 0;
>        struct xenbus_transaction trans;
>
> -       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
> +       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
>        if (err < 0)
>                goto out;
>
> -       pdev->gnt_ref = err;
> -
>        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
>        if (err)
>                goto out;
> diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
> index 64b11f9..e0834cd 100644
> --- a/drivers/xen/xen-pciback/xenbus.c
> +++ b/drivers/xen/xen-pciback/xenbus.c
> @@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
>                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
>                gnt_ref, remote_evtchn);
>
> -       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
> +       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
>        if (err < 0) {
>                xenbus_dev_fatal(pdev->xdev, err,
>                                "Error mapping other domain page in ours.");
> diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
> index 566d2ad..3a14524 100644
> --- a/drivers/xen/xenbus/xenbus_client.c
> +++ b/drivers/xen/xenbus/xenbus_client.c
> @@ -53,14 +53,16 @@ struct xenbus_map_node {
>                struct vm_struct *area; /* PV */
>                struct page *page;     /* HVM */
>        };
> -       grant_handle_t handle;
> +       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
> +       unsigned int   nr_handles;
>  };
>
>  static DEFINE_SPINLOCK(xenbus_valloc_lock);
>  static LIST_HEAD(xenbus_valloc_pages);
>
>  struct xenbus_ring_ops {
> -       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
> +       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
> +                  void **vaddr);
>        int (*unmap)(struct xenbus_device *dev, void *vaddr);
>  };
>
> @@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
>  /**
>  * xenbus_grant_ring
>  * @dev: xenbus device
> - * @ring_mfn: mfn of ring to grant
> -
> - * Grant access to the given @ring_mfn to the peer of the given device.  Return
> - * 0 on success, or -errno on error.  On error, the device will switch to
> - * XenbusStateClosing, and the error will be saved in the store.
> + * @vaddr: starting virtual address of the ring
> + * @nr_pages: number of pages to be granted
> + * @grefs: grant reference array to be filled in
> + * Grant access to the given @vaddr to the peer of the given device.
> + * Then fill in @grefs with grant references.  Return 0 on success, or
> + * -errno on error.  On error, the device will switch to
> + * XenbusStateClosing, and the first error will be saved in the store.
>  */
> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
> +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
> +                     int nr_pages, int grefs[])
>  {
> -       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
> -       if (err < 0)
> -               xenbus_dev_fatal(dev, err, "granting access to ring page");
> +       int i;
> +       int err;
> +
> +       for (i = 0; i < nr_pages; i++) {
> +               unsigned long addr = (unsigned long)vaddr +
> +                       (PAGE_SIZE * i);
> +               err = gnttab_grant_foreign_access(dev->otherend_id,
> +                                                 virt_to_mfn(addr), 0);
> +               if (err < 0) {
> +                       xenbus_dev_fatal(dev, err,
> +                                        "granting access to ring page");
> +                       goto fail;
> +               }
> +               grefs[i] = err;
> +       }
> +
> +       return 0;
> +
> +fail:
> +       for ( ; i >= 0; i--)
> +               gnttab_end_foreign_access_ref(grefs[i], 0);
>        return err;
>  }
>  EXPORT_SYMBOL_GPL(xenbus_grant_ring);
> @@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
>  /**
>  * xenbus_map_ring_valloc
>  * @dev: xenbus device
> - * @gnt_ref: grant reference
> + * @gnt_ref: grant reference array
> + * @nr_grefs: number of grant references
>  * @vaddr: pointer to address to be filled out by mapping
>  *
>  * Based on Rusty Russell's skeleton driver's map_page.
> @@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
>  * or -ENOMEM on error. If an error is returned, device will switch to
>  * XenbusStateClosing and the error message will be saved in XenStore.
>  */
> -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
> +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
> +                          int nr_grefs, void **vaddr)
>  {
> -       return ring_ops->map(dev, gnt_ref, vaddr);
> +       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
>  }
>  EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
>
> +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
> +                                       struct xenbus_map_node *node);
> +
>  static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
> -                                    int gnt_ref, void **vaddr)
> +                                    int gnt_ref[], int nr_grefs, void **vaddr)
>  {
> -       struct gnttab_map_grant_ref op = {
> -               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
> -               .ref   = gnt_ref,
> -               .dom   = dev->otherend_id,
> -       };
> +       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
>        struct xenbus_map_node *node;
>        struct vm_struct *area;
> -       pte_t *pte;
> +       pte_t *pte[XENBUS_MAX_RING_PAGES];
> +       int i;
> +       int err = 0;
> +
> +       if (nr_grefs > XENBUS_MAX_RING_PAGES)
> +               return -EINVAL;
>
>        *vaddr = NULL;
>
> @@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
>        if (!node)
>                return -ENOMEM;
>
> -       area = alloc_vm_area(PAGE_SIZE, &pte);
> +       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
>        if (!area) {
>                kfree(node);
>                return -ENOMEM;
>        }
>
> -       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
> +       for (i = 0; i < nr_grefs; i++) {
> +               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte,
> +               op[i].ref   = gnt_ref[i],
> +               op[i].dom   = dev->otherend_id,
> +               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
> +       };
>
>        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
>                BUG();
>
> -       if (op.status != GNTST_okay) {
> -               free_vm_area(area);
> -               kfree(node);
> -               xenbus_dev_fatal(dev, op.status,
> -                                "mapping in shared page %d from domain %d",
> -                                gnt_ref, dev->otherend_id);
> -               return op.status;
> +       node->nr_handles = nr_grefs;
> +       node->area = area;
> +
> +       for (i = 0; i < nr_grefs; i++) {
> +               if (op[i].status != GNTST_okay) {
> +                       err = op[i].status;
> +                       node->handle[i] = INVALID_GRANT_HANDLE;
> +                       continue;
> +               }
> +               node->handle[i] = op[i].handle;
>        }
>
> -       node->handle = op.handle;
> -       node->area = area;
> +       if (err != 0) {
> +               for (i = 0; i < nr_grefs; i++)
> +                       xenbus_dev_fatal(dev, op[i].status,
> +                               "mapping in shared page %d from domain %d",
> +                               gnt_ref[i], dev->otherend_id);
> +
> +                __xenbus_unmap_ring_vfree_pv(dev, node);
> +
> +               return err;
> +       }
>
>        spin_lock(&xenbus_valloc_lock);
>        list_add(&node->next, &xenbus_valloc_pages);
> @@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
>  }
>
>  static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
> -                                     int gnt_ref, void **vaddr)
> +                                     int gnt_ref[], int nr_grefs, void **vaddr)
>  {
>        struct xenbus_map_node *node;
>        int err;
>        void *addr;
>
> +       if (nr_grefs > XENBUS_MAX_RING_PAGES)
> +               return -EINVAL;
> +
>        *vaddr = NULL;
>
>        node = kzalloc(sizeof(*node), GFP_KERNEL);
>        if (!node)
>                return -ENOMEM;
>
> -       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
> +       err = alloc_xenballooned_pages(nr_grefs, &node->page,
> +                                      false /* lowmem */);
>        if (err)
>                goto out_err;
>
>        addr = pfn_to_kaddr(page_to_pfn(node->page));
>
> -       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
> +       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
>        if (err)
>                goto out_err;
>
> @@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
>        return 0;
>
>  out_err:
> -       free_xenballooned_pages(1, &node->page);
> +       free_xenballooned_pages(nr_grefs, &node->page);
>        kfree(node);
>        return err;
>  }
> @@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
>  /**
>  * xenbus_map_ring
>  * @dev: xenbus device
> - * @gnt_ref: grant reference
> - * @handle: pointer to grant handle to be filled
> + * @gnt_ref: grant reference array
> + * @nr_grefs: number of grant references
> + * @handle: pointer to grant handle array to be filled, mind the size
>  * @vaddr: address to be mapped to
>  *
> - * Map a page of memory into this domain from another domain's grant table.
> + * Map pages of memory into this domain from another domain's grant table.
>  * xenbus_map_ring does not allocate the virtual address space (you must do
> - * this yourself!). It only maps in the page to the specified address.
> + * this yourself!). It only maps in the pages to the specified address.
>  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
>  * or -ENOMEM on error. If an error is returned, device will switch to
> - * XenbusStateClosing and the error message will be saved in XenStore.
> + * XenbusStateClosing and the last error message will be saved in XenStore.
>  */
> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> -                   grant_handle_t *handle, void *vaddr)
> +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
> +                   grant_handle_t handle[], void *vaddr)
>  {
> -       struct gnttab_map_grant_ref op;
> -
> -       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
> -                         dev->otherend_id);
> +       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
> +       int i;
> +       int err = GNTST_okay;   /* 0 */
> +
> +       for (i = 0; i < nr_grefs; i++) {
> +               unsigned long addr = (unsigned long)vaddr +
> +                       (PAGE_SIZE * i);
> +               gnttab_set_map_op(&op[i], (phys_addr_t)addr,
> +                                 GNTMAP_host_map, gnt_ref[i],
> +                                 dev->otherend_id);
> +       }
>
> -       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
> +       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
>                BUG();
>
> -       if (op.status != GNTST_okay) {
> -               xenbus_dev_fatal(dev, op.status,
> -                                "mapping in shared page %d from domain %d",
> -                                gnt_ref, dev->otherend_id);
> -       } else
> -               *handle = op.handle;
> +       for (i = 0; i < nr_grefs; i++) {
> +               if (op[i].status != GNTST_okay) {
> +                       err = op[i].status;
> +                       xenbus_dev_fatal(dev, err,
> +                               "mapping in shared page %d from domain %d",
> +                               gnt_ref[i], dev->otherend_id);
> +                       handle[i] = INVALID_GRANT_HANDLE;
> +               } else
> +                       handle[i] = op[i].handle;
> +       }
>
> -       return op.status;
> +       if (err != GNTST_okay)
> +               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
> +
> +       return err;
>  }
>  EXPORT_SYMBOL_GPL(xenbus_map_ring);
>
> @@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
>  }
>  EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
>
> +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
> +                                       struct xenbus_map_node *node)
> +{
> +       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
> +       unsigned int level;
> +       int i, j;
> +       int err = GNTST_okay;
> +
> +       j = 0;
> +       for (i = 0; i < node->nr_handles; i++) {
> +               unsigned long vaddr = (unsigned long)node->area->addr +
> +                       (PAGE_SIZE * i);
> +               if (node->handle[i] != INVALID_GRANT_HANDLE) {
> +                       memset(&op[j], 0, sizeof(op[0]));
> +                       op[j].host_addr = arbitrary_virt_to_machine(
> +                                       lookup_address(vaddr, &level)).maddr;
> +                       op[j].handle = node->handle[i];
> +                       j++;
> +                       node->handle[i] = INVALID_GRANT_HANDLE;
> +               }
> +       }
> +
> +       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
> +               BUG();
> +
> +       node->nr_handles = 0;
> +
> +       for (i = 0; i < j; i++) {
> +               if (op[i].status != GNTST_okay) {
> +                       err = op[i].status;
> +                       xenbus_dev_error(dev, err,
> +                               "unmapping page %d at handle %d error %d",
> +                               i, op[i].handle, err);
> +               }
> +       }
> +
> +       if (err == GNTST_okay)
> +               free_vm_area(node->area);
> +
> +       kfree(node);
> +
> +       return err;
> +}
> +
>  static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
>  {
>        struct xenbus_map_node *node;
> -       struct gnttab_unmap_grant_ref op = {
> -               .host_addr = (unsigned long)vaddr,
> -       };
> -       unsigned int level;
>
>        spin_lock(&xenbus_valloc_lock);
>        list_for_each_entry(node, &xenbus_valloc_pages, next) {
> @@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
>
>        if (!node) {
>                xenbus_dev_error(dev, -ENOENT,
> -                                "can't find mapped virtual address %p", vaddr);
> +                               "can't find mapped virtual address %p", vaddr);
>                return GNTST_bad_virt_addr;
>        }
>
> -       op.handle = node->handle;
> -       op.host_addr = arbitrary_virt_to_machine(
> -               lookup_address((unsigned long)vaddr, &level)).maddr;
> -
> -       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
> -               BUG();
> -
> -       if (op.status == GNTST_okay)
> -               free_vm_area(node->area);
> -       else
> -               xenbus_dev_error(dev, op.status,
> -                                "unmapping page at handle %d error %d",
> -                                node->handle, op.status);
> -
> -       kfree(node);
> -       return op.status;
> +       return __xenbus_unmap_ring_vfree_pv(dev, node);
>  }
>
>  static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
>  {
>        int rv;
>        struct xenbus_map_node *node;
> -       void *addr;
> +       void *addr = NULL;
>
>        spin_lock(&xenbus_valloc_lock);
>        list_for_each_entry(node, &xenbus_valloc_pages, next) {
> @@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
>
>        if (!node) {
>                xenbus_dev_error(dev, -ENOENT,
> -                                "can't find mapped virtual address %p", vaddr);
> +                               "can't find mapped virtual address %p", vaddr);
>                return GNTST_bad_virt_addr;
>        }
>
> -       rv = xenbus_unmap_ring(dev, node->handle, addr);
> +       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);
>
>        if (!rv)
> -               free_xenballooned_pages(1, &node->page);
> +               free_xenballooned_pages(node->nr_handles, &node->page);
>        else
>                WARN(1, "Leaking %p\n", vaddr);
>
> @@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
>  * xenbus_unmap_ring
>  * @dev: xenbus device
>  * @handle: grant handle
> + * @nr_handles: number of grant handle
>  * @vaddr: addr to unmap
>  *
>  * Unmap a page of memory in this domain that was imported from another domain.
> @@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
>  * (see xen/include/interface/grant_table.h).
>  */
>  int xenbus_unmap_ring(struct xenbus_device *dev,
> -                     grant_handle_t handle, void *vaddr)
> +                       grant_handle_t handle[], int nr_handles,
> +                       void *vaddr)
>  {
> -       struct gnttab_unmap_grant_ref op;
> -
> -       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
> +       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
> +       int i, j;
> +       int err = GNTST_okay;
> +
> +       j = 0;
> +       for (i = 0; i < nr_handles; i++) {
> +               unsigned long addr = (unsigned long)vaddr +
> +                       (PAGE_SIZE * i);
> +               if (handle[i] != INVALID_GRANT_HANDLE) {
> +                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
> +                                           GNTMAP_host_map, handle[i]);
> +                       handle[i] = INVALID_GRANT_HANDLE;
> +               }
> +       }
>
> -       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
> +       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
>                BUG();
>
> -       if (op.status != GNTST_okay)
> -               xenbus_dev_error(dev, op.status,
> -                                "unmapping page at handle %d error %d",
> -                                handle, op.status);
> +       for (i = 0; i < j; i++) {
> +               if (op[i].status != GNTST_okay) {
> +                       err = op[i].status;
> +                       xenbus_dev_error(dev, err,
> +                               "unmapping page at handle %d error %d",
> +                               handle[i], err);
> +               }
> +       }
>
> -       return op.status;
> +       return err;
>  }
>  EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
>
> diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
> index 3864967..62b92d2 100644
> --- a/drivers/xen/xenbus/xenbus_probe.c
> +++ b/drivers/xen/xenbus/xenbus_probe.c
> @@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
>        return err;
>  }
>
> +extern void xenbus_ring_ops_init(void);
>  static int __init xenbus_init(void)
>  {
>        int err = 0;
> @@ -767,6 +768,8 @@ static int __init xenbus_init(void)
>        proc_mkdir("xen", NULL);
>  #endif
>
> +       xenbus_ring_ops_init();
> +
>  out_error:
>        return err;
>  }
> diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
> index e8c599b..cdbd948 100644
> --- a/include/xen/xenbus.h
> +++ b/include/xen/xenbus.h
> @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
>                         const char *pathfmt, ...);
>
>  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> -                          int gnt_ref, void **vaddr);
> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> -                          grant_handle_t *handle, void *vaddr);
> +
> +#define        XENBUS_MAX_RING_ORDER   2
> +#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
> +
> +#define INVALID_GRANT_HANDLE           (~0U)
> +
> +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
> +                     int nr_pages, int grefs[]);
> +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
> +                          int nr_grefs, void **vaddr);
> +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
> +                   grant_handle_t handle[], void *vaddr);
>
>  int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
>  int xenbus_unmap_ring(struct xenbus_device *dev,
> -                     grant_handle_t handle, void *vaddr);
> +                     grant_handle_t handle[], int nr_handles,
> +                     void *vaddr);
>
>  int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
>  int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-05 21:49             ` Santosh Jodh
                               ` (9 preceding siblings ...)
  (?)
@ 2012-03-06 17:20             ` Konrad Rzeszutek Wilk
  -1 siblings, 0 replies; 73+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-03-06 17:20 UTC (permalink / raw)
  To: Santosh Jodh
  Cc: jeremy@goop.org, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, weiyi.huang@gmail.com, rusty@rustcorp.com.au,
	joe.jin@oracle.com, linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, linux-pci@vger.kernel.org,
	Paul Durrant, jbarnes@virtuousgeek.org, netdev@vger.kernel.org,
	dgdegra@tycho.nsa.gov, xen-devel@lists.xen.org, lersek@redhat.com,
	akpm

On Mon, Mar 5, 2012 at 4:49 PM, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:
> From: Santosh Jodh <santosh.jodh@citrix.com>
>
> Add support for multi page ring for block devices.
> The number of pages is configurable for blkback via module parameter.
> blkback reports max-ring-page-order to blkfront via xenstore.
> blkfront reports its supported ring-page-order to blkback via xenstore.
> blkfront reports multi page ring references via ring-refNN in xenstore.
> The change allows newer blkfront to work with older blkback and
> vice-versa.
> Based on original patch by Paul Durrant.

You should include his SoB (Signed-off-by) in this patch.

The patch overall looks Ok, though I do have some comments:

 -> the call to "xenbus_ring_ops_init();" looks like a bug-fix? If so,
it should be a separate patch.
 -> the usage of XenbusStateInitWait? Why do we introduce that? Looks
like a fix to something.
-> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
default size for SSD usage? 16?
 -> don't do sprintf, use snprintf
 -> don't use printk(KERN_..), use pr_info or the variant of
pr_err,pr_debug, etc.
 -> don't split the printk contents. It is Ok for them to be more than
80 columns.
 -> check that xen_blkif_ring_order is under XENBUS_MAX_RING_PAGES.
Otherwise a joker could do = 9999999999999999999 for ring size and we
would try to use that.
 -> Separate out the patch that introduces the changes to the XenBus
infrastructure; the changes to net* and blk* to use the extra
arguments would be folded into that patch. The patch that implements
the multi-page ring in blkback then becomes a patch that depends on
the XenBus modifications patch. Also make sure you CC David Miller and
Jens Axboe on the XenBus patch as it modifies the net-* side which
requires Ian's and David's Ack.
 -> Have you done a sanity/test check where the backend and frontend
have different size rings? Just to make sure nothing explodes.

>
> Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>
> ---
> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
> index 0088bf6..72f2e18 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64;
>  module_param_named(reqs, xen_blkif_reqs, int, 0);
>  MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
>
> +/* Order of maximum shared ring size advertised to the front end. */
> +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER;
> +
> +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
> +
> +static int set_max_ring_order(const char *buf, struct kernel_param *kp)
> +{
> +       int err;
> +       unsigned long order;
> +
> +       err = kstrtol(buf, 0, &order);
> +       if (err ||
> +           order < 0 ||
> +           order > XENBUS_MAX_RING_ORDER)
> +               return -EINVAL;
> +
> +       if (xen_blkif_reqs < BLK_RING_SIZE(order))
> +               printk(KERN_WARNING "WARNING: "
> +                      "I/O request space (%d reqs) < ring order %ld, "
> +                      "consider increasing %s.reqs to >= %ld.",
> +                      xen_blkif_reqs, order, KBUILD_MODNAME,
> +                      roundup_pow_of_two(BLK_RING_SIZE(order)));
> +
> +       xen_blkif_max_ring_order = order;
> +
> +       return 0;
> +}
> +
> +module_param_call(max_ring_order,
> +                 set_max_ring_order, param_get_int,
> +                 &xen_blkif_max_ring_order, 0644);
> +MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages.");
> +
>  /* Run-time switchable: /sys/module/blkback/parameters/ */
>  static unsigned int log_stats;
>  module_param(log_stats, int, 0644);
> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
> index d0ee7ed..5f33a1a 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -126,6 +126,8 @@ struct blkif_x86_64_response {
>        int16_t         status;          /* BLKIF_RSP_???       */
>  };
>
> +extern int xen_blkif_max_ring_order;
> +
>  DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
>                  struct blkif_common_response);
>  DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
> index 24a2fb5..7a9d71d 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>        return blkif;
>  }
>
> -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
> -                        unsigned int evtchn)
> +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],
> +                        unsigned int ring_order, unsigned int evtchn)
>  {
>        int err;
>
> @@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
>        if (blkif->irq)
>                return 0;
>
> -       err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
> +       err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order,
> +                                    &blkif->blk_ring);
>        if (err < 0)
>                return err;
>
> @@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
>        {
>                struct blkif_sring *sring;
>                sring = (struct blkif_sring *)blkif->blk_ring;
> -               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
> +               BACK_RING_INIT(&blkif->blk_rings.native, sring,
> +                              PAGE_SIZE << ring_order);
>                break;
>        }
>        case BLKIF_PROTOCOL_X86_32:
>        {
>                struct blkif_x86_32_sring *sring_x86_32;
>                sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
> -               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
> +               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
> +                              PAGE_SIZE << ring_order);
>                break;
>        }
>        case BLKIF_PROTOCOL_X86_64:
>        {
>                struct blkif_x86_64_sring *sring_x86_64;
>                sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
> -               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
> +               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
> +                              PAGE_SIZE << ring_order);
>                break;
>        }
>        default:
> @@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
>        if (err)
>                goto fail;
>
> +       err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order",
> +                           "%u", xen_blkif_max_ring_order);
> +       if (err)
> +               goto fail;
> +
>        err = xenbus_switch_state(dev, XenbusStateInitWait);
>        if (err)
>                goto fail;
> @@ -744,22 +753,80 @@ again:
>  static int connect_ring(struct backend_info *be)
>  {
>        struct xenbus_device *dev = be->dev;
> -       unsigned long ring_ref;
> +       int ring_ref[XENBUS_MAX_RING_PAGES];
> +       unsigned int ring_order;
>        unsigned int evtchn;
>        char protocol[64] = "";
>        int err;
>
>        DPRINTK("%s", dev->otherend);
>
> -       err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
> -                           &ring_ref, "event-channel", "%u", &evtchn, NULL);
> -       if (err) {
> -               xenbus_dev_fatal(dev, err,
> -                                "reading %s/ring-ref and event-channel",
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
> +                          &evtchn);
> +       if (err != 1) {
> +               err = -EINVAL;
> +
> +               xenbus_dev_fatal(dev, err, "reading %s/event-channel",
>                                 dev->otherend);
>                return err;
>        }
>
> +       printk(KERN_INFO "blkback: event-channel %u\n", evtchn);
> +
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
> +                          &ring_order);
> +       if (err != 1) {
> +               DPRINTK("%s: using single page handshake", dev->otherend);
> +
> +               ring_order = 0;
> +
> +               err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
> +                                  "%d", &ring_ref[0]);
> +               if (err != 1) {
> +                       err = -EINVAL;
> +
> +                       xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
> +                                        dev->otherend);
> +                       return err;
> +               }
> +
> +               printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]);
> +       } else {
> +               unsigned int i;
> +
> +               if (ring_order > xen_blkif_max_ring_order) {
> +                       err = -EINVAL;
> +
> +                       xenbus_dev_fatal(dev, err,
> +                                        "%s/ring-page-order too big",
> +                                        dev->otherend);
> +                       return err;
> +               }
> +
> +               for (i = 0; i < (1u << ring_order); i++) {
> +                       char ring_ref_name[10];
> +
> +                       snprintf(ring_ref_name, sizeof(ring_ref_name),
> +                                "ring-ref%u", i);
> +
> +                       err = xenbus_scanf(XBT_NIL, dev->otherend,
> +                                          ring_ref_name, "%d",
> +                                          &ring_ref[i]);
> +                       if (err != 1) {
> +                               err = -EINVAL;
> +
> +                               xenbus_dev_fatal(dev, err,
> +                                                "reading %s/%s",
> +                                                dev->otherend,
> +                                                ring_ref_name);
> +                               return err;
> +                       }
> +
> +                       printk(KERN_INFO "blkback: ring-ref%u %d\n", i,
> +                              ring_ref[i]);
> +               }
> +       }
> +
>        be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
>        err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
>                            "%63s", protocol, NULL);
> @@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be)
>                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
>                return -1;
>        }
> -       pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
> -               ring_ref, evtchn, be->blkif->blk_protocol, protocol);
>
>        /* Map the shared frame, irq etc. */
> -       err = xen_blkif_map(be->blkif, ring_ref, evtchn);
> +       err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn);
>        if (err) {
> -               xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
> -                                ring_ref, evtchn);
> +               xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn");
>                return err;
>        }
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 2f22874..485813a 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -57,6 +57,10 @@
>
>  #include <asm/xen/hypervisor.h>
>
> +static int xen_blkif_ring_order;
> +module_param_named(reqs, xen_blkif_ring_order, int, 0);
> +MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages.");
> +
>  enum blkif_state {
>        BLKIF_STATE_DISCONNECTED,
>        BLKIF_STATE_CONNECTED,
> @@ -72,7 +76,8 @@ struct blk_shadow {
>  static DEFINE_MUTEX(blkfront_mutex);
>  static const struct block_device_operations xlvbd_block_fops;
>
> -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
> +#define BLK_RING_SIZE(_order)  __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order))
> +#define BLK_MAX_RING_SIZE      BLK_RING_SIZE(XENBUS_MAX_RING_ORDER)
>
>  /*
>  * We have one of these per vbd, whether ide, scsi or 'other'.  They
> @@ -87,14 +92,15 @@ struct blkfront_info
>        int vdevice;
>        blkif_vdev_t handle;
>        enum blkif_state connected;
> -       int ring_ref;
> +       int ring_ref[XENBUS_MAX_RING_PAGES];
> +       int ring_order;
>        struct blkif_front_ring ring;
>        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>        unsigned int evtchn, irq;
>        struct request_queue *rq;
>        struct work_struct work;
>        struct gnttab_free_callback callback;
> -       struct blk_shadow shadow[BLK_RING_SIZE];
> +       struct blk_shadow shadow[BLK_MAX_RING_SIZE];
>        unsigned long shadow_free;
>        unsigned int feature_flush;
>        unsigned int flush_op;
> @@ -111,9 +117,7 @@ static unsigned int nr_minors;
>  static unsigned long *minors;
>  static DEFINE_SPINLOCK(minor_lock);
>
> -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
> -       (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
> -#define GRANT_INVALID_REF      0
> +#define GRANT_INVALID_REF      0
>
>  #define PARTS_PER_DISK         16
>  #define PARTS_PER_EXT_DISK      256
> @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock);
>  static int get_id_from_freelist(struct blkfront_info *info)
>  {
>        unsigned long free = info->shadow_free;
> -       BUG_ON(free >= BLK_RING_SIZE);
> +       BUG_ON(free >= BLK_MAX_RING_SIZE);
>        info->shadow_free = info->shadow[free].req.u.rw.id;
>        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
>        return free;
> @@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work)
>
>  static void blkif_free(struct blkfront_info *info, int suspend)
>  {
> +       int i;
> +
>        /* Prevent new requests being issued until we fix things up. */
>        spin_lock_irq(&blkif_io_lock);
>        info->connected = suspend ?
> @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>        flush_work_sync(&info->work);
>
>        /* Free resources associated with old device channel. */
> -       if (info->ring_ref != GRANT_INVALID_REF) {
> -               gnttab_end_foreign_access(info->ring_ref, 0,
> -                                         (unsigned long)info->ring.sring);
> -               info->ring_ref = GRANT_INVALID_REF;
> -               info->ring.sring = NULL;
> +       for (i = 0; i < (1 << info->ring_order); i++) {
> +               if (info->ring_ref[i] != GRANT_INVALID_REF) {
> +                       gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
> +                       info->ring_ref[i] = GRANT_INVALID_REF;
> +               }
>        }
> +
> +       free_pages((unsigned long)info->ring.sring, info->ring_order);
> +       info->ring.sring = NULL;
> +
>        if (info->irq)
>                unbind_from_irqhandler(info->irq, info);
>        info->evtchn = info->irq = 0;
> -
>  }
>
>  static void blkif_completion(struct blk_shadow *s)
> @@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev,
>        struct blkif_sring *sring;
>        int err;
>
> -       info->ring_ref = GRANT_INVALID_REF;
> -
> -       sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
> +       sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
> +                                                      info->ring_order);
>        if (!sring) {
>                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
>                return -ENOMEM;
>        }
>        SHARED_RING_INIT(sring);
> -       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
> +       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order);
>
>        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
>
> -       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
> +       err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order,
> +                               info->ring_ref);
>        if (err < 0) {
> -               free_page((unsigned long)sring);
> +               free_pages((unsigned long)sring, info->ring_order);
>                info->ring.sring = NULL;
>                goto fail;
>        }
> -       info->ring_ref = err;
>
>        err = xenbus_alloc_evtchn(dev, &info->evtchn);
>        if (err)
> @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev,
>  {
>        const char *message = NULL;
>        struct xenbus_transaction xbt;
> +       unsigned int ring_order;
> +       int legacy_backend;
> +       int i;
>        int err;
>
> +       for (i = 0; i < (1 << info->ring_order); i++)
> +               info->ring_ref[i] = GRANT_INVALID_REF;
> +
> +       err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u",
> +                          &ring_order);
> +
> +       legacy_backend = !(err == 1);
> +
> +       if (legacy_backend) {
> +               info->ring_order = 0;
> +       } else {
> +               info->ring_order = (ring_order <= xen_blkif_ring_order) ?
> +                                  ring_order :
> +                                  xen_blkif_ring_order;
> +       }
> +
>        /* Create shared ring, alloc event channel. */
>        err = setup_blkring(dev, info);
>        if (err)
> @@ -889,12 +916,35 @@ again:
>                goto destroy_blkring;
>        }
>
> -       err = xenbus_printf(xbt, dev->nodename,
> -                           "ring-ref", "%u", info->ring_ref);
> -       if (err) {
> -               message = "writing ring-ref";
> -               goto abort_transaction;
> +       if (legacy_backend) {
> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-ref", "%d", info->ring_ref[0]);
> +               if (err) {
> +                       message = "writing ring-ref";
> +                       goto abort_transaction;
> +               }
> +       } else {
> +               for (i = 0; i < (1 << info->ring_order); i++) {
> +                       char key[sizeof("ring-ref") + 2];
> +
> +                       sprintf(key, "ring-ref%d", i);
> +
> +                       err = xenbus_printf(xbt, dev->nodename,
> +                                           key, "%d", info->ring_ref[i]);
> +                       if (err) {
> +                               message = "writing ring-ref";
> +                               goto abort_transaction;
> +                       }
> +               }
> +
> +               err = xenbus_printf(xbt, dev->nodename,
> +                                   "ring-page-order", "%u", info->ring_order);
> +               if (err) {
> +                       message = "writing ring-order";
> +                       goto abort_transaction;
> +               }
>        }
> +
>        err = xenbus_printf(xbt, dev->nodename,
>                            "event-channel", "%u", info->evtchn);
>        if (err) {
> @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev,
>        info->connected = BLKIF_STATE_DISCONNECTED;
>        INIT_WORK(&info->work, blkif_restart_queue);
>
> -       for (i = 0; i < BLK_RING_SIZE; i++)
> +       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
>                info->shadow[i].req.u.rw.id = i+1;
> -       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
> +       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
>
>        /* Front end dir is a number, which is used as the id. */
>        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
>        dev_set_drvdata(&dev->dev, info);
>
> -       err = talk_to_blkback(dev, info);
> -       if (err) {
> -               kfree(info);
> -               dev_set_drvdata(&dev->dev, NULL);
> -               return err;
> -       }
> -
>        return 0;
>  }
>
> @@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info)
>
>        /* Stage 2: Set up free list. */
>        memset(&info->shadow, 0, sizeof(info->shadow));
> -       for (i = 0; i < BLK_RING_SIZE; i++)
> +       for (i = 0; i < BLK_MAX_RING_SIZE; i++)
>                info->shadow[i].req.u.rw.id = i+1;
>        info->shadow_free = info->ring.req_prod_pvt;
> -       info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
> +       info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
>
>        /* Stage 3: Find pending requests and requeue them. */
> -       for (i = 0; i < BLK_RING_SIZE; i++) {
> +       for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) {
>                /* Not in use? */
>                if (!copy[i].request)
>                        continue;
> @@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev,
>
>        switch (backend_state) {
>        case XenbusStateInitialising:
> -       case XenbusStateInitWait:
>        case XenbusStateInitialised:
>        case XenbusStateReconfiguring:
>        case XenbusStateReconfigured:
> @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev,
>        case XenbusStateClosed:
>                break;
>
> +       case XenbusStateInitWait:
> +               talk_to_blkback(dev, info);
> +               break;
> +
>        case XenbusStateConnected:
>                blkfront_connect(info);
>                break;
> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
> index 94b79c3..f93b59a 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif);
>  /* (Un)Map communication rings. */
>  void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
>  int xen_netbk_map_frontend_rings(struct xenvif *vif,
> -                                grant_ref_t tx_ring_ref,
> -                                grant_ref_t rx_ring_ref);
> +                                int tx_ring_ref,
> +                                int rx_ring_ref);
>
>  /* (De)Register a xenvif with the netback backend. */
>  void xen_netbk_add_xenvif(struct xenvif *vif);
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 59effac..0b014cf 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
>  }
>
>  int xen_netbk_map_frontend_rings(struct xenvif *vif,
> -                                grant_ref_t tx_ring_ref,
> -                                grant_ref_t rx_ring_ref)
> +                                int tx_ring_ref,
> +                                int rx_ring_ref)
>  {
>        void *addr;
>        struct xen_netif_tx_sring *txs;
> @@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
>        int err = -ENOMEM;
>
>        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
> -                                    tx_ring_ref, &addr);
> +                                    &tx_ring_ref, 1, &addr);
>        if (err)
>                goto err;
>
> @@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
>        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
>
>        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
> -                                    rx_ring_ref, &addr);
> +                                    &rx_ring_ref, 1, &addr);
>        if (err)
>                goto err;
>
> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> index 698b905..521a595 100644
> --- a/drivers/net/xen-netfront.c
> +++ b/drivers/net/xen-netfront.c
> @@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
>        SHARED_RING_INIT(txs);
>        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
>
> -       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
> +       err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref);
>        if (err < 0) {
>                free_page((unsigned long)txs);
>                goto fail;
>        }
>
> -       info->tx_ring_ref = err;
>        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
>        if (!rxs) {
>                err = -ENOMEM;
> @@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
>        SHARED_RING_INIT(rxs);
>        FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
>
> -       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
> +       err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref);
>        if (err < 0) {
>                free_page((unsigned long)rxs);
>                goto fail;
>        }
> -       info->rx_ring_ref = err;
>
>        err = xenbus_alloc_evtchn(dev, &info->evtchn);
>        if (err)
> diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
> index 1620088..95109d8 100644
> --- a/drivers/pci/xen-pcifront.c
> +++ b/drivers/pci/xen-pcifront.c
> @@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
>        int err = 0;
>        struct xenbus_transaction trans;
>
> -       err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
> +       err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref);
>        if (err < 0)
>                goto out;
>
> -       pdev->gnt_ref = err;
> -
>        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
>        if (err)
>                goto out;
> diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
> index 64b11f9..e0834cd 100644
> --- a/drivers/xen/xen-pciback/xenbus.c
> +++ b/drivers/xen/xen-pciback/xenbus.c
> @@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
>                "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
>                gnt_ref, remote_evtchn);
>
> -       err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
> +       err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
>        if (err < 0) {
>                xenbus_dev_fatal(pdev->xdev, err,
>                                "Error mapping other domain page in ours.");
> diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
> index 566d2ad..3a14524 100644
> --- a/drivers/xen/xenbus/xenbus_client.c
> +++ b/drivers/xen/xenbus/xenbus_client.c
> @@ -53,14 +53,16 @@ struct xenbus_map_node {
>                struct vm_struct *area; /* PV */
>                struct page *page;     /* HVM */
>        };
> -       grant_handle_t handle;
> +       grant_handle_t handle[XENBUS_MAX_RING_PAGES];
> +       unsigned int   nr_handles;
>  };
>
>  static DEFINE_SPINLOCK(xenbus_valloc_lock);
>  static LIST_HEAD(xenbus_valloc_pages);
>
>  struct xenbus_ring_ops {
> -       int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
> +       int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts,
> +                  void **vaddr);
>        int (*unmap)(struct xenbus_device *dev, void *vaddr);
>  };
>
> @@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
>  /**
>  * xenbus_grant_ring
>  * @dev: xenbus device
> - * @ring_mfn: mfn of ring to grant
> -
> - * Grant access to the given @ring_mfn to the peer of the given device.  Return
> - * 0 on success, or -errno on error.  On error, the device will switch to
> - * XenbusStateClosing, and the error will be saved in the store.
> + * @vaddr: starting virtual address of the ring
> + * @nr_pages: number of page to be granted
> + * @grefs: grant reference array to be filled in
> + * Grant access to the given @vaddr to the peer of the given device.
> + * Then fill in @grefs with grant references.  Return 0 on success, or
> + * -errno on error.  On error, the device will switch to
> + * XenbusStateClosing, and the first error will be saved in the store.
>  */
> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
> +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
> +                     int nr_pages, int grefs[])
>  {
> -       int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
> -       if (err < 0)
> -               xenbus_dev_fatal(dev, err, "granting access to ring page");
> +       int i;
> +       int err;
> +
> +       for (i = 0; i < nr_pages; i++) {
> +               unsigned long addr = (unsigned long)vaddr +
> +                       (PAGE_SIZE * i);
> +               err = gnttab_grant_foreign_access(dev->otherend_id,
> +                                                 virt_to_mfn(addr), 0);
> +               if (err < 0) {
> +                       xenbus_dev_fatal(dev, err,
> +                                        "granting access to ring page");
> +                       goto fail;
> +               }
> +               grefs[i] = err;
> +       }
> +
> +       return 0;
> +
> +fail:
> +       for ( ; i >= 0; i--)
> +               gnttab_end_foreign_access_ref(grefs[i], 0);
>        return err;
>  }
>  EXPORT_SYMBOL_GPL(xenbus_grant_ring);
> @@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
>  /**
>  * xenbus_map_ring_valloc
>  * @dev: xenbus device
> - * @gnt_ref: grant reference
> + * @gnt_ref: grant reference array
> + * @nr_grefs: number of grant reference
>  * @vaddr: pointer to address to be filled out by mapping
>  *
>  * Based on Rusty Russell's skeleton driver's map_page.
> @@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
>  * or -ENOMEM on error. If an error is returned, device will switch to
>  * XenbusStateClosing and the error message will be saved in XenStore.
>  */
> -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
> +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
> +                          int nr_grefs, void **vaddr)
>  {
> -       return ring_ops->map(dev, gnt_ref, vaddr);
> +       return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr);
>  }
>  EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
>
> +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
> +                                       struct xenbus_map_node *node);
> +
>  static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
> -                                    int gnt_ref, void **vaddr)
> +                                    int gnt_ref[], int nr_grefs, void **vaddr)
>  {
> -       struct gnttab_map_grant_ref op = {
> -               .flags = GNTMAP_host_map | GNTMAP_contains_pte,
> -               .ref   = gnt_ref,
> -               .dom   = dev->otherend_id,
> -       };
> +       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
>        struct xenbus_map_node *node;
>        struct vm_struct *area;
> -       pte_t *pte;
> +       pte_t *pte[XENBUS_MAX_RING_PAGES];
> +       int i;
> +       int err = 0;
> +
> +       if (nr_grefs > XENBUS_MAX_RING_PAGES)
> +               return -EINVAL;
>
>        *vaddr = NULL;
>
> @@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
>        if (!node)
>                return -ENOMEM;
>
> -       area = alloc_vm_area(PAGE_SIZE, &pte);
> +       area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
>        if (!area) {
>                kfree(node);
>                return -ENOMEM;
>        }
>
> -       op.host_addr = arbitrary_virt_to_machine(pte).maddr;
> +       for (i = 0; i < nr_grefs; i++) {
> +               op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte,
> +               op[i].ref   = gnt_ref[i],
> +               op[i].dom   = dev->otherend_id,
> +               op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr;
> +       };
>
>        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
>                BUG();
>
> -       if (op.status != GNTST_okay) {
> -               free_vm_area(area);
> -               kfree(node);
> -               xenbus_dev_fatal(dev, op.status,
> -                                "mapping in shared page %d from domain %d",
> -                                gnt_ref, dev->otherend_id);
> -               return op.status;
> +       node->nr_handles = nr_grefs;
> +       node->area = area;
> +
> +       for (i = 0; i < nr_grefs; i++) {
> +               if (op[i].status != GNTST_okay) {
> +                       err = op[i].status;
> +                       node->handle[i] = INVALID_GRANT_HANDLE;
> +                       continue;
> +               }
> +               node->handle[i] = op[i].handle;
>        }
>
> -       node->handle = op.handle;
> -       node->area = area;
> +       if (err != 0) {
> +               for (i = 0; i < nr_grefs; i++)
> +                       xenbus_dev_fatal(dev, op[i].status,
> +                               "mapping in shared page %d from domain %d",
> +                               gnt_ref[i], dev->otherend_id);
> +
> +                __xenbus_unmap_ring_vfree_pv(dev, node);
> +
> +               return err;
> +       }
>
>        spin_lock(&xenbus_valloc_lock);
>        list_add(&node->next, &xenbus_valloc_pages);
> @@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
>  }
>
>  static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
> -                                     int gnt_ref, void **vaddr)
> +                                     int gnt_ref[], int nr_grefs, void **vaddr)
>  {
>        struct xenbus_map_node *node;
>        int err;
>        void *addr;
>
> +       if (nr_grefs > XENBUS_MAX_RING_PAGES)
> +               return -EINVAL;
> +
>        *vaddr = NULL;
>
>        node = kzalloc(sizeof(*node), GFP_KERNEL);
>        if (!node)
>                return -ENOMEM;
>
> -       err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
> +       err = alloc_xenballooned_pages(nr_grefs, &node->page,
> +                                      false /* lowmem */);
>        if (err)
>                goto out_err;
>
>        addr = pfn_to_kaddr(page_to_pfn(node->page));
>
> -       err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
> +       err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr);
>        if (err)
>                goto out_err;
>
> @@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
>        return 0;
>
>  out_err:
> -       free_xenballooned_pages(1, &node->page);
> +       free_xenballooned_pages(nr_grefs, &node->page);
>        kfree(node);
>        return err;
>  }
> @@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
>  /**
>  * xenbus_map_ring
>  * @dev: xenbus device
> - * @gnt_ref: grant reference
> - * @handle: pointer to grant handle to be filled
> + * @gnt_ref: grant reference array
> + * @nr_grefs: number of grant references
> + * @handle: pointer to grant handle array to be filled, mind the size
>  * @vaddr: address to be mapped to
>  *
> - * Map a page of memory into this domain from another domain's grant table.
> + * Map pages of memory into this domain from another domain's grant table.
>  * xenbus_map_ring does not allocate the virtual address space (you must do
> - * this yourself!). It only maps in the page to the specified address.
> + * this yourself!). It only maps in the pages to the specified address.
>  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
>  * or -ENOMEM on error. If an error is returned, device will switch to
> - * XenbusStateClosing and the error message will be saved in XenStore.
> + * XenbusStateClosing and the last error message will be saved in XenStore.
>  */
> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> -                   grant_handle_t *handle, void *vaddr)
> +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
> +                   grant_handle_t handle[], void *vaddr)
>  {
> -       struct gnttab_map_grant_ref op;
> -
> -       gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
> -                         dev->otherend_id);
> +       struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES];
> +       int i;
> +       int err = GNTST_okay;   /* 0 */
> +
> +       for (i = 0; i < nr_grefs; i++) {
> +               unsigned long addr = (unsigned long)vaddr +
> +                       (PAGE_SIZE * i);
> +               gnttab_set_map_op(&op[i], (phys_addr_t)addr,
> +                                 GNTMAP_host_map, gnt_ref[i],
> +                                 dev->otherend_id);
> +       }
>
> -       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
> +       if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs))
>                BUG();
>
> -       if (op.status != GNTST_okay) {
> -               xenbus_dev_fatal(dev, op.status,
> -                                "mapping in shared page %d from domain %d",
> -                                gnt_ref, dev->otherend_id);
> -       } else
> -               *handle = op.handle;
> +       for (i = 0; i < nr_grefs; i++) {
> +               if (op[i].status != GNTST_okay) {
> +                       err = op[i].status;
> +                       xenbus_dev_fatal(dev, err,
> +                               "mapping in shared page %d from domain %d",
> +                               gnt_ref[i], dev->otherend_id);
> +                       handle[i] = INVALID_GRANT_HANDLE;
> +               } else
> +                       handle[i] = op[i].handle;
> +       }
>
> -       return op.status;
> +       if (err != GNTST_okay)
> +               xenbus_unmap_ring(dev, handle, nr_grefs, vaddr);
> +
> +       return err;
>  }
>  EXPORT_SYMBOL_GPL(xenbus_map_ring);
>
> @@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
>  }
>  EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
>
> +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
> +                                       struct xenbus_map_node *node)
> +{
> +       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
> +       unsigned int level;
> +       int i, j;
> +       int err = GNTST_okay;
> +
> +       j = 0;
> +       for (i = 0; i < node->nr_handles; i++) {
> +               unsigned long vaddr = (unsigned long)node->area->addr +
> +                       (PAGE_SIZE * i);
> +               if (node->handle[i] != INVALID_GRANT_HANDLE) {
> +                       memset(&op[j], 0, sizeof(op[0]));
> +                       op[j].host_addr = arbitrary_virt_to_machine(
> +                                       lookup_address(vaddr, &level)).maddr;
> +                       op[j].handle = node->handle[i];
> +                       j++;
> +                       node->handle[i] = INVALID_GRANT_HANDLE;
> +               }
> +       }
> +
> +       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
> +               BUG();
> +
> +       node->nr_handles = 0;
> +
> +       for (i = 0; i < j; i++) {
> +               if (op[i].status != GNTST_okay) {
> +                       err = op[i].status;
> +                       xenbus_dev_error(dev, err,
> +                               "unmapping page %d at handle %d error %d",
> +                               i, op[i].handle, err);
> +               }
> +       }
> +
> +       if (err == GNTST_okay)
> +               free_vm_area(node->area);
> +
> +       kfree(node);
> +
> +       return err;
> +}
> +
>  static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
>  {
>        struct xenbus_map_node *node;
> -       struct gnttab_unmap_grant_ref op = {
> -               .host_addr = (unsigned long)vaddr,
> -       };
> -       unsigned int level;
>
>        spin_lock(&xenbus_valloc_lock);
>        list_for_each_entry(node, &xenbus_valloc_pages, next) {
> @@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
>
>        if (!node) {
>                xenbus_dev_error(dev, -ENOENT,
> -                                "can't find mapped virtual address %p", vaddr);
> +                               "can't find mapped virtual address %p", vaddr);
>                return GNTST_bad_virt_addr;
>        }
>
> -       op.handle = node->handle;
> -       op.host_addr = arbitrary_virt_to_machine(
> -               lookup_address((unsigned long)vaddr, &level)).maddr;
> -
> -       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
> -               BUG();
> -
> -       if (op.status == GNTST_okay)
> -               free_vm_area(node->area);
> -       else
> -               xenbus_dev_error(dev, op.status,
> -                                "unmapping page at handle %d error %d",
> -                                node->handle, op.status);
> -
> -       kfree(node);
> -       return op.status;
> +       return __xenbus_unmap_ring_vfree_pv(dev, node);
>  }
>
>  static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
>  {
>        int rv;
>        struct xenbus_map_node *node;
> -       void *addr;
> +       void *addr = NULL;
>
>        spin_lock(&xenbus_valloc_lock);
>        list_for_each_entry(node, &xenbus_valloc_pages, next) {
> @@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
>
>        if (!node) {
>                xenbus_dev_error(dev, -ENOENT,
> -                                "can't find mapped virtual address %p", vaddr);
> +                               "can't find mapped virtual address %p", vaddr);
>                return GNTST_bad_virt_addr;
>        }
>
> -       rv = xenbus_unmap_ring(dev, node->handle, addr);
> +       rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr);
>
>        if (!rv)
> -               free_xenballooned_pages(1, &node->page);
> +               free_xenballooned_pages(node->nr_handles, &node->page);
>        else
>                WARN(1, "Leaking %p\n", vaddr);
>
> @@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
>  * xenbus_unmap_ring
>  * @dev: xenbus device
>  * @handle: grant handle
> + * @nr_handles: number of grant handle
>  * @vaddr: addr to unmap
>  *
>  * Unmap a page of memory in this domain that was imported from another domain.
> @@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
>  * (see xen/include/interface/grant_table.h).
>  */
>  int xenbus_unmap_ring(struct xenbus_device *dev,
> -                     grant_handle_t handle, void *vaddr)
> +                       grant_handle_t handle[], int nr_handles,
> +                       void *vaddr)
>  {
> -       struct gnttab_unmap_grant_ref op;
> -
> -       gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
> +       struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES];
> +       int i, j;
> +       int err = GNTST_okay;
> +
> +       j = 0;
> +       for (i = 0; i < nr_handles; i++) {
> +               unsigned long addr = (unsigned long)vaddr +
> +                       (PAGE_SIZE * i);
> +               if (handle[i] != INVALID_GRANT_HANDLE) {
> +                       gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr,
> +                                           GNTMAP_host_map, handle[i]);
> +                       handle[i] = INVALID_GRANT_HANDLE;
> +               }
> +       }
>
> -       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
> +       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j))
>                BUG();
>
> -       if (op.status != GNTST_okay)
> -               xenbus_dev_error(dev, op.status,
> -                                "unmapping page at handle %d error %d",
> -                                handle, op.status);
> +       for (i = 0; i < j; i++) {
> +               if (op[i].status != GNTST_okay) {
> +                       err = op[i].status;
> +                       xenbus_dev_error(dev, err,
> +                               "unmapping page at handle %d error %d",
> +                               handle[i], err);
> +               }
> +       }
>
> -       return op.status;
> +       return err;
>  }
>  EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
>
> diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
> index 3864967..62b92d2 100644
> --- a/drivers/xen/xenbus/xenbus_probe.c
> +++ b/drivers/xen/xenbus/xenbus_probe.c
> @@ -718,6 +718,7 @@ static int __init xenstored_local_init(void)
>        return err;
>  }
>
> +extern void xenbus_ring_ops_init(void);
>  static int __init xenbus_init(void)
>  {
>        int err = 0;
> @@ -767,6 +768,8 @@ static int __init xenbus_init(void)
>        proc_mkdir("xen", NULL);
>  #endif
>
> +       xenbus_ring_ops_init();
> +
>  out_error:
>        return err;
>  }
> diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
> index e8c599b..cdbd948 100644
> --- a/include/xen/xenbus.h
> +++ b/include/xen/xenbus.h
> @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
>                         const char *pathfmt, ...);
>
>  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> -                          int gnt_ref, void **vaddr);
> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> -                          grant_handle_t *handle, void *vaddr);
> +
> +#define        XENBUS_MAX_RING_ORDER   2
> +#define        XENBUS_MAX_RING_PAGES   (1 << XENBUS_MAX_RING_ORDER)
> +
> +#define INVALID_GRANT_HANDLE           (~0U)
> +
> +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
> +                     int nr_pages, int grefs[]);
> +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[],
> +                          int nr_grefs, void **vaddr);
> +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs,
> +                   grant_handle_t handle[], void *vaddr);
>
>  int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
>  int xenbus_unmap_ring(struct xenbus_device *dev,
> -                     grant_handle_t handle, void *vaddr);
> +                     grant_handle_t handle[], int nr_handles,
> +                     void *vaddr);
>
>  int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
>  int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-06 17:20             ` Konrad Rzeszutek Wilk
  2012-03-07  9:33                 ` Jan Beulich
@ 2012-03-07  9:33                 ` Jan Beulich
  1 sibling, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-07  9:33 UTC (permalink / raw)
  To: Santosh Jodh, konrad
  Cc: David Vrabel, Ian Campbell, Paul Durrant, waldi@debian.org,
	weiyi.huang@gmail.com, jeremy@goop.org, akpm@linux-foundation.org,
	virtualization@lists.linux-foundation.org,
	xen-devel@lists.xen.org, joe.jin@oracle.com,
	konrad.wilk@oracle.com, lersek@redhat.com, rusty@rustcorp.com.au,
	dgdegra@tycho.nsa.gov, linux-kernel@vger.kernel.org,
	linux-pci@vger.kernel.org, netdev@vger.kernel.org,
	jbarnes@virtuousgeek.org, paul.gortmaker@windriver.com

>>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
>  -> the usage of XenbusStateInitWait? Why do we introduce that? Looks
> like a fix to something.

No, this is required to get the negotiation working (the frontend must
not try to read the new nodes until it can be certain that the backend
populated them). However, as already pointed out in an earlier reply
to Santosh, the way this is done here doesn't appear to allow for the
backend to already be in InitWait state when the frontend gets
invoked.

> -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
> default size for SSD usage? 16?

What do SSDs have to do with a XenBus definition? Imo it's wrong (and
unnecessary) to introduce a limit at the XenBus level at all - each driver
can do this for itself.

As to the limit for SSDs in the block interface - I don't think the number
of possibly simultaneous requests has anything to do with this. Instead,
I'd expect the request number/size/segments extension that NetBSD
apparently implements to possibly have an effect.

Jan


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
@ 2012-03-07  9:33                 ` Jan Beulich
  0 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-07  9:33 UTC (permalink / raw)
  To: Santosh Jodh, konrad
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	weiyi.huang@gmail.com, linux-kernel@vger.kernel.org,
	jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, David Vrabel,
	linux-pci@vger.kernel.org, akpm@linux-foundation.org,
	xen-devel@lists.xen.org, lersek@redhat.com, dgdegra

>>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
>  -> the usage of XenbusStateInitWait? Why do we introduce that? Looks
> like a fix to something.

No, this is required to get the negotiation working (the frontend must
not try to read the new nodes until it can be certain that the backend
populated them). However, as already pointed out in an earlier reply
to Santosh, the way this is done here doesn't appear to allow for the
backend to already be in InitWait state when the frontend gets
invoked.

> -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
> default size for SSD usage? 16?

What do SSDs have to do with a XenBus definition? Imo it's wrong (and
unnecessary) to introduce a limit at the XenBus level at all - each driver
can do this for itself.

As to the limit for SSDs in the block interface - I don't think the number
of possibly simultaneous requests has anything to do with this. Instead,
I'd expect the request number/size/segments extension that NetBSD
apparently implements to possibly have an effect.

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
@ 2012-03-07  9:33                 ` Jan Beulich
  0 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-07  9:33 UTC (permalink / raw)
  To: Santosh Jodh, konrad
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	weiyi.huang@gmail.com, linux-kernel@vger.kernel.org,
	jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, David Vrabel,
	linux-pci@vger.kernel.org, akpm@linux-foundation.org,
	xen-devel@lists.xen.org, lersek@redhat.com, dgdegra@tycho.nsa.gov

>>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
>  -> the usage of XenbusStateInitWait? Why do we introduce that? Looks
> like a fix to something.

No, this is required to get the negotiation working (the frontend must
not try to read the new nodes until it can be certain that the backend
populated them). However, as already pointed out in an earlier reply
to Santosh, the way this is done here doesn't appear to allow for the
backend to already be in InitWait state when the frontend gets
invoked.

> -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
> default size for SSD usage? 16?

What do SSDs have to do with a XenBus definition? Imo it's wrong (and
unnecessary) to introduce a limit at the XenBus level at all - each driver
can do this for itself.

As to the limit for SSDs in the block interface - I don't think the number
of possibly simultaneous requests has anything to do with this. Instead,
I'd expect the request number/size/segments extension that NetBSD
apparently implements to possibly have an effect.

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-06 17:20             ` Konrad Rzeszutek Wilk
  2012-03-07  9:33                 ` Jan Beulich
@ 2012-03-07  9:33               ` Jan Beulich
  1 sibling, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-07  9:33 UTC (permalink / raw)
  To: Santosh Jodh, konrad
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	rusty@rustcorp.com.au, weiyi.huang@gmail.com,
	linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, David Vrabel,
	linux-pci@vger.kernel.org, akpm@linux-foundation.org,
	xen-devel@lists.xen.org

>>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
>  -> the usage of XenbusStateInitWait? Why do we introduce that? Looks
> like a fix to something.

No, this is required to get the negotiation working (the frontend must
not try to read the new nodes until it can be certain that the backend
populated them). However, as already pointed out in an earlier reply
to Santosh, the way this is done here doesn't appear to allow for the
backend to already be in InitWait state when the frontend gets
invoked.

> -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
> default size for SSD usage? 16?

What do SSDs have to do with a XenBus definition? Imo it's wrong (and
unnecessary) to introduce a limit at the XenBus level at all - each driver
can do this for itself.

As to the limit for SSDs in the block interface - I don't think the number
of possibly simultaneous requests has anything to do with this. Instead,
I'd expect the request number/size/segments extension that NetBSD
apparently implements to possibly have an effect.

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-07  9:33                 ` Jan Beulich
@ 2012-03-07 15:15                   ` Konrad Rzeszutek Wilk
  -1 siblings, 0 replies; 73+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-03-07 15:15 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, netdev@vger.kernel.org, joe.jin,
	linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	xen-devel@lists.xen.org, paul.gortmaker@windriver.com,
	Paul Durrant, weiyi.huang@gmail.com, Santosh Jodh,
	linux-pci@vger.kernel.org, dgdegra,
	virtualization@lists.linux-foundation.org, lersek@redhat.com,
	akpm@linux-foundation.org, David


[-- Attachment #1.1: Type: text/plain, Size: 1374 bytes --]

On Mar 7, 2012 4:33 AM, "Jan Beulich" <JBeulich@suse.com> wrote:
>
> >>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
> >  -> the usage of XenbusStateInitWait? Why do we introduce that? Looks
> > like a fix to something.
>
> No, this is required to get the negotiation working (the frontend must
> not try to read the new nodes until it can be certain that the backend
> populated them). However, as already pointed out in an earlier reply
> to Santosh, the way this is done here doesn't appear to allow for the
> backend to already be in InitWait state when the frontend gets
> invoked.

OK.
>
> > -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
> > default size for SSD usage? 16?
>
> What do SSDs have to do with a XenBus definition? Imo it's wrong (and
> unnecessary) to introduce a limit at the XenBus level at all - each driver
> can do this for itself.

The patch should mention what the benefit of a multi-page ring is.
>
> As to the limit for SSDs in the block interface - I don't think the number
> of possibly simultaneous requests has anything to do with this. Instead,
> I'd expect the request number/size/segments extension that NetBSD
> apparently implements to possibly have an effect.

.. which sounds to me like increasing the bandwidth of the protocol. Should
be mentioned somewhere in the git description.
>
> Jan
>
>

[-- Attachment #1.2: Type: text/html, Size: 1774 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
@ 2012-03-07 15:15                   ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 73+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-03-07 15:15 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, netdev@vger.kernel.org, joe.jin,
	linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	xen-devel@lists.xen.org, paul.gortmaker@windriver.com,
	Paul Durrant, weiyi.huang@gmail.com, Santosh Jodh,
	linux-pci@vger.kernel.org, dgdegra,
	virtualization@lists.linux-foundation.org, lersek@redhat.com,
	akpm@linux-foundation.org, David 


[-- Attachment #1.1: Type: text/plain, Size: 1374 bytes --]

On Mar 7, 2012 4:33 AM, "Jan Beulich" <JBeulich@suse.com> wrote:
>
> >>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
> >  -> the usage of XenbusStateInitWait? Why do we introduce that? Looks
> > like a fix to something.
>
> No, this is required to get the negotiation working (the frontend must
> not try to read the new nodes until it can be certain that the backend
> populated them). However, as already pointed out in an earlier reply
> to Santosh, the way this is done here doesn't appear to allow for the
> backend to already be in InitWait state when the frontend gets
> invoked.

OK.
>
> > -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
> > default size for SSD usage? 16?
>
> What do SSDs have to do with a XenBus definition? Imo it's wrong (and
> unnecessary) to introduce a limit at the XenBus level at all - each driver
> can do this for itself.

The patch should mention what the benefit of multi ring is.
>
> As to the limit for SSDs in the block interface - I don't think the number
> of possibly simultaneous requests has anything to do with this. Instead,
> I'd expect the request number/size/segments extension that NetBSD
> apparently implements to possibly have an effect.

.. which sounds to me like increasing the bandwidth of the protocol. Should
be mentioned somewhere in the git description.
>
> Jan
>
>

[-- Attachment #1.2: Type: text/html, Size: 1774 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-07  9:33                 ` Jan Beulich
                                   ` (2 preceding siblings ...)
  (?)
@ 2012-03-07 15:15                 ` Konrad Rzeszutek Wilk
  -1 siblings, 0 replies; 73+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-03-07 15:15 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, netdev@vger.kernel.org, rusty@rustcorp.com.au,
	joe.jin, linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	xen-devel@lists.xen.org, paul.gortmaker@windriver.com,
	Paul Durrant, weiyi.huang@gmail.com, Santosh Jodh,
	linux-pci@vger.kernel.org, dgdegra,
	virtualization@lists.linux-foundation.org, lersek@redhat.com,
	akpm


[-- Attachment #1.1: Type: text/plain, Size: 1374 bytes --]

On Mar 7, 2012 4:33 AM, "Jan Beulich" <JBeulich@suse.com> wrote:
>
> >>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
> >  -> the usage of XenbusStateInitWait? Why do we introduce that? Looks
> > like a fix to something.
>
> No, this is required to get the negotiation working (the frontend must
> not try to read the new nodes until it can be certain that the backend
> populated them). However, as already pointed out in an earlier reply
> to Santosh, the way this is done here doesn't appear to allow for the
> backend to already be in InitWait state when the frontend gets
> invoked.

OK.
>
> > -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
> > default size for SSD usage? 16?
>
> What do SSDs have to do with a XenBus definition? Imo it's wrong (and
> unnecessary) to introduce a limit at the XenBus level at all - each driver
> can do this for itself.

The patch should mention what the benefit of multi ring is.
>
> As to the limit for SSDs in the block interface - I don't think the number
> of possibly simultaneous requests has anything to do with this. Instead,
> I'd expect the request number/size/segments extension that NetBSD
> apparently implements to possibly have an effect.

.. which sounds to me like increasing the bandwidth of the protocol. Should
be mentioned somewhere in the git description.
>
> Jan
>
>

[-- Attachment #1.2: Type: text/html, Size: 1774 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-05 20:35     ` David Miller
@ 2012-03-07 19:41       ` Rajesh Borundia
  -1 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-07 19:41 UTC (permalink / raw)
  To: David Miller
  Cc: santoshprasadnayak@gmail.com, Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org



> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Tuesday, March 06, 2012 2:05 AM
> To: Rajesh Borundia
> Cc: santoshprasadnayak@gmail.com; Sony Chacko; netdev; linux-kernel;
> kernel-janitors@vger.kernel.org
> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> From: Rajesh Borundia <rajesh.borundia@qlogic.com>
> Date: Mon, 5 Mar 2012 05:43:56 -0600
> 
> > I will review this and get back to you.
> 
> Please don't quote patch postings this way.
> 
> They look like new patch postings, and therefore get added to our patch
> tracking site.


Sorry about that. I will take care from next time.

Rajesh



^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-07 19:41       ` Rajesh Borundia
  0 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-07 19:41 UTC (permalink / raw)
  To: David Miller
  Cc: santoshprasadnayak@gmail.com, Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org



> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Tuesday, March 06, 2012 2:05 AM
> To: Rajesh Borundia
> Cc: santoshprasadnayak@gmail.com; Sony Chacko; netdev; linux-kernel;
> kernel-janitors@vger.kernel.org
> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> From: Rajesh Borundia <rajesh.borundia@qlogic.com>
> Date: Mon, 5 Mar 2012 05:43:56 -0600
> 
> > I will review this and get back to you.
> 
> Please don't quote patch postings this way.
> 
> They look like new patch postings, and therefore get added to our patch
> tracking site.


Sorry about that. I will take care from next time.

Rajesh



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-07 19:41       ` Rajesh Borundia
  (?)
@ 2012-03-09 15:13       ` santosh prasad nayak
  -1 siblings, 0 replies; 73+ messages in thread
From: santosh prasad nayak @ 2012-03-09 15:13 UTC (permalink / raw)
  To: Rajesh Borundia; +Cc: David Miller, Sony Chacko, netdev

Hi Rajesh,

Could I know the status of the review?


Regards
Santosh

On Thu, Mar 8, 2012 at 1:11 AM, Rajesh Borundia
<rajesh.borundia@qlogic.com> wrote:
>
>
>> -----Original Message-----
>> From: David Miller [mailto:davem@davemloft.net]
>> Sent: Tuesday, March 06, 2012 2:05 AM
>> To: Rajesh Borundia
>> Cc: santoshprasadnayak@gmail.com; Sony Chacko; netdev; linux-kernel;
>> kernel-janitors@vger.kernel.org
>> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
>>
>> From: Rajesh Borundia <rajesh.borundia@qlogic.com>
>> Date: Mon, 5 Mar 2012 05:43:56 -0600
>>
>> > I will review this and get back to you.
>>
>> Please don't quote patch postings this way.
>>
>> They look like new patch postings, and therefore get added to our patch
>> tracking site.
>
>
> Sorry about that. I will take care from next time.
>
> Rajesh
>
>

^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-03 15:59 ` santosh nayak
@ 2012-03-09 16:34   ` Rajesh Borundia
  -1 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-09 16:34 UTC (permalink / raw)
  To: santosh nayak, Sony Chacko
  Cc: netdev, linux-kernel, kernel-janitors@vger.kernel.org


> -----Original Message-----
> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
> Sent: Saturday, March 03, 2012 9:18 PM
> To: Sony Chacko
> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
> janitors@vger.kernel.org; Santosh Nayak
> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> From: Santosh Nayak <santoshprasadnayak@gmail.com>
> 
> Fix endian bug.
> 
> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
> ---
>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++-----
>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
>  3 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> index 2eeac32..b5de8a7 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
> 
>  struct nx_vlan_ip_list {
>  	struct list_head list;
> -	u32 ip_addr;
> +	__be32 ip_addr;
>  };
> 
>  /*
> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
> nx_host_sds_ring *sds_ring, int max);
>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
> cmd);
> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> int cmd);
>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
> enable);
>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
> linkup);
>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> index 6f37470..0f81287 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
> *adapter, int enable)
>  	return rv;
>  }
> 
> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
> cmd)
> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> int cmd)
>  {
>  	nx_nic_req_t req;
>  	u64 word;
> +	u64 ip_addr;
>  	int rv;
> 
>  	memset(&req, 0, sizeof(nx_nic_req_t));
> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter
> *adapter, u32 ip, int cmd)
>  	req.req_hdr = cpu_to_le64(word);
> 
>  	req.words[0] = cpu_to_le64(cmd);
> -	req.words[1] = cpu_to_le64(ip);
> +	ip_addr = be32_to_cpu(ip);
> +	*(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);

The adapter requires the IP value in big-endian format, stored at the lower
32-bit address. cpu_to_be64() will swap the lower 32 bits with the higher
32 bits, so the adapter will get an incorrect IP address. Instead you can do:

       u32 *ip_addr;
       ip_addr = (u32 *)&req.words[1];
       *ip_addr = ip;

     
> 
>  	rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
> *)&req, 1);
>  	if (rv != 0) {
> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
> netxen_adapter *adapter, u64 *mac)
>  	if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) ==
> -1)
>  		return -1;
> 
> -	if (*mac == cpu_to_le64(~0ULL)) {
> +	if (*mac == ~0ULL) {

*mac is in little endian format so compare it with cpu_to_le64.

> 
>  		offset = NX_OLD_MAC_ADDR_OFFSET +
>  			(adapter->portnum * sizeof(u64));
> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
> netxen_adapter *adapter, u64 *mac)
>  					offset, sizeof(u64), pmac) == -1)
>  			return -1;
> 
> -		if (*mac == cpu_to_le64(~0ULL))
> +		if (*mac == ~0ULL)

*mac here is in little endian format so compare it with cpu_to_le64.

>  			return -1;
>  	}
>  	return 0;
> @@ -2178,7 +2180,7 @@ lock_try:
>  		NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter->ahw.pci_base0,
> waddr);
>  		raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
>  		NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
> -		*data_buff++ = cpu_to_le32(val);
> +		*data_buff++ = val;

It should be cpu_to_le32, as the value is returned to a tool which requires
its output in little endian.

>  		fl_addr += sizeof(val);
>  	}
>  	readl((void __iomem *)(adapter->ahw.pci_base0 +
> NX_FLASH_SEM2_ULK));
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> index 8dc4a134..70783b4 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
> *adapter)
>  			adapter->driver_mismatch = 1;
>  			return;
>  		}
> -		ptr32[i] = cpu_to_le32(val);
> +		ptr32[i] = val;

Here val should be in little endian (cpu_to_le32), as it will be accessed as a byte array to print the serial number.

>  		offset += sizeof(u32);
>  	}
> 
> --
> 1.7.4.4
> 


Sorry for the late reply.

Rajesh


^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-09 16:34   ` Rajesh Borundia
  0 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-09 16:34 UTC (permalink / raw)
  To: santosh nayak, Sony Chacko
  Cc: netdev, linux-kernel, kernel-janitors@vger.kernel.org


> -----Original Message-----
> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
> Sent: Saturday, March 03, 2012 9:18 PM
> To: Sony Chacko
> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
> janitors@vger.kernel.org; Santosh Nayak
> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> From: Santosh Nayak <santoshprasadnayak@gmail.com>
> 
> Fix endian bug.
> 
> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
> ---
>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++-----
>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
>  3 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> index 2eeac32..b5de8a7 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
> 
>  struct nx_vlan_ip_list {
>  	struct list_head list;
> -	u32 ip_addr;
> +	__be32 ip_addr;
>  };
> 
>  /*
> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
> nx_host_sds_ring *sds_ring, int max);
>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
> cmd);
> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> int cmd);
>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
> enable);
>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
> linkup);
>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> index 6f37470..0f81287 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
> *adapter, int enable)
>  	return rv;
>  }
> 
> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
> cmd)
> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> int cmd)
>  {
>  	nx_nic_req_t req;
>  	u64 word;
> +	u64 ip_addr;
>  	int rv;
> 
>  	memset(&req, 0, sizeof(nx_nic_req_t));
> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter
> *adapter, u32 ip, int cmd)
>  	req.req_hdr = cpu_to_le64(word);
> 
>  	req.words[0] = cpu_to_le64(cmd);
> -	req.words[1] = cpu_to_le64(ip);
> +	ip_addr = be32_to_cpu(ip);
> +	*(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);

The adapter requires the IP value in big-endian format, stored at the lower
32-bit address. cpu_to_be64() will swap the lower 32 bits with the higher
32 bits, so the adapter will get an incorrect IP address. Instead you can do:

       u32 *ip_addr;
       ip_addr = (u32 *)&req.words[1];
       *ip_addr = ip;

     
> 
>  	rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
> *)&req, 1);
>  	if (rv != 0) {
> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
> netxen_adapter *adapter, u64 *mac)
>  	if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) ==
> -1)
>  		return -1;
> 
> -	if (*mac == cpu_to_le64(~0ULL)) {
> +	if (*mac == ~0ULL) {

*mac is in little endian format so compare it with cpu_to_le64.

> 
>  		offset = NX_OLD_MAC_ADDR_OFFSET +
>  			(adapter->portnum * sizeof(u64));
> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
> netxen_adapter *adapter, u64 *mac)
>  					offset, sizeof(u64), pmac) == -1)
>  			return -1;
> 
> -		if (*mac == cpu_to_le64(~0ULL))
> +		if (*mac == ~0ULL)

*mac here is in little endian format so compare it with cpu_to_le64.

>  			return -1;
>  	}
>  	return 0;
> @@ -2178,7 +2180,7 @@ lock_try:
>  		NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter->ahw.pci_base0,
> waddr);
>  		raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
>  		NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
> -		*data_buff++ = cpu_to_le32(val);
> +		*data_buff++ = val;

It should be cpu_to_le32, as the value is returned to a tool which requires
its output in little endian.

>  		fl_addr += sizeof(val);
>  	}
>  	readl((void __iomem *)(adapter->ahw.pci_base0 +
> NX_FLASH_SEM2_ULK));
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> index 8dc4a134..70783b4 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
> *adapter)
>  			adapter->driver_mismatch = 1;
>  			return;
>  		}
> -		ptr32[i] = cpu_to_le32(val);
> +		ptr32[i] = val;

Here val should be in little endian (cpu_to_le32), as it will be accessed as a byte array to print the serial number.

>  		offset += sizeof(u32);
>  	}
> 
> --
> 1.7.4.4
> 


Sorry for the late reply.

Rajesh


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-09 16:34   ` Rajesh Borundia
@ 2012-03-09 18:51     ` santosh prasad nayak
  -1 siblings, 0 replies; 73+ messages in thread
From: santosh prasad nayak @ 2012-03-09 18:50 UTC (permalink / raw)
  To: Rajesh Borundia
  Cc: Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org

On Fri, Mar 9, 2012 at 10:04 PM, Rajesh Borundia
<rajesh.borundia@qlogic.com> wrote:
>
>> -----Original Message-----
>> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
>> Sent: Saturday, March 03, 2012 9:18 PM
>> To: Sony Chacko
>> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
>> janitors@vger.kernel.org; Santosh Nayak
>> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
>>
>> From: Santosh Nayak <santoshprasadnayak@gmail.com>
>>
>> Fix endian bug.
>>
>> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
>> ---
>>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
>>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++-----
>>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
>>  3 files changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> index 2eeac32..b5de8a7 100644
>> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
>>
>>  struct nx_vlan_ip_list {
>>       struct list_head list;
>> -     u32 ip_addr;
>> +     __be32 ip_addr;
>>  };
>>
>>  /*
>> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
>> nx_host_sds_ring *sds_ring, int max);
>>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
>>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
>>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
>> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
>> cmd);
>> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
>> int cmd);
>>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
>> enable);
>>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
>> linkup);
>>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
>> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> index 6f37470..0f81287 100644
>> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
>> *adapter, int enable)
>>       return rv;
>>  }
>>
>> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
>> cmd)
>> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
>> int cmd)
>>  {
>>       nx_nic_req_t req;
>>       u64 word;
>> +     u64 ip_addr;
>>       int rv;
>>
>>       memset(&req, 0, sizeof(nx_nic_req_t));
>> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter
>> *adapter, u32 ip, int cmd)
>>       req.req_hdr = cpu_to_le64(word);
>>
>>       req.words[0] = cpu_to_le64(cmd);
>> -     req.words[1] = cpu_to_le64(ip);
>> +     ip_addr = be32_to_cpu(ip);
>> +     *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
>


> Adapter requires ip value in big endian stored at lower 32 bit address.
> The cpu_to_be64 will swap the lower 32 bit with higher 32 bit and adapter
> Will get incorrect ip addr. Instead u can do.
>
>       U32 *ip_addr;
>       ip_addr = (u32 *)&req.words[1];
>       *ip_addr = ip;


1.  It looks incomplete.
    In the call to "netxen_send_cmd_descs" we have to pass "&req" as the
    2nd argument, not "ip_addr".

2.  Your suggestion above assumes that the data type of the 2nd argument
    "ip" in "netxen_config_ipaddr()" is still "u32". This is not true.

    Some days back you suggested changing the data type to "__be32".
    In the present patch, "ip" is "__be32", i.e. already in big-endian
    format as required.
    We only need to convert this 32-bit value to 64 bits. There are two ways:

     a.   *(__be64 *)&req.words[1] = ip;   // auto conversion

     b.   *(__be64 *)&req.words[1] = cpu_to_be64(be32_to_cpu(ip));
            // explicit conversion.


 Please correct me if I am wrong.


regards
Santosh




>
>
>>
>>       rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
>> *)&req, 1);
>>       if (rv != 0) {
>> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
>> netxen_adapter *adapter, u64 *mac)
>>       if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) ==
>> -1)
>>               return -1;
>>
>> -     if (*mac == cpu_to_le64(~0ULL)) {
>> +     if (*mac == ~0ULL) {
>
> *mac is in little endian format so compare it with cpu_to_le64.
>
>>
>>               offset = NX_OLD_MAC_ADDR_OFFSET +
>>                       (adapter->portnum * sizeof(u64));
>> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
>> netxen_adapter *adapter, u64 *mac)
>>                                       offset, sizeof(u64), pmac) == -1)
>>                       return -1;
>>
>> -             if (*mac == cpu_to_le64(~0ULL))
>> +             if (*mac == ~0ULL)
>
> *mac here is in little endian format so compare it with cpu_to_le64.
>
>>                       return -1;
>>       }
>>       return 0;
>> @@ -2178,7 +2180,7 @@ lock_try:
>>               NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter->ahw.pci_base0,
>> waddr);
>>               raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
>>               NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
>> -             *data_buff++ = cpu_to_le32(val);
>> +             *data_buff++ = val;
>
> It should be cpu_to_le32 as it is returned to tool which requires
> output in little endian.
>
>>               fl_addr += sizeof(val);
>>       }
>>       readl((void __iomem *)(adapter->ahw.pci_base0 +
>> NX_FLASH_SEM2_ULK));
>> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> index 8dc4a134..70783b4 100644
>> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
>> *adapter)
>>                       adapter->driver_mismatch = 1;
>>                       return;
>>               }
>> -             ptr32[i] = cpu_to_le32(val);
>> +             ptr32[i] = val;
>
> Here val should be in little endian (cpu_to_le32) as it will be referenced by byte array to print serial number.
>
>>               offset += sizeof(u32);
>>       }
>>
>> --
>> 1.7.4.4
>>
>
>
> Sorry for Late reply.
>
> Rajesh
>

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-09 18:51     ` santosh prasad nayak
  0 siblings, 0 replies; 73+ messages in thread
From: santosh prasad nayak @ 2012-03-09 18:51 UTC (permalink / raw)
  To: Rajesh Borundia
  Cc: Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org

On Fri, Mar 9, 2012 at 10:04 PM, Rajesh Borundia
<rajesh.borundia@qlogic.com> wrote:
>
>> -----Original Message-----
>> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
>> Sent: Saturday, March 03, 2012 9:18 PM
>> To: Sony Chacko
>> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
>> janitors@vger.kernel.org; Santosh Nayak
>> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
>>
>> From: Santosh Nayak <santoshprasadnayak@gmail.com>
>>
>> Fix endian bug.
>>
>> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
>> ---
>>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
>>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++-----
>>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
>>  3 files changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> index 2eeac32..b5de8a7 100644
>> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
>>
>>  struct nx_vlan_ip_list {
>>       struct list_head list;
>> -     u32 ip_addr;
>> +     __be32 ip_addr;
>>  };
>>
>>  /*
>> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
>> nx_host_sds_ring *sds_ring, int max);
>>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
>>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
>>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
>> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
>> cmd);
>> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
>> int cmd);
>>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
>> enable);
>>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
>> linkup);
>>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
>> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> index 6f37470..0f81287 100644
>> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
>> *adapter, int enable)
>>       return rv;
>>  }
>>
>> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
>> cmd)
>> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
>> int cmd)
>>  {
>>       nx_nic_req_t req;
>>       u64 word;
>> +     u64 ip_addr;
>>       int rv;
>>
>>       memset(&req, 0, sizeof(nx_nic_req_t));
>> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter
>> *adapter, u32 ip, int cmd)
>>       req.req_hdr = cpu_to_le64(word);
>>
>>       req.words[0] = cpu_to_le64(cmd);
>> -     req.words[1] = cpu_to_le64(ip);
>> +     ip_addr = be32_to_cpu(ip);
>> +     *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
>


> Adapter requires ip value in big endian stored at lower 32 bit address.
> The cpu_to_be64 will swap the lower 32 bit with higher 32 bit and adapter
> Will get incorrect ip addr. Instead u can do.
>
>       U32 *ip_addr;
>       ip_addr = (u32 *)&req.words[1];
>       *ip_addr = ip;


1.  It looks incomplete.
    In the call to "netxen_send_cmd_descs" we have to pass "&req" as the
    2nd argument, not "ip_addr".

2.  Your suggestion above assumes that the data type of the 2nd argument
    "ip" in "netxen_config_ipaddr()" is still "u32". This is not true.

    Some days back you suggested changing the data type to "__be32".
    In the present patch, "ip" is "__be32", i.e. already in big-endian
    format as required.
    We only need to convert this 32-bit value to 64 bits. There are two ways:

     a.   *(__be64 *)&req.words[1] = ip;   // auto conversion

     b.   *(__be64 *)&req.words[1] = cpu_to_be64(be32_to_cpu(ip));
            // explicit conversion.


 Please correct me if I am wrong.


regards
Santosh




>
>
>>
>>       rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
>> *)&req, 1);
>>       if (rv != 0) {
>> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
>> netxen_adapter *adapter, u64 *mac)
>>       if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) ==
>> -1)
>>               return -1;
>>
>> -     if (*mac == cpu_to_le64(~0ULL)) {
>> +     if (*mac == ~0ULL) {
>
> *mac is in little endian format so compare it with cpu_to_le64.
>
>>
>>               offset = NX_OLD_MAC_ADDR_OFFSET +
>>                       (adapter->portnum * sizeof(u64));
>> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
>> netxen_adapter *adapter, u64 *mac)
>>                                       offset, sizeof(u64), pmac) == -1)
>>                       return -1;
>>
>> -             if (*mac == cpu_to_le64(~0ULL))
>> +             if (*mac == ~0ULL)
>
> *mac here is in little endian format so compare it with cpu_to_le64.
>
>>                       return -1;
>>       }
>>       return 0;
>> @@ -2178,7 +2180,7 @@ lock_try:
>>               NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter->ahw.pci_base0,
>> waddr);
>>               raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
>>               NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
>> -             *data_buff++ = cpu_to_le32(val);
>> +             *data_buff++ = val;
>
> It should be cpu_to_le32 as it is returned to tool which requires
> output in little endian.
>
>>               fl_addr += sizeof(val);
>>       }
>>       readl((void __iomem *)(adapter->ahw.pci_base0 +
>> NX_FLASH_SEM2_ULK));
>> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> index 8dc4a134..70783b4 100644
>> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
>> *adapter)
>>                       adapter->driver_mismatch = 1;
>>                       return;
>>               }
>> -             ptr32[i] = cpu_to_le32(val);
>> +             ptr32[i] = val;
>
> Here val should be in little endian (cpu_to_le32) as it will be referenced by byte array to print serial number.
>
>>               offset += sizeof(u32);
>>       }
>>
>> --
>> 1.7.4.4
>>
>
>
> Sorry for Late reply.
>
> Rajesh
>
--
To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-09 18:51     ` santosh prasad nayak
@ 2012-03-10 19:01       ` Rajesh Borundia
  -1 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-10 19:01 UTC (permalink / raw)
  To: santosh prasad nayak
  Cc: Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org



> -----Original Message-----
> From: santosh prasad nayak [mailto:santoshprasadnayak@gmail.com]
> Sent: Saturday, March 10, 2012 12:20 AM
> To: Rajesh Borundia
> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> On Fri, Mar 9, 2012 at 10:04 PM, Rajesh Borundia
> <rajesh.borundia@qlogic.com> wrote:
> >
> >> -----Original Message-----
> >> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
> >> Sent: Saturday, March 03, 2012 9:18 PM
> >> To: Sony Chacko
> >> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
> >> janitors@vger.kernel.org; Santosh Nayak
> >> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> >>
> >> From: Santosh Nayak <santoshprasadnayak@gmail.com>
> >>
> >> Fix endian bug.
> >>
> >> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
> >> ---
> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++--
> ---
> >>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
> >>  3 files changed, 10 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> index 2eeac32..b5de8a7 100644
> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
> >>
> >>  struct nx_vlan_ip_list {
> >>       struct list_head list;
> >> -     u32 ip_addr;
> >> +     __be32 ip_addr;
> >>  };
> >>
> >>  /*
> >> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
> >> nx_host_sds_ring *sds_ring, int max);
> >>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
> >>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
> >>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
> int
> >> cmd);
> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> >> int cmd);
> >>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
> >> enable);
> >>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
> >> linkup);
> >>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> index 6f37470..0f81287 100644
> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
> >> *adapter, int enable)
> >>       return rv;
> >>  }
> >>
> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
> int
> >> cmd)
> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> >> int cmd)
> >>  {
> >>       nx_nic_req_t req;
> >>       u64 word;
> >> +     u64 ip_addr;
> >>       int rv;
> >>
> >>       memset(&req, 0, sizeof(nx_nic_req_t));
> >> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter
> >> *adapter, u32 ip, int cmd)
> >>       req.req_hdr = cpu_to_le64(word);
> >>
> >>       req.words[0] = cpu_to_le64(cmd);
> >> -     req.words[1] = cpu_to_le64(ip);
> >> +     ip_addr = be32_to_cpu(ip);
> >> +     *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
> >
> 
> 
> > Adapter requires ip value in big endian stored at lower 32 bit
> address.
> > The cpu_to_be64 will swap the lower 32 bit with higher 32 bit and
> adapter
> > Will get incorrect ip addr. Instead u can do.
> >
> >       U32 *ip_addr;
> >       ip_addr = (u32 *)&req.words[1];
> >       *ip_addr = ip;
> 
> 
> 1.  It looks incomplete.
>     In the function call "netxen_send_cmd_descs" we have to pass "&req"
> as
>    2nd argument  not  "ipaddr".

 I should have sent a patch. This piece of code was only meant to show how to
copy the ip address into req.words[1].

> 
> 2. Your above suggestion is with assumption that the data type of 2nd
> argument "ip"
>      in "netxen_config_ipaddr()" is still "u32".  This is not true.
> 
>      Some days back you suggested to change the data type to "__be32".
>  In the present patch
>      the "ip"  is in "__be32" format i.e already in Big endian format
> as per requirement.
>      We need to only convert this 32 bit to 64 bit.  There are two
> ways:
> 
 No, I did not assume that ip is u32; ip is still __be32 (the ip value is in big-endian form),
 though I should have mentioned that explicitly. But the ip value must be copied into the lower 32 bits of req.words[1].
 
    
>      a.   *(__be64 *)&req.words[1] = ip;   // auto conversion
On a big-endian machine the MSB is copied into the MSB first, so ip will end up in the higher 32 bits of
req.words[1], but the adapter requires it in the lower 32 bits.
> 
>      b.   *(__be64 *)&req.words[1] = cpu_to_be64(be32_to_cpu(ip));
>             // explicit conversion.
> 
If you follow the second approach, cpu_to_be64 will swap the lower 32 bits of ip with the higher 32 bits on a little-endian machine,
but the adapter requires it in the lower 32 bits.

A simple way to copy ip into req.words[1] would be memcpy(&req.words[1], &ip, sizeof(u32));

 
> 
>  Please correct me if I am wrong.
> 
> 
> regards
> Santosh
> 
> 
> 
> 
> >
> >
> >>
> >>       rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
> >> *)&req, 1);
> >>       if (rv != 0) {
> >> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
> >> netxen_adapter *adapter, u64 *mac)
> >>       if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac)
> ==
> >> -1)
> >>               return -1;
> >>
> >> -     if (*mac == cpu_to_le64(~0ULL)) {
> >> +     if (*mac == ~0ULL) {
> >
> > *mac is in little endian format so compare it with cpu_to_le64.
> >
> >>
> >>               offset = NX_OLD_MAC_ADDR_OFFSET +
> >>                       (adapter->portnum * sizeof(u64));
> >> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
> >> netxen_adapter *adapter, u64 *mac)
> >>                                       offset, sizeof(u64), pmac) ==
> -1)
> >>                       return -1;
> >>
> >> -             if (*mac == cpu_to_le64(~0ULL))
> >> +             if (*mac == ~0ULL)
> >
> > *mac here is in little endian format so compare it with cpu_to_le64.
> >
> >>                       return -1;
> >>       }
> >>       return 0;
> >> @@ -2178,7 +2180,7 @@ lock_try:
> >>               NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter-
> >ahw.pci_base0,
> >> waddr);
> >>               raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
> >>               NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
> >> -             *data_buff++ = cpu_to_le32(val);
> >> +             *data_buff++ = val;
> >
> > It should be cpu_to_le32 as it is returned to tool which requires
> > output in little endian.
> >
> >>               fl_addr += sizeof(val);
> >>       }
> >>       readl((void __iomem *)(adapter->ahw.pci_base0 +
> >> NX_FLASH_SEM2_ULK));
> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> index 8dc4a134..70783b4 100644
> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
> >> *adapter)
> >>                       adapter->driver_mismatch = 1;
> >>                       return;
> >>               }
> >> -             ptr32[i] = cpu_to_le32(val);
> >> +             ptr32[i] = val;
> >
> > Here val should be in little endian (cpu_to_le32) as it will be
> referenced by byte array to print serial number.
> >
> >>               offset += sizeof(u32);
> >>       }
> >>
> >> --
> >> 1.7.4.4
> >>
> >
> >
> > Sorry for Late reply.
> >
> > Rajesh
> >


--
To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-10 19:01       ` Rajesh Borundia
  0 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-10 19:01 UTC (permalink / raw)
  To: santosh prasad nayak
  Cc: Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org



> -----Original Message-----
> From: santosh prasad nayak [mailto:santoshprasadnayak@gmail.com]
> Sent: Saturday, March 10, 2012 12:20 AM
> To: Rajesh Borundia
> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> On Fri, Mar 9, 2012 at 10:04 PM, Rajesh Borundia
> <rajesh.borundia@qlogic.com> wrote:
> >
> >> -----Original Message-----
> >> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
> >> Sent: Saturday, March 03, 2012 9:18 PM
> >> To: Sony Chacko
> >> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
> >> janitors@vger.kernel.org; Santosh Nayak
> >> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> >>
> >> From: Santosh Nayak <santoshprasadnayak@gmail.com>
> >>
> >> Fix endian bug.
> >>
> >> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
> >> ---
> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++--
> ---
> >>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
> >>  3 files changed, 10 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> index 2eeac32..b5de8a7 100644
> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
> >>
> >>  struct nx_vlan_ip_list {
> >>       struct list_head list;
> >> -     u32 ip_addr;
> >> +     __be32 ip_addr;
> >>  };
> >>
> >>  /*
> >> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
> >> nx_host_sds_ring *sds_ring, int max);
> >>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
> >>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
> >>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
> int
> >> cmd);
> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> >> int cmd);
> >>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
> >> enable);
> >>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
> >> linkup);
> >>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> index 6f37470..0f81287 100644
> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
> >> *adapter, int enable)
> >>       return rv;
> >>  }
> >>
> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
> int
> >> cmd)
> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> >> int cmd)
> >>  {
> >>       nx_nic_req_t req;
> >>       u64 word;
> >> +     u64 ip_addr;
> >>       int rv;
> >>
> >>       memset(&req, 0, sizeof(nx_nic_req_t));
> >> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter
> >> *adapter, u32 ip, int cmd)
> >>       req.req_hdr = cpu_to_le64(word);
> >>
> >>       req.words[0] = cpu_to_le64(cmd);
> >> -     req.words[1] = cpu_to_le64(ip);
> >> +     ip_addr = be32_to_cpu(ip);
> >> +     *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
> >
> 
> 
> > Adapter requires ip value in big endian stored at lower 32 bit
> address.
> > The cpu_to_be64 will swap the lower 32 bit with higher 32 bit and
> adapter
> > Will get incorrect ip addr. Instead u can do.
> >
> >       U32 *ip_addr;
> >       ip_addr = (u32 *)&req.words[1];
> >       *ip_addr = ip;
> 
> 
> 1.  It looks incomplete.
>     In the function call "netxen_send_cmd_descs" we have to pass "&req"
> as
>    2nd argument  not  "ipaddr".

 I should have sent a patch. This piece of code was only meant to show how to
copy the ip address into req.words[1].

> 
> 2. Your above suggestion is with assumption that the data type of 2nd
> argument "ip"
>      in "netxen_config_ipaddr()" is still "u32".  This is not true.
> 
>      Some days back you suggested to change the data type to "__be32".
>  In the present patch
>      the "ip"  is in "__be32" format i.e already in Big endian format
> as per requirement.
>      We need to only convert this 32 bit to 64 bit.  There are two
> ways:
> 
 No, I did not assume that ip is u32; ip is still __be32 (the ip value is in big-endian form),
 though I should have mentioned that explicitly. But the ip value must be copied into the lower 32 bits of req.words[1].
 
    
>      a.   *(__be64 *)&req.words[1] = ip;   // auto conversion
On a big-endian machine the MSB is copied into the MSB first, so ip will end up in the higher 32 bits of
req.words[1], but the adapter requires it in the lower 32 bits.
> 
>      b.   *(__be64 *)&req.words[1] = cpu_to_be64(be32_to_cpu(ip));
>             // explicit conversion.
> 
If you follow the second approach, cpu_to_be64 will swap the lower 32 bits of ip with the higher 32 bits on a little-endian machine,
but the adapter requires it in the lower 32 bits.

A simple way to copy ip into req.words[1] would be memcpy(&req.words[1], &ip, sizeof(u32));

 
> 
>  Please correct me if I am wrong.
> 
> 
> regards
> Santosh
> 
> 
> 
> 
> >
> >
> >>
> >>       rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
> >> *)&req, 1);
> >>       if (rv != 0) {
> >> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
> >> netxen_adapter *adapter, u64 *mac)
> >>       if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac)
> ==
> >> -1)
> >>               return -1;
> >>
> >> -     if (*mac == cpu_to_le64(~0ULL)) {
> >> +     if (*mac == ~0ULL) {
> >
> > *mac is in little endian format so compare it with cpu_to_le64.
> >
> >>
> >>               offset = NX_OLD_MAC_ADDR_OFFSET +
> >>                       (adapter->portnum * sizeof(u64));
> >> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
> >> netxen_adapter *adapter, u64 *mac)
> >>                                       offset, sizeof(u64), pmac) ==
> -1)
> >>                       return -1;
> >>
> >> -             if (*mac == cpu_to_le64(~0ULL))
> >> +             if (*mac == ~0ULL)
> >
> > *mac here is in little endian format so compare it with cpu_to_le64.
> >
> >>                       return -1;
> >>       }
> >>       return 0;
> >> @@ -2178,7 +2180,7 @@ lock_try:
> >>               NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter-
> >ahw.pci_base0,
> >> waddr);
> >>               raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
> >>               NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
> >> -             *data_buff++ = cpu_to_le32(val);
> >> +             *data_buff++ = val;
> >
> > It should be cpu_to_le32 as it is returned to tool which requires
> > output in little endian.
> >
> >>               fl_addr += sizeof(val);
> >>       }
> >>       readl((void __iomem *)(adapter->ahw.pci_base0 +
> >> NX_FLASH_SEM2_ULK));
> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> index 8dc4a134..70783b4 100644
> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
> >> *adapter)
> >>                       adapter->driver_mismatch = 1;
> >>                       return;
> >>               }
> >> -             ptr32[i] = cpu_to_le32(val);
> >> +             ptr32[i] = val;
> >
> > Here val should be in little endian (cpu_to_le32) as it will be
> referenced by byte array to print serial number.
> >
> >>               offset += sizeof(u32);
> >>       }
> >>
> >> --
> >> 1.7.4.4
> >>
> >
> >
> > Sorry for Late reply.
> >
> > Rajesh
> >



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-10 19:01       ` Rajesh Borundia
@ 2012-03-11  9:28         ` santosh prasad nayak
  -1 siblings, 0 replies; 73+ messages in thread
From: santosh prasad nayak @ 2012-03-11  9:16 UTC (permalink / raw)
  To: Rajesh Borundia
  Cc: Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org

Thanks, Rajesh, for the clarification.
I have included all your inputs in the following patch.
This is for review only, not a formal submission. Once the review is done I will send a
formal patch.



diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index 2eeac32..b5de8a7 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {

 struct nx_vlan_ip_list {
 	struct list_head list;
-	u32 ip_addr;
+	__be32 ip_addr;
 };

 /*
@@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
nx_host_sds_ring *sds_ring, int max);
 void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
 int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
 int netxen_config_rss(struct netxen_adapter *adapter, int enable);
-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd);
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd);
 int netxen_linkevent_request(struct netxen_adapter *adapter, int enable);
 void netxen_advert_link_change(struct netxen_adapter *adapter, int linkup);
 void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
index 6f37470..59d5ee7 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
@@ -909,7 +909,7 @@ int netxen_config_rss(struct netxen_adapter
*adapter, int enable)
 	return rv;
 }

-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd)
 {
 	nx_nic_req_t req;
 	u64 word;
@@ -922,7 +922,7 @@ int netxen_config_ipaddr(struct netxen_adapter
*adapter, u32 ip, int cmd)
 	req.req_hdr = cpu_to_le64(word);

 	req.words[0] = cpu_to_le64(cmd);
-	req.words[1] = cpu_to_le64(ip);
+	memcpy(&req.words[1], &ip, sizeof(u32));

 	rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1);
 	if (rv != 0) {
@@ -1050,7 +1050,7 @@ int netxen_get_flash_mac_addr(struct
netxen_adapter *adapter, u64 *mac)
 	if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) == -1)
 		return -1;

-	if (*mac == cpu_to_le64(~0ULL)) {
+	if (*(__le64 *)mac == cpu_to_le64(~0ULL)) {

 		offset = NX_OLD_MAC_ADDR_OFFSET +
 			(adapter->portnum * sizeof(u64));
@@ -1059,7 +1059,7 @@ int netxen_get_flash_mac_addr(struct
netxen_adapter *adapter, u64 *mac)
 					offset, sizeof(u64), pmac) == -1)
 			return -1;

-		if (*mac == cpu_to_le64(~0ULL))
+		if (*(__le64 *)mac == cpu_to_le64(~0ULL))
 			return -1;
 	}
 	return 0;
@@ -2155,7 +2155,7 @@ static u32 netxen_md_rd_crb(struct
netxen_adapter *adapter,
 static u32
 netxen_md_rdrom(struct netxen_adapter *adapter,
 			struct netxen_minidump_entry_rdrom
-				*romEntry, u32 *data_buff)
+				*romEntry, __le32 *data_buff)
 {
 	int i, count = 0;
 	u32 size, lck_val;
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 7648995..65a718f 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -805,12 +805,12 @@ netxen_check_options(struct netxen_adapter *adapter)
 	char brd_name[NETXEN_MAX_SHORT_NAME];
 	char serial_num[32];
 	int i, offset, val, err;
-	int *ptr32;
+	__le32 *ptr32;
 	struct pci_dev *pdev = adapter->pdev;

 	adapter->driver_mismatch = 0;

-	ptr32 = (int *)&serial_num;
+	ptr32 = (__le32 *)&serial_num;
 	offset = NX_FW_SERIAL_NUM_OFFSET;
 	for (i = 0; i < 8; i++) {
 		if (netxen_rom_fast_read(adapter, offset, &val) == -1) {



On Sun, Mar 11, 2012 at 12:31 AM, Rajesh Borundia
<rajesh.borundia@qlogic.com> wrote:
>
>
>> -----Original Message-----
>> From: santosh prasad nayak [mailto:santoshprasadnayak@gmail.com]
>> Sent: Saturday, March 10, 2012 12:20 AM
>> To: Rajesh Borundia
>> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
>> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
>>
>> On Fri, Mar 9, 2012 at 10:04 PM, Rajesh Borundia
>> <rajesh.borundia@qlogic.com> wrote:
>> >
>> >> -----Original Message-----
>> >> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
>> >> Sent: Saturday, March 03, 2012 9:18 PM
>> >> To: Sony Chacko
>> >> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
>> >> janitors@vger.kernel.org; Santosh Nayak
>> >> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
>> >>
>> >> From: Santosh Nayak <santoshprasadnayak@gmail.com>
>> >>
>> >> Fix endian bug.
>> >>
>> >> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
>> >> ---
>> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
>> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++--
>> ---
>> >>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
>> >>  3 files changed, 10 insertions(+), 8 deletions(-)
>> >>
>> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> >> index 2eeac32..b5de8a7 100644
>> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> >> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
>> >>
>> >>  struct nx_vlan_ip_list {
>> >>       struct list_head list;
>> >> -     u32 ip_addr;
>> >> +     __be32 ip_addr;
>> >>  };
>> >>
>> >>  /*
>> >> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
>> >> nx_host_sds_ring *sds_ring, int max);
>> >>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
>> >>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
>> >>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
>> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
>> int
>> >> cmd);
>> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
>> >> int cmd);
>> >>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
>> >> enable);
>> >>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
>> >> linkup);
>> >>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
>> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> >> index 6f37470..0f81287 100644
>> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> >> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
>> >> *adapter, int enable)
>> >>       return rv;
>> >>  }
>> >>
>> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
>> int
>> >> cmd)
>> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
>> >> int cmd)
>> >>  {
>> >>       nx_nic_req_t req;
>> >>       u64 word;
>> >> +     u64 ip_addr;
>> >>       int rv;
>> >>
>> >>       memset(&req, 0, sizeof(nx_nic_req_t));
>> >> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter
>> >> *adapter, u32 ip, int cmd)
>> >>       req.req_hdr = cpu_to_le64(word);
>> >>
>> >>       req.words[0] = cpu_to_le64(cmd);
>> >> -     req.words[1] = cpu_to_le64(ip);
>> >> +     ip_addr = be32_to_cpu(ip);
>> >> +     *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
>> >
>>
>>
>> > Adapter requires ip value in big endian stored at lower 32 bit
>> address.
>> > The cpu_to_be64 will swap the lower 32 bit with higher 32 bit and
>> adapter
>> > Will get incorrect ip addr. Instead u can do.
>> >
>> >       U32 *ip_addr;
>> >       ip_addr = (u32 *)&req.words[1];
>> >       *ip_addr = ip;
>>
>>
>> 1.  It looks incomplete.
>>     In the function call "netxen_send_cmd_descs" we have to pass "&req"
>> as
>>    2nd argument  not  "ipaddr".
>
>  I should have sent a patch. This piece of code was just to show how to
> copy ip addr in  req.words[1].
>
>>
>> 2. Your above suggestion is with assumption that the data type of 2nd
>> argument "ip"
>>      in "netxen_config_ipaddr()" is still "u32".  This is not true.
>>
>>      Some days back you suggested to change the data type to "__be32".
>>  In the present patch
>>      the "ip"  is in "__be32" format i.e already in Big endian format
>> as per requirement.
>>      We need to only convert this 32 bit to 64 bit.  There are two
>> ways:
>>
>  No I did not assume that ip is u32, ip is still __be32(ip value is in form of big endian)
>  though I should have mentioned it explicitly. But the ip value should be copied to lower 32 bit of req.words[1].
>
>
>>      a.   *(__be64 *)&req.words[1] = ip;   // auto conversion
> In big endian machine MSB is copied into MSB first. So ip will get copied into higher 32 bit of
> req.words[1] but adapter requires it in lower 32 bit.
>>
>>      b.   *(__be64 *)&req.words[1] = cpu_to_be64(be32_to_cpu(ip));
>>             // explicit conversion.
>>
> If you follow second cpu_to_be64 it will swap lower 32 bit of ip with higher 32 bit in little endian machine.
> But adapter requires it in lower 32 bit.
>
> Simple solution to copy ip in req.words[1] could be memcpy(&req.words[1], &ip, sizeof(u32));
>
>
>>
>>  Please correct me if I am wrong.
>>
>>
>> regards
>> Santosh
>>
>>
>>
>>
>> >
>> >
>> >>
>> >>       rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
>> >> *)&req, 1);
>> >>       if (rv != 0) {
>> >> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
>> >> netxen_adapter *adapter, u64 *mac)
>> >>       if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac)
>> ==
>> >> -1)
>> >>               return -1;
>> >>
>> >> -     if (*mac == cpu_to_le64(~0ULL)) {
>> >> +     if (*mac == ~0ULL) {
>> >
>> > *mac is in little endian format so compare it with cpu_to_le64.
>> >
>> >>
>> >>               offset = NX_OLD_MAC_ADDR_OFFSET +
>> >>                       (adapter->portnum * sizeof(u64));
>> >> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
>> >> netxen_adapter *adapter, u64 *mac)
>> >>                                       offset, sizeof(u64), pmac) ==
>> -1)
>> >>                       return -1;
>> >>
>> >> -             if (*mac == cpu_to_le64(~0ULL))
>> >> +             if (*mac == ~0ULL)
>> >
>> > *mac here is in little endian format so compare it with cpu_to_le64.
>> >
>> >>                       return -1;
>> >>       }
>> >>       return 0;
>> >> @@ -2178,7 +2180,7 @@ lock_try:
>> >>               NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter-
>> >ahw.pci_base0,
>> >> waddr);
>> >>               raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
>> >>               NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
>> >> -             *data_buff++ = cpu_to_le32(val);
>> >> +             *data_buff++ = val;
>> >
>> > It should be cpu_to_le32 as it is returned to tool which requires
>> > output in little endian.
>> >
>> >>               fl_addr += sizeof(val);
>> >>       }
>> >>       readl((void __iomem *)(adapter->ahw.pci_base0 +
>> >> NX_FLASH_SEM2_ULK));
>> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> >> index 8dc4a134..70783b4 100644
>> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> >> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
>> >> *adapter)
>> >>                       adapter->driver_mismatch = 1;
>> >>                       return;
>> >>               }
>> >> -             ptr32[i] = cpu_to_le32(val);
>> >> +             ptr32[i] = val;
>> >
>> > Here val should be in little endian (cpu_to_le32) as it will be
>> referenced by byte array to print serial number.
>> >
>> >>               offset += sizeof(u32);
>> >>       }
>> >>
>> >> --
>> >> 1.7.4.4
>> >>
>> >
>> >
>> > Sorry for Late reply.
>> >
>> > Rajesh
>> >
>
>

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-11  9:28         ` santosh prasad nayak
  0 siblings, 0 replies; 73+ messages in thread
From: santosh prasad nayak @ 2012-03-11  9:28 UTC (permalink / raw)
  To: Rajesh Borundia
  Cc: Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org

Thanks, Rajesh, for the clarification.
I have included all your inputs in the following patch.
This is for review only, not a formal submission. Once the review is done I will send a
formal patch.



diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index 2eeac32..b5de8a7 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {

 struct nx_vlan_ip_list {
 	struct list_head list;
-	u32 ip_addr;
+	__be32 ip_addr;
 };

 /*
@@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
nx_host_sds_ring *sds_ring, int max);
 void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
 int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
 int netxen_config_rss(struct netxen_adapter *adapter, int enable);
-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd);
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd);
 int netxen_linkevent_request(struct netxen_adapter *adapter, int enable);
 void netxen_advert_link_change(struct netxen_adapter *adapter, int linkup);
 void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
index 6f37470..59d5ee7 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
@@ -909,7 +909,7 @@ int netxen_config_rss(struct netxen_adapter
*adapter, int enable)
 	return rv;
 }

-int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd)
+int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip, int cmd)
 {
 	nx_nic_req_t req;
 	u64 word;
@@ -922,7 +922,7 @@ int netxen_config_ipaddr(struct netxen_adapter
*adapter, u32 ip, int cmd)
 	req.req_hdr = cpu_to_le64(word);

 	req.words[0] = cpu_to_le64(cmd);
-	req.words[1] = cpu_to_le64(ip);
+	memcpy(&req.words[1], &ip, sizeof(u32));

 	rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1);
 	if (rv != 0) {
@@ -1050,7 +1050,7 @@ int netxen_get_flash_mac_addr(struct
netxen_adapter *adapter, u64 *mac)
 	if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) == -1)
 		return -1;

-	if (*mac == cpu_to_le64(~0ULL)) {
+	if (*(__le64 *)mac == cpu_to_le64(~0ULL)) {

 		offset = NX_OLD_MAC_ADDR_OFFSET +
 			(adapter->portnum * sizeof(u64));
@@ -1059,7 +1059,7 @@ int netxen_get_flash_mac_addr(struct
netxen_adapter *adapter, u64 *mac)
 					offset, sizeof(u64), pmac) == -1)
 			return -1;

-		if (*mac == cpu_to_le64(~0ULL))
+		if (*(__le64 *)mac == cpu_to_le64(~0ULL))
 			return -1;
 	}
 	return 0;
@@ -2155,7 +2155,7 @@ static u32 netxen_md_rd_crb(struct
netxen_adapter *adapter,
 static u32
 netxen_md_rdrom(struct netxen_adapter *adapter,
 			struct netxen_minidump_entry_rdrom
-				*romEntry, u32 *data_buff)
+				*romEntry, __le32 *data_buff)
 {
 	int i, count = 0;
 	u32 size, lck_val;
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 7648995..65a718f 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -805,12 +805,12 @@ netxen_check_options(struct netxen_adapter *adapter)
 	char brd_name[NETXEN_MAX_SHORT_NAME];
 	char serial_num[32];
 	int i, offset, val, err;
-	int *ptr32;
+	__le32 *ptr32;
 	struct pci_dev *pdev = adapter->pdev;

 	adapter->driver_mismatch = 0;

-	ptr32 = (int *)&serial_num;
+	ptr32 = (__le32 *)&serial_num;
 	offset = NX_FW_SERIAL_NUM_OFFSET;
 	for (i = 0; i < 8; i++) {
 		if (netxen_rom_fast_read(adapter, offset, &val) == -1) {



On Sun, Mar 11, 2012 at 12:31 AM, Rajesh Borundia
<rajesh.borundia@qlogic.com> wrote:
>
>
>> -----Original Message-----
>> From: santosh prasad nayak [mailto:santoshprasadnayak@gmail.com]
>> Sent: Saturday, March 10, 2012 12:20 AM
>> To: Rajesh Borundia
>> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
>> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
>>
>> On Fri, Mar 9, 2012 at 10:04 PM, Rajesh Borundia
>> <rajesh.borundia@qlogic.com> wrote:
>> >
>> >> -----Original Message-----
>> >> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
>> >> Sent: Saturday, March 03, 2012 9:18 PM
>> >> To: Sony Chacko
>> >> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
>> >> janitors@vger.kernel.org; Santosh Nayak
>> >> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
>> >>
>> >> From: Santosh Nayak <santoshprasadnayak@gmail.com>
>> >>
>> >> Fix endian bug.
>> >>
>> >> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
>> >> ---
>> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
>> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12 +++++++--
>> ---
>> >>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
>> >>  3 files changed, 10 insertions(+), 8 deletions(-)
>> >>
>> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> >> index 2eeac32..b5de8a7 100644
>> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
>> >> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
>> >>
>> >>  struct nx_vlan_ip_list {
>> >>       struct list_head list;
>> >> -     u32 ip_addr;
>> >> +     __be32 ip_addr;
>> >>  };
>> >>
>> >>  /*
>> >> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
>> >> nx_host_sds_ring *sds_ring, int max);
>> >>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
>> >>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
>> >>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
>> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
>> int
>> >> cmd);
>> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
>> >> int cmd);
>> >>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
>> >> enable);
>> >>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
>> >> linkup);
>> >>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
>> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> >> index 6f37470..0f81287 100644
>> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
>> >> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
>> >> *adapter, int enable)
>> >>       return rv;
>> >>  }
>> >>
>> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
>> int
>> >> cmd)
>> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
>> >> int cmd)
>> >>  {
>> >>       nx_nic_req_t req;
>> >>       u64 word;
>> >> +     u64 ip_addr;
>> >>       int rv;
>> >>
>> >>       memset(&req, 0, sizeof(nx_nic_req_t));
>> >> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct netxen_adapter
>> >> *adapter, u32 ip, int cmd)
>> >>       req.req_hdr = cpu_to_le64(word);
>> >>
>> >>       req.words[0] = cpu_to_le64(cmd);
>> >> -     req.words[1] = cpu_to_le64(ip);
>> >> +     ip_addr = be32_to_cpu(ip);
>> >> +     *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
>> >
>>
>>
>> > The adapter requires the IP value in big-endian format, stored at the
>> > lower 32-bit address.
>> > cpu_to_be64 will swap the lower 32 bits with the higher 32 bits, and the
>> > adapter will get an incorrect IP address. Instead you can do:
>> >
>> >       u32 *ip_addr;
>> >       ip_addr = (u32 *)&req.words[1];
>> >       *ip_addr = ip;
>>
>>
>> 1.  It looks incomplete.
>>     In the function call "netxen_send_cmd_descs" we have to pass "&req"
>> as
>>    2nd argument  not  "ipaddr".
>
>  I should have sent a patch. This piece of code was just to show how to
> copy ip addr in  req.words[1].
>
>>
>> 2. Your above suggestion is with assumption that the data type of 2nd
>> argument "ip"
>>      in "netxen_config_ipaddr()" is still "u32".  This is not true.
>>
>>      Some days back you suggested to change the data type to "__be32".
>>  In the present patch
>>      the "ip"  is in "__be32" format i.e already in Big endian format
>> as per requirement.
>>      We need to only convert this 32 bit to 64 bit.  There are two
>> ways:
>>
>  No I did not assume that ip is u32, ip is still __be32(ip value is in form of big endian)
>  though I should have mentioned it explicitly. But the ip value should be copied to lower 32 bit of req.words[1].
>
>
>>      a.   *(__be64 *)&req.words[1] = ip;   // auto conversion
> On a big-endian machine the MSB is copied into the MSB first, so ip will get copied into the higher 32 bits of
> req.words[1], but the adapter requires it in the lower 32 bits.
>>
>>      b.   *(__be64 *)&req.words[1] = cpu_to_be64(be32_to_cpu(ip));
>>             // explicit conversion.
>>
> If you follow the second option, cpu_to_be64 will swap the lower 32 bits of ip with the higher 32 bits on a little-endian machine.
> But the adapter requires it in the lower 32 bits.
>
> Simple solution to copy ip in req.words[1] could be memcpy(&req.words[1], &ip, sizeof(u32));
>
>
>>
>>  Please correct me if I am wrong.
>>
>>
>> regards
>> Santosh
>>
>>
>>
>>
>> >
>> >
>> >>
>> >>       rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
>> >> *)&req, 1);
>> >>       if (rv != 0) {
>> >> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
>> >> netxen_adapter *adapter, u64 *mac)
>> >>       if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac)
>> ==
>> >> -1)
>> >>               return -1;
>> >>
>> >> -     if (*mac == cpu_to_le64(~0ULL)) {
>> >> +     if (*mac == ~0ULL) {
>> >
>> > *mac is in little endian format so compare it with cpu_to_le64.
>> >
>> >>
>> >>               offset = NX_OLD_MAC_ADDR_OFFSET +
>> >>                       (adapter->portnum * sizeof(u64));
>> >> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
>> >> netxen_adapter *adapter, u64 *mac)
>> >>                                       offset, sizeof(u64), pmac) ==
>> -1)
>> >>                       return -1;
>> >>
>> >> -             if (*mac == cpu_to_le64(~0ULL))
>> >> +             if (*mac == ~0ULL)
>> >
>> > *mac here is in little endian format so compare it with cpu_to_le64.
>> >
>> >>                       return -1;
>> >>       }
>> >>       return 0;
>> >> @@ -2178,7 +2180,7 @@ lock_try:
>> >>               NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter-
>> >ahw.pci_base0,
>> >> waddr);
>> >>               raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
>> >>               NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0, &val);
>> >> -             *data_buff++ = cpu_to_le32(val);
>> >> +             *data_buff++ = val;
>> >
>> > It should be cpu_to_le32 as it is returned to tool which requires
>> > output in little endian.
>> >
>> >>               fl_addr += sizeof(val);
>> >>       }
>> >>       readl((void __iomem *)(adapter->ahw.pci_base0 +
>> >> NX_FLASH_SEM2_ULK));
>> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> >> index 8dc4a134..70783b4 100644
>> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
>> >> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
>> >> *adapter)
>> >>                       adapter->driver_mismatch = 1;
>> >>                       return;
>> >>               }
>> >> -             ptr32[i] = cpu_to_le32(val);
>> >> +             ptr32[i] = val;
>> >
>> > Here val should be in little endian (cpu_to_le32) as it will be
>> referenced by byte array to print serial number.
>> >
>> >>               offset += sizeof(u32);
>> >>       }
>> >>
>> >> --
>> >> 1.7.4.4
>> >>
>> >
>> >
>> > Sorry for Late reply.
>> >
>> > Rajesh
>> >
>
>
--
To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-11  9:28         ` santosh prasad nayak
@ 2012-03-12  6:19           ` Rajesh Borundia
  -1 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-12  6:19 UTC (permalink / raw)
  To: santosh prasad nayak
  Cc: Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org



> -----Original Message-----
> From: santosh prasad nayak [mailto:santoshprasadnayak@gmail.com]
> Sent: Sunday, March 11, 2012 2:47 PM
> To: Rajesh Borundia
> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> Thanks Rajesh for clarification.
> Included all your inputs in the following patch.
> This is for review not a formal one. Once review is done I will send a
> formal patch.
> 
> 
Looks fine to me.

> 
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> index 2eeac32..b5de8a7 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
> 
>  struct nx_vlan_ip_list {
>  	struct list_head list;
> -	u32 ip_addr;
> +	__be32 ip_addr;
>  };
> 
>  /*
> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
> nx_host_sds_ring *sds_ring, int max);
>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
> cmd);
> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> int cmd);
>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
> enable);
>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
> linkup);
>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> index 6f37470..59d5ee7 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> @@ -909,7 +909,7 @@ int netxen_config_rss(struct netxen_adapter
> *adapter, int enable)
>  	return rv;
>  }
> 
> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
> cmd)
> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> int cmd)
>  {
>  	nx_nic_req_t req;
>  	u64 word;
> @@ -922,7 +922,7 @@ int netxen_config_ipaddr(struct netxen_adapter
> *adapter, u32 ip, int cmd)
>  	req.req_hdr = cpu_to_le64(word);
> 
>  	req.words[0] = cpu_to_le64(cmd);
> -	req.words[1] = cpu_to_le64(ip);
> +	memcpy(&req.words[1], &ip, sizeof(u32));
> 
>  	rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
> *)&req, 1);
>  	if (rv != 0) {
> @@ -1050,7 +1050,7 @@ int netxen_get_flash_mac_addr(struct
> netxen_adapter *adapter, u64 *mac)
>  	if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) ==
> -1)
>  		return -1;
> 
> -	if (*mac == cpu_to_le64(~0ULL)) {
> +	if (*(__le64 *)mac == cpu_to_le64(~0ULL)) {
> 
>  		offset = NX_OLD_MAC_ADDR_OFFSET +
>  			(adapter->portnum * sizeof(u64));
> @@ -1059,7 +1059,7 @@ int netxen_get_flash_mac_addr(struct
> netxen_adapter *adapter, u64 *mac)
>  					offset, sizeof(u64), pmac) == -1)
>  			return -1;
> 
> -		if (*mac == cpu_to_le64(~0ULL))
> +		if (*(__le64 *)mac == cpu_to_le64(~0ULL))
>  			return -1;
>  	}
>  	return 0;
> @@ -2155,7 +2155,7 @@ static u32 netxen_md_rd_crb(struct
> netxen_adapter *adapter,
>  static u32
>  netxen_md_rdrom(struct netxen_adapter *adapter,
>  			struct netxen_minidump_entry_rdrom
> -				*romEntry, u32 *data_buff)
> +				*romEntry, __le32 *data_buff)
>  {
>  	int i, count = 0;
>  	u32 size, lck_val;
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> index 7648995..65a718f 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> @@ -805,12 +805,12 @@ netxen_check_options(struct netxen_adapter
> *adapter)
>  	char brd_name[NETXEN_MAX_SHORT_NAME];
>  	char serial_num[32];
>  	int i, offset, val, err;
> -	int *ptr32;
> +	__le32 *ptr32;
>  	struct pci_dev *pdev = adapter->pdev;
> 
>  	adapter->driver_mismatch = 0;
> 
> -	ptr32 = (int *)&serial_num;
> +	ptr32 = (__le32 *)&serial_num;
>  	offset = NX_FW_SERIAL_NUM_OFFSET;
>  	for (i = 0; i < 8; i++) {
>  		if (netxen_rom_fast_read(adapter, offset, &val) == -1) {
> 
> 
> 
> On Sun, Mar 11, 2012 at 12:31 AM, Rajesh Borundia
> <rajesh.borundia@qlogic.com> wrote:
> >
> >
> >> -----Original Message-----
> >> From: santosh prasad nayak [mailto:santoshprasadnayak@gmail.com]
> >> Sent: Saturday, March 10, 2012 12:20 AM
> >> To: Rajesh Borundia
> >> Cc: Sony Chacko; netdev; linux-kernel; kernel-
> janitors@vger.kernel.org
> >> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> >>
> >> On Fri, Mar 9, 2012 at 10:04 PM, Rajesh Borundia
> >> <rajesh.borundia@qlogic.com> wrote:
> >> >
> >> >> -----Original Message-----
> >> >> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
> >> >> Sent: Saturday, March 03, 2012 9:18 PM
> >> >> To: Sony Chacko
> >> >> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
> >> >> janitors@vger.kernel.org; Santosh Nayak
> >> >> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> >> >>
> >> >> From: Santosh Nayak <santoshprasadnayak@gmail.com>
> >> >>
> >> >> Fix endian bug.
> >> >>
> >> >> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
> >> >> ---
> >> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
> >> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12
> +++++++--
> >> ---
> >> >>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
> >> >>  3 files changed, 10 insertions(+), 8 deletions(-)
> >> >>
> >> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> >> index 2eeac32..b5de8a7 100644
> >> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> >> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
> >> >>
> >> >>  struct nx_vlan_ip_list {
> >> >>       struct list_head list;
> >> >> -     u32 ip_addr;
> >> >> +     __be32 ip_addr;
> >> >>  };
> >> >>
> >> >>  /*
> >> >> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
> >> >> nx_host_sds_ring *sds_ring, int max);
> >> >>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
> >> >>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
> >> >>  int netxen_config_rss(struct netxen_adapter *adapter, int
> enable);
> >> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
> >> int
> >> >> cmd);
> >> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32
> ip,
> >> >> int cmd);
> >> >>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
> >> >> enable);
> >> >>  void netxen_advert_link_change(struct netxen_adapter *adapter,
> int
> >> >> linkup);
> >> >>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64
> *);
> >> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> >> index 6f37470..0f81287 100644
> >> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> >> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
> >> >> *adapter, int enable)
> >> >>       return rv;
> >> >>  }
> >> >>
> >> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
> >> int
> >> >> cmd)
> >> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32
> ip,
> >> >> int cmd)
> >> >>  {
> >> >>       nx_nic_req_t req;
> >> >>       u64 word;
> >> >> +     u64 ip_addr;
> >> >>       int rv;
> >> >>
> >> >>       memset(&req, 0, sizeof(nx_nic_req_t));
> >> >> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct
> netxen_adapter
> >> >> *adapter, u32 ip, int cmd)
> >> >>       req.req_hdr = cpu_to_le64(word);
> >> >>
> >> >>       req.words[0] = cpu_to_le64(cmd);
> >> >> -     req.words[1] = cpu_to_le64(ip);
> >> >> +     ip_addr = be32_to_cpu(ip);
> >> >> +     *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
> >> >
> >>
> >>
> >> > The adapter requires the IP value in big-endian format, stored at the
> >> > lower 32-bit address.
> >> > cpu_to_be64 will swap the lower 32 bits with the higher 32 bits, and the
> >> > adapter will get an incorrect IP address. Instead you can do:
> >> >
> >> >       u32 *ip_addr;
> >> >       ip_addr = (u32 *)&req.words[1];
> >> >       *ip_addr = ip;
> >>
> >>
> >> 1.  It looks incomplete.
> >>     In the function call "netxen_send_cmd_descs" we have to pass
> "&req"
> >> as
> >>    2nd argument  not  "ipaddr".
> >
> >  I should have sent a patch. This piece of code was just to show how
> to
> > copy ip addr in  req.words[1].
> >
> >>
> >> 2. Your above suggestion is with assumption that the data type of
> 2nd
> >> argument "ip"
> >>      in "netxen_config_ipaddr()" is still "u32".  This is not true.
> >>
> >>      Some days back you suggested to change the data type to
> "__be32".
> >>  In the present patch
> >>      the "ip"  is in "__be32" format i.e already in Big endian
> format
> >> as per requirement.
> >>      We need to only convert this 32 bit to 64 bit.  There are two
> >> ways:
> >>
> >  No I did not assume that ip is u32, ip is still __be32(ip value is
> in form of big endian)
> >  though I should have mentioned it explicitly. But the ip value
> should be copied to lower 32 bit of req.words[1].
> >
> >
> >>      a.   *(__be64 *)&req.words[1] = ip;   // auto conversion
> > In big endian machine MSB is copied into MSB first. So ip will get
> copied into higher 32 bit of
> > req.words[1] but adapter requires it in lower 32 bit.
> >>
> >>      b.   *(__be64 *)&req.words[1] = cpu_to_be64(be32_to_cpu(ip));
> >>             // explicit conversion.
> >>
> > If you follow second cpu_to_be64 it will swap lower 32 bit of ip with
> higher 32 bit in little endian machine.
> > But adapter requires it in lower 32 bit.
> >
> > Simple solution to copy ip in req.words[1] could be
> memcpy(&req.words[1], &ip, sizeof(u32));
> >
> >
> >>
> >>  Please correct me if I am wrong.
> >>
> >>
> >> regards
> >> Santosh
> >>
> >>
> >>
> >>
> >> >
> >> >
> >> >>
> >> >>       rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
> >> >> *)&req, 1);
> >> >>       if (rv != 0) {
> >> >> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
> >> >> netxen_adapter *adapter, u64 *mac)
> >> >>       if (netxen_get_flash_block(adapter, offset, sizeof(u64),
> pmac)
> >> ==
> >> >> -1)
> >> >>               return -1;
> >> >>
> >> >> -     if (*mac == cpu_to_le64(~0ULL)) {
> >> >> +     if (*mac == ~0ULL) {
> >> >
> >> > *mac is in little endian format so compare it with cpu_to_le64.
> >> >
> >> >>
> >> >>               offset = NX_OLD_MAC_ADDR_OFFSET +
> >> >>                       (adapter->portnum * sizeof(u64));
> >> >> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
> >> >> netxen_adapter *adapter, u64 *mac)
> >> >>                                       offset, sizeof(u64), pmac)
> ==
> >> -1)
> >> >>                       return -1;
> >> >>
> >> >> -             if (*mac == cpu_to_le64(~0ULL))
> >> >> +             if (*mac == ~0ULL)
> >> >
> >> > *mac here is in little endian format so compare it with
> cpu_to_le64.
> >> >
> >> >>                       return -1;
> >> >>       }
> >> >>       return 0;
> >> >> @@ -2178,7 +2180,7 @@ lock_try:
> >> >>               NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter-
> >> >ahw.pci_base0,
> >> >> waddr);
> >> >>               raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
> >> >>               NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0,
> &val);
> >> >> -             *data_buff++ = cpu_to_le32(val);
> >> >> +             *data_buff++ = val;
> >> >
> >> > It should be cpu_to_le32 as it is returned to tool which requires
> >> > output in little endian.
> >> >
> >> >>               fl_addr += sizeof(val);
> >> >>       }
> >> >>       readl((void __iomem *)(adapter->ahw.pci_base0 +
> >> >> NX_FLASH_SEM2_ULK));
> >> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> >> index 8dc4a134..70783b4 100644
> >> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> >> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
> >> >> *adapter)
> >> >>                       adapter->driver_mismatch = 1;
> >> >>                       return;
> >> >>               }
> >> >> -             ptr32[i] = cpu_to_le32(val);
> >> >> +             ptr32[i] = val;
> >> >
> >> > Here val should be in little endian (cpu_to_le32) as it will be
> >> referenced by byte array to print serial number.
> >> >
> >> >>               offset += sizeof(u32);
> >> >>       }
> >> >>
> >> >> --
> >> >> 1.7.4.4
> >> >>
> >> >
> >> >
> >> > Sorry for Late reply.
> >> >
> >> > Rajesh
> >> >
> >
> >


--
To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-12  6:19           ` Rajesh Borundia
  0 siblings, 0 replies; 73+ messages in thread
From: Rajesh Borundia @ 2012-03-12  6:19 UTC (permalink / raw)
  To: santosh prasad nayak
  Cc: Sony Chacko, netdev, linux-kernel,
	kernel-janitors@vger.kernel.org



> -----Original Message-----
> From: santosh prasad nayak [mailto:santoshprasadnayak@gmail.com]
> Sent: Sunday, March 11, 2012 2:47 PM
> To: Rajesh Borundia
> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> Thanks Rajesh for clarification.
> Included all your inputs in the following patch.
> This is for review not a formal one. Once review is done I will send a
> formal patch.
> 
> 
Looks fine to me.

> 
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> index 2eeac32..b5de8a7 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
> 
>  struct nx_vlan_ip_list {
>  	struct list_head list;
> -	u32 ip_addr;
> +	__be32 ip_addr;
>  };
> 
>  /*
> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
> nx_host_sds_ring *sds_ring, int max);
>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
>  int netxen_config_rss(struct netxen_adapter *adapter, int enable);
> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
> cmd);
> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> int cmd);
>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
> enable);
>  void netxen_advert_link_change(struct netxen_adapter *adapter, int
> linkup);
>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64 *);
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> index 6f37470..59d5ee7 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> @@ -909,7 +909,7 @@ int netxen_config_rss(struct netxen_adapter
> *adapter, int enable)
>  	return rv;
>  }
> 
> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int
> cmd)
> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32 ip,
> int cmd)
>  {
>  	nx_nic_req_t req;
>  	u64 word;
> @@ -922,7 +922,7 @@ int netxen_config_ipaddr(struct netxen_adapter
> *adapter, u32 ip, int cmd)
>  	req.req_hdr = cpu_to_le64(word);
> 
>  	req.words[0] = cpu_to_le64(cmd);
> -	req.words[1] = cpu_to_le64(ip);
> +	memcpy(&req.words[1], &ip, sizeof(u32));
> 
>  	rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
> *)&req, 1);
>  	if (rv != 0) {
> @@ -1050,7 +1050,7 @@ int netxen_get_flash_mac_addr(struct
> netxen_adapter *adapter, u64 *mac)
>  	if (netxen_get_flash_block(adapter, offset, sizeof(u64), pmac) ==
> -1)
>  		return -1;
> 
> -	if (*mac == cpu_to_le64(~0ULL)) {
> +	if (*(__le64 *)mac == cpu_to_le64(~0ULL)) {
> 
>  		offset = NX_OLD_MAC_ADDR_OFFSET +
>  			(adapter->portnum * sizeof(u64));
> @@ -1059,7 +1059,7 @@ int netxen_get_flash_mac_addr(struct
> netxen_adapter *adapter, u64 *mac)
>  					offset, sizeof(u64), pmac) == -1)
>  			return -1;
> 
> -		if (*mac == cpu_to_le64(~0ULL))
> +		if (*(__le64 *)mac == cpu_to_le64(~0ULL))
>  			return -1;
>  	}
>  	return 0;
> @@ -2155,7 +2155,7 @@ static u32 netxen_md_rd_crb(struct
> netxen_adapter *adapter,
>  static u32
>  netxen_md_rdrom(struct netxen_adapter *adapter,
>  			struct netxen_minidump_entry_rdrom
> -				*romEntry, u32 *data_buff)
> +				*romEntry, __le32 *data_buff)
>  {
>  	int i, count = 0;
>  	u32 size, lck_val;
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> index 7648995..65a718f 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> @@ -805,12 +805,12 @@ netxen_check_options(struct netxen_adapter
> *adapter)
>  	char brd_name[NETXEN_MAX_SHORT_NAME];
>  	char serial_num[32];
>  	int i, offset, val, err;
> -	int *ptr32;
> +	__le32 *ptr32;
>  	struct pci_dev *pdev = adapter->pdev;
> 
>  	adapter->driver_mismatch = 0;
> 
> -	ptr32 = (int *)&serial_num;
> +	ptr32 = (__le32 *)&serial_num;
>  	offset = NX_FW_SERIAL_NUM_OFFSET;
>  	for (i = 0; i < 8; i++) {
>  		if (netxen_rom_fast_read(adapter, offset, &val) == -1) {
> 
> 
> 
> On Sun, Mar 11, 2012 at 12:31 AM, Rajesh Borundia
> <rajesh.borundia@qlogic.com> wrote:
> >
> >
> >> -----Original Message-----
> >> From: santosh prasad nayak [mailto:santoshprasadnayak@gmail.com]
> >> Sent: Saturday, March 10, 2012 12:20 AM
> >> To: Rajesh Borundia
> >> Cc: Sony Chacko; netdev; linux-kernel; kernel-
> janitors@vger.kernel.org
> >> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> >>
> >> On Fri, Mar 9, 2012 at 10:04 PM, Rajesh Borundia
> >> <rajesh.borundia@qlogic.com> wrote:
> >> >
> >> >> -----Original Message-----
> >> >> From: santosh nayak [mailto:santoshprasadnayak@gmail.com]
> >> >> Sent: Saturday, March 03, 2012 9:18 PM
> >> >> To: Sony Chacko
> >> >> Cc: Rajesh Borundia; netdev; linux-kernel; kernel-
> >> >> janitors@vger.kernel.org; Santosh Nayak
> >> >> Subject: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> >> >>
> >> >> From: Santosh Nayak <santoshprasadnayak@gmail.com>
> >> >>
> >> >> Fix endian bug.
> >> >>
> >> >> Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
> >> >> ---
> >> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
> >> >>  drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c |   12
> +++++++--
> >> ---
> >> >>  .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    2 +-
> >> >>  3 files changed, 10 insertions(+), 8 deletions(-)
> >> >>
> >> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> >> index 2eeac32..b5de8a7 100644
> >> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
> >> >> @@ -954,7 +954,7 @@ typedef struct nx_mac_list_s {
> >> >>
> >> >>  struct nx_vlan_ip_list {
> >> >>       struct list_head list;
> >> >> -     u32 ip_addr;
> >> >> +     __be32 ip_addr;
> >> >>  };
> >> >>
> >> >>  /*
> >> >> @@ -1780,7 +1780,7 @@ int netxen_process_rcv_ring(struct
> >> >> nx_host_sds_ring *sds_ring, int max);
> >> >>  void netxen_p3_free_mac_list(struct netxen_adapter *adapter);
> >> >>  int netxen_config_intr_coalesce(struct netxen_adapter *adapter);
> >> >>  int netxen_config_rss(struct netxen_adapter *adapter, int
> enable);
> >> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
> >> int
> >> >> cmd);
> >> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32
> ip,
> >> >> int cmd);
> >> >>  int netxen_linkevent_request(struct netxen_adapter *adapter, int
> >> >> enable);
> >> >>  void netxen_advert_link_change(struct netxen_adapter *adapter,
> int
> >> >> linkup);
> >> >>  void netxen_pci_camqm_read_2M(struct netxen_adapter *, u64, u64
> *);
> >> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> >> index 6f37470..0f81287 100644
> >> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
> >> >> @@ -909,10 +909,11 @@ int netxen_config_rss(struct netxen_adapter
> >> >> *adapter, int enable)
> >> >>       return rv;
> >> >>  }
> >> >>
> >> >> -int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip,
> >> int
> >> >> cmd)
> >> >> +int netxen_config_ipaddr(struct netxen_adapter *adapter, __be32
> ip,
> >> >> int cmd)
> >> >>  {
> >> >>       nx_nic_req_t req;
> >> >>       u64 word;
> >> >> +     u64 ip_addr;
> >> >>       int rv;
> >> >>
> >> >>       memset(&req, 0, sizeof(nx_nic_req_t));
> >> >> @@ -922,7 +923,8 @@ int netxen_config_ipaddr(struct
> netxen_adapter
> >> >> *adapter, u32 ip, int cmd)
> >> >>       req.req_hdr = cpu_to_le64(word);
> >> >>
> >> >>       req.words[0] = cpu_to_le64(cmd);
> >> >> -     req.words[1] = cpu_to_le64(ip);
> >> >> +     ip_addr = be32_to_cpu(ip);
> >> >> +     *(__be64 *)&req.words[1] = cpu_to_be64(ip_addr);
> >> >
> >>
> >>
> >> > The adapter requires the IP value in big-endian format, stored at the
> >> > lower 32-bit address.
> >> > cpu_to_be64 will swap the lower 32 bits with the higher 32 bits, and the
> >> > adapter will get an incorrect IP address. Instead you can do:
> >> >
> >> >       u32 *ip_addr;
> >> >       ip_addr = (u32 *)&req.words[1];
> >> >       *ip_addr = ip;
> >>
> >>
> >> 1.  It looks incomplete.
> >>     In the function call "netxen_send_cmd_descs" we have to pass
> "&req"
> >> as
> >>    2nd argument  not  "ipaddr".
> >
> >  I should have sent a patch. This piece of code was just to show how
> to
> > copy ip addr in  req.words[1].
> >
> >>
> >> 2. Your above suggestion is with assumption that the data type of
> 2nd
> >> argument "ip"
> >>      in "netxen_config_ipaddr()" is still "u32".  This is not true.
> >>
> >>      Some days back you suggested to change the data type to
> "__be32".
> >>  In the present patch
> >>      the "ip"  is in "__be32" format i.e already in Big endian
> format
> >> as per requirement.
> >>      We need to only convert this 32 bit to 64 bit.  There are two
> >> ways:
> >>
> >  No I did not assume that ip is u32, ip is still __be32(ip value is
> in form of big endian)
> >  though I should have mentioned it explicitly. But the ip value
> should be copied to lower 32 bit of req.words[1].
> >
> >
> >>      a.   *(__be64 *)&req.words[1] = ip;   // auto conversion
> > In big endian machine MSB is copied into MSB first. So ip will get
> copied into higher 32 bit of
> > req.words[1] but adapter requires it in lower 32 bit.
> >>
> >>      b.   *(__be64 *)&req.words[1] = cpu_to_be64(be32_to_cpu(ip));
> >>             // explicit conversion.
> >>
> > If you follow second cpu_to_be64 it will swap lower 32 bit of ip with
> higher 32 bit in little endian machine.
> > But adapter requires it in lower 32 bit.
> >
> > Simple solution to copy ip in req.words[1] could be
> memcpy(&req.words[1], &ip, sizeof(u32));
> >
> >
> >>
> >>  Please correct me if I am wrong.
> >>
> >>
> >> regards
> >> Santosh
> >>
> >>
> >>
> >>
> >> >
> >> >
> >> >>
> >> >>       rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0
> >> >> *)&req, 1);
> >> >>       if (rv != 0) {
> >> >> @@ -1050,7 +1052,7 @@ int netxen_get_flash_mac_addr(struct
> >> >> netxen_adapter *adapter, u64 *mac)
> >> >>       if (netxen_get_flash_block(adapter, offset, sizeof(u64),
> pmac)
> >> ==
> >> >> -1)
> >> >>               return -1;
> >> >>
> >> >> -     if (*mac == cpu_to_le64(~0ULL)) {
> >> >> +     if (*mac == ~0ULL) {
> >> >
> >> > *mac is in little endian format so compare it with cpu_to_le64.
> >> >
> >> >>
> >> >>               offset = NX_OLD_MAC_ADDR_OFFSET +
> >> >>                       (adapter->portnum * sizeof(u64));
> >> >> @@ -1059,7 +1061,7 @@ int netxen_get_flash_mac_addr(struct
> >> >> netxen_adapter *adapter, u64 *mac)
> >> >>                                       offset, sizeof(u64), pmac)
> ==
> >> -1)
> >> >>                       return -1;
> >> >>
> >> >> -             if (*mac == cpu_to_le64(~0ULL))
> >> >> +             if (*mac == ~0ULL)
> >> >
> >> > *mac here is in little endian format so compare it with
> cpu_to_le64.
> >> >
> >> >>                       return -1;
> >> >>       }
> >> >>       return 0;
> >> >> @@ -2178,7 +2180,7 @@ lock_try:
> >> >>               NX_WR_DUMP_REG(FLASH_ROM_WINDOW, adapter-
> >> >ahw.pci_base0,
> >> >> waddr);
> >> >>               raddr = FLASH_ROM_DATA + (fl_addr & 0x0000FFFF);
> >> >>               NX_RD_DUMP_REG(raddr, adapter->ahw.pci_base0,
> &val);
> >> >> -             *data_buff++ = cpu_to_le32(val);
> >> >> +             *data_buff++ = val;
> >> >
> >> > It should be cpu_to_le32 as it is returned to tool which requires
> >> > output in little endian.
> >> >
> >> >>               fl_addr += sizeof(val);
> >> >>       }
> >> >>       readl((void __iomem *)(adapter->ahw.pci_base0 +
> >> >> NX_FLASH_SEM2_ULK));
> >> >> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> >> b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> >> index 8dc4a134..70783b4 100644
> >> >> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> >> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
> >> >> @@ -818,7 +818,7 @@ netxen_check_options(struct netxen_adapter
> >> >> *adapter)
> >> >>                       adapter->driver_mismatch = 1;
> >> >>                       return;
> >> >>               }
> >> >> -             ptr32[i] = cpu_to_le32(val);
> >> >> +             ptr32[i] = val;
> >> >
> >> > Here val should be in little endian (cpu_to_le32) as it will be
> >> referenced by byte array to print serial number.
> >> >
> >> >>               offset += sizeof(u32);
> >> >>       }
> >> >>
> >> >> --
> >> >> 1.7.4.4
> >> >>
> >> >
> >> >
> >> > Sorry for Late reply.
> >> >
> >> > Rajesh
> >> >
> >
> >



^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-11  9:28         ` santosh prasad nayak
@ 2012-03-12  9:37             ` David Laight
  -1 siblings, 0 replies; 73+ messages in thread
From: David Laight @ 2012-03-12  9:37 UTC (permalink / raw)
  To: santosh prasad nayak, Rajesh Borundia
  Cc: Sony Chacko, netdev, linux-kernel, kernel-janitors

 

> -----Original Message-----
> From: netdev-owner@vger.kernel.org 
> [mailto:netdev-owner@vger.kernel.org] On Behalf Of santosh 
> prasad nayak
> Sent: 11 March 2012 09:17
> To: Rajesh Borundia
> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> Thanks Rajesh for clarification.
> Included all your inputs in the following patch.
> This is for review not a formal one. Once review is done I will send a
> formal patch.

I'm not sure of the exact nature of the issues here,
but whenever I see code that casts between the addresses
of integer types large bells start ringing - such code
tends to have unwanted dependencies against the sizes
and endiannesses of the relevant fields.
This code might be ok, but lines like:
> +	if (*(__le64 *)mac == cpu_to_le64(~0ULL)) {
rather give me the willies.

	David



^ permalink raw reply	[flat|nested] 73+ messages in thread

* RE: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-12  9:37             ` David Laight
  0 siblings, 0 replies; 73+ messages in thread
From: David Laight @ 2012-03-12  9:37 UTC (permalink / raw)
  To: santosh prasad nayak, Rajesh Borundia
  Cc: Sony Chacko, netdev, linux-kernel, kernel-janitors

 

> -----Original Message-----
> From: netdev-owner@vger.kernel.org 
> [mailto:netdev-owner@vger.kernel.org] On Behalf Of santosh 
> prasad nayak
> Sent: 11 March 2012 09:17
> To: Rajesh Borundia
> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
> 
> Thanks Rajesh for clarification.
> Included all your inputs in the following patch.
> This is for review not a formal one. Once review is done I will send a
> formal patch.

I'm not sure of the exact nature of the issues here,
but whenever I see code that casts between the addresses
of integer types large bells start ringing - such code
tends to have unwanted dependencies against the sizes
and endiannesses of the relevant fields.
This code might be ok, but lines like:
> +	if (*(__le64 *)mac == cpu_to_le64(~0ULL)) {
rather give me the willies.

	David



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
  2012-03-12  9:37             ` David Laight
@ 2012-03-12  9:59               ` santosh prasad nayak
  -1 siblings, 0 replies; 73+ messages in thread
From: santosh prasad nayak @ 2012-03-12  9:47 UTC (permalink / raw)
  To: David Laight
  Cc: Rajesh Borundia, Sony Chacko, netdev, linux-kernel,
	kernel-janitors

Here "mac" is a "u64" and I have cast it to "__le64",
because it's required there.

If you have any better suggestion, please let me know.


regards
santosh

On Mon, Mar 12, 2012 at 3:07 PM, David Laight <David.Laight@aculab.com> wrote:
>
>
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org
>> [mailto:netdev-owner@vger.kernel.org] On Behalf Of santosh
>> prasad nayak
>> Sent: 11 March 2012 09:17
>> To: Rajesh Borundia
>> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
>> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
>>
>> Thanks Rajesh for clarification.
>> Included all your inputs in the following patch.
>> This is for review not a formal one. Once review is done I will send a
>> formal patch.
>
> I'm not sure of the exact nature of the issues here,
> but whenever I see code that casts between the addresses
> of integer types large bells start ringing - such code
> tends to have unwanted dependencies against the sizes
> and endiannesses of the relevant fields.
> This code might be ok, but lines like:
>> +     if (*(__le64 *)mac == cpu_to_le64(~0ULL)) {
> rather give me the willies.
>
>        David
>
>

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
@ 2012-03-12  9:59               ` santosh prasad nayak
  0 siblings, 0 replies; 73+ messages in thread
From: santosh prasad nayak @ 2012-03-12  9:59 UTC (permalink / raw)
  To: David Laight
  Cc: Rajesh Borundia, Sony Chacko, netdev, linux-kernel,
	kernel-janitors

Here "mac" is a "u64" and I have cast it to "__le64",
because it's required there.

If you have any better suggestion, please let me know.


regards
santosh

On Mon, Mar 12, 2012 at 3:07 PM, David Laight <David.Laight@aculab.com> wrote:
>
>
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org
>> [mailto:netdev-owner@vger.kernel.org] On Behalf Of santosh
>> prasad nayak
>> Sent: 11 March 2012 09:17
>> To: Rajesh Borundia
>> Cc: Sony Chacko; netdev; linux-kernel; kernel-janitors@vger.kernel.org
>> Subject: Re: [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug.
>>
>> Thanks Rajesh for clarification.
>> Included all your inputs in the following patch.
>> This is for review not a formal one. Once review is done I will send a
>> formal patch.
>
> I'm not sure of the exact nature of the issues here,
> but whenever I see code that casts between the addresses
> of integer types large bells start ringing - such code
> tends to have unwanted dependencies against the sizes
> and endiannesses of the relevant fields.
> This code might be ok, but lines like:
>> +     if (*(__le64 *)mac == cpu_to_le64(~0ULL)) {
> rather give me the willies.
>
>        David
>
>
--
To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-07  9:33                 ` Jan Beulich
@ 2012-03-14  6:32                   ` Justin Gibbs
  -1 siblings, 0 replies; 73+ messages in thread
From: Justin Gibbs @ 2012-03-14  6:32 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, xen-devel@lists.xen.org, Ian Campbell,
	konrad.wilk@oracle.com, waldi@debian.org, netdev@vger.kernel.org,
	joe.jin@oracle.com, linux-kernel@vger.kernel.org,
	jbarnes@virtuousgeek.org, weiyi.huang@gmail.com,
	paul.gortmaker@windriver.com, Paul Durrant, David Vrabel,
	Santosh Jodh, linux-pci@vger.kernel.org,
	<konrad@darnok.org>, akpm@linux-foundation.org,
	"virtualization@lists.linux-foundation.org" <virtualizati>


On Mar 7, 2012, at 2:33 AM, Jan Beulich wrote:

>>>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
>> -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
>> default size for SSD usage? 16?
> 
> What do SSDs have to do with a XenBus definition? Imo it's wrong (and
> unnecessary) to introduce a limit at the XenBus level at all - each driver
> can do this for itself.
> 
> As to the limit for SSDs in the block interface - I don't think the number
> of possibly simultaneous requests has anything to do with this. Instead,
> I'd expect the request number/size/segments extension that NetBSD
> apparently implements to possibly have an effect.
> 
> Jan

There's another problem here that I brought up during the Xen
Hack-a-thon.  The ring macros require that the ring element count
be a power of two.  This doesn't mean that the ring will be a power
of 2 pages in size.  To illustrate this point, I modified the FreeBSD
blkback driver to provide negotiated ring stats via sysctl.

Here's a connection to a Windows VM running the Citrix PV drivers:

    dev.xbbd.2.max_requests: 128
    dev.xbbd.2.max_request_segments: 11
    dev.xbbd.2.max_request_size: 45056
    dev.xbbd.2.ring_elem_size: 108  <= 32bit ABI
    dev.xbbd.2.ring_pages: 4
    dev.xbbd.2.ring_elements: 128
    dev.xbbd.2.ring_waste: 2496

Over half a page is wasted when ring-page-order is 2.  I'm sure you
can see where this is going.  :-)

Here are the limits published by our backend to the XenStore:

    max-ring-pages = "113"
    max-ring-page-order = "7"
    max-requests = "256"
    max-request-segments = "129"
    max-request-size = "524288"

Because we allow so many concurrent, large requests in our product,
the ring wastage really adds up if the front end doesn't support
the "ring-pages" variant of the extension.  However, you only need
a ring-page-order of 3 with this protocol to start seeing pages of
wasted ring space.

You don't really want to negotiate "ring-pages" either.  The backends
often need to support multiple ABIs.  I can easily construct a set
of limits for the FreeBSD blkback driver which will cause the ring
limits to vary by a page between the 32bit and 64bit ABIs.

With all this in mind, the backend must do a dance of rounding up,
taking the max of the ring sizes for the different ABIs, and then
validating the front-end published limits taking its ABI into
account.  The front-end does some of this too.  It's way too messy
and error prone because we don't communicate the ring element limit
directly.

"max-ring-element-order" anyone? :-)

--
Justin

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
@ 2012-03-14  6:32                   ` Justin Gibbs
  0 siblings, 0 replies; 73+ messages in thread
From: Justin Gibbs @ 2012-03-14  6:32 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, xen-devel@lists.xen.org, Ian Campbell,
	konrad.wilk@oracle.com, waldi@debian.org, netdev@vger.kernel.org,
	joe.jin@oracle.com, linux-kernel@vger.kernel.org,
	jbarnes@virtuousgeek.org, weiyi.huang@gmail.com,
	paul.gortmaker@windriver.com, Paul Durrant, David Vrabel,
	Santosh Jodh, linux-pci@vger.kernel.org,
	<konrad@darnok.org>, akpm@linux-foundation.org,
	"virtualization@lists.linux-foundation.org" <virtualizati


On Mar 7, 2012, at 2:33 AM, Jan Beulich wrote:

>>>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
>> -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
>> default size for SSD usage? 16?
> 
> What do SSDs have to do with a XenBus definition? Imo it's wrong (and
> unnecessary) to introduce a limit at the XenBus level at all - each driver
> can do this for itself.
> 
> As to the limit for SSDs in the block interface - I don't think the number
> of possibly simultaneous requests has anything to do with this. Instead,
> I'd expect the request number/size/segments extension that NetBSD
> apparently implements to possibly have an effect.
> 
> Jan

There's another problem here that I brought up during the Xen
Hack-a-thon.  The ring macros require that the ring element count
be a power of two.  This doesn't mean that the ring will be a power
of 2 pages in size.  To illustrate this point, I modified the FreeBSD
blkback driver to provide negotiated ring stats via sysctl.

Here's a connection to a Windows VM running the Citrix PV drivers:

    dev.xbbd.2.max_requests: 128
    dev.xbbd.2.max_request_segments: 11
    dev.xbbd.2.max_request_size: 45056
    dev.xbbd.2.ring_elem_size: 108  <= 32bit ABI
    dev.xbbd.2.ring_pages: 4
    dev.xbbd.2.ring_elements: 128
    dev.xbbd.2.ring_waste: 2496

Over half a page is wasted when ring-page-order is 2.  I'm sure you
can see where this is going.  :-)

Here are the limits published by our backend to the XenStore:

    max-ring-pages = "113"
    max-ring-page-order = "7"
    max-requests = "256"
    max-request-segments = "129"
    max-request-size = "524288"

Because we allow so many concurrent, large requests in our product,
the ring wastage really adds up if the front end doesn't support
the "ring-pages" variant of the extension.  However, you only need
a ring-page-order of 3 with this protocol to start seeing pages of
wasted ring space.

You don't really want to negotiate "ring-pages" either.  The backends
often need to support multiple ABIs.  I can easily construct a set
of limits for the FreeBSD blkback driver which will cause the ring
limits to vary by a page between the 32bit and 64bit ABIs.

With all this in mind, the backend must do a dance of rounding up,
taking the max of the ring sizes for the different ABIs, and then
validating the front-end published limits taking its ABI into
account.  The front-end does some of this too.  It's way too messy
and error prone because we don't communicate the ring element limit
directly.

"max-ring-element-order" anyone? :-)

--
Justin

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-07  9:33                 ` Jan Beulich
                                   ` (3 preceding siblings ...)
  (?)
@ 2012-03-14  6:32                 ` Justin Gibbs
  -1 siblings, 0 replies; 73+ messages in thread
From: Justin Gibbs @ 2012-03-14  6:32 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, xen-devel@lists.xen.org, Ian Campbell,
	konrad.wilk@oracle.com, waldi@debian.org, netdev@vger.kernel.org,
	rusty@rustcorp.com.au, joe.jin@oracle.com,
	linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	weiyi.huang@gmail.com, paul.gortmaker@windriver.com, Paul Durrant,
	David Vrabel, Santosh Jodh, linux-pci@vger.kernel.org,
	<konrad@darnok.org>, akpm@linux-foundation.org, virtual


On Mar 7, 2012, at 2:33 AM, Jan Beulich wrote:

>>>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote:
>> -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal
>> default size for SSD usage? 16?
> 
> What do SSDs have to do with a XenBus definition? Imo it's wrong (and
> unnecessary) to introduce a limit at the XenBus level at all - each driver
> can do this for itself.
> 
> As to the limit for SSDs in the block interface - I don't think the number
> of possibly simultaneous requests has anything to do with this. Instead,
> I'd expect the request number/size/segments extension that NetBSD
> apparently implements to possibly have an effect.
> 
> Jan

There's another problem here that I brought up during the Xen
Hack-a-thon.  The ring macros require that the ring element count
be a power of two.  This doesn't mean that the ring will be a power
of 2 pages in size.  To illustrate this point, I modified the FreeBSD
blkback driver to provide negotiated ring stats via sysctl.

Here's a connection to a Windows VM running the Citrix PV drivers:

    dev.xbbd.2.max_requests: 128
    dev.xbbd.2.max_request_segments: 11
    dev.xbbd.2.max_request_size: 45056
    dev.xbbd.2.ring_elem_size: 108  <= 32bit ABI
    dev.xbbd.2.ring_pages: 4
    dev.xbbd.2.ring_elements: 128
    dev.xbbd.2.ring_waste: 2496

Over half a page is wasted when ring-page-order is 2.  I'm sure you
can see where this is going.  :-)

Here are the limits published by our backend to the XenStore:

    max-ring-pages = "113"
    max-ring-page-order = "7"
    max-requests = "256"
    max-request-segments = "129"
    max-request-size = "524288"

Because we allow so many concurrent, large requests in our product,
the ring wastage really adds up if the front end doesn't support
the "ring-pages" variant of the extension.  However, you only need
a ring-page-order of 3 with this protocol to start seeing pages of
wasted ring space.

You don't really want to negotiate "ring-pages" either.  The backends
often need to support multiple ABIs.  I can easily construct a set
of limits for the FreeBSD blkback driver which will cause the ring
limits to vary by a page between the 32bit and 64bit ABIs.

With all this in mind, the backend must do a dance of rounding up,
taking the max of the ring sizes for the different ABIs, and then
validating the front-end published limits taking its ABI into
account.  The front-end does some of this too.  It's way too messy
and error prone because we don't communicate the ring element limit
directly.

"max-ring-element-order" anyone? :-)

--
Justin

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-14  6:32                   ` Justin Gibbs
  (?)
  (?)
@ 2012-03-14  8:35                   ` Jan Beulich
  -1 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-14  8:35 UTC (permalink / raw)
  To: Justin Gibbs
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	weiyi.huang@gmail.com, linux-kernel@vger.kernel.org,
	jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, DavidVrabel,
	Santosh Jodh, linux-pci@vger.kernel.org,
	<konrad@darnok.org>, akpm@linux-foundation.org, xen-devel

>>> On 14.03.12 at 07:32, Justin Gibbs <justing@spectralogic.com> wrote:
> There's another problem here that I brought up during the Xen
> Hack-a-thon.  The ring macros require that the ring element count
> be a power of two.  This doesn't mean that the ring will be a power
> of 2 pages in size.  To illustrate this point, I modified the FreeBSD
> blkback driver to provide negotiated ring stats via sysctl.
> 
> Here's a connection to a Windows VM running the Citrix PV drivers:
> 
>     dev.xbbd.2.max_requests: 128
>     dev.xbbd.2.max_request_segments: 11
>     dev.xbbd.2.max_request_size: 45056
>     dev.xbbd.2.ring_elem_size: 108  <= 32bit ABI
>     dev.xbbd.2.ring_pages: 4
>     dev.xbbd.2.ring_elements: 128
>     dev.xbbd.2.ring_waste: 2496
> 
> Over half a page is wasted when ring-page-order is 2.  I'm sure you
> can see where this is going.  :-)
> 
> Here are the limits published by our backend to the XenStore:
> 
>     max-ring-pages = "113"
>     max-ring-page-order = "7"
>     max-requests = "256"
>     max-request-segments = "129"
>     max-request-size = "524288"
> 
> Because we allow so many concurrent, large requests in our product,
> the ring wastage really adds up if the front end doesn't support
> the "ring-pages" variant of the extension.  However, you only need
> a ring-page-order of 3 with this protocol to start seeing pages of
> wasted ring space.
> 
> You don't really want to negotiate "ring-pages" either.  The backends
> often need to support multiple ABIs.  I can easily construct a set
> of limits for the FreeBSD blkback driver which will cause the ring
> limits to vary by a page between the 32bit and 64bit ABIs.
> 
> With all this in mind, the backend must do a dance of rounding up,
> taking the max of the ring sizes for the different ABIs, and then
> validating the front-end published limits taking its ABI into
> account.  The front-end does some of this too.  Its way too messy
> and error prone because we don't communicate the ring element limit
> directly.
> 
> "max-ring-element-order" anyone? :-)

Interesting observation - yes, I think deprecating both pre-existing
methods in favor of something along those lines would be desirable.
(But I'd favor not using the term "order" here as it is - at least in
Linux - usually implied to be used on pages. "max-ringent-log2"
perhaps?)

What you say also implies that all currently floating around Linux
backend patches are flawed in their way of calculating the number
of ring entries, as this number really depends on the protocol the
frontend advertises.

Further, if you're concerned about wasting ring space (and
particularly in the context of your request number/size/segments
extension), shouldn't we bother to define pairs (or larger groups)
of struct blkif_request_segment (as currently a quarter of the space
is mere padding)? Or split grefs from {first,last}_sect altogether?

Finally, while looking at all this again, I stumbled across the use
of blkif_vdev_t in the ring structures: At least Linux'es blkback
completely ignores this field - {xen_,}vbd_translate() simply
overwrites what dispatch_rw_block_io() put there (and with this,
struct phys_req's dev and bdev members seem rather pointless too).
Does anyone recall what the original intention with this request field
was? Allowing I/O on multiple devices over a single ring?

Bottom line - shouldn't we define a blkif2 interface to cleanly
accommodate all the various extensions (and do away with the
protocol variations)?

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-14  6:32                   ` Justin Gibbs
  (?)
@ 2012-03-14  8:35                   ` Jan Beulich
  -1 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-14  8:35 UTC (permalink / raw)
  To: Justin Gibbs
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	rusty@rustcorp.com.au, weiyi.huang@gmail.com,
	linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, DavidVrabel,
	Santosh Jodh, linux-pci@vger.kernel.org,
	<konrad@darnok.org>

>>> On 14.03.12 at 07:32, Justin Gibbs <justing@spectralogic.com> wrote:
> There's another problem here that I brought up during the Xen
> Hack-a-thon.  The ring macros require that the ring element count
> be a power of two.  This doesn't mean that the ring will be a power
> of 2 pages in size.  To illustrate this point, I modified the FreeBSD
> blkback driver to provide negotiated ring stats via sysctl.
> 
> Here's a connection to a Windows VM running the Citrix PV drivers:
> 
>     dev.xbbd.2.max_requests: 128
>     dev.xbbd.2.max_request_segments: 11
>     dev.xbbd.2.max_request_size: 45056
>     dev.xbbd.2.ring_elem_size: 108  <= 32bit ABI
>     dev.xbbd.2.ring_pages: 4
>     dev.xbbd.2.ring_elements: 128
>     dev.xbbd.2.ring_waste: 2496
> 
> Over half a page is wasted when ring-page-order is 2.  I'm sure you
> can see where this is going.  :-)
> 
> Here are the limits published by our backend to the XenStore:
> 
>     max-ring-pages = "113"
>     max-ring-page-order = "7"
>     max-requests = "256"
>     max-request-segments = "129"
>     max-request-size = "524288"
> 
> Because we allow so many concurrent, large requests in our product,
> the ring wastage really adds up if the front end doesn't support
> the "ring-pages" variant of the extension.  However, you only need
> a ring-page-order of 3 with this protocol to start seeing pages of
> wasted ring space.
> 
> You don't really want to negotiate "ring-pages" either.  The backends
> often need to support multiple ABIs.  I can easily construct a set
> of limits for the FreeBSD blkback driver which will cause the ring
> limits to vary by a page between the 32bit and 64bit ABIs.
> 
> With all this in mind, the backend must do a dance of rounding up,
> taking the max of the ring sizes for the different ABIs, and then
> validating the front-end published limits taking its ABI into
> account.  The front-end does some of this too.  Its way too messy
> and error prone because we don't communicate the ring element limit
> directly.
> 
> "max-ring-element-order" anyone? :-)

Interesting observation - yes, I think deprecating both pre-existing
methods in favor of something along those lines would be desirable.
(But I'd favor not using the term "order" here as it is - at least in
Linux - usually implied to be used on pages. "max-ringent-log2"
perhaps?)

What you say also implies that all currently floating around Linux
backend patches are flawed in their way of calculating the number
of ring entries, as this number really depends on the protocol the
frontend advertises.

Further, if you're concerned about wasting ring space (and
particularly in the context of your request number/size/segments
extension), shouldn't we bother to define pairs (or larger groups)
of struct blkif_request_segment (as currently a quarter of the space
is mere padding)? Or split grefs from {first,last}_sect altogether?

Finally, while looking at all this again, I stumbled across the use
of blkif_vdev_t in the ring structures: At least Linux'es blkback
completely ignores this field - {xen_,}vbd_translate() simply
overwrites what dispatch_rw_block_io() put there (and with this,
struct phys_req's dev and bdev members seem rather pointless too).
Does anyone recall what the original intention with this request field
was? Allowing I/O on multiple devices over a single ring?

Bottom line - shouldn't we define a blkif2 interface to cleanly
accommodate all the various extensions (and do away with the
protocol variations)?

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-14  6:32                   ` Justin Gibbs
                                     ` (2 preceding siblings ...)
  (?)
@ 2012-03-14 15:34                   ` Jan Beulich
  2012-03-14 17:01                     ` Justin Gibbs
  2012-03-14 17:01                     ` [Xen-devel] " Justin Gibbs
  -1 siblings, 2 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-14 15:34 UTC (permalink / raw)
  To: Justin Gibbs
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	weiyi.huang@gmail.com, linux-kernel@vger.kernel.org,
	jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, DavidVrabel,
	Santosh Jodh, linux-pci@vger.kernel.org,
	<konrad@darnok.org>, akpm@linux-foundation.org, xen-devel

>>> On 14.03.12 at 07:32, Justin Gibbs <justing@spectralogic.com> wrote:
> There's another problem here that I brought up during the Xen
> Hack-a-thon.  The ring macros require that the ring element count
> be a power of two.  This doesn't mean that the ring will be a power
> of 2 pages in size.  To illustrate this point, I modified the FreeBSD
> blkback driver to provide negotiated ring stats via sysctl.
> 
> Here's a connection to a Windows VM running the Citrix PV drivers:
> 
>     dev.xbbd.2.max_requests: 128
>     dev.xbbd.2.max_request_segments: 11
>     dev.xbbd.2.max_request_size: 45056
>     dev.xbbd.2.ring_elem_size: 108  <= 32bit ABI
>     dev.xbbd.2.ring_pages: 4
>     dev.xbbd.2.ring_elements: 128
>     dev.xbbd.2.ring_waste: 2496
> 
> Over half a page is wasted when ring-page-order is 2.  I'm sure you
> can see where this is going.  :-)

Having looked a little closer on how the wasted space is progressing,
I find myself in the odd position that I can't explain the original (and
still active) definition of BLKIF_MAX_SEGMENTS_PER_REQUEST (11):
With ring-order zero, 0x240/0x1c0 bytes (32/64-bit
respectively) are unused. With 32 requests fitting in the ring, and with
each segment occupying 6 bytes (padded to 8), in the 64-bit variant
there's enough space for a 12th segment (32-bit would even have
space for a 13th). Am I missing anything here?

Plus all this assumes a page size of 4k, yet ia64 had always been using
pages of 16k iirc.

> Here are the limits published by our backend to the XenStore:
>
>     max-ring-pages = "113"
>     max-ring-page-order = "7"
>     max-requests = "256"
>     max-request-segments = "129"
>     max-request-size = "524288"

Oh, so this protocol doesn't require ring-pages (and max-ring-pages)
to be a power of two? In which case I think it is a mistake to also
advertise max-ring-page-order, as at least the (Linux) frontend code
I know of interprets this as being able to set up a ring of (using the
numbers above) 128 pages (unless, of course, your backend can deal
with this regardless of the max-ring-pages value it announces).

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-14  6:32                   ` Justin Gibbs
                                     ` (3 preceding siblings ...)
  (?)
@ 2012-03-14 15:34                   ` Jan Beulich
  -1 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-14 15:34 UTC (permalink / raw)
  To: Justin Gibbs
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	rusty@rustcorp.com.au, weiyi.huang@gmail.com,
	linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, DavidVrabel,
	Santosh Jodh, linux-pci@vger.kernel.org,
	<konrad@darnok.org>

>>> On 14.03.12 at 07:32, Justin Gibbs <justing@spectralogic.com> wrote:
> There's another problem here that I brought up during the Xen
> Hack-a-thon.  The ring macros require that the ring element count
> be a power of two.  This doesn't mean that the ring will be a power
> of 2 pages in size.  To illustrate this point, I modified the FreeBSD
> blkback driver to provide negotiated ring stats via sysctl.
> 
> Here's a connection to a Windows VM running the Citrix PV drivers:
> 
>     dev.xbbd.2.max_requests: 128
>     dev.xbbd.2.max_request_segments: 11
>     dev.xbbd.2.max_request_size: 45056
>     dev.xbbd.2.ring_elem_size: 108  <= 32bit ABI
>     dev.xbbd.2.ring_pages: 4
>     dev.xbbd.2.ring_elements: 128
>     dev.xbbd.2.ring_waste: 2496
> 
> Over half a page is wasted when ring-page-order is 2.  I'm sure you
> can see where this is going.  :-)

Having looked a little closer on how the wasted space is progressing,
I find myself in the odd position that I can't explain the original (and
still active) definition of BLKIF_MAX_SEGMENTS_PER_REQUEST (11):
With ring-order zero, there are 0x240/0x1c0 bytes (32/64-bit
respectively) unused. With 32 requests fitting in the ring, and with
each segment occupying 6 bytes (padded to 8), in the 64-bit variant
there's enough space for a 12th segment (32-bit would even have
space for a 13th). Am I missing anything here?

Plus all this assumes a page size of 4k, yet ia64 had always been using
pages of 16k iirc.

> Here are the limits published by our backend to the XenStore:
>
>     max-ring-pages = "113"
>     max-ring-page-order = "7"
>     max-requests = "256"
>     max-request-segments = "129"
>     max-request-size = "524288"

Oh, so this protocol doesn't require ring-pages (and max-ring-pages)
to be a power of two? In which case I think it is a mistake to also
advertise max-ring-page-order, as at least the (Linux) frontend code
I know of interprets this as being able to set up a ring of (using the
numbers above) 128 pages (unless, of course, your backend can deal
with this regardless of the max-ring-pages value it announces).

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-14 15:34                   ` Jan Beulich
  2012-03-14 17:01                     ` Justin Gibbs
@ 2012-03-14 17:01                     ` Justin Gibbs
  2012-03-15  8:03                       ` Jan Beulich
  2012-03-15  8:03                       ` [Xen-devel] " Jan Beulich
  1 sibling, 2 replies; 73+ messages in thread
From: Justin Gibbs @ 2012-03-14 17:01 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	weiyi.huang@gmail.com, linux-kernel@vger.kernel.org,
	jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, DavidVrabel,
	Santosh Jodh, linux-pci@vger.kernel.org,
	<konrad@darnok.org>, akpm@linux-foundation.org, xen-devel

On Mar 14, 2012, at 9:34 AM, Jan Beulich wrote:

>>>> On 14.03.12 at 07:32, Justin Gibbs <justing@spectralogic.com> wrote:
>> There's another problem here that I brought up during the Xen
>> Hack-a-thon.  The ring macros require that the ring element count
>> be a power of two.  This doesn't mean that the ring will be a power
>> of 2 pages in size.  To illustrate this point, I modified the FreeBSD
>> blkback driver to provide negotiated ring stats via sysctl.
>> 
>> Here's a connection to a Windows VM running the Citrix PV drivers:
>> 
>>    dev.xbbd.2.max_requests: 128
>>    dev.xbbd.2.max_request_segments: 11
>>    dev.xbbd.2.max_request_size: 45056
>>    dev.xbbd.2.ring_elem_size: 108  <= 32bit ABI
>>    dev.xbbd.2.ring_pages: 4
>>    dev.xbbd.2.ring_elements: 128
>>    dev.xbbd.2.ring_waste: 2496
>> 
>> Over half a page is wasted when ring-page-order is 2.  I'm sure you
>> can see where this is going.  :-)
> 
> Having looked a little closer on how the wasted space is progressing,
> I find myself in the odd position that I can't explain the original (and
> still active) definition of BLKIF_MAX_SEGMENTS_PER_REQUEST (11):
> With ring-order zero, there's 0x240/0x1c0 bytes (32/64-bit
> respectively) are unused. With 32 requests fitting in the ring, and with
> each segment occupying 6 bytes (padded to 8), in the 64-bit variant
> there's enough space for a 12th segment (32-bit would even have
> space for a 13th). Am I missing anything here?

I don't profess to know the real reason, but the only thing I can come up
with is a requirement/desire on some platforms for 16byte alignment
of the request structures.  This would make the largest possible structure
112 bytes, not the 120 that would allow for more elements.

While we're talking about fixing ring data structures, can RING_IDX
be defined as a "uint32_t" instead of "unsigned int"?  The structure
padding in the ring macros assumes RING_IDX is exactly 4 bytes,
so this should be made explicit.  ILP64 machines may still be a way
out, but the use of non-fixed sized types in places where size really
matters just isn't clean.

> 
>> Here are the limits published by our backend to the XenStore:
>> 
>>    max-ring-pages = "113"
>>    max-ring-page-order = "7"
>>    max-requests = "256"
>>    max-request-segments = "129"
>>    max-request-size = "524288"
> 
> Oh, so this protocol doesn't require ring-pages (and max-ring-pages)
> to be a power of two? In which case I think it is a mistake to also
> advertise max-ring-page-order, as at least the (Linux) frontend code
> I know of interprets this as being able to set up a ring of (using the
> numbers above) 128 pages (unless, of course, your backend can deal
> with this regardless of the max-ring-pages value it announces).

The advertised max-ring-pages is sufficient to hold the maximum allowed
number of ring elements regardless of ABI.  This is then rounded up to the
next power of 2 pages to get the max-ring-page order.  When the front-end
negotiates, the backend just verifies that the maximum number of ring
elements in the specified ring size doesn't exceed the backend's limit.
Fortunately, even with this large of a ring, regardless of ABI, a given
page order computes to the same number of ring elements.  You just have
more wasted space.

--
Justin

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-14 15:34                   ` Jan Beulich
@ 2012-03-14 17:01                     ` Justin Gibbs
  2012-03-14 17:01                     ` [Xen-devel] " Justin Gibbs
  1 sibling, 0 replies; 73+ messages in thread
From: Justin Gibbs @ 2012-03-14 17:01 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, Ian Campbell, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, waldi@debian.org, joe.jin@oracle.com,
	rusty@rustcorp.com.au, weiyi.huang@gmail.com,
	linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	virtualization@lists.linux-foundation.org,
	paul.gortmaker@windriver.com, Paul Durrant, DavidVrabel,
	Santosh Jodh, linux-pci@vger.kernel.org,
	<konrad@darnok.org>

On Mar 14, 2012, at 9:34 AM, Jan Beulich wrote:

>>>> On 14.03.12 at 07:32, Justin Gibbs <justing@spectralogic.com> wrote:
>> There's another problem here that I brought up during the Xen
>> Hack-a-thon.  The ring macros require that the ring element count
>> be a power of two.  This doesn't mean that the ring will be a power
>> of 2 pages in size.  To illustrate this point, I modified the FreeBSD
>> blkback driver to provide negotiated ring stats via sysctl.
>> 
>> Here's a connection to a Windows VM running the Citrix PV drivers:
>> 
>>    dev.xbbd.2.max_requests: 128
>>    dev.xbbd.2.max_request_segments: 11
>>    dev.xbbd.2.max_request_size: 45056
>>    dev.xbbd.2.ring_elem_size: 108  <= 32bit ABI
>>    dev.xbbd.2.ring_pages: 4
>>    dev.xbbd.2.ring_elements: 128
>>    dev.xbbd.2.ring_waste: 2496
>> 
>> Over half a page is wasted when ring-page-order is 2.  I'm sure you
>> can see where this is going.  :-)
> 
> Having looked a little closer on how the wasted space is progressing,
> I find myself in the odd position that I can't explain the original (and
> still active) definition of BLKIF_MAX_SEGMENTS_PER_REQUEST (11):
> With ring-order zero, there's 0x240/0x1c0 bytes (32/64-bit
> respectively) are unused. With 32 requests fitting in the ring, and with
> each segment occupying 6 bytes (padded to 8), in the 64-bit variant
> there's enough space for a 12th segment (32-bit would even have
> space for a 13th). Am I missing anything here?

I don't profess to know the real reason, but the only thing I can come up
with is a requirement/desire on some platforms for 16byte alignment
of the request structures.  This would make the largest possible structure
112 bytes, not the 120 that would allow for more elements.

While we're talking about fixing ring data structures, can RING_IDX
be defined as a "uint32_t" instead of "unsigned int"?  The structure
padding in the ring macros assumes RING_IDX is exactly 4 bytes,
so this should be made explicit.  ILP64 machines may still be a way
out, but the use of non-fixed sized types in places where size really
matters just isn't clean.

> 
>> Here are the limits published by our backend to the XenStore:
>> 
>>    max-ring-pages = "113"
>>    max-ring-page-order = "7"
>>    max-requests = "256"
>>    max-request-segments = "129"
>>    max-request-size = "524288"
> 
> Oh, so this protocol doesn't require ring-pages (and max-ring-pages)
> to be a power of two? In which case I think it is a mistake to also
> advertise max-ring-page-order, as at least the (Linux) frontend code
> I know of interprets this as being able to set up a ring of (using the
> numbers above) 128 pages (unless, of course, your backend can deal
> with this regardless of the max-ring-pages value it announces).

The advertised max-ring-pages is sufficient to hold the maximum allowed
number of ring elements regardless of ABI.  This is then rounded up to the
next power of 2 pages to get the max-ring-page order.  When the front-end
negotiates, the backend just verifies that the maximum number of ring
elements in the specified ring size doesn't exceed the backend's limit.
Fortunately, even with this large of a ring, regardless of ABI, a given
page order computes to the same number of ring elements.  You just have
more wasted space.

--
Justin

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-06  8:34               ` Jan Beulich
  (?)
@ 2012-03-14 17:17               ` Justin T. Gibbs
  2012-03-15  8:09                 ` Jan Beulich
  -1 siblings, 1 reply; 73+ messages in thread
From: Justin T. Gibbs @ 2012-03-14 17:17 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, xen-devel@lists.xen.org, Ian Campbell,
	konrad.wilk@oracle.com, waldi@debian.org, netdev@vger.kernel.org,
	joe.jin@oracle.com, linux-kernel@vger.kernel.org,
	jbarnes@virtuousgeek.org, weiyi.huang@gmail.com,
	paul.gortmaker@windriver.com, Paul Durrant, David Vrabel,
	Santosh Jodh, linux-pci@vger.kernel.org,
	akpm@linux-foundation.org,
	virtualization@lists.linux-foundation.org, lersek

On Mar 6, 2012, at 1:34 AM, Jan Beulich wrote:

>>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:
>> +       }
>> +
>>        /* Create shared ring, alloc event channel. */
>>        err = setup_blkring(dev, info);
>>        if (err)
>> @@ -889,12 +916,35 @@ again:
>>                goto destroy_blkring;
>>        }
>> 
>> -       err = xenbus_printf(xbt, dev->nodename,
>> -                           "ring-ref", "%u", info->ring_ref);
>> -       if (err) {
>> -               message = "writing ring-ref";
>> -               goto abort_transaction;
>> +       if (legacy_backend) {
> 
> Why not use the simpler interface always when info->ring_order == 0?

Because, as I just found out today via a FreeBSD bug report, that's
not how XenServer works.  If the front-end publishes "ring-page-order",
the backend assumes the "ring-refNN" XenStore nodes are in effect,
even if the order is 0.

I'm working on a documentation update for blkif.h now.

<sigh>

--
Justin

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-06  8:34               ` Jan Beulich
  (?)
  (?)
@ 2012-03-14 17:17               ` Justin T. Gibbs
  -1 siblings, 0 replies; 73+ messages in thread
From: Justin T. Gibbs @ 2012-03-14 17:17 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, xen-devel@lists.xen.org, Ian Campbell,
	konrad.wilk@oracle.com, waldi@debian.org, netdev@vger.kernel.org,
	rusty@rustcorp.com.au, joe.jin@oracle.com,
	linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
	weiyi.huang@gmail.com, paul.gortmaker@windriver.com, Paul Durrant,
	David Vrabel, Santosh Jodh, linux-pci@vger.kernel.org,
	akpm@linux-foundation.org,
	"virtualization@lists.linux-foundation.org" <virtua>

On Mar 6, 2012, at 1:34 AM, Jan Beulich wrote:

>>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:
>> +       }
>> +
>>        /* Create shared ring, alloc event channel. */
>>        err = setup_blkring(dev, info);
>>        if (err)
>> @@ -889,12 +916,35 @@ again:
>>                goto destroy_blkring;
>>        }
>> 
>> -       err = xenbus_printf(xbt, dev->nodename,
>> -                           "ring-ref", "%u", info->ring_ref);
>> -       if (err) {
>> -               message = "writing ring-ref";
>> -               goto abort_transaction;
>> +       if (legacy_backend) {
> 
> Why not use the simpler interface always when info->ring_order == 0?

Because, as I just found out today via a FreeBSD bug report, that's
not how XenServer works.  If the front-end publishes "ring-page-order",
the backend assumes the "ring-refNN" XenStore nodes are in effect,
even if the order is 0.

I'm working on a documentation update for blkif.h now.

<sigh>

--
Justin

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-14 17:01                     ` [Xen-devel] " Justin Gibbs
  2012-03-15  8:03                       ` Jan Beulich
@ 2012-03-15  8:03                       ` Jan Beulich
  2012-03-15  8:51                         ` Ian Campbell
  2012-03-15  8:51                         ` [Xen-devel] " Ian Campbell
  1 sibling, 2 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-15  8:03 UTC (permalink / raw)
  To: Justin Gibbs
  Cc: jeremy@goop.org, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, joe.jin@oracle.com, weiyi.huang@gmail.com,
	virtualization@lists.linux-foundation.org, Paul Durrant,
	DavidVrabel, SantoshJodh, <konrad@darnok.org>,
	dgdegra@tycho.nsa.gov, xen-devel@lists.xen.org, lersek@redhat.com

>>> On 14.03.12 at 18:01, Justin Gibbs <justing@spectralogic.com> wrote:
> While we're talking about fixing ring data structures, can RING_IDX
> be defined as a "uint32_t" instead of "unsigned int".  The structure
> padding in the ring macros assumes RING_IDX is exactly 4 bytes,
> so this should be made explicit.  ILP64 machines may still be a way
> out, but the use of non-fixed sized types in places where size really
> matters just isn't clean.

Yes, if we're going to rev the interface, then any such flaws should be
corrected.

(Also shrinking the Cc list a little.)

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-14 17:01                     ` [Xen-devel] " Justin Gibbs
@ 2012-03-15  8:03                       ` Jan Beulich
  2012-03-15  8:03                       ` [Xen-devel] " Jan Beulich
  1 sibling, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-15  8:03 UTC (permalink / raw)
  To: Justin Gibbs
  Cc: jeremy@goop.org, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, joe.jin@oracle.com, weiyi.huang@gmail.com,
	virtualization@lists.linux-foundation.org, Paul Durrant,
	DavidVrabel, SantoshJodh, <konrad@darnok.org>,
	dgdegra@tycho.nsa.gov, xen-devel@lists.xen.org, lersek@redhat.com

>>> On 14.03.12 at 18:01, Justin Gibbs <justing@spectralogic.com> wrote:
> While we're talking about fixing ring data structures, can RING_IDX
> be defined as a "uint32_t" instead of "unsigned int".  The structure
> padding in the ring macros assumes RING_IDX is exactly 4 bytes,
> so this should be made explicit.  ILP64 machines may still be a way
> out, but the use of non-fixed sized types in places where size really
> matters just isn't clean.

Yes, if we're going to rev the interface, then any such flaws should be
corrected.

(Also shrinking the Cc list a little.)

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-14 17:17               ` [Xen-devel] " Justin T. Gibbs
@ 2012-03-15  8:09                 ` Jan Beulich
  0 siblings, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-15  8:09 UTC (permalink / raw)
  To: Justin T. Gibbs
  Cc: jeremy@goop.org, Ian Campbell, konrad.wilk@oracle.com,
	waldi@debian.org, joe.jin@oracle.com, weiyi.huang@gmail.com,
	virtualization@lists.linux-foundation.org, Paul Durrant,
	David Vrabel, Santosh Jodh, dgdegra@tycho.nsa.gov,
	xen-devel@lists.xen.org, lersek@redhat.com

>>> On 14.03.12 at 18:17, "Justin T. Gibbs" <gibbs@scsiguy.com> wrote:
> On Mar 6, 2012, at 1:34 AM, Jan Beulich wrote:
> 
>>>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:
> 
> …
> 
>>> +       }
>>> +
>>>        /* Create shared ring, alloc event channel. */
>>>        err = setup_blkring(dev, info);
>>>        if (err)
>>> @@ -889,12 +916,35 @@ again:
>>>                goto destroy_blkring;
>>>        }
>>> 
>>> -       err = xenbus_printf(xbt, dev->nodename,
>>> -                           "ring-ref", "%u", info->ring_ref);
>>> -       if (err) {
>>> -               message = "writing ring-ref";
>>> -               goto abort_transaction;
>>> +       if (legacy_backend) {
>> 
>> Why not use the simpler interface always when info->ring_order == 0?
> 
> Because, as I just found out today via a FreeBSD bug report, that's
> not how XenServer works.  If the front-end publishes "ring-page-order",
> the backend assumes the "ring-refNN" XenStore nodes are in effect,
> even if the order is 0.

I was certainly implying to not write the ring-page-order and
num-ring-pages nodes in that case.

> I'm working on a documentation update for blkif.h now.
> 
> <sigh>
> 
> --
> Justin


_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-15  8:03                       ` [Xen-devel] " Jan Beulich
  2012-03-15  8:51                         ` Ian Campbell
@ 2012-03-15  8:51                         ` Ian Campbell
  2012-03-15  9:31                           ` Jan Beulich
  2012-03-15  9:31                           ` [Xen-devel] " Jan Beulich
  1 sibling, 2 replies; 73+ messages in thread
From: Ian Campbell @ 2012-03-15  8:51 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, konrad.wilk@oracle.com, waldi@debian.org,
	joe.jin@oracle.com, weiyi.huang@gmail.com,
	virtualization@lists.linux-foundation.org, Justin Gibbs,
	Paul Durrant, David Vrabel, Santosh Jodh,
	<konrad@darnok.org>, dgdegra@tycho.nsa.gov,
	xen-devel@lists.xen.org, lersek@redhat.com

On Thu, 2012-03-15 at 08:03 +0000, Jan Beulich wrote:
> >>> On 14.03.12 at 18:01, Justin Gibbs <justing@spectralogic.com> wrote:
> > While we're talking about fixing ring data structures, can RING_IDX
> > be defined as a "uint32_t" instead of "unsigned int".  The structure
> > padding in the ring macros assumes RING_IDX is exactly 4 bytes,
> > so this should be made explicit.  ILP64 machines may still be a way
> > out, but the use of non-fixed sized types in places where size really
> > matters just isn't clean.
> 
> Yes, if we're going to rev the interface, then any such flaws should be
> corrected.

There has been talk of doing something similar for netif too. IIRC the
netchannel2 work included a new generic ring scheme with support for
variable sized req/rsp elements and such.

If we are going to rev the rings then should we try and use a common
ring mechanism? I think so. If so then we could do worse than to start
from the netchannel2 ring stuff and/or concepts?

Looks like that is
http://xenbits.xen.org/ext/netchannel2/linux-2.6.18/log/075f6677a290/include/xen/interface/io/uring.h
still a bit nc2 specific though.

Ian.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-15  8:03                       ` [Xen-devel] " Jan Beulich
@ 2012-03-15  8:51                         ` Ian Campbell
  2012-03-15  8:51                         ` [Xen-devel] " Ian Campbell
  1 sibling, 0 replies; 73+ messages in thread
From: Ian Campbell @ 2012-03-15  8:51 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jeremy@goop.org, konrad.wilk@oracle.com, waldi@debian.org,
	joe.jin@oracle.com, weiyi.huang@gmail.com,
	virtualization@lists.linux-foundation.org, Justin Gibbs,
	Paul Durrant, David Vrabel, Santosh Jodh,
	<konrad@darnok.org>, dgdegra@tycho.nsa.gov,
	xen-devel@lists.xen.org, lersek@redhat.com

On Thu, 2012-03-15 at 08:03 +0000, Jan Beulich wrote:
> >>> On 14.03.12 at 18:01, Justin Gibbs <justing@spectralogic.com> wrote:
> > While we're talking about fixing ring data structures, can RING_IDX
> > be defined as a "uint32_t" instead of "unsigned int".  The structure
> > padding in the ring macros assumes RING_IDX is exactly 4 bytes,
> > so this should be made explicit.  ILP64 machines may still be a way
> > out, but the use of non-fixed sized types in places where size really
> > matters just isn't clean.
> 
> Yes, if we're going to rev the interface, then any such flaws should be
> corrected.

There has been talk of doing something similar for netif too. IIRC the
netchannel2 work included a new generic ring scheme with support for
variable sized req/rsp elements and such.

If we are going to rev the rings then should we try and use a common
ring mechanism? I think so. If so then we could do worse than to start
from the netchannel2 ring stuff and/or concepts?

Looks like that is
http://xenbits.xen.org/ext/netchannel2/linux-2.6.18/log/075f6677a290/include/xen/interface/io/uring.h
still a bit nc2 specific though.

Ian.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-15  8:51                         ` [Xen-devel] " Ian Campbell
  2012-03-15  9:31                           ` Jan Beulich
@ 2012-03-15  9:31                           ` Jan Beulich
  1 sibling, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-15  9:31 UTC (permalink / raw)
  To: Ian Campbell
  Cc: jeremy@goop.org, konrad.wilk@oracle.com, waldi@debian.org,
	joe.jin@oracle.com, weiyi.huang@gmail.com,
	virtualization@lists.linux-foundation.org, Justin Gibbs,
	Paul Durrant, David Vrabel, SantoshJodh,
	<konrad@darnok.org>, dgdegra@tycho.nsa.gov,
	xen-devel@lists.xen.org, lersek@redhat.com

>>> On 15.03.12 at 09:51, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Thu, 2012-03-15 at 08:03 +0000, Jan Beulich wrote:
>> >>> On 14.03.12 at 18:01, Justin Gibbs <justing@spectralogic.com> wrote:
>> > While we're talking about fixing ring data structures, can RING_IDX
>> > be defined as a "uint32_t" instead of "unsigned int".  The structure
>> > padding in the ring macros assumes RING_IDX is exactly 4 bytes,
>> > so this should be made explicit.  ILP64 machines may still be a way
>> > out, but the use of non-fixed sized types in places where size really
>> > matters just isn't clean.
>> 
>> Yes, if we're going to rev the interface, then any such flaws should be
>> corrected.
> 
> There has been talk of doing something similar for netif too. IIRC the
> netchannel2 work included a new generic ring scheme with support for
> variable sized req/rsp elements and such.
> 
> If we are going to rev the rings then should we try and use a common
> ring mechanism? I think so. If so then we could do worse than to start
> from the netchannel2 ring stuff and/or concepts?
> 
> Looks like that is
> http://xenbits.xen.org/ext/netchannel2/linux-2.6.18/log/075f6677a290/include/xen/interface/io/uring.h
> still a bit nc2 specific though.

Taking the concept (and the implementation as a starting point) would
seem like a good idea to me. Separate request and reply rings as well
as variable size entries would certainly benefit blkif too.

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 0001/001] xen: multi page ring support for block devices
  2012-03-15  8:51                         ` [Xen-devel] " Ian Campbell
@ 2012-03-15  9:31                           ` Jan Beulich
  2012-03-15  9:31                           ` [Xen-devel] " Jan Beulich
  1 sibling, 0 replies; 73+ messages in thread
From: Jan Beulich @ 2012-03-15  9:31 UTC (permalink / raw)
  To: Ian Campbell
  Cc: jeremy@goop.org, konrad.wilk@oracle.com, waldi@debian.org,
	joe.jin@oracle.com, weiyi.huang@gmail.com,
	virtualization@lists.linux-foundation.org, Justin Gibbs,
	Paul Durrant, David Vrabel, SantoshJodh,
	<konrad@darnok.org>, dgdegra@tycho.nsa.gov,
	xen-devel@lists.xen.org, lersek@redhat.com

>>> On 15.03.12 at 09:51, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Thu, 2012-03-15 at 08:03 +0000, Jan Beulich wrote:
>> >>> On 14.03.12 at 18:01, Justin Gibbs <justing@spectralogic.com> wrote:
>> > While we're talking about fixing ring data structures, can RING_IDX
>> > be defined as a "uint32_t" instead of "unsigned int".  The structure
>> > padding in the ring macros assumes RING_IDX is exactly 4 bytes,
>> > so this should be made explicit.  ILP64 machines may still be a way
>> > out, but the use of non-fixed sized types in places where size really
>> > matters just isn't clean.
>> 
>> Yes, if we're going to rev the interface, then any such flaws should be
>> corrected.
> 
> There has been talk of doing something similar for netif too. IIRC the
> netchannel2 work included a new generic ring scheme with support for
> variable sized req/rsp elements and such.
> 
> If we are going to rev the rings then should we try and use a common
> ring mechanism? I think so. If so then we could do worse than to start
> from the netchannel2 ring stuff and/or concepts?
> 
> Looks like that is
> http://xenbits.xen.org/ext/netchannel2/linux-2.6.18/log/075f6677a290/include/xen/interface/io/uring.h
> still a bit nc2 specific though.

Taking the concept (and the implementation as a starting point) would
seem like a good idea to me. Separate request and reply rings as well
as variable size entries would certainly benefit blkif too.

Jan

^ permalink raw reply	[flat|nested] 73+ messages in thread

end of thread, other threads:[~2012-03-15  9:31 UTC | newest]

Thread overview: 73+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-03-03 15:47 [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug santosh nayak
2012-03-03 15:59 ` santosh nayak
2012-03-05 11:43 ` Rajesh Borundia
2012-03-05 11:43   ` Rajesh Borundia
2012-03-05 20:35   ` David Miller
2012-03-05 20:35     ` David Miller
2012-03-07 19:41     ` Rajesh Borundia
2012-03-07 19:41       ` Rajesh Borundia
2012-03-09 15:13       ` santosh prasad nayak
2012-03-09 16:34 ` Rajesh Borundia
2012-03-09 16:34   ` Rajesh Borundia
2012-03-09 18:50   ` santosh prasad nayak
2012-03-09 18:51     ` santosh prasad nayak
2012-03-10 19:01     ` Rajesh Borundia
2012-03-10 19:01       ` Rajesh Borundia
2012-03-11  9:16       ` santosh prasad nayak
2012-03-11  9:28         ` santosh prasad nayak
2012-03-02 15:11         ` santosh nayak
2012-03-02 15:23           ` santosh nayak
2012-03-02 17:22           ` Rajesh Borundia
2012-03-02 17:22             ` Rajesh Borundia
2012-03-05 21:49           ` [PATCH 0001/001] xen: multi page ring support for block devices Santosh Jodh
2012-03-05 21:49           ` Santosh Jodh
2012-03-05 21:49           ` Santosh Jodh
2012-03-05 21:49             ` Santosh Jodh
2012-03-06  2:42             ` Rusty Russell
2012-03-06  2:42             ` Rusty Russell
2012-03-06  2:42               ` Rusty Russell
2012-03-06  6:21               ` Santosh Jodh
2012-03-06  6:21               ` Santosh Jodh
2012-03-06  6:21               ` Santosh Jodh
2012-03-06  6:21                 ` Santosh Jodh
2012-03-06  2:42             ` Rusty Russell
2012-03-06  8:34             ` Jan Beulich
2012-03-06  8:34             ` Jan Beulich
2012-03-06  8:34               ` Jan Beulich
2012-03-14 17:17               ` [Xen-devel] " Justin T. Gibbs
2012-03-15  8:09                 ` Jan Beulich
2012-03-14 17:17               ` Justin T. Gibbs
2012-03-06  8:34             ` Jan Beulich
2012-03-06 11:16             ` Wei Liu
2012-03-06 11:16               ` Wei Liu
2012-03-06 11:16             ` [Xen-devel] " Wei Liu
2012-03-06 17:20             ` Konrad Rzeszutek Wilk
2012-03-07  9:33               ` Jan Beulich
2012-03-07  9:33                 ` Jan Beulich
2012-03-07  9:33                 ` Jan Beulich
2012-03-07 15:15                 ` Konrad Rzeszutek Wilk
2012-03-07 15:15                   ` Konrad Rzeszutek Wilk
2012-03-07 15:15                 ` Konrad Rzeszutek Wilk
2012-03-14  6:32                 ` Justin Gibbs
2012-03-14  6:32                 ` [Xen-devel] " Justin Gibbs
2012-03-14  6:32                   ` Justin Gibbs
2012-03-14  8:35                   ` Jan Beulich
2012-03-14  8:35                   ` [Xen-devel] " Jan Beulich
2012-03-14 15:34                   ` Jan Beulich
2012-03-14 17:01                     ` Justin Gibbs
2012-03-14 17:01                     ` [Xen-devel] " Justin Gibbs
2012-03-15  8:03                       ` Jan Beulich
2012-03-15  8:03                       ` [Xen-devel] " Jan Beulich
2012-03-15  8:51                         ` Ian Campbell
2012-03-15  8:51                         ` [Xen-devel] " Ian Campbell
2012-03-15  9:31                           ` Jan Beulich
2012-03-15  9:31                           ` [Xen-devel] " Jan Beulich
2012-03-14 15:34                   ` Jan Beulich
2012-03-07  9:33               ` Jan Beulich
2012-03-06 17:20             ` Konrad Rzeszutek Wilk
2012-03-12  9:37           ` [PATCH 3/3] netxen: qlogic ethernet : Fix Endian Bug David Laight
2012-03-12  9:37             ` David Laight
2012-03-12  9:47             ` santosh prasad nayak
2012-03-12  9:59               ` santosh prasad nayak
2012-03-12  6:19         ` Rajesh Borundia
2012-03-12  6:19           ` Rajesh Borundia

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.