All of lore.kernel.org

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH net-next 2/4] be2net: refactor VF setup/teardown code into be_vf_setup/clear()
From: Sathya Perla @ 2011-10-24 12:45 UTC (permalink / raw)
  To: netdev
In-Reply-To: <1319460303-25560-1-git-send-email-sathya.perla@emulex.com>

Currently the code for VF setup/teardown done by a PF (if_create, mac_add_config, link_status_query etc) is scattered; this patch refactors this code into be_vf_setup() and be_vf_clear().
The if_create/if_destroy/mac_addr_query cmds are now called after the MCCQ is created; so these cmds are now modified to use the MCCQ instead of MBOX.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c |   66 +++++---
 drivers/net/ethernet/emulex/benet/be_cmds.h |    4 +-
 drivers/net/ethernet/emulex/benet/be_main.c |  233 ++++++++++++---------------
 3 files changed, 149 insertions(+), 154 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 6e7b521..8ae5a4c 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -615,7 +615,7 @@ int be_cmd_eq_create(struct be_adapter *adapter,
 	return status;
 }
 
-/* Uses mbox */
+/* Use MCC */
 int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
 			u8 type, bool permanent, u32 if_handle)
 {
@@ -623,10 +623,13 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
 	struct be_cmd_req_mac_query *req;
 	int status;
 
-	if (mutex_lock_interruptible(&adapter->mbox_lock))
-		return -1;
+	spin_lock_bh(&adapter->mcc_lock);
 
-	wrb = wrb_from_mbox(adapter);
+	wrb = wrb_from_mccq(adapter);
+	if (!wrb) {
+		status = -EBUSY;
+		goto err;
+	}
 	req = embedded_payload(wrb);
 
 	be_wrb_hdr_prepare(wrb, sizeof(*req), true, 0,
@@ -643,13 +646,14 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
 		req->permanent = 0;
 	}
 
-	status = be_mbox_notify_wait(adapter);
+	status = be_mcc_notify_wait(adapter);
 	if (!status) {
 		struct be_cmd_resp_mac_query *resp = embedded_payload(wrb);
 		memcpy(mac_addr, resp->mac.addr, ETH_ALEN);
 	}
 
-	mutex_unlock(&adapter->mbox_lock);
+err:
+	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1111,20 +1115,22 @@ err:
 }
 
 /* Create an rx filtering policy configuration on an i/f
- * Uses mbox
+ * Uses MCCQ 
  */
 int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
-		u8 *mac, bool pmac_invalid, u32 *if_handle, u32 *pmac_id,
-		u32 domain)
+		u8 *mac, u32 *if_handle, u32 *pmac_id, u32 domain)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_if_create *req;
 	int status;
 
-	if (mutex_lock_interruptible(&adapter->mbox_lock))
-		return -1;
+	spin_lock_bh(&adapter->mcc_lock);
 
-	wrb = wrb_from_mbox(adapter);
+	wrb = wrb_from_mccq(adapter);
+	if (!wrb) {
+		status = -EBUSY;
+		goto err;
+	}
 	req = embedded_payload(wrb);
 
 	be_wrb_hdr_prepare(wrb, sizeof(*req), true, 0,
@@ -1136,23 +1142,25 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
 	req->hdr.domain = domain;
 	req->capability_flags = cpu_to_le32(cap_flags);
 	req->enable_flags = cpu_to_le32(en_flags);
-	req->pmac_invalid = pmac_invalid;
-	if (!pmac_invalid)
+	if (mac)
 		memcpy(req->mac_addr, mac, ETH_ALEN);
+	else
+		req->pmac_invalid = true;
 
-	status = be_mbox_notify_wait(adapter);
+	status = be_mcc_notify_wait(adapter);
 	if (!status) {
 		struct be_cmd_resp_if_create *resp = embedded_payload(wrb);
 		*if_handle = le32_to_cpu(resp->interface_id);
-		if (!pmac_invalid)
+		if (mac)
 			*pmac_id = le32_to_cpu(resp->pmac_id);
 	}
 
-	mutex_unlock(&adapter->mbox_lock);
+err:
+	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
 }
 
-/* Uses mbox */
+/* Uses MCCQ */
 int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id, u32 domain)
 {
 	struct be_mcc_wrb *wrb;
@@ -1162,10 +1170,16 @@ int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id, u32 domain)
 	if (adapter->eeh_err)
 		return -EIO;
 
-	if (mutex_lock_interruptible(&adapter->mbox_lock))
-		return -1;
+	if (!interface_id)
+		return 0;
 
-	wrb = wrb_from_mbox(adapter);
+	spin_lock_bh(&adapter->mcc_lock);
+
+	wrb = wrb_from_mccq(adapter);
+	if (!wrb) {
+		status = -EBUSY;
+		goto err;
+	}
 	req = embedded_payload(wrb);
 
 	be_wrb_hdr_prepare(wrb, sizeof(*req), true, 0,
@@ -1177,10 +1191,9 @@ int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id, u32 domain)
 	req->hdr.domain = domain;
 	req->interface_id = cpu_to_le32(interface_id);
 
-	status = be_mbox_notify_wait(adapter);
-
-	mutex_unlock(&adapter->mbox_lock);
-
+	status = be_mcc_notify_wait(adapter);
+err:
+	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1301,7 +1314,8 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed,
 		struct be_cmd_resp_link_status *resp = embedded_payload(wrb);
 		if (resp->mac_speed != PHY_LINK_SPEED_ZERO) {
 			*link_speed = le16_to_cpu(resp->link_speed);
-			*mac_speed = resp->mac_speed;
+			if (mac_speed)
+				*mac_speed = resp->mac_speed;
 		}
 	}
 
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index abaa90c..75b7574 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1413,8 +1413,8 @@ extern int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
 extern int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id,
 			u32 pmac_id, u32 domain);
 extern int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags,
-			u32 en_flags, u8 *mac, bool pmac_invalid,
-			u32 *if_handle, u32 *pmac_id, u32 domain);
+			u32 en_flags, u8 *mac, u32 *if_handle, u32 *pmac_id,
+			u32 domain);
 extern int be_cmd_if_destroy(struct be_adapter *adapter, u32 if_handle,
 			u32 domain);
 extern int be_cmd_eq_create(struct be_adapter *adapter,
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 88c9dca..4ab182f 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2069,7 +2069,7 @@ done:
 	return;
 }
 
-static void be_sriov_enable(struct be_adapter *adapter)
+static int be_sriov_enable(struct be_adapter *adapter)
 {
 	be_check_sriov_fn_type(adapter);
 #ifdef CONFIG_PCI_IOV
@@ -2091,8 +2091,17 @@ static void be_sriov_enable(struct be_adapter *adapter)
 
 		status = pci_enable_sriov(adapter->pdev, num_vfs);
 		adapter->sriov_enabled = status ? false : true;
+
+		if (adapter->sriov_enabled) {
+			adapter->vf_cfg = kcalloc(num_vfs,
+						sizeof(struct be_vf_cfg),
+						GFP_KERNEL);
+			if (!adapter->vf_cfg)
+				return -ENOMEM;
+		}
 	}
 #endif
+	return 0;
 }
 
 static void be_sriov_disable(struct be_adapter *adapter)
@@ -2100,6 +2109,7 @@ static void be_sriov_disable(struct be_adapter *adapter)
 #ifdef CONFIG_PCI_IOV
 	if (adapter->sriov_enabled) {
 		pci_disable_sriov(adapter->pdev);
+		kfree(adapter->vf_cfg);
 		adapter->sriov_enabled = false;
 	}
 #endif
@@ -2405,7 +2415,7 @@ static int be_setup_wol(struct be_adapter *adapter, bool enable)
  */
 static inline int be_vf_eth_addr_config(struct be_adapter *adapter)
 {
-	u32 vf = 0;
+	u32 vf;
 	int status = 0;
 	u8 mac[ETH_ALEN];
 
@@ -2427,7 +2437,7 @@ static inline int be_vf_eth_addr_config(struct be_adapter *adapter)
 	return status;
 }
 
-static inline void be_vf_eth_addr_rem(struct be_adapter *adapter)
+static void be_vf_clear(struct be_adapter *adapter)
 {
 	u32 vf;
 
@@ -2437,29 +2447,25 @@ static inline void be_vf_eth_addr_rem(struct be_adapter *adapter)
 					adapter->vf_cfg[vf].vf_if_handle,
 					adapter->vf_cfg[vf].vf_pmac_id, vf + 1);
 	}
+
+	for (vf = 0; vf < num_vfs; vf++)
+		if (adapter->vf_cfg[vf].vf_if_handle)
+			be_cmd_if_destroy(adapter,
+				adapter->vf_cfg[vf].vf_if_handle, vf + 1);
 }
 
 static int be_clear(struct be_adapter *adapter)
 {
-	int vf;
-
 	if (be_physfn(adapter) && adapter->sriov_enabled)
-		be_vf_eth_addr_rem(adapter);
+		be_vf_clear(adapter);
+
+	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
 
 	be_mcc_queues_destroy(adapter);
 	be_rx_queues_destroy(adapter);
 	be_tx_queues_destroy(adapter);
 	adapter->eq_next_idx = 0;
 
-	if (be_physfn(adapter) && adapter->sriov_enabled)
-		for (vf = 0; vf < num_vfs; vf++)
-			if (adapter->vf_cfg[vf].vf_if_handle)
-				be_cmd_if_destroy(adapter,
-					adapter->vf_cfg[vf].vf_if_handle,
-					vf + 1);
-
-	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
-
 	adapter->be3_native = false;
 	adapter->promiscuous = false;
 
@@ -2468,59 +2474,92 @@ static int be_clear(struct be_adapter *adapter)
 	return 0;
 }
 
+static int be_vf_setup(struct be_adapter *adapter)
+{
+	u32 cap_flags, en_flags, vf;
+	u16 lnk_speed;
+	int status;
+
+	cap_flags = en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST;
+	for (vf = 0; vf < num_vfs; vf++) {
+		status = be_cmd_if_create(adapter, cap_flags, en_flags, NULL,
+					&adapter->vf_cfg[vf].vf_if_handle,
+					NULL, vf+1);
+		if (status)
+			goto err;
+		adapter->vf_cfg[vf].vf_pmac_id = BE_INVALID_PMAC_ID;
+	}
+
+	if (!lancer_chip(adapter)) {
+		status = be_vf_eth_addr_config(adapter);
+		if (status)
+			goto err;
+	}
+
+	for (vf = 0; vf < num_vfs; vf++) {
+		status = be_cmd_link_status_query(adapter, NULL, &lnk_speed,
+				vf + 1);
+		if (status)
+			goto err;
+		adapter->vf_cfg[vf].vf_tx_rate = lnk_speed * 10;
+	}
+	return 0;
+err:
+	return status;
+}
+
 static int be_setup(struct be_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	u32 cap_flags, en_flags, vf = 0;
+	u32 cap_flags, en_flags;
 	u32 tx_fc, rx_fc;
 	int status;
 	u8 mac[ETH_ALEN];
 
+	/* Allow all priorities by default. A GRP5 evt may modify this */
+	adapter->vlan_prio_bmap = 0xff;
+	adapter->link_speed = -1;
+
 	be_cmd_req_native_mode(adapter);
 
-	cap_flags = en_flags = BE_IF_FLAGS_UNTAGGED |
-				BE_IF_FLAGS_BROADCAST |
-				BE_IF_FLAGS_MULTICAST;
-
-	if (be_physfn(adapter)) {
-		cap_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS |
-				BE_IF_FLAGS_PROMISCUOUS |
-				BE_IF_FLAGS_PASS_L3L4_ERRORS;
-		en_flags |= BE_IF_FLAGS_PASS_L3L4_ERRORS;
-
-		if (adapter->function_caps & BE_FUNCTION_CAPS_RSS) {
-			cap_flags |= BE_IF_FLAGS_RSS;
-			en_flags |= BE_IF_FLAGS_RSS;
-		}
+	status = be_tx_queues_create(adapter);
+	if (status != 0)
+		goto err;
+
+	status = be_rx_queues_create(adapter);
+	if (status != 0)
+		goto err;
+
+	status = be_mcc_queues_create(adapter);
+	if (status != 0)
+		goto err;
+
+	memset(mac, 0, ETH_ALEN);
+	status = be_cmd_mac_addr_query(adapter, mac, MAC_ADDRESS_TYPE_NETWORK,
+			true /*permanent */, 0);
+	if (status)
+		return status;
+	memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
+	memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
+
+	en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
+			BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS;
+	cap_flags = en_flags | BE_IF_FLAGS_MCAST_PROMISCUOUS |
+			BE_IF_FLAGS_PROMISCUOUS;
+	if (adapter->function_caps & BE_FUNCTION_CAPS_RSS) {
+		cap_flags |= BE_IF_FLAGS_RSS;
+		en_flags |= BE_IF_FLAGS_RSS;
 	}
-
 	status = be_cmd_if_create(adapter, cap_flags, en_flags,
-			netdev->dev_addr, false/* pmac_invalid */,
-			&adapter->if_handle, &adapter->pmac_id, 0);
+			netdev->dev_addr, &adapter->if_handle,
+			&adapter->pmac_id, 0);
 	if (status != 0)
 		goto err;
-
-	if (be_physfn(adapter)) {
-		if (adapter->sriov_enabled) {
-			while (vf < num_vfs) {
-				cap_flags = en_flags = BE_IF_FLAGS_UNTAGGED |
-							BE_IF_FLAGS_BROADCAST;
-				status = be_cmd_if_create(adapter, cap_flags,
-					en_flags, mac, true,
-					&adapter->vf_cfg[vf].vf_if_handle,
-					NULL, vf+1);
-				if (status) {
-					dev_err(&adapter->pdev->dev,
-					"Interface Create failed for VF %d\n",
-					vf);
-					goto err;
-				}
-				adapter->vf_cfg[vf].vf_pmac_id =
-							BE_INVALID_PMAC_ID;
-				vf++;
-			}
-		}
-	} else {
+	
+	/* For BEx, the VF's permanent mac queried from card is incorrect.
+	 * Query the mac configued by the PF using if_handle
+	 */
+	if (!be_physfn(adapter) && !lancer_chip(adapter)) {
 		status = be_cmd_mac_addr_query(adapter, mac,
 			MAC_ADDRESS_TYPE_NETWORK, false, adapter->if_handle);
 		if (!status) {
@@ -2529,23 +2568,6 @@ static int be_setup(struct be_adapter *adapter)
 		}
 	}
 
-	status = be_tx_queues_create(adapter);
-	if (status != 0)
-		goto err;
-
-	status = be_rx_queues_create(adapter);
-	if (status != 0)
-		goto err;
-
-	/* Allow all priorities by default. A GRP5 evt may modify this */
-	adapter->vlan_prio_bmap = 0xff;
-
-	status = be_mcc_queues_create(adapter);
-	if (status != 0)
-		goto err;
-
-	adapter->link_speed = -1;
-
 	be_cmd_get_fw_ver(adapter, adapter->fw_ver, NULL);
 
 	status = be_vid_config(adapter, false, 0);
@@ -2565,8 +2587,14 @@ static int be_setup(struct be_adapter *adapter)
 	}	
 
 	pcie_set_readrq(adapter->pdev, 4096);
-	return 0;
 
+	if (be_physfn(adapter) && adapter->sriov_enabled) {
+		status = be_vf_setup(adapter);
+		if (status)
+			goto err;
+	}
+		
+	return 0;
 err:
 	be_clear(adapter);
 	return status;
@@ -3123,7 +3151,6 @@ static void __devexit be_remove(struct pci_dev *pdev)
 
 	be_ctrl_cleanup(adapter);
 
-	kfree(adapter->vf_cfg);
 	be_sriov_disable(adapter);
 
 	be_msix_disable(adapter);
@@ -3138,30 +3165,12 @@ static void __devexit be_remove(struct pci_dev *pdev)
 static int be_get_config(struct be_adapter *adapter)
 {
 	int status;
-	u8 mac[ETH_ALEN];
 
 	status = be_cmd_query_fw_cfg(adapter, &adapter->port_num,
 			&adapter->function_mode, &adapter->function_caps);
 	if (status)
 		return status;
 
-	memset(mac, 0, ETH_ALEN);
-
-	/* A default permanent address is given to each VF for Lancer*/
-	if (be_physfn(adapter) || lancer_chip(adapter)) {
-		status = be_cmd_mac_addr_query(adapter, mac,
-			MAC_ADDRESS_TYPE_NETWORK, true /*permanent */, 0);
-
-		if (status)
-			return status;
-
-		if (!is_valid_ether_addr(mac))
-			return -EADDRNOTAVAIL;
-
-		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
-		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
-	}
-
 	if (adapter->function_mode & 0x400)
 		adapter->max_vlans = BE_NUM_VLANS_SUPPORTED/4;
 	else
@@ -3310,18 +3319,13 @@ static int __devinit be_probe(struct pci_dev *pdev,
 		}
 	}
 
-	be_sriov_enable(adapter);
-	if (adapter->sriov_enabled) {
-		adapter->vf_cfg = kcalloc(num_vfs,
-			sizeof(struct be_vf_cfg), GFP_KERNEL);
-
-		if (!adapter->vf_cfg)
-			goto free_netdev;
-	}
+	status = be_sriov_enable(adapter);
+	if (status)
+		goto free_netdev;
 
 	status = be_ctrl_init(adapter);
 	if (status)
-		goto free_vf_cfg;
+		goto disable_sriov;
 
 	if (lancer_chip(adapter)) {
 		status = lancer_test_and_set_rdy_state(adapter);
@@ -3375,33 +3379,11 @@ static int __devinit be_probe(struct pci_dev *pdev,
 	if (status != 0)
 		goto unsetup;
 
-	if (be_physfn(adapter) && adapter->sriov_enabled) {
-		u8 mac_speed;
-		u16 vf, lnk_speed;
-
-		if (!lancer_chip(adapter)) {
-			status = be_vf_eth_addr_config(adapter);
-			if (status)
-				goto unreg_netdev;
-		}
-
-		for (vf = 0; vf < num_vfs; vf++) {
-			status = be_cmd_link_status_query(adapter, &mac_speed,
-						&lnk_speed, vf + 1);
-			if (!status)
-				adapter->vf_cfg[vf].vf_tx_rate = lnk_speed * 10;
-			else
-				goto unreg_netdev;
-		}
-	}
-
 	dev_info(&pdev->dev, "%s port %d\n", nic_name(pdev), adapter->port_num);
 
 	schedule_delayed_work(&adapter->work, msecs_to_jiffies(100));
 	return 0;
 
-unreg_netdev:
-	unregister_netdev(netdev);
 unsetup:
 	be_clear(adapter);
 msix_disable:
@@ -3410,10 +3392,9 @@ stats_clean:
 	be_stats_cleanup(adapter);
 ctrl_clean:
 	be_ctrl_cleanup(adapter);
-free_vf_cfg:
-	kfree(adapter->vf_cfg);
+disable_sriov:
+	be_sriov_disable(adapter);
 free_netdev:
-	be_sriov_disable(adapter);
 	free_netdev(netdev);
 	pci_set_drvdata(pdev, NULL);
 rel_reg:
-- 
1.7.4

^ permalink raw reply related

* [PATCH net-next 3/4] be2net: don't create multiple TXQs in BE2
From: Sathya Perla @ 2011-10-24 12:45 UTC (permalink / raw)
  To: netdev; +Cc: Vasundhara Volam
In-Reply-To: <1319460303-25560-1-git-send-email-sathya.perla@emulex.com>

Multiple TXQ support is partially broken in BE2. It is fully
supported BE3 onwards and in Lancer.

Signed-off-by: Vasundhara Volam <vasundhara.volam@emulex.com>
Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |   26 ++++++++++++++++----------
 1 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 4ab182f..77ce24b 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1633,6 +1633,17 @@ static void be_tx_queues_destroy(struct be_adapter *adapter)
 	be_queue_free(adapter, q);
 }
 
+static int be_num_txqs_want(struct be_adapter *adapter)
+{
+	if ((num_vfs && adapter->sriov_enabled) ||
+		(adapter->function_mode & 0x400) ||
+		lancer_chip(adapter) || !be_physfn(adapter) ||
+		adapter->generation == BE_GEN2)
+		return 1;
+	else
+		return MAX_TX_QS;
+}
+
 /* One TX event queue is shared by all TX compl qs */
 static int be_tx_queues_create(struct be_adapter *adapter)
 {
@@ -1640,6 +1651,11 @@ static int be_tx_queues_create(struct be_adapter *adapter)
 	struct be_tx_obj *txo;
 	u8 i;
 
+	adapter->num_tx_qs = be_num_txqs_want(adapter);
+	if (adapter->num_tx_qs != MAX_TX_QS)
+		netif_set_real_num_tx_queues(adapter->netdev,
+			adapter->num_tx_qs);
+
 	adapter->tx_eq.max_eqd = 0;
 	adapter->tx_eq.min_eqd = 0;
 	adapter->tx_eq.cur_eqd = 96;
@@ -3180,16 +3196,6 @@ static int be_get_config(struct be_adapter *adapter)
 	if (status)
 		return status;
 
-	if ((num_vfs && adapter->sriov_enabled) ||
-		(adapter->function_mode & 0x400) ||
-		lancer_chip(adapter) || !be_physfn(adapter)) {
-		adapter->num_tx_qs = 1;
-		netif_set_real_num_tx_queues(adapter->netdev,
-			adapter->num_tx_qs);
-	} else {
-		adapter->num_tx_qs = MAX_TX_QS;
-	}
-
 	return 0;
 }
 
-- 
1.7.4

^ permalink raw reply related

* [PATCH net-next 4/4] be2net: don't create multiple RX/TX rings in multi channel mode
From: Sathya Perla @ 2011-10-24 12:45 UTC (permalink / raw)
  To: netdev; +Cc: Suresh Reddy
In-Reply-To: <1319460303-25560-1-git-send-email-sathya.perla@emulex.com>

When the HW is in multi-channel mode based on the skew/IPL, there are 4 functions per port and so not enough resources to create multiple RX/TX rings for each function.

Signed-off-by: Suresh Reddy <suresh.reddy@emulex.com>
Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_cmds.h |    6 ++++++
 drivers/net/ethernet/emulex/benet/be_main.c |   16 ++++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 75b7574..a35cd03 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1046,6 +1046,12 @@ struct be_cmd_resp_modify_eq_delay {
 
 /******************** Get FW Config *******************/
 #define BE_FUNCTION_CAPS_RSS			0x2
+/* The HW can come up in either of the following multi-channel modes
+ * based on the skew/IPL.
+ */
+#define FLEX10_MODE				0x400
+#define VNIC_MODE				0x20000
+#define UMC_ENABLED				0x1000000
 struct be_cmd_req_query_fw_cfg {
 	struct be_cmd_req_hdr hdr;
 	u32 rsvd[31];
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 77ce24b..91fe12a 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -114,6 +114,13 @@ static const char * const ue_status_hi_desc[] = {
 	"Unknown"
 };
 
+/* Is BE in a multi-channel mode */
+static inline bool be_is_mc(struct be_adapter *adapter) {
+	return (adapter->function_mode & FLEX10_MODE ||
+		adapter->function_mode & VNIC_MODE ||
+		adapter->function_mode & UMC_ENABLED);
+}
+
 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 {
 	struct be_dma_mem *mem = &q->dma_mem;
@@ -1289,7 +1296,7 @@ static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
 	if (rxcp->vlanf) {
 		/* vlanf could be wrongly set in some cards.
 		 * ignore if vtm is not set */
-		if ((adapter->function_mode & 0x400) && !rxcp->vtm)
+		if ((adapter->function_mode & FLEX10_MODE) && !rxcp->vtm)
 			rxcp->vlanf = 0;
 
 		if (!lancer_chip(adapter))
@@ -1636,7 +1643,7 @@ static void be_tx_queues_destroy(struct be_adapter *adapter)
 static int be_num_txqs_want(struct be_adapter *adapter)
 {
 	if ((num_vfs && adapter->sriov_enabled) ||
-		(adapter->function_mode & 0x400) ||
+		be_is_mc(adapter) ||
 		lancer_chip(adapter) || !be_physfn(adapter) ||
 		adapter->generation == BE_GEN2)
 		return 1;
@@ -1718,7 +1725,8 @@ static void be_rx_queues_destroy(struct be_adapter *adapter)
 static u32 be_num_rxqs_want(struct be_adapter *adapter)
 {
 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
-		!adapter->sriov_enabled && !(adapter->function_mode & 0x400)) {
+		!adapter->sriov_enabled && be_physfn(adapter) &&
+		!be_is_mc(adapter)) {
 		return 1 + MAX_RSS_QS; /* one default non-RSS queue */
 	} else {
 		dev_warn(&adapter->pdev->dev,
@@ -3187,7 +3195,7 @@ static int be_get_config(struct be_adapter *adapter)
 	if (status)
 		return status;
 
-	if (adapter->function_mode & 0x400)
+	if (adapter->function_mode & FLEX10_MODE)
 		adapter->max_vlans = BE_NUM_VLANS_SUPPORTED/4;
 	else
 		adapter->max_vlans = BE_NUM_VLANS_SUPPORTED;
-- 
1.7.4

^ permalink raw reply related

* [PATCH] [kvm-autotest]client.tests.kvm.tests.cgroup: Add TestDeviceAccess subtest
From: Lukas Doktor @ 2011-10-24 12:46 UTC (permalink / raw)
  To: autotest, kvm, kvm-autotest, akong, lmr, ldoktor, jzupka
In-Reply-To: <1319460365-11819-1-git-send-email-ldoktor@redhat.com>

This subtest tries to attach scsi_debug disk with different cgroup
devices.list setting.

 * subtests the devices.{allow, deny, list} cgroup functionality
 * new function get_maj_min(dev) which returns (major, minor) numbers
   of dev
 * rm_drive: support for rm_device without drive (only remove the host
   file)
 * improved logging

Signed-off-by: Lukas Doktor <ldoktor@redhat.com>
---
 client/tests/kvm/tests/cgroup.py |  234 +++++++++++++++++++++++++++++++++-----
 1 files changed, 203 insertions(+), 31 deletions(-)

diff --git a/client/tests/kvm/tests/cgroup.py b/client/tests/kvm/tests/cgroup.py
index 6c64532..c83f91a 100644
--- a/client/tests/kvm/tests/cgroup.py
+++ b/client/tests/kvm/tests/cgroup.py
@@ -50,7 +50,7 @@ def run_cgroup(test, params, env):
         return abs(float(actual-reference) / reference)
 
 
-    def get_dd_cmd(direction, dev='vd?', count=None, bs=None):
+    def get_dd_cmd(direction, dev=None, count=None, bs=None):
         """
         Generates dd_cmd string
         @param direction: {read,write,bi} dd direction
@@ -59,6 +59,11 @@ def run_cgroup(test, params, env):
         @param bs: bs parameter of dd
         @return: dd command string
         """
+        if dev is None:
+            if get_device_driver() == "virtio":
+                dev = 'vd?'
+            else:
+                dev = '[sh]d?'
         if direction == "read":
             params = "if=$FILE of=/dev/null iflag=direct"
         elif direction == "write":
@@ -82,6 +87,21 @@ def run_cgroup(test, params, env):
         return params.get('drive_format', 'virtio')
 
 
+    def get_maj_min(dev):
+        """
+        Returns the major and minor numbers of the dev device
+        @return: Tupple(major, minor) numbers of the dev device
+        """
+        try:
+            ret = utils.system_output("ls -l %s" % dev)
+            ret = re.match(r'[bc][rwx-]{9} \d+ \w+ \w+ (\d+), (\d+)',
+                           ret).groups()
+        except Exception, details:
+            raise error.TestFail("Couldn't get %s maj and min numbers: %s" %
+                                  (dev, details))
+        return ret
+
+
     def add_file_drive(vm, driver=get_device_driver(), host_file=None):
         """
         Hot-add a drive based on file to a vm
@@ -173,14 +193,17 @@ def run_cgroup(test, params, env):
         """
         err = False
         # TODO: Implement also via QMP
-        vm.monitor.cmd("pci_del %s" % device)
-        time.sleep(3)
-        qtree = vm.monitor.info('qtree', debug=False)
-        if qtree.count('addr %s.0' % device) != 0:
-            err = True
-            vm.destroy()
-
-        if isinstance(host_file, str):    # scsi device
+        if device:
+            vm.monitor.cmd("pci_del %s" % device)
+            time.sleep(3)
+            qtree = vm.monitor.info('qtree', debug=False)
+            if qtree.count('addr %s.0' % device) != 0:
+                err = True
+                vm.destroy()
+
+        if host_file is None:   # Do not remove
+            pass
+        elif isinstance(host_file, str):    # scsi device
             utils.system("echo -1> /sys/bus/pseudo/drivers/scsi_debug/add_host")
         else:     # file
             host_file.close()
@@ -334,7 +357,7 @@ def run_cgroup(test, params, env):
             _TestBlkioBandwidth.__init__(self, vms, modules)
             # Read from the last vd* in a loop until test removes the
             # /tmp/cgroup_lock file (and kills us)
-            self.dd_cmd = get_dd_cmd("read", bs="100K")
+            self.dd_cmd = get_dd_cmd("read", dev='vd?', bs="100K")
 
 
     class TestBlkioBandwidthWeigthWrite(_TestBlkioBandwidth):
@@ -350,7 +373,7 @@ def run_cgroup(test, params, env):
             # Write on the last vd* in a loop until test removes the
             # /tmp/cgroup_lock file (and kills us)
             _TestBlkioBandwidth.__init__(self, vms, modules)
-            self.dd_cmd = get_dd_cmd("write", bs="100K")
+            self.dd_cmd = get_dd_cmd("write", dev='vd?', bs="100K")
 
 
     class _TestBlkioThrottle:
@@ -376,10 +399,6 @@ def run_cgroup(test, params, env):
             self.devices = None # Temporary virt devices (PCI drive 1 per vm)
             self.dd_cmd = None  # DD command used to test the throughput
             self.speeds = None  # cgroup throughput
-            if get_device_driver() == "virtio":
-                self.dev = "vd?"
-            else:
-                self.dev = "[sh]d?"
 
         def cleanup(self):
             """
@@ -417,13 +436,8 @@ def run_cgroup(test, params, env):
                                                             driver="virtio")
             else:
                 (self.files, self.devices) = add_scsi_drive(self.vm)
-            try:
-                dev = utils.system_output("ls -l %s" % self.files).split()[4:6]
-                dev[0] = dev[0][:-1]    # Remove tailing ','
-            except:
-                time.sleep(5)
-                raise error.TestFail("Couldn't get %s maj and min numbers"
-                                     % self.files)
+            time.sleep(3)
+            dev = get_maj_min(self.files)
 
             cgroup = self.cgroup
             cgroup.initialize(self.modules)
@@ -548,7 +562,7 @@ def run_cgroup(test, params, env):
             @param modules: initialized cgroup module class
             """
             _TestBlkioThrottle.__init__(self, vms, modules)
-            self.dd_cmd = get_dd_cmd("read", dev=self.dev, count=1)
+            self.dd_cmd = get_dd_cmd("read", count=1)
             self.speeds = [1024]
 
 
@@ -561,7 +575,7 @@ def run_cgroup(test, params, env):
             @param modules: initialized cgroup module class
             """
             _TestBlkioThrottle.__init__(self, vms, modules)
-            self.dd_cmd = get_dd_cmd("write", dev=self.dev, count=1)
+            self.dd_cmd = get_dd_cmd("write", count=1)
             self.speeds = [1024]
 
 
@@ -577,7 +591,7 @@ def run_cgroup(test, params, env):
             @param modules: initialized cgroup module class
             """
             _TestBlkioThrottle.__init__(self, vms, modules)
-            self.dd_cmd = get_dd_cmd("read", dev=self.dev, count=1)
+            self.dd_cmd = get_dd_cmd("read", count=1)
             self.speeds = [0, 1024, 0, 2048, 0, 4096]
 
 
@@ -593,10 +607,166 @@ def run_cgroup(test, params, env):
             @param modules: initialized cgroup module class
             """
             _TestBlkioThrottle.__init__(self, vms, modules)
-            self.dd_cmd = get_dd_cmd("write", dev=self.dev, count=1)
+            self.dd_cmd = get_dd_cmd("write", count=1)
             self.speeds = [0, 1024, 0, 2048, 0, 4096]
 
 
+    class TestDevicesAccess:
+        """
+        It tries to attach scsi_debug disk with different cgroup devices.list
+        setting.
+         * self.permitions are defined as a list of dictionaries:
+           {'property': control property, 'value': permition value,
+            'check_value': check value (from devices.list property),
+            'read_results': excepced read results T/F,
+            'write_results': expected write results T/F}
+        """
+        def __init__(self, vms, modules):
+            """
+            Initialization
+            @param vms: list of vms
+            @param modules: initialized cgroup module class
+            """
+            self.vm = vms[0]      # Virt machines
+            self.modules = modules          # cgroup module handler
+            self.cgroup = Cgroup('devices', '')   # cgroup blkio handler
+            self.files = None   # Temporary files (files of virt disks)
+            self.devices = None # Temporary virt devices
+            self.permitions = None  # Test dictionary, see init for details
+
+
+        def cleanup(self):
+            """ Cleanup """
+            err = ""
+            try:
+                rm_drive(self.vm, self.files, self.devices)
+            except Exception, failure_detail:
+                err += "\nCan't remove PCI drive: %s" % failure_detail
+            try:
+                del(self.cgroup)
+            except Exception, failure_detail:
+                err += "\nCan't remove Cgroup: %s" % failure_detail
+
+            if err:
+                logging.error("Some cleanup operations failed: %s", err)
+                raise error.TestError("Some cleanup operations failed: %s"
+                                      % err)
+
+
+        def init(self):
+            """
+            Initialization
+             * creates a new scsi_debug device
+             * prepares one cgroup and assign vm to it
+            """
+            # Only create the host /dev/sd? device
+            (self.files, self.devices) = add_scsi_drive(self.vm)
+            rm_drive(self.vm, host_file=None, device=self.devices)
+            self.devices = None # We don't want to mess cleanup
+
+            time.sleep(3)
+            dev = "%s:%s" % get_maj_min(self.files)
+
+            self.cgroup.initialize(self.modules)
+            self.cgroup.mk_cgroup()
+            self.cgroup.set_cgroup(self.vm.get_shell_pid(),
+                                   self.cgroup.cgroups[0])
+            for pid in utils.get_children_pids(self.vm.get_shell_pid()):
+                self.cgroup.set_cgroup(int(pid), self.cgroup.cgroups[0])
+
+            # Test dictionary
+            # Beware of persistence of some setting to another round!!!
+            self.permitions = [
+                               {'property'      : 'deny',
+                                'value'         : 'a',
+                                'check_value'   : '',
+                                'result'        : False},
+                               {'property'      : 'allow',
+                                'value'         : 'b %s rm' % dev,
+                                'check_value'   : True,
+                                'result'        : False},
+                               {'property'      : 'allow',
+                                'value'         : 'b %s w' % dev,
+                                'check_value'   : 'b %s rwm' % dev,
+                                'result'        : True},
+                               {'property'      : 'deny',
+                                'value'         : 'b %s r' % dev,
+                                'check_value'   : 'b %s wm' % dev,
+                                'result'        : False},
+                               {'property'      : 'deny',
+                                'value'         : 'b %s wm' % dev,
+                                'check_value'   : '',
+                                'result'        : False},
+                               {'property'      : 'allow',
+                                'value'         : 'a',
+                                'check_value'   : 'a *:* rwm',
+                                'result'        : True},
+                              ]
+
+
+
+        def run(self):
+            """
+            Actual test:
+             * For each self.permitions sets the cgroup devices permition
+               and tries attach the disk. Checks the results with prescription.
+            """
+            def set_permitions(cgroup, permitions):
+                """
+                Wrapper for setting permitions to first cgroup
+                @param self.permitions: is defined as a list of dictionaries:
+                   {'property': control property, 'value': permition value,
+                    'check_value': check value (from devices.list property),
+                    'read_results': excepced read results T/F,
+                    'write_results': expected write results T/F}
+                """
+                cgroup.set_property('devices.'+permitions['property'],
+                                    permitions['value'],
+                                    cgroup.cgroups[0],
+                                    check=permitions['check_value'],
+                                    checkprop='devices.list')
+
+
+            session = self.vm.wait_for_login(timeout=30)
+
+            cgroup = self.cgroup
+            results = ""
+            for i in range(len(self.permitions)):
+                perm = self.permitions[i]
+                set_permitions(cgroup, perm)
+                logging.debug("Setting permitions: {%s: %s}, value: %s",
+                              perm['property'], perm['value'],
+                              cgroup.get_property('devices.list',
+                                                  cgroup.cgroups[0]))
+
+                try:
+                    (_, self.devices) = add_scsi_drive(self.vm,
+                                                        host_file=self.files)
+                except Exception, details:
+                    if perm['result']:
+                        logging.error("Perm: {%s: %s}: drive was not attached:"
+                                      " %s", perm['property'], perm['value'],
+                                      details)
+                        results += ("{%s: %s => NotAttached}, " %
+                                     (perm['property'], perm['value']))
+                else:
+                    if not perm['result']:
+                        logging.error("Perm: {%s: %s}: drive was attached",
+                                      perm['property'], perm['value'])
+                        results += ("{%s: %s => Attached}, " %
+                                     (perm['property'], perm['value']))
+                    rm_drive(self.vm, host_file=None, device=self.devices)
+                    self.devices = None
+
+            session.close()
+            if results:
+                raise error.TestFail("Some restrictions were broken: {%s}" %
+                                      results[:-2])
+
+            time.sleep(10)
+
+            return ("All restrictions enforced successfully.")
+
     # Setup
     # TODO: Add all new tests here
     tests = {"blkio_bandwidth_weigth_read"  : TestBlkioBandwidthWeigthRead,
@@ -605,9 +775,10 @@ def run_cgroup(test, params, env):
              "blkio_throttle_write"         : TestBlkioThrottleWrite,
              "blkio_throttle_multiple_read" : TestBlkioThrottleMultipleRead,
              "blkio_throttle_multiple_write" : TestBlkioThrottleMultipleWrite,
+             "devices_access"               : TestDevicesAccess,
             }
     modules = CgroupModules()
-    if (modules.init(['blkio']) <= 0):
+    if (modules.init(['blkio', 'devices']) <= 0):
         raise error.TestFail('Can\'t mount any cgroup modules')
     # Add all vms
     vms = []
@@ -674,14 +845,15 @@ def run_cgroup(test, params, env):
                     results += ("\n [F] %s: {%s} FAILED: %s" %
                                  (cg_test, err[:-2], out))
                 elif err:
-                    results += ("\n [E] %s: Test passed but {%s} FAILED: %s" %
+                    results += ("\n [W] %s: Test passed but {%s} FAILED: %s" %
                                  (cg_test, err[:-2], out))
                 else:
                     results += ("\n [P] %s: PASSED: %s" % (cg_test, out))
 
-    out = ("SUM: All tests finished (%d PASS / %d FAIL = %d TOTAL)%s" %
-           (results.count("PASSED"), results.count("FAILED"),
-            (results.count("PASSED")+results.count("FAILED")), results))
+    out = ("SUM: All tests finished (%d PASS / %d WARN / %d FAIL = %d TOTAL)%s"%
+           (results.count("\n [P]"), results.count("\n [W]"),
+            results.count("\n [F]"), (results.count("\n [P]") +
+            results.count("\n [F]") + results.count("\n [W]")), results))
     logging.info(out)
     if results.count("FAILED"):
         raise error.TestFail("Some subtests failed\n%s" % out)
-- 
1.7.6.2

^ permalink raw reply related

* [kvm-autotest]client.tests.kvm.tests.cgroup: Add TestDeviceAccess subtest
From: Lukas Doktor @ 2011-10-24 12:46 UTC (permalink / raw)
  To: autotest, kvm, kvm-autotest, akong, lmr, ldoktor, jzupka

Hi guys,

I have a new subtest which tests the 'devices' cgroup subsystem and improve the logging a bit.

Please find the pull request on github:
https://github.com/autotest/autotest/pull/48

Cheers,
Lukáš


^ permalink raw reply

* [PATCH v2 4/5] ARM: SPMP8000: Add SPMP8000 SoC and Letcool board dts descriptions
From: Rob Herring @ 2011-10-24 12:47 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1319040118-29773-5-git-send-email-zoss@devai.org>

On 10/19/2011 11:01 AM, Zoltan Devai wrote:
> This adds the DT description of the SPMP8000 SoC, the Letcool
> N350JP board and documentation of required bindings
> 
> Cc: Grant Likely <grant.likely@secretlab.ca>
> CC: devicetree-discuss at lists.ozlabs.org
> Signed-off-by: Zoltan Devai <zoss@devai.org>
> ---

Only a couple of minor things below. Otherwise looks good.

Acked-by: Rob Herring <rob.herring@calxeda.com>

>  Documentation/devicetree/bindings/arm/spmp8000.txt |   25 +++++
>  arch/arm/boot/dts/spmp8000-letcool.dts             |   20 ++++
>  arch/arm/boot/dts/spmp8000.dtsi                    |   93 ++++++++++++++++++++
>  3 files changed, 138 insertions(+), 0 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/arm/spmp8000.txt
>  create mode 100644 arch/arm/boot/dts/spmp8000-letcool.dts
>  create mode 100644 arch/arm/boot/dts/spmp8000.dtsi
> 
> diff --git a/Documentation/devicetree/bindings/arm/spmp8000.txt b/Documentation/devicetree/bindings/arm/spmp8000.txt
> new file mode 100644
> index 0000000..c392b09
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/arm/spmp8000.txt
> @@ -0,0 +1,25 @@
> +Sunplus SPMP8000 device tree bindings
> +=====================================
> +
> +Required root node properties:
> +	- compatible:
> +		- "gameware,letcool" : Letcool N350JO handheld game console board
> +		- "sunplus,spmp8000" : A board based on the SPMP8000 SoC
> +
> +Timer required properties:
> +	- compatible = "sunplus,spmp8000-timer"
> +	- interrupt-parent : The interrupt controller node this timer belongs to
> +	- interrupts : IRQs of the timer
> +	- clock-freq : The frequency in HZ of the timer.

"clock-frequency" is what is commonly used.

> +	- reg : The register bank for the timer.
> +
> +Watchdog required properties:
> +	- compatible = "sunplus,spmp8000-watchdof"

s/watchdof/watchdog/

> +	- interrupt-parent : The interrupt controller node this timer belongs to
> +	- interrupts : IRQs of the watchdog
> +	- reg : The register bank for the watchdog controller
> +
> +SCU required properties:
> +	- compatible: "sunplus,spmp8000-scua" or "sunplus,spmp8000-scuab" or
> +		      "sunplus,spmp8000-scuc"
> +	- reg: The register bank of the System Control Unit
> diff --git a/arch/arm/boot/dts/spmp8000-letcool.dts b/arch/arm/boot/dts/spmp8000-letcool.dts
> new file mode 100644
> index 0000000..424f2aa
> --- /dev/null
> +++ b/arch/arm/boot/dts/spmp8000-letcool.dts
> @@ -0,0 +1,20 @@
> +/dts-v1/;
> +/include/ "spmp8000.dtsi"
> +/ {
> +	model = "Letcool N350JP handheld game console";
> +	compatible = "gameware,letcool", "sunplus,spmp8000";
> +
> +	memory {
> +		reg = <0x00000000 0x02000000>;
> +	};
> +
> +	chosen {
> +		linux,stdout-path = &uart0;
> +	};
> +
> +	armapb {
> +		timer at 90000000 {
> +			clock-freq = <15187500>;
> +		};
> +	};
> +};
> diff --git a/arch/arm/boot/dts/spmp8000.dtsi b/arch/arm/boot/dts/spmp8000.dtsi
> new file mode 100644
> index 0000000..15b25fa
> --- /dev/null
> +++ b/arch/arm/boot/dts/spmp8000.dtsi
> @@ -0,0 +1,93 @@
> +/ {
> +	compatible = "sunplus,spmp8000";
> +	#address-cells = <1>;
> +	#size-cells = <1>;
> +
> +	armapb {
> +		compatible = "simple-bus";
> +		#address-cells = <1>;
> +		#size-cells = <1>;
> +		ranges = <0 0x90000000 0x10000>;
> +
> +		timer at 90000000 {
> +			compatible = "sunplus,spmp8000-timer";
> +			reg = <0x0 0x1000>;
> +			interrupt-parent = <&vic0>;
> +			interrupts = <7 8 9>;
> +		};
> +
> +		watchdog at 90001000 {
> +			compatible = "sunplus,spmp8000-wdt";
> +			reg = <0x1000 0x1000>;
> +			interrupt-parent = <&vic0>;
> +			interrupts = <10 11 12>;
> +		};
> +
> +		scu-b at 90005000 {
> +			compatible = "sunplus,spmp8000-scub";
> +			reg = <0x5000 0x1000>;
> +		};
> +	};
> +
> +	armahb {
> +		compatible = "simple-bus";
> +		#address-cells = <1>;
> +		#size-cells = <1>;
> +		ranges = <0 0x90010000 0x20000>;
> +
> +		vic0: interrupt-controller at 90010000 {
> +			compatible = "arm,pl192";
> +			interrupt-controller;
> +			#interrupt-cells = <1>;
> +			reg = <0x0 0x1000>;
> +		};
> +
> +		vic1: interrupt-controller at 900020000 {
> +			compatible = "arm,pl192";
> +			interrupt-controller;
> +			#interrupt-cells = <1>;
> +			reg = <0x10000 0x1000>;
> +		};
> +	};
> +
> +	plat {
> +		compatible = "simple-bus";
> +		#address-cells = <1>;
> +		#size-cells = <1>;
> +		ranges = <0 0x92000000 0x1000000>;
> +
> +		scu-c at 92005000 {
> +			compatible = "sunplus,spmp8000-scuc";
> +			reg = <0x5000 0x1000>;
> +		};
> +
> +		plat-apb {
> +			compatible = "simple-bus";
> +			#address-cells = <1>;
> +			#size-cells = <1>;
> +			ranges = <0 0xB00000 0x10000>;

lower case for hex is preferred.

> +
> +			uart0: uart-c0 at 92B04000 {
> +				compatible = "ns16550a";
> +				reg = <0x4000 0x1000>;
> +				clock-frequency = <2076923>;
> +				interrupt-parent = <&vic1>;
> +				interrupts = <24>;
> +				current-speed = <115200>;
> +				reg-shift = <2>;
> +			};
> +		};
> +	};
> +
> +	axi {
> +		compatible = "simple-bus";
> +		#address-cells = <1>;
> +		#size-cells = <1>;
> +		ranges = <0 0x93000000 0x20000>;
> +
> +		scu-a at 93007000 {
> +			compatible = "sunplus,spmp8000-scua";
> +			reg = <0x7000 0x1000>;
> +		};
> +	};
> +};

^ permalink raw reply

* Re: [PATCH v2 4/5] ARM: SPMP8000: Add SPMP8000 SoC and Letcool board dts descriptions
From: Rob Herring @ 2011-10-24 12:47 UTC (permalink / raw)
  To: Zoltan Devai
  Cc: Grant Likely, devicetree-discuss, Russell King, Arnd Bergmann,
	linux-arm-kernel
In-Reply-To: <1319040118-29773-5-git-send-email-zoss@devai.org>

On 10/19/2011 11:01 AM, Zoltan Devai wrote:
> This adds the DT description of the SPMP8000 SoC, the Letcool
> N350JP board and documentation of required bindings
> 
> Cc: Grant Likely <grant.likely@secretlab.ca>
> CC: devicetree-discuss@lists.ozlabs.org
> Signed-off-by: Zoltan Devai <zoss@devai.org>
> ---

Only a couple of minor things below. Otherwise looks good.

Acked-by: Rob Herring <rob.herring@calxeda.com>

>  Documentation/devicetree/bindings/arm/spmp8000.txt |   25 +++++
>  arch/arm/boot/dts/spmp8000-letcool.dts             |   20 ++++
>  arch/arm/boot/dts/spmp8000.dtsi                    |   93 ++++++++++++++++++++
>  3 files changed, 138 insertions(+), 0 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/arm/spmp8000.txt
>  create mode 100644 arch/arm/boot/dts/spmp8000-letcool.dts
>  create mode 100644 arch/arm/boot/dts/spmp8000.dtsi
> 
> diff --git a/Documentation/devicetree/bindings/arm/spmp8000.txt b/Documentation/devicetree/bindings/arm/spmp8000.txt
> new file mode 100644
> index 0000000..c392b09
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/arm/spmp8000.txt
> @@ -0,0 +1,25 @@
> +Sunplus SPMP8000 device tree bindings
> +=====================================
> +
> +Required root node properties:
> +	- compatible:
> +		- "gameware,letcool" : Letcool N350JO handheld game console board
> +		- "sunplus,spmp8000" : A board based on the SPMP8000 SoC
> +
> +Timer required properties:
> +	- compatible = "sunplus,spmp8000-timer"
> +	- interrupt-parent : The interrupt controller node this timer belongs to
> +	- interrupts : IRQs of the timer
> +	- clock-freq : The frequency in HZ of the timer.

"clock-frequency" is what is commonly used.

> +	- reg : The register bank for the timer.
> +
> +Watchdog required properties:
> +	- compatible = "sunplus,spmp8000-watchdof"

s/watchdof/watchdog/

> +	- interrupt-parent : The interrupt controller node this timer belongs to
> +	- interrupts : IRQs of the watchdog
> +	- reg : The register bank for the watchdog controller
> +
> +SCU required properties:
> +	- compatible: "sunplus,spmp8000-scua" or "sunplus,spmp8000-scuab" or
> +		      "sunplus,spmp8000-scuc"
> +	- reg: The register bank of the System Control Unit
> diff --git a/arch/arm/boot/dts/spmp8000-letcool.dts b/arch/arm/boot/dts/spmp8000-letcool.dts
> new file mode 100644
> index 0000000..424f2aa
> --- /dev/null
> +++ b/arch/arm/boot/dts/spmp8000-letcool.dts
> @@ -0,0 +1,20 @@
> +/dts-v1/;
> +/include/ "spmp8000.dtsi"
> +/ {
> +	model = "Letcool N350JP handheld game console";
> +	compatible = "gameware,letcool", "sunplus,spmp8000";
> +
> +	memory {
> +		reg = <0x00000000 0x02000000>;
> +	};
> +
> +	chosen {
> +		linux,stdout-path = &uart0;
> +	};
> +
> +	armapb {
> +		timer@90000000 {
> +			clock-freq = <15187500>;
> +		};
> +	};
> +};
> diff --git a/arch/arm/boot/dts/spmp8000.dtsi b/arch/arm/boot/dts/spmp8000.dtsi
> new file mode 100644
> index 0000000..15b25fa
> --- /dev/null
> +++ b/arch/arm/boot/dts/spmp8000.dtsi
> @@ -0,0 +1,93 @@
> +/ {
> +	compatible = "sunplus,spmp8000";
> +	#address-cells = <1>;
> +	#size-cells = <1>;
> +
> +	armapb {
> +		compatible = "simple-bus";
> +		#address-cells = <1>;
> +		#size-cells = <1>;
> +		ranges = <0 0x90000000 0x10000>;
> +
> +		timer@90000000 {
> +			compatible = "sunplus,spmp8000-timer";
> +			reg = <0x0 0x1000>;
> +			interrupt-parent = <&vic0>;
> +			interrupts = <7 8 9>;
> +		};
> +
> +		watchdog@90001000 {
> +			compatible = "sunplus,spmp8000-wdt";
> +			reg = <0x1000 0x1000>;
> +			interrupt-parent = <&vic0>;
> +			interrupts = <10 11 12>;
> +		};
> +
> +		scu-b@90005000 {
> +			compatible = "sunplus,spmp8000-scub";
> +			reg = <0x5000 0x1000>;
> +		};
> +	};
> +
> +	armahb {
> +		compatible = "simple-bus";
> +		#address-cells = <1>;
> +		#size-cells = <1>;
> +		ranges = <0 0x90010000 0x20000>;
> +
> +		vic0: interrupt-controller@90010000 {
> +			compatible = "arm,pl192";
> +			interrupt-controller;
> +			#interrupt-cells = <1>;
> +			reg = <0x0 0x1000>;
> +		};
> +
> +		vic1: interrupt-controller@900020000 {
> +			compatible = "arm,pl192";
> +			interrupt-controller;
> +			#interrupt-cells = <1>;
> +			reg = <0x10000 0x1000>;
> +		};
> +	};
> +
> +	plat {
> +		compatible = "simple-bus";
> +		#address-cells = <1>;
> +		#size-cells = <1>;
> +		ranges = <0 0x92000000 0x1000000>;
> +
> +		scu-c@92005000 {
> +			compatible = "sunplus,spmp8000-scuc";
> +			reg = <0x5000 0x1000>;
> +		};
> +
> +		plat-apb {
> +			compatible = "simple-bus";
> +			#address-cells = <1>;
> +			#size-cells = <1>;
> +			ranges = <0 0xB00000 0x10000>;

lower case for hex is preferred.

> +
> +			uart0: uart-c0@92B04000 {
> +				compatible = "ns16550a";
> +				reg = <0x4000 0x1000>;
> +				clock-frequency = <2076923>;
> +				interrupt-parent = <&vic1>;
> +				interrupts = <24>;
> +				current-speed = <115200>;
> +				reg-shift = <2>;
> +			};
> +		};
> +	};
> +
> +	axi {
> +		compatible = "simple-bus";
> +		#address-cells = <1>;
> +		#size-cells = <1>;
> +		ranges = <0 0x93000000 0x20000>;
> +
> +		scu-a@93007000 {
> +			compatible = "sunplus,spmp8000-scua";
> +			reg = <0x7000 0x1000>;
> +		};
> +	};
> +};

^ permalink raw reply

* Re: [RFD] Network configuration data in sysfs
From: Kay Sievers @ 2011-10-24 12:46 UTC (permalink / raw)
  To: David Miller
  Cc: kirill, netdev, kuznet, jmorris, yoshfuji, kaber, gregkh,
	gladkov.alexey
In-Reply-To: <20111024.005900.1091819103500072631.davem@davemloft.net>

On Mon, Oct 24, 2011 at 06:59, David Miller <davem@davemloft.net> wrote:
> From: "Kirill A. Shutemov" <kirill@shutemov.name>
> Date: Mon, 24 Oct 2011 07:24:00 +0300
>
>> On Sun, Oct 23, 2011 at 11:24:16PM -0400, David Miller wrote:
>>> From: "Kirill A. Shutemov" <kirill@shutemov.name>
>>> Date: Mon, 24 Oct 2011 04:34:07 +0300
>>>
>>> You can use netlink to perform any configuration change you want, or
>>> to view any network configuration setting.
>>
>> You need /sbin/ip or similar tool to do this, right?
>
> I'm talking about udev using netlink natively.

Kirill, what exactly is the use case? And why what does udev support
mean in that context?

I doubt that "not having /sbin/ip installed" should be a reason to add
and expose complex interfaces in /sys, while we already have a
perfectly working native way to do it.

Kay

^ permalink raw reply

* GFS2: Pre-pull patch posting (merge window)
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel

Hi,

Since the merge window is upon us, here is the current content of
the GFS2 git tree. A few things will be help back to the following
merge window in order to ensure a greater test time, but those currently
in the tree are ready for the current window.

Recently I've reconstituted the GFS2 git tree, so it can be pulled
(via http) from:

http://sucs.org/~rohan/git/gfs2-3.0-nmw

and viewed via gitweb at:

http://sucs.org/gitweb/

This is thanks to the Swansea University Computer Society for providing
a temporary (or possibly permanent) home for the GFS2 git trees. Please
treat their server kindly as this will only continue while it doesn't
generate too much traffic. I figure that there will not be too many
people pulling the GFS2 tree at once, but we'll see.

Some highlights of the current patch set:
 o Reduction in code of approx 400 lines
 o Big clean up (and speed up) in the resource group code
   - This is a nice base to build some forthcoming improvements on
   - It should improve performance with multi-threaded workloads
 o Some left-over fsync/writeback changes
 o Improvements to readahead when deallocating large directories

Any questions/concerns then please let me know as usual,

Steve.

^ permalink raw reply

* [PATCH 01/24] GFS2: Clean up dir hash table reading
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

Since there is now only a single caller to gfs2_dir_read_data()
and it has a number of constant arguments, we can factor
those out. Also some tests relating to the inode size were
being done twice.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 1cc2f8e..2045d70 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -240,16 +240,15 @@ fail:
 	return error;
 }
 
-static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
-				 u64 offset, unsigned int size)
+static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf,
+				 unsigned int size)
 {
 	struct buffer_head *dibh;
 	int error;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		offset += sizeof(struct gfs2_dinode);
-		memcpy(buf, dibh->b_data + offset, size);
+		memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
 		brelse(dibh);
 	}
 
@@ -261,13 +260,12 @@ static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
  * gfs2_dir_read_data - Read a data from a directory inode
  * @ip: The GFS2 Inode
  * @buf: The buffer to place result into
- * @offset: File offset to begin jdata_readng from
  * @size: Amount of data to transfer
  *
  * Returns: The amount of data actually copied or the error
  */
-static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
-			      unsigned int size, unsigned ra)
+static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf,
+			      unsigned int size)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	u64 lblock, dblock;
@@ -275,24 +273,14 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 	unsigned int o;
 	int copied = 0;
 	int error = 0;
-	u64 disksize = i_size_read(&ip->i_inode);
-
-	if (offset >= disksize)
-		return 0;
-
-	if (offset + size > disksize)
-		size = disksize - offset;
-
-	if (!size)
-		return 0;
 
 	if (gfs2_is_stuffed(ip))
-		return gfs2_dir_read_stuffed(ip, buf, offset, size);
+		return gfs2_dir_read_stuffed(ip, buf, size);
 
 	if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
 		return -EINVAL;
 
-	lblock = offset;
+	lblock = 0;
 	o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
 
 	while (copied < size) {
@@ -311,8 +299,6 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 			if (error || !dblock)
 				goto fail;
 			BUG_ON(extlen < 1);
-			if (!ra)
-				extlen = 1;
 			bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
 		} else {
 			error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
@@ -328,7 +314,7 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 		extlen--;
 		memcpy(buf, bh->b_data + o, amount);
 		brelse(bh);
-		buf += amount;
+		buf += (amount/sizeof(__be64));
 		copied += amount;
 		lblock++;
 		o = sizeof(struct gfs2_meta_header);
@@ -371,7 +357,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
 	if (hc == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1);
+	ret = gfs2_dir_read_data(ip, hc, hsize);
 	if (ret < 0) {
 		kfree(hc);
 		return ERR_PTR(ret);
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH 03/24] GFS2: Fix bug-trap in ail flush code
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

The assert was being tested under the wrong lock, a
legacy of the original code. Also, if it does trigger,
the resulting information was not always a lot of help.

This moves the patch under the correct lock and also
prints out more useful information in tacking down the
source of the problem.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index da21eca..99df483 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,6 +28,17 @@
 #include "trans.h"
 #include "dir.h"
 
+static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
+{
+	fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
+	       bh, (unsigned long long)bh->b_blocknr, bh->b_state,
+	       bh->b_page->mapping, bh->b_page->flags);
+	fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n",
+	       gl->gl_name.ln_type, gl->gl_name.ln_number,
+	       gfs2_glock2aspace(gl));
+	gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n");
+}
+
 /**
  * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
  * @gl: the glock
@@ -41,20 +52,24 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl)
 	struct list_head *head = &gl->gl_ail_list;
 	struct gfs2_bufdata *bd;
 	struct buffer_head *bh;
+	sector_t blocknr;
 
 	spin_lock(&sdp->sd_ail_lock);
 	while (!list_empty(head)) {
 		bd = list_entry(head->next, struct gfs2_bufdata,
 				bd_ail_gl_list);
 		bh = bd->bd_bh;
-		gfs2_remove_from_ail(bd);
-		bd->bd_bh = NULL;
+		blocknr = bh->b_blocknr;
+		if (buffer_busy(bh))
+			gfs2_ail_error(gl, bh);
 		bh->b_private = NULL;
+		gfs2_remove_from_ail(bd); /* drops ref on bh */
 		spin_unlock(&sdp->sd_ail_lock);
 
-		bd->bd_blkno = bh->b_blocknr;
+		bd->bd_bh = NULL;
+		bd->bd_blkno = blocknr;
+
 		gfs2_log_lock(sdp);
-		gfs2_assert_withdraw(sdp, !buffer_busy(bh));
 		gfs2_trans_add_revoke(sdp, bd);
 		gfs2_log_unlock(sdp);
 
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH 05/24] GFS2: Fix inode allocation error path
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

If we have got far enough through the inode allocation code
path that an inode has already been allocated, then we must
call iput to dispose of it, if an error occurs during a
later part of the process. This will always be the final iput
since there will be no other references to the inode.

Unlike when the inode has been unlinked, its block state will
be GFS2_BLKST_INODE rather than GFS2_BLKST_UNLINKED so we need
to skip the test in ->evict_inode() for this one case in order
to ensure that it will be deallocated correctly. This patch adds
a new flag in order to ensure that this will happen correctly.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 892ac37..be5801a 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -267,6 +267,7 @@ struct gfs2_alloc {
 enum {
 	GIF_INVALID		= 0,
 	GIF_QD_LOCKED		= 1,
+	GIF_ALLOC_FAILED	= 2,
 	GIF_SW_PAGED		= 3,
 };
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 900cf98..044efe2 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -736,10 +736,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 
 fail_gunlock2:
 	gfs2_glock_dq_uninit(ghs + 1);
-	if (inode && !IS_ERR(inode))
-		iput(inode);
 fail_gunlock:
 	gfs2_glock_dq_uninit(ghs);
+	if (inode && !IS_ERR(inode)) {
+		set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags);
+		iput(inode);
+	}
 fail:
 	if (bh)
 		brelse(bh);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index afb8761..9961de7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1471,9 +1471,11 @@ static void gfs2_evict_inode(struct inode *inode)
 		goto out;
 	}
 
-	error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
-	if (error)
-		goto out_truncate;
+	if (!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
+		error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
+		if (error)
+			goto out_truncate;
+	}
 
 	if (test_bit(GIF_INVALID, &ip->i_flags)) {
 		error = gfs2_inode_refresh(ip);
@@ -1513,6 +1515,10 @@ static void gfs2_evict_inode(struct inode *inode)
 	goto out_unlock;
 
 out_truncate:
+	gfs2_log_flush(sdp, ip->i_gl);
+	write_inode_now(inode, 1);
+	gfs2_ail_flush(ip->i_gl);
+
 	/* Case 2 starts here */
 	error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
 	if (error)
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH 07/24] GFS2: Use ->dirty_inode()
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

The aim of this patch is to use the newly enhanced ->dirty_inode()
super block operation to deal with atime updates, rather than
piggy backing that code into ->write_inode() as is currently
done.

The net result is a simplification of the code in various places
and a reduction of the number of gfs2_dinode_out() calls since
this is now implied by ->dirty_inode().

Some of the mark_inode_dirty() calls have been moved under glocks
in order to take advantage of then being able to avoid locking in
->dirty_inode() when we already have suitable locks.

One consequence is that generic_write_end() now correctly deals
with file size updates, so that we do not need a separate check
for that afterwards. This also, indirectly, means that fdatasync
should work correctly on GFS2 - the current code always syncs the
metadata whether it needs to or not.

Has survived testing with postmark (with and without atime) and
also fsx.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 34501b6..65978d7 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -82,7 +82,7 @@ static int gfs2_set_mode(struct inode *inode, umode_t mode)
 		iattr.ia_valid = ATTR_MODE;
 		iattr.ia_mode = mode;
 
-		error = gfs2_setattr_simple(GFS2_I(inode), &iattr);
+		error = gfs2_setattr_simple(inode, &iattr);
 	}
 
 	return error;
@@ -160,6 +160,7 @@ out:
 
 int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
 {
+	struct inode *inode = &ip->i_inode;
 	struct posix_acl *acl;
 	char *data;
 	unsigned int len;
@@ -169,7 +170,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (!acl)
-		return gfs2_setattr_simple(ip, attr);
+		return gfs2_setattr_simple(inode, attr);
 
 	error = posix_acl_chmod(&acl, GFP_NOFS, attr->ia_mode);
 	if (error)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index f9fbbe9..212fe74 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -787,7 +787,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
 	u64 to = pos + copied;
 	void *kaddr;
 	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
-	struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
 
 	BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
 	kaddr = kmap_atomic(page, KM_USER0);
@@ -804,7 +803,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
 	if (copied) {
 		if (inode->i_size < to)
 			i_size_write(inode, to);
-		gfs2_dinode_out(ip, di);
 		mark_inode_dirty(inode);
 	}
 
@@ -873,10 +871,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 		gfs2_page_add_databufs(ip, page, from, to);
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (ret > 0) {
-		gfs2_dinode_out(ip, dibh->b_data);
-		mark_inode_dirty(inode);
-	}
 
 	if (inode == sdp->sd_rindex) {
 		adjust_fs_space(inode);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 2045d70..898e62e 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1681,7 +1681,6 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
 	const struct qstr *name = &dentry->d_name;
 	struct gfs2_dirent *dent, *prev = NULL;
 	struct buffer_head *bh;
-	int error;
 
 	/* Returns _either_ the entry (if its first in block) or the
 	   previous entry otherwise */
@@ -1710,22 +1709,15 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
 	}
 	brelse(bh);
 
-	error = gfs2_meta_inode_buffer(dip, &bh);
-	if (error)
-		return error;
-
 	if (!dip->i_entries)
 		gfs2_consist_inode(dip);
-	gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	dip->i_entries--;
 	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
 	if (S_ISDIR(dentry->d_inode->i_mode))
 		drop_nlink(&dip->i_inode);
-	gfs2_dinode_out(dip, bh->b_data);
-	brelse(bh);
 	mark_inode_dirty(&dip->i_inode);
 
-	return error;
+	return 0;
 }
 
 /**
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9d12286..4416a1c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -802,7 +802,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 		from = 0;
 	}
 
-	gfs2_dinode_out(ip, dibh->b_data);
 	mark_inode_dirty(inode);
 
 	brelse(dibh);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 044efe2..a0b53d3 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -729,8 +729,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		gfs2_inplace_release(dip);
 	gfs2_quota_unlock(dip);
 	gfs2_alloc_put(dip);
-	gfs2_glock_dq_uninit_m(2, ghs);
 	mark_inode_dirty(inode);
+	gfs2_glock_dq_uninit_m(2, ghs);
 	d_instantiate(dentry, inode);
 	return 0;
 
@@ -926,8 +926,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	inc_nlink(&ip->i_inode);
 	ip->i_inode.i_ctime = CURRENT_TIME;
-	gfs2_dinode_out(ip, dibh->b_data);
-	mark_inode_dirty(&ip->i_inode);
+	ihold(inode);
+	d_instantiate(dentry, inode);
+	mark_inode_dirty(inode);
 
 out_brelse:
 	brelse(dibh);
@@ -949,11 +950,6 @@ out_child:
 out_parent:
 	gfs2_holder_uninit(ghs);
 	gfs2_holder_uninit(ghs + 1);
-	if (!error) {
-		ihold(inode);
-		d_instantiate(dentry, inode);
-		mark_inode_dirty(inode);
-	}
 	return error;
 }
 
@@ -1026,8 +1022,6 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip,
 		clear_nlink(inode);
 	else
 		drop_nlink(inode);
-	gfs2_trans_add_bh(ip->i_gl, bh, 1);
-	gfs2_dinode_out(ip, bh->b_data);
 	mark_inode_dirty(inode);
 	if (inode->i_nlink == 0)
 		gfs2_unlink_di(inode);
@@ -1565,21 +1559,10 @@ int gfs2_permission(struct inode *inode, int mask)
 	return error;
 }
 
-static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
+static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
 {
-	struct inode *inode = &ip->i_inode;
-	struct buffer_head *dibh;
-	int error;
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		return error;
-
 	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
-	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(ip, dibh->b_data);
-	brelse(dibh);
 	return 0;
 }
 
@@ -1591,19 +1574,19 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
  * Returns: errno
  */
 
-int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
+int gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
 {
 	int error;
 
 	if (current->journal_info)
-		return __gfs2_setattr_simple(ip, attr);
+		return __gfs2_setattr_simple(inode, attr);
 
-	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0);
+	error = gfs2_trans_begin(GFS2_SB(inode), RES_DINODE, 0);
 	if (error)
 		return error;
 
-	error = __gfs2_setattr_simple(ip, attr);
-	gfs2_trans_end(GFS2_SB(&ip->i_inode));
+	error = __gfs2_setattr_simple(inode, attr);
+	gfs2_trans_end(GFS2_SB(inode));
 	return error;
 }
 
@@ -1641,7 +1624,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	if (error)
 		goto out_gunlock_q;
 
-	error = gfs2_setattr_simple(ip, attr);
+	error = gfs2_setattr_simple(inode, attr);
 	if (error)
 		goto out_end_trans;
 
@@ -1697,12 +1680,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
 		error = gfs2_acl_chmod(ip, attr);
 	else
-		error = gfs2_setattr_simple(ip, attr);
+		error = gfs2_setattr_simple(inode, attr);
 
 out:
-	gfs2_glock_dq_uninit(&i_gh);
 	if (!error)
 		mark_inode_dirty(inode);
+	gfs2_glock_dq_uninit(&i_gh);
 	return error;
 }
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 8d90e0c..276e7b5 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -109,7 +109,7 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip);
 extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 				  int is_root);
 extern int gfs2_permission(struct inode *inode, int mask);
-extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
+extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr);
 extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 0e8bb13..3a9a974 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -638,7 +638,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 	unsigned long index = loc >> PAGE_CACHE_SHIFT;
 	unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
 	unsigned blocksize, iblock, pos;
-	struct buffer_head *bh, *dibh;
+	struct buffer_head *bh;
 	struct page *page;
 	void *kaddr, *ptr;
 	struct gfs2_quota q, *qp;
@@ -736,22 +736,13 @@ get_a_page:
 		goto get_a_page;
 	}
 
-	/* Update the disk inode timestamp and size (if extended) */
-	err = gfs2_meta_inode_buffer(ip, &dibh);
-	if (err)
-		goto out;
-
 	size = loc + sizeof(struct gfs2_quota);
 	if (size > inode->i_size)
 		i_size_write(inode, size);
 	inode->i_mtime = inode->i_atime = CURRENT_TIME;
-	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(ip, dibh->b_data);
-	brelse(dibh);
 	mark_inode_dirty(inode);
-
-out:
 	return err;
+
 unlock_out:
 	unlock_page(page);
 	page_cache_release(page);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9961de7..b05fa59 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -752,47 +752,15 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
 	struct backing_dev_info *bdi = metamapping->backing_dev_info;
-	struct gfs2_holder gh;
-	struct buffer_head *bh;
-	struct timespec atime;
-	struct gfs2_dinode *di;
-	int ret = -EAGAIN;
-	int unlock_required = 0;
-
-	/* Skip timestamp update, if this is from a memalloc */
-	if (current->flags & PF_MEMALLOC)
-		goto do_flush;
-	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
-		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-		if (ret)
-			goto do_flush;
-		unlock_required = 1;
-	}
-	ret = gfs2_meta_inode_buffer(ip, &bh);
-	if (ret == 0) {
-		di = (struct gfs2_dinode *)bh->b_data;
-		atime.tv_sec = be64_to_cpu(di->di_atime);
-		atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
-		if (timespec_compare(&inode->i_atime, &atime) > 0) {
-			ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
-			if (ret == 0) {
-				gfs2_trans_add_bh(ip->i_gl, bh, 1);
-				gfs2_dinode_out(ip, bh->b_data);
-				gfs2_trans_end(sdp);
-			}
-		}
-		brelse(bh);
-	}
-	if (unlock_required)
-		gfs2_glock_dq_uninit(&gh);
-do_flush:
+	int ret = 0;
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
 	if (bdi->dirty_exceeded)
 		gfs2_ail1_flush(sdp, wbc);
 	else
 		filemap_fdatawrite(metamapping);
-	if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
+	if (wbc->sync_mode == WB_SYNC_ALL)
 		ret = filemap_fdatawait(metamapping);
 	if (ret)
 		mark_inode_dirty_sync(inode);
@@ -800,6 +768,64 @@ do_flush:
 }
 
 /**
+ * gfs2_dirty_inode - check for atime updates
+ * @inode: The inode in question
+ * @flags: The type of dirty
+ *
+ * Unfortunately it can be called under any combination of inode
+ * glock and transaction lock, so we have to check carefully.
+ *
+ * At the moment this deals only with atime - it should be possible
+ * to expand that role in future, once a review of the locking has
+ * been carried out.
+ */
+
+static void gfs2_dirty_inode(struct inode *inode, int flags)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct buffer_head *bh;
+	struct gfs2_holder gh;
+	int need_unlock = 0;
+	int need_endtrans = 0;
+	int ret;
+
+	if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC)))
+		return;
+
+	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
+		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+		if (ret) {
+			fs_err(sdp, "dirty_inode: glock %d\n", ret);
+			return;
+		}
+		need_unlock = 1;
+	}
+
+	if (current->journal_info == NULL) {
+		ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
+		if (ret) {
+			fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret);
+			goto out;
+		}
+		need_endtrans = 1;
+	}
+
+	ret = gfs2_meta_inode_buffer(ip, &bh);
+	if (ret == 0) {
+		gfs2_trans_add_bh(ip->i_gl, bh, 1);
+		gfs2_dinode_out(ip, bh->b_data);
+		brelse(bh);
+	}
+
+	if (need_endtrans)
+		gfs2_trans_end(sdp);
+out:
+	if (need_unlock)
+		gfs2_glock_dq_uninit(&gh);
+}
+
+/**
  * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
  * @sdp: the filesystem
  *
@@ -1578,6 +1604,7 @@ const struct super_operations gfs2_super_ops = {
 	.alloc_inode		= gfs2_alloc_inode,
 	.destroy_inode		= gfs2_destroy_inode,
 	.write_inode		= gfs2_write_inode,
+	.dirty_inode		= gfs2_dirty_inode,
 	.evict_inode		= gfs2_evict_inode,
 	.put_super		= gfs2_put_super,
 	.sync_fs		= gfs2_sync_fs,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 439b61c..695304c 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,7 +1296,8 @@ fail:
 
 int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct inode *inode = &ip->i_inode;
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_ea_location el;
 	int error;
 
@@ -1319,7 +1320,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 	if (error)
 		return error;
 
-	error = gfs2_setattr_simple(ip, attr);
+	error = gfs2_setattr_simple(inode, attr);
 	gfs2_trans_end(sdp);
 	return error;
 }
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH 09/24] GFS2: Fix lseek after SEEK_DATA, SEEK_HOLE have been added
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse, Andi Kleen
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

We need to take the inode's glock whenever the inode's size
is referenced, otherwise it might not be uptodate. Even
though generic_file_llseek_unlocked() doesn't implement
SEEK_DATA, SEEK_HOLE directly, it does reference the inode's
size in those cases, so we need to add them to the list
of origins which need the glock.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4416a1c..d717b72 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -59,15 +59,24 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
 	struct gfs2_holder i_gh;
 	loff_t error;
 
-	if (origin == 2) {
+	switch (origin) {
+	case SEEK_END: /* These reference inode->i_size */
+	case SEEK_DATA:
+	case SEEK_HOLE:
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (!error) {
 			error = generic_file_llseek_unlocked(file, offset, origin);
 			gfs2_glock_dq_uninit(&i_gh);
 		}
-	} else
+		break;
+	case SEEK_CUR:
+	case SEEK_SET:
 		error = generic_file_llseek_unlocked(file, offset, origin);
+		break;
+	default:
+		error = -EINVAL;
+	}
 
 	return error;
 }
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH 14/24] GFS2: Call do_strip() directly from recursive_scan()
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

The recursive_scan() function only ever takes a single "bc"
argument, so we might as well just call do_strip() directly
from resource_scan() rather than pass it in as an argument.

Also the "data" argument is always a struct strip_mine, so
we can pass that in, rather than using a void pointer.

This also moves do_strip() ahead of recursive_scan() so that
we don't need to add a prototype.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e602961..9d3a0c2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -36,11 +36,6 @@ struct metapath {
 	__u16 mp_list[GFS2_MAX_META_HEIGHT];
 };
 
-typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
-			     struct buffer_head *bh, __be64 *top,
-			     __be64 *bottom, unsigned int height,
-			     void *data);
-
 struct strip_mine {
 	int sm_first;
 	unsigned int sm_height;
@@ -668,76 +663,6 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
 }
 
 /**
- * recursive_scan - recursively scan through the end of a file
- * @ip: the inode
- * @dibh: the dinode buffer
- * @mp: the path through the metadata to the point to start
- * @height: the height the recursion is at
- * @block: the indirect block to look at
- * @first: 1 if this is the first block
- * @bc: the call to make for each piece of metadata
- * @data: data opaque to this function to pass to @bc
- *
- * When this is first called @height and @block should be zero and
- * @first should be 1.
- *
- * Returns: errno
- */
-
-static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
-			  struct metapath *mp, unsigned int height,
-			  u64 block, int first, block_call_t bc,
-			  void *data)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct buffer_head *bh = NULL;
-	__be64 *top, *bottom;
-	u64 bn;
-	int error;
-	int mh_size = sizeof(struct gfs2_meta_header);
-
-	if (!height) {
-		error = gfs2_meta_inode_buffer(ip, &bh);
-		if (error)
-			return error;
-		dibh = bh;
-
-		top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
-		bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
-	} else {
-		error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
-		if (error)
-			return error;
-
-		top = (__be64 *)(bh->b_data + mh_size) +
-				  (first ? mp->mp_list[height] : 0);
-
-		bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
-	}
-
-	error = bc(ip, dibh, bh, top, bottom, height, data);
-	if (error)
-		goto out;
-
-	if (height < ip->i_height - 1)
-		for (; top < bottom; top++, first = 0) {
-			if (!*top)
-				continue;
-
-			bn = be64_to_cpu(*top);
-
-			error = recursive_scan(ip, dibh, mp, height + 1, bn,
-					       first, bc, data);
-			if (error)
-				break;
-		}
-
-out:
-	brelse(bh);
-	return error;
-}
-
-/**
  * do_strip - Look for a layer a particular layer of the file and strip it off
  * @ip: the inode
  * @dibh: the dinode buffer
@@ -752,9 +677,8 @@ out:
 
 static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 		    struct buffer_head *bh, __be64 *top, __be64 *bottom,
-		    unsigned int height, void *data)
+		    unsigned int height, struct strip_mine *sm)
 {
-	struct strip_mine *sm = data;
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrp_list rlist;
 	u64 bn, bstart;
@@ -886,6 +810,75 @@ out:
 }
 
 /**
+ * recursive_scan - recursively scan through the end of a file
+ * @ip: the inode
+ * @dibh: the dinode buffer
+ * @mp: the path through the metadata to the point to start
+ * @height: the height the recursion is at
+ * @block: the indirect block to look at
+ * @first: 1 if this is the first block
+ * @sm: data opaque to this function to pass to @bc
+ *
+ * When this is first called @height and @block should be zero and
+ * @first should be 1.
+ *
+ * Returns: errno
+ */
+
+static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
+			  struct metapath *mp, unsigned int height,
+			  u64 block, int first, struct strip_mine *sm)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct buffer_head *bh = NULL;
+	__be64 *top, *bottom;
+	u64 bn;
+	int error;
+	int mh_size = sizeof(struct gfs2_meta_header);
+
+	if (!height) {
+		error = gfs2_meta_inode_buffer(ip, &bh);
+		if (error)
+			return error;
+		dibh = bh;
+
+		top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
+		bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
+	} else {
+		error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
+		if (error)
+			return error;
+
+		top = (__be64 *)(bh->b_data + mh_size) +
+				  (first ? mp->mp_list[height] : 0);
+
+		bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
+	}
+
+	error = do_strip(ip, dibh, bh, top, bottom, height, sm);
+	if (error)
+		goto out;
+
+	if (height < ip->i_height - 1)
+		for (; top < bottom; top++, first = 0) {
+			if (!*top)
+				continue;
+
+			bn = be64_to_cpu(*top);
+
+			error = recursive_scan(ip, dibh, mp, height + 1, bn,
+					       first, sm);
+			if (error)
+				break;
+		}
+
+out:
+	brelse(bh);
+	return error;
+}
+
+
+/**
  * gfs2_block_truncate_page - Deal with zeroing out data for truncate
  *
  * This is partly borrowed from ext3.
@@ -1024,7 +1017,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
 		sm.sm_first = !!size;
 		sm.sm_height = height;
 
-		error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
+		error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm);
 		if (error)
 			break;
 	}
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH 13/24] GFS2: Remove obsolete assert
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

Given that a resource group has been locked, there is no reason why
we should not be able to allocate as many blocks as are free. The
al_requested parameter should really be considered as a minimum
number of blocks to be available. Should this limit be overshot,
there are other mechanisms which will prevent over allocation.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5bfb970..08b3a80 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1077,15 +1077,8 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
 
 void gfs2_inplace_release(struct gfs2_inode *ip)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_alloc *al = ip->i_alloc;
 
-	if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
-		fs_warn(sdp, "al_alloced = %u, al_requested = %u "
-			     "al_file = %s, al_line = %u\n",
-		             al->al_alloced, al->al_requested, al->al_file,
-			     al->al_line);
-
 	if (al->al_rgd_gh.gh_gl)
 		gfs2_glock_dq_uninit(&al->al_rgd_gh);
 }
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH 19/24] GFS2: Fix off-by-one in gfs2_blk2rgrpd
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

Bob reported:

I found an off-by-one problem with how I coded this section:
It should be:

+ else if (blk >= cur->rd_data0 + cur->rd_data)

In fact, cur->rd_data0 + cur->rd_data is the start of the next
rgrp (the next ri_addr), so without the "=" check it can land on
the wrong rgrp.

In all normal cases, this won't be a problem: you're searching
for a block _within_ the rgrp, which will pass the test properly.
Where it gets into trouble is if you search the rgrps for the
block exactly equal to ri_addr.  I don't think anything in the
kernel does this, but I found a place in gfs2-utils gfs2_edit
where it does.  So I definitely need to fix it in libgfs2.  I'd
like to suggest we fix it in the kernel as well for the sake of
keeping the functions similar.

So this patch fixes the above mentioned off by one error as well
as removing the unused parent pointer.

Reported-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8ec4174..1daf8a7 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -329,17 +329,16 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)

 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
 {
-	struct rb_node **newn, *parent = NULL;
+	struct rb_node **newn;
+	struct gfs2_rgrpd *cur;

 	spin_lock(&sdp->sd_rindex_spin);
 	newn = &sdp->sd_rindex_tree.rb_node;
 	while (*newn) {
-		struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
-						  rd_node);
-		parent = *newn;
+		cur = rb_entry(*newn, struct gfs2_rgrpd, rd_node);
 		if (blk < cur->rd_addr)
 			newn = &((*newn)->rb_left);
-		else if (blk > cur->rd_data0 + cur->rd_data)
+		else if (blk >= cur->rd_data0 + cur->rd_data)
 			newn = &((*newn)->rb_right);
 		else {
 			spin_unlock(&sdp->sd_rindex_spin);
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH 22/24] GFS2: Misc fixes
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

Some items picked up through automated code analysis. A few bits
of unreachable code and two unchecked return values.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index de05b4d..0301be6 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -695,8 +695,6 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
 
 		brelse(bh_log);
 		brelse(bh_ip);
-		if (error)
-			break;
 
 		sdp->sd_replayed_blocks++;
 	}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 69166ff..7e823bb 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -651,7 +651,6 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 		fs_err(sdp, "can't lookup journal index: %d\n", error);
 		return PTR_ERR(sdp->sd_jindex);
 	}
-	ip = GFS2_I(sdp->sd_jindex);
 
 	/* Load in the journal index special file */
 
@@ -763,7 +762,6 @@ fail:
 static int init_inodes(struct gfs2_sbd *sdp, int undo)
 {
 	int error = 0;
-	struct gfs2_inode *ip;
 	struct inode *master = sdp->sd_master_dir->d_inode;
 
 	if (undo)
@@ -788,7 +786,6 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
 		fs_err(sdp, "can't get resource index inode: %d\n", error);
 		goto fail_statfs;
 	}
-	ip = GFS2_I(sdp->sd_rindex);
 	sdp->sd_rindex_uptodate = 0;
 
 	/* Read in the quota inode */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 10a59cd..7e528dc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -645,8 +645,11 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 	int err, nbytes;
 	u64 size;
 
-	if (gfs2_is_stuffed(ip))
-		gfs2_unstuff_dinode(ip, NULL);
+	if (gfs2_is_stuffed(ip)) {
+		err = gfs2_unstuff_dinode(ip, NULL);
+		if (err)
+			return err;
+	}
 
 	memset(&q, 0, sizeof(struct gfs2_quota));
 	err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q));
@@ -927,7 +930,9 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 	unsigned int x;
 	int error = 0;
 
-	gfs2_quota_hold(ip, uid, gid);
+	error = gfs2_quota_hold(ip, uid, gid);
+	if (error)
+		return error;
 
 	if (capable(CAP_SYS_RESOURCE) ||
 	    sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
-- 
1.7.4.4


^ permalink raw reply related

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Since the merge window is upon us, here is the current content of
the GFS2 git tree. A few things will be help back to the following
merge window in order to ensure a greater test time, but those currently
in the tree are ready for the current window.

Recently I've reconstituted the GFS2 git tree, so it can be pulled
(via http) from:

http://sucs.org/~rohan/git/gfs2-3.0-nmw

and viewed via gitweb at:

http://sucs.org/gitweb/

This is thanks to the Swansea University Computer Society for providing
a temporary (or possibly permanent) home for the GFS2 git trees. Please
treat their server kindly as this will only continue while it doesn't
generate too much traffic. I figure that there will not be too many
people pulling the GFS2 tree at once, but we'll see.

Some highlights of the current patch set:
 o Reduction in code of approx 400 lines
 o Big clean up (and speed up) in the resource group code
   - This is a nice base to build some forthcoming improvements on
   - It should improve performance with multi-threaded workloads
 o Some left-over fsync/writeback changes
 o Improvements to readahead when deallocating large directories

Any questions/concerns then please let me know as usual,

Steve.

^ permalink raw reply

* [PATCH 24/24] GFS2: Move readahead of metadata during deallocation into its own function
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel, linux-kernel; +Cc: Steven Whitehouse
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

Move the recently added readahead of the indirect pointer
tree during deallocation into its own function in order
that we can use it elsewhere in the future. Also this
fixes the resetting of the "first" variable in the
original patch.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 97b6195..41d494d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -269,6 +269,30 @@ static inline __be64 *metapointer(unsigned int height, const struct metapath *mp
 	return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
 }
 
+static void gfs2_metapath_ra(struct gfs2_glock *gl,
+			     const struct buffer_head *bh, const __be64 *pos)
+{
+	struct buffer_head *rabh;
+	const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size);
+	const __be64 *t;
+
+	for (t = pos; t < endp; t++) {
+		if (!*t)
+			continue;
+
+		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
+		if (trylock_buffer(rabh)) {
+			if (!buffer_uptodate(rabh)) {
+				rabh->b_end_io = end_buffer_read_sync;
+				submit_bh(READA | REQ_META, rabh);
+				continue;
+			}
+			unlock_buffer(rabh);
+		}
+		brelse(rabh);
+	}
+}
+
 /**
  * lookup_metapath - Walk the metadata tree to a specific point
  * @ip: The inode
@@ -843,7 +867,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *bh = NULL;
-	__be64 *top, *bottom, *t2;
+	__be64 *top, *bottom;
 	u64 bn;
 	int error;
 	int mh_size = sizeof(struct gfs2_meta_header);
@@ -872,26 +896,9 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
 		goto out;
 
 	if (height < ip->i_height - 1) {
-		struct buffer_head *rabh;
 
-		for (t2 = top; t2 < bottom; t2++, first = 0) {
-			if (!*t2)
-				continue;
+		gfs2_metapath_ra(ip->i_gl, bh, top);
 
-			bn = be64_to_cpu(*t2);
-			rabh = gfs2_getbuf(ip->i_gl, bn, CREATE);
-			if (trylock_buffer(rabh)) {
-				if (buffer_uptodate(rabh)) {
-					unlock_buffer(rabh);
-					brelse(rabh);
-					continue;
-				}
-				rabh->b_end_io = end_buffer_read_sync;
-				submit_bh(READA | REQ_META, rabh);
-				continue;
-			}
-			brelse(rabh);
-		}
 		for (; top < bottom; top++, first = 0) {
 			if (!*top)
 				continue;
-- 
1.7.4.4


^ permalink raw reply related

* [Cluster-devel] [PATCH 01/24] GFS2: Clean up dir hash table reading
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel.redhat.com
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

Since there is now only a single caller to gfs2_dir_read_data()
and it has a number of constant arguments, we can factor
those out. Also some tests relating to the inode size were
being done twice.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 1cc2f8e..2045d70 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -240,16 +240,15 @@ fail:
 	return error;
 }
 
-static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
-				 u64 offset, unsigned int size)
+static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf,
+				 unsigned int size)
 {
 	struct buffer_head *dibh;
 	int error;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		offset += sizeof(struct gfs2_dinode);
-		memcpy(buf, dibh->b_data + offset, size);
+		memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
 		brelse(dibh);
 	}
 
@@ -261,13 +260,12 @@ static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
  * gfs2_dir_read_data - Read a data from a directory inode
  * @ip: The GFS2 Inode
  * @buf: The buffer to place result into
- * @offset: File offset to begin jdata_readng from
  * @size: Amount of data to transfer
  *
  * Returns: The amount of data actually copied or the error
  */
-static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
-			      unsigned int size, unsigned ra)
+static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf,
+			      unsigned int size)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	u64 lblock, dblock;
@@ -275,24 +273,14 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 	unsigned int o;
 	int copied = 0;
 	int error = 0;
-	u64 disksize = i_size_read(&ip->i_inode);
-
-	if (offset >= disksize)
-		return 0;
-
-	if (offset + size > disksize)
-		size = disksize - offset;
-
-	if (!size)
-		return 0;
 
 	if (gfs2_is_stuffed(ip))
-		return gfs2_dir_read_stuffed(ip, buf, offset, size);
+		return gfs2_dir_read_stuffed(ip, buf, size);
 
 	if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
 		return -EINVAL;
 
-	lblock = offset;
+	lblock = 0;
 	o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
 
 	while (copied < size) {
@@ -311,8 +299,6 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 			if (error || !dblock)
 				goto fail;
 			BUG_ON(extlen < 1);
-			if (!ra)
-				extlen = 1;
 			bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
 		} else {
 			error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
@@ -328,7 +314,7 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 		extlen--;
 		memcpy(buf, bh->b_data + o, amount);
 		brelse(bh);
-		buf += amount;
+		buf += (amount/sizeof(__be64));
 		copied += amount;
 		lblock++;
 		o = sizeof(struct gfs2_meta_header);
@@ -371,7 +357,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
 	if (hc == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1);
+	ret = gfs2_dir_read_data(ip, hc, hsize);
 	if (ret < 0) {
 		kfree(hc);
 		return ERR_PTR(ret);
-- 
1.7.4.4



^ permalink raw reply related

* [Cluster-devel] [PATCH 02/24] GFS2: Split data write & wait in fsync
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel.redhat.com
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

Now that the data writing is part of fsync proper, we can split
the waiting part out and do it later on. This reduces the
number of waits that we do during fsync on average.

There is also no need to take the i_mutex unless we are flushing
metadata to disk, so we can move that to within the metadata
flushing code.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index edeb9e8..92c3db4 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -551,8 +551,16 @@ static int gfs2_close(struct inode *inode, struct file *file)
  * @end: the end position in the file to sync
  * @datasync: set if we can ignore timestamp changes
  *
- * The VFS will flush data for us. We only need to worry
- * about metadata here.
+ * We split the data flushing here so that we don't wait for the data
+ * until after we've also sent the metadata to disk. Note that for
+ * data=ordered, we will write & wait for the data at the log flush
+ * stage anyway, so this is unlikely to make much of a difference
+ * except in the data=writeback case.
+ *
+ * If the fdatawrite fails due to any reason except -EIO, we will
+ * continue the remainder of the fsync, although we'll still report
+ * the error at the end. This is to match filemap_write_and_wait_range()
+ * behaviour.
  *
  * Returns: errno
  */
@@ -560,30 +568,36 @@ static int gfs2_close(struct inode *inode, struct file *file)
 static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
 		      int datasync)
 {
-	struct inode *inode = file->f_mapping->host;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
 	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
 	struct gfs2_inode *ip = GFS2_I(inode);
-	int ret;
+	int ret, ret1 = 0;
 
-	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (ret)
-		return ret;
-	mutex_lock(&inode->i_mutex);
+	if (mapping->nrpages) {
+		ret1 = filemap_fdatawrite_range(mapping, start, end);
+		if (ret1 == -EIO)
+			return ret1;
+	}
 
 	if (datasync)
 		sync_state &= ~I_DIRTY_SYNC;
 
 	if (sync_state) {
+		mutex_lock(&inode->i_mutex);
 		ret = sync_inode_metadata(inode, 1);
 		if (ret) {
 			mutex_unlock(&inode->i_mutex);
 			return ret;
 		}
 		gfs2_ail_flush(ip->i_gl);
+		mutex_unlock(&inode->i_mutex);
 	}
 
-	mutex_unlock(&inode->i_mutex);
-	return 0;
+	if (mapping->nrpages)
+		ret = filemap_fdatawait_range(mapping, start, end);
+
+	return ret ? ret : ret1;
 }
 
 /**
-- 
1.7.4.4



^ permalink raw reply related

* [Cluster-devel] [PATCH 03/24] GFS2: Fix bug-trap in ail flush code
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel.redhat.com
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

The assert was being tested under the wrong lock, a
legacy of the original code. Also, if it does trigger,
the resulting information was not always a lot of help.

This moves the patch under the correct lock and also
prints out more useful information in tacking down the
source of the problem.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index da21eca..99df483 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,6 +28,17 @@
 #include "trans.h"
 #include "dir.h"
 
+static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
+{
+	fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
+	       bh, (unsigned long long)bh->b_blocknr, bh->b_state,
+	       bh->b_page->mapping, bh->b_page->flags);
+	fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n",
+	       gl->gl_name.ln_type, gl->gl_name.ln_number,
+	       gfs2_glock2aspace(gl));
+	gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n");
+}
+
 /**
  * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
  * @gl: the glock
@@ -41,20 +52,24 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl)
 	struct list_head *head = &gl->gl_ail_list;
 	struct gfs2_bufdata *bd;
 	struct buffer_head *bh;
+	sector_t blocknr;
 
 	spin_lock(&sdp->sd_ail_lock);
 	while (!list_empty(head)) {
 		bd = list_entry(head->next, struct gfs2_bufdata,
 				bd_ail_gl_list);
 		bh = bd->bd_bh;
-		gfs2_remove_from_ail(bd);
-		bd->bd_bh = NULL;
+		blocknr = bh->b_blocknr;
+		if (buffer_busy(bh))
+			gfs2_ail_error(gl, bh);
 		bh->b_private = NULL;
+		gfs2_remove_from_ail(bd); /* drops ref on bh */
 		spin_unlock(&sdp->sd_ail_lock);
 
-		bd->bd_blkno = bh->b_blocknr;
+		bd->bd_bh = NULL;
+		bd->bd_blkno = blocknr;
+
 		gfs2_log_lock(sdp);
-		gfs2_assert_withdraw(sdp, !buffer_busy(bh));
 		gfs2_trans_add_revoke(sdp, bd);
 		gfs2_log_unlock(sdp);
 
-- 
1.7.4.4



^ permalink raw reply related

* [Cluster-devel] [PATCH 04/24] GFS2: Make atime checks more efficient
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel.redhat.com
In-Reply-To: <1319460518-9046-1-git-send-email-swhiteho@redhat.com>

We do not need to start a transaction unless the atime
check has proved positive. Also if we are going to flush
the complete ail list anyway, we might as well skip the
writeback for this specific inode's metadata, since that
will be done as part of the ail writeback process in an
order offering potentially more efficient I/O.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b7beadd..afb8761 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -768,30 +768,30 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
 			goto do_flush;
 		unlock_required = 1;
 	}
-	ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
-	if (ret)
-		goto do_unlock;
 	ret = gfs2_meta_inode_buffer(ip, &bh);
 	if (ret == 0) {
 		di = (struct gfs2_dinode *)bh->b_data;
 		atime.tv_sec = be64_to_cpu(di->di_atime);
 		atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
 		if (timespec_compare(&inode->i_atime, &atime) > 0) {
-			gfs2_trans_add_bh(ip->i_gl, bh, 1);
-			gfs2_dinode_out(ip, bh->b_data);
+			ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
+			if (ret == 0) {
+				gfs2_trans_add_bh(ip->i_gl, bh, 1);
+				gfs2_dinode_out(ip, bh->b_data);
+				gfs2_trans_end(sdp);
+			}
 		}
 		brelse(bh);
 	}
-	gfs2_trans_end(sdp);
-do_unlock:
 	if (unlock_required)
 		gfs2_glock_dq_uninit(&gh);
 do_flush:
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
-	filemap_fdatawrite(metamapping);
 	if (bdi->dirty_exceeded)
 		gfs2_ail1_flush(sdp, wbc);
+	else
+		filemap_fdatawrite(metamapping);
 	if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
 		ret = filemap_fdatawait(metamapping);
 	if (ret)
-- 
1.7.4.4



^ permalink raw reply related

* [PATCH] Shrink thread_info a bit
From: Russell King - ARM Linux @ 2011-10-24 12:48 UTC (permalink / raw)
  To: linux-arm-kernel

Thread info comes in on Versatile Express at 752 bytes in size, which
is quite large.  Of this, the crunch state is 184 bytes, which is only
used on Cirrus Logic devices.

It is wasteful to have this in every kernel when Cirrus Logic CPUs
are not that widely used.  So make this conditional.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/thread_info.h |    2 ++
 arch/arm/kernel/asm-offsets.c      |    2 ++
 2 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 7b5cc8d..a030be7 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -59,7 +59,9 @@ struct thread_info {
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
 	unsigned long		tp_value;
+#ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
+#endif
 	union fp_state		fpstate __attribute__((aligned(8)));
 	union vfp_state		vfpstate;
 #ifdef CONFIG_ARM_THUMBEE

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.