From: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Mike Marciniszyn
<mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>,
Sebastian Sanchez
<sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Subject: [PATCH 06/12] IB/hfi1: Optimize devdata cachelines
Date: Mon, 17 Oct 2016 04:19:35 -0700 [thread overview]
Message-ID: <20161017111935.7934.23337.stgit@scvm10.sc.intel.com> (raw)
In-Reply-To: <20161017103326.7934.21558.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
From: Sebastian Sanchez <sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Profiling shows hot-path struct members that need
to be in a minimal set of cachelines.
Group these struct members together in one cacheline:
sc2vl_lock
sc2vl
rhf_rcv_function_map
rcv_limit
rhf_offset
Group these struct members together in a separate cacheline:
process_pio_send
process_dma_send
pport
rcd
int_counter
flags
num_pports
first_user_ctxt
Fill holes in struct hfi1_devdata revealed by pahole.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
drivers/infiniband/hw/hfi1/hfi.h | 111 +++++++++++++++++++-------------------
1 files changed, 56 insertions(+), 55 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 87847cc..bec4607 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -852,32 +852,29 @@ struct hfi1_devdata {
u8 __iomem *kregend;
/* physical address of chip for io_remap, etc. */
resource_size_t physaddr;
- /* receive context data */
- struct hfi1_ctxtdata **rcd;
+ /* Per VL data. Enough for all VLs but not all elements are set/used. */
+ struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */
struct send_context_info *send_contexts;
/* map hardware send contexts to software index */
u8 *hw_to_sw;
/* spinlock for allocating and releasing send context resources */
spinlock_t sc_lock;
- /* Per VL data. Enough for all VLs but not all elements are set/used. */
- struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* lock for pio_map */
spinlock_t pio_map_lock;
+ /* Send Context initialization lock. */
+ spinlock_t sc_init_lock;
+ /* lock for sdma_map */
+ spinlock_t sde_map_lock;
/* array of kernel send contexts */
struct send_context **kernel_send_context;
/* array of vl maps */
struct pio_vl_map __rcu *pio_map;
- /* seqlock for sc2vl */
- seqlock_t sc2vl_lock;
- u64 sc2vl[4];
- /* Send Context initialization lock. */
- spinlock_t sc_init_lock;
+ /* default flags to last descriptor */
+ u64 default_desc1;
/* fields common to all SDMA engines */
- /* default flags to last descriptor */
- u64 default_desc1;
volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
dma_addr_t sdma_heads_phys;
void *sdma_pad_dma; /* DMA'ed by chip */
@@ -888,8 +885,6 @@ struct hfi1_devdata {
u32 chip_sdma_engines;
/* num used */
u32 num_sdma;
- /* lock for sdma_map */
- spinlock_t sde_map_lock;
/* array of engines sized by num_sdma */
struct sdma_engine *per_sdma;
/* array of vl maps */
@@ -898,14 +893,11 @@ struct hfi1_devdata {
wait_queue_head_t sdma_unfreeze_wq;
atomic_t sdma_unfreeze_count;
+ u32 lcb_access_count; /* count of LCB users */
+
/* common data between shared ASIC HFIs in this OS */
struct hfi1_asic_data *asic_data;
- /* hfi1_pportdata, points to array of (physical) port-specific
- * data structs, indexed by pidx (0..n-1)
- */
- struct hfi1_pportdata *pport;
-
/* mem-mapped pointer to base of PIO buffers */
void __iomem *piobase;
/*
@@ -922,20 +914,13 @@ struct hfi1_devdata {
/* send context numbers and sizes for each type */
struct sc_config_sizes sc_sizes[SC_MAX];
- u32 lcb_access_count; /* count of LCB users */
-
char *boardname; /* human readable board info */
- /* device (not port) flags, basically device capabilities */
- u32 flags;
-
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
u64 z_send_schedule;
- /* percpu int_counter */
- u64 __percpu *int_counter;
- u64 __percpu *rcv_limit;
+
u64 __percpu *send_schedule;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
@@ -950,6 +935,7 @@ struct hfi1_devdata {
/* base receive interrupt timeout, in CSR units */
u32 rcv_intr_timeout_csr;
+ u32 freezelen; /* max length of freezemsg */
u64 __iomem *egrtidbase;
spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
@@ -971,7 +957,6 @@ struct hfi1_devdata {
* IB link status cheaply
*/
struct hfi1_status *status;
- u32 freezelen; /* max length of freezemsg */
/* revision register shadow */
u64 revision;
@@ -999,6 +984,8 @@ struct hfi1_devdata {
u16 rcvegrbufsize_shift;
/* both sides of the PCIe link are gen3 capable */
u8 link_gen3_capable;
+ /* default link down value (poll/sleep) */
+ u8 link_default;
/* localbus width (1, 2,4,8,16,32) from config space */
u32 lbus_width;
/* localbus speed in MHz */
@@ -1034,8 +1021,6 @@ struct hfi1_devdata {
u8 hfi1_id;
/* implementation code */
u8 icode;
- /* default link down value (poll/sleep) */
- u8 link_default;
/* vAU of this device */
u8 vau;
/* vCU of this device */
@@ -1046,27 +1031,17 @@ struct hfi1_devdata {
u16 vl15_init;
/* Misc small ints */
- /* Number of physical ports available */
- u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
u8 n_krcv_queues;
u8 qos_shift;
- u8 qpn_mask;
- u16 rhf_offset; /* offset of RHF within receive header entry */
u16 irev; /* implementation revision */
u16 dc8051_ver; /* 8051 firmware version */
+ spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
struct platform_config_cache pcfg_cache;
struct diag_client *diag_client;
- spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
-
- u8 psxmitwait_supported;
- /* cycle length of PS* counters in HW (in picoseconds) */
- u16 psxmitwait_check_rate;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@@ -1081,6 +1056,9 @@ struct hfi1_devdata {
struct rcv_array_data rcv_entries;
+ /* cycle length of PS* counters in HW (in picoseconds) */
+ u16 psxmitwait_check_rate;
+
/*
* 64 bit synthetic counters
*/
@@ -1113,11 +1091,11 @@ struct hfi1_devdata {
struct err_info_rcvport err_info_rcvport;
struct err_info_constraint err_info_rcv_constraint;
struct err_info_constraint err_info_xmit_constraint;
- u8 err_info_uncorrectable;
- u8 err_info_fmconfig;
atomic_t drop_packet;
u8 do_drop;
+ u8 err_info_uncorrectable;
+ u8 err_info_fmconfig;
/*
* Software counters for the status bits defined by the
@@ -1140,51 +1118,74 @@ struct hfi1_devdata {
u64 sw_cce_err_status_aggregate;
/* Software counter that aggregates all bypass packet rcv errors */
u64 sw_rcv_bypass_packet_errors;
- /* receive interrupt functions */
- rhf_rcv_function_ptr *rhf_rcv_function_map;
+ /* receive interrupt function */
rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
+ /* Save the enabled LCB error bits */
+ u64 lcb_err_en;
+
/*
* Handlers for outgoing data so that snoop/capture does not
* have to have its hooks in the send path
*/
- send_routine process_pio_send;
+ send_routine process_pio_send ____cacheline_aligned_in_smp;
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ /* hfi1_pportdata, points to array of (physical) port-specific
+ * data structs, indexed by pidx (0..n-1)
+ */
+ struct hfi1_pportdata *pport;
+ /* receive context data */
+ struct hfi1_ctxtdata **rcd;
+ u64 __percpu *int_counter;
+ /* device (not port) flags, basically device capabilities */
+ u16 flags;
+ /* Number of physical ports available */
+ u8 num_pports;
+ /* Lowest context number which can be used by user processes */
+ u8 first_user_ctxt;
+ /* adding a new field here would make it part of this cacheline */
+
+ /* seqlock for sc2vl */
+ seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
+ u64 sc2vl[4];
+ /* receive interrupt functions */
+ rhf_rcv_function_ptr *rhf_rcv_function_map;
+ u64 __percpu *rcv_limit;
+ u16 rhf_offset; /* offset of RHF within receive header entry */
+ /* adding a new field here would make it part of this cacheline */
/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
u8 oui1;
u8 oui2;
u8 oui3;
+ u8 dc_shutdown;
+
/* Timer and counter used to detect RcvBufOvflCnt changes */
struct timer_list rcverr_timer;
- u32 rcv_ovfl_cnt;
wait_queue_head_t event_queue;
- /* Save the enabled LCB error bits */
- u64 lcb_err_en;
- u8 dc_shutdown;
-
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
dma_addr_t rcvhdrtail_dummy_dma;
- bool eprom_available; /* true if EPROM is available for this device */
- bool aspm_supported; /* Does HW support ASPM */
- bool aspm_enabled; /* ASPM state: enabled/disabled */
+ u32 rcv_ovfl_cnt;
/* Serialize ASPM enable/disable between multiple verbs contexts */
spinlock_t aspm_lock;
/* Number of verbs contexts which have disabled ASPM */
atomic_t aspm_disabled_cnt;
/* Keeps track of user space clients */
atomic_t user_refcount;
+ struct hfi1_affinity *affinity;
/* Used to wait for outstanding user space clients before dev removal */
struct completion user_comp;
-
- struct hfi1_affinity *affinity;
+ bool eprom_available; /* true if EPROM is available for this device */
+ bool aspm_supported; /* Does HW support ASPM */
+ bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable sdma_rht;
+
struct kobject kobj;
};
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2016-10-17 11:19 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-17 11:19 [PATCH 00/12] For 4.9 rc Dennis Dalessandro
[not found] ` <20161017103326.7934.21558.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2016-10-17 11:19 ` [PATCH 01/12] IB/rdmvat: Organize hot path calldowns into a single cacheline Dennis Dalessandro
2016-10-17 11:19 ` [PATCH 02/12] IB/hfi1: Optimize pio cachelines Dennis Dalessandro
2016-10-17 11:19 ` [PATCH 03/12] IB/hfi1: Fix an Oops on pci device force remove Dennis Dalessandro
[not found] ` <20161017111918.7934.72325.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2016-10-17 16:07 ` Jason Gunthorpe
[not found] ` <20161017160731.GA5679-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2016-10-19 18:01 ` Tadeusz Struk
2016-10-25 15:57 ` [PATCH v2 " Dennis Dalessandro
[not found] ` <20161025155754.4950.23412.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2016-10-25 16:48 ` Jason Gunthorpe
[not found] ` <20161025164851.GA28096-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2016-10-25 17:38 ` Tadeusz Struk
[not found] ` <7f4cbe0c-0c83-48b2-9901-4a5e27b306b4-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2016-10-25 18:03 ` Jason Gunthorpe
2016-10-17 11:19 ` [PATCH 04/12] IB/hfi1: Return ENODEV for unsupported PCI device ids Dennis Dalessandro
2016-10-17 11:19 ` [PATCH 05/12] IB/hfi1: Unify access to GUID entries Dennis Dalessandro
2016-10-17 11:19 ` Dennis Dalessandro [this message]
2016-10-17 11:19 ` [PATCH 07/12] IB/hfi1: Fix a potential memory leak in hfi1_create_ctxts() Dennis Dalessandro
2016-10-17 11:19 ` [PATCH 08/12] IB/hfi1: Add active channel and backplane support for integrated devices Dennis Dalessandro
2016-10-17 11:19 ` [PATCH 09/12] IB/hfi1: Remove leftover snoop references Dennis Dalessandro
2016-10-17 11:19 ` [PATCH 10/12] IB/hfi1: Clean up unused argument Dennis Dalessandro
2016-10-17 11:20 ` [PATCH 11/12] IB/hfi1: Delete unused lock Dennis Dalessandro
2016-10-17 11:20 ` [PATCH 12/12] IB/hfi1: Fix rnr_timer addition Dennis Dalessandro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161017111935.7934.23337.stgit@scvm10.sc.intel.com \
--to=dennis.dalessandro-ral2jqcrhueavxtiumwx3w@public.gmane.org \
--cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
--cc=sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).