* [PATCH net-next 03/12] net: hns3: do not query unsupported commands in debugfs
From: Huazhong Tan @ 2019-08-01 3:55 UTC (permalink / raw)
To: davem
Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm,
Yufeng Mo, Huazhong Tan
In-Reply-To: <1564631745-36733-1-git-send-email-tanhuazhong@huawei.com>
From: Yufeng Mo <moyufeng@huawei.com>
Some commands are not supported on DCB-unsupported ports.
This patch distinguishes these commands and does not query
unsupported commands in debugfs.
This patch also fixes an error in the "qos buf cfg" dump
command in debugfs.
Fixes: 2849d4e7a1be ("net: hns3: Add "tc config" info query function")
Fixes: 7d9d7f8864ba ("net: hns3: Add "qos buffer" config info query function")
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Reviewed-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
.../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 70 ++++++++++++++--------
1 file changed, 46 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index ab625c7..e987d18 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -325,6 +325,12 @@ static void hclge_dbg_dump_tc(struct hclge_dev *hdev)
struct hclge_desc desc;
int i, ret;
+ if (!hnae3_dev_dcb_supported(hdev)) {
+ dev_info(&hdev->pdev->dev,
+ "Only DCB-supported dev supports tc\n");
+ return;
+ }
+
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ETS_TC_WEIGHT, true);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -409,6 +415,12 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
dev_info(&hdev->pdev->dev, "QS_SCH qs_id: %u\n", desc.data[0]);
+ if (!hnae3_dev_dcb_supported(hdev)) {
+ dev_info(&hdev->pdev->dev,
+ "Only DCB-supported dev supports tm mapping\n");
+ return;
+ }
+
cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING;
hclge_cmd_setup_basic_desc(&desc, cmd, true);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -590,6 +602,12 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev,
dev_info(&hdev->pdev->dev, "%04d | %04d | %02d | %02d\n",
queue_id, qset_id, pri_id, tc_id);
+ if (!hnae3_dev_dcb_supported(hdev)) {
+ dev_info(&hdev->pdev->dev,
+ "Only DCB-supported dev supports tm mapping\n");
+ return;
+ }
+
cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING;
bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data;
for (group_id = 0; group_id < 32; group_id++) {
@@ -715,6 +733,34 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
dev_info(&hdev->pdev->dev, "rx_share_buf: 0x%x\n",
rx_buf_cmd->shared_buf);
+ cmd = HCLGE_OPC_RX_COM_WL_ALLOC;
+ hclge_cmd_setup_basic_desc(desc, cmd, true);
+ ret = hclge_cmd_send(&hdev->hw, desc, 1);
+ if (ret)
+ goto err_qos_cmd_send;
+
+ rx_com_wl = (struct hclge_rx_com_wl *)desc[0].data;
+ dev_info(&hdev->pdev->dev, "\n");
+ dev_info(&hdev->pdev->dev, "rx_com_wl: high: 0x%x, low: 0x%x\n",
+ rx_com_wl->com_wl.high, rx_com_wl->com_wl.low);
+
+ cmd = HCLGE_OPC_RX_GBL_PKT_CNT;
+ hclge_cmd_setup_basic_desc(desc, cmd, true);
+ ret = hclge_cmd_send(&hdev->hw, desc, 1);
+ if (ret)
+ goto err_qos_cmd_send;
+
+ rx_packet_cnt = (struct hclge_rx_com_wl *)desc[0].data;
+ dev_info(&hdev->pdev->dev,
+ "rx_global_packet_cnt: high: 0x%x, low: 0x%x\n",
+ rx_packet_cnt->com_wl.high, rx_packet_cnt->com_wl.low);
+ dev_info(&hdev->pdev->dev, "\n");
+
+ if (!hnae3_dev_dcb_supported(hdev)) {
+ dev_info(&hdev->pdev->dev,
+ "Only DCB-supported dev supports rx priv wl\n");
+ return;
+ }
cmd = HCLGE_OPC_RX_PRIV_WL_ALLOC;
hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
@@ -723,7 +769,6 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
if (ret)
goto err_qos_cmd_send;
- dev_info(&hdev->pdev->dev, "\n");
rx_priv_wl = (struct hclge_rx_priv_wl_buf *)desc[0].data;
for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
dev_info(&hdev->pdev->dev,
@@ -758,29 +803,6 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
"rx_com_thrd_tc_%d: high: 0x%x, low: 0x%x\n", i + 4,
rx_com_thrd->com_thrd[i].high,
rx_com_thrd->com_thrd[i].low);
-
- cmd = HCLGE_OPC_RX_COM_WL_ALLOC;
- hclge_cmd_setup_basic_desc(desc, cmd, true);
- ret = hclge_cmd_send(&hdev->hw, desc, 1);
- if (ret)
- goto err_qos_cmd_send;
-
- rx_com_wl = (struct hclge_rx_com_wl *)desc[0].data;
- dev_info(&hdev->pdev->dev, "\n");
- dev_info(&hdev->pdev->dev, "rx_com_wl: high: 0x%x, low: 0x%x\n",
- rx_com_wl->com_wl.high, rx_com_wl->com_wl.low);
-
- cmd = HCLGE_OPC_RX_GBL_PKT_CNT;
- hclge_cmd_setup_basic_desc(desc, cmd, true);
- ret = hclge_cmd_send(&hdev->hw, desc, 1);
- if (ret)
- goto err_qos_cmd_send;
-
- rx_packet_cnt = (struct hclge_rx_com_wl *)desc[0].data;
- dev_info(&hdev->pdev->dev,
- "rx_global_packet_cnt: high: 0x%x, low: 0x%x\n",
- rx_packet_cnt->com_wl.high, rx_packet_cnt->com_wl.low);
-
return;
err_qos_cmd_send:
--
2.7.4
^ permalink raw reply related
* [PATCH net-next 00/12] net: hns3: some code optimizations & bugfixes & features
From: Huazhong Tan @ 2019-08-01 3:55 UTC (permalink / raw)
To: davem
Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm,
Huazhong Tan
This patch-set includes code optimizations, bugfixes and features for
the HNS3 ethernet controller driver.
[patch 01/12] adds support for reporting link change event.
[patch 02/12] adds handler for NCSI error.
[patch 03/12] fixes bug related to debugfs.
[patch 04/12] adds a code optimization for setting ring parameters.
[patch 05/12 - 09/12] add some cleanups.
[patch 10/12 - 12/12] add some patches related to reset issues.
Guojia Liao (1):
net: hns3: rename a member in struct hclge_mac_ethertype_idx_rd_cmd
Huazhong Tan (4):
net: hns3: add handler for NCSI error mailbox
net: hns3: fix some reset handshake issue
net: hns3: clear reset interrupt status in hclge_irq_handle()
net: hns3: activate reset timer when calling reset_event
Jian Shen (3):
net: hns3: add link change event report
net: hns3: refine for set ring parameters
net: hns3: remove unnecessary variable in
hclge_get_mac_vlan_cmd_status()
Weihang Li (1):
net: hns3: simplify hclge_cmd_query_error()
Yufeng Mo (1):
net: hns3: do not query unsupported commands in debugfs
Yunsheng Lin (2):
net: hns3: minor cleanup in hns3_clean_rx_ring
net: hns3: minior error handling change for hclge_tm_schd_info_init
drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 2 +
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 22 ++--
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 3 +-
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 88 +++++++++----
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 33 ++++-
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 25 +++-
.../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 76 ++++++++----
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 19 +--
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 137 +++++++++++++++------
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 8 ++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 45 +++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 18 +--
.../ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 4 +-
.../ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h | 7 +-
.../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 60 ++++++---
.../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 3 +
16 files changed, 397 insertions(+), 153 deletions(-)
--
2.7.4
^ permalink raw reply
* [PATCH net-next 02/12] net: hns3: add handler for NCSI error mailbox
From: Huazhong Tan @ 2019-08-01 3:55 UTC (permalink / raw)
To: davem
Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm,
Huazhong Tan
In-Reply-To: <1564631745-36733-1-git-send-email-tanhuazhong@huawei.com>
When NCSI has a HW error, the IMP will report this error to the driver
by sending a mailbox message. After receiving this message, the driver
should assert a global reset to fix this kind of HW error.
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Reviewed-by: Peng Li <lipeng321@huawei.com>
---
drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 1 +
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 1 +
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 1 +
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 12 ++++++++++++
4 files changed, 15 insertions(+)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 1564be5..f8a87f8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -48,6 +48,7 @@ enum HCLGE_MBX_OPCODE {
HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */
HCLGE_MBX_PUSH_LINK_STATUS, /* (M7 -> PF) get port link status */
+ HCLGE_MBX_NCSI_ERROR, /* (M7 -> PF) receive a NCSI error */
};
/* below are per-VF mac-vlan subcodes */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 538d101..c20b972 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -394,6 +394,7 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev)
req = (struct hclge_firmware_compat_cmd *)desc.data;
hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1);
+ hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1);
req->compat = cpu_to_le32(compat);
return hclge_cmd_send(&hdev->hw, &desc, 1);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 743c9f4..070b9dd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -1011,6 +1011,7 @@ struct hclge_query_ppu_pf_other_int_dfx_cmd {
};
#define HCLGE_LINK_EVENT_REPORT_EN_B 0
+#define HCLGE_NCSI_ERROR_REPORT_EN_B 1
struct hclge_firmware_compat_cmd {
__le32 compat;
u8 rsv[20];
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 87de32d..5a7221e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -582,6 +582,15 @@ static bool hclge_cmd_crq_empty(struct hclge_hw *hw)
return tail == hw->cmq.crq.next_to_use;
}
+static void hclge_handle_ncsi_error(struct hclge_dev *hdev)
+{
+ struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+
+ ae_dev->ops->set_default_reset_request(ae_dev, HNAE3_GLOBAL_RESET);
+ dev_warn(&hdev->pdev->dev, "requesting reset due to NCSI error\n");
+ ae_dev->ops->reset_event(hdev->pdev, NULL);
+}
+
void hclge_mbx_handler(struct hclge_dev *hdev)
{
struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq;
@@ -740,6 +749,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
case HCLGE_MBX_PUSH_LINK_STATUS:
hclge_handle_link_change_event(hdev, req);
break;
+ case HCLGE_MBX_NCSI_ERROR:
+ hclge_handle_ncsi_error(hdev);
+ break;
default:
dev_err(&hdev->pdev->dev,
"un-supported mailbox message, code = %d\n",
--
2.7.4
^ permalink raw reply related
* [PATCH net-next 05/12] net: hns3: remove unnecessary variable in hclge_get_mac_vlan_cmd_status()
From: Huazhong Tan @ 2019-08-01 3:55 UTC (permalink / raw)
To: davem
Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm,
Jian Shen, Huazhong Tan
In-Reply-To: <1564631745-36733-1-git-send-email-tanhuazhong@huawei.com>
From: Jian Shen <shenjian15@huawei.com>
The local variable return_status in hclge_get_mac_vlan_cmd_status()
is useless. So this patch returns the error code directly, instead of
using this variable. Also, replace some '%d' with '%u' in
hclge_get_mac_vlan_cmd_status().
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Reviewed-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 50 +++++++++++-----------
1 file changed, 25 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 855b65e..4317c8f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6268,7 +6268,6 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
enum hclge_mac_vlan_tbl_opcode op)
{
struct hclge_dev *hdev = vport->back;
- int return_status = -EIO;
if (cmdq_resp) {
dev_err(&hdev->pdev->dev,
@@ -6279,52 +6278,53 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
if (op == HCLGE_MAC_VLAN_ADD) {
if ((!resp_code) || (resp_code == 1)) {
- return_status = 0;
+ return 0;
} else if (resp_code == HCLGE_ADD_UC_OVERFLOW) {
- return_status = -ENOSPC;
dev_err(&hdev->pdev->dev,
"add mac addr failed for uc_overflow.\n");
+ return -ENOSPC;
} else if (resp_code == HCLGE_ADD_MC_OVERFLOW) {
- return_status = -ENOSPC;
dev_err(&hdev->pdev->dev,
"add mac addr failed for mc_overflow.\n");
- } else {
- dev_err(&hdev->pdev->dev,
- "add mac addr failed for undefined, code=%d.\n",
- resp_code);
+ return -ENOSPC;
}
+
+ dev_err(&hdev->pdev->dev,
+ "add mac addr failed for undefined, code=%u.\n",
+ resp_code);
+ return -EIO;
} else if (op == HCLGE_MAC_VLAN_REMOVE) {
if (!resp_code) {
- return_status = 0;
+ return 0;
} else if (resp_code == 1) {
- return_status = -ENOENT;
dev_dbg(&hdev->pdev->dev,
"remove mac addr failed for miss.\n");
- } else {
- dev_err(&hdev->pdev->dev,
- "remove mac addr failed for undefined, code=%d.\n",
- resp_code);
+ return -ENOENT;
}
+
+ dev_err(&hdev->pdev->dev,
+ "remove mac addr failed for undefined, code=%u.\n",
+ resp_code);
+ return -EIO;
} else if (op == HCLGE_MAC_VLAN_LKUP) {
if (!resp_code) {
- return_status = 0;
+ return 0;
} else if (resp_code == 1) {
- return_status = -ENOENT;
dev_dbg(&hdev->pdev->dev,
"lookup mac addr failed for miss.\n");
- } else {
- dev_err(&hdev->pdev->dev,
- "lookup mac addr failed for undefined, code=%d.\n",
- resp_code);
+ return -ENOENT;
}
- } else {
- return_status = -EINVAL;
+
dev_err(&hdev->pdev->dev,
- "unknown opcode for get_mac_vlan_cmd_status,opcode=%d.\n",
- op);
+ "lookup mac addr failed for undefined, code=%u.\n",
+ resp_code);
+ return -EIO;
}
- return return_status;
+ dev_err(&hdev->pdev->dev,
+ "unknown opcode for get_mac_vlan_cmd_status, opcode=%d.\n", op);
+
+ return -EINVAL;
}
static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr)
--
2.7.4
^ permalink raw reply related
* Re: [PATCH net-next v5 6/6] netfilter: nf_tables_offload: support indr block call
From: Yunsheng Lin @ 2019-08-01 3:58 UTC (permalink / raw)
To: wenxu, jiri, pablo, fw, jakub.kicinski; +Cc: netfilter-devel, netdev
In-Reply-To: <1564628627-10021-7-git-send-email-wenxu@ucloud.cn>
On 2019/8/1 11:03, wenxu@ucloud.cn wrote:
> From: wenxu <wenxu@ucloud.cn>
>
> nftable support indr-block call. It makes nftable an offload vlan
> and tunnel device.
>
> nft add table netdev firewall
> nft add chain netdev firewall aclout { type filter hook ingress offload device mlx_pf0vf0 priority - 300 \; }
> nft add rule netdev firewall aclout ip daddr 10.0.0.1 fwd to vlan0
> nft add chain netdev firewall aclin { type filter hook ingress device vlan0 priority - 300 \; }
> nft add rule netdev firewall aclin ip daddr 10.0.0.7 fwd to mlx_pf0vf0
>
> Signed-off-by: wenxu <wenxu@ucloud.cn>
> ---
> v5: add nft_get_default_block
>
> include/net/netfilter/nf_tables_offload.h | 2 +
> net/netfilter/nf_tables_api.c | 7 ++
> net/netfilter/nf_tables_offload.c | 156 +++++++++++++++++++++++++-----
> 3 files changed, 141 insertions(+), 24 deletions(-)
>
> diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
> index 3196663..ac69087 100644
> --- a/include/net/netfilter/nf_tables_offload.h
> +++ b/include/net/netfilter/nf_tables_offload.h
> @@ -63,6 +63,8 @@ struct nft_flow_rule {
> struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
> void nft_flow_rule_destroy(struct nft_flow_rule *flow);
> int nft_flow_rule_offload_commit(struct net *net);
> +bool nft_indr_get_default_block(struct net_device *dev,
> + struct flow_indr_block_info *info);
>
> #define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
> (__reg)->base_offset = \
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 605a7cf..6a1d0b2 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -7593,6 +7593,11 @@ static void __net_exit nf_tables_exit_net(struct net *net)
> .exit = nf_tables_exit_net,
> };
>
> +static struct flow_indr_get_block_entry get_block_entry = {
> + .get_block_cb = nft_indr_get_default_block,
> + .list = LIST_HEAD_INIT(get_block_entry.list),
> +};
> +
> static int __init nf_tables_module_init(void)
> {
> int err;
> @@ -7624,6 +7629,7 @@ static int __init nf_tables_module_init(void)
> goto err5;
>
> nft_chain_route_init();
> + flow_indr_add_default_block_cb(&get_block_entry);
> return err;
> err5:
> rhltable_destroy(&nft_objname_ht);
> @@ -7640,6 +7646,7 @@ static int __init nf_tables_module_init(void)
>
> static void __exit nf_tables_module_exit(void)
> {
> + flow_indr_del_default_block_cb(&get_block_entry);
> nfnetlink_subsys_unregister(&nf_tables_subsys);
> unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
> nft_chain_filter_fini();
> diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
> index 64f5fd5..59c9629 100644
> --- a/net/netfilter/nf_tables_offload.c
> +++ b/net/netfilter/nf_tables_offload.c
> @@ -171,24 +171,114 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
> return 0;
> }
>
> +static int nft_block_setup(struct nft_base_chain *basechain,
> + struct flow_block_offload *bo,
> + enum flow_block_command cmd)
> +{
> + int err;
> +
> + switch (cmd) {
> + case FLOW_BLOCK_BIND:
> + err = nft_flow_offload_bind(bo, basechain);
> + break;
> + case FLOW_BLOCK_UNBIND:
> + err = nft_flow_offload_unbind(bo, basechain);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + err = -EOPNOTSUPP;
> + }
> +
> + return err;
> +}
> +
> +static int nft_block_offload_cmd(struct nft_base_chain *chain,
> + struct net_device *dev,
> + enum flow_block_command cmd)
> +{
> + struct netlink_ext_ack extack = {};
> + struct flow_block_offload bo = {};
> + int err;
> +
> + bo.net = dev_net(dev);
> + bo.block = &chain->flow_block;
> + bo.command = cmd;
> + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
> + bo.extack = &extack;
> + INIT_LIST_HEAD(&bo.cb_list);
> +
> + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
> + if (err < 0)
> + return err;
> +
> + return nft_block_setup(chain, &bo, cmd);
> +}
> +
> +static void nft_indr_block_ing_cmd(struct net_device *dev,
> + struct flow_block *flow_block,
> + flow_indr_block_bind_cb_t *cb,
> + void *cb_priv,
> + enum flow_block_command cmd)
> +{
> + struct netlink_ext_ack extack = {};
> + struct flow_block_offload bo = {};
> + struct nft_base_chain *chain;
> +
> + if (flow_block)
> + return;
Maybe "if (!flow_block)" ?
> +
> + chain = container_of(flow_block, struct nft_base_chain, flow_block);
> +
> + bo.net = dev_net(dev);
> + bo.block = flow_block;
> + bo.command = cmd;
> + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
> + bo.extack = &extack;
> + INIT_LIST_HEAD(&bo.cb_list);
> +
> + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
> +
> + nft_block_setup(chain, &bo, cmd);
> +}
> +
> +static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
> + struct net_device *dev,
> + enum flow_block_command cmd)
> +{
> + struct flow_block_offload bo = {};
> + struct netlink_ext_ack extack = {};
> +
> + bo.net = dev_net(dev);
> + bo.block = &chain->flow_block;
> + bo.command = cmd;
> + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
> + bo.extack = &extack;
> + INIT_LIST_HEAD(&bo.cb_list);
> +
> + flow_indr_block_call(&chain->flow_block, dev, nft_indr_block_ing_cmd,
> + &bo, cmd);
> +
> + if (list_empty(&bo.cb_list))
> + return -EOPNOTSUPP;
> +
> + return nft_block_setup(chain, &bo, cmd);
> +}
> +
> #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
>
> static int nft_flow_offload_chain(struct nft_trans *trans,
> enum flow_block_command cmd)
> {
> struct nft_chain *chain = trans->ctx.chain;
> - struct netlink_ext_ack extack = {};
> - struct flow_block_offload bo = {};
> struct nft_base_chain *basechain;
> struct net_device *dev;
> - int err;
>
> if (!nft_is_base_chain(chain))
> return -EOPNOTSUPP;
>
> basechain = nft_base_chain(chain);
> dev = basechain->ops.dev;
> - if (!dev || !dev->netdev_ops->ndo_setup_tc)
> + if (!dev)
> return -EOPNOTSUPP;
>
> /* Only default policy to accept is supported for now. */
> @@ -197,26 +287,10 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
> nft_trans_chain_policy(trans) != NF_ACCEPT)
> return -EOPNOTSUPP;
>
> - bo.command = cmd;
> - bo.block = &basechain->flow_block;
> - bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
> - bo.extack = &extack;
> - INIT_LIST_HEAD(&bo.cb_list);
> -
> - err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
> - if (err < 0)
> - return err;
> -
> - switch (cmd) {
> - case FLOW_BLOCK_BIND:
> - err = nft_flow_offload_bind(&bo, basechain);
> - break;
> - case FLOW_BLOCK_UNBIND:
> - err = nft_flow_offload_unbind(&bo, basechain);
> - break;
> - }
> -
> - return err;
> + if (dev->netdev_ops->ndo_setup_tc)
> + return nft_block_offload_cmd(basechain, dev, cmd);
> + else
> + return nft_indr_block_offload_cmd(basechain, dev, cmd);
> }
>
> int nft_flow_rule_offload_commit(struct net *net)
> @@ -266,3 +340,37 @@ int nft_flow_rule_offload_commit(struct net *net)
>
> return err;
> }
> +
> +bool nft_indr_get_default_block(struct net_device *dev,
> + struct flow_indr_block_info *info)
> +{
> + struct net *net = dev_net(dev);
> + const struct nft_table *table;
> + const struct nft_chain *chain;
> +
> + rcu_read_lock();
> +
> + list_for_each_entry_rcu(table, &net->nft.tables, list) {
> + if (table->family != NFPROTO_NETDEV)
> + continue;
> +
> + list_for_each_entry_rcu(chain, &table->chains, list) {
> + if (nft_is_base_chain(chain)) {
> + struct nft_base_chain *basechain;
> +
> + basechain = nft_base_chain(chain);
> + if (!strncmp(basechain->dev_name, dev->name,
> + IFNAMSIZ)) {
> + info->flow_block = &basechain->flow_block;
> + info->ing_cmd_cb = nft_indr_block_ing_cmd;
> + rcu_read_unlock();
> + return true;
> + }
> + }
> + }
> + }
> +
> + rcu_read_unlock();
> +
> + return false;
> +}
>
^ permalink raw reply
* [PATCH net-next 01/12] net: hns3: add link change event report
From: Huazhong Tan @ 2019-08-01 3:55 UTC (permalink / raw)
To: davem
Cc: netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm,
Jian Shen, Huazhong Tan
In-Reply-To: <1564631745-36733-1-git-send-email-tanhuazhong@huawei.com>
From: Jian Shen <shenjian15@huawei.com>
Previously, the PF updated the link status once per second. In some
scenarios, the link down event needs to be reported more quickly.
To solve this, the firmware pushes the link change event to the PF with
a CMDQ message, and the driver updates the link status directly.
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Reviewed-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
---
drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 1 +
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 25 ++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 7 +++++
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 9 +++---
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 8 ++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 33 ++++++++++++++++++++++
6 files changed, 79 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 75329ab..1564be5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -47,6 +47,7 @@ enum HCLGE_MBX_OPCODE {
HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */
HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */
+ HCLGE_MBX_PUSH_LINK_STATUS, /* (M7 -> PF) get port link status */
};
/* below are per-VF mac-vlan subcodes */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index d9858f2..538d101 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -383,6 +383,22 @@ int hclge_cmd_queue_init(struct hclge_dev *hdev)
return ret;
}
+static int hclge_firmware_compat_config(struct hclge_dev *hdev)
+{
+ struct hclge_firmware_compat_cmd *req;
+ struct hclge_desc desc;
+ u32 compat = 0;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_M7_COMPAT_CFG, false);
+
+ req = (struct hclge_firmware_compat_cmd *)desc.data;
+
+ hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1);
+ req->compat = cpu_to_le32(compat);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
int hclge_cmd_init(struct hclge_dev *hdev)
{
u32 version;
@@ -429,6 +445,15 @@ int hclge_cmd_init(struct hclge_dev *hdev)
hnae3_get_field(version, HNAE3_FW_VERSION_BYTE0_MASK,
HNAE3_FW_VERSION_BYTE0_SHIFT));
+ /* ask the firmware to enable some features, driver can work without
+ * it.
+ */
+ ret = hclge_firmware_compat_config(hdev);
+ if (ret)
+ dev_warn(&hdev->pdev->dev,
+ "Firmware compatible features not enabled(%d).\n",
+ ret);
+
return 0;
err_cmd_init:
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 96840d8..743c9f4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -257,6 +257,7 @@ enum hclge_opcode_type {
/* M7 stats command */
HCLGE_OPC_M7_STATS_BD = 0x7012,
HCLGE_OPC_M7_STATS_INFO = 0x7013,
+ HCLGE_OPC_M7_COMPAT_CFG = 0x701A,
/* SFP command */
HCLGE_OPC_GET_SFP_INFO = 0x7104,
@@ -1009,6 +1010,12 @@ struct hclge_query_ppu_pf_other_int_dfx_cmd {
u8 rsv[4];
};
+#define HCLGE_LINK_EVENT_REPORT_EN_B 0
+struct hclge_firmware_compat_cmd {
+ __le32 compat;
+ u8 rsv[20];
+};
+
int hclge_cmd_init(struct hclge_dev *hdev);
static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
{
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 4138780..855b65e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2517,7 +2517,7 @@ static void hclge_reset_task_schedule(struct hclge_dev *hdev)
&hdev->rst_service_task);
}
-static void hclge_task_schedule(struct hclge_dev *hdev)
+void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time)
{
if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
@@ -2526,7 +2526,7 @@ static void hclge_task_schedule(struct hclge_dev *hdev)
hdev->fd_arfs_expire_timer++;
mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
system_wq, &hdev->service_task,
- round_jiffies_relative(HZ));
+ delay_time);
}
}
@@ -3636,7 +3636,7 @@ static void hclge_service_task(struct work_struct *work)
hdev->fd_arfs_expire_timer = 0;
}
- hclge_task_schedule(hdev);
+ hclge_task_schedule(hdev, round_jiffies_relative(HZ));
}
struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle)
@@ -6175,7 +6175,7 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable)
struct hclge_dev *hdev = vport->back;
if (enable) {
- hclge_task_schedule(hdev);
+ hclge_task_schedule(hdev, round_jiffies_relative(HZ));
} else {
/* Set the DOWN flag here to disable the service to be
* scheduled again
@@ -6220,6 +6220,7 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) &&
hdev->reset_type != HNAE3_FUNC_RESET) {
hclge_mac_stop_phy(hdev);
+ hclge_update_link_status(hdev);
return;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 688e425..c9b9867f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -302,6 +302,13 @@ enum hclge_fc_mode {
HCLGE_FC_DEFAULT
};
+enum hclge_link_fail_code {
+ HCLGE_LF_NORMAL,
+ HCLGE_LF_REF_CLOCK_LOST,
+ HCLGE_LF_XSFP_TX_DISABLE,
+ HCLGE_LF_XSFP_ABSENT,
+};
+
#define HCLGE_PG_NUM 4
#define HCLGE_SCH_MODE_SP 0
#define HCLGE_SCH_MODE_DWRR 1
@@ -1021,4 +1028,5 @@ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
u16 state, u16 vlan_tag, u16 qos,
u16 vlan_proto);
+void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 690b999..87de32d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -545,6 +545,36 @@ static int hclge_get_rss_key(struct hclge_vport *vport,
HCLGE_RSS_MBX_RESP_LEN);
}
+static void hclge_link_fail_parse(struct hclge_dev *hdev, u8 link_fail_code)
+{
+ switch (link_fail_code) {
+ case HCLGE_LF_REF_CLOCK_LOST:
+ dev_warn(&hdev->pdev->dev, "Reference clock lost!\n");
+ break;
+ case HCLGE_LF_XSFP_TX_DISABLE:
+ dev_warn(&hdev->pdev->dev, "SFP tx is disabled!\n");
+ break;
+ case HCLGE_LF_XSFP_ABSENT:
+ dev_warn(&hdev->pdev->dev, "SFP is absent!\n");
+ break;
+ default:
+ break;
+ }
+}
+
+static void hclge_handle_link_change_event(struct hclge_dev *hdev,
+ struct hclge_mbx_vf_to_pf_cmd *req)
+{
+#define LINK_STATUS_OFFSET 1
+#define LINK_FAIL_CODE_OFFSET 2
+
+ clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
+ hclge_task_schedule(hdev, 0);
+
+ if (!req->msg[LINK_STATUS_OFFSET])
+ hclge_link_fail_parse(hdev, req->msg[LINK_FAIL_CODE_OFFSET]);
+}
+
static bool hclge_cmd_crq_empty(struct hclge_hw *hw)
{
u32 tail = hclge_read_dev(hw, HCLGE_NIC_CRQ_TAIL_REG);
@@ -707,6 +737,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
"PF fail(%d) to media type for VF\n",
ret);
break;
+ case HCLGE_MBX_PUSH_LINK_STATUS:
+ hclge_handle_link_change_event(hdev, req);
+ break;
default:
dev_err(&hdev->pdev->dev,
"un-supported mailbox message, code = %d\n",
--
2.7.4
^ permalink raw reply related
* Re: [PATCH 2/2] net: gmii2rgmii: Switch priv field in mdio device structure
From: Andrew Lunn @ 2019-08-01 4:06 UTC (permalink / raw)
To: Harini Katakam
Cc: f.fainelli, hkallweit1, davem, michal.simek, netdev,
linux-arm-kernel, linux-kernel, harinikatakamlinux,
radhey.shyam.pandey
In-Reply-To: <1564565779-29537-3-git-send-email-harini.katakam@xilinx.com>
On Wed, Jul 31, 2019 at 03:06:19PM +0530, Harini Katakam wrote:
> Use the priv field in mdio device structure instead of the one in
> phy device structure. The phy device priv field may be used by the
> external phy driver and should not be overwritten.
Hi Harini
I _think_ you could use dev_set_drvdata(&mdiodev->dev) in xgmiitorgmii_probe() and
dev_get_drvdata(&phydev->mdio.dev) in _read_status()
Andrew
^ permalink raw reply
* Re: [PATCH net-next v5 6/6] netfilter: nf_tables_offload: support indr block call
From: wenxu @ 2019-08-01 4:47 UTC (permalink / raw)
To: Yunsheng Lin, jiri, pablo, fw, jakub.kicinski; +Cc: netfilter-devel, netdev
In-Reply-To: <71694067-b07f-bed6-c472-4ec37dbeba3d@huawei.com>
On 8/1/2019 11:58 AM, Yunsheng Lin wrote:
> On 2019/8/1 11:03, wenxu@ucloud.cn wrote:
>> From: wenxu <wenxu@ucloud.cn>
>>
>> nftable support indr-block call. It makes nftable an offload vlan
>> and tunnel device.
>>
>> nft add table netdev firewall
>> nft add chain netdev firewall aclout { type filter hook ingress offload device mlx_pf0vf0 priority - 300 \; }
>> nft add rule netdev firewall aclout ip daddr 10.0.0.1 fwd to vlan0
>> nft add chain netdev firewall aclin { type filter hook ingress device vlan0 priority - 300 \; }
>> nft add rule netdev firewall aclin ip daddr 10.0.0.7 fwd to mlx_pf0vf0
>>
>> Signed-off-by: wenxu <wenxu@ucloud.cn>
>> ---
>> v5: add nft_get_default_block
>>
>> include/net/netfilter/nf_tables_offload.h | 2 +
>> net/netfilter/nf_tables_api.c | 7 ++
>> net/netfilter/nf_tables_offload.c | 156 +++++++++++++++++++++++++-----
>> 3 files changed, 141 insertions(+), 24 deletions(-)
>>
>> diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
>> index 3196663..ac69087 100644
>> --- a/include/net/netfilter/nf_tables_offload.h
>> +++ b/include/net/netfilter/nf_tables_offload.h
>> @@ -63,6 +63,8 @@ struct nft_flow_rule {
>> struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
>> void nft_flow_rule_destroy(struct nft_flow_rule *flow);
>> int nft_flow_rule_offload_commit(struct net *net);
>> +bool nft_indr_get_default_block(struct net_device *dev,
>> + struct flow_indr_block_info *info);
>>
>> #define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
>> (__reg)->base_offset = \
>> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
>> index 605a7cf..6a1d0b2 100644
>> --- a/net/netfilter/nf_tables_api.c
>> +++ b/net/netfilter/nf_tables_api.c
>> @@ -7593,6 +7593,11 @@ static void __net_exit nf_tables_exit_net(struct net *net)
>> .exit = nf_tables_exit_net,
>> };
>>
>> +static struct flow_indr_get_block_entry get_block_entry = {
>> + .get_block_cb = nft_indr_get_default_block,
>> + .list = LIST_HEAD_INIT(get_block_entry.list),
>> +};
>> +
>> static int __init nf_tables_module_init(void)
>> {
>> int err;
>> @@ -7624,6 +7629,7 @@ static int __init nf_tables_module_init(void)
>> goto err5;
>>
>> nft_chain_route_init();
>> + flow_indr_add_default_block_cb(&get_block_entry);
>> return err;
>> err5:
>> rhltable_destroy(&nft_objname_ht);
>> @@ -7640,6 +7646,7 @@ static int __init nf_tables_module_init(void)
>>
>> static void __exit nf_tables_module_exit(void)
>> {
>> + flow_indr_del_default_block_cb(&get_block_entry);
>> nfnetlink_subsys_unregister(&nf_tables_subsys);
>> unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
>> nft_chain_filter_fini();
>> diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
>> index 64f5fd5..59c9629 100644
>> --- a/net/netfilter/nf_tables_offload.c
>> +++ b/net/netfilter/nf_tables_offload.c
>> @@ -171,24 +171,114 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
>> return 0;
>> }
>>
>> +static int nft_block_setup(struct nft_base_chain *basechain,
>> + struct flow_block_offload *bo,
>> + enum flow_block_command cmd)
>> +{
>> + int err;
>> +
>> + switch (cmd) {
>> + case FLOW_BLOCK_BIND:
>> + err = nft_flow_offload_bind(bo, basechain);
>> + break;
>> + case FLOW_BLOCK_UNBIND:
>> + err = nft_flow_offload_unbind(bo, basechain);
>> + break;
>> + default:
>> + WARN_ON_ONCE(1);
>> + err = -EOPNOTSUPP;
>> + }
>> +
>> + return err;
>> +}
>> +
>> +static int nft_block_offload_cmd(struct nft_base_chain *chain,
>> + struct net_device *dev,
>> + enum flow_block_command cmd)
>> +{
>> + struct netlink_ext_ack extack = {};
>> + struct flow_block_offload bo = {};
>> + int err;
>> +
>> + bo.net = dev_net(dev);
>> + bo.block = &chain->flow_block;
>> + bo.command = cmd;
>> + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
>> + bo.extack = &extack;
>> + INIT_LIST_HEAD(&bo.cb_list);
>> +
>> + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
>> + if (err < 0)
>> + return err;
>> +
>> + return nft_block_setup(chain, &bo, cmd);
>> +}
>> +
>> +static void nft_indr_block_ing_cmd(struct net_device *dev,
>> + struct flow_block *flow_block,
>> + flow_indr_block_bind_cb_t *cb,
>> + void *cb_priv,
>> + enum flow_block_command cmd)
>> +{
>> + struct netlink_ext_ack extack = {};
>> + struct flow_block_offload bo = {};
>> + struct nft_base_chain *chain;
>> +
>> + if (flow_block)
>> + return;
> Maybe "if (!flow_block)" ?
yes it's a mistake. Thx!
>
>> +
>> + chain = container_of(flow_block, struct nft_base_chain, flow_block);
>> +
>> + bo.net = dev_net(dev);
>> + bo.block = flow_block;
>> + bo.command = cmd;
>> + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
>> + bo.extack = &extack;
>> + INIT_LIST_HEAD(&bo.cb_list);
>> +
>> + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
>> +
>> + nft_block_setup(chain, &bo, cmd);
>> +}
>> +
>> +static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
>> + struct net_device *dev,
>> + enum flow_block_command cmd)
>> +{
>> + struct flow_block_offload bo = {};
>> + struct netlink_ext_ack extack = {};
>> +
>> + bo.net = dev_net(dev);
>> + bo.block = &chain->flow_block;
>> + bo.command = cmd;
>> + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
>> + bo.extack = &extack;
>> + INIT_LIST_HEAD(&bo.cb_list);
>> +
>> + flow_indr_block_call(&chain->flow_block, dev, nft_indr_block_ing_cmd,
>> + &bo, cmd);
>> +
>> + if (list_empty(&bo.cb_list))
>> + return -EOPNOTSUPP;
>> +
>> + return nft_block_setup(chain, &bo, cmd);
>> +}
>> +
>> #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
>>
>> static int nft_flow_offload_chain(struct nft_trans *trans,
>> enum flow_block_command cmd)
>> {
>> struct nft_chain *chain = trans->ctx.chain;
>> - struct netlink_ext_ack extack = {};
>> - struct flow_block_offload bo = {};
>> struct nft_base_chain *basechain;
>> struct net_device *dev;
>> - int err;
>>
>> if (!nft_is_base_chain(chain))
>> return -EOPNOTSUPP;
>>
>> basechain = nft_base_chain(chain);
>> dev = basechain->ops.dev;
>> - if (!dev || !dev->netdev_ops->ndo_setup_tc)
>> + if (!dev)
>> return -EOPNOTSUPP;
>>
>> /* Only default policy to accept is supported for now. */
>> @@ -197,26 +287,10 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
>> nft_trans_chain_policy(trans) != NF_ACCEPT)
>> return -EOPNOTSUPP;
>>
>> - bo.command = cmd;
>> - bo.block = &basechain->flow_block;
>> - bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
>> - bo.extack = &extack;
>> - INIT_LIST_HEAD(&bo.cb_list);
>> -
>> - err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
>> - if (err < 0)
>> - return err;
>> -
>> - switch (cmd) {
>> - case FLOW_BLOCK_BIND:
>> - err = nft_flow_offload_bind(&bo, basechain);
>> - break;
>> - case FLOW_BLOCK_UNBIND:
>> - err = nft_flow_offload_unbind(&bo, basechain);
>> - break;
>> - }
>> -
>> - return err;
>> + if (dev->netdev_ops->ndo_setup_tc)
>> + return nft_block_offload_cmd(basechain, dev, cmd);
>> + else
>> + return nft_indr_block_offload_cmd(basechain, dev, cmd);
>> }
>>
>> int nft_flow_rule_offload_commit(struct net *net)
>> @@ -266,3 +340,37 @@ int nft_flow_rule_offload_commit(struct net *net)
>>
>> return err;
>> }
>> +
>> +bool nft_indr_get_default_block(struct net_device *dev,
>> + struct flow_indr_block_info *info)
>> +{
>> + struct net *net = dev_net(dev);
>> + const struct nft_table *table;
>> + const struct nft_chain *chain;
>> +
>> + rcu_read_lock();
>> +
>> + list_for_each_entry_rcu(table, &net->nft.tables, list) {
>> + if (table->family != NFPROTO_NETDEV)
>> + continue;
>> +
>> + list_for_each_entry_rcu(chain, &table->chains, list) {
>> + if (nft_is_base_chain(chain)) {
>> + struct nft_base_chain *basechain;
>> +
>> + basechain = nft_base_chain(chain);
>> + if (!strncmp(basechain->dev_name, dev->name,
>> + IFNAMSIZ)) {
>> + info->flow_block = &basechain->flow_block;
>> + info->ing_cmd_cb = nft_indr_block_ing_cmd;
>> + rcu_read_unlock();
>> + return true;
>> + }
>> + }
>> + }
>> + }
>> +
>> + rcu_read_unlock();
>> +
>> + return false;
>> +}
>>
>
^ permalink raw reply
* Re: [PATCH V2 7/9] vhost: do not use RCU to synchronize MMU notifier with worker
From: Jason Wang @ 2019-08-01 5:02 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: mst, kvm, virtualization, netdev, linux-kernel, linux-mm
In-Reply-To: <20190731193057.GG3946@ziepe.ca>
On 2019/8/1 上午3:30, Jason Gunthorpe wrote:
> On Wed, Jul 31, 2019 at 09:28:20PM +0800, Jason Wang wrote:
>> On 2019/7/31 下午8:39, Jason Gunthorpe wrote:
>>> On Wed, Jul 31, 2019 at 04:46:53AM -0400, Jason Wang wrote:
>>>> We used to use RCU to synchronize MMU notifier with worker. This leads
>>>> calling synchronize_rcu() in invalidate_range_start(). But on a busy
>>>> system, there would be many factors that may slow down the
>>>> synchronize_rcu() which makes it unsuitable to be called in MMU
>>>> notifier.
>>>>
>>>> A solution is SRCU but its overhead is obvious with the expensive full
>>>> memory barrier. Another choice is to use seqlock, but it doesn't
>>>> provide a synchronization method between readers and writers. The last
>>>> choice is to use vq mutex, but it need to deal with the worst case
>>>> that MMU notifier must be blocked and wait for the finish of swap in.
>>>>
>>>> So this patch switches use a counter to track whether or not the map
>>>> was used. The counter was increased when vq try to start or finish
>>>> uses the map. This means, when it was even, we're sure there's no
>>>> readers and MMU notifier is synchronized. When it was odd, it means
>>>> there's a reader we need to wait it to be even again then we are
>>>> synchronized.
>>> You just described a seqlock.
>>
>> Kind of, see my explanation below.
>>
>>
>>> We've been talking about providing this as some core service from mmu
>>> notifiers because nearly every use of this API needs it.
>>
>> That would be very helpful.
>>
>>
>>> IMHO this gets the whole thing backwards, the common pattern is to
>>> protect the 'shadow pte' data with a seqlock (usually open coded),
>>> such that the mmu notififer side has the write side of that lock and
>>> the read side is consumed by the thread accessing or updating the SPTE.
>>
>> Yes, I've considered something like that. But the problem is, mmu notifier
>> (writer) need to wait for the vhost worker to finish the read before it can
>> do things like setting dirty pages and unmapping page. It looks to me
>> seqlock doesn't provide things like this.
> The seqlock is usually used to prevent a 2nd thread from accessing the
> VA while it is being changed by the mm. ie you use something seqlocky
> instead of the ugly mmu_notifier_unregister/register cycle.
Yes, so we have two mappings:
[1] vring address to VA
[2] VA to PA
And have several readers and writers
1) set_vring_num_addr(): writer of both [1] and [2]
2) MMU notifier: reader of [1] writer of [2]
3) GUP: reader of [1] writer of [2]
4) memory accessors: reader of [1] and [2]
Fortunately, 1) 3) and 4) have already synchronized through vq->mutex.
We only need to deal with synchronization between 2) and each of the rest:
Sync between 1) and 2): For mapping [1], I do
mmu_notifier_unregister/register. This helps to avoid holding any lock to
do overlap check. Anyway we only care about one or three pages, but the
whole guest memory could be several TBs. For mapping [2], both 1) and 2)
are writers, so use spinlock (mmu_lock) to synchronize.
Sync between 2) and 3): For mapping [1], both are readers, no need any
synchronization. For mapping [2], both 2) and 3) are writers, so
synchronize through spinlock (mmu_lock);
Sync between 2) and 4): For mapping [1], both are readers, no need any
synchronization. For mapping [2], synchronize through RCU (or something
similar to seqlock).
Your suggestion is about the synchronization of [1] which may make sense,
but it could be done on top as an optimization. What this patch tries to
do is to not use RCU for [2]. Of course, the simplest way is to use vq
mutex in 2) but it means:
- we must hold vq lock to check range overlap
- since the critical section was increased, the worst case is to wait
guest memory to be swapped in, this could be even slower than
synchronize_rcu().
>
> You are supposed to use something simple like a spinlock or mutex
> inside the invalidate_range_start to serialized tear down of the SPTEs
> with their accessors.
Technically yes, but we probably can't afford that for vhost fast path,
the atomics eliminate almost all the performance improvement brought by
this patch on a machine without SMAP.
>
>> write_seqcount_begin()
>>
>> map = vq->map[X]
>>
>> write or read through map->addr directly
>>
>> write_seqcount_end()
>>
>>
>> There's no rmb() in write_seqcount_begin(), so map could be read before
>> write_seqcount_begin(), but it looks to me now that this doesn't harm at
>> all, maybe we can try this way.
> That is because it is a write side lock, not a read lock. IIRC
> seqlocks have weaker barriers because the write side needs to be
> serialized in some other way.
Yes. After thinking hard about the code, it looks to me that
write_seqcount_begin()/end() is sufficient here:
- Notifier will only assign NULL to map, so it doesn't harm to read map
before seq, then we will fallback to normal copy_from/to_user() slow
path earlier
- if we write through map->addr, it should be done before increasing the
seqcount because of the smp_wmb() in write_seqcount_end()
- if we read through map->addr which also contains a store to a pointer,
we have a good data dependency so smp_wmb() also works here.
>
> The requirement I see is you need invalidate_range_start to block
> until another thread exits its critical section (ie stops accessing
> the SPTEs).
Yes.
>
> That is a spinlock/mutex.
Or a semantics similar to RCU.
>
> You just can't invent a faster spinlock by open coding something with
> barriers, it doesn't work.
>
> Jason
If write_seqlock() works here, we can simply wait for the seqcount to
advance in the MMU notifier. The original idea is to use RCU which solves
this perfectly. But as pointed out it could be slow.
Thanks
^ permalink raw reply
* Re: [PATCH V2 4/9] vhost: reset invalidate_count in vhost_set_vring_num_addr()
From: Jason Wang @ 2019-08-01 5:03 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: mst, kvm, virtualization, netdev, linux-kernel, linux-mm
In-Reply-To: <20190731193252.GH3946@ziepe.ca>
On 2019/8/1 上午3:32, Jason Gunthorpe wrote:
> On Wed, Jul 31, 2019 at 09:29:28PM +0800, Jason Wang wrote:
>> On 2019/7/31 下午8:41, Jason Gunthorpe wrote:
>>> On Wed, Jul 31, 2019 at 04:46:50AM -0400, Jason Wang wrote:
>>>> The vhost_set_vring_num_addr() could be called in the middle of
>>>> invalidate_range_start() and invalidate_range_end(). If we don't reset
>>>> invalidate_count after the un-registering of MMU notifier, the
>>>> invalidate_cont will run out of sync (e.g never reach zero). This will
>>>> in fact disable the fast accessor path. Fixing by reset the count to
>>>> zero.
>>>>
>>>> Reported-by: Michael S. Tsirkin <mst@redhat.com>
>>> Did Michael report this as well?
>>
>> Correct me if I was wrong. I think it's point 4 described in
>> https://lkml.org/lkml/2019/7/21/25.
> I'm not sure what that is talking about
>
> But this fixes what I described:
>
> https://lkml.org/lkml/2019/7/22/554
>
> Jason
I'm sorry I missed this, will add your name as reported-by in the next
version.
Thanks
^ permalink raw reply
* Re: [PATCH net-next 2/2] net: phy: broadcom: add 1000Base-X support for BCM54616S
From: Tao Ren @ 2019-08-01 5:07 UTC (permalink / raw)
To: Andrew Lunn
Cc: Vladimir Oltean, Florian Fainelli, Heiner Kallweit,
David S . Miller, Arun Parameswaran, Justin Chen, netdev, lkml,
Andrew Jeffery, openbmc@lists.ozlabs.org
In-Reply-To: <08f55ccc-7d2a-996b-770b-a0f035889196@fb.com>
On 7/30/19 10:55 PM, Tao Ren wrote:
> On 7/30/19 7:34 PM, Andrew Lunn wrote:
>>> Hi Andrew,
>>>
>>> The BCM54616S PHY on my machine is connected to a BCM5396 switch chip over backplane (1000Base-KX).
>>
>> Ah, that is different. So the board is using it for RGMII to 1000Base-KX?
>>
>> phy-mode is about the MAC-PHY link. So in this case RGMII.
>
> Yes. It's RGMII to 1000Base-KX.
>
>> There is no DT way to configure the PHY-Switch link. However, it
>> sounds like you have the PHY strapped so it is doing 1000BaseX on the
>> PHY-Switch link. So do you actually need to configure this?
>
> The PHY is strapped in RGMII-Fiber Mode (the term used in datasheet), but besides 1000BaseX, 100Base-FX is also supported in this mode.
> The datasheet doesn't say which link type (1000BaseX or 100Base-FX) is active after reset and I cannot find a way to auto-detect the link type, either.
I found bit 0 of 100-FX control register can be used to detect PHY-switch link type (means DT is not needed). Will run more testing and send out v2 patch soon. Thank you all for the input and help.
Cheers,
Tao
^ permalink raw reply
* Re: [PATCH net-next 1/2] net: phy: broadcom: set features explicitly for BCM54616S
From: Tao Ren @ 2019-08-01 5:20 UTC (permalink / raw)
To: Heiner Kallweit, Andrew Lunn
Cc: Florian Fainelli, David S . Miller, Arun Parameswaran,
Justin Chen, Vladimir Oltean, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, Andrew Jeffery,
openbmc@lists.ozlabs.org
In-Reply-To: <fd179662-b9f9-4813-b9b5-91dbd796596e@fb.com>
On 7/30/19 11:00 PM, Tao Ren wrote:
> On 7/30/19 10:53 PM, Heiner Kallweit wrote:
>> On 31.07.2019 02:12, Tao Ren wrote:
>>> On 7/29/19 11:00 PM, Heiner Kallweit wrote:
>>>> On 30.07.2019 07:05, Tao Ren wrote:
>>>>> On 7/29/19 8:35 PM, Andrew Lunn wrote:
>>>>>> On Mon, Jul 29, 2019 at 05:25:32PM -0700, Tao Ren wrote:
>>>>>>> BCM54616S feature "PHY_GBIT_FEATURES" was removed by commit dcdecdcfe1fc
>>>>>>> ("net: phy: switch drivers to use dynamic feature detection"). As dynamic
>>>>>>> feature detection doesn't work when BCM54616S is working in RGMII-Fiber
>>>>>>> mode (different sets of MII Control/Status registers being used), let's
>>>>>>> set "PHY_GBIT_FEATURES" for BCM54616S explicitly.
>>>>>>
>>>>>> Hi Tao
>>>>>>
>>>>>> What exactly does it get wrong?
>>>>>>
>>>>>> Thanks
>>>>>> Andrew
>>>>>
>>>>> Hi Andrew,
>>>>>
>>>>> BCM54616S is set to RGMII-Fiber (1000Base-X) mode on my platform, and none of the features (1000BaseT/100BaseT/10BaseT) can be detected by genphy_read_abilities(), because the PHY only reports 1000BaseX_Full|Half ability in this mode.
>>>>>
>>>> Are you going to use the PHY in copper or fibre mode?
>>>> In case you use fibre mode, why do you need the copper modes set as supported?
>>>> Or does the PHY just start in fibre mode and you want to switch it to copper mode?
>>>
>>> Hi Heiner,
>>>
>>> The phy starts in fiber mode and that's the mode I want.
>>> My observation is: phydev->link is always 0 (Link status bit is never set in MII_BMSR) by using dynamic ability detection on my machine. I checked phydev->supported and it's set to "AutoNeg | TP | MII | Pause | Asym_Pause" by dynamic ability detection. Is it normal/expected? Or maybe the fix should go to different places? Thank you for your help.
>>>
>>
>> Not sure whether you stated already which kernel version you're using.
>> There's a brand-new extension to auto-detect 1000BaseX:
>> f30e33bcdab9 ("net: phy: Add more 1000BaseX support detection")
>> It's included in the 5.3-rc series.
>
> I'm running kernel 5.2.0. Thank you for the sharing and I didn't know the patch. Let me check it out.
I applied the above patch and ca72efb6bdc7 ("net: phy: Add detection of 1000BaseX link mode support") to my 5.2.0 tree but got the following warning when booting up my machine:
"PHY advertising (0,00000200,000062c0) more modes than genphy supports, some modes not advertised".
The BCM54616S PHY on my machine only reports 1000-X features in RGMII->1000Base-KX mode. Is it a known problem?
Anyways let me see if I missed some dependency/follow-up patches..
Cheers,
Tao
^ permalink raw reply
* Re: [PATCH 01/14] usb: ohci-nxp: enable compile-testing
From: Greg Kroah-Hartman @ 2019-08-01 5:58 UTC (permalink / raw)
To: Arnd Bergmann
Cc: soc, linux-arm-kernel, Vladimir Zapolskiy, Sylvain Lemieux,
Russell King, Gregory Clement, Linus Walleij, Alan Stern,
Jason Cooper, Andrew Lunn, Sebastian Hesselbarth, David S. Miller,
Guenter Roeck, linux-gpio, netdev, linux-serial, linux-usb,
linux-watchdog, linux-kernel
In-Reply-To: <20190731195713.3150463-2-arnd@arndb.de>
On Wed, Jul 31, 2019 at 09:56:43PM +0200, Arnd Bergmann wrote:
> The driver hardcodes a hardware I/O address the way one should
> generally not do, and this prevents both compile-testing, and
> moving the platform to CONFIG_ARCH_MULTIPLATFORM.
>
> Change the code to be independent of the machine headers
> to allow those two. Removing the hardcoded address would
> be hard and is not necessary, so leave that in place for now.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
> drivers/usb/host/Kconfig | 3 ++-
> drivers/usb/host/ohci-nxp.c | 25 ++++++++++++++++++-------
> 2 files changed, 20 insertions(+), 8 deletions(-)
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
^ permalink raw reply
* Re: [PATCH 02/14] usb: udc: lpc32xx: allow compile-testing
From: Greg Kroah-Hartman @ 2019-08-01 5:58 UTC (permalink / raw)
To: Arnd Bergmann
Cc: soc, linux-arm-kernel, Vladimir Zapolskiy, Sylvain Lemieux,
Russell King, Gregory Clement, Linus Walleij, Felipe Balbi,
Jason Cooper, Andrew Lunn, Sebastian Hesselbarth, David S. Miller,
Alan Stern, Guenter Roeck, linux-gpio, netdev, linux-serial,
linux-usb, linux-watchdog, Alexandre Belloni, linux-kernel
In-Reply-To: <20190731195713.3150463-3-arnd@arndb.de>
On Wed, Jul 31, 2019 at 09:56:44PM +0200, Arnd Bergmann wrote:
> The only thing that prevents building this driver on other
> platforms is the mach/hardware.h include, which is not actually
> used here at all, so remove the line and allow CONFIG_COMPILE_TEST.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
> drivers/usb/gadget/udc/Kconfig | 3 ++-
> drivers/usb/gadget/udc/lpc32xx_udc.c | 2 --
> 2 files changed, 2 insertions(+), 3 deletions(-)
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
^ permalink raw reply
* Re: [PATCH 04/14] serial: lpc32xx_hs: allow compile-testing
From: Greg Kroah-Hartman @ 2019-08-01 5:58 UTC (permalink / raw)
To: Arnd Bergmann
Cc: soc, linux-arm-kernel, Vladimir Zapolskiy, Sylvain Lemieux,
Russell King, Gregory Clement, Linus Walleij, Jiri Slaby,
Jason Cooper, Andrew Lunn, Sebastian Hesselbarth, David S. Miller,
Alan Stern, Guenter Roeck, linux-gpio, netdev, linux-serial,
linux-usb, linux-watchdog, linux-kernel
In-Reply-To: <20190731195713.3150463-5-arnd@arndb.de>
On Wed, Jul 31, 2019 at 09:56:46PM +0200, Arnd Bergmann wrote:
> The only thing that prevents building this driver on other
> platforms is the mach/hardware.h include, which is not actually
> used here at all, so remove the line and allow CONFIG_COMPILE_TEST.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
> drivers/tty/serial/Kconfig | 3 ++-
> drivers/tty/serial/lpc32xx_hs.c | 2 --
> 2 files changed, 2 insertions(+), 3 deletions(-)
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
^ permalink raw reply
* Re: [PATCH 09/14] serial: lpc32xx: allow compile testing
From: Greg Kroah-Hartman @ 2019-08-01 5:59 UTC (permalink / raw)
To: Arnd Bergmann
Cc: soc, linux-arm-kernel, Vladimir Zapolskiy, Sylvain Lemieux,
Russell King, Gregory Clement, Linus Walleij, Jason Cooper,
Andrew Lunn, Sebastian Hesselbarth, David S. Miller, Alan Stern,
Guenter Roeck, linux-gpio, netdev, linux-serial, linux-usb,
linux-watchdog, Jiri Slaby, linux-kernel
In-Reply-To: <20190731195713.3150463-10-arnd@arndb.de>
On Wed, Jul 31, 2019 at 09:56:51PM +0200, Arnd Bergmann wrote:
> The lpc32xx_loopback_set() function in hte lpc32xx_hs driver is the
> one thing that relies on platform header files. Move that into the
> core platform code so we only need a variable declaration for it,
> and enable COMPILE_TEST building.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
> arch/arm/mach-lpc32xx/serial.c | 30 ++++++++++++++++++++++++
> drivers/tty/serial/lpc32xx_hs.c | 35 ++++------------------------
> include/linux/soc/nxp/lpc32xx-misc.h | 4 ++++
> 3 files changed, 38 insertions(+), 31 deletions(-)
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
^ permalink raw reply
* [PATCH v3 bpf-next 00/12] CO-RE offset relocations
From: Andrii Nakryiko @ 2019-08-01 6:47 UTC (permalink / raw)
To: bpf, netdev, ast, daniel, yhs, songliubraving
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
This patch set implements the central part of CO-RE (Compile Once - Run
Everywhere, see [0] and [1] for slides and video): relocating field offsets.
Most of the details are written down as comments to corresponding parts of the
code.
Patch #1 adds loading of .BTF.ext offset relocations section and macros to
work with its contents.
Patch #2 implements CO-RE relocations algorithm in libbpf.
Patch #3 introduces the BPF_CORE_READ macro, hiding usage of Clang's
__builtin_preserve_access_index intrinsic that records offset relocation.
Patches #4-#12 add selftests validating various parts of relocation handling,
type compatibility, etc.
For all tests to work, you'll need latest Clang/LLVM supporting
__builtin_preserve_access_index intrinsic, used for recording offset
relocations. Kernel on which selftests run should have BTF information built
in (CONFIG_DEBUG_INFO_BTF=y).
[0] http://vger.kernel.org/bpfconf2019.html#session-2
[1] http://vger.kernel.org/lpc-bpf2018.html#session-2
v2->v3:
- enclose BPF_CORE_READ args in parens (Song);
v1->v2:
- add offsetofend(), fix btf_ext optional fields checks (Song);
- add bpf_core_dump_spec() for logging spec representation;
- move special first element processing out of the loop (Song);
- typo fixes (Song);
- drop BPF_ST | BPF_MEM insn relocation (Alexei);
- extracted BPF_CORE_READ into bpf_helpers (Alexei);
- added extra tests validating Clang capturing relocs correctly (Yonghong);
- switch core_relocs.c to use sub-tests;
- updated mods tests after Clang bug was fixed (Yonghong);
- fix bug enumerating candidate types;
Andrii Nakryiko (12):
libbpf: add .BTF.ext offset relocation section loading
libbpf: implement BPF CO-RE offset relocation algorithm
selftests/bpf: add BPF_CORE_READ relocatable read macro
selftests/bpf: add CO-RE relocs testing setup
selftests/bpf: add CO-RE relocs struct flavors tests
selftests/bpf: add CO-RE relocs nesting tests
selftests/bpf: add CO-RE relocs array tests
selftests/bpf: add CO-RE relocs enum/ptr/func_proto tests
selftests/bpf: add CO-RE relocs modifiers/typedef tests
selftests/bpf: add CO-RE relocs ptr-as-array tests
selftests/bpf: add CO-RE relocs ints tests
selftests/bpf: add CO-RE relocs misc tests
tools/lib/bpf/btf.c | 69 +-
tools/lib/bpf/btf.h | 4 +
tools/lib/bpf/libbpf.c | 915 +++++++++++++++++-
tools/lib/bpf/libbpf.h | 1 +
tools/lib/bpf/libbpf_internal.h | 105 ++
tools/testing/selftests/bpf/bpf_helpers.h | 20 +
.../selftests/bpf/prog_tests/core_reloc.c | 381 ++++++++
.../bpf/progs/btf__core_reloc_arrays.c | 3 +
.../btf__core_reloc_arrays___diff_arr_dim.c | 3 +
...btf__core_reloc_arrays___diff_arr_val_sz.c | 3 +
.../btf__core_reloc_arrays___err_non_array.c | 3 +
...btf__core_reloc_arrays___err_too_shallow.c | 3 +
.../btf__core_reloc_arrays___err_too_small.c | 3 +
..._core_reloc_arrays___err_wrong_val_type1.c | 3 +
..._core_reloc_arrays___err_wrong_val_type2.c | 3 +
.../bpf/progs/btf__core_reloc_flavors.c | 3 +
.../btf__core_reloc_flavors__err_wrong_name.c | 3 +
.../bpf/progs/btf__core_reloc_ints.c | 3 +
.../bpf/progs/btf__core_reloc_ints___bool.c | 3 +
.../btf__core_reloc_ints___err_bitfield.c | 3 +
.../btf__core_reloc_ints___err_wrong_sz_16.c | 3 +
.../btf__core_reloc_ints___err_wrong_sz_32.c | 3 +
.../btf__core_reloc_ints___err_wrong_sz_64.c | 3 +
.../btf__core_reloc_ints___err_wrong_sz_8.c | 3 +
.../btf__core_reloc_ints___reverse_sign.c | 3 +
.../bpf/progs/btf__core_reloc_misc.c | 4 +
.../bpf/progs/btf__core_reloc_mods.c | 3 +
.../progs/btf__core_reloc_mods___mod_swap.c | 3 +
.../progs/btf__core_reloc_mods___typedefs.c | 3 +
.../bpf/progs/btf__core_reloc_nesting.c | 3 +
.../btf__core_reloc_nesting___anon_embed.c | 3 +
...f__core_reloc_nesting___dup_compat_types.c | 5 +
...core_reloc_nesting___err_array_container.c | 3 +
...tf__core_reloc_nesting___err_array_field.c | 3 +
...e_reloc_nesting___err_dup_incompat_types.c | 4 +
...re_reloc_nesting___err_missing_container.c | 3 +
...__core_reloc_nesting___err_missing_field.c | 3 +
..._reloc_nesting___err_nonstruct_container.c | 3 +
...e_reloc_nesting___err_partial_match_dups.c | 4 +
.../btf__core_reloc_nesting___err_too_deep.c | 3 +
.../btf__core_reloc_nesting___extra_nesting.c | 3 +
..._core_reloc_nesting___struct_union_mixup.c | 3 +
.../bpf/progs/btf__core_reloc_primitives.c | 3 +
...f__core_reloc_primitives___diff_enum_def.c | 3 +
..._core_reloc_primitives___diff_func_proto.c | 3 +
...f__core_reloc_primitives___diff_ptr_type.c | 3 +
...tf__core_reloc_primitives___err_non_enum.c | 3 +
...btf__core_reloc_primitives___err_non_int.c | 3 +
...btf__core_reloc_primitives___err_non_ptr.c | 3 +
.../bpf/progs/btf__core_reloc_ptr_as_arr.c | 3 +
.../btf__core_reloc_ptr_as_arr___diff_sz.c | 3 +
.../selftests/bpf/progs/core_reloc_types.h | 667 +++++++++++++
.../bpf/progs/test_core_reloc_arrays.c | 55 ++
.../bpf/progs/test_core_reloc_flavors.c | 62 ++
.../bpf/progs/test_core_reloc_ints.c | 44 +
.../bpf/progs/test_core_reloc_kernel.c | 36 +
.../bpf/progs/test_core_reloc_misc.c | 58 ++
.../bpf/progs/test_core_reloc_mods.c | 62 ++
.../bpf/progs/test_core_reloc_nesting.c | 46 +
.../bpf/progs/test_core_reloc_primitives.c | 43 +
.../bpf/progs/test_core_reloc_ptr_as_arr.c | 30 +
61 files changed, 2686 insertions(+), 49 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/core_reloc.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_bitfield.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_16.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_32.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_64.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_8.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c
create mode 100644 tools/testing/selftests/bpf/progs/core_reloc_types.h
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_ints.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_misc.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c
--
2.17.1
^ permalink raw reply
* [PATCH v3 bpf-next 03/12] selftests/bpf: add BPF_CORE_READ relocatable read macro
From: Andrii Nakryiko @ 2019-08-01 6:47 UTC (permalink / raw)
To: bpf, netdev, ast, daniel, yhs, songliubraving
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190801064803.2519675-1-andriin@fb.com>
Add BPF_CORE_READ macro used in tests to do bpf_probe_read(), while
automatically capturing offset relocation for the source address.
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
tools/testing/selftests/bpf/bpf_helpers.h | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index f804f210244e..e1a430101f40 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -501,4 +501,24 @@ struct pt_regs;
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif
+/*
+ * BPF_CORE_READ abstracts away bpf_probe_read() call and captures offset
+ * relocation for source address using __builtin_preserve_access_index()
+ * built-in, provided by Clang.
+ *
+ * __builtin_preserve_access_index() takes as an argument an expression of
+ * taking an address of a field within struct/union. It makes compiler emit
+ * a relocation, which records BTF type ID describing root struct/union and an
+ * accessor string which describes exact embedded field that was used to take
+ * an address. See detailed description of this relocation format and
+ * semantics in comments to struct bpf_offset_reloc in libbpf_internal.h.
+ *
+ * This relocation allows libbpf to adjust BPF instruction to use correct
+ * actual field offset, based on target kernel BTF type that matches original
+ * (local) BTF, used to record relocation.
+ */
+#define BPF_CORE_READ(dst, src) \
+ bpf_probe_read((dst), sizeof(*(src)), \
+ __builtin_preserve_access_index(src))
+
#endif
--
2.17.1
^ permalink raw reply related
* [PATCH v3 bpf-next 02/12] libbpf: implement BPF CO-RE offset relocation algorithm
From: Andrii Nakryiko @ 2019-08-01 6:47 UTC (permalink / raw)
To: bpf, netdev, ast, daniel, yhs, songliubraving
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190801064803.2519675-1-andriin@fb.com>
This patch implements the core logic for BPF CO-RE offsets relocations.
Every instruction that needs to be relocated has corresponding
bpf_offset_reloc as part of BTF.ext. Relocations are performed by trying
to match recorded "local" relocation spec against potentially many
compatible "target" types, creating corresponding spec. Details of the
algorithm are noted in corresponding comments in the code.
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
---
tools/lib/bpf/libbpf.c | 915 ++++++++++++++++++++++++++++++++++++++++-
tools/lib/bpf/libbpf.h | 1 +
2 files changed, 909 insertions(+), 7 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ead915aec349..75da90928257 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -38,6 +38,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
+#include <sys/utsname.h>
#include <tools/libc_compat.h>
#include <libelf.h>
#include <gelf.h>
@@ -47,6 +48,7 @@
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
+#include "hashmap.h"
#ifndef EM_BPF
#define EM_BPF 247
@@ -1015,17 +1017,22 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
return 0;
}
-static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
- __u32 id)
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
{
const struct btf_type *t = btf__type_by_id(btf, id);
+ if (res_id)
+ *res_id = id;
+
while (true) {
switch (BTF_INFO_KIND(t->info)) {
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
case BTF_KIND_TYPEDEF:
+ if (res_id)
+ *res_id = t->type;
t = btf__type_by_id(btf, t->type);
break;
default:
@@ -1044,7 +1051,7 @@ static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
static bool get_map_field_int(const char *map_name, const struct btf *btf,
const struct btf_type *def,
const struct btf_member *m, __u32 *res) {
- const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
+ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
const char *name = btf__name_by_offset(btf, m->name_off);
const struct btf_array *arr_info;
const struct btf_type *arr_t;
@@ -1110,7 +1117,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
return -EOPNOTSUPP;
}
- def = skip_mods_and_typedefs(obj->btf, var->type);
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) {
pr_warning("map '%s': unexpected def kind %u.\n",
map_name, BTF_INFO_KIND(var->info));
@@ -2292,6 +2299,893 @@ bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
return 0;
}
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf;
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* high-level spec length */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length */
+ int raw_len;
+ /* field byte offset represented by spec */
+ __u32 offset;
+};
+
+static bool str_is_empty(const char *s)
+{
+ return !s || !s[0];
+}
+
+static int btf_kind(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info);
+}
+
+static bool btf_is_composite(const struct btf_type *t)
+{
+ int kind = btf_kind(t);
+
+ return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
+static bool btf_is_array(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_ARRAY;
+}
+
+/*
+ * Turn bpf_offset_reloc into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field offset (in bytes), specified by accessor string. Low-level spec
+ * captures every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indices.
+ * E.g., for this case:
+ *
+ * struct sample {
+ * int __unimportant;
+ * struct {
+ * int __1;
+ * int __2;
+ * int a[7];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ *
+ * int *x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ * - initial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - field 'a' access (corresponds to '2' in low-level spec);
+ * - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ */
+static int bpf_core_spec_parse(const struct btf *btf,
+ __u32 type_id,
+ const char *spec_str,
+ struct bpf_core_spec *spec)
+{
+ int access_idx, parsed_len, i;
+ const struct btf_type *t;
+ const char *name;
+ __u32 id;
+ __s64 sz;
+
+ if (str_is_empty(spec_str) || *spec_str == ':')
+ return -EINVAL;
+
+ memset(spec, 0, sizeof(*spec));
+ spec->btf = btf;
+
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+ while (*spec_str) {
+ if (*spec_str == ':')
+ ++spec_str;
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+ return -EINVAL;
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+ spec_str += parsed_len;
+ spec->raw_spec[spec->raw_len++] = access_idx;
+ }
+
+ if (spec->raw_len == 0)
+ return -EINVAL;
+
+ /* first spec value is always reloc type array index */
+ t = skip_mods_and_typedefs(btf, type_id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[0];
+ spec->spec[0].type_id = id;
+ spec->spec[0].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->offset = access_idx * sz;
+
+ for (i = 1; i < spec->raw_len; i++) {
+ t = skip_mods_and_typedefs(btf, id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[i];
+
+ if (btf_is_composite(t)) {
+ const struct btf_member *m = (void *)(t + 1);
+ __u32 offset;
+
+ if (access_idx >= BTF_INFO_VLEN(t->info))
+ return -EINVAL;
+
+ m = &m[access_idx];
+
+ if (BTF_INFO_KFLAG(t->info)) {
+ if (BTF_MEMBER_BITFIELD_SIZE(m->offset))
+ return -EINVAL;
+ offset = BTF_MEMBER_BIT_OFFSET(m->offset);
+ } else {
+ offset = m->offset;
+ }
+ if (m->offset % 8)
+ return -EINVAL;
+ spec->offset += offset / 8;
+
+ if (m->name_off) {
+ name = btf__name_by_offset(btf, m->name_off);
+ if (str_is_empty(name))
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->spec[spec->len].name = name;
+ spec->len++;
+ }
+
+ id = m->type;
+ } else if (btf_is_array(t)) {
+ const struct btf_array *a = (void *)(t + 1);
+
+ t = skip_mods_and_typedefs(btf, a->type, &id);
+ if (!t || access_idx >= a->nelems)
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->offset += access_idx * sz;
+ } else {
+ pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
+ type_id, spec_str, i, id, btf_kind(t));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/* Given 'some_struct_name___with_flavor' return the length of a name prefix
+ * before last triple underscore. Struct name part after last triple
+ * underscore is ignored by BPF CO-RE relocation during relocation matching.
+ */
+static size_t bpf_core_essential_name_len(const char *name)
+{
+ size_t n = strlen(name);
+ int i = n - 3;
+
+ while (i > 0) {
+ if (name[i] == '_' && name[i + 1] == '_' && name[i + 2] == '_')
+ return i;
+ i--;
+ }
+ return n;
+}
+
+/* dynamically sized list of type IDs */
+struct ids_vec {
+ __u32 *data;
+ int len;
+};
+
+static void bpf_core_free_cands(struct ids_vec *cand_ids)
+{
+ free(cand_ids->data);
+ free(cand_ids);
+}
+
+/* Find target types whose essential (flavor-less) name matches local type's.
+ * Returns list to be freed with bpf_core_free_cands(), or ERR_PTR() on error.
+ */
+static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
+					   __u32 local_type_id,
+					   const struct btf *targ_btf)
+{
+	size_t local_essent_len, targ_essent_len;
+	const char *local_name, *targ_name;
+	const struct btf_type *t;
+	struct ids_vec *cand_ids;
+	__u32 *new_ids;
+	int i, n;
+
+	t = btf__type_by_id(local_btf, local_type_id);
+	if (!t)
+		return ERR_PTR(-EINVAL);
+
+	local_name = btf__name_by_offset(local_btf, t->name_off);
+	if (str_is_empty(local_name))
+		return ERR_PTR(-EINVAL);
+	local_essent_len = bpf_core_essential_name_len(local_name);
+
+	cand_ids = calloc(1, sizeof(*cand_ids));
+	if (!cand_ids)
+		return ERR_PTR(-ENOMEM);
+
+	n = btf__get_nr_types(targ_btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(targ_btf, i);
+		targ_name = btf__name_by_offset(targ_btf, t->name_off);
+		if (str_is_empty(targ_name))
+			continue;
+		targ_essent_len = bpf_core_essential_name_len(targ_name);
+		if (targ_essent_len != local_essent_len ||
+		    strncmp(local_name, targ_name, local_essent_len) != 0)
+			continue;
+
+		pr_debug("[%d] (%s): found candidate [%d] (%s)\n",
+			 local_type_id, local_name, i, targ_name);
+		/* realloc() takes a size in bytes, not an element count */
+		new_ids = realloc(cand_ids->data,
+				  (cand_ids->len + 1) * sizeof(*new_ids));
+		if (!new_ids) {
+			bpf_core_free_cands(cand_ids);
+			return ERR_PTR(-ENOMEM);
+		}
+		cand_ids->data = new_ids;
+		cand_ids->data[cand_ids->len++] = i;
+	}
+	return cand_ids;
+}
+
+/* Check two types for compatibility, skipping const/volatile/restrict and
+ * typedefs, to ensure we are relocating offset to the compatible entities:
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
+ * - any two FWDs are compatible;
+ * - any two PTRs are always compatible;
+ * - for ENUMs, check sizes, names are ignored;
+ * - for INT, size and bitness should match, signedness is ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+ __u32 local_id,
+ const struct btf *targ_btf,
+ __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ __u16 kind;
+
+recur:
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+ return 1;
+ if (BTF_INFO_KIND(local_type->info) != BTF_INFO_KIND(targ_type->info))
+ return 0;
+
+ kind = BTF_INFO_KIND(local_type->info);
+ switch (kind) {
+ case BTF_KIND_FWD:
+ case BTF_KIND_PTR:
+ return 1;
+ case BTF_KIND_ENUM:
+ return local_type->size == targ_type->size;
+ case BTF_KIND_INT: {
+ __u32 loc_int = *(__u32 *)(local_type + 1);
+ __u32 targ_int = *(__u32 *)(targ_type + 1);
+
+ return BTF_INT_OFFSET(loc_int) == 0 &&
+ BTF_INT_OFFSET(targ_int) == 0 &&
+ local_type->size == targ_type->size &&
+ BTF_INT_BITS(loc_int) == BTF_INT_BITS(targ_int);
+ }
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *loc_a, *targ_a;
+
+ loc_a = (void *)(local_type + 1);
+ targ_a = (void *)(targ_type + 1);
+ local_id = loc_a->type;
+ targ_id = targ_a->type;
+ goto recur;
+ }
+ default:
+ pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n",
+ kind, local_id, targ_id);
+ return 0;
+ }
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+ const struct bpf_core_accessor *local_acc,
+ const struct btf *targ_btf,
+ __u32 targ_id,
+ struct bpf_core_spec *spec,
+ __u32 *next_targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ const struct btf_member *local_member, *m;
+ const char *local_name, *targ_name;
+ __u32 local_id;
+ int i, n, found;
+
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+ if (!btf_is_composite(targ_type))
+ return 0;
+
+ local_id = local_acc->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ local_member = (void *)(local_type + 1);
+ local_member += local_acc->idx;
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+ n = BTF_INFO_VLEN(targ_type->info);
+ m = (void *)(targ_type + 1);
+ for (i = 0; i < n; i++, m++) {
+ __u32 offset;
+
+ /* bitfield relocations not supported */
+ if (BTF_INFO_KFLAG(targ_type->info)) {
+ if (BTF_MEMBER_BITFIELD_SIZE(m->offset))
+ continue;
+ offset = BTF_MEMBER_BIT_OFFSET(m->offset);
+ } else {
+ offset = m->offset;
+ }
+ if (offset % 8)
+ continue;
+
+ /* too deep struct/union/array nesting */
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ /* speculate this member will be the good one */
+ spec->offset += offset / 8;
+ spec->raw_spec[spec->raw_len++] = i;
+
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
+ if (str_is_empty(targ_name)) {
+ /* embedded struct/union, we need to go deeper */
+ found = bpf_core_match_member(local_btf, local_acc,
+ targ_btf, m->type,
+ spec, next_targ_id);
+ if (found) /* either found or error */
+ return found;
+ } else if (strcmp(local_name, targ_name) == 0) {
+ /* matching named field */
+ struct bpf_core_accessor *targ_acc;
+
+ targ_acc = &spec->spec[spec->len++];
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+
+ *next_targ_id = m->type;
+ found = bpf_core_fields_are_compat(local_btf,
+ local_member->type,
+ targ_btf, m->type);
+ if (!found)
+ spec->len--; /* pop accessor */
+ return found;
+ }
+ /* member turned out not to be what we looked for */
+ spec->offset -= offset / 8;
+ spec->raw_len--;
+ }
+
+ return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+ const struct btf *targ_btf, __u32 targ_id,
+ struct bpf_core_spec *targ_spec)
+{
+ const struct btf_type *targ_type;
+ const struct bpf_core_accessor *local_acc;
+ struct bpf_core_accessor *targ_acc;
+ int i, sz, matched;
+
+ memset(targ_spec, 0, sizeof(*targ_spec));
+ targ_spec->btf = targ_btf;
+
+ local_acc = &local_spec->spec[0];
+ targ_acc = &targ_spec->spec[0];
+
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+ &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+
+ if (local_acc->name) {
+ matched = bpf_core_match_member(local_spec->btf,
+ local_acc,
+ targ_btf, targ_id,
+ targ_spec, &targ_id);
+ if (matched <= 0)
+ return matched;
+ } else {
+ /* for i=0, targ_id is already treated as array element
+ * type (because it's the original struct), for others
+ * we should find array element type first
+ */
+ if (i > 0) {
+ const struct btf_array *a;
+
+ if (!btf_is_array(targ_type))
+ return 0;
+
+ a = (void *)(targ_type + 1);
+ if (local_acc->idx >= a->nelems)
+ return 0;
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
+ &targ_id))
+ return -EINVAL;
+ }
+
+ /* too deep struct/union/array nesting */
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = local_acc->idx;
+ targ_acc->name = NULL;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+
+ sz = btf__resolve_size(targ_btf, targ_id);
+ if (sz < 0)
+ return sz;
+ targ_spec->offset += local_acc->idx * sz;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ * Expected insn->imm value is provided for validation, as well as the new
+ * relocated value.
+ *
+ * Currently two kinds of BPF instructions are supported:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand).
+ * Anything else, including *(rX) = <imm> stores, is rejected with -EINVAL.
+ *
+ * If actual insn->imm value is wrong, bail out.
+ */
+static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off,
+ __u32 orig_off, __u32 new_off)
+{
+ struct bpf_insn *insn;
+ int insn_idx;
+ __u8 class;
+
+ if (insn_off % sizeof(struct bpf_insn))
+ return -EINVAL;
+ insn_idx = insn_off / sizeof(struct bpf_insn);
+
+ insn = &prog->insns[insn_idx];
+ class = BPF_CLASS(insn->code);
+
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ if (BPF_SRC(insn->code) != BPF_K)
+ return -EINVAL;
+ if (insn->imm != orig_off)
+ return -EINVAL;
+ insn->imm = new_off;
+ pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n",
+ bpf_program__title(prog, false),
+ insn_idx, orig_off, new_off);
+ } else {
+ pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
+ bpf_program__title(prog, false),
+ insn_idx, insn->code, insn->src_reg, insn->dst_reg,
+ insn->off, insn->imm);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * Probe few well-known locations for vmlinux kernel image and try to load BTF
+ * data out of it to use for target BTF. Returns ERR_PTR() if none is usable.
+ */
+static struct btf *bpf_core_find_kernel_btf(void)
+{
+	const char *locations[] = {
+		"/lib/modules/%1$s/vmlinux-%1$s",
+		"/usr/lib/modules/%1$s/kernel/vmlinux",
+	};
+	char path[PATH_MAX + 1];
+	struct utsname buf;
+	struct btf *btf;
+	int i, err;
+
+	if (uname(&buf)) {
+		err = -errno; /* uname() returns -1, cause is in errno */
+		pr_warning("failed to uname(): %d\n", err);
+		return ERR_PTR(err);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(locations); i++) {
+		snprintf(path, PATH_MAX, locations[i], buf.release);
+		pr_debug("attempting to load kernel BTF from '%s'\n", path);
+
+		if (access(path, R_OK))
+			continue;
+
+		btf = btf__parse_elf(path, NULL);
+		if (IS_ERR(btf))
+			continue;
+
+		pr_debug("successfully loaded kernel BTF from '%s'\n", path);
+		return btf;
+	}
+
+	pr_warning("failed to find valid kernel BTF\n");
+	return ERR_PTR(-ESRCH);
+}
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+ const struct btf_type *t;
+ const char *s;
+ __u32 type_id;
+ int i;
+
+ type_id = spec->spec[0].type_id;
+ t = btf__type_by_id(spec->btf, type_id);
+ s = btf__name_by_offset(spec->btf, t->name_off);
+ libbpf_print(level, "[%u] (%s) + ", type_id, s);
+
+ for (i = 0; i < spec->raw_len; i++)
+ libbpf_print(level, "%d%s", spec->raw_spec[i],
+ i == spec->raw_len - 1 ? " => " : ":");
+
+ libbpf_print(level, "%u @ &x", spec->offset);
+
+ for (i = 0; i < spec->len; i++) {
+ if (spec->spec[i].name)
+ libbpf_print(level, ".%s", spec->spec[i].name);
+ else
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
+ }
+
+}
+
+static size_t bpf_core_hash_fn(const void *key, void *ctx)
+{
+ return (size_t)key;
+}
+
+static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+ return k1 == k2;
+}
+
+static void *u32_to_ptr(__u32 x)
+{
+ return (void *)(uintptr_t)x;
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ * Candidate type is a type with the same "essential" name, ignoring
+ * everything after last triple underscore (___). E.g., `sample`,
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ * for each other. Names with triple underscore are referred to as
+ * "flavors" and are useful, among other things, to allow to
+ * specify/support incompatible variations of the same kernel struct, which
+ * might differ between different kernel versions and/or build
+ * configurations.
+ *
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ * converter, when deduplicated BTF of a kernel still contains more than
+ * one different types with the same name. In that case, ___2, ___3, etc
+ * are appended starting from second name conflict. But struct flavors are
+ * also useful to be defined "locally", in BPF program, to extract same
+ * data from incompatible changes between different kernel
+ * versions/configurations. For instance, to handle field renames between
+ * kernel versions, one can use two flavors of the struct name with the
+ * same common name and use conditional relocations to extract that field,
+ * depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ * candidate target type. Matching involves finding corresponding
+ * high-level spec accessors, meaning that all named fields should match,
+ * as well as all array accesses should be within the actual bounds. Also,
+ * types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ * matching the spec. As long as all the specs resolve to the same set of
+ * offsets across all candidates, there is no error. If there is any
+ * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
+ * imperfection of BTF deduplication, which can cause slight duplication of
+ * the same BTF type, if some directly or indirectly referenced (by
+ * pointer) type gets resolved to different actual types in different
+ * object files. If such situation occurs, deduplicated BTF will end up
+ * with two (or more) structurally identical types, which differ only in
+ * types they refer to through pointer. This should be OK in most cases and
+ * is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ * types in target BTF. It is anticipated that this is overall more
+ * efficient memory-wise and not significantly worse (if not better)
+ * CPU-wise compared to prebuilding a map from all local type names to
+ * a list of candidate type names. It's also sped up by caching resolved
+ * list of matching candidates per each local "root" type ID, that has at
+ * least one bpf_offset_reloc associated with it. This list is shared
+ * between multiple relocations for the same type ID and is updated as some
+ * of the candidates are pruned due to structural incompatibility.
+ */
+static int bpf_core_reloc_offset(struct bpf_program *prog,
+				 const struct bpf_offset_reloc *relo,
+				 int relo_idx,
+				 const struct btf *local_btf,
+				 const struct btf *targ_btf,
+				 struct hashmap *cand_cache)
+{
+	const char *prog_name = bpf_program__title(prog, false);
+	struct bpf_core_spec local_spec, cand_spec, targ_spec;
+	const void *type_key = u32_to_ptr(relo->type_id);
+	const struct btf_type *local_type, *cand_type;
+	const char *local_name, *cand_name;
+	struct ids_vec *cand_ids;
+	__u32 local_id, cand_id;
+	const char *spec_str;
+	int i, j, err;
+
+	local_id = relo->type_id;
+	local_type = btf__type_by_id(local_btf, local_id);
+	if (!local_type)
+		return -EINVAL;
+
+	local_name = btf__name_by_offset(local_btf, local_type->name_off);
+	if (str_is_empty(local_name))
+		return -EINVAL;
+
+	spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+	if (str_is_empty(spec_str))
+		return -EINVAL;
+
+	err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
+	if (err) {
+		pr_warning("prog '%s': relo #%d: parsing [%d] (%s) + %s failed: %d\n",
+			   prog_name, relo_idx, local_id, local_name, spec_str,
+			   err);
+		return -EINVAL;
+	}
+
+	pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx);
+	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+	libbpf_print(LIBBPF_DEBUG, "\n");
+
+	if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
+		cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
+		if (IS_ERR(cand_ids)) {
+			pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] (%s): %ld\n",
+				   prog_name, relo_idx, local_id, local_name,
+				   PTR_ERR(cand_ids));
+			return PTR_ERR(cand_ids);
+		}
+		err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
+		if (err) {
+			bpf_core_free_cands(cand_ids);
+			return err;
+		}
+	}
+
+	for (i = 0, j = 0; i < cand_ids->len; i++) {
+		cand_id = cand_ids->data[i];
+		cand_type = btf__type_by_id(targ_btf, cand_id);
+		cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
+
+		err = bpf_core_spec_match(&local_spec, targ_btf,
+					  cand_id, &cand_spec);
+		if (err < 0) {
+			pr_warning("prog '%s': relo #%d: failed to match spec ",
+				   prog_name, relo_idx);
+			bpf_core_dump_spec(LIBBPF_WARN, &local_spec);
+			libbpf_print(LIBBPF_WARN,
+				     " to candidate #%d [%d] (%s): %d\n",
+				     i, cand_id, cand_name, err);
+			return err;
+		}
+		if (err == 0) { /* no match: candidate is dropped from list */
+			pr_debug("prog '%s': relo #%d: candidate #%d [%d] (%s) doesn't match spec ",
+				 prog_name, relo_idx, i, cand_id, cand_name);
+			bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+			libbpf_print(LIBBPF_DEBUG, "\n");
+			continue;
+		}
+
+		pr_debug("prog '%s': relo #%d: candidate #%d matched as spec ",
+			 prog_name, relo_idx, i);
+		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+		libbpf_print(LIBBPF_DEBUG, "\n");
+
+		if (j == 0) {
+			targ_spec = cand_spec;
+		} else if (cand_spec.offset != targ_spec.offset) {
+			/* if there are many candidates, they should all
+			 * resolve to the same offset
+			 */
+			pr_warning("prog '%s': relo #%d: candidate #%d spec ",
+				   prog_name, relo_idx, i);
+			bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+			libbpf_print(LIBBPF_WARN,
+				     " conflicts with target spec ");
+			bpf_core_dump_spec(LIBBPF_WARN, &targ_spec);
+			libbpf_print(LIBBPF_WARN, "\n");
+			return -EINVAL;
+		}
+
+		cand_ids->data[j++] = cand_spec.spec[0].type_id; /* compact */
+	}
+
+	cand_ids->len = j; /* cached list now holds only matching types */
+	if (cand_ids->len == 0) {
+		pr_warning("prog '%s': relo #%d: no matching targets found for [%d] (%s) + %s\n",
+			   prog_name, relo_idx, local_id, local_name, spec_str);
+		return -ESRCH;
+	}
+
+	err = bpf_core_reloc_insn(prog, relo->insn_off,
+				  local_spec.offset, targ_spec.offset);
+	if (err) {
+		pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
+			   prog_name, relo_idx, relo->insn_off, err);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)
+{
+ const struct btf_ext_info_sec *sec;
+ const struct bpf_offset_reloc *rec;
+ const struct btf_ext_info *seg;
+ struct hashmap_entry *entry;
+ struct hashmap *cand_cache = NULL;
+ struct bpf_program *prog;
+ struct btf *targ_btf;
+ const char *sec_name;
+ int i, err = 0;
+
+ if (targ_btf_path)
+ targ_btf = btf__parse_elf(targ_btf_path, NULL);
+ else
+ targ_btf = bpf_core_find_kernel_btf();
+ if (IS_ERR(targ_btf)) {
+ pr_warning("failed to get target BTF: %ld\n",
+ PTR_ERR(targ_btf));
+ return PTR_ERR(targ_btf);
+ }
+
+ cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
+ if (IS_ERR(cand_cache)) {
+ err = PTR_ERR(cand_cache);
+ goto out;
+ }
+
+ seg = &obj->btf_ext->offset_reloc_info;
+ for_each_btf_ext_sec(seg, sec) {
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+ if (str_is_empty(sec_name)) {
+ err = -EINVAL;
+ goto out;
+ }
+ prog = bpf_object__find_program_by_title(obj, sec_name);
+ if (!prog) {
+ pr_warning("failed to find program '%s' for CO-RE offset relocation\n",
+ sec_name);
+ err = -EINVAL;
+ goto out;
+ }
+
+ pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
+ sec_name, sec->num_info);
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = bpf_core_reloc_offset(prog, rec, i, obj->btf,
+ targ_btf, cand_cache);
+ if (err) {
+ pr_warning("prog '%s': relo #%d: failed to relocate: %d\n",
+ sec_name, i, err);
+ goto out;
+ }
+ }
+ }
+
+out:
+ btf__free(targ_btf);
+ if (!IS_ERR_OR_NULL(cand_cache)) {
+ hashmap__for_each_entry(cand_cache, entry, i) {
+ bpf_core_free_cands(entry->value);
+ }
+ hashmap__free(cand_cache);
+ }
+ return err;
+}
+
+static int
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
+{
+ int err = 0;
+
+ if (obj->btf_ext->offset_reloc_info.len)
+ err = bpf_core_reloc_offsets(obj, targ_btf_path);
+
+ return err;
+}
+
static int
bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
struct reloc_desc *relo)
@@ -2399,14 +3293,21 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
return 0;
}
-
static int
-bpf_object__relocate(struct bpf_object *obj)
+bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
{
struct bpf_program *prog;
size_t i;
int err;
+ if (obj->btf_ext) {
+ err = bpf_object__relocate_core(obj, targ_btf_path);
+ if (err) {
+ pr_warning("failed to perform CO-RE relocations: %d\n",
+ err);
+ return err;
+ }
+ }
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
@@ -2807,7 +3708,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
obj->loaded = true;
CHECK_ERR(bpf_object__create_maps(obj), err, out);
- CHECK_ERR(bpf_object__relocate(obj), err, out);
+ CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out);
CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 8a9d462a6f6d..e8f70977d137 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -92,6 +92,7 @@ LIBBPF_API void bpf_object__close(struct bpf_object *object);
struct bpf_object_load_attr {
struct bpf_object *obj;
int log_level;
+ const char *target_btf_path;
};
/* Load/unload object into/from kernel */
--
2.17.1
^ permalink raw reply related
* [PATCH v3 bpf-next 04/12] selftests/bpf: add CO-RE relocs testing setup
From: Andrii Nakryiko @ 2019-08-01 6:47 UTC (permalink / raw)
To: bpf, netdev, ast, daniel, yhs, songliubraving
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190801064803.2519675-1-andriin@fb.com>
Add CO-RE relocation test runner. Add one simple test validating that
libbpf's logic for searching for kernel image and loading BTF out of it
works.
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
---
.../selftests/bpf/prog_tests/core_reloc.c | 125 ++++++++++++++++++
.../bpf/progs/test_core_reloc_kernel.c | 36 +++++
2 files changed, 161 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/core_reloc.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
new file mode 100644
index 000000000000..fab7492a8714
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+struct core_reloc_test_case {
+ const char *case_name;
+ const char *bpf_obj_file;
+ const char *btf_src_file;
+ const char *input;
+ int input_len;
+ const char *output;
+ int output_len;
+ bool fails;
+};
+
+static struct core_reloc_test_case test_cases[] = {
+ /* validate we can find kernel image and use its BTF for relocs */
+ {
+ .case_name = "kernel",
+ .bpf_obj_file = "test_core_reloc_kernel.o",
+ .btf_src_file = NULL, /* load from /lib/modules/$(uname -r) */
+ .input = "",
+ .input_len = 0,
+ .output = "\1", /* true */
+ .output_len = 1,
+ },
+};
+
+struct data {
+ char in[256];
+ char out[256];
+};
+
+void test_core_reloc(void)
+{
+ const char *probe_name = "raw_tracepoint/sys_enter";
+ struct bpf_object_load_attr load_attr = {};
+ struct core_reloc_test_case *test_case;
+ int err, duration = 0, i, equal;
+ struct bpf_link *link = NULL;
+ struct bpf_map *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ const int zero = 0;
+ struct data data;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ test_case = &test_cases[i];
+
+ if (!test__start_subtest(test_case->case_name))
+ continue;
+
+ obj = bpf_object__open(test_case->bpf_obj_file);
+ if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
+ "failed to open '%s': %ld\n",
+ test_case->bpf_obj_file, PTR_ERR(obj)))
+ continue;
+
+ prog = bpf_object__find_program_by_title(obj, probe_name);
+ if (CHECK(!prog, "find_probe",
+ "prog '%s' not found\n", probe_name))
+ goto cleanup;
+ bpf_program__set_type(prog, BPF_PROG_TYPE_RAW_TRACEPOINT);
+
+ load_attr.obj = obj;
+ load_attr.log_level = 0;
+ load_attr.target_btf_path = test_case->btf_src_file;
+ err = bpf_object__load_xattr(&load_attr);
+ if (test_case->fails) {
+ CHECK(!err, "obj_load_fail",
+ "should fail to load prog '%s'\n", probe_name);
+ goto cleanup;
+ } else {
+ if (CHECK(err, "obj_load",
+ "failed to load prog '%s': %d\n",
+ probe_name, err))
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+ if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
+ PTR_ERR(link)))
+ goto cleanup;
+
+ data_map = bpf_object__find_map_by_name(obj, "test_cor.bss");
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto cleanup;
+
+ memset(&data, 0, sizeof(data));
+ memcpy(data.in, test_case->input, test_case->input_len);
+
+ err = bpf_map_update_elem(bpf_map__fd(data_map),
+ &zero, &data, 0);
+ if (CHECK(err, "update_data_map",
+ "failed to update .data map: %d\n", err))
+ goto cleanup;
+
+ /* trigger test run */
+ usleep(1);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &data);
+ if (CHECK(err, "get_result",
+ "failed to get output data: %d\n", err))
+ goto cleanup;
+
+ equal = memcmp(data.out, test_case->output,
+ test_case->output_len) == 0;
+ if (CHECK(!equal, "check_result",
+ "input/output data don't match\n")) {
+ int j;
+
+ for (j = 0; j < test_case->output_len; j++) {
+ test__printf("byte #%d, EXP 0x%02hhx GOT 0x%02hhx\n",
+ j, test_case->output[j], data.out[j]);
+ }
+ goto cleanup;
+ }
+
+cleanup:
+ if (!IS_ERR_OR_NULL(link)) {
+ bpf_link__destroy(link);
+ link = NULL;
+ }
+ bpf_object__close(obj);
+ }
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
new file mode 100644
index 000000000000..37e02aa3f0c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+static volatile struct data {
+ char in[256];
+ char out[256];
+} data;
+
+struct task_struct {
+ int pid;
+ int tgid;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_kernel(void *ctx)
+{
+ struct task_struct *task = (void *)bpf_get_current_task();
+ uint64_t pid_tgid = bpf_get_current_pid_tgid();
+ int pid, tgid;
+
+ if (BPF_CORE_READ(&pid, &task->pid) ||
+ BPF_CORE_READ(&tgid, &task->tgid))
+ return 1;
+
+ /* validate pid + tgid matches */
+ data.out[0] = (((uint64_t)pid << 32) | tgid) == pid_tgid;
+
+ return 0;
+}
+
--
2.17.1
^ permalink raw reply related
* [PATCH v3 bpf-next 05/12] selftests/bpf: add CO-RE relocs struct flavors tests
From: Andrii Nakryiko @ 2019-08-01 6:47 UTC (permalink / raw)
To: bpf, netdev, ast, daniel, yhs, songliubraving
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190801064803.2519675-1-andriin@fb.com>
Add tests verifying that BPF program can use various struct/union
"flavors" to extract data from the same target struct/union.
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
---
.../selftests/bpf/prog_tests/core_reloc.c | 34 ++++++++++
.../bpf/progs/btf__core_reloc_flavors.c | 3 +
.../btf__core_reloc_flavors__err_wrong_name.c | 3 +
.../selftests/bpf/progs/core_reloc_types.h | 15 +++++
.../bpf/progs/test_core_reloc_flavors.c | 62 +++++++++++++++++++
5 files changed, 117 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c
create mode 100644 tools/testing/selftests/bpf/progs/core_reloc_types.h
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index fab7492a8714..c147271deee6 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -1,5 +1,32 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "progs/core_reloc_types.h"
+
+#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
+
+#define FLAVORS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
+ .a = 42, \
+ .b = 0xc001, \
+ .c = 0xbeef, \
+}
+
+#define FLAVORS_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_flavors.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define FLAVORS_CASE(name) { \
+ FLAVORS_CASE_COMMON(name), \
+ .input = FLAVORS_DATA(core_reloc_##name), \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = FLAVORS_DATA(core_reloc_flavors), \
+ .output_len = sizeof(struct core_reloc_flavors), \
+}
+
+#define FLAVORS_ERR_CASE(name) { \
+ FLAVORS_CASE_COMMON(name), \
+ .fails = true, \
+}
struct core_reloc_test_case {
const char *case_name;
@@ -23,6 +50,13 @@ static struct core_reloc_test_case test_cases[] = {
.output = "\1", /* true */
.output_len = 1,
},
+
+ /* validate BPF program can use multiple flavors to match against
+ * single target BTF type
+ */
+ FLAVORS_CASE(flavors),
+
+ FLAVORS_ERR_CASE(flavors__err_wrong_name),
};
struct data {
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c
new file mode 100644
index 000000000000..b74455b91227
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_flavors x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c
new file mode 100644
index 000000000000..7b6035f86ee6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_flavors__err_wrong_name x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
new file mode 100644
index 000000000000..33b0c6a61912
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -0,0 +1,15 @@
+/*
+ * FLAVORS
+ */
+struct core_reloc_flavors {
+ int a;
+ int b;
+ int c;
+};
+
+/* this is not a flavor, as it doesn't have triple underscore */
+struct core_reloc_flavors__err_wrong_name {
+ int a;
+ int b;
+ int c;
+};
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
new file mode 100644
index 000000000000..9fda73e87972
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+static volatile struct data {
+ char in[256];
+ char out[256];
+} data;
+
+struct core_reloc_flavors {
+ int a;
+ int b;
+ int c;
+};
+
+/* local flavor with reversed layout */
+struct core_reloc_flavors___reversed {
+ int c;
+ int b;
+ int a;
+};
+
+/* local flavor with nested/overlapping layout */
+struct core_reloc_flavors___weird {
+ struct {
+ int b;
+ };
+ /* a and c overlap in local flavor, but this should still work
+ * correctly with target original flavor
+ */
+ union {
+ int a;
+ int c;
+ };
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_flavors(void *ctx)
+{
+ struct core_reloc_flavors *in_orig = (void *)&data.in;
+ struct core_reloc_flavors___reversed *in_rev = (void *)&data.in;
+ struct core_reloc_flavors___weird *in_weird = (void *)&data.in;
+ struct core_reloc_flavors *out = (void *)&data.out;
+
+ /* read a using weird layout */
+ if (BPF_CORE_READ(&out->a, &in_weird->a))
+ return 1;
+ /* read b using reversed layout */
+ if (BPF_CORE_READ(&out->b, &in_rev->b))
+ return 1;
+ /* read c using original layout */
+ if (BPF_CORE_READ(&out->c, &in_orig->c))
+ return 1;
+
+ return 0;
+}
+
--
2.17.1
^ permalink raw reply related
* [PATCH v3 bpf-next 01/12] libbpf: add .BTF.ext offset relocation section loading
From: Andrii Nakryiko @ 2019-08-01 6:47 UTC (permalink / raw)
To: bpf, netdev, ast, daniel, yhs, songliubraving
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190801064803.2519675-1-andriin@fb.com>
Add support for BPF CO-RE offset relocations. Add section/record
iteration macros for .BTF.ext. These macros are useful for iterating over
each .BTF.ext record, either for dumping out contents or later for BPF
CO-RE relocation handling.
To enable other parts of libbpf to work with .BTF.ext contents, moved
a bunch of type definitions into libbpf_internal.h.
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
---
tools/lib/bpf/btf.c | 69 ++++++++-------------
tools/lib/bpf/btf.h | 4 ++
tools/lib/bpf/libbpf_internal.h | 105 ++++++++++++++++++++++++++++++++
3 files changed, 136 insertions(+), 42 deletions(-)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 467224feb43b..f2a3c356b388 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -42,47 +42,6 @@ struct btf {
int fd;
};
-struct btf_ext_info {
- /*
- * info points to the individual info section (e.g. func_info and
- * line_info) from the .BTF.ext. It does not include the __u32 rec_size.
- */
- void *info;
- __u32 rec_size;
- __u32 len;
-};
-
-struct btf_ext {
- union {
- struct btf_ext_header *hdr;
- void *data;
- };
- struct btf_ext_info func_info;
- struct btf_ext_info line_info;
- __u32 data_size;
-};
-
-struct btf_ext_info_sec {
- __u32 sec_name_off;
- __u32 num_info;
- /* Followed by num_info * record_size number of bytes */
- __u8 data[0];
-};
-
-/* The minimum bpf_func_info checked by the loader */
-struct bpf_func_info_min {
- __u32 insn_off;
- __u32 type_id;
-};
-
-/* The minimum bpf_line_info checked by the loader */
-struct bpf_line_info_min {
- __u32 insn_off;
- __u32 file_name_off;
- __u32 line_off;
- __u32 line_col;
-};
-
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
@@ -831,6 +790,9 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
/* The start of the info sec (including the __u32 record_size). */
void *info;
+ if (ext_sec->len == 0)
+ return 0;
+
if (ext_sec->off & 0x03) {
pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
ext_sec->desc);
@@ -934,11 +896,24 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
return btf_ext_setup_info(btf_ext, &param);
}
+static int btf_ext_setup_offset_reloc(struct btf_ext *btf_ext)
+{
+ struct btf_ext_sec_setup_param param = {
+ .off = btf_ext->hdr->offset_reloc_off,
+ .len = btf_ext->hdr->offset_reloc_len,
+ .min_rec_size = sizeof(struct bpf_offset_reloc),
+ .ext_info = &btf_ext->offset_reloc_info,
+ .desc = "offset_reloc",
+ };
+
+ return btf_ext_setup_info(btf_ext, &param);
+}
+
static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
{
const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
- if (data_size < offsetof(struct btf_ext_header, func_info_off) ||
+ if (data_size < offsetofend(struct btf_ext_header, hdr_len) ||
data_size < hdr->hdr_len) {
pr_debug("BTF.ext header not found");
return -EINVAL;
@@ -996,6 +971,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
}
memcpy(btf_ext->data, data, size);
+ if (btf_ext->hdr->hdr_len <
+ offsetofend(struct btf_ext_header, line_info_len))
+ goto done;
err = btf_ext_setup_func_info(btf_ext);
if (err)
goto done;
@@ -1004,6 +982,13 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
if (err)
goto done;
+ if (btf_ext->hdr->hdr_len <
+ offsetofend(struct btf_ext_header, offset_reloc_len))
+ goto done;
+ err = btf_ext_setup_offset_reloc(btf_ext);
+ if (err)
+ goto done;
+
done:
if (err) {
btf_ext__free(btf_ext);
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 88a52ae56fc6..287361ee1f6b 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -57,6 +57,10 @@ struct btf_ext_header {
__u32 func_info_len;
__u32 line_info_off;
__u32 line_info_len;
+
+ /* optional part of .BTF.ext header */
+ __u32 offset_reloc_off;
+ __u32 offset_reloc_len;
};
LIBBPF_API void btf__free(struct btf *btf);
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 2ac29bd36226..2e83a34f8c79 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -29,6 +29,10 @@
#ifndef max
# define max(x, y) ((x) < (y) ? (y) : (x))
#endif
+#ifndef offsetofend
+# define offsetofend(TYPE, FIELD) \
+ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
+#endif
extern void libbpf_print(enum libbpf_print_level level,
const char *format, ...)
@@ -46,4 +50,105 @@ do { \
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len);
+struct btf_ext_info {
+ /*
+ * info points to the individual info section (e.g. func_info and
+ * line_info) from the .BTF.ext. It does not include the __u32 rec_size.
+ */
+ void *info;
+ __u32 rec_size;
+ __u32 len;
+};
+
+#define for_each_btf_ext_sec(seg, sec) \
+ for (sec = (seg)->info; \
+ (void *)sec < (seg)->info + (seg)->len; \
+ sec = (void *)sec + sizeof(struct btf_ext_info_sec) + \
+ (seg)->rec_size * sec->num_info)
+
+#define for_each_btf_ext_rec(seg, sec, i, rec) \
+ for (i = 0, rec = (void *)&(sec)->data; \
+ i < (sec)->num_info; \
+ i++, rec = (void *)rec + (seg)->rec_size)
+
+struct btf_ext {
+ union {
+ struct btf_ext_header *hdr;
+ void *data;
+ };
+ struct btf_ext_info func_info;
+ struct btf_ext_info line_info;
+ struct btf_ext_info offset_reloc_info;
+ __u32 data_size;
+};
+
+struct btf_ext_info_sec {
+ __u32 sec_name_off;
+ __u32 num_info;
+ /* Followed by num_info * record_size number of bytes */
+ __u8 data[0];
+};
+
+/* The minimum bpf_func_info checked by the loader */
+struct bpf_func_info_min {
+ __u32 insn_off;
+ __u32 type_id;
+};
+
+/* The minimum bpf_line_info checked by the loader */
+struct bpf_line_info_min {
+ __u32 insn_off;
+ __u32 file_name_off;
+ __u32 line_off;
+ __u32 line_col;
+};
+
+/* The minimum bpf_offset_reloc checked by the loader
+ *
+ * Offset relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ * its insn->imm field to be relocated with actual offset;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ * offset;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ * itself encodes an accessed field using a sequence of field and array
+ * indices, separated by colon (:). It's conceptually very close to LLVM's
+ * getelementptr ([0]) instruction's arguments for identifying offset to
+ * a field.
+ *
+ * Example to provide a better feel.
+ *
+ * struct sample {
+ * int a;
+ * struct {
+ * int b[10];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ * int x = &s->a; // encoded as "0:0" (a is field #0)
+ * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
+ * // b is field #0 inside anon struct, accessing elem #5)
+ * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ * __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit offset relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_offset_reloc {
+ __u32 insn_off;
+ __u32 type_id;
+ __u32 access_str_off;
+};
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
--
2.17.1
^ permalink raw reply related
* [PATCH v3 bpf-next 07/12] selftests/bpf: add CO-RE relocs array tests
From: Andrii Nakryiko @ 2019-08-01 6:47 UTC (permalink / raw)
To: bpf, netdev, ast, daniel, yhs, songliubraving
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190801064803.2519675-1-andriin@fb.com>
Add tests for various array handling/relocation scenarios.
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
---
.../selftests/bpf/prog_tests/core_reloc.c | 41 ++++++++++
.../bpf/progs/btf__core_reloc_arrays.c | 3 +
.../btf__core_reloc_arrays___diff_arr_dim.c | 3 +
...btf__core_reloc_arrays___diff_arr_val_sz.c | 3 +
.../btf__core_reloc_arrays___err_non_array.c | 3 +
...btf__core_reloc_arrays___err_too_shallow.c | 3 +
.../btf__core_reloc_arrays___err_too_small.c | 3 +
..._core_reloc_arrays___err_wrong_val_type1.c | 3 +
..._core_reloc_arrays___err_wrong_val_type2.c | 3 +
.../selftests/bpf/progs/core_reloc_types.h | 81 +++++++++++++++++++
.../bpf/progs/test_core_reloc_arrays.c | 55 +++++++++++++
11 files changed, 201 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 226c5af28d6b..13e1aaeb1c99 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -51,6 +51,36 @@
.fails = true, \
}
+#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
+ .a = { [2] = 1 }, \
+ .b = { [1] = { [2] = { [3] = 2 } } }, \
+ .c = { [1] = { .c = 3 } }, \
+ .d = { [0] = { [0] = { .d = 4 } } }, \
+}
+
+#define ARRAYS_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_arrays.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o"
+
+#define ARRAYS_CASE(name) { \
+ ARRAYS_CASE_COMMON(name), \
+ .input = ARRAYS_DATA(core_reloc_##name), \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \
+ .a2 = 1, \
+ .b123 = 2, \
+ .c1c = 3, \
+ .d00d = 4, \
+ }, \
+ .output_len = sizeof(struct core_reloc_arrays_output) \
+}
+
+#define ARRAYS_ERR_CASE(name) { \
+ ARRAYS_CASE_COMMON(name), \
+ .fails = true, \
+}
+
struct core_reloc_test_case {
const char *case_name;
const char *bpf_obj_file;
@@ -96,6 +126,17 @@ static struct core_reloc_test_case test_cases[] = {
NESTING_ERR_CASE(nesting___err_dup_incompat_types),
NESTING_ERR_CASE(nesting___err_partial_match_dups),
NESTING_ERR_CASE(nesting___err_too_deep),
+
+ /* various array access relocation scenarios */
+ ARRAYS_CASE(arrays),
+ ARRAYS_CASE(arrays___diff_arr_dim),
+ ARRAYS_CASE(arrays___diff_arr_val_sz),
+
+ ARRAYS_ERR_CASE(arrays___err_too_small),
+ ARRAYS_ERR_CASE(arrays___err_too_shallow),
+ ARRAYS_ERR_CASE(arrays___err_non_array),
+ ARRAYS_ERR_CASE(arrays___err_wrong_val_type1),
+ ARRAYS_ERR_CASE(arrays___err_wrong_val_type2),
};
struct data {
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c
new file mode 100644
index 000000000000..018ed7fbba3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c
new file mode 100644
index 000000000000..13d662c57014
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___diff_arr_dim x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c
new file mode 100644
index 000000000000..a351f418c85d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___diff_arr_val_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c
new file mode 100644
index 000000000000..a8735009becc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_non_array x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c
new file mode 100644
index 000000000000..2a67c28b1e75
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_too_shallow x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c
new file mode 100644
index 000000000000..1142c08c925f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_too_small x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c
new file mode 100644
index 000000000000..795a5b729176
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_wrong_val_type1 x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c
new file mode 100644
index 000000000000..3af74b837c4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_wrong_val_type2 x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 340ee2bcd463..45de7986ea2e 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -306,3 +306,84 @@ struct core_reloc_nesting___err_too_deep {
} b;
} b;
};
+
+/*
+ * ARRAYS
+ */
+struct core_reloc_arrays_output {
+ int a2;
+ char b123;
+ int c1c;
+ int d00d;
+};
+
+struct core_reloc_arrays_substruct {
+ int c;
+ int d;
+};
+
+struct core_reloc_arrays {
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+};
+
+/* bigger array dimensions */
+struct core_reloc_arrays___diff_arr_dim {
+ int a[7];
+ char b[3][4][5];
+ struct core_reloc_arrays_substruct c[4];
+ struct core_reloc_arrays_substruct d[2][3];
+};
+
+/* different size of array's value (struct) */
+struct core_reloc_arrays___diff_arr_val_sz {
+ int a[5];
+ char b[2][3][4];
+ struct {
+ int __padding1;
+ int c;
+ int __padding2;
+ } c[3];
+ struct {
+ int __padding1;
+ int d;
+ int __padding2;
+ } d[1][2];
+};
+
+struct core_reloc_arrays___err_too_small {
+ int a[2]; /* this one is too small */
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+};
+
+struct core_reloc_arrays___err_too_shallow {
+ int a[5];
+ char b[2][3]; /* this one lacks one dimension */
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+};
+
+struct core_reloc_arrays___err_non_array {
+ int a; /* not an array */
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+};
+
+struct core_reloc_arrays___err_wrong_val_type1 {
+ char a[5]; /* char instead of int */
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+};
+
+struct core_reloc_arrays___err_wrong_val_type2 {
+ int a[5];
+ char b[2][3][4];
+ int c[3]; /* value is not a struct */
+ struct core_reloc_arrays_substruct d[1][2];
+};
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
new file mode 100644
index 000000000000..bf67f0fdf743
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+static volatile struct data {
+ char in[256];
+ char out[256];
+} data;
+
+struct core_reloc_arrays_output {
+ int a2;
+ char b123;
+ int c1c;
+ int d00d;
+};
+
+struct core_reloc_arrays_substruct {
+ int c;
+ int d;
+};
+
+struct core_reloc_arrays {
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_arrays(void *ctx)
+{
+ struct core_reloc_arrays *in = (void *)&data.in;
+ struct core_reloc_arrays_output *out = (void *)&data.out;
+
+ /* in->a[2] */
+ if (BPF_CORE_READ(&out->a2, &in->a[2]))
+ return 1;
+ /* in->b[1][2][3] */
+ if (BPF_CORE_READ(&out->b123, &in->b[1][2][3]))
+ return 1;
+ /* in->c[1].c */
+ if (BPF_CORE_READ(&out->c1c, &in->c[1].c))
+ return 1;
+ /* in->d[0][0].d */
+ if (BPF_CORE_READ(&out->d00d, &in->d[0][0].d))
+ return 1;
+
+ return 0;
+}
+
--
2.17.1
^ permalink raw reply related
* [PATCH v3 bpf-next 06/12] selftests/bpf: add CO-RE relocs nesting tests
From: Andrii Nakryiko @ 2019-08-01 6:47 UTC (permalink / raw)
To: bpf, netdev, ast, daniel, yhs, songliubraving
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190801064803.2519675-1-andriin@fb.com>
Add a bunch of tests validating correct handling of nested
structs/unions.
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
---
.../selftests/bpf/prog_tests/core_reloc.c | 39 +++
.../bpf/progs/btf__core_reloc_nesting.c | 3 +
.../btf__core_reloc_nesting___anon_embed.c | 3 +
...f__core_reloc_nesting___dup_compat_types.c | 5 +
...core_reloc_nesting___err_array_container.c | 3 +
...tf__core_reloc_nesting___err_array_field.c | 3 +
...e_reloc_nesting___err_dup_incompat_types.c | 4 +
...re_reloc_nesting___err_missing_container.c | 3 +
...__core_reloc_nesting___err_missing_field.c | 3 +
..._reloc_nesting___err_nonstruct_container.c | 3 +
...e_reloc_nesting___err_partial_match_dups.c | 4 +
.../btf__core_reloc_nesting___err_too_deep.c | 3 +
.../btf__core_reloc_nesting___extra_nesting.c | 3 +
..._core_reloc_nesting___struct_union_mixup.c | 3 +
.../selftests/bpf/progs/core_reloc_types.h | 293 ++++++++++++++++++
.../bpf/progs/test_core_reloc_nesting.c | 46 +++
16 files changed, 421 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index c147271deee6..226c5af28d6b 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -28,6 +28,29 @@
.fails = true, \
}
+#define NESTING_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
+ .a = { .a = { .a = 42 } }, \
+ .b = { .b = { .b = 0xc001 } }, \
+}
+
+#define NESTING_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_nesting.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o"
+
+#define NESTING_CASE(name) { \
+ NESTING_CASE_COMMON(name), \
+ .input = NESTING_DATA(core_reloc_##name), \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = NESTING_DATA(core_reloc_nesting), \
+ .output_len = sizeof(struct core_reloc_nesting) \
+}
+
+#define NESTING_ERR_CASE(name) { \
+ NESTING_CASE_COMMON(name), \
+ .fails = true, \
+}
+
struct core_reloc_test_case {
const char *case_name;
const char *bpf_obj_file;
@@ -57,6 +80,22 @@ static struct core_reloc_test_case test_cases[] = {
FLAVORS_CASE(flavors),
FLAVORS_ERR_CASE(flavors__err_wrong_name),
+
+ /* various struct/union nesting and resolution scenarios */
+ NESTING_CASE(nesting),
+ NESTING_CASE(nesting___anon_embed),
+ NESTING_CASE(nesting___struct_union_mixup),
+ NESTING_CASE(nesting___extra_nesting),
+ NESTING_CASE(nesting___dup_compat_types),
+
+ NESTING_ERR_CASE(nesting___err_missing_field),
+ NESTING_ERR_CASE(nesting___err_array_field),
+ NESTING_ERR_CASE(nesting___err_missing_container),
+ NESTING_ERR_CASE(nesting___err_nonstruct_container),
+ NESTING_ERR_CASE(nesting___err_array_container),
+ NESTING_ERR_CASE(nesting___err_dup_incompat_types),
+ NESTING_ERR_CASE(nesting___err_partial_match_dups),
+ NESTING_ERR_CASE(nesting___err_too_deep),
};
struct data {
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c
new file mode 100644
index 000000000000..4480fcc0f183
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c
new file mode 100644
index 000000000000..13e108f76ece
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___anon_embed x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c
new file mode 100644
index 000000000000..76b54fda5fbb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c
@@ -0,0 +1,5 @@
+#include "core_reloc_types.h"
+
+void f1(struct core_reloc_nesting___dup_compat_types x) {}
+void f2(struct core_reloc_nesting___dup_compat_types__2 x) {}
+void f3(struct core_reloc_nesting___dup_compat_types__3 x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c
new file mode 100644
index 000000000000..975fb95db810
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_array_container x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c
new file mode 100644
index 000000000000..ad66c67e7980
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_array_field x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c
new file mode 100644
index 000000000000..35c5f8da6812
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c
@@ -0,0 +1,4 @@
+#include "core_reloc_types.h"
+
+void f1(struct core_reloc_nesting___err_dup_incompat_types__1 x) {}
+void f2(struct core_reloc_nesting___err_dup_incompat_types__2 x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c
new file mode 100644
index 000000000000..142e332041db
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_missing_container x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c
new file mode 100644
index 000000000000..efcae167fab9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_missing_field x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c
new file mode 100644
index 000000000000..97aaaedd8ada
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_nonstruct_container x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c
new file mode 100644
index 000000000000..ffde35086e90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c
@@ -0,0 +1,4 @@
+#include "core_reloc_types.h"
+
+void f1(struct core_reloc_nesting___err_partial_match_dups__a x) {}
+void f2(struct core_reloc_nesting___err_partial_match_dups__b x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c
new file mode 100644
index 000000000000..39a2fadd8e95
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___err_too_deep x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c
new file mode 100644
index 000000000000..a09d9dfb20df
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___extra_nesting x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c
new file mode 100644
index 000000000000..3d8a1a74012f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_nesting___struct_union_mixup x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 33b0c6a61912..340ee2bcd463 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -13,3 +13,296 @@ struct core_reloc_flavors__err_wrong_name {
int b;
int c;
};
+
+/*
+ * NESTING
+ */
+/* original set up, used to record relocations in BPF program */
+struct core_reloc_nesting_substruct {
+ int a;
+};
+
+union core_reloc_nesting_subunion {
+ int b;
+};
+
+struct core_reloc_nesting {
+ union {
+ struct core_reloc_nesting_substruct a;
+ } a;
+ struct {
+ union core_reloc_nesting_subunion b;
+ } b;
+};
+
+/* inlined anonymous struct/union instead of named structs in original */
+struct core_reloc_nesting___anon_embed {
+ int __just_for_padding;
+ union {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ union {
+ int b;
+ } b;
+ } b;
+};
+
+/* different mix of nested structs/unions than in original */
+struct core_reloc_nesting___struct_union_mixup {
+ int __a;
+ struct {
+ int __a;
+ union {
+ char __a;
+ int a;
+ } a;
+ } a;
+ int __b;
+ union {
+ int __b;
+ union {
+ char __b;
+ int b;
+ } b;
+ } b;
+};
+
+/* extra anon structs/unions, but still valid a.a.a and b.b.b accessors */
+struct core_reloc_nesting___extra_nesting {
+ int __padding;
+ struct {
+ struct {
+ struct {
+ struct {
+ union {
+ int a;
+ } a;
+ };
+ };
+ } a;
+ int __some_more;
+ struct {
+ union {
+ union {
+ union {
+ struct {
+ int b;
+ };
+ } b;
+ };
+ } b;
+ };
+ };
+};
+
+/* three flavors of same struct with different structure but same layout for
+ * a.a.a and b.b.b, thus successfully resolved and relocatable */
+struct core_reloc_nesting___dup_compat_types {
+ char __just_for_padding;
+ /* 3 more bytes of padding */
+ struct {
+ struct {
+ int a; /* offset 4 */
+ } a;
+ } a;
+ long long __more_padding;
+ struct {
+ struct {
+ int b; /* offset 16 */
+ } b;
+ } b;
+};
+
+struct core_reloc_nesting___dup_compat_types__2 {
+ int __aligned_padding;
+ struct {
+ int __trickier_noop[0];
+ struct {
+ char __some_more_noops[0];
+ int a; /* offset 4 */
+ } a;
+ } a;
+ int __more_padding;
+ struct {
+ struct {
+ struct {
+ int __critical_padding;
+ int b; /* offset 16 */
+ } b;
+ int __does_not_matter;
+ };
+ } b;
+ int __more_irrelevant_stuff;
+};
+
+struct core_reloc_nesting___dup_compat_types__3 {
+ char __correct_padding[4];
+ struct {
+ struct {
+ int a; /* offset 4 */
+ } a;
+ } a;
+ /* 8 byte padding due to next struct's alignment */
+ struct {
+ struct {
+ int b;
+ } b;
+ } b __attribute__((aligned(16)));
+};
+
+/* b.b.b field is missing */
+struct core_reloc_nesting___err_missing_field {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ struct {
+ int x;
+ } b;
+ } b;
+};
+
+/* b.b.b field is an array of integers instead of plain int */
+struct core_reloc_nesting___err_array_field {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ struct {
+ int b[1];
+ } b;
+ } b;
+};
+
+/* middle b container is missing */
+struct core_reloc_nesting___err_missing_container {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ int x;
+ } b;
+};
+
+/* middle b container is referenced through pointer instead of being embedded */
+struct core_reloc_nesting___err_nonstruct_container {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ struct {
+ int b;
+ } *b;
+ } b;
+};
+
+/* middle b container is an array of structs instead of plain struct */
+struct core_reloc_nesting___err_array_container {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ struct {
+ struct {
+ int b;
+ } b[1];
+ } b;
+};
+
+/* two flavors of same struct with incompatible layout for b.b.b */
+struct core_reloc_nesting___err_dup_incompat_types__1 {
+ struct {
+ struct {
+ int a; /* offset 0 */
+ } a;
+ } a;
+ struct {
+ struct {
+ int b; /* offset 4 */
+ } b;
+ } b;
+};
+
+struct core_reloc_nesting___err_dup_incompat_types__2 {
+ struct {
+ struct {
+ int a; /* offset 0 */
+ } a;
+ } a;
+ int __extra_padding;
+ struct {
+ struct {
+ int b; /* offset 8 (!) */
+ } b;
+ } b;
+};
+
+/* two flavors of same struct having one of a.a.a and b.b.b, but not both */
+struct core_reloc_nesting___err_partial_match_dups__a {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+};
+
+struct core_reloc_nesting___err_partial_match_dups__b {
+ struct {
+ struct {
+ int b;
+ } b;
+ } b;
+};
+
+struct core_reloc_nesting___err_too_deep {
+ struct {
+ struct {
+ int a;
+ } a;
+ } a;
+ /* 65 levels of nestedness for b.b.b */
+ struct {
+ struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ struct { struct { struct { struct { struct {
+ /* this one is one too much */
+ struct {
+ int b;
+ };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ }; }; }; }; };
+ } b;
+ } b;
+};
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
new file mode 100644
index 000000000000..3ca30cec2b39
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+static volatile struct data {
+ char in[256];
+ char out[256];
+} data;
+
+struct core_reloc_nesting_substruct {
+ int a;
+};
+
+union core_reloc_nesting_subunion {
+ int b;
+};
+
+/* int a.a.a and b.b.b accesses */
+struct core_reloc_nesting {
+ union {
+ struct core_reloc_nesting_substruct a;
+ } a;
+ struct {
+ union core_reloc_nesting_subunion b;
+ } b;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_nesting(void *ctx)
+{
+ struct core_reloc_nesting *in = (void *)&data.in;
+ struct core_reloc_nesting *out = (void *)&data.out;
+
+ if (BPF_CORE_READ(&out->a.a.a, &in->a.a.a))
+ return 1;
+ if (BPF_CORE_READ(&out->b.b.b, &in->b.b.b))
+ return 1;
+
+ return 0;
+}
+
--
2.17.1
^ permalink raw reply related
* [PATCH v3 bpf-next 09/12] selftests/bpf: add CO-RE relocs modifiers/typedef tests
From: Andrii Nakryiko @ 2019-08-01 6:48 UTC (permalink / raw)
To: bpf, netdev, ast, daniel, yhs, songliubraving
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190801064803.2519675-1-andriin@fb.com>
Add tests validating correct handling of various combinations of
typedefs and const/volatile/restrict modifiers.
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
---
.../selftests/bpf/prog_tests/core_reloc.c | 27 +++++++
.../bpf/progs/btf__core_reloc_mods.c | 3 +
.../progs/btf__core_reloc_mods___mod_swap.c | 3 +
.../progs/btf__core_reloc_mods___typedefs.c | 3 +
.../selftests/bpf/progs/core_reloc_types.h | 72 +++++++++++++++++++
.../bpf/progs/test_core_reloc_mods.c | 62 ++++++++++++++++
6 files changed, 170 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c
create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c
create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index dd2bab1b2e7d..adb8480d14dc 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -107,6 +107,28 @@
.fails = true, \
}
+#define MODS_CASE(name) { \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_mods.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) { \
+ .a = 1, \
+ .b = 2, \
+ .c = (void *)3, \
+ .d = (void *)4, \
+ .e = { [2] = 5 }, \
+ .f = { [1] = 6 }, \
+ .g = { .x = 7 }, \
+ .h = { .y = 8 }, \
+ }, \
+ .input_len = sizeof(struct core_reloc_##name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_mods_output) { \
+ .a = 1, .b = 2, .c = 3, .d = 4, \
+ .e = 5, .f = 6, .g = 7, .h = 8, \
+ }, \
+ .output_len = sizeof(struct core_reloc_mods_output), \
+}
+
struct core_reloc_test_case {
const char *case_name;
const char *bpf_obj_file;
@@ -173,6 +195,11 @@ static struct core_reloc_test_case test_cases[] = {
PRIMITIVES_ERR_CASE(primitives___err_non_enum),
PRIMITIVES_ERR_CASE(primitives___err_non_int),
PRIMITIVES_ERR_CASE(primitives___err_non_ptr),
+
+ /* const/volatile/restrict and typedefs scenarios */
+ MODS_CASE(mods),
+ MODS_CASE(mods___mod_swap),
+ MODS_CASE(mods___typedefs),
};
struct data {
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c
new file mode 100644
index 000000000000..124197a2e813
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_mods x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c
new file mode 100644
index 000000000000..f8a6592ca75f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_mods___mod_swap x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c
new file mode 100644
index 000000000000..5c0d73687247
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_mods___typedefs x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 7526a5f5755b..3401e8342e57 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -454,3 +454,75 @@ struct core_reloc_primitives___err_non_ptr {
int d; /* int instead of ptr */
int (*f)(const char *);
};
+
+/*
+ * MODS
+ */
+struct core_reloc_mods_output {
+ int a, b, c, d, e, f, g, h;
+};
+
+typedef const int int_t;
+typedef const char *char_ptr_t;
+typedef const int arr_t[7];
+
+struct core_reloc_mods_substruct {
+ int x;
+ int y;
+};
+
+typedef struct {
+ int x;
+ int y;
+} core_reloc_mods_substruct_t;
+
+struct core_reloc_mods {
+ int a;
+ int_t b;
+ char *c;
+ char_ptr_t d;
+ int e[3];
+ arr_t f;
+ struct core_reloc_mods_substruct g;
+ core_reloc_mods_substruct_t h;
+};
+
+/* a/b, c/d, e/f, and g/h pairs are swapped */
+struct core_reloc_mods___mod_swap {
+ int b;
+ int_t a;
+ char *d;
+ char_ptr_t c;
+ int f[3];
+ arr_t e;
+ struct {
+ int y;
+ int x;
+ } h;
+ core_reloc_mods_substruct_t g;
+};
+
+typedef int int1_t;
+typedef int1_t int2_t;
+typedef int2_t int3_t;
+
+typedef int arr1_t[5];
+typedef arr1_t arr2_t;
+typedef arr2_t arr3_t;
+typedef arr3_t arr4_t;
+
+typedef const char * const volatile restrict fancy_char_ptr_t;
+
+typedef core_reloc_mods_substruct_t core_reloc_mods_substruct_tt;
+
+/* we need more typedefs */
+struct core_reloc_mods___typedefs {
+ core_reloc_mods_substruct_tt g;
+ core_reloc_mods_substruct_tt h;
+ arr4_t f;
+ arr4_t e;
+ fancy_char_ptr_t d;
+ fancy_char_ptr_t c;
+ int3_t b;
+ int3_t a;
+};
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
new file mode 100644
index 000000000000..f98b942c062b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+static volatile struct data {
+ char in[256];
+ char out[256];
+} data;
+
+struct core_reloc_mods_output {
+ int a, b, c, d, e, f, g, h;
+};
+
+typedef const int int_t;
+typedef const char *char_ptr_t;
+typedef const int arr_t[7];
+
+struct core_reloc_mods_substruct {
+ int x;
+ int y;
+};
+
+typedef struct {
+ int x;
+ int y;
+} core_reloc_mods_substruct_t;
+
+struct core_reloc_mods {
+ int a;
+ int_t b;
+ char *c;
+ char_ptr_t d;
+ int e[3];
+ arr_t f;
+ struct core_reloc_mods_substruct g;
+ core_reloc_mods_substruct_t h;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_mods(void *ctx)
+{
+ struct core_reloc_mods *in = (void *)&data.in;
+ struct core_reloc_mods_output *out = (void *)&data.out;
+
+ if (BPF_CORE_READ(&out->a, &in->a) ||
+ BPF_CORE_READ(&out->b, &in->b) ||
+ BPF_CORE_READ(&out->c, &in->c) ||
+ BPF_CORE_READ(&out->d, &in->d) ||
+ BPF_CORE_READ(&out->e, &in->e[2]) ||
+ BPF_CORE_READ(&out->f, &in->f[1]) ||
+ BPF_CORE_READ(&out->g, &in->g.x) ||
+ BPF_CORE_READ(&out->h, &in->h.y))
+ return 1;
+
+ return 0;
+}
+
--
2.17.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox