* [PATCH nft 1/2] src: split monitor trace code into new trace.c
2025-07-07 9:47 [PATCH nft 0/2] src: add conntrack information to trace monitor mode Florian Westphal
@ 2025-07-07 9:47 ` Florian Westphal
2025-07-07 20:13 ` Pablo Neira Ayuso
2025-07-07 9:47 ` [PATCH nft 2/2] src: add conntrack information to trace monitor mode Florian Westphal
2025-07-07 19:02 ` [PATCH nft 0/2] " Pablo Neira Ayuso
2 siblings, 1 reply; 7+ messages in thread
From: Florian Westphal @ 2025-07-07 9:47 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
Preparation patch to avoid putting more trace functionality into
netlink.c.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
Makefile.am | 1 +
include/netlink.h | 5 -
include/trace.h | 8 ++
src/monitor.c | 2 +-
src/netlink.c | 332 -------------------------------------------
src/trace.c | 353 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 363 insertions(+), 338 deletions(-)
create mode 100644 include/trace.h
create mode 100644 src/trace.c
diff --git a/Makefile.am b/Makefile.am
index fb64105dda88..ba09e7f0953d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -220,6 +220,7 @@ src_libnftables_la_SOURCES = \
src/misspell.c \
src/mnl.c \
src/monitor.c \
+ src/trace.c \
src/netlink.c \
src/netlink_delinearize.c \
src/netlink_linearize.c \
diff --git a/include/netlink.h b/include/netlink.h
index c7da6f9e3bcb..2737d5708b29 100644
--- a/include/netlink.h
+++ b/include/netlink.h
@@ -227,11 +227,6 @@ struct ruleset_parse {
struct cmd *cmd;
};
-struct nftnl_parse_ctx;
-
-int netlink_events_trace_cb(const struct nlmsghdr *nlh, int type,
- struct netlink_mon_handler *monh);
-
enum nft_data_types dtype_map_to_kernel(const struct datatype *dtype);
void netlink_linearize_init(struct netlink_linearize_ctx *lctx,
diff --git a/include/trace.h b/include/trace.h
new file mode 100644
index 000000000000..ebebb47d5c66
--- /dev/null
+++ b/include/trace.h
@@ -0,0 +1,8 @@
+#ifndef NFTABLES_TRACE_H
+#define NFTABLES_TRACE_H
+#include <linux/netlink.h>
+
+struct netlink_mon_handler;
+int netlink_events_trace_cb(const struct nlmsghdr *nlh, int type,
+ struct netlink_mon_handler *monh);
+#endif /* NFTABLES_TRACE_H */
diff --git a/src/monitor.c b/src/monitor.c
index e3e38c2a12b7..83977907e266 100644
--- a/src/monitor.c
+++ b/src/monitor.c
@@ -16,7 +16,6 @@
#include <inttypes.h>
#include <libnftnl/table.h>
-#include <libnftnl/trace.h>
#include <libnftnl/chain.h>
#include <libnftnl/expr.h>
#include <libnftnl/object.h>
@@ -32,6 +31,7 @@
#include <nftables.h>
#include <netlink.h>
#include <mnl.h>
+#include <trace.h>
#include <expression.h>
#include <statement.h>
#include <gmputil.h>
diff --git a/src/netlink.c b/src/netlink.c
index f3157d9d7f1c..e01cb56c4a4f 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -18,7 +18,6 @@
#include <inttypes.h>
#include <libnftnl/table.h>
-#include <libnftnl/trace.h>
#include <libnftnl/chain.h>
#include <libnftnl/expr.h>
#include <libnftnl/object.h>
@@ -41,7 +40,6 @@
#include <gmputil.h>
#include <utils.h>
#include <erec.h>
-#include <iface.h>
#define nft_mon_print(monh, ...) nft_print(&monh->ctx->nft->output, __VA_ARGS__)
@@ -1964,333 +1962,3 @@ int netlink_list_flowtables(struct netlink_ctx *ctx, const struct handle *h)
nftnl_flowtable_list_free(flowtable_cache);
return err;
}
-
-static void trace_print_hdr(const struct nftnl_trace *nlt,
- struct output_ctx *octx)
-{
- nft_print(octx, "trace id %08x %s ",
- nftnl_trace_get_u32(nlt, NFTNL_TRACE_ID),
- family2str(nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY)));
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_TABLE))
- nft_print(octx, "%s ",
- nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE));
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CHAIN))
- nft_print(octx, "%s ",
- nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN));
-}
-
-static void trace_print_expr(const struct nftnl_trace *nlt, unsigned int attr,
- struct expr *lhs, struct output_ctx *octx)
-{
- struct expr *rhs, *rel;
- const void *data;
- uint32_t len;
-
- data = nftnl_trace_get_data(nlt, attr, &len);
- rhs = constant_expr_alloc(&netlink_location,
- lhs->dtype, lhs->byteorder,
- len * BITS_PER_BYTE, data);
- rel = relational_expr_alloc(&netlink_location, OP_EQ, lhs, rhs);
-
- expr_print(rel, octx);
- nft_print(octx, " ");
- expr_free(rel);
-}
-
-static void trace_print_verdict(const struct nftnl_trace *nlt,
- struct output_ctx *octx)
-{
- struct expr *chain_expr = NULL;
- const char *chain = NULL;
- unsigned int verdict;
- struct expr *expr;
-
- verdict = nftnl_trace_get_u32(nlt, NFTNL_TRACE_VERDICT);
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_JUMP_TARGET)) {
- chain = xstrdup(nftnl_trace_get_str(nlt, NFTNL_TRACE_JUMP_TARGET));
- chain_expr = constant_expr_alloc(&netlink_location,
- &string_type,
- BYTEORDER_HOST_ENDIAN,
- strlen(chain) * BITS_PER_BYTE,
- chain);
- }
- expr = verdict_expr_alloc(&netlink_location, verdict, chain_expr);
-
- nft_print(octx, "verdict ");
- expr_print(expr, octx);
- expr_free(expr);
-}
-
-static void trace_print_policy(const struct nftnl_trace *nlt,
- struct output_ctx *octx)
-{
- unsigned int policy;
- struct expr *expr;
-
- policy = nftnl_trace_get_u32(nlt, NFTNL_TRACE_POLICY);
-
- expr = verdict_expr_alloc(&netlink_location, policy, NULL);
-
- nft_print(octx, "policy ");
- expr_print(expr, octx);
- expr_free(expr);
-}
-
-static struct rule *trace_lookup_rule(const struct nftnl_trace *nlt,
- uint64_t rule_handle,
- struct nft_cache *cache)
-{
- struct chain *chain;
- struct table *table;
- struct handle h;
-
- h.family = nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY);
- h.table.name = nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE);
- h.chain.name = nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN);
-
- if (!h.table.name)
- return NULL;
-
- table = table_cache_find(&cache->table_cache, h.table.name, h.family);
- if (!table)
- return NULL;
-
- chain = chain_cache_find(table, h.chain.name);
- if (!chain)
- return NULL;
-
- return rule_lookup(chain, rule_handle);
-}
-
-static void trace_print_rule(const struct nftnl_trace *nlt,
- struct output_ctx *octx, struct nft_cache *cache)
-{
- uint64_t rule_handle;
- struct rule *rule;
-
- rule_handle = nftnl_trace_get_u64(nlt, NFTNL_TRACE_RULE_HANDLE);
- rule = trace_lookup_rule(nlt, rule_handle, cache);
-
- trace_print_hdr(nlt, octx);
-
- if (rule) {
- nft_print(octx, "rule ");
- rule_print(rule, octx);
- } else {
- nft_print(octx, "unknown rule handle %" PRIu64, rule_handle);
- }
-
- nft_print(octx, " (");
- trace_print_verdict(nlt, octx);
- nft_print(octx, ")\n");
-}
-
-static void trace_gen_stmts(struct list_head *stmts,
- struct proto_ctx *ctx, struct payload_dep_ctx *pctx,
- const struct nftnl_trace *nlt, unsigned int attr,
- enum proto_bases base)
-{
- struct list_head unordered = LIST_HEAD_INIT(unordered);
- struct list_head list;
- struct expr *rel, *lhs, *rhs, *tmp, *nexpr;
- struct stmt *stmt;
- const struct proto_desc *desc;
- const void *hdr;
- uint32_t hlen;
- unsigned int n;
-
- if (!nftnl_trace_is_set(nlt, attr))
- return;
- hdr = nftnl_trace_get_data(nlt, attr, &hlen);
-
- lhs = payload_expr_alloc(&netlink_location, NULL, 0);
- payload_init_raw(lhs, base, 0, hlen * BITS_PER_BYTE);
- rhs = constant_expr_alloc(&netlink_location,
- &invalid_type, BYTEORDER_INVALID,
- hlen * BITS_PER_BYTE, hdr);
-
-restart:
- init_list_head(&list);
- payload_expr_expand(&list, lhs, ctx);
- expr_free(lhs);
-
- desc = NULL;
- list_for_each_entry_safe(lhs, nexpr, &list, list) {
- if (desc && desc != ctx->protocol[base].desc) {
- /* Chained protocols */
- lhs->payload.offset = 0;
- if (ctx->protocol[base].desc == NULL)
- break;
- goto restart;
- }
-
- tmp = constant_expr_splice(rhs, lhs->len);
- expr_set_type(tmp, lhs->dtype, lhs->byteorder);
- if (tmp->byteorder == BYTEORDER_HOST_ENDIAN)
- mpz_switch_byteorder(tmp->value, tmp->len / BITS_PER_BYTE);
-
- /* Skip unknown and filtered expressions */
- desc = lhs->payload.desc;
- if (lhs->dtype == &invalid_type ||
- lhs->payload.tmpl == &proto_unknown_template ||
- desc->checksum_key == payload_hdr_field(lhs) ||
- desc->format.filter & (1 << payload_hdr_field(lhs))) {
- expr_free(lhs);
- expr_free(tmp);
- continue;
- }
-
- rel = relational_expr_alloc(&lhs->location, OP_EQ, lhs, tmp);
- stmt = expr_stmt_alloc(&rel->location, rel);
- list_add_tail(&stmt->list, &unordered);
-
- desc = ctx->protocol[base].desc;
- relational_expr_pctx_update(ctx, rel);
- }
-
- expr_free(rhs);
-
- n = 0;
-next:
- list_for_each_entry(stmt, &unordered, list) {
- enum proto_bases b = base;
-
- rel = stmt->expr;
- lhs = rel->left;
-
- /* Move statements to result list in defined order */
- desc = lhs->payload.desc;
- if (desc->format.order[n] &&
- desc->format.order[n] != payload_hdr_field(lhs))
- continue;
-
- list_move_tail(&stmt->list, stmts);
- n++;
-
- if (payload_is_stacked(desc, rel))
- b--;
-
- /* Don't strip 'icmp type' from payload dump. */
- if (pctx->icmp_type == 0)
- payload_dependency_kill(pctx, lhs, ctx->family);
- if (lhs->flags & EXPR_F_PROTOCOL)
- payload_dependency_store(pctx, stmt, b);
-
- goto next;
- }
-}
-
-static void trace_print_packet(const struct nftnl_trace *nlt,
- struct output_ctx *octx)
-{
- struct list_head stmts = LIST_HEAD_INIT(stmts);
- const struct proto_desc *ll_desc;
- struct payload_dep_ctx pctx = {};
- struct proto_ctx ctx;
- uint16_t dev_type;
- uint32_t nfproto;
- struct stmt *stmt, *next;
-
- trace_print_hdr(nlt, octx);
-
- nft_print(octx, "packet: ");
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_IIF))
- trace_print_expr(nlt, NFTNL_TRACE_IIF,
- meta_expr_alloc(&netlink_location,
- NFT_META_IIF), octx);
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_OIF))
- trace_print_expr(nlt, NFTNL_TRACE_OIF,
- meta_expr_alloc(&netlink_location,
- NFT_META_OIF), octx);
-
- proto_ctx_init(&ctx, nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY), 0, false);
- ll_desc = ctx.protocol[PROTO_BASE_LL_HDR].desc;
- if ((ll_desc == &proto_inet || ll_desc == &proto_netdev) &&
- nftnl_trace_is_set(nlt, NFTNL_TRACE_NFPROTO)) {
- nfproto = nftnl_trace_get_u32(nlt, NFTNL_TRACE_NFPROTO);
-
- proto_ctx_update(&ctx, PROTO_BASE_LL_HDR, &netlink_location, NULL);
- proto_ctx_update(&ctx, PROTO_BASE_NETWORK_HDR, &netlink_location,
- proto_find_upper(ll_desc, nfproto));
- }
- if (ctx.protocol[PROTO_BASE_LL_HDR].desc == NULL &&
- nftnl_trace_is_set(nlt, NFTNL_TRACE_IIFTYPE)) {
- dev_type = nftnl_trace_get_u16(nlt, NFTNL_TRACE_IIFTYPE);
- proto_ctx_update(&ctx, PROTO_BASE_LL_HDR, &netlink_location,
- proto_dev_desc(dev_type));
- }
-
- trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_LL_HEADER,
- PROTO_BASE_LL_HDR);
- trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_NETWORK_HEADER,
- PROTO_BASE_NETWORK_HDR);
- trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_TRANSPORT_HEADER,
- PROTO_BASE_TRANSPORT_HDR);
-
- list_for_each_entry_safe(stmt, next, &stmts, list) {
- stmt_print(stmt, octx);
- nft_print(octx, " ");
- stmt_free(stmt);
- }
- nft_print(octx, "\n");
-}
-
-int netlink_events_trace_cb(const struct nlmsghdr *nlh, int type,
- struct netlink_mon_handler *monh)
-{
- struct nftnl_trace *nlt;
-
- assert(type == NFT_MSG_TRACE);
-
- nlt = nftnl_trace_alloc();
- if (!nlt)
- memory_allocation_error();
-
- if (nftnl_trace_nlmsg_parse(nlh, nlt) < 0)
- netlink_abi_error();
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_LL_HEADER) ||
- nftnl_trace_is_set(nlt, NFTNL_TRACE_NETWORK_HEADER))
- trace_print_packet(nlt, &monh->ctx->nft->output);
-
- switch (nftnl_trace_get_u32(nlt, NFTNL_TRACE_TYPE)) {
- case NFT_TRACETYPE_RULE:
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_RULE_HANDLE))
- trace_print_rule(nlt, &monh->ctx->nft->output,
- &monh->ctx->nft->cache);
- break;
- case NFT_TRACETYPE_POLICY:
- trace_print_hdr(nlt, &monh->ctx->nft->output);
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_POLICY)) {
- trace_print_policy(nlt, &monh->ctx->nft->output);
- nft_mon_print(monh, " ");
- }
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_MARK))
- trace_print_expr(nlt, NFTNL_TRACE_MARK,
- meta_expr_alloc(&netlink_location,
- NFT_META_MARK),
- &monh->ctx->nft->output);
- nft_mon_print(monh, "\n");
- break;
- case NFT_TRACETYPE_RETURN:
- trace_print_hdr(nlt, &monh->ctx->nft->output);
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_VERDICT)) {
- trace_print_verdict(nlt, &monh->ctx->nft->output);
- nft_mon_print(monh, " ");
- }
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_MARK))
- trace_print_expr(nlt, NFTNL_TRACE_MARK,
- meta_expr_alloc(&netlink_location,
- NFT_META_MARK),
- &monh->ctx->nft->output);
- nft_mon_print(monh, "\n");
- break;
- }
-
- nftnl_trace_free(nlt);
- return MNL_CB_OK;
-}
diff --git a/src/trace.c b/src/trace.c
new file mode 100644
index 000000000000..a7cc8ff08251
--- /dev/null
+++ b/src/trace.c
@@ -0,0 +1,353 @@
+#include <nft.h>
+#include <trace.h>
+
+#include <libnftnl/trace.h>
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <inttypes.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter.h>
+
+#include <nftables.h>
+#include <mnl.h>
+#include <parser.h>
+#include <netlink.h>
+#include <expression.h>
+#include <statement.h>
+#include <utils.h>
+
+#define nft_mon_print(monh, ...) nft_print(&monh->ctx->nft->output, __VA_ARGS__)
+
+static void trace_print_hdr(const struct nftnl_trace *nlt,
+ struct output_ctx *octx)
+{
+ nft_print(octx, "trace id %08x %s ",
+ nftnl_trace_get_u32(nlt, NFTNL_TRACE_ID),
+ family2str(nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY)));
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_TABLE))
+ nft_print(octx, "%s ",
+ nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE));
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CHAIN))
+ nft_print(octx, "%s ",
+ nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN));
+}
+
+static void trace_print_expr(const struct nftnl_trace *nlt, unsigned int attr,
+ struct expr *lhs, struct output_ctx *octx)
+{
+ struct expr *rhs, *rel;
+ const void *data;
+ uint32_t len;
+
+ data = nftnl_trace_get_data(nlt, attr, &len);
+ rhs = constant_expr_alloc(&netlink_location,
+ lhs->dtype, lhs->byteorder,
+ len * BITS_PER_BYTE, data);
+ rel = relational_expr_alloc(&netlink_location, OP_EQ, lhs, rhs);
+
+ expr_print(rel, octx);
+ nft_print(octx, " ");
+ expr_free(rel);
+}
+
+static void trace_print_verdict(const struct nftnl_trace *nlt,
+ struct output_ctx *octx)
+{
+ struct expr *chain_expr = NULL;
+ const char *chain = NULL;
+ unsigned int verdict;
+ struct expr *expr;
+
+ verdict = nftnl_trace_get_u32(nlt, NFTNL_TRACE_VERDICT);
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_JUMP_TARGET)) {
+ chain = xstrdup(nftnl_trace_get_str(nlt, NFTNL_TRACE_JUMP_TARGET));
+ chain_expr = constant_expr_alloc(&netlink_location,
+ &string_type,
+ BYTEORDER_HOST_ENDIAN,
+ strlen(chain) * BITS_PER_BYTE,
+ chain);
+ }
+ expr = verdict_expr_alloc(&netlink_location, verdict, chain_expr);
+
+ nft_print(octx, "verdict ");
+ expr_print(expr, octx);
+ expr_free(expr);
+}
+
+static void trace_print_policy(const struct nftnl_trace *nlt,
+ struct output_ctx *octx)
+{
+ unsigned int policy;
+ struct expr *expr;
+
+ policy = nftnl_trace_get_u32(nlt, NFTNL_TRACE_POLICY);
+
+ expr = verdict_expr_alloc(&netlink_location, policy, NULL);
+
+ nft_print(octx, "policy ");
+ expr_print(expr, octx);
+ expr_free(expr);
+}
+
+static struct rule *trace_lookup_rule(const struct nftnl_trace *nlt,
+ uint64_t rule_handle,
+ struct nft_cache *cache)
+{
+ struct chain *chain;
+ struct table *table;
+ struct handle h;
+
+ h.family = nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY);
+ h.table.name = nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE);
+ h.chain.name = nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN);
+
+ if (!h.table.name)
+ return NULL;
+
+ table = table_cache_find(&cache->table_cache, h.table.name, h.family);
+ if (!table)
+ return NULL;
+
+ chain = chain_cache_find(table, h.chain.name);
+ if (!chain)
+ return NULL;
+
+ return rule_lookup(chain, rule_handle);
+}
+
+static void trace_print_rule(const struct nftnl_trace *nlt,
+ struct output_ctx *octx, struct nft_cache *cache)
+{
+ uint64_t rule_handle;
+ struct rule *rule;
+
+ rule_handle = nftnl_trace_get_u64(nlt, NFTNL_TRACE_RULE_HANDLE);
+ rule = trace_lookup_rule(nlt, rule_handle, cache);
+
+ trace_print_hdr(nlt, octx);
+
+ if (rule) {
+ nft_print(octx, "rule ");
+ rule_print(rule, octx);
+ } else {
+ nft_print(octx, "unknown rule handle %" PRIu64, rule_handle);
+ }
+
+ nft_print(octx, " (");
+ trace_print_verdict(nlt, octx);
+ nft_print(octx, ")\n");
+}
+
+static void trace_gen_stmts(struct list_head *stmts,
+ struct proto_ctx *ctx, struct payload_dep_ctx *pctx,
+ const struct nftnl_trace *nlt, unsigned int attr,
+ enum proto_bases base)
+{
+ struct list_head unordered = LIST_HEAD_INIT(unordered);
+ struct list_head list;
+ struct expr *rel, *lhs, *rhs, *tmp, *nexpr;
+ struct stmt *stmt;
+ const struct proto_desc *desc;
+ const void *hdr;
+ uint32_t hlen;
+ unsigned int n;
+
+ if (!nftnl_trace_is_set(nlt, attr))
+ return;
+ hdr = nftnl_trace_get_data(nlt, attr, &hlen);
+
+ lhs = payload_expr_alloc(&netlink_location, NULL, 0);
+ payload_init_raw(lhs, base, 0, hlen * BITS_PER_BYTE);
+ rhs = constant_expr_alloc(&netlink_location,
+ &invalid_type, BYTEORDER_INVALID,
+ hlen * BITS_PER_BYTE, hdr);
+
+restart:
+ init_list_head(&list);
+ payload_expr_expand(&list, lhs, ctx);
+ expr_free(lhs);
+
+ desc = NULL;
+ list_for_each_entry_safe(lhs, nexpr, &list, list) {
+ if (desc && desc != ctx->protocol[base].desc) {
+ /* Chained protocols */
+ lhs->payload.offset = 0;
+ if (ctx->protocol[base].desc == NULL)
+ break;
+ goto restart;
+ }
+
+ tmp = constant_expr_splice(rhs, lhs->len);
+ expr_set_type(tmp, lhs->dtype, lhs->byteorder);
+ if (tmp->byteorder == BYTEORDER_HOST_ENDIAN)
+ mpz_switch_byteorder(tmp->value, tmp->len / BITS_PER_BYTE);
+
+ /* Skip unknown and filtered expressions */
+ desc = lhs->payload.desc;
+ if (lhs->dtype == &invalid_type ||
+ lhs->payload.tmpl == &proto_unknown_template ||
+ desc->checksum_key == payload_hdr_field(lhs) ||
+ desc->format.filter & (1 << payload_hdr_field(lhs))) {
+ expr_free(lhs);
+ expr_free(tmp);
+ continue;
+ }
+
+ rel = relational_expr_alloc(&lhs->location, OP_EQ, lhs, tmp);
+ stmt = expr_stmt_alloc(&rel->location, rel);
+ list_add_tail(&stmt->list, &unordered);
+
+ desc = ctx->protocol[base].desc;
+ relational_expr_pctx_update(ctx, rel);
+ }
+
+ expr_free(rhs);
+
+ n = 0;
+next:
+ list_for_each_entry(stmt, &unordered, list) {
+ enum proto_bases b = base;
+
+ rel = stmt->expr;
+ lhs = rel->left;
+
+ /* Move statements to result list in defined order */
+ desc = lhs->payload.desc;
+ if (desc->format.order[n] &&
+ desc->format.order[n] != payload_hdr_field(lhs))
+ continue;
+
+ list_move_tail(&stmt->list, stmts);
+ n++;
+
+ if (payload_is_stacked(desc, rel))
+ b--;
+
+ /* Don't strip 'icmp type' from payload dump. */
+ if (pctx->icmp_type == 0)
+ payload_dependency_kill(pctx, lhs, ctx->family);
+ if (lhs->flags & EXPR_F_PROTOCOL)
+ payload_dependency_store(pctx, stmt, b);
+
+ goto next;
+ }
+}
+
+static void trace_print_packet(const struct nftnl_trace *nlt,
+ struct output_ctx *octx)
+{
+ struct list_head stmts = LIST_HEAD_INIT(stmts);
+ const struct proto_desc *ll_desc;
+ struct payload_dep_ctx pctx = {};
+ struct proto_ctx ctx;
+ uint16_t dev_type;
+ uint32_t nfproto;
+ struct stmt *stmt, *next;
+
+ trace_print_hdr(nlt, octx);
+
+ nft_print(octx, "packet: ");
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_IIF))
+ trace_print_expr(nlt, NFTNL_TRACE_IIF,
+ meta_expr_alloc(&netlink_location,
+ NFT_META_IIF), octx);
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_OIF))
+ trace_print_expr(nlt, NFTNL_TRACE_OIF,
+ meta_expr_alloc(&netlink_location,
+ NFT_META_OIF), octx);
+
+ proto_ctx_init(&ctx, nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY), 0, false);
+ ll_desc = ctx.protocol[PROTO_BASE_LL_HDR].desc;
+ if ((ll_desc == &proto_inet || ll_desc == &proto_netdev) &&
+ nftnl_trace_is_set(nlt, NFTNL_TRACE_NFPROTO)) {
+ nfproto = nftnl_trace_get_u32(nlt, NFTNL_TRACE_NFPROTO);
+
+ proto_ctx_update(&ctx, PROTO_BASE_LL_HDR, &netlink_location, NULL);
+ proto_ctx_update(&ctx, PROTO_BASE_NETWORK_HDR, &netlink_location,
+ proto_find_upper(ll_desc, nfproto));
+ }
+ if (ctx.protocol[PROTO_BASE_LL_HDR].desc == NULL &&
+ nftnl_trace_is_set(nlt, NFTNL_TRACE_IIFTYPE)) {
+ dev_type = nftnl_trace_get_u16(nlt, NFTNL_TRACE_IIFTYPE);
+ proto_ctx_update(&ctx, PROTO_BASE_LL_HDR, &netlink_location,
+ proto_dev_desc(dev_type));
+ }
+
+ trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_LL_HEADER,
+ PROTO_BASE_LL_HDR);
+ trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_NETWORK_HEADER,
+ PROTO_BASE_NETWORK_HDR);
+ trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_TRANSPORT_HEADER,
+ PROTO_BASE_TRANSPORT_HDR);
+
+ list_for_each_entry_safe(stmt, next, &stmts, list) {
+ stmt_print(stmt, octx);
+ nft_print(octx, " ");
+ stmt_free(stmt);
+ }
+ nft_print(octx, "\n");
+}
+
+int netlink_events_trace_cb(const struct nlmsghdr *nlh, int type,
+ struct netlink_mon_handler *monh)
+{
+ struct nftnl_trace *nlt;
+
+ assert(type == NFT_MSG_TRACE);
+
+ nlt = nftnl_trace_alloc();
+ if (!nlt)
+ memory_allocation_error();
+
+ if (nftnl_trace_nlmsg_parse(nlh, nlt) < 0)
+ netlink_abi_error();
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_LL_HEADER) ||
+ nftnl_trace_is_set(nlt, NFTNL_TRACE_NETWORK_HEADER))
+ trace_print_packet(nlt, &monh->ctx->nft->output);
+
+ switch (nftnl_trace_get_u32(nlt, NFTNL_TRACE_TYPE)) {
+ case NFT_TRACETYPE_RULE:
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_RULE_HANDLE))
+ trace_print_rule(nlt, &monh->ctx->nft->output,
+ &monh->ctx->nft->cache);
+ break;
+ case NFT_TRACETYPE_POLICY:
+ trace_print_hdr(nlt, &monh->ctx->nft->output);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_POLICY)) {
+ trace_print_policy(nlt, &monh->ctx->nft->output);
+ nft_mon_print(monh, " ");
+ }
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_MARK))
+ trace_print_expr(nlt, NFTNL_TRACE_MARK,
+ meta_expr_alloc(&netlink_location,
+ NFT_META_MARK),
+ &monh->ctx->nft->output);
+ nft_mon_print(monh, "\n");
+ break;
+ case NFT_TRACETYPE_RETURN:
+ trace_print_hdr(nlt, &monh->ctx->nft->output);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_VERDICT)) {
+ trace_print_verdict(nlt, &monh->ctx->nft->output);
+ nft_mon_print(monh, " ");
+ }
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_MARK))
+ trace_print_expr(nlt, NFTNL_TRACE_MARK,
+ meta_expr_alloc(&netlink_location,
+ NFT_META_MARK),
+ &monh->ctx->nft->output);
+ nft_mon_print(monh, "\n");
+ break;
+ }
+
+ nftnl_trace_free(nlt);
+ return MNL_CB_OK;
+}
--
2.49.0
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH nft 2/2] src: add conntrack information to trace monitor mode
2025-07-07 9:47 [PATCH nft 0/2] src: add conntrack information to trace monitor mode Florian Westphal
2025-07-07 9:47 ` [PATCH nft 1/2] src: split monitor trace code into new trace.c Florian Westphal
@ 2025-07-07 9:47 ` Florian Westphal
2025-07-07 19:02 ` [PATCH nft 0/2] " Pablo Neira Ayuso
2 siblings, 0 replies; 7+ messages in thread
From: Florian Westphal @ 2025-07-07 9:47 UTC (permalink / raw)
To: netfilter-devel; +Cc: Florian Westphal
Upcoming kernel change provides the packets conntrack state in the
trace message data.
This allows to see if packet is seen as original or reply, the conntrack
state (new, establieshed, related) and the status bits which show if e.g.
NAT was applied. Alsoi include conntrack ID so users can use conntrack
tool to query the kernel for more information via ctnetlink.
This improves debugging when e.g. packets do not pick up the expected
NAT mapping, which could e.g. also happen because of expectations
following the NAT binding of the owning conntrack entry.
Example output ("conntrack: " lines are new):
trace id 32 t PRE_RAW packet: iif "enp0s3" ether saddr [..]
trace id 32 t PRE_RAW rule tcp flags syn meta nftrace set 1 (verdict continue)
trace id 32 t PRE_RAW policy accept
trace id 32 t PRE_MANGLE conntrack: ct direction original ct state new ct id 2641368242
trace id 32 t PRE_MANGLE packet: iif "enp0s3" ether saddr [..]
trace id 32 t ct_new_pre rule jump rpfilter (verdict jump rpfilter)
trace id 32 t PRE_MANGLE policy accept
trace id 32 t INPUT conntrack: ct direction original ct state new ct status dnat-done ct id 2641368242
trace id 32 t INPUT packet: iif "enp0s3" [..]
trace id 32 t public_in rule tcp dport 443 accept (verdict accept)
v2: add more status bits: helper, clash, offload, hw-offload.
add flag explanation to documentation.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
doc/data-types.txt | 31 ++---
include/linux/netfilter/nf_conntrack_common.h | 16 +++
src/ct.c | 8 ++
src/trace.c | 109 ++++++++++++++++++
4 files changed, 149 insertions(+), 15 deletions(-)
diff --git a/doc/data-types.txt b/doc/data-types.txt
index 46b0867cb5a4..d688350c4afd 100644
--- a/doc/data-types.txt
+++ b/doc/data-types.txt
@@ -378,21 +378,22 @@ For each of the types above, keywords are available for convenience:
.conntrack status (ct_status)
[options="header"]
|==================
-|Keyword| Value
-|expected|
-1
-|seen-reply|
-2
-|assured|
-4
-|confirmed|
-8
-|snat|
-16
-|dnat|
-32
-|dying|
-512
+|Keyword| Value | Description
+|expected|1| Expected connection; conntrack helper set it up
+|seen-reply|2| Conntrack has seen packets in both directions
+|assured| 4 |Conntrack entry will not be removed if hash table is full
+|confirmed | 8 | Initial packet processed
+|snat| 16 | Original source address differs from reply destination
+|dnat| 32 | Original destination differs from reply source
+|seq-adjust| 64 | tcp sequence number rewrite due to conntrack helper or synproxy
+|snat-done| 128 | tried to find matching snat/masquerade rule
+|dnat-done| 256 | tried to find matching dnat/redirect rule
+|dying| 512 | Connection about to be deleted
+|fixed-timeout | 1024 | entry expires even if traffic is active
+|clash | 4096 | packet drop avoidance scheme
+|helper | 8192 | connection is monitored by conntrack helper
+|offload | 16384 | connection is offloaded to a flow table
+|hw-offload | 32768 | connection is offloaded to hardware
|================
.conntrack event bits (ct_event)
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 768ff251308b..22bbb6c92fb1 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -77,6 +77,22 @@ enum ip_conntrack_status {
/* Connection has fixed timeout. */
IPS_FIXED_TIMEOUT_BIT = 10,
IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
+
+ /* Conntrack is a fake untracked entry. Obsolete and not used anymore */
+ IPS_UNTRACKED_BIT = 12,
+ IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
+
+ /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */
+ IPS_HELPER_BIT = 13,
+ IPS_HELPER = (1 << IPS_HELPER_BIT),
+
+ /* Conntrack has been offloaded to flow table. */
+ IPS_OFFLOAD_BIT = 14,
+ IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
+
+ /* Conntrack has been offloaded to hardware. */
+ IPS_HW_OFFLOAD_BIT = 15,
+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
};
/* Connection tracking event types */
diff --git a/src/ct.c b/src/ct.c
index 71ebb9483893..7fa808800fcf 100644
--- a/src/ct.c
+++ b/src/ct.c
@@ -98,7 +98,15 @@ static const struct symbol_table ct_status_tbl = {
SYMBOL("confirmed", IPS_CONFIRMED),
SYMBOL("snat", IPS_SRC_NAT),
SYMBOL("dnat", IPS_DST_NAT),
+ SYMBOL("seq-adjust", IPS_SEQ_ADJUST),
+ SYMBOL("snat-done", IPS_SRC_NAT_DONE),
+ SYMBOL("dnat-done", IPS_DST_NAT_DONE),
SYMBOL("dying", IPS_DYING),
+ SYMBOL("fixed-timeout", IPS_FIXED_TIMEOUT),
+ SYMBOL("clash", IPS_UNTRACKED_BIT),
+ SYMBOL("helper", IPS_HELPER_BIT),
+ SYMBOL("offload", IPS_OFFLOAD_BIT),
+ SYMBOL("hw-offload", IPS_HW_OFFLOAD_BIT),
SYMBOL_LIST_END
},
};
diff --git a/src/trace.c b/src/trace.c
index a7cc8ff08251..b270951025b8 100644
--- a/src/trace.c
+++ b/src/trace.c
@@ -237,6 +237,114 @@ next:
}
}
+static struct expr *trace_alloc_list(const struct datatype *dtype,
+ enum byteorder byteorder,
+ unsigned int len, const void *data)
+{
+ struct expr *list_expr;
+ unsigned int i;
+ mpz_t value;
+ uint32_t v;
+
+ if (len != sizeof(v))
+ return constant_expr_alloc(&netlink_location,
+ dtype, byteorder,
+ len * BITS_PER_BYTE, data);
+
+ list_expr = list_expr_alloc(&netlink_location);
+
+ mpz_init2(value, 32);
+ mpz_import_data(value, data, byteorder, len);
+ v = mpz_get_uint32(value);
+ if (v == 0) {
+ mpz_clear(value);
+ return NULL;
+ }
+
+ for (i = 0; i < 32; i++) {
+ uint32_t bitv = v & (1 << i);
+
+ if (bitv == 0)
+ continue;
+
+ compound_expr_add(list_expr,
+ constant_expr_alloc(&netlink_location,
+ dtype, byteorder,
+ len * BITS_PER_BYTE,
+ &bitv));
+ }
+
+ mpz_clear(value);
+ return list_expr;
+}
+
+static void trace_print_ct_expr(const struct nftnl_trace *nlt, unsigned int attr,
+ enum nft_ct_keys key, struct output_ctx *octx)
+{
+ struct expr *lhs, *rhs, *rel;
+ const void *data;
+ uint32_t len;
+
+ data = nftnl_trace_get_data(nlt, attr, &len);
+ lhs = ct_expr_alloc(&netlink_location, key, -1);
+
+ switch (key) {
+ case NFT_CT_STATUS:
+ rhs = trace_alloc_list(lhs->dtype, lhs->byteorder, len, data);
+ if (!rhs) {
+ expr_free(lhs);
+ return;
+ }
+ rel = binop_expr_alloc(&netlink_location, OP_IMPLICIT, lhs, rhs);
+ break;
+ case NFT_CT_DIRECTION:
+ case NFT_CT_STATE:
+ case NFT_CT_ID:
+ /* fallthrough */
+ default:
+ rhs = constant_expr_alloc(&netlink_location,
+ lhs->dtype, lhs->byteorder,
+ len * BITS_PER_BYTE, data);
+ rel = relational_expr_alloc(&netlink_location, OP_IMPLICIT, lhs, rhs);
+ break;
+ }
+
+ expr_print(rel, octx);
+ nft_print(octx, " ");
+ expr_free(rel);
+}
+
+static void trace_print_ct(const struct nftnl_trace *nlt,
+ struct output_ctx *octx)
+{
+ bool ct = nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_STATE);
+
+ if (!ct)
+ return;
+
+ trace_print_hdr(nlt, octx);
+
+ nft_print(octx, "conntrack: ");
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_DIRECTION))
+ trace_print_ct_expr(nlt, NFTNL_TRACE_CT_DIRECTION,
+ NFT_CT_DIRECTION, octx);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_STATE))
+ trace_print_ct_expr(nlt, NFTNL_TRACE_CT_STATE,
+ NFT_CT_STATE, octx);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_STATUS))
+ trace_print_ct_expr(nlt, NFTNL_TRACE_CT_STATUS,
+ NFT_CT_STATUS, octx);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_ID))
+ trace_print_ct_expr(nlt, NFTNL_TRACE_CT_ID,
+ NFT_CT_ID, octx);
+
+ nft_print(octx, "\n");
+}
+
static void trace_print_packet(const struct nftnl_trace *nlt,
struct output_ctx *octx)
{
@@ -248,6 +356,7 @@ static void trace_print_packet(const struct nftnl_trace *nlt,
uint32_t nfproto;
struct stmt *stmt, *next;
+ trace_print_ct(nlt, octx);
trace_print_hdr(nlt, octx);
nft_print(octx, "packet: ");
--
2.49.0
^ permalink raw reply related [flat|nested] 7+ messages in thread