netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH libnftnl 1/2] src: add batch abstraction
@ 2015-04-14 18:59 Pablo Neira Ayuso
  2015-04-14 19:00 ` [PATCH nft] mnl: use new libnftnl batch API Pablo Neira Ayuso
  0 siblings, 1 reply; 2+ messages in thread
From: Pablo Neira Ayuso @ 2015-04-14 18:59 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

This patch adds a new batch class to libnftnl, it basically generalizes what we
already have.

A batch is composed of one or more page objects. Every page may contain one or
more netlink messages.

 batch
   *      .------.   .------.         .------.
   |      |      |   |      |         |      |
   `----> | page |-->| page |-->...-->| page |
          |      |   |      |         |      |
          `------'   `------'         `------'

You can create a batch via:

	batch = nft_batch_alloc(...);

The batch comes with one initial page.

You can fetch a pointer to the next spare area in the current page to add a new
netlink message to the batch.

	void *nft_batch_buffer(struct nft_batch *batch);

Once you have added a netlink message, you have to call:

	nft_batch_update(batch);

this internally updates the pointer to the next spare data area in the page.

Every page has a limit threshold after which you start using the overrun area.

  page  .------.
        |      |
        |      |
        .      . page area
        |      |
        |      |
        |------|<--- limit
        |      |
        |      | overrun area
        |      |
        '______'<--- real page size

If we write over the limit, then the next call to nft_batch_update() results in
a new empty page added to the batch. With careful page size and limit
selection, we ensure that a netlink message always fits into the page, so we
avoid the overhead of canceling netlink messages that don't fit in.

Once your batch is complete, if you want to send it out to kernel-space, you
can convert it to an iovec array via:

	nft_batch_iovec(batch, iov, iov_len);

Then, after having sent the batch, you can release it via:

	nft_batch_free(batch);

This class relies on the libmnl batching infrastructure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/libnftnl/Makefile.am |    3 +-
 include/libnftnl/batch.h     |   18 +++++
 src/Makefile.am              |    1 +
 src/batch.c                  |  162 ++++++++++++++++++++++++++++++++++++++++++
 src/libnftnl.map             |    8 +++
 5 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 include/libnftnl/batch.h
 create mode 100644 src/batch.c

diff --git a/include/libnftnl/Makefile.am b/include/libnftnl/Makefile.am
index 010c01f..a20aaee 100644
--- a/include/libnftnl/Makefile.am
+++ b/include/libnftnl/Makefile.am
@@ -1,4 +1,5 @@
-pkginclude_HEADERS = table.h		\
+pkginclude_HEADERS = batch.h		\
+		     table.h		\
 		     chain.h		\
 		     rule.h		\
 		     expr.h		\
diff --git a/include/libnftnl/batch.h b/include/libnftnl/batch.h
new file mode 100644
index 0000000..40416e6
--- /dev/null
+++ b/include/libnftnl/batch.h
@@ -0,0 +1,18 @@
+#ifndef _LIBNFTNL_BATCH_H_
+#define _LIBNFTNL_BATCH_H_
+
+#include <stdint.h>
+
+struct nft_batch;
+
+struct nft_batch *nft_batch_alloc(uint32_t pg_size, uint32_t pg_overrun_size);
+int nft_batch_update(struct nft_batch *batch);
+void nft_batch_free(struct nft_batch *batch);
+
+void *nft_batch_buffer(struct nft_batch *batch);
+uint32_t nft_batch_buffer_len(struct nft_batch *batch);
+
+int nft_batch_iovec_len(struct nft_batch *batch);
+void nft_batch_iovec(struct nft_batch *batch, struct iovec *iov, uint32_t iovlen);
+
+#endif
diff --git a/src/Makefile.am b/src/Makefile.am
index 3fca5e4..edabb52 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -6,6 +6,7 @@ libnftnl_la_LDFLAGS = -Wl,--version-script=$(srcdir)/libnftnl.map	\
 		      -version-info $(LIBVERSION)
 
 libnftnl_la_SOURCES = utils.c		\
+		      batch.c		\
 		      buffer.c		\
 		      common.c		\
 		      gen.c		\
diff --git a/src/batch.c b/src/batch.c
new file mode 100644
index 0000000..ec9f728
--- /dev/null
+++ b/src/batch.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2013-2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "internal.h"
+#include <errno.h>
+#include <libmnl/libmnl.h>
+#include <libnftnl/batch.h>
+
+struct nft_batch {
+	uint32_t		num_pages;
+	struct nft_batch_page	*current_page;
+	uint32_t		page_size;
+	uint32_t		page_overrun_size;
+	struct list_head	page_list;
+};
+
+struct nft_batch_page {
+	struct list_head	head;
+	struct mnl_nlmsg_batch	*batch;
+};
+
+static struct nft_batch_page *nft_batch_page_alloc(struct nft_batch *batch)
+{
+	struct nft_batch_page *page;
+	char *buf;
+
+	page = malloc(sizeof(struct nft_batch_page));
+	if (page == NULL)
+		return NULL;
+
+	buf = malloc(batch->page_size + batch->page_overrun_size);
+	if (buf == NULL)
+		goto err1;
+
+	page->batch = mnl_nlmsg_batch_start(buf, batch->page_size);
+	if (page->batch == NULL)
+		goto err2;
+
+	return page;
+err2:
+	free(buf);
+err1:
+	free(page);
+	return NULL;
+}
+
+static void nft_batch_add_page(struct nft_batch_page *page,
+			       struct nft_batch *batch)
+{
+	batch->current_page = page;
+	batch->num_pages++;
+	list_add_tail(&page->head, &batch->page_list);
+}
+
+struct nft_batch *nft_batch_alloc(uint32_t pg_size, uint32_t pg_overrun_size)
+{
+	struct nft_batch *batch;
+	struct nft_batch_page *page;
+
+	batch = calloc(1, sizeof(struct nft_batch));
+	if (batch == NULL)
+		return NULL;
+
+	batch->page_size = pg_size;
+	batch->page_overrun_size = pg_overrun_size;
+	INIT_LIST_HEAD(&batch->page_list);
+
+	page = nft_batch_page_alloc(batch);
+	if (page == NULL)
+		goto err1;
+
+	nft_batch_add_page(page, batch);
+	return batch;
+err1:
+	free(batch);
+	return NULL;
+}
+EXPORT_SYMBOL(nft_batch_alloc);
+
+void nft_batch_free(struct nft_batch *batch)
+{
+	struct nft_batch_page *page, *next;
+
+	list_for_each_entry_safe(page, next, &batch->page_list, head) {
+		free(mnl_nlmsg_batch_head(page->batch));
+		mnl_nlmsg_batch_stop(page->batch);
+		free(page);
+	}
+
+	free(batch);
+}
+EXPORT_SYMBOL(nft_batch_free);
+
+int nft_batch_update(struct nft_batch *batch)
+{
+	struct nft_batch_page *page;
+	struct nlmsghdr *last_nlh;
+
+	if (mnl_nlmsg_batch_next(batch->current_page->batch))
+		return 0;
+
+	last_nlh = nft_batch_buffer(batch);
+
+	page = nft_batch_page_alloc(batch);
+	if (page == NULL)
+		goto err1;
+
+	nft_batch_add_page(page, batch);
+
+	memcpy(nft_batch_buffer(batch), last_nlh, last_nlh->nlmsg_len);
+	mnl_nlmsg_batch_next(batch->current_page->batch);
+
+	return 0;
+err1:
+	return -1;
+}
+EXPORT_SYMBOL(nft_batch_update);
+
+void *nft_batch_buffer(struct nft_batch *batch)
+{
+	return mnl_nlmsg_batch_current(batch->current_page->batch);
+}
+EXPORT_SYMBOL(nft_batch_buffer);
+
+uint32_t nft_batch_buffer_len(struct nft_batch *batch)
+{
+	return mnl_nlmsg_batch_size(batch->current_page->batch);
+}
+EXPORT_SYMBOL(nft_batch_buffer_len);
+
+int nft_batch_iovec_len(struct nft_batch *batch)
+{
+	int num_pages = batch->num_pages;
+
+	/* Skip last page if it's empty */
+	if (mnl_nlmsg_batch_is_empty(batch->current_page->batch))
+		num_pages--;
+
+	return num_pages;
+}
+EXPORT_SYMBOL(nft_batch_iovec_len);
+
+void nft_batch_iovec(struct nft_batch *batch, struct iovec *iov, uint32_t iovlen)
+{
+	struct nft_batch_page *page;
+	int i = 0;
+
+	list_for_each_entry(page, &batch->page_list, head) {
+		if (i >= iovlen)
+			break;
+
+		iov[i].iov_base = mnl_nlmsg_batch_head(page->batch);
+		iov[i].iov_len = mnl_nlmsg_batch_size(page->batch);
+		i++;
+	}
+}
+EXPORT_SYMBOL(nft_batch_iovec);
diff --git a/src/libnftnl.map b/src/libnftnl.map
index 7c74fbc..78e1799 100644
--- a/src/libnftnl.map
+++ b/src/libnftnl.map
@@ -234,4 +234,12 @@ LIBNFTNL_1.2.0 {
   nft_ruleset_ctx_get_u32;
   nft_ruleset_parse_file_cb;
   nft_ruleset_parse_buffer_cb;
+
+  nft_batch_alloc;
+  nft_batch_update;
+  nft_batch_free;
+  nft_batch_buffer;
+  nft_batch_buffer_len;
+  nft_batch_iovec_len;
+  nft_batch_iovec;
 } LIBNFTNL_1.2;
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH nft] mnl: use new libnftnl batch API
  2015-04-14 18:59 [PATCH libnftnl 1/2] src: add batch abstraction Pablo Neira Ayuso
@ 2015-04-14 19:00 ` Pablo Neira Ayuso
  0 siblings, 0 replies; 2+ messages in thread
From: Pablo Neira Ayuso @ 2015-04-14 19:00 UTC (permalink / raw)
  To: netfilter-devel; +Cc: kaber

Each batch page has a size of 320 Kbytes, and the limit has been set to 256
KBytes, so the overrun area is 64 KBytes long to accommodate the largest
netlink message (sets).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 src/mnl.c |  124 ++++++++++++++++---------------------------------------------
 1 file changed, 33 insertions(+), 91 deletions(-)

diff --git a/src/mnl.c b/src/mnl.c
index 89c2bb5..76a9714 100644
--- a/src/mnl.c
+++ b/src/mnl.c
@@ -16,6 +16,7 @@
 #include <libnftnl/rule.h>
 #include <libnftnl/expr.h>
 #include <libnftnl/set.h>
+#include <libnftnl/batch.h>
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nf_tables.h>
@@ -126,77 +127,26 @@ static int check_genid(const struct nlmsghdr *nlh)
  */
 #define BATCH_PAGE_SIZE getpagesize() * 32
 
-static struct mnl_nlmsg_batch *mnl_batch_alloc(void)
-{
-	static char *buf;
-
-	/* libmnl needs higher buffer to handle batch overflows. */
-	buf = xmalloc(BATCH_PAGE_SIZE + NFT_NLMSG_MAXSIZE);
-	return mnl_nlmsg_batch_start(buf, BATCH_PAGE_SIZE);
-}
-
-static LIST_HEAD(batch_page_list);
-static int batch_num_pages;
-
-struct batch_page {
-	struct list_head	head;
-	struct mnl_nlmsg_batch *batch;
-};
+static struct nft_batch *batch;
 
 void mnl_batch_init(void)
 {
-	struct batch_page *batch_page;
-
-	batch_page = xmalloc(sizeof(struct batch_page));
-	batch_page->batch = mnl_batch_alloc();
-	batch_num_pages++;
-	list_add_tail(&batch_page->head, &batch_page_list);
-}
-
-static struct batch_page *nft_batch_page_current(void)
-{
-	return list_entry(batch_page_list.prev, struct batch_page, head);
-}
-
-static void *nft_nlmsg_batch_current(void)
-{
-	return mnl_nlmsg_batch_current(nft_batch_page_current()->batch);
-}
-
-static void nft_batch_page_add(void)
-{
-	struct nlmsghdr *last_nlh;
-
-	/* Get the last message not fitting in the batch */
-	last_nlh = nft_nlmsg_batch_current();
-	/* Add new batch page */
-	mnl_batch_init();
-	/* Copy the last message not fitting to the new batch page */
-	memcpy(nft_nlmsg_batch_current(), last_nlh, last_nlh->nlmsg_len);
-	/* No overflow may happen as this is a new empty batch page */
-	mnl_nlmsg_batch_next(nft_batch_page_current()->batch);
-}
-
-static void nft_batch_page_release(struct batch_page *batch_page)
-{
-	list_del(&batch_page->head);
-	xfree(mnl_nlmsg_batch_head(batch_page->batch));
-	mnl_nlmsg_batch_stop(batch_page->batch);
-	xfree(batch_page);
-	batch_num_pages--;
+	batch = nft_batch_alloc(BATCH_PAGE_SIZE, NFT_NLMSG_MAXSIZE);
+	if (batch == NULL)
+		memory_allocation_error();
 }
 
 static void nft_batch_continue(void)
 {
-	if (!mnl_nlmsg_batch_next(nft_batch_page_current()->batch))
-		nft_batch_page_add();
+	if (nft_batch_update(batch) < 0)
+		memory_allocation_error();
 }
 
 uint32_t mnl_batch_begin(void)
 {
 	uint32_t seq = mnl_seqnum_alloc();
 
-	nft_batch_begin(nft_nlmsg_batch_current(), seq);
+	nft_batch_begin(nft_batch_buffer(batch), seq);
 	nft_batch_continue();
 
 	return seq;
@@ -204,7 +154,7 @@ uint32_t mnl_batch_begin(void)
 
 void mnl_batch_end(void)
 {
-	nft_batch_end(nft_nlmsg_batch_current(), mnl_seqnum_alloc());
+	nft_batch_end(nft_batch_buffer(batch), mnl_seqnum_alloc());
 	nft_batch_continue();
 }
 
@@ -213,16 +163,13 @@ bool mnl_batch_ready(void)
 	/* Check if the batch only contains the initial and trailing batch
 	 * messages. In that case, the batch is empty.
 	 */
-	return mnl_nlmsg_batch_size(nft_batch_page_current()->batch) !=
-	       (NLMSG_HDRLEN+sizeof(struct nfgenmsg)) * 2;
+	return nft_batch_buffer_len(batch) !=
+	       (NLMSG_HDRLEN + sizeof(struct nfgenmsg)) * 2;
 }
 
 void mnl_batch_reset(void)
 {
-	struct batch_page *batch_page, *next;
-
-	list_for_each_entry_safe(batch_page, next, &batch_page_list, head)
-		nft_batch_page_release(batch_page);
+	nft_batch_free(batch);
 }
 
 static void mnl_err_list_node_add(struct list_head *err_list, int error,
@@ -247,10 +194,10 @@ static void mnl_set_sndbuffer(const struct mnl_socket *nl)
 {
 	int newbuffsiz;
 
-	if (batch_num_pages * BATCH_PAGE_SIZE <= nlbuffsiz)
+	if (nft_batch_iovec_len(batch) * BATCH_PAGE_SIZE <= nlbuffsiz)
 		return;
 
-	newbuffsiz = batch_num_pages * BATCH_PAGE_SIZE;
+	newbuffsiz = nft_batch_iovec_len(batch) * BATCH_PAGE_SIZE;
 
 	/* Rise sender buffer length to avoid hitting -EMSGSIZE */
 	if (setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_SNDBUFFORCE,
@@ -265,27 +212,26 @@ static ssize_t mnl_nft_socket_sendmsg(const struct mnl_socket *nl)
 	static const struct sockaddr_nl snl = {
 		.nl_family = AF_NETLINK
 	};
-	struct iovec iov[batch_num_pages];
+	uint32_t iov_len = nft_batch_iovec_len(batch);
+	struct iovec iov[iov_len];
 	struct msghdr msg = {
 		.msg_name	= (struct sockaddr *) &snl,
 		.msg_namelen	= sizeof(snl),
 		.msg_iov	= iov,
-		.msg_iovlen	= batch_num_pages,
+		.msg_iovlen	= iov_len,
 	};
-	struct batch_page *batch_page;
-	int i = 0;
+#ifdef DEBUG
+	uint32_t i;
+#endif
 
 	mnl_set_sndbuffer(nl);
+	nft_batch_iovec(batch, iov, iov_len);
 
-	list_for_each_entry(batch_page, &batch_page_list, head) {
-		iov[i].iov_base = mnl_nlmsg_batch_head(batch_page->batch);
-		iov[i].iov_len = mnl_nlmsg_batch_size(batch_page->batch);
-		i++;
 #ifdef DEBUG
+	for (i = 0; i < iov_len; i++) {
 		if (debug_level & DEBUG_MNL) {
 			mnl_nlmsg_fprintf(stdout,
-					  mnl_nlmsg_batch_head(batch_page->batch),
-					  mnl_nlmsg_batch_size(batch_page->batch),
+					  iov[i].iov_base, iov[i].iov_len,
 					  sizeof(struct nfgenmsg));
 		}
 #endif
@@ -304,10 +250,6 @@ int mnl_batch_talk(struct mnl_socket *nl, struct list_head *err_list)
 		.tv_usec	= 0
 	};
 
-	/* Remove last page from the batch if it's empty */
-	if (mnl_nlmsg_batch_is_empty(nft_batch_page_current()->batch))
-		nft_batch_page_release(nft_batch_page_current());
-
 	ret = mnl_nft_socket_sendmsg(nl);
 	if (ret == -1)
 		return -1;
@@ -347,7 +289,7 @@ int mnl_nft_rule_batch_add(struct nft_rule *nlr, unsigned int flags,
 {
 	struct nlmsghdr *nlh;
 
-	nlh = nft_rule_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+	nlh = nft_rule_nlmsg_build_hdr(nft_batch_buffer(batch),
 			NFT_MSG_NEWRULE,
 			nft_rule_attr_get_u32(nlr, NFT_RULE_ATTR_FAMILY),
 			NLM_F_CREATE | flags, seqnum);
@@ -363,7 +305,7 @@ int mnl_nft_rule_batch_del(struct nft_rule *nlr, unsigned int flags,
 {
 	struct nlmsghdr *nlh;
 
-	nlh = nft_rule_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+	nlh = nft_rule_nlmsg_build_hdr(nft_batch_buffer(batch),
 			NFT_MSG_DELRULE,
 			nft_rule_attr_get_u32(nlr, NFT_RULE_ATTR_FAMILY),
 			0, seqnum);
@@ -476,7 +418,7 @@ int mnl_nft_chain_batch_add(struct nft_chain *nlc, unsigned int flags,
 {
 	struct nlmsghdr *nlh;
 
-	nlh = nft_chain_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+	nlh = nft_chain_nlmsg_build_hdr(nft_batch_buffer(batch),
 			NFT_MSG_NEWCHAIN,
 			nft_chain_attr_get_u32(nlc, NFT_CHAIN_ATTR_FAMILY),
 			NLM_F_CREATE | flags, seqnum);
@@ -505,7 +447,7 @@ int mnl_nft_chain_batch_del(struct nft_chain *nlc, unsigned int flags,
 {
 	struct nlmsghdr *nlh;
 
-	nlh = nft_chain_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+	nlh = nft_chain_nlmsg_build_hdr(nft_batch_buffer(batch),
 			NFT_MSG_DELCHAIN,
 			nft_chain_attr_get_u32(nlc, NFT_CHAIN_ATTR_FAMILY),
 			NLM_F_ACK, seqnum);
@@ -604,7 +546,7 @@ int mnl_nft_table_batch_add(struct nft_table *nlt, unsigned int flags,
 {
 	struct nlmsghdr *nlh;
 
-	nlh = nft_table_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+	nlh = nft_table_nlmsg_build_hdr(nft_batch_buffer(batch),
 			NFT_MSG_NEWTABLE,
 			nft_table_attr_get_u32(nlt, NFT_TABLE_ATTR_FAMILY),
 			flags, seqnum);
@@ -633,7 +575,7 @@ int mnl_nft_table_batch_del(struct nft_table *nlt, unsigned int flags,
 {
 	struct nlmsghdr *nlh;
 
-	nlh = nft_table_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+	nlh = nft_table_nlmsg_build_hdr(nft_batch_buffer(batch),
 			NFT_MSG_DELTABLE,
 			nft_table_attr_get_u32(nlt, NFT_TABLE_ATTR_FAMILY),
 			NLM_F_ACK, seqnum);
@@ -754,7 +696,7 @@ int mnl_nft_set_batch_add(struct nft_set *nls, unsigned int flags,
 {
 	struct nlmsghdr *nlh;
 
-	nlh = nft_set_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+	nlh = nft_set_nlmsg_build_hdr(nft_batch_buffer(batch),
 			NFT_MSG_NEWSET,
 			nft_set_attr_get_u32(nls, NFT_SET_ATTR_FAMILY),
 			NLM_F_CREATE | flags, seqnum);
@@ -769,7 +711,7 @@ int mnl_nft_set_batch_del(struct nft_set *nls, unsigned int flags,
 {
 	struct nlmsghdr *nlh;
 
-	nlh = nft_set_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+	nlh = nft_set_nlmsg_build_hdr(nft_batch_buffer(batch),
 			NFT_MSG_DELSET,
 			nft_set_attr_get_u32(nls, NFT_SET_ATTR_FAMILY),
 			flags, seqnum);
@@ -920,7 +862,7 @@ int mnl_nft_setelem_batch_add(struct nft_set *nls, unsigned int flags,
 		memory_allocation_error();
 
 	do {
-		nlh = nft_set_elem_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+		nlh = nft_set_elem_nlmsg_build_hdr(nft_batch_buffer(batch),
 				NFT_MSG_NEWSETELEM,
 				nft_set_attr_get_u32(nls, NFT_SET_ATTR_FAMILY),
 				NLM_F_CREATE | flags, seqnum);
@@ -938,7 +880,7 @@ int mnl_nft_setelem_batch_del(struct nft_set *nls, unsigned int flags,
 {
 	struct nlmsghdr *nlh;
 
-	nlh = nft_set_elem_nlmsg_build_hdr(nft_nlmsg_batch_current(),
+	nlh = nft_set_elem_nlmsg_build_hdr(nft_batch_buffer(batch),
 			NFT_MSG_DELSETELEM,
 			nft_set_attr_get_u32(nls, NFT_SET_ATTR_FAMILY),
 			0, seqnum);
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-04-14 18:55 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-04-14 18:59 [PATCH libnftnl 1/2] src: add batch abstraction Pablo Neira Ayuso
2015-04-14 19:00 ` [PATCH nft] mnl: use new libnftnl batch API Pablo Neira Ayuso

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).