netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [iproute2][PATCH 1/2] tc: add ipset ematch
@ 2012-08-09 19:18 Florian Westphal
  2012-08-09 19:18 ` [iproute2][PATCH 2/2] add ematch man page Florian Westphal
  0 siblings, 1 reply; 2+ messages in thread
From: Florian Westphal @ 2012-08-09 19:18 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

example usage:
tc filter add dev $dev parent $id: basic match not ipset'(foobar src)' ..

also updates iproute2/ematch_map, else tc complains:
Error: Unable to find ematch "ipset" in /etc/iproute2/ematch_map
Please assign a unique ID to the ematch kind the suggested entry is:
        8       ipset

when trying to use this ematch.

(text ematch (5) only exists in kernel, a vlan ematch (6) exists neither in
 kernel nor userspace, but kernel headers define TCF_EM_VLAN == 6).
---
 configure               |   34 ++++++
 etc/iproute2/ematch_map |    1 +
 tc/Makefile             |    4 +
 tc/em_ipset.c           |  265 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 304 insertions(+), 0 deletions(-)
 create mode 100644 tc/em_ipset.c

diff --git a/configure b/configure
index 0f4444f..a1916de 100755
--- a/configure
+++ b/configure
@@ -183,6 +183,37 @@ fi
 rm -f $TMPDIR/setnstest.c $TMPDIR/setnstest
 }
 
+check_ipset()	# probe for usable ipset headers; prints "yes" or "no"
+{	# NOTE(review): probe guards on IP_SET_INVALID, tc/em_ipset.c on IPSET_INVALID_ID - confirm
+cat >$TMPDIR/ipsettest.c <<EOF
+#include <linux/netfilter/ipset/ip_set.h>
+#ifndef IP_SET_INVALID
+#define IPSET_DIM_MAX 3
+typedef unsigned short ip_set_id_t;
+#endif
+#include <linux/netfilter/xt_set.h>
+
+struct xt_set_info info;
+#if IPSET_PROTOCOL == 6
+int main(void)
+{
+	return IPSET_MAXNAMELEN;
+}
+#else
+#error unknown ipset version
+#endif
+EOF
+
+if gcc -I$INCLUDE -o $TMPDIR/ipsettest $TMPDIR/ipsettest.c >/dev/null 2>&1	# compile-only probe, output discarded
+then
+	echo "TC_CONFIG_IPSET:=y" >>Config	# tested by tc/Makefile to add em_ipset.o
+	echo "yes"
+else
+	echo "no"
+fi
+rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest	# remove probe source and binary
+}
+
 echo "# Generated config based on" $INCLUDE >Config
 
 echo "TC schedulers"
@@ -196,6 +227,9 @@ check_xt_old
 check_xt_old_internal_h
 check_ipt
 
+echo -n " IPSET  "
+check_ipset
+
 echo -n "iptables modules directory: "
 check_ipt_lib_dir
 
diff --git a/etc/iproute2/ematch_map b/etc/iproute2/ematch_map
index 7c6a281..69b007d 100644
--- a/etc/iproute2/ematch_map
+++ b/etc/iproute2/ematch_map
@@ -3,3 +3,4 @@
 2	nbyte
 3	u32
 4	meta
+8	ipset
diff --git a/tc/Makefile b/tc/Makefile
index 64d93ad..dfbfac5 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -50,6 +50,10 @@ TCMODULES += q_mqprio.o
 TCMODULES += q_codel.o
 TCMODULES += q_fq_codel.o
 
+ifeq ($(TC_CONFIG_IPSET), y)
+  TCMODULES += em_ipset.o
+endif
+
 TCSO :=
 ifeq ($(TC_CONFIG_ATM),y)
   TCSO += q_atm.so
diff --git a/tc/em_ipset.c b/tc/em_ipset.c
new file mode 100644
index 0000000..a2d0d15
--- /dev/null
+++ b/tc/em_ipset.c
@@ -0,0 +1,265 @@
+/*
+ * em_ipset.c		IPset Ematch
+ *
+ * (C) 2012 Florian Westphal <fw@strlen.de>
+ *
+ * Parts taken from iptables libxt_set.h:
+ * Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
+ *                         Patrick Schaaf <bof@bof.de>
+ *                         Martin Josefsson <gandalf@wlug.westbo.se>
+ * Copyright (C) 2003-2010 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <errno.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+#include <xtables.h>
+#include <linux/netfilter/ipset/ip_set.h>
+
+#ifndef IPSET_INVALID_ID	/* not defined by the headers included above: supply fallbacks */
+typedef __u16 ip_set_id_t;
+
+enum ip_set_dim {
+	IPSET_DIM_ZERO = 0,
+	IPSET_DIM_ONE,
+	IPSET_DIM_TWO,
+	IPSET_DIM_THREE,
+	IPSET_DIM_MAX = 6,	/* bounds the number of src/dst entries parse_dirs() accepts */
+};
+#endif /* IPSET_INVALID_ID */
+
+#include <linux/netfilter/xt_set.h>
+#include "m_ematch.h"
+
+#ifndef IPSET_INVALID_ID	/* still undefined here: declare the sockopt requests locally */
+#define IPSET_INVALID_ID	65535	/* index that get_set_byname() treats as "no such set" */
+#define SO_IP_SET		83	/* optname used with getsockopt(SOL_IP, ...) in this file */
+
+union ip_set_name_index {	/* request/reply payload: a set name or a set index */
+	char name[IPSET_MAXNAMELEN];
+	__u16 index;
+};
+
+#define IP_SET_OP_GET_BYNAME	0x00000006	/* Get set index by name */
+struct ip_set_req_get_set {
+	unsigned op;	/* one of the IP_SET_OP_GET_* values */
+	unsigned version;	/* filled in from get_version() by do_getsockopt() */
+	union ip_set_name_index set;
+};
+
+#define IP_SET_OP_GET_BYINDEX	0x00000007	/* Get set name by index */
+/* Uses ip_set_req_get_set */
+
+#define IP_SET_OP_VERSION	0x00000100	/* Ask kernel version */
+struct ip_set_req_version {
+	unsigned op;	/* set to IP_SET_OP_VERSION by get_version() */
+	unsigned version;	/* read back by get_version() after getsockopt() */
+};
+#endif /* IPSET_INVALID_ID */
+
+extern struct ematch_util ipset_ematch_util;
+
+/*
+ * Open a raw socket and ask the kernel, via the SO_IP_SET socket option,
+ * which ipset protocol version it speaks.
+ *
+ * On success the reported version is stored in *version and the open
+ * socket descriptor is returned; the caller owns it and must close() it.
+ * On failure a message is written to stderr, no descriptor is left open
+ * and -1 is returned.
+ */
+static int get_version(unsigned *version)
+{
+	int res, sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+	struct ip_set_req_version req_version;
+	socklen_t size = sizeof(req_version);
+
+	if (sockfd < 0) {
+		fputs("Can't open socket to ipset.\n", stderr);
+		return -1;
+	}
+
+	req_version.op = IP_SET_OP_VERSION;
+	res = getsockopt(sockfd, SOL_IP, SO_IP_SET, &req_version, &size);
+	if (res != 0) {
+		/* report first so close() cannot clobber errno */
+		perror("xt_set getsockopt");
+		/* the caller only sees -1, so release the descriptor here */
+		close(sockfd);
+		return -1;
+	}
+
+	*version = req_version.version;
+	return sockfd;
+}
+
+/*
+ * Send the SO_IP_SET request in *req to the kernel and read the reply
+ * back into the same buffer.  req->version is filled in from
+ * get_version(), whose socket is reused for the request and closed
+ * before returning.
+ *
+ * Returns 0 on success, -1 if the socket could not be set up, the
+ * getsockopt() call failed, or the reply length differs from the
+ * request structure size.
+ */
+static int do_getsockopt(struct ip_set_req_get_set *req)
+{
+	const socklen_t expected = sizeof(*req);
+	socklen_t len = expected;
+	int fd, rc;
+
+	fd = get_version(&req->version);
+	if (fd < 0)
+		return -1;
+
+	rc = getsockopt(fd, SOL_IP, SO_IP_SET, req, &len);
+	if (rc != 0)
+		perror("Problem when communicating with ipset");
+	close(fd);
+	if (rc != 0)
+		return -1;
+
+	if (len != expected) {
+		fprintf(stderr,
+			"Incorrect return size from kernel during ipset lookup, "
+			"(want %zu, got %zu)\n",
+			(size_t)expected, (size_t)len);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the name of the ipset with kernel index idx.
+ *
+ * setname must point to a buffer of at least IPSET_MAXNAMELEN bytes; on
+ * success it holds the NUL-terminated set name and 0 is returned.
+ * Returns -1 if the kernel request fails or the reply carries an empty
+ * name (reported on stderr as a non-existent set).
+ */
+static int
+get_set_byid(char *setname, unsigned int idx)
+{
+	struct ip_set_req_get_set req;
+	int res;
+
+	req.op = IP_SET_OP_GET_BYINDEX;
+	req.set.index = idx;
+	res = do_getsockopt(&req);
+	if (res != 0)
+		return -1;
+	if (req.set.name[0] == '\0') {
+		fprintf(stderr,
+			"Set with index %u in kernel doesn't exist.\n", idx);
+		return -1;
+	}
+
+	strncpy(setname, req.set.name, IPSET_MAXNAMELEN);
+	/* strncpy() does not terminate when the source fills the buffer */
+	setname[IPSET_MAXNAMELEN - 1] = '\0';
+	return 0;
+}
+
+/*
+ * Resolve an ipset name to its kernel index.
+ *
+ * setname is copied (truncated to IPSET_MAXNAMELEN - 1 characters) into
+ * the request.  On success the index is stored in info->index and 0 is
+ * returned; -1 is returned if the kernel request fails or the kernel
+ * answers with IPSET_INVALID_ID.
+ */
+static int
+get_set_byname(const char *setname, struct xt_set_info *info)
+{
+	struct ip_set_req_get_set query;
+
+	query.op = IP_SET_OP_GET_BYNAME;
+	strncpy(query.set.name, setname, IPSET_MAXNAMELEN);
+	query.set.name[IPSET_MAXNAMELEN - 1] = '\0';
+
+	if (do_getsockopt(&query) != 0 ||
+	    query.set.index == IPSET_INVALID_ID)
+		return -1;
+
+	info->index = query.set.index;
+	return 0;
+}
+
+/*
+ * Parse a comma separated list of "src"/"dst" direction keywords.
+ *
+ * For every entry info->dim is incremented; for a "src" entry bit number
+ * info->dim is additionally set in info->flags ("dst" leaves the bit
+ * clear).  Only the first three characters of each entry are compared.
+ * At most IPSET_DIM_MAX entries are accepted.
+ *
+ * Returns 0 on success, -1 on allocation failure, on an unknown keyword
+ * or when more than IPSET_DIM_MAX entries are given; a message is
+ * written to stderr in each failure case.
+ */
+static int
+parse_dirs(const char *opt_arg, struct xt_set_info *info)
+{
+	char *saved = strdup(opt_arg);
+	char *ptr, *tmp = saved;
+	int ret = 0;
+
+	if (!tmp) {
+		perror("strdup");
+		return -1;
+	}
+
+	while (info->dim < IPSET_DIM_MAX && tmp != NULL) {
+		info->dim++;
+		ptr = strsep(&tmp, ",");
+		if (strncmp(ptr, "src", 3) == 0)
+			info->flags |= (1 << info->dim);
+		else if (strncmp(ptr, "dst", 3) != 0) {
+			fputs("You must specify (the comma separated list of) 'src' or 'dst'\n", stderr);
+			free(saved);
+			return -1;
+		}
+	}
+
+	/* decide before free(): tmp points into saved when input is left over */
+	if (tmp) {
+		fprintf(stderr, "Can't be more src/dst options than %u\n",
+			(unsigned)IPSET_DIM_MAX);
+		ret = -1;
+	}
+	free(saved);
+	return ret;
+}
+
+/* Write the ipset ematch syntax summary and a usage example to fd. */
+static void ipset_print_usage(FILE *fd)
+{
+	static const char usage[] =
+		"Usage: ipset(SETNAME FLAGS)\n"
+		"where: SETNAME:= string\n"
+		"       FLAGS  := { FLAG[,FLAGS] }\n"
+		"       FLAG   := { src | dst }\n"
+		"\n"
+		"Example: 'ipset(bulk src,dst)'\n";
+
+	fputs(usage, fd);
+}
+
+/*
+ * Parse the arguments of an "ipset(SETNAME FLAGS)" ematch and append the
+ * ematch header followed by the resulting struct xt_set_info to the
+ * netlink message n.
+ *
+ * args is expected to hold exactly two tokens: the set name, which is
+ * resolved to a kernel index via get_set_byname(), and the src/dst list
+ * handed to parse_dirs().  Any problem is reported through
+ * em_parse_error() and its return value is passed on; 0 is returned on
+ * success.
+ */
+static int ipset_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr,
+			    struct bstr *args)
+{
+	struct xt_set_info set_info;
+
+#define PARSE_ERR(CARG, FMT, ARGS...) \
+	em_parse_error(EINVAL, args, CARG, &ipset_ematch_util, FMT ,##ARGS)
+
+	memset(&set_info, 0, sizeof(set_info));
+
+	if (!args)
+		return PARSE_ERR(args, "ipset: missing set name");
+
+	/* first token: the set name */
+	if (args->len >= IPSET_MAXNAMELEN)
+		return PARSE_ERR(args, "ipset: set name too long (max %u)", IPSET_MAXNAMELEN - 1);
+
+	if (get_set_byname(args->data, &set_info) < 0)
+		return PARSE_ERR(args, "ipset: unknown set name '%s'", args->data);
+
+	/* second token: the src/dst list */
+	if (!args->next)
+		return PARSE_ERR(args, "ipset: missing set flags");
+
+	args = bstr_next(args);
+	if (parse_dirs(args->data, &set_info) != 0)
+		return PARSE_ERR(args, "ipset: error parsing set flags");
+
+	/* anything after the flags is an error, reported at that token */
+	if (args->next) {
+		args = bstr_next(args);
+		return PARSE_ERR(args, "ipset: unknown parameter");
+	}
+
+	addraw_l(n, MAX_MSG, hdr, sizeof(*hdr));
+	addraw_l(n, MAX_MSG, &set_info, sizeof(set_info));
+
+#undef PARSE_ERR
+	return 0;
+}
+
+/*
+ * Print an ipset ematch as "SETNAME dir[,dir...]" to fd.
+ *
+ * data must be a struct xt_set_info of exactly data_len bytes.  The set
+ * index is translated back to a name with get_set_byid(); one "src" or
+ * "dst" is printed per dimension, depending on whether the matching bit
+ * in flags is set.  Returns 0 on success, -1 on a size mismatch or when
+ * the set name cannot be looked up.
+ */
+static int ipset_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data,
+			    int data_len)
+{
+	const struct xt_set_info *set_info = data;
+	char setname[IPSET_MAXNAMELEN];
+	int dim;
+
+	if (data_len != sizeof(*set_info)) {
+		fprintf(stderr, "xt_set_info struct size mismatch\n");
+		return -1;
+	}
+
+	if (get_set_byid(setname, set_info->index) != 0)
+		return -1;
+
+	fputs(setname, fd);
+	for (dim = 1; dim <= set_info->dim; dim++) {
+		/* a blank separates the name from the list, commas the entries */
+		const char *sep = (dim == 1) ? " " : ",";
+		const char *dir = (set_info->flags & (1 << dim)) ? "src" : "dst";
+
+		fprintf(fd, "%s%s", sep, dir);
+	}
+
+	return 0;
+}
+
+struct ematch_util ipset_ematch_util = {	/* descriptor for the "ipset" ematch */
+	.kind = "ipset",
+	.kind_num = TCF_EM_IPSET,	/* presumably the id 8 listed in ematch_map - confirm */
+	.parse_eopt = ipset_parse_eopt,	/* ematch arguments -> netlink payload */
+	.print_eopt = ipset_print_eopt,	/* netlink payload -> "SETNAME src,dst" text */
+	.print_usage = ipset_print_usage
+};
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [iproute2][PATCH 2/2] add ematch man page
  2012-08-09 19:18 [iproute2][PATCH 1/2] tc: add ipset ematch Florian Westphal
@ 2012-08-09 19:18 ` Florian Westphal
  0 siblings, 0 replies; 2+ messages in thread
From: Florian Westphal @ 2012-08-09 19:18 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

---
 could need more work, but a terse one is better than none.

 man/man8/tc-ematch.8 |  152 ++++++++++++++++++++++++++++++++++++++++++++++++++
 man/man8/tc.8        |    1 +
 2 files changed, 153 insertions(+), 0 deletions(-)
 create mode 100644 man/man8/tc-ematch.8

diff --git a/man/man8/tc-ematch.8 b/man/man8/tc-ematch.8
new file mode 100644
index 0000000..53ae161
--- /dev/null
+++ b/man/man8/tc-ematch.8
@@ -0,0 +1,152 @@
+.TH filter ematch "6 August 2012" iproute2 Linux
+.
+.SH NAME
+ematch \- extended matches for use with "basic" or "flow" filters
+.
+.SH SYNOPSIS
+.sp
+.ad l
+.in +8
+.ti -8
+.B "tc filter add .. basic match"
+.RI EXPR
+.B .. flowid ..
+.sp
+
+.ti -8
+.IR EXPR " := " TERM " [ { "
+.B and | or
+}
+.IR EXPR
+]
+
+.ti -8
+.IR TERM " := [ " not " ] { " MATCH " | '(' " EXPR " ')' } "
+
+.ti -8
+.IR MATCH " := " module " '(' " ARGS " ')' "
+
+.ti -8
+.IR ARGS " := " ARG1 " " ARG2 " ..
+
+.SH MATCHES
+
+.SS cmp
+Simple comparison ematch: arithmetic compare of packet data to a given value.
+.ti
+.IR cmp "( " ALIGN " at " OFFSET " [ " ATTRS " ]  { " eq " | " lt " | " gt "  } " VALUE " )
+
+.ti
+.IR ALIGN " := { " u8 " | " u16 " | " u32 " } "
+
+.ti
+.IR ATTRS " := [  layer " LAYER " ] [ mask " MASK " ] [ " trans " ] "
+
+.ti
+.IR ALIGN " := { " u8 " | " u16 " | " u32 " } "
+
+.ti
+.IR LAYER " := { " link " | " network " | " transport " | " 0..%d " }
+
+.SS meta
+Metadata ematch
+.ti
+.IR meta "( " OBJECT " { " eq " | " lt " | " gt " } " OBJECT " )
+
+.ti
+.IR OBJECT " := { " META_ID " |  " VALUE " }
+
+.ti
+.IR META_ID " := id " [ shift " SHIFT " ] [ mask " MASK " ]
+
+.TP
+meta attributes:
+
+\fBrandom\fP 32 bit random value
+
+\fBloadavg_1\fP Load average in last 5 minutes
+
+\fBnf_mark\fP Netfilter mark
+
+\fBvlan\fP Vlan tag
+
+\fBsk_rcvbuf\fP Receive buffer size
+
+\fBsk_snd_queue\fP Send queue length
+
+.PP
+A full list of meta attributes can be obtained via
+
+# tc filter add dev eth1 basic match 'meta(list)'
+
+.SS nbyte
+match packet data byte sequence
+.ti
+.IR nbyte "( " NEEDLE  " at " OFFSET " [ layer " LAYER " ] )
+
+.ti
+.IR NEEDLE  " := { " string " | " c-escape-sequence "  } "
+
+.ti
+.IR OFFSET  " := " int
+
+.ti
+.IR LAYER " := { " link " | " network " | " transport " | " 0..%d " }
+
+.SS u32
+u32 ematch
+.ti
+.IR u32 "( " ALIGN VALUE MASK " at " [ nexthdr+ ] " OFFSET " )
+
+.ti
+.IR ALIGN " := " { " u8 " | " u16 " | " u32 " }
+
+.SS ipset
+test packet against ipset membership
+.ti
+.IR ipset "( " SETNAME FLAGS )
+
+.ti
+.IR SETNAME " := " string
+
+.ti
+.IR FLAGS " := " { " FLAG " [, " FLAGS "] }
+
+The flag options are the same as those used by the iptables "set" match.
+
+When using the ipset ematch with the "ip_set_hash:net,iface" set type,
+the interface can be queried using "src,dst" (source ip address, outgoing interface) or
+"src,src" (source ip address, incoming interface) syntax.
+
+.SH CAVEATS
+
+The ematch syntax uses '(' and ')' to group expressions. All parentheses need to be
+escaped properly to prevent the shell from interpreting them directly.
+
+When using the ipset ematch with the "ifb" device, the outgoing device will be the
+ifb device itself, e.g. "ifb0".
+The original interface (i.e. the device the packet arrived on) is treated as the incoming interface.
+
+.SH EXAMPLE & USAGE
+
+# tc filter add .. basic match ...
+
+# 'cmp(u16 at 3 layer 2 mask 0xff00 gt 20)'
+
+# 'meta(nfmark gt 24)' and 'meta(tcindex mask 0xf0 eq 0xf0)'
+
+# 'nbyte("ababa" at 12 layer 1)'
+
+# 'u32(u16 0x1122 0xffff at nexthdr+4)'
+
+Check if packet source ip address is member of set named \fBbulk\fP:
+
+# 'ipset(bulk src)'
+
+Check if packet source ip and the interface the packet arrived on is member of "hash:net,iface" set named \fBinteractive\fP:
+
+# 'ipset(interactive src,src)'
+
+.SH "AUTHOR"
+
+The extended match infrastructure was added by Thomas Graf.
diff --git a/man/man8/tc.8 b/man/man8/tc.8
index 95571a3..a285c49 100644
--- a/man/man8/tc.8
+++ b/man/man8/tc.8
@@ -374,6 +374,7 @@ was written by Alexey N. Kuznetsov and added in Linux 2.2.
 .BR tc-choke (8),
 .BR tc-codel (8),
 .BR tc-drr (8),
+.BR tc-ematch (8),
 .BR tc-fq_codel (8),
 .BR tc-hfsc (7),
 .BR tc-hfsc (8),
-- 
1.7.3.4

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2012-08-09 19:21 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-09 19:18 [iproute2][PATCH 1/2] tc: add ipset ematch Florian Westphal
2012-08-09 19:18 ` [iproute2][PATCH 2/2] add ematch man page Florian Westphal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).