* [PATCH 1 of 3] Remus: add python control extensions
2009-11-13 1:10 [PATCH 0 of 3] Remus: control tool Brendan Cully
@ 2009-11-13 1:10 ` Brendan Cully
2009-11-13 1:10 ` [PATCH 2 of 3] Remus: add control script to activate remus on a VM Brendan Cully
` (2 subsequent siblings)
3 siblings, 0 replies; 16+ messages in thread
From: Brendan Cully @ 2009-11-13 1:10 UTC (permalink / raw)
To: xen-devel; +Cc: andy
# HG changeset patch
# User Brendan Cully <brendan@cs.ubc.ca>
# Date 1258073720 28800
# Node ID 213fb814acf431d2a382e8f9c09b4cea106c0958
# Parent accded2f185f4178f875b170a5c01544648a68d2
Remus: add python control extensions
Signed-off-by: Brendan Cully <brendan@cs.ubc.ca>
diff --git a/tools/python/setup.py b/tools/python/setup.py
--- a/tools/python/setup.py
+++ b/tools/python/setup.py
@@ -67,10 +67,28 @@
libraries = libraries,
sources = [ "ptsname/ptsname.c" ])
+checkpoint = Extension("checkpoint", # Remus checkpoint control extension
+ extra_compile_args = extra_compile_args,
+ include_dirs = include_dirs,
+ library_dirs = library_dirs,
+ libraries = libraries + [ "rt" ], # librt: libcheckpoint.c uses POSIX timers (timer_create)
+ sources = [ "xen/lowlevel/checkpoint/checkpoint.c", # Python bridge
+ "xen/lowlevel/checkpoint/libcheckpoint.c"]) # checkpoint library it wraps
+
+netlink = Extension("netlink", # netlink extension, built with a bundled libnetlink.c
+ extra_compile_args = extra_compile_args,
+ include_dirs = include_dirs,
+ library_dirs = library_dirs,
+ libraries = libraries, # no extra libraries beyond the shared list
+ sources = [ "xen/lowlevel/netlink/netlink.c",
+ "xen/lowlevel/netlink/libnetlink.c"])
+
modules = [ xc, xs, ptsname, acm, flask ]
-if os.uname()[0] == 'SunOS':
- modules.append(scf)
- modules.append(process)
+plat = os.uname()[0] # kernel/system name, e.g. 'Linux' or 'SunOS'
+if plat == 'SunOS':
+ modules.extend([ scf, process ]) # Solaris-only extensions
+if plat == 'Linux':
+ modules.extend([ checkpoint, netlink ]) # Remus extensions are built on Linux only
setup(name = 'xen',
version = '3.0',
@@ -89,6 +107,7 @@
'xen.web',
'xen.sv',
'xen.xsview',
+ 'xen.remus',
'xen.xend.tests',
'xen.xend.server.tests',
diff --git a/tools/python/xen/lowlevel/checkpoint/checkpoint.c b/tools/python/xen/lowlevel/checkpoint/checkpoint.c
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/lowlevel/checkpoint/checkpoint.c
@@ -0,0 +1,363 @@
+/* python bridge to checkpointing API */
+
+#include <Python.h>
+
+#include <xs.h>
+#include <xenctrl.h>
+
+#include "checkpoint.h"
+
+#define PKG "xen.lowlevel.checkpoint"
+
+static PyObject* CheckpointError;
+
+typedef struct {
+ PyObject_HEAD
+ checkpoint_state cps; /* C-level checkpoint session wrapped by this object */
+
+ /* milliseconds between checkpoints */
+ unsigned int interval; /* 0 means the suspend timer is never armed */
+ int armed; /* set to 1 by suspend_trampoline once the timer has been started */
+
+ PyObject* suspend_cb; /* optional python hooks stored by start(); NULL if unset */
+ PyObject* postcopy_cb;
+ PyObject* checkpoint_cb;
+
+ PyThreadState* threadstate; /* saved by PyEval_SaveThread while the GIL is released */
+} CheckpointObject;
+
+static int suspend_trampoline(void* data);
+static int postcopy_trampoline(void* data);
+static int checkpoint_trampoline(void* data);
+
+static PyObject* Checkpoint_new(PyTypeObject* type, PyObject* args,
+                                PyObject* kwargs)
+{
+  /* tp_new slot: allocate the object and put its checkpoint state into the
+   * unopened condition; args and kwargs are not inspected. */
+  PyObject* obj = type->tp_alloc(type, 0);
+  CheckpointObject* cp = (CheckpointObject*)obj;
+
+  if (cp == NULL)
+    return NULL;
+  cp->armed = 0;
+  cp->suspend_cb = NULL;
+  checkpoint_init(&cp->cps);
+  return obj;
+}
+
+static int Checkpoint_init(PyObject* obj, PyObject* args, PyObject* kwargs)
+{
+ return 0; /* tp_init slot: nothing to set up here; always reports success */
+}
+
+static void Checkpoint_dealloc(CheckpointObject* self)
+{ /* tp_dealloc: close the C state, drop stored callbacks, free the object */
+  checkpoint_close(&self->cps);
+  Py_XDECREF(self->suspend_cb); Py_XDECREF(self->postcopy_cb); Py_XDECREF(self->checkpoint_cb);
+  self->ob_type->tp_free((PyObject*)self);
+}
+
+static PyObject* pycheckpoint_open(PyObject* obj, PyObject* args)
+{
+  /* open(domid): attach this checkpointer to a domain.
+   * Raises the module's error exception when checkpoint_open() fails. */
+  checkpoint_state* state = &((CheckpointObject*)obj)->cps;
+  unsigned int target;
+
+  if (!PyArg_ParseTuple(args, "I", &target))
+    return NULL;
+
+  if (checkpoint_open(state, target) >= 0)
+    Py_RETURN_NONE;
+
+  /* surface the library's error text as a Python exception */
+  PyErr_SetString(CheckpointError, checkpoint_error(state));
+  return NULL;
+}
+
+static PyObject* pycheckpoint_close(PyObject* obj, PyObject* args)
+{
+  /* close(): shut the checkpoint session and forget every stored callback */
+  CheckpointObject* cp = (CheckpointObject*)obj;
+  PyObject** held[] = { &cp->suspend_cb, &cp->postcopy_cb, &cp->checkpoint_cb };
+  unsigned int i;
+
+  checkpoint_close(&cp->cps);
+  for (i = 0; i < sizeof(held) / sizeof(held[0]); i++) {
+    Py_XDECREF(*held[i]);
+    *held[i] = NULL;
+  }
+
+  Py_RETURN_NONE;
+}
+
+static PyObject* pycheckpoint_start(PyObject* obj, PyObject* args) {
+  CheckpointObject* self = (CheckpointObject*)obj;
+  /* start(iofile[, suspend_cb[, postcopy_cb[, checkpoint_cb[, interval]]]]) */
+  PyObject* iofile;
+  PyObject* suspend_cb = NULL;
+  PyObject* postcopy_cb = NULL;
+  PyObject* checkpoint_cb = NULL;
+  unsigned int interval = 0;
+
+  int fd;
+  struct save_callbacks callbacks;
+  int rc;
+  /* callbacks may be None; interval is in milliseconds (0 = no timer) */
+  if (!PyArg_ParseTuple(args, "O|OOOI", &iofile, &suspend_cb, &postcopy_cb,
+                        &checkpoint_cb, &interval))
+    return NULL;
+
+  self->interval = interval;
+  /* we now hold references: every failure below must leave through err */
+  Py_INCREF(iofile);
+  Py_XINCREF(suspend_cb);
+  Py_XINCREF(postcopy_cb);
+  Py_XINCREF(checkpoint_cb);
+
+  fd = PyObject_AsFileDescriptor(iofile);
+  Py_DECREF(iofile);
+  if (fd < 0) {
+    PyErr_SetString(PyExc_TypeError, "invalid file handle");
+    goto err;
+  }
+
+  if (suspend_cb && suspend_cb != Py_None) {
+    if (!PyCallable_Check(suspend_cb)) {
+      PyErr_SetString(PyExc_TypeError, "suspend callback not callable");
+      goto err;
+    }
+    self->suspend_cb = suspend_cb;
+  } else
+    self->suspend_cb = NULL;
+
+  if (postcopy_cb && postcopy_cb != Py_None) {
+    if (!PyCallable_Check(postcopy_cb)) {
+      PyErr_SetString(PyExc_TypeError, "postcopy callback not callable");
+      goto err;
+    }
+    self->postcopy_cb = postcopy_cb;
+  } else
+    self->postcopy_cb = NULL;
+
+  if (checkpoint_cb && checkpoint_cb != Py_None) {
+    if (!PyCallable_Check(checkpoint_cb)) {
+      PyErr_SetString(PyExc_TypeError, "checkpoint callback not callable");
+      goto err;
+    }
+    self->checkpoint_cb = checkpoint_cb;
+  } else
+    self->checkpoint_cb = NULL;
+
+  callbacks.suspend = suspend_trampoline;
+  callbacks.postcopy = postcopy_trampoline;
+  callbacks.checkpoint = checkpoint_trampoline;
+  callbacks.data = self;
+  /* run the save with the GIL released; the trampolines re-acquire it */
+  self->threadstate = PyEval_SaveThread();
+  rc = checkpoint_start(&self->cps, fd, &callbacks);
+  PyEval_RestoreThread(self->threadstate);
+
+  if (rc < 0) {
+    PyErr_SetString(CheckpointError, checkpoint_error(&self->cps));
+    goto err;
+  }
+
+  Py_RETURN_NONE;
+
+ err:
+  self->suspend_cb = NULL;
+  Py_XDECREF(suspend_cb);
+  self->postcopy_cb = NULL;
+  Py_XDECREF(postcopy_cb);
+  self->checkpoint_cb = NULL;
+  Py_XDECREF(checkpoint_cb);
+
+  return NULL;
+}
+
+static PyMethodDef Checkpoint_methods[] = { /* methods of the checkpointer type */
+  { "open", pycheckpoint_open, METH_VARARGS,
+    "open connection to xen" },
+  { "close", pycheckpoint_close, METH_NOARGS,
+    "close connection to xen" },
+  { "start", pycheckpoint_start, METH_VARARGS, /* C function takes (self, args) only */
+    "begin a checkpoint" },
+  { NULL, NULL, 0, NULL }
+};
+
+static PyTypeObject CheckpointType = { /* positional initializer: slot order is significant */
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ PKG ".checkpointer", /* tp_name */
+ sizeof(CheckpointObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)Checkpoint_dealloc, /* tp_dealloc: closes the checkpoint state, frees the object */
+ NULL, /* tp_print */
+ NULL, /* tp_getattr */
+ NULL, /* tp_setattr */
+ NULL, /* tp_compare */
+ NULL, /* tp_repr */
+ NULL, /* tp_as_number */
+ NULL, /* tp_as_sequence */
+ NULL, /* tp_as_mapping */
+ NULL, /* tp_hash */
+ NULL, /* tp_call */
+ NULL, /* tp_str */
+ NULL, /* tp_getattro */
+ NULL, /* tp_setattro */
+ NULL, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ "Checkpoint object", /* tp_doc */
+ NULL, /* tp_traverse */
+ NULL, /* tp_clear */
+ NULL, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ NULL, /* tp_iter */
+ NULL, /* tp_iternext */
+ Checkpoint_methods, /* tp_methods: open / close / start */
+ NULL, /* tp_members */
+ NULL, /* tp_getset */
+ NULL, /* tp_base */
+ NULL, /* tp_dict */
+ NULL, /* tp_descr_get */
+ NULL, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)Checkpoint_init, /* tp_init: no-op, always returns 0 */
+ NULL, /* tp_alloc */
+ Checkpoint_new, /* tp_new: allocates and calls checkpoint_init */
+};
+
+static PyMethodDef methods[] = {
+ { NULL } /* sentinel only: the module exports no module-level functions */
+};
+
+static char doc[] = "checkpoint API";
+
+PyMODINIT_FUNC initcheckpoint(void) {
+  PyObject *m; /* module init: registers the type and the error exception */
+
+  if (PyType_Ready(&CheckpointType) < 0)
+    return;
+
+  m = Py_InitModule3(PKG, methods, doc);
+  if (!m)
+    return;
+
+  Py_INCREF(&CheckpointType);
+  PyModule_AddObject(m, "checkpointer", (PyObject*)&CheckpointType);
+
+  CheckpointError = PyErr_NewException(PKG ".error", NULL, NULL);
+  if (!CheckpointError)
+    return; /* exception already set; Py_INCREF(NULL) would crash */
+  Py_INCREF(CheckpointError);
+  PyModule_AddObject(m, "error", CheckpointError);
+  block_timer(); /* block SIGRTMIN in the importing thread */
+}
+
+/* private functions */
+
+/* bounce C suspend call into python equivalent.
+ * returns 1 on success or 0 on failure.
+ * Entered with the GIL released (from inside checkpoint_start); the GIL is
+ * re-acquired only around the python callback and the handling of its result. */
+static int suspend_trampoline(void* data)
+{
+  CheckpointObject* self = (CheckpointObject*)data;
+
+  PyObject* result;
+  int rc = 0;
+
+  /* call default suspend function, then python hook if available */
+  if (self->armed) {
+    if (checkpoint_wait(&self->cps) < 0) {
+      fprintf(stderr, "%s\n", checkpoint_error(&self->cps));
+      return 0;
+    }
+  } else {
+    if (self->interval) {
+      self->armed = 1;
+      checkpoint_settimer(&self->cps, self->interval);
+    }
+
+    if (!checkpoint_suspend(&self->cps)) {
+      fprintf(stderr, "%s\n", checkpoint_error(&self->cps));
+      return 0;
+    }
+  }
+
+  if (!self->suspend_cb)
+    return 1;
+
+  /* every Python API call, including the truth test and the DECREF,
+   * must happen while this thread holds the GIL */
+  PyEval_RestoreThread(self->threadstate);
+  result = PyObject_CallFunction(self->suspend_cb, NULL);
+
+  if (result && (result == Py_None || PyObject_IsTrue(result)))
+    rc = 1;
+
+  Py_XDECREF(result);
+  self->threadstate = PyEval_SaveThread();
+
+  return rc;
+}
+
+static int postcopy_trampoline(void* data)
+{
+ CheckpointObject* self = (CheckpointObject*)data;
+ /* postcopy hook: run the optional python callback, then resume the domain */
+ PyObject* result;
+ int rc = 0; /* stays 0 when no callback is registered or the callback fails */
+
+ if (!self->postcopy_cb)
+ goto resume;
+ /* the GIL is held from here until PyEval_SaveThread below */
+ PyEval_RestoreThread(self->threadstate);
+ result = PyObject_CallFunction(self->postcopy_cb, NULL);
+ /* a None or truthy result counts as success; NULL means the callback raised */
+ if (result && (result == Py_None || PyObject_IsTrue(result)))
+ rc = 1;
+
+ Py_XDECREF(result);
+ self->threadstate = PyEval_SaveThread();
+
+ resume:
+ if (checkpoint_resume(&self->cps) < 0) { /* resume is attempted on every path */
+ fprintf(stderr, "%s\n", checkpoint_error(&self->cps));
+ return 0;
+ }
+
+ return rc;
+}
+
+static int checkpoint_trampoline(void* data)
+{
+  CheckpointObject* self = (CheckpointObject*)data;
+  PyObject* result;
+  int rc = 0;
+
+  /* checkpoint hook: run the post-flush step, then the optional python
+   * callback. Returns -1 if the post-flush step fails, 0 if there is no
+   * callback or the callback raised or returned a false value, and 1 if
+   * the callback returned None or a true value. */
+  if (checkpoint_postflush(&self->cps) < 0) {
+    fprintf(stderr, "%s\n", checkpoint_error(&self->cps));
+    return -1;
+  }
+
+  if (!self->checkpoint_cb)
+    return 0;
+
+  /* inspect and release the result before giving the GIL back */
+  PyEval_RestoreThread(self->threadstate);
+  result = PyObject_CallFunction(self->checkpoint_cb, NULL);
+
+  if (result && (result == Py_None || PyObject_IsTrue(result)))
+    rc = 1;
+
+  Py_XDECREF(result);
+  self->threadstate = PyEval_SaveThread();
+
+  return rc;
+}
diff --git a/tools/python/xen/lowlevel/checkpoint/checkpoint.h b/tools/python/xen/lowlevel/checkpoint/checkpoint.h
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/lowlevel/checkpoint/checkpoint.h
@@ -0,0 +1,59 @@
+/* API for checkpointing */
+
+#ifndef _CHECKPOINT_H_
+#define _CHECKPOINT_H_ 1
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <time.h>
+
+#include <xenguest.h>
+#include <xs.h>
+
+typedef enum { /* order matters: libcheckpoint.c tests "domtype > dt_pv" for HVM-type guests */
+ dt_unknown, /* value assigned by checkpoint_init, before a domain is opened */
+ dt_pv, /* domain for which dominfo.hvm is not set */
+ dt_hvm, /* HVM domain whose callback IRQ parameter is zero */
+ dt_pvhvm /* HVM with PV drivers */
+} checkpoint_domtype;
+
+typedef struct {
+ int xch; /* xc handle */
+ int xce; /* event channel handle */
+ struct xs_handle* xsh; /* xenstore handle */
+ int watching_shutdown; /* state of watch on @releaseDomain */
+ /* (0 = no watch, 1 = registered, 2 = registration event already consumed) */
+ unsigned int domid; /* 0 is treated as "not opened" by checkpoint_start */
+ checkpoint_domtype domtype;
+ int fd; /* stream given to checkpoint_start; -1 when unset */
+
+ int suspend_evtchn; /* port returned by xc_suspend_evtchn_init */
+
+ char* errstr; /* string literal or the library's static buffer; never freed */
+
+ /* suspend deadline thread support */
+ volatile int suspended; /* set by the suspend thread, cleared by checkpoint_resume */
+ volatile int done; /* set by stop_suspend_thread to make the thread exit */
+ pthread_t suspend_thr;
+ sem_t suspended_sem; /* posted by the suspend thread, awaited by checkpoint_wait */
+ sem_t resumed_sem; /* posted by checkpoint_resume, awaited by the suspend thread */
+ timer_t timer; /* created by create_suspend_timer, armed by checkpoint_settimer */
+} checkpoint_state;
+
+char* checkpoint_error(checkpoint_state* s);
+
+void checkpoint_init(checkpoint_state* s);
+int checkpoint_open(checkpoint_state* s, unsigned int domid);
+void checkpoint_close(checkpoint_state* s);
+int checkpoint_start(checkpoint_state* s, int fd,
+ struct save_callbacks* callbacks);
+int checkpoint_suspend(checkpoint_state* s);
+int checkpoint_resume(checkpoint_state* s);
+int checkpoint_postflush(checkpoint_state* s);
+
+int checkpoint_settimer(checkpoint_state* s, int millis);
+int checkpoint_wait(checkpoint_state* s);
+void block_timer(void);
+void unblock_timer(void);
+
+#endif
diff --git a/tools/python/xen/lowlevel/checkpoint/libcheckpoint.c b/tools/python/xen/lowlevel/checkpoint/libcheckpoint.c
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/lowlevel/checkpoint/libcheckpoint.c
@@ -0,0 +1,782 @@
+/* API for checkpointing */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <signal.h>
+#include <sys/stat.h>
+
+#include <xenctrl.h>
+#include <xenguest.h>
+#include <xs.h>
+
+#include "checkpoint.h"
+
+static char errbuf[256];
+
+static int setup_suspend_evtchn(checkpoint_state* s);
+static void release_suspend_evtchn(checkpoint_state *s);
+static int setup_shutdown_watch(checkpoint_state* s);
+static int check_shutdown_watch(checkpoint_state* s);
+static void release_shutdown_watch(checkpoint_state* s);
+static int poll_evtchn(checkpoint_state* s);
+
+static int switch_qemu_logdirty(checkpoint_state* s, int enable);
+static int suspend_hvm(checkpoint_state* s);
+static int suspend_qemu(checkpoint_state* s);
+static int resume_qemu(checkpoint_state* s);
+static int send_qemu(checkpoint_state* s);
+
+static int create_suspend_timer(checkpoint_state* s);
+static int delete_suspend_timer(checkpoint_state* s);
+static int create_suspend_thread(checkpoint_state* s);
+static void stop_suspend_thread(checkpoint_state* s);
+
+/* Returns a string describing the most recent error recorded by a checkpoint
+ * function, or NULL if none was recorded. Static -- do not free. */
+char* checkpoint_error(checkpoint_state* s)
+{
+ return s->errstr; /* a string literal or the shared file-scope errbuf */
+}
+
+void checkpoint_init(checkpoint_state* s)
+{
+  /* Put every field except the two semaphores into its "not opened" value. */
+  s->xsh = NULL;
+  s->xce = -1;
+  s->xch = -1;
+  s->watching_shutdown = 0;
+  s->suspend_evtchn = -1;
+  /* target domain and output stream */
+  s->fd = -1;
+  s->domtype = dt_unknown;
+  s->domid = 0;
+  /* no error recorded yet */
+  s->errstr = NULL;
+  /* suspend timer and thread do not exist until checkpoint_settimer runs */
+  s->suspended = 0;
+  s->done = 0;
+  s->timer = 0;
+  s->suspend_thr = 0;
+}
+
+/* open a checkpoint session to guest domid; returns 0, or -1 with errstr set */
+int checkpoint_open(checkpoint_state* s, unsigned int domid)
+{
+  xc_dominfo_t dominfo;
+  unsigned long pvirq;
+
+  s->domid = domid;
+
+  s->xch = xc_interface_open();
+  if (s->xch < 0) {
+    s->errstr = "could not open control interface (are you root?)";
+
+    return -1;
+  }
+
+  s->xsh = xs_daemon_open();
+  if (!s->xsh) {
+    checkpoint_close(s);
+    s->errstr = "could not open xenstore handle";
+
+    return -1;
+  }
+
+  s->xce = xc_evtchn_open();
+  if (s->xce < 0) {
+    checkpoint_close(s);
+    s->errstr = "could not open event channel handle";
+
+    return -1;
+  }
+  /* getinfo reports the first domain with id >= domid, so verify the match */
+  if (xc_domain_getinfo(s->xch, domid, 1, &dominfo) != 1 ||
+      dominfo.domid != domid) {
+    checkpoint_close(s);
+    s->errstr = "could not get domain info";
+    return -1;
+  }
+  if (dominfo.hvm) {
+    if (xc_get_hvm_param(s->xch, s->domid, HVM_PARAM_CALLBACK_IRQ, &pvirq)) {
+      checkpoint_close(s);
+      s->errstr = "could not get HVM callback IRQ";
+
+      return -1;
+    }
+    s->domtype = pvirq ? dt_pvhvm : dt_hvm;
+  } else
+    s->domtype = dt_pv;
+
+  if (setup_shutdown_watch(s) < 0) {
+    checkpoint_close(s);
+    s->errstr = "could not bind to shutdown watch"; /* callers read errstr */
+    return -1;
+  }
+
+  if (s->domtype == dt_pv) {
+    if (setup_suspend_evtchn(s) < 0) {
+      checkpoint_close(s);
+
+      return -1;
+    }
+  } else if (s->domtype == dt_pvhvm) {
+    checkpoint_close(s);
+    s->errstr = "PV-on-HVM is unsupported";
+
+    return -1;
+  }
+
+  return 0;
+}
+
+void checkpoint_close(checkpoint_state* s)
+{ /* release everything checkpoint_open/settimer acquired; each step is skipped if unset */
+ if (s->timer)
+ delete_suspend_timer(s);
+ if (s->suspend_thr)
+ stop_suspend_thread(s);
+ /* both releases rely on handles and the domid that are still valid here */
+ release_shutdown_watch(s);
+ release_suspend_evtchn(s);
+
+ if (s->xch >= 0) {
+ xc_interface_close(s->xch);
+ s->xch = -1;
+ }
+ if (s->xce >= 0) {
+ xc_evtchn_close(s->xce);
+ s->xce = -1;
+ }
+ if (s->xsh) {
+ xs_daemon_close(s->xsh);
+ s->xsh = NULL;
+ }
+ /* errstr, domtype, done and watching_shutdown are left untouched */
+ s->domid = 0;
+ s->fd = -1;
+ s->suspend_evtchn = -1;
+}
+
+/* Placeholder logdirty hook handed to xc_domain_save: checkpoint_start
+ * flips qemu logdirty itself, so nothing is left to do in here. */
+static void noop_switch_logdirty(int domid, unsigned enable)
+{
+  /* deliberately empty */
+}
+
+int checkpoint_start(checkpoint_state* s, int fd,
+                     struct save_callbacks* callbacks)
+{
+  /* Stream the opened domain to fd with xc_domain_save, switching qemu
+   * logdirty on before and off after for HVM-type domains.
+   * Returns -1 if no domain is open, otherwise the nonzero result of the
+   * logdirty enable step or the result of xc_domain_save. */
+  int is_hvm = s->domtype > dt_pv;
+  int save_flags = is_hvm ? (XCFLAGS_LIVE | XCFLAGS_HVM) : XCFLAGS_LIVE;
+  int status;
+
+  if (s->domid == 0) {
+    s->errstr = "checkpoint state not opened";
+    return -1;
+  }
+  s->fd = fd;
+
+  if (is_hvm) {
+    status = switch_qemu_logdirty(s, 1);
+    if (status != 0)
+      return status;
+  }
+  status = xc_domain_save(s->xch, fd, s->domid, 0, 0, save_flags, callbacks,
+                          is_hvm, noop_switch_logdirty);
+  if (is_hvm)
+    switch_qemu_logdirty(s, 0);
+  return status;
+}
+
+/* suspend the domain. Returns 0 on failure, 1 on success */
+int checkpoint_suspend(checkpoint_state* s)
+{
+  struct timeval tv;
+  int rc;
+
+  gettimeofday(&tv, NULL);
+  fprintf(stderr, "PROF: suspending at %lu.%06lu\n", (unsigned long)tv.tv_sec,
+          (unsigned long)tv.tv_usec);
+
+  if (s->domtype == dt_hvm) {
+    return suspend_hvm(s) < 0 ? 0 : 1;
+  }
+
+  rc = xc_evtchn_notify(s->xce, s->suspend_evtchn);
+  if (rc < 0) {
+    snprintf(errbuf, sizeof(errbuf),
+             "failed to notify suspend event channel: %d", rc);
+    s->errstr = errbuf;
+
+    return 0;
+  }
+
+  do {
+    rc = poll_evtchn(s);
+  } while (rc >= 0 && rc != s->suspend_evtchn);
+  if (rc <= 0) {
+    snprintf(errbuf, sizeof(errbuf),
+             "failed to receive suspend notification: %d", rc);
+    s->errstr = errbuf;
+
+    return 0;
+  }
+  rc = xc_evtchn_unmask(s->xce, s->suspend_evtchn);
+  if (rc < 0) { /* report the unmask result, not the port left over in rc */
+    snprintf(errbuf, sizeof(errbuf),
+             "failed to unmask suspend notification channel: %d", rc);
+    s->errstr = errbuf;
+    return 0;
+  }
+
+  return 1;
+}
+
+/* wait for a suspend triggered by another thread; 0 if suspended, else -1 */
+int checkpoint_wait(checkpoint_state* s)
+{
+  int status;
+
+  if (!s->suspend_thr) {
+    s->errstr = "checkpoint timer is not active\n";
+    return -1;
+  }
+
+  /* retry the wait for as long as it is merely interrupted by a signal */
+  for (;;) {
+    status = sem_wait(&s->suspended_sem);
+    if (status >= 0)
+      break;
+    if (errno == EINTR)
+      continue;
+    snprintf(errbuf, sizeof(errbuf),
+             "error waiting for suspend semaphore: %d %d\n", status, errno);
+    s->errstr = errbuf;
+    return -1;
+  }
+  if (s->suspended)
+    return 0;
+  snprintf(errbuf, sizeof(errbuf), "domain not suspended?\n");
+  s->errstr = errbuf;
+  return -1;
+}
+
+/* let guest execution resume; returns 0 on success, -1 on failure */
+int checkpoint_resume(checkpoint_state* s)
+{
+  struct timeval tv;
+  int rc;
+
+  if (xc_domain_resume(s->xch, s->domid, 1)) {
+    snprintf(errbuf, sizeof(errbuf), "error resuming domain: %d", errno);
+    s->errstr = errbuf;
+
+    return -1;
+  }
+
+  gettimeofday(&tv, NULL);
+  fprintf(stderr, "PROF: resumed at %lu.%06lu\n", (unsigned long)tv.tv_sec,
+          (unsigned long)tv.tv_usec);
+
+  if (s->domtype > dt_pv && resume_qemu(s) < 0)
+    return -1;
+
+  /* restore watchability in xenstore; xs_resume_domain returns false on failure */
+  if (!xs_resume_domain(s->xsh, s->domid))
+    fprintf(stderr, "error resuming domain in xenstore\n");
+
+  s->suspended = 0;
+
+  if (s->suspend_thr) {
+    if ((rc = sem_post(&s->resumed_sem)))
+      fprintf(stderr, "error posting resume semaphore\n");
+  }
+
+  return 0;
+}
+
+/* called after xc_domain_save has flushed its buffer */
+int checkpoint_postflush(checkpoint_state *s)
+{
+  /* only HVM-type domains have device-model state to forward */
+  if (s->domtype <= dt_pv)
+    return 0;
+  return send_qemu(s) < 0 ? -1 : 0;
+}
+
+/* force suspend within millis ms if copy hasn't completed yet */
+int checkpoint_settimer(checkpoint_state* s, int millis)
+{
+  struct itimerspec spec;
+  int status;
+
+  /* first use: build the timer and the thread that services it */
+  if (!s->suspend_thr) {
+    if (create_suspend_timer(s) < 0)
+      return -1;
+    if (create_suspend_thread(s) < 0) {
+      delete_suspend_timer(s);
+      return -1;
+    }
+  }
+
+  /* periodic timer: the first expiry and every later period are millis */
+  spec.it_value.tv_sec = millis / 1000;
+  spec.it_value.tv_nsec = (millis % 1000) * 1000000L;
+  spec.it_interval = spec.it_value;
+
+  status = timer_settime(s->timer, 0, &spec, NULL);
+  if (status == 0)
+    return 0;
+
+  fprintf(stderr, "Error arming timer: %d\n", status);
+  return -1;
+}
+
+int delete_suspend_timer(checkpoint_state* s)
+{
+  /* delete the timer if one exists; returns timer_delete's result, else 0 */
+  int status;
+  if (!s->timer)
+    return 0;
+  status = timer_delete(s->timer);
+  if (status)
+    fprintf(stderr, "Error deleting timer: %s\n", strerror(errno));
+  s->timer = NULL;
+  return status;
+}
+
+/* Set up event channel used to signal a guest to suspend itself */
+static int setup_suspend_evtchn(checkpoint_state* s)
+{
+  int bound;
+  int xs_port = xs_suspend_evtchn_port(s->domid);
+
+  if (xs_port < 0) {
+    s->errstr = "failed to read suspend event channel";
+    return -1;
+  }
+
+  bound = xc_suspend_evtchn_init(s->xch, s->xce, s->domid, xs_port);
+  s->suspend_evtchn = bound;
+  if (bound < 0) {
+    snprintf(errbuf, sizeof(errbuf), "failed to bind suspend event channel");
+    s->errstr = errbuf;
+    return -1;
+  }
+
+  /* report the binding on stderr */
+  fprintf(stderr, "bound to suspend event channel %u:%d as %d\n",
+          s->domid, xs_port, bound);
+  return 0;
+}
+
+/* release suspend event channels bound to guest */
+static void release_suspend_evtchn(checkpoint_state *s)
+{
+  /* TODO: teach xen to clean up if port is unbound */
+  if (s->xce < 0 || s->suspend_evtchn <= 0)
+    return;
+  xc_suspend_evtchn_release(s->xce, s->suspend_evtchn);
+  s->suspend_evtchn = 0;
+}
+
+static int setup_shutdown_watch(checkpoint_state* s)
+{
+ char buf[16]; /* decimal domid, used as the watch token */
+
+ /* write domain ID to watch so we can ignore other domain shutdowns */
+ snprintf(buf, sizeof(buf), "%u", s->domid);
+ if ( !xs_watch(s->xsh, "@releaseDomain", buf) ) {
+ fprintf(stderr, "Could not bind to shutdown watch\n"); /* note: errstr is not set here */
+ return -1;
+ }
+ /* watch fires once on registration */
+ s->watching_shutdown = 1; /* 1 tells check_shutdown_watch to discard the next event */
+ check_shutdown_watch(s);
+ /* returns 0 on success, -1 if the watch could not be registered */
+ return 0;
+}
+
+static int check_shutdown_watch(checkpoint_state* s) {
+  /* read one watch event; -1 if its token equals our domid string, else 0 */
+  unsigned int count;
+  char **vec;
+  char buf[16];
+  int rc = 0;
+
+  vec = xs_read_watch(s->xsh, &count);
+  if (s->watching_shutdown == 1) {
+    s->watching_shutdown = 2; /* this was the event fired by registration */
+  } else if (!vec) {
+    fprintf(stderr, "empty watch fired\n");
+  } else {
+    snprintf(buf, sizeof(buf), "%u", s->domid); /* same format as registration */
+    if (!strcmp(vec[XS_WATCH_TOKEN], buf)) {
+      fprintf(stderr, "domain %d shut down\n", s->domid);
+      rc = -1;
+    }
+  }
+  free(vec); /* xs_read_watch result is malloc'd; free(NULL) is a no-op */
+  return rc;
+}
+
+static void release_shutdown_watch(checkpoint_state* s) {
+  /* Remove the @releaseDomain watch registered by setup_shutdown_watch.
+   * Does nothing without a xenstore handle or when no watch was set. */
+  char token[16];
+
+  if (s->xsh && s->watching_shutdown) {
+    /* the token must match the domid string used at registration */
+    snprintf(token, sizeof(token), "%u", s->domid);
+
+    if (!xs_unwatch(s->xsh, "@releaseDomain", token))
+      fprintf(stderr, "Could not release shutdown watch\n");
+  }
+}
+
+/* wrapper around xc_evtchn_pending which detects errors */
+static int poll_evtchn(checkpoint_state* s)
+{
+ int fd, xsfd, maxfd;
+ fd_set rfds, efds;
+ struct timeval tv;
+ int rc;
+ /* wait on both the event-channel fd and the xenstore fd */
+ fd = xc_evtchn_fd(s->xce);
+ xsfd = xs_fileno(s->xsh);
+ maxfd = fd > xsfd ? fd : xsfd;
+ FD_ZERO(&rfds);
+ FD_ZERO(&efds);
+ FD_SET(fd, &rfds);
+ FD_SET(xsfd, &rfds);
+ FD_SET(fd, &efds);
+ FD_SET(xsfd, &efds);
+
+ /* give it 500 ms to respond */
+ tv.tv_sec = 0;
+ tv.tv_usec = 500000;
+
+ rc = select(maxfd + 1, &rfds, NULL, &efds, &tv);
+ if (rc < 0)
+ fprintf(stderr, "error polling event channel: %s\n", strerror(errno));
+ else if (!rc)
+ fprintf(stderr, "timeout waiting for event channel\n");
+ else if (FD_ISSET(fd, &rfds))
+ return xc_evtchn_pending(s->xce); /* event channel wins if both fds are readable */
+ else if (FD_ISSET(xsfd, &rfds))
+ return check_shutdown_watch(s); /* 0, or -1 when the event's token matches our domid */
+ /* select error, timeout, or only an exceptional condition was reported */
+ return -1;
+}
+
+/* adapted from the eponymous function in xc_save */
+static int switch_qemu_logdirty(checkpoint_state *s, int enable)
+{
+  char path[128];
+  char *tail, *cmd, *response;
+  char **vec;
+  unsigned int len;
+  /* path keeps the logdirty directory; tail is where the node name goes */
+  sprintf(path, "/local/domain/0/device-model/%u/logdirty/", s->domid);
+  tail = path + strlen(path);
+
+  strcpy(tail, "ret");
+  if (!xs_watch(s->xsh, path, "qemu-logdirty-ret")) {
+    s->errstr = "error watching qemu logdirty return";
+    return -1;
+  }
+  /* null fire. XXX unify with shutdown watch! */
+  vec = xs_read_watch(s->xsh, &len);
+  free(vec);
+
+  strcpy(tail, "cmd");
+  cmd = enable ? "enable" : "disable";
+  if (!xs_write(s->xsh, XBT_NULL, path, cmd, strlen(cmd))) {
+    s->errstr = "error signalling qemu logdirty";
+    return -1;
+  }
+  /* block until the watch on "ret" fires */
+  vec = xs_read_watch(s->xsh, &len);
+  free(vec);
+
+  strcpy(tail, "ret");
+  xs_unwatch(s->xsh, path, "qemu-logdirty-ret");
+
+  response = xs_read(s->xsh, XBT_NULL, path, &len);
+  if (!response || strcmp(response, cmd)) {
+    /* a failed read returns NULL and may leave len stale, so test the
+     * pointer itself; free(NULL) is a no-op */
+    free(response);
+    s->errstr = "qemu logdirty command failed";
+    return -1;
+  }
+  free(response);
+  fprintf(stderr, "qemu logdirty mode: %s\n", cmd);
+  return 0;
+}
+
+static int suspend_hvm(checkpoint_state *s)
+{
+ int rc = -1;
+ /* suspend an HVM domain via hypercall, then pause its device model */
+ fprintf(stderr, "issuing HVM suspend hypercall\n");
+ rc = xc_domain_shutdown(s->xch, s->domid, SHUTDOWN_suspend);
+ if (rc < 0) {
+ s->errstr = "shutdown hypercall failed";
+ return -1;
+ }
+ fprintf(stderr, "suspend hypercall returned %d\n", rc);
+ /* success here is the watch reporting -1 (event for our domid); >= 0 is treated as failure */
+ if (check_shutdown_watch(s) >= 0)
+ return -1; /* note: errstr is not updated on this path */
+
+ rc = suspend_qemu(s);
+
+ return rc;
+}
+
+static int suspend_qemu(checkpoint_state *s)
+{
+  /* Ask the device model to save, then poll xenstore until its state reads
+   * "paused". Returns 0 once paused, -1 if the write or a read fails. */
+  char path[128];
+  char* state;
+  unsigned int len;
+  int paused;
+
+  fprintf(stderr, "pausing QEMU\n");
+
+  sprintf(path, "/local/domain/0/device-model/%d/command", s->domid);
+  if (!xs_write(s->xsh, XBT_NULL, path, "save", 4)) {
+    fprintf(stderr, "error signalling QEMU to save\n");
+    return -1;
+  }
+
+  sprintf(path, "/local/domain/0/device-model/%d/state", s->domid);
+
+  for (;;) {
+    state = xs_read(s->xsh, XBT_NULL, path, &len);
+    if (!state) {
+      s->errstr = "error reading QEMU state";
+      return -1;
+    }
+
+    paused = strcmp(state, "paused") == 0;
+    free(state);
+    if (paused)
+      return 0;
+
+    /* not there yet: sleep for a millisecond and look again */
+    usleep(1000);
+  }
+}
+
+static int resume_qemu(checkpoint_state *s)
+{
+  /* Write "continue" to the device model's command node; 0 on success,
+   * -1 if the xenstore write fails. */
+  char cmd_node[128];
+
+  fprintf(stderr, "resuming QEMU\n");
+  sprintf(cmd_node, "/local/domain/0/device-model/%d/command", s->domid);
+  if (xs_write(s->xsh, XBT_NULL, cmd_node, "continue", 8))
+    return 0;
+  fprintf(stderr, "error signalling QEMU to resume\n");
+  return -1;
+}
+
+static int send_qemu(checkpoint_state *s)
+{
+ char buf[8192];
+ char path[128];
+ struct stat sb;
+ uint32_t qlen = 0;
+ int qfd;
+ int rc;
+ /* append the saved device-model state file to the checkpoint stream */
+ if (s->fd < 0)
+ return -1; /* no stream yet; errstr is not set on this path */
+
+ sprintf(path, "/var/lib/xen/qemu-save.%d", s->domid);
+
+ if (stat(path, &sb) < 0) {
+ snprintf(errbuf, sizeof(errbuf),
+ "error getting QEMU state file status: %s", strerror(errno));
+ s->errstr = errbuf;
+ return -1;
+ }
+
+ qlen = sb.st_size; /* narrowed to 32 bits */
+ qfd = open(path, O_RDONLY);
+ if (qfd < 0) {
+ snprintf(errbuf, sizeof(errbuf), "error opening QEMU state file: %s",
+ strerror(errno));
+ s->errstr = errbuf;
+ return -1;
+ }
+ /* record layout: 21-byte tag, then qlen as raw uint32_t bytes, then the file contents */
+ fprintf(stderr, "Sending %u bytes of QEMU state\n", qlen);
+ if (write(s->fd, "RemusDeviceModelState", 21) != 21) {
+ s->errstr = "error writing QEMU header";
+ close(qfd);
+ return -1;
+ }
+ if (write(s->fd, &qlen, sizeof(qlen)) != sizeof(qlen)) {
+ s->errstr = "error writing QEMU size";
+ close(qfd);
+ return -1;
+ }
+ /* copy at most qlen bytes; the loop ends when read returns 0 or an error */
+ while ((rc = read(qfd, buf, qlen > sizeof(buf) ? sizeof(buf) : qlen)) > 0) {
+ qlen -= rc;
+ if (write(s->fd, buf, rc) != rc) { /* a short write is treated as an error */
+ rc = -1;
+ break;
+ }
+ }
+ if (rc < 0) {
+ snprintf(errbuf, sizeof(errbuf), "error writing QEMU state: %s",
+ strerror(errno));
+ s->errstr = errbuf;
+ }
+
+ close(qfd);
+ /* 0 after a complete copy, -1 on a read or write failure */
+ return rc;
+}
+
+/* thread responsible for suspending the domain early if necessary */
+static void *suspend_thread(void *arg)
+{
+ checkpoint_state* s = (checkpoint_state*)arg;
+ sigset_t tss;
+ int rc;
+ int sig;
+
+ fprintf(stderr, "Suspend thread started\n");
+ /* the timer created in create_suspend_timer signals with SIGRTMIN */
+ sigemptyset(&tss);
+ sigaddset(&tss, SIGRTMIN);
+
+ while (1) {
+ /* wait for checkpoint thread to signal resume */
+ if ((rc = sem_wait(&s->resumed_sem)))
+ fprintf(stderr, "Error waiting on resume semaphore\n");
+ /* then wait for SIGRTMIN to be delivered */
+ if ((rc = sigwait(&tss, &sig))) {
+ fprintf(stderr, "sigwait failed: %d %d\n", rc, errno);
+ break;
+ }
+ if (sig != SIGRTMIN)
+ fprintf(stderr, "received unexpected signal %d\n", sig);
+ /* done is only examined here, after both waits above have returned */
+ if (s->done)
+ break;
+
+ if (s->suspended) {
+ fprintf(stderr, "domain already suspended?\n");
+ } else {
+ rc = checkpoint_suspend(s); /* returns 1 on success, 0 on failure */
+ if (rc)
+ s->suspended = 1;
+ else
+ fprintf(stderr, "checkpoint_suspend failed\n");
+ }
+ /* wake checkpoint_wait whether or not the suspend succeeded */
+ if ((rc = sem_post(&s->suspended_sem)))
+ fprintf(stderr, "Error posting suspend semaphore\n");
+ }
+
+ fprintf(stderr, "Suspend thread exiting\n");
+
+ return NULL;
+}
+
+static int create_suspend_timer(checkpoint_state* s)
+{
+  /* Create the CLOCK_REALTIME timer whose expiry is delivered as SIGRTMIN.
+   * Returns 0 on success, -1 with errstr set on failure. */
+  struct sigevent notify;
+  int status;
+  notify.sigev_value.sival_int = 0;
+  notify.sigev_signo = SIGRTMIN;
+  notify.sigev_notify = SIGEV_SIGNAL;
+
+  status = timer_create(CLOCK_REALTIME, &notify, &s->timer);
+  if (status == 0)
+    return 0;
+  snprintf(errbuf, sizeof(errbuf), "Error creating timer: %d\n", status);
+  s->errstr = errbuf;
+  return -1;
+}
+
+void block_timer(void)
+{
+  /* add SIGRTMIN (the suspend timer's signal) to the caller's blocked set */
+  sigset_t timer_sig;
+
+  sigemptyset(&timer_sig);
+  sigaddset(&timer_sig, SIGRTMIN);
+  pthread_sigmask(SIG_BLOCK, &timer_sig, NULL);
+}
+
+void unblock_timer(void)
+{
+  /* remove SIGRTMIN (the suspend timer's signal) from the caller's blocked set */
+  sigset_t timer_sig;
+
+  sigemptyset(&timer_sig);
+  sigaddset(&timer_sig, SIGRTMIN);
+  pthread_sigmask(SIG_UNBLOCK, &timer_sig, NULL);
+}
+
+static int create_suspend_thread(checkpoint_state* s)
+{
+  /* set up both semaphores at zero, then start suspend_thread; 0 or -1 */
+  int status = sem_init(&s->suspended_sem, 0, 0);
+
+  if (status != 0) {
+    snprintf(errbuf, sizeof(errbuf),
+             "Error initializing suspend semaphore: %d\n", status);
+    s->errstr = errbuf;
+    return -1;
+  }
+  status = sem_init(&s->resumed_sem, 0, 0);
+  if (status != 0) {
+    snprintf(errbuf, sizeof(errbuf),
+             "Error initializing resume semaphore: %d\n", status);
+    s->errstr = errbuf;
+    return -1;
+  }
+
+  /* signal mask should be inherited */
+  block_timer();
+  status = pthread_create(&s->suspend_thr, NULL, suspend_thread, s);
+  if (status != 0) {
+    snprintf(errbuf, sizeof(errbuf), "Error creating suspend thread: %d\n", status);
+    s->errstr = errbuf;
+    return -1;
+  }
+  return 0;
+}
+
+static void stop_suspend_thread(checkpoint_state* s)
+{
+  /* Ask the suspend thread to exit and wait for it. The thread only checks
+   * `done` after sem_wait() and sigwait() have both returned, so wake both. */
+  s->done = 1;
+  if (sem_post(&s->resumed_sem))
+    fprintf(stderr, "error posting resume semaphore\n");
+  pthread_kill(s->suspend_thr, SIGRTMIN); /* timer may already be deleted */
+  pthread_join(s->suspend_thr, NULL);
+  s->suspend_thr = 0;
+}
diff --git a/tools/python/xen/lowlevel/netlink/libnetlink.c b/tools/python/xen/lowlevel/netlink/libnetlink.c
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/lowlevel/netlink/libnetlink.c
@@ -0,0 +1,585 @@
+/*
+ * libnetlink.c RTnetlink service routines.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <net/if_arp.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/uio.h>
+
+#include "libnetlink.h"
+
+/*
+ * Close the netlink socket held by rth.
+ *
+ * The descriptor is marked invalid afterwards so that a second call on the
+ * same handle is a no-op instead of closing whatever unrelated descriptor
+ * has since been given the same number.
+ */
+void rtnl_close(struct rtnl_handle *rth)
+{
+    if (rth->fd >= 0) {
+        close(rth->fd);
+        rth->fd = -1;
+    }
+}
+
+/*
+ * Open a netlink socket for the given protocol and bind it.
+ *
+ * rth           handle to initialise; any previous contents are discarded
+ * subscriptions multicast group mask stored in the bound address
+ * protocol      netlink protocol passed to socket()
+ *
+ * Returns 0 on success.  On failure a diagnostic is written to stderr, the
+ * socket (if it was created) is closed, rth->fd is set to -1 and -1 is
+ * returned.
+ */
+int rtnl_open_byproto(struct rtnl_handle *rth, unsigned subscriptions,
+                      int protocol)
+{
+    socklen_t addr_len;
+    int sndbuf = 32768;
+    int rcvbuf = 32768;
+
+    /* sizeof(*rth), not sizeof(rth): clear the whole handle rather than
+     * just the first pointer-sized bytes of it. */
+    memset(rth, 0, sizeof(*rth));
+
+    rth->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
+    if (rth->fd < 0) {
+        perror("Cannot open netlink socket");
+        return -1;
+    }
+
+    if (setsockopt(rth->fd,SOL_SOCKET,SO_SNDBUF,&sndbuf,sizeof(sndbuf)) < 0) {
+        perror("SO_SNDBUF");
+        goto fail;
+    }
+
+    if (setsockopt(rth->fd,SOL_SOCKET,SO_RCVBUF,&rcvbuf,sizeof(rcvbuf)) < 0) {
+        perror("SO_RCVBUF");
+        goto fail;
+    }
+
+    memset(&rth->local, 0, sizeof(rth->local));
+    rth->local.nl_family = AF_NETLINK;
+    rth->local.nl_groups = subscriptions;
+
+    if (bind(rth->fd, (struct sockaddr*)&rth->local, sizeof(rth->local)) < 0) {
+        perror("Cannot bind netlink socket");
+        goto fail;
+    }
+    /* Read the address back so rth->local holds what was actually bound. */
+    addr_len = sizeof(rth->local);
+    if (getsockname(rth->fd, (struct sockaddr*)&rth->local, &addr_len) < 0) {
+        perror("Cannot getsockname");
+        goto fail;
+    }
+    if (addr_len != sizeof(rth->local)) {
+        fprintf(stderr, "Wrong address length %d\n", addr_len);
+        goto fail;
+    }
+    if (rth->local.nl_family != AF_NETLINK) {
+        fprintf(stderr, "Wrong address family %d\n", rth->local.nl_family);
+        goto fail;
+    }
+    rth->seq = time(NULL);
+    return 0;
+
+fail:
+    /* The diagnostic has already been printed; do not leak the socket. */
+    close(rth->fd);
+    rth->fd = -1;
+    return -1;
+}
+
+/* Open rth on the NETLINK_ROUTE protocol; see rtnl_open_byproto(). */
+int rtnl_open(struct rtnl_handle *rth, unsigned subscriptions)
+{
+    const int protocol = NETLINK_ROUTE;
+
+    return rtnl_open_byproto(rth, subscriptions, protocol);
+}
+
+/*
+ * Send a dump request of the given message type whose payload is a
+ * struct rtgenmsg carrying only the address family.
+ *
+ * The request's sequence number is recorded in rth->dump.
+ * Returns the result of sendto().
+ */
+int rtnl_wilddump_request(struct rtnl_handle *rth, int family, int type)
+{
+    struct sockaddr_nl dest;
+    struct {
+        struct nlmsghdr nlh;
+        struct rtgenmsg g;
+    } req;
+
+    memset(&req, 0, sizeof(req));
+    req.g.rtgen_family = family;
+    req.nlh.nlmsg_len = sizeof(req);
+    req.nlh.nlmsg_type = type;
+    req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+    req.nlh.nlmsg_pid = 0;
+    rth->dump = ++rth->seq;
+    req.nlh.nlmsg_seq = rth->dump;
+
+    memset(&dest, 0, sizeof(dest));
+    dest.nl_family = AF_NETLINK;
+
+    return sendto(rth->fd, (void*)&req, sizeof(req), 0,
+                  (struct sockaddr*)&dest, sizeof(dest));
+}
+
+/*
+ * Send len bytes from buf on rth's socket to a zeroed AF_NETLINK address.
+ * Returns the result of sendto().
+ */
+int rtnl_send(struct rtnl_handle *rth, const char *buf, int len)
+{
+    struct sockaddr_nl dest;
+
+    memset(&dest, 0, sizeof(dest));
+    dest.nl_family = AF_NETLINK;
+
+    return sendto(rth->fd, buf, len, 0,
+                  (struct sockaddr*)&dest, sizeof(dest));
+}
+
+/*
+ * Send a dump request of the given type with a caller-supplied payload.
+ *
+ * The netlink header is built here and sent together with the len bytes at
+ * req as a two-element scatter/gather message.  The sequence number used is
+ * recorded in rth->dump.  Returns the result of sendmsg().
+ */
+int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len)
+{
+    struct nlmsghdr hdr;
+    struct sockaddr_nl dest;
+    struct iovec parts[2];
+    struct msghdr msg;
+
+    memset(&dest, 0, sizeof(dest));
+    dest.nl_family = AF_NETLINK;
+
+    hdr.nlmsg_len = NLMSG_LENGTH(len);
+    hdr.nlmsg_type = type;
+    hdr.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+    hdr.nlmsg_pid = 0;
+    rth->dump = ++rth->seq;
+    hdr.nlmsg_seq = rth->dump;
+
+    parts[0].iov_base = &hdr;
+    parts[0].iov_len = sizeof(hdr);
+    parts[1].iov_base = req;
+    parts[1].iov_len = len;
+
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_name = &dest;
+    msg.msg_namelen = sizeof(dest);
+    msg.msg_iov = parts;
+    msg.msg_iovlen = 2;
+
+    return sendmsg(rth->fd, &msg, 0);
+}
+
+/*
+ * Read the replies to the dump request last sent on rth and hand each
+ * message to filter(…, arg1).
+ *
+ * A message is treated as part of the dump only if it came from netlink pid
+ * 0, is addressed to rth->local.nl_pid and carries sequence number
+ * rth->dump; anything else is passed to junk(…, arg2) when junk is non-NULL
+ * and otherwise skipped.
+ *
+ * Returns 0 when NLMSG_DONE is seen, -1 on EOF, on an NLMSG_ERROR reply or
+ * on an unrecoverable receive error, or the negative value returned by a
+ * callback.  A trailing fragment in a datagram terminates the process via
+ * exit(1).
+ */
+int rtnl_dump_filter(struct rtnl_handle *rth,
+                     rtnl_filter_t filter,
+                     void *arg1,
+                     rtnl_filter_t junk,
+                     void *arg2)
+{
+    struct sockaddr_nl nladdr;
+    struct iovec iov;
+    struct msghdr msg = {
+        .msg_name = &nladdr,
+        .msg_namelen = sizeof(nladdr),
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+    };
+    char buf[16384];
+
+    iov.iov_base = buf;
+    while (1) {
+        int status;
+        struct nlmsghdr *h;
+
+        iov.iov_len = sizeof(buf);
+        status = recvmsg(rth->fd, &msg, 0);
+
+        if (status < 0) {
+            if (errno == EINTR || errno == EAGAIN)
+                continue;
+            if (errno == ENOBUFS) {
+                perror("OVERRUN");
+                continue;
+            }
+            /* Any other error (e.g. a bad descriptor) will not go away by
+             * retrying; give up instead of spinning forever. */
+            perror("netlink receive error");
+            return -1;
+        }
+
+        if (status == 0) {
+            fprintf(stderr, "EOF on netlink\n");
+            return -1;
+        }
+
+        h = (struct nlmsghdr*)buf;
+        while (NLMSG_OK(h, status)) {
+            int err;
+
+            if (nladdr.nl_pid != 0 ||
+                h->nlmsg_pid != rth->local.nl_pid ||
+                h->nlmsg_seq != rth->dump) {
+                if (junk) {
+                    err = junk(&nladdr, h, arg2);
+                    if (err < 0)
+                        return err;
+                }
+                goto skip_it;
+            }
+
+            if (h->nlmsg_type == NLMSG_DONE)
+                return 0;
+            if (h->nlmsg_type == NLMSG_ERROR) {
+                struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h);
+                if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
+                    fprintf(stderr, "ERROR truncated\n");
+                } else {
+                    errno = -err->error;
+                    perror("RTNETLINK answers");
+                }
+                return -1;
+            }
+            err = filter(&nladdr, h, arg1);
+            if (err < 0)
+                return err;
+
+skip_it:
+            h = NLMSG_NEXT(h, status);
+        }
+        if (msg.msg_flags & MSG_TRUNC) {
+            fprintf(stderr, "Message truncated\n");
+            continue;
+        }
+        if (status) {
+            fprintf(stderr, "!!!Remnant of size %d\n", status);
+            exit(1);
+        }
+    }
+}
+
+/*
+ * Send request n to (peer, groups) and wait for the matching reply.
+ *
+ * n's sequence number is assigned here, and NLM_F_ACK is added to its flags
+ * when answer is NULL, so n must point to writable memory.
+ *
+ * A reply matches when it comes from peer, is addressed to
+ * rtnl->local.nl_pid and carries the request's sequence number.  Other
+ * messages are handed to junk(…, jarg) if junk is non-NULL and then skipped.
+ *
+ * Returns 0 when an NLMSG_ERROR reply with error code 0 arrives, or when a
+ * non-error reply arrives and answer is non-NULL; in both cases the reply is
+ * copied to answer if that is non-NULL (the caller must provide room for
+ * h->nlmsg_len bytes).  Returns -1 on send/receive failure, EOF, a truncated
+ * message or an error reply, or the negative value returned by junk.
+ * Malformed input terminates the process via exit(1).
+ */
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
+              unsigned groups, struct nlmsghdr *answer,
+              rtnl_filter_t junk,
+              void *jarg)
+{
+    int status;
+    unsigned seq;
+    struct nlmsghdr *h;
+    struct sockaddr_nl nladdr;
+    struct iovec iov = {
+        .iov_base = (void*) n,
+        .iov_len = n->nlmsg_len
+    };
+    struct msghdr msg = {
+        .msg_name = &nladdr,
+        .msg_namelen = sizeof(nladdr),
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+    };
+    char buf[16384];
+
+    memset(&nladdr, 0, sizeof(nladdr));
+    nladdr.nl_family = AF_NETLINK;
+    nladdr.nl_pid = peer;
+    nladdr.nl_groups = groups;
+
+    n->nlmsg_seq = seq = ++rtnl->seq;
+
+    if (answer == NULL)
+        n->nlmsg_flags |= NLM_F_ACK;
+
+    status = sendmsg(rtnl->fd, &msg, 0);
+
+    if (status < 0) {
+        perror("Cannot talk to rtnetlink");
+        return -1;
+    }
+
+    memset(buf,0,sizeof(buf));
+
+    iov.iov_base = buf;
+
+    while (1) {
+        iov.iov_len = sizeof(buf);
+        status = recvmsg(rtnl->fd, &msg, 0);
+
+        if (status < 0) {
+            if (errno == EINTR || errno == EAGAIN)
+                continue;
+            if (errno == ENOBUFS) {
+                perror("OVERRUN");
+                continue;
+            }
+            /* Retrying cannot fix any other error; do not spin forever. */
+            perror("netlink receive error");
+            return -1;
+        }
+        if (status == 0) {
+            fprintf(stderr, "EOF on netlink\n");
+            return -1;
+        }
+        if (msg.msg_namelen != sizeof(nladdr)) {
+            fprintf(stderr, "sender address length == %d\n", msg.msg_namelen);
+            exit(1);
+        }
+        /* Compare as int: status can drop below zero after the aligned
+         * length of the last message is subtracted, and an unsigned
+         * comparison would then keep the loop running past the buffer. */
+        for (h = (struct nlmsghdr*)buf; status >= (int)sizeof(*h); ) {
+            int err;
+            int len = h->nlmsg_len;
+            int l = len - sizeof(*h);
+
+            if (l<0 || len>status) {
+                if (msg.msg_flags & MSG_TRUNC) {
+                    fprintf(stderr, "Truncated message\n");
+                    return -1;
+                }
+                fprintf(stderr, "!!!malformed message: len=%d\n", len);
+                exit(1);
+            }
+
+            if (nladdr.nl_pid != peer ||
+                h->nlmsg_pid != rtnl->local.nl_pid ||
+                h->nlmsg_seq != seq) {
+                if (junk) {
+                    err = junk(&nladdr, h, jarg);
+                    if (err < 0)
+                        return err;
+                }
+                /* Step past the unrelated message; a bare "continue" would
+                 * examine the same header again and never terminate. */
+                status -= NLMSG_ALIGN(len);
+                h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
+                continue;
+            }
+
+            if (h->nlmsg_type == NLMSG_ERROR) {
+                struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h);
+                if (l < sizeof(struct nlmsgerr)) {
+                    fprintf(stderr, "ERROR truncated\n");
+                } else {
+                    errno = -err->error;
+                    if (errno == 0) {
+                        if (answer)
+                            memcpy(answer, h, h->nlmsg_len);
+                        return 0;
+                    }
+                    perror("RTNETLINK answers");
+                }
+                return -1;
+            }
+            if (answer) {
+                memcpy(answer, h, h->nlmsg_len);
+                return 0;
+            }
+
+            fprintf(stderr, "Unexpected reply!!!\n");
+
+            status -= NLMSG_ALIGN(len);
+            h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
+        }
+        if (msg.msg_flags & MSG_TRUNC) {
+            fprintf(stderr, "Message truncated\n");
+            continue;
+        }
+        if (status) {
+            fprintf(stderr, "!!!Remnant of size %d\n", status);
+            exit(1);
+        }
+    }
+}
+
+/*
+ * Receive netlink messages on rtnl's socket indefinitely and pass each one
+ * to handler(…, jarg).
+ *
+ * Returns -1 on EOF, on a truncated message or on an unrecoverable receive
+ * error, or the negative value returned by handler.  Malformed input
+ * terminates the process via exit(1).
+ */
+int rtnl_listen(struct rtnl_handle *rtnl,
+                rtnl_filter_t handler,
+                void *jarg)
+{
+    int status;
+    struct nlmsghdr *h;
+    struct sockaddr_nl nladdr;
+    struct iovec iov;
+    struct msghdr msg = {
+        .msg_name = &nladdr,
+        .msg_namelen = sizeof(nladdr),
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+    };
+    char buf[8192];
+
+    memset(&nladdr, 0, sizeof(nladdr));
+    nladdr.nl_family = AF_NETLINK;
+    nladdr.nl_pid = 0;
+    nladdr.nl_groups = 0;
+
+    iov.iov_base = buf;
+    while (1) {
+        iov.iov_len = sizeof(buf);
+        status = recvmsg(rtnl->fd, &msg, 0);
+
+        if (status < 0) {
+            if (errno == EINTR || errno == EAGAIN)
+                continue;
+            if (errno == ENOBUFS) {
+                perror("OVERRUN");
+                continue;
+            }
+            /* Retrying cannot fix any other error; do not spin forever. */
+            perror("netlink receive error");
+            return -1;
+        }
+        if (status == 0) {
+            fprintf(stderr, "EOF on netlink\n");
+            return -1;
+        }
+        if (msg.msg_namelen != sizeof(nladdr)) {
+            fprintf(stderr, "Sender address length == %d\n", msg.msg_namelen);
+            exit(1);
+        }
+        /* Compare as int: status can drop below zero after the aligned
+         * length of the last message is subtracted. */
+        for (h = (struct nlmsghdr*)buf; status >= (int)sizeof(*h); ) {
+            int err;
+            int len = h->nlmsg_len;
+            int l = len - sizeof(*h);
+
+            if (l<0 || len>status) {
+                if (msg.msg_flags & MSG_TRUNC) {
+                    fprintf(stderr, "Truncated message\n");
+                    return -1;
+                }
+                fprintf(stderr, "!!!malformed message: len=%d\n", len);
+                exit(1);
+            }
+
+            err = handler(&nladdr, h, jarg);
+            if (err < 0)
+                return err;
+
+            status -= NLMSG_ALIGN(len);
+            h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
+        }
+        if (msg.msg_flags & MSG_TRUNC) {
+            fprintf(stderr, "Message truncated\n");
+            continue;
+        }
+        if (status) {
+            fprintf(stderr, "!!!Remnant of size %d\n", status);
+            exit(1);
+        }
+    }
+}
+
+/*
+ * Read netlink messages stored back to back in the stream rtnl and pass
+ * each one to handler(…, jarg) with a zeroed AF_NETLINK sender address.
+ *
+ * Returns 0 at a clean end of file, -1 on a read error, a malformed length
+ * or a truncated message, or the negative value returned by handler.
+ */
+int rtnl_from_file(FILE *rtnl, rtnl_filter_t handler,
+                   void *jarg)
+{
+    struct sockaddr_nl nladdr;
+    char buf[8192];
+    struct nlmsghdr *h = (void*)buf;
+
+    memset(&nladdr, 0, sizeof(nladdr));
+    nladdr.nl_family = AF_NETLINK;
+    nladdr.nl_pid = 0;
+    nladdr.nl_groups = 0;
+
+    while (1) {
+        size_t got;
+        int err, len;
+        int l;
+
+        /* fread() returns a count and never a negative value, so errors
+         * have to be detected with ferror(). */
+        got = fread(buf, 1, sizeof(*h), rtnl);
+
+        if (got == 0) {
+            if (!ferror(rtnl))
+                return 0;
+            if (errno == EINTR) {
+                clearerr(rtnl);
+                continue;
+            }
+            perror("rtnl_from_file: fread");
+            return -1;
+        }
+        if (got < sizeof(*h)) {
+            /* A partial header must not be interpreted as a full one. */
+            fprintf(stderr, "rtnl-from_file: truncated message\n");
+            return -1;
+        }
+
+        len = h->nlmsg_len;
+        l = len - (int)sizeof(*h);
+
+        if (l < 0 || len > (int)sizeof(buf)) {
+            fprintf(stderr, "!!!malformed message: len=%d @%lu\n",
+                    len, (unsigned long)ftell(rtnl));
+            return -1;
+        }
+
+        got = fread(NLMSG_DATA(h), 1, NLMSG_ALIGN(l), rtnl);
+
+        if (got < (size_t)l) {
+            if (ferror(rtnl))
+                perror("rtnl_from_file: fread");
+            else
+                fprintf(stderr, "rtnl-from_file: truncated message\n");
+            return -1;
+        }
+
+        err = handler(&nladdr, h, jarg);
+        if (err < 0)
+            return err;
+    }
+}
+
+/*
+ * Append a 4-byte attribute of the given type to message n and grow
+ * n->nlmsg_len accordingly.  maxlen is the size of the buffer holding n.
+ * Returns 0 on success, -1 (after a message on stderr) if it does not fit.
+ */
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data)
+{
+    struct rtattr *attr;
+    int attr_len = RTA_LENGTH(4);
+
+    if (NLMSG_ALIGN(n->nlmsg_len) + attr_len <= maxlen) {
+        attr = (struct rtattr *)((char *)n + NLMSG_ALIGN(n->nlmsg_len));
+        attr->rta_type = type;
+        attr->rta_len = attr_len;
+        memcpy(RTA_DATA(attr), &data, 4);
+        n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + attr_len;
+        return 0;
+    }
+
+    fprintf(stderr,"addattr32: Error! max allowed bound %d exceeded\n",maxlen);
+    return -1;
+}
+
+/*
+ * Append an attribute of the given type carrying alen bytes from data to
+ * message n and grow n->nlmsg_len by the aligned attribute size.
+ * maxlen is the size of the buffer holding n.
+ * Returns 0 on success, -1 (after a message on stderr) if it does not fit.
+ */
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
+              int alen)
+{
+    struct rtattr *attr;
+    int attr_len = RTA_LENGTH(alen);
+
+    if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(attr_len) <= maxlen) {
+        attr = (struct rtattr *)((char *)n + NLMSG_ALIGN(n->nlmsg_len));
+        attr->rta_type = type;
+        attr->rta_len = attr_len;
+        memcpy(RTA_DATA(attr), data, alen);
+        n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(attr_len);
+        return 0;
+    }
+
+    fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\n",maxlen);
+    return -1;
+}
+
+/*
+ * Append len raw bytes from data to message n, zero the alignment padding
+ * that follows them and grow n->nlmsg_len by the aligned length.
+ * maxlen is the size of the buffer holding n.
+ * Returns 0 on success, -1 (after a message on stderr) if it does not fit.
+ */
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len)
+{
+    char *tail;
+
+    if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) <= maxlen) {
+        tail = (char *)n + NLMSG_ALIGN(n->nlmsg_len);
+        memcpy(tail, data, len);
+        memset(tail + len, 0, NLMSG_ALIGN(len) - len);
+        n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);
+        return 0;
+    }
+
+    fprintf(stderr, "addraw_l ERROR: message exceeded bound of %d\n",maxlen);
+    return -1;
+}
+
+/*
+ * Append a nested 4-byte attribute of the given type inside attribute rta
+ * and grow rta->rta_len accordingly.  maxlen bounds the size of rta.
+ * Returns 0 on success, -1 (after a message on stderr) if it does not fit.
+ */
+int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data)
+{
+    struct rtattr *nested;
+    int nested_len = RTA_LENGTH(4);
+
+    if (RTA_ALIGN(rta->rta_len) + nested_len <= maxlen) {
+        nested = (struct rtattr*)(((char*)rta) + RTA_ALIGN(rta->rta_len));
+        nested->rta_type = type;
+        nested->rta_len = nested_len;
+        memcpy(RTA_DATA(nested), &data, 4);
+        rta->rta_len = NLMSG_ALIGN(rta->rta_len) + nested_len;
+        return 0;
+    }
+
+    fprintf(stderr,"rta_addattr32: Error! max allowed bound %d exceeded\n",maxlen);
+    return -1;
+}
+
+/*
+ * Append a nested attribute of the given type carrying alen bytes from data
+ * inside attribute rta and grow rta->rta_len by the aligned nested size.
+ * maxlen bounds the size of rta.
+ * Returns 0 on success, -1 (after a message on stderr) if it does not fit.
+ */
+int rta_addattr_l(struct rtattr *rta, int maxlen, int type,
+                  const void *data, int alen)
+{
+    struct rtattr *nested;
+    int nested_len = RTA_LENGTH(alen);
+
+    if (RTA_ALIGN(rta->rta_len) + RTA_ALIGN(nested_len) <= maxlen) {
+        nested = (struct rtattr*)(((char*)rta) + RTA_ALIGN(rta->rta_len));
+        nested->rta_type = type;
+        nested->rta_len = nested_len;
+        memcpy(RTA_DATA(nested), data, alen);
+        rta->rta_len = NLMSG_ALIGN(rta->rta_len) + RTA_ALIGN(nested_len);
+        return 0;
+    }
+
+    fprintf(stderr,"rta_addattr_l: Error! max allowed bound %d exceeded\n",maxlen);
+    return -1;
+}
+
+/*
+ * Index the attributes in the len bytes starting at rta by type.
+ *
+ * tb must have room for max + 1 entries; it is cleared first, then
+ * tb[type] is pointed at each attribute whose type is <= max (a later
+ * attribute of the same type replaces an earlier one).  Leftover bytes are
+ * reported on stderr.  Always returns 0.
+ */
+int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
+{
+    memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+
+    for (; RTA_OK(rta, len); rta = RTA_NEXT(rta,len)) {
+        if (rta->rta_type <= max)
+            tb[rta->rta_type] = rta;
+    }
+
+    if (len != 0)
+        fprintf(stderr, "!!!Deficit %d, rta_len=%d\n", len, rta->rta_len);
+    return 0;
+}
+
+/*
+ * Collect the attributes in the len bytes starting at rta in order of
+ * appearance.
+ *
+ * tb must have room for max entries; it is cleared first, then filled with
+ * pointers to attributes whose type is <= max until max entries are stored.
+ * Leftover bytes are reported on stderr.  Returns the number of entries
+ * stored.
+ */
+int parse_rtattr_byindex(struct rtattr *tb[], int max, struct rtattr *rta, int len)
+{
+    int count = 0;
+
+    memset(tb, 0, sizeof(struct rtattr *) * max);
+
+    for (; RTA_OK(rta, len); rta = RTA_NEXT(rta,len)) {
+        if (rta->rta_type <= max && count < max)
+            tb[count++] = rta;
+    }
+
+    if (len != 0)
+        fprintf(stderr, "!!!Deficit %d, rta_len=%d\n", len, rta->rta_len);
+    return count;
+}
diff --git a/tools/python/xen/lowlevel/netlink/libnetlink.h b/tools/python/xen/lowlevel/netlink/libnetlink.h
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/lowlevel/netlink/libnetlink.h
@@ -0,0 +1,58 @@
+#ifndef __LIBNETLINK_H__
+#define __LIBNETLINK_H__ 1
+
+#include <netinet/in.h>
+#include <asm/types.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+/* State of one netlink socket, filled in by rtnl_open_byproto(). */
+struct rtnl_handle
+{
+ /* socket descriptor returned by socket() */
+ int fd;
+ /* address the socket is bound to, as read back with getsockname() */
+ struct sockaddr_nl local;
+ /* not referenced by the routines in libnetlink.c */
+ struct sockaddr_nl peer;
+ /* sequence counter: seeded from time() at open, incremented per request */
+ __u32 seq;
+ /* sequence number of the most recent dump request; rtnl_dump_filter()
+  * matches replies against it */
+ __u32 dump;
+};
+
+extern int rtnl_open(struct rtnl_handle *rth, unsigned subscriptions);
+extern int rtnl_open_byproto(struct rtnl_handle *rth, unsigned subscriptions, int protocol);
+extern void rtnl_close(struct rtnl_handle *rth);
+extern int rtnl_wilddump_request(struct rtnl_handle *rth, int fam, int type);
+extern int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len);
+
+typedef int (*rtnl_filter_t)(const struct sockaddr_nl *,
+ struct nlmsghdr *n, void *);
+extern int rtnl_dump_filter(struct rtnl_handle *rth, rtnl_filter_t filter,
+ void *arg1,
+ rtnl_filter_t junk,
+ void *arg2);
+extern int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
+ unsigned groups, struct nlmsghdr *answer,
+ rtnl_filter_t junk,
+ void *jarg);
+extern int rtnl_send(struct rtnl_handle *rth, const char *buf, int);
+
+
+extern int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data);
+extern int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen);
+extern int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len);
+extern int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data);
+extern int rta_addattr_l(struct rtattr *rta, int maxlen, int type, const void *data, int alen);
+
+extern int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len);
+extern int parse_rtattr_byindex(struct rtattr *tb[], int max, struct rtattr *rta, int len);
+
+#define parse_rtattr_nested(tb, max, rta) \
+ (parse_rtattr((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta)))
+
+extern int rtnl_listen(struct rtnl_handle *, rtnl_filter_t handler,
+ void *jarg);
+extern int rtnl_from_file(FILE *, rtnl_filter_t handler,
+ void *jarg);
+
+#define NLMSG_TAIL(nmsg) \
+ ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
+
+#endif /* __LIBNETLINK_H__ */
+
diff --git a/tools/python/xen/lowlevel/netlink/netlink.c b/tools/python/xen/lowlevel/netlink/netlink.c
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/lowlevel/netlink/netlink.c
@@ -0,0 +1,211 @@
+/* python binding to libnetlink */
+
+#include <Python.h>
+#include "libnetlink.h"
+
+#define PKG "xen.lowlevel.netlink"
+
+/* Instance layout of the Python rtnl type: a wrapper around one
+ * struct rtnl_handle. */
+typedef struct {
+ PyObject_HEAD
+ /* presumably records whether rth currently holds an open socket --
+  * NOTE(review): confirm against the init/dealloc code */
+ int opened;
+ /* the libnetlink handle all methods operate on */
+ struct rtnl_handle rth;
+} PyRtnlObject;
+
+/* todo: subscriptions? */
+/* tp_new: allocate an instance through the type's allocator; the arguments
+ * are ignored here. */
+static PyObject* PyRtnl_new(PyTypeObject* type, PyObject* args,
+                            PyObject* kwargs)
+{
+    PyObject* instance = type->tp_alloc(type, 0);
+
+    return instance;
+}
+
+/*
+ * tp_init: open the rtnetlink socket for this object.
+ *
+ * self->opened tracks whether self->rth holds an open socket, so that the
+ * destructor (and a repeated __init__ call) only closes a descriptor this
+ * object actually owns.  Returns 0 on success; on failure raises IOError
+ * and returns -1.
+ */
+static int PyRtnl_init(PyObject* obj, PyObject* args, PyObject* kwargs)
+{
+    PyRtnlObject* self = (PyRtnlObject*)obj;
+
+    /* __init__ may be invoked again on a live object: drop the old socket
+     * instead of leaking it. */
+    if (self->opened) {
+        rtnl_close(&self->rth);
+        self->opened = 0;
+    }
+
+    if (rtnl_open(&self->rth, 0) < 0) {
+        /* rtnl_open may have created the socket before failing. */
+        if (self->rth.fd >= 0) {
+            rtnl_close(&self->rth);
+            self->rth.fd = -1;
+        }
+        PyErr_SetString(PyExc_IOError, "could not open rtnl handle");
+        return -1;
+    }
+    self->opened = 1;
+
+    return 0;
+}
+
+/*
+ * tp_dealloc: close the socket if one is open and release the object's
+ * memory through the type's tp_free slot.
+ */
+static void PyRtnl_dealloc(PyRtnlObject* obj)
+{
+    PyRtnlObject* self = (PyRtnlObject*)obj;
+
+    /* Without this check an object whose __init__ never ran (or failed)
+     * would close whatever descriptor number happens to be in rth.fd. */
+    if (self->opened) {
+        rtnl_close(&self->rth);
+        self->opened = 0;
+    }
+
+    /* A tp_dealloc implementation must free the object itself. */
+    self->ob_type->tp_free((PyObject*)self);
+}
+
+/*
+ * rtnl.talk(msg[, peer[, groups]]): send one packed netlink message and wait
+ * for the acknowledgement.
+ *
+ * rtnl_talk() writes the sequence number and flags into the request, so the
+ * message is copied into a private buffer first; the bytes handed in from
+ * Python belong to an immutable string object and must not be modified.
+ *
+ * Raises ValueError if msg is shorter than a netlink header or its
+ * nlmsg_len field is inconsistent with the string length, MemoryError if the
+ * copy cannot be allocated, IOError if rtnl_talk() fails.  Returns None.
+ */
+static PyObject* pyrtnl_talk(PyObject* obj, PyObject* args)
+{
+    PyRtnlObject* self = (PyRtnlObject*)obj;
+    char* msg;
+    int len;
+    int peer = 0;
+    int groups = 0;
+    struct nlmsghdr* req;
+    size_t buflen;
+    int rc;
+
+    if (!PyArg_ParseTuple(args, "s#|ii", &msg, &len, &peer, &groups))
+        return NULL;
+
+    if (len < (int)sizeof(struct nlmsghdr)) {
+        PyErr_SetString(PyExc_ValueError,
+                        "message is shorter than a netlink header");
+        return NULL;
+    }
+
+    /* Round up so a header length that includes alignment padding still
+     * lies inside the buffer; the padding is zero-filled. */
+    buflen = NLMSG_ALIGN(len);
+    req = PyMem_Malloc(buflen);
+    if (!req)
+        return PyErr_NoMemory();
+    memset(req, 0, buflen);
+    memcpy(req, msg, len);
+
+    if (req->nlmsg_len < sizeof(struct nlmsghdr) || req->nlmsg_len > buflen) {
+        PyMem_Free(req);
+        PyErr_SetString(PyExc_ValueError,
+                        "nlmsg_len does not match message size");
+        return NULL;
+    }
+
+    rc = rtnl_talk(&self->rth, req, peer, groups, NULL, NULL, NULL);
+    PyMem_Free(req);
+
+    if (rc < 0) {
+        PyErr_SetString(PyExc_IOError, "error sending message");
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+/*
+ * rtnl.wilddump_request(family, type): forward to rtnl_wilddump_request().
+ * Raises IOError if the request cannot be sent.  Returns None.
+ */
+static PyObject* pyrtnl_wilddump_request(PyObject* obj, PyObject* args)
+{
+    PyRtnlObject* self = (PyRtnlObject*)obj;
+    int af;
+    int msgtype;
+    int rc;
+
+    if (!PyArg_ParseTuple(args, "ii", &af, &msgtype))
+        return NULL;
+
+    rc = rtnl_wilddump_request(&self->rth, af, msgtype);
+    if (rc < 0) {
+        PyErr_SetString(PyExc_IOError, "could not send dump request");
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+/*
+ * rtnl.dump_request(type, req): forward to rtnl_dump_request() with the
+ * bytes of req as the request payload.
+ * Raises IOError if the request cannot be sent.  Returns None.
+ */
+static PyObject* pyrtnl_dump_request(PyObject* obj, PyObject* args)
+{
+    PyRtnlObject* self = (PyRtnlObject*)obj;
+    char* payload;
+    int payload_len;
+    int msgtype;
+    int rc;
+
+    if (!PyArg_ParseTuple(args, "is#", &msgtype, &payload, &payload_len))
+        return NULL;
+
+    rc = rtnl_dump_request(&self->rth, msgtype, payload, payload_len);
+    if (rc < 0) {
+        PyErr_SetString(PyExc_IOError, "could not send dump request");
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+/*
+ * translate args to python and call python callback
+ *
+ * arg is the Python callable.  It is called with two strings: the raw bytes
+ * of the sender address and the raw bytes of the netlink message.
+ * Returns 0 if the call succeeds (its result is discarded), -1 if building
+ * the arguments fails or the callable raises; the Python exception is left
+ * set in that case.
+ */
+static int dump_filter_helper(const struct sockaddr_nl *who,
+                              struct nlmsghdr *n, void *arg)
+{
+    PyObject* filter = arg;
+    PyObject* args;
+    PyObject* result;
+
+    /* "s#" takes an int length here; sizeof yields size_t, which must not
+     * be passed through varargs in its place. */
+    args = Py_BuildValue("s#s#", (const char*)who, (int)sizeof(*who),
+                         (const char*)n, (int)n->nlmsg_len);
+    if (!args)
+        return -1;
+
+    result = PyObject_CallObject(filter, args);
+    Py_DECREF(args);
+    if (!result)
+        return -1;
+
+    /* result is ignored as long as an exception isn't raised */
+    Py_DECREF(result);
+    return 0;
+}
+
+/*
+ * rtnl.dump_filter(callable): run rtnl_dump_filter() and call the given
+ * callable for every message of the pending dump.
+ *
+ * Raises TypeError if the argument is not callable.  If the callable raises,
+ * that exception propagates; if the dump fails inside libnetlink without a
+ * Python exception having been set, IOError is raised.  Returns None.
+ */
+static PyObject* pyrtnl_dump_filter(PyObject* obj, PyObject* args)
+{
+    PyRtnlObject* self = (PyRtnlObject*)obj;
+    PyObject *filter;
+    int rc;
+
+    if (!PyArg_ParseTuple(args, "O:dump_filter", &filter))
+        return NULL;
+
+    if (!PyCallable_Check(filter)) {
+        PyErr_SetString(PyExc_TypeError, "parameter must be callable");
+        return NULL;
+    }
+
+    /* Hold a reference for the duration of the dump. */
+    Py_INCREF(filter);
+    rc = rtnl_dump_filter(&self->rth, dump_filter_helper, filter, NULL,
+                          NULL);
+    Py_DECREF(filter);
+
+    if (rc < 0) {
+        /* Returning NULL without an exception set makes the interpreter
+         * raise SystemError; report netlink-level failures explicitly. */
+        if (!PyErr_Occurred())
+            PyErr_SetString(PyExc_IOError, "error reading netlink dump");
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+/* Methods of the rtnl type: Python name, C implementation, calling
+ * convention and docstring.  The all-NULL entry terminates the table. */
+static PyMethodDef PyRtnl_methods[] = {
+ { "talk", pyrtnl_talk, METH_VARARGS,
+ "send a message to rtnetlink and receive a response.\n" },
+ { "wilddump_request", pyrtnl_wilddump_request, METH_VARARGS,
+ "dump objects.\n" },
+ { "dump_request", pyrtnl_dump_request, METH_VARARGS,
+ "start a dump of a particular netlink type.\n" },
+ { "dump_filter", pyrtnl_dump_filter, METH_VARARGS,
+ "iterate over an rtnl dump.\n" },
+ { NULL }
+};
+
+/* Type object for the rtnl class.  Only the destructor, the method table,
+ * tp_init and tp_new are supplied; every other slot is left empty.  The
+ * initialiser is positional, so the entries must stay in slot order. */
+static PyTypeObject PyRtnlType = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ PKG ".rtnl", /* tp_name */
+ sizeof(PyRtnlObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)PyRtnl_dealloc, /* tp_dealloc */
+ NULL, /* tp_print */
+ NULL, /* tp_getattr */
+ NULL, /* tp_setattr */
+ NULL, /* tp_compare */
+ NULL, /* tp_repr */
+ NULL, /* tp_as_number */
+ NULL, /* tp_as_sequence */
+ NULL, /* tp_as_mapping */
+ NULL, /* tp_hash */
+ NULL, /* tp_call */
+ NULL, /* tp_str */
+ NULL, /* tp_getattro */
+ NULL, /* tp_setattro */
+ NULL, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ "rtnetlink handle", /* tp_doc */
+ NULL, /* tp_traverse */
+ NULL, /* tp_clear */
+ NULL, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ NULL, /* tp_iter */
+ NULL, /* tp_iternext */
+ PyRtnl_methods, /* tp_methods */
+ NULL, /* tp_members */
+ NULL, /* tp_getset */
+ NULL, /* tp_base */
+ NULL, /* tp_dict */
+ NULL, /* tp_descr_get */
+ NULL, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ PyRtnl_init, /* tp_init */
+ NULL, /* tp_alloc */
+ PyRtnl_new, /* tp_new */
+};
+
+static PyMethodDef methods[] = {
+ { NULL }
+};
+
+static char doc[] = "libnetlink wrapper";
+
+/*
+ * Module initialisation: finalise the rtnl type, create the module and
+ * publish the type in it under the name "rtnl".  Returns early if either of
+ * the first two steps fails.
+ */
+PyMODINIT_FUNC initnetlink(void)
+{
+    PyObject *module;
+
+    if (PyType_Ready(&PyRtnlType) == -1)
+        return;
+
+    module = Py_InitModule3(PKG, methods, doc);
+    if (!module)
+        return;
+
+    Py_INCREF(&PyRtnlType);
+    PyModule_AddObject(module, "rtnl", (PyObject *)&PyRtnlType);
+}
diff --git a/tools/python/xen/remus/__init__.py b/tools/python/xen/remus/__init__.py
new file mode 100644
diff --git a/tools/python/xen/remus/blkdev.py b/tools/python/xen/remus/blkdev.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/blkdev.py
@@ -0,0 +1,31 @@
+handlers = []
+
+class BlkDevException(Exception): pass
+
+class BlkDev(object):
+    "Object representing a VM block device"
+    def __init__(self, **props):
+        """Build a block device from its property dictionary.
+
+        'dev' and 'mode' are required (BlkDevException is raised if either
+        is missing); 'uname' defaults to the empty string.  Every property
+        becomes an attribute, and a trailing ':disk' is removed from dev.
+        """
+        self.uname = ''
+        if 'dev' not in props:
+            raise BlkDevException('no device')
+        #if 'uname' not in props:
+            #raise BlkDevException('no uname')
+        if 'mode' not in props:
+            raise BlkDevException('no mode')
+        self.__dict__.update(props)
+        # Remove the literal ':disk' suffix only.  str.rstrip(':disk') treats
+        # its argument as a set of characters and would also eat trailing
+        # 'd', 'i', 's' and 'k' from the device name ('hdd:disk' -> 'h').
+        dev = props['dev']
+        if dev.endswith(':disk'):
+            dev = dev[:-len(':disk')]
+        self.dev = dev
+
+    def __str__(self):
+        return '%s,%s,%s' % (self.uname, self.dev, self.mode)
+
+# Adds handler to the front of the module-level handlers list unless it is
+# already present, so the most recently registered handler comes first.
+def register(handler):
+ "register a block device class with parser"
+ if handler not in handlers:
+ handlers.insert(0, handler)
+
+# props is passed as keyword arguments to each registered handler's
+# handles(); the first handler that accepts it is instantiated with the same
+# keyword arguments.  If none accepts, a plain BlkDev is returned.
+def parse(props):
+ "turn a vm device dictionary into a blkdev object"
+ for handler in handlers:
+ if handler.handles(**props):
+ return handler(**props)
+ return BlkDev(**props)
diff --git a/tools/python/xen/remus/image.py b/tools/python/xen/remus/image.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/image.py
@@ -0,0 +1,227 @@
+# VM image file manipulation
+
+import logging, struct
+
+import vm
+
+SIGNATURE = 'LinuxGuestRecord'
+LONGLEN = struct.calcsize('L')
+INTLEN = struct.calcsize('i')
+PAGE_SIZE = 4096
+# ~0L
+P2M_EXT_SIG = 4294967295L
+# frames per page
+FPP = 1024
+LTAB_MASK = 0xf << 28
+BATCH_SIZE = 1024
+IDXLEN = INTLEN + BATCH_SIZE * LONGLEN
+
+logging.basicConfig(level=logging.DEBUG)
+log = logging.getLogger()
+
+class VMParseException(Exception): pass
+
+# Reader for a saved VM image: parses the signature and header, and on
+# request the P2M frame list that follows.
+class VMImage(object):
+ # NOTE(review): the docstring below mentions a 'compact' option, but this
+ # constructor takes no such parameter (Writer does).
+ def __init__(self, img=None):
+ """img may be a path or a file object.
+ If compact is True, apply checkpoints to base image instead
+ of simply concatenating them.
+ """
+ self.img = img
+
+ self.dom = None
+ self.fd = None
+ self.header = None
+ self.nr_pfns = 0
+ # p2m extension header (unparsed)
+ self.p2mext = None
+
+ if self.img:
+ self.open(self.img)
+
+ # A str argument is opened as a binary file; anything else is used as an
+ # already-open file object.  The header is read immediately.
+ def open(self, img):
+ if isinstance(img, str):
+ self.fd = file(img, 'rb')
+ else:
+ self.fd = img
+
+ self.readheader()
+
+ # Checks the SIGNATURE bytes, reads a network-order int length and that
+ # many header bytes, and stores the parsed header in self.dom.
+ # Raises VMParseException if the signature does not match.
+ def readheader(self):
+ sig = self.fd.read(len(SIGNATURE))
+ if sig != SIGNATURE:
+ raise VMParseException("Bad signature in image")
+
+ hlen = self.fd.read(INTLEN)
+ hlen, = struct.unpack('!i', hlen)
+
+ self.header = self.fd.read(hlen)
+ self.dom = parseheader(self.header)
+
+ # Reads the page count, then the first frame-list word.  If that word is
+ # P2M_EXT_SIG, an unsigned int length and that many extension bytes
+ # follow (kept unparsed in self.p2mext) before the real first word.
+ # The result is stored in self.p2mfl.  p2mhdr is assigned but not used.
+ def readp2mfl(self):
+ "read the P2M frame list"
+ pfnlen = self.fd.read(LONGLEN)
+ self.nr_pfns, = struct.unpack('L', pfnlen)
+ p2m0 = self.fd.read(LONGLEN)
+
+ p2mhdr = p2m0
+ p2m0, = struct.unpack('L', p2m0)
+ if p2m0 == P2M_EXT_SIG:
+ elen = self.fd.read(INTLEN)
+ elen, = struct.unpack('I', elen)
+
+ self.p2mext = self.fd.read(elen)
+
+ p2m0 = self.fd.read(LONGLEN)
+ p2m0, = struct.unpack('L', p2m0)
+ p2mfl = [p2m0]
+
+ p2mfle = (self.nr_pfns + FPP - 1)/FPP - 1
+ p2ms = self.fd.read(LONGLEN * p2mfle)
+ p2mfl.extend(struct.unpack('%dL' % p2mfle, p2ms))
+
+ self.p2mfl = p2mfl
+
+ # NOTE(review): neither self.ofd nor self.tail is assigned anywhere in
+ # this class, so calling this would raise AttributeError -- looks like a
+ # leftover from code that now lives in Writer; confirm and remove.
+ def flush(self):
+ self.ofd.write(self.tail)
+
+# fd is the output file object.  With compact=False each batch is written
+# through unchanged; with compact=True pages are written at offsets derived
+# from their PFN and an index is written on flush (see writebatch and
+# writeindex).
+class Writer(object):
+ """compress a stream of checkpoints into a single image of the
+ last checkpoint"""
+ def __init__(self, fd, compact=False):
+ self.fd = fd
+ self.compact = compact
+
+ self.vm = None
+ self.tail = None
+ # offset to first batch of pages
+ self.imgstart = 0
+ # PFN mappings
+ self.pfns = []
+
+ # Flushes pending output when the object is collected.
+ def __del__(self):
+ self.close()
+
+ # Writes SIGNATURE, the network-order int header length and the header
+ # text taken from the VMImage in self.vm.
+ def writeheader(self):
+ hlen = struct.pack('!i', len(self.vm.header))
+ header = ''.join([SIGNATURE, hlen, self.vm.header])
+ self.fd.write(header)
+
+    def writep2mfl(self):
+        """Write the page count, the optional P2M extension block and the
+        P2M frame list, in the layout VMImage.readp2mfl() parses."""
+        p2m = [struct.pack('L', self.vm.nr_pfns)]
+        if self.vm.p2mext:
+            # readp2mfl() expects the signature word, then an unsigned int
+            # byte count, then the extension bytes.  The count has to be
+            # written too, otherwise the image cannot be read back.
+            p2m.extend([struct.pack('L', P2M_EXT_SIG),
+                        struct.pack('I', len(self.vm.p2mext)),
+                        self.vm.p2mext])
+        p2m.append(struct.pack('%dL' % len(self.vm.p2mfl), *self.vm.p2mfl))
+        self.fd.write(''.join(p2m))
+
+ # batch is one raw batch string as returned by readbatch().  In
+ # non-compact mode it is written unchanged.  In compact mode it is split
+ # into (pfn descriptor, page) pairs and each page is written at a file
+ # offset computed from its PFN; the descriptor is remembered in
+ # self.pfns for writeindex().
+ def writebatch(self, batch):
+ # file offset of page pfn: image start, plus one index header of
+ # IDXLEN bytes per batch up to and including pfn's, plus the pages
+ # before it
+ def offset(pfn):
+ isz = (pfn / BATCH_SIZE + 1) * IDXLEN
+ return self.imgstart + isz + pfn * PAGE_SIZE
+
+ if not self.compact:
+ return self.fd.write(batch)
+
+ batch = parsebatch(batch)
+ # sort pages for better disk seek behaviour
+ batch.sort(lambda x, y: cmp(x[0] & ~LTAB_MASK, y[0] & ~LTAB_MASK))
+
+ for pfndesc, page in batch:
+ # strip the bits covered by LTAB_MASK to get the frame number
+ pfn = pfndesc & ~LTAB_MASK
+ if pfn > self.vm.nr_pfns:
+ log.error('INVALID PFN: %d' % pfn)
+ if len(self.pfns) <= pfn:
+ self.pfns.extend([0] * (pfn - len(self.pfns) + 1))
+ self.pfns[pfn] = pfndesc
+ self.fd.seek(offset(pfn))
+ self.fd.write(page)
+
+ #print "max offset: %d, %d" % (len(self.pfns), offset(self.pfns[-1]))
+
+ # For every group of BATCH_SIZE entries in self.pfns, seeks to the start
+ # of that group's region and writes the entry count followed by the
+ # descriptors.
+ def writeindex(self):
+ "Write batch header in front of each page"
+ hdrlen = INTLEN + BATCH_SIZE * LONGLEN
+ batches = (len(self.pfns) + BATCH_SIZE - 1) / BATCH_SIZE
+
+ for i in xrange(batches):
+ offset = self.imgstart + i * (hdrlen + (PAGE_SIZE * BATCH_SIZE))
+ pfnoff = i * BATCH_SIZE
+ # python auto-clamps overreads
+ pfns = self.pfns[pfnoff:pfnoff + BATCH_SIZE]
+
+ self.fd.seek(offset)
+ self.fd.write(struct.pack('i', len(pfns)))
+ self.fd.write(struct.pack('%dL' % len(pfns), *pfns))
+
+ # Batches are read with readbatch() until one reports a length <= 0;
+ # that final batch plus everything left in ifd is kept in self.tail and
+ # written out by flush().
+ def slurp(self, ifd):
+ """Apply an incremental checkpoint to a loaded image.
+ accepts a path or a file object."""
+ if isinstance(ifd, str):
+ ifd = file(ifd, 'rb')
+
+ if not self.vm:
+ self.vm = VMImage(ifd)
+ self.writeheader()
+
+ self.vm.readp2mfl()
+ self.writep2mfl()
+ self.imgstart = self.fd.tell()
+
+ while True:
+ l, batch = readbatch(ifd)
+ if l <= 0:
+ break
+ self.writebatch(batch)
+ self.tail = batch + ifd.read()
+
+ # Appends the saved tail at the end of the file, writes the index in
+ # compact mode, then clears the tail; does nothing when no tail is
+ # pending.
+ def flush(self):
+ if self.tail:
+ self.fd.seek(0, 2)
+ self.fd.write(self.tail)
+ if self.compact:
+ self.writeindex()
+ self.tail = None
+
+ # Only flushes; self.fd is not closed here.
+ def close(self):
+ self.flush()
+
+# Converts the header text with vm.strtosxpr() and returns the result of
+# vm.parsedominfo() on it.
+def parseheader(header):
+ "parses a header sexpression"
+ return vm.parsedominfo(vm.strtosxpr(header))
+
+# Returns SIGNATURE, the network-order int length of the serialised sxpr and
+# the sxpr text itself, concatenated -- the layout readheader() parses.
+def makeheader(dominfo):
+ "create an image header from a VM dominfo sxpr"
+ items = [SIGNATURE]
+ sxpr = vm.sxprtostr(dominfo)
+ items.append(struct.pack('!i', len(sxpr)))
+ items.append(sxpr)
+ return ''.join(items)
+
+# Reads one batch from fd: an int page count, that many frame descriptors
+# and that many pages.  Returns (count, raw bytes of the whole batch).  When
+# the count is <= 0 only the count bytes are returned.  A short page read is
+# logged but not treated as an error.
+def readbatch(fd):
+ batch = []
+ batchlen = fd.read(INTLEN)
+ batch.append(batchlen)
+ batchlen, = struct.unpack('i', batchlen)
+ log.info("batch length: %d" % batchlen)
+ if batchlen <= 0:
+ return (batchlen, batch[0])
+
+ batchfns = fd.read(LONGLEN * batchlen)
+ batch.append(batchfns)
+ pages = fd.read(PAGE_SIZE * batchlen)
+ if len(pages) != PAGE_SIZE * batchlen:
+ log.error('SHORT READ: %d' % len(pages))
+ batch.append(pages)
+
+ return (batchlen, ''.join(batch))
+
+# batch is a raw batch string in the layout produced by readbatch().
+# Returns a list of (frame descriptor, page bytes) pairs, one per page.
+def parsebatch(batch):
+ "parse a batch string into pages"
+ batchlen, batch = batch[:INTLEN], batch[INTLEN:]
+ batchlen, = struct.unpack('i', batchlen)
+ #print 'batch length: %d' % batchlen
+ pfnlen = batchlen * LONGLEN
+ pfns = struct.unpack('%dL' % batchlen, batch[:pfnlen])
+ pagebuf = batch[pfnlen:]
+ pages = [pagebuf[i*PAGE_SIZE:(i+1)*PAGE_SIZE] for i in xrange(batchlen)]
+ return zip(pfns, pages)
diff --git a/tools/python/xen/remus/netlink.py b/tools/python/xen/remus/netlink.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/netlink.py
@@ -0,0 +1,314 @@
+# netlink wrappers
+
+import socket, struct
+import xen.lowlevel.netlink
+
+NETLINK_ROUTE = 0
+
+NLM_F_REQUEST = 1 # It is request message.
+NLM_F_MULTI = 2 # Multipart message, terminated by NLMSG_DONE
+NLM_F_ACK = 4 # Reply with ack, with zero or error code
+NLM_F_ECHO = 8 # Echo this request
+
+# Modifiers to GET request
+NLM_F_ROOT = 0x100 # specify tree root
+NLM_F_MATCH = 0x200 # return all matching
+NLM_F_ATOMIC = 0x400 # atomic GET
+NLM_F_DUMP = NLM_F_ROOT|NLM_F_MATCH
+
+# Modifiers to NEW request
+NLM_F_REPLACE = 0x100 # Override existing
+NLM_F_EXCL = 0x200 # Do not touch, if it exists
+NLM_F_CREATE = 0x400 # Create, if it does not exist
+NLM_F_APPEND = 0x800 # Add to end of list
+
+RTM_NEWLINK = 16
+RTM_GETLINK = 18
+RTM_NEWQDISC = 36
+RTM_DELQDISC = 37
+RTM_GETQDISC = 38
+
+IFLA_UNSPEC = 0
+IFLA_ADDRESS = 1
+IFLA_BROADCAST = 2
+IFLA_IFNAME = 3
+IFLA_MTU = 4
+IFLA_LINK = 5
+IFLA_QDISC = 6
+IFLA_STATS = 7
+IFLA_COST = 8
+IFLA_PRIORITY = 9
+IFLA_MASTER = 10
+IFLA_WIRELESS = 11
+IFLA_PROTINFO = 12
+IFLA_TXQLEN = 13
+IFLA_MAP = 14
+IFLA_WEIGHT = 15
+
+TCA_UNSPEC = 0
+TCA_KIND = 1
+TCA_OPTIONS = 2
+TCA_STATS = 3
+TCA_XSTATS = 4
+TCA_RATE = 5
+TCA_FCNT = 6
+TCA_STATS2 = 7
+
+class RTNLException(Exception): pass
+
+# Rounds l up to the next multiple of alignto; the bit mask only works when
+# alignto is a power of two.
+def align(l, alignto=4):
+ return (l + alignto - 1) & ~(alignto - 1)
+
+# One routing attribute: a header of two unsigned shorts (length, type)
+# followed by the attribute body.
+class rtattr(object):
+ "rtattribute"
+ fmt = "HH"
+ fmtlen = struct.calcsize(fmt)
+
+ # Parses msg when one is given, otherwise starts as an empty attribute.
+ def __init__(self, msg=None):
+ if msg:
+ self.unpack(msg)
+ else:
+ self.rta_len = 0
+ self.rta_type = 0
+
+ self.body = ''
+
+ # Size the attribute occupies in a message: rta_len rounded up by align().
+ def __len__(self):
+ return align(self.rta_len)
+
+ # Serialises header and body, zero-padded to the aligned length.  Note
+ # that rta_len is set to the aligned (padded) length here.
+ def pack(self):
+ self.rta_len = align(self.fmtlen + len(self.body))
+ s = struct.pack(self.fmt, self.rta_len, self.rta_type) + self.body
+ pad = self.rta_len - len(s)
+ if pad:
+ s += '\0' * pad
+ return s
+
+ # Reads rta_len and rta_type from the start of msg; body is the bytes
+ # between the aligned header and rta_len.
+ def unpack(self, msg):
+ args = struct.unpack(self.fmt, msg[:self.fmtlen])
+ self.rta_len, self.rta_type = args
+
+ self.body = msg[align(self.fmtlen):self.rta_len]
+
+# Iterable view over a byte string holding consecutive attributes.
+class rtattrlist(object):
+ def __init__(self, msg):
+ self.start = msg
+
+ # Yields one rtattr per attribute, advancing by the attribute's aligned
+ # length each time, until no more than a header's worth of bytes remain.
+ # NOTE(review): an attribute reporting rta_len 0 has len() 0, so the
+ # remaining data would never shrink and the loop would not end.
+ def __iter__(self):
+ body = self.start
+ while len(body) > rtattr.fmtlen:
+ rta = rtattr(body)
+ yield rta
+ body = body[len(rta):]
+
+# Header fields in fmt order: length, type, flags, sequence number, pid.
+# body holds the payload after the header; rta holds attributes packed by
+# addattr(), which are appended after body when the message is packed.
+class nlmsg(object):
+ "netlink message header"
+ fmt = "IHHII"
+ fmtlen = struct.calcsize(fmt)
+
+ # Parses msg when one is given, otherwise starts with all fields zero and
+ # an empty body and attribute string.
+ def __init__(self, msg=None):
+ if msg:
+ self.unpack(msg)
+ else:
+ self.nlmsg_len = 0
+ self.nlmsg_type = 0
+ self.nlmsg_flags = 0
+ self.nlmsg_seq = 0
+ self.nlmsg_pid = 0
+
+ self.rta = ''
+ self.body = ''
+
+ # Aligned size of header, body and attributes; pack() uses this as the
+ # length field.
+ def __len__(self):
+ return align(self.fmtlen + len(self.body) + len(self.rta))
+
+ # Packs one attribute of the given type with data as its body and
+ # appends it to self.rta.
+ def addattr(self, type, data):
+ attr = rtattr()
+ attr.rta_type = type
+ attr.body = data
+ self.rta += attr.pack()
+
+ def settype(self, cmd):
+ self.nlmsg_type = cmd
+
+ # Serialises header, body and attributes.  The stored nlmsg_len is not
+ # used; the length field is computed from the current contents.
+ def pack(self):
+ return struct.pack(self.fmt, len(self), self.nlmsg_type,
+ self.nlmsg_flags, self.nlmsg_seq,
+ self.nlmsg_pid) + self.body + self.rta
+
+ # Reads the header fields from msg; everything after the header becomes
+ # body and rta is reset to empty.
+ def unpack(self, msg):
+ args = struct.unpack(self.fmt, msg[:self.fmtlen])
+ self.nlmsg_len, self.nlmsg_type, self.nlmsg_flags = args[:3]
+ self.nlmsg_seq, self.nlmsg_pid = args[3:]
+
+ self.body = msg[align(self.fmtlen):]
+ self.rta = ''
+
+ def __str__(self):
+ return '<netlink message, len %d, type %d>' % \
+ (self.nlmsg_len, self.nlmsg_type)
+
+# Fields in fmt order: family (one byte, followed by one pad byte), type,
+# index, flags, change.  body holds whatever follows the fixed part.  The
+# class can only be parsed from bytes; it has no pack().
+class ifinfomsg(object):
+ "interface info message"
+ fmt = "BxHiII"
+ fmtlen = struct.calcsize(fmt)
+
+ # Parses msg when one is given, otherwise starts with all fields zero and
+ # an empty body.
+ def __init__(self, msg=None):
+ if msg:
+ self.unpack(msg)
+ else:
+ self.ifi_family = 0
+ self.ifi_type = 0
+ self.ifi_index = 0
+ self.ifi_flags = 0
+ self.ifi_change = 0
+
+ self.body = ''
+
+ # Reads the fixed fields from the start of msg; the remainder is kept in
+ # body.
+ def unpack(self, msg):
+ args = struct.unpack(self.fmt, msg[:self.fmtlen])
+ self.ifi_family, self.ifi_type, self.ifi_index= args[:3]
+ self.ifi_flags, self.ifi_change = args[3:]
+
+ self.body = msg[align(self.fmtlen):]
+
+ def __str__(self):
+ return '<ifinfo message, family %d, type %d, index %d>' % \
+ (self.ifi_family, self.ifi_type, self.ifi_index)
+
+# Fields in fmt order: family (one byte, followed by three pad bytes),
+# interface index, handle, parent, info.  rta holds the bytes following the
+# fixed part of a parsed message.
+class tcmsg(object):
+ "TC message"
+ fmt = "BxxxiIII"
+ fmtlen = struct.calcsize(fmt)
+
+ # Parses msg when one is given, otherwise starts with family AF_UNSPEC
+ # and all other fields zero.
+ def __init__(self, msg=None):
+ if msg:
+ self.unpack(msg)
+ else:
+ self.tcm_family = socket.AF_UNSPEC
+ self.tcm_ifindex = 0
+ self.tcm_handle = 0
+ self.tcm_parent = 0
+ self.tcm_info = 0
+
+ self.rta = ''
+
+ # Reads the fixed fields from the start of msg; the remainder is kept in
+ # rta.
+ def unpack(self, msg):
+ args = struct.unpack(self.fmt, msg[:self.fmtlen])
+ self.tcm_family, self.tcm_ifindex, self.tcm_handle = args[:3]
+ self.tcm_parent, self.tcm_info = args[3:]
+
+ self.rta = msg[align(self.fmtlen):]
+
+ # Serialises only the fixed fields; self.rta is not included.
+ def pack(self):
+ return struct.pack(self.fmt, self.tcm_family, self.tcm_ifindex,
+ self.tcm_handle, self.tcm_parent, self.tcm_info)
+
+ def __str__(self):
+ return '<tc message, family %d, index %d>' % \
+ (self.tcm_family, self.tcm_ifindex)
+
+# Decoded RTM_NEWLINK message: self.ifi is the ifinfomsg parsed from the
+# netlink body and self.rtattrs maps attribute type to attribute body (a
+# later attribute of the same type replaces an earlier one).
+# Raises RTNLException if the message is of another type.
+class newlinkmsg(object):
+ def __init__(self, nlmsg):
+ if nlmsg.nlmsg_type != RTM_NEWLINK:
+ raise RTNLException("wrong message type")
+ self.nlmsg = nlmsg
+ self.ifi = ifinfomsg(self.nlmsg.body)
+
+ self.rtattrs = {}
+ for rta in rtattrlist(self.ifi.body):
+ self.rtattrs[rta.rta_type] = rta.body
+
+# Decoded RTM_NEWQDISC message: self.t is the tcmsg parsed from the netlink
+# body and self.rtattrs maps attribute type to attribute body (a later
+# attribute of the same type replaces an earlier one).
+# Raises RTNLException if the message is of another type.
+class newqdiscmsg(object):
+ def __init__(self, nlmsg):
+ if nlmsg.nlmsg_type != RTM_NEWQDISC:
+ raise RTNLException("wrong message type")
+ self.nlmsg = nlmsg
+ self.t = tcmsg(self.nlmsg.body)
+
+ self.rtattrs = {}
+ for rta in rtattrlist(self.t.rta):
+ self.rtattrs[rta.rta_type] = rta.body
+
+# Wrapper around the low-level xen.lowlevel.netlink.rtnl handle with helpers
+# to list interfaces and queueing disciplines.
+class rtnl(object):
+ def __init__(self):
+ self._rth = xen.lowlevel.netlink.rtnl()
+ # result of the last getlinks() call, reused when cached=True
+ self._linkcache = None
+
+ # An int key is looked up as an interface index; any other key is
+ # compared against interface names.  Returns None when nothing matches.
+ # Unless cached is true, the interface list is fetched afresh.
+ def getlink(self, key, cached=False):
+ """returns the interface object corresponding to the key, which
+ may be an index number or device name."""
+ if not cached:
+ self._linkcache = None
+ if self._linkcache is None:
+ self._linkcache = self.getlinks()
+
+ if isinstance(key, int):
+ return self._linkcache.get(key)
+
+ for k, v in self._linkcache.iteritems():
+ if v['name'] == key:
+ return v
+
+ return None
+
+ # Each entry is a dict with keys 'index', 'type', 'name' and 'address'
+ # ('address' is None when the message carries no IFLA_ADDRESS attribute).
+ def getlinks(self):
+ """returns a dictionary of interfaces keyed by kernel
+ interface index"""
+ links = {}
+ # called once per message of the dump; msgstr is the raw message
+ def dumpfilter(addr, msgstr):
+ msg = newlinkmsg(nlmsg(msgstr))
+ idx = msg.ifi.ifi_index
+ ifname = msg.rtattrs[IFLA_IFNAME].strip('\0')
+ address = msg.rtattrs.get(IFLA_ADDRESS)
+
+ link = {'index': idx,
+ 'type': msg.ifi.ifi_type,
+ 'name': ifname,
+ 'address': address}
+ links[idx] = link
+
+ self._rth.wilddump_request(socket.AF_UNSPEC, RTM_GETLINK)
+ self._rth.dump_filter(dumpfilter)
+
+ return links
+
+ # Returns the first value of the dictionary produced by getqdiscs(dev),
+ # or None when that dictionary is empty.
+ def getqdisc(self, dev):
+ """returns the queueing discipline on device dev, which may be
+ specified by kernel index or device name"""
+ qdiscs = self.getqdiscs(dev)
+ if qdiscs:
+ return qdiscs.values()[0]
+ return None
+
+    def getqdiscs(self, dev=None):
+        """returns a dictionary of queueing disciplines keyed by kernel
+        interface index
+
+        Each entry is a dict with keys 'index', 'handle', 'kind' and
+        'options'.  If dev (an index or a device name) is given, the request
+        carries that device's interface index; RTNLException is raised when
+        the device cannot be found."""
+        qdiscs = {}
+        # called once per message of the dump; msgstr is the raw message
+        def dumpfilter(addr, msgstr):
+            msg = newqdiscmsg(nlmsg(msgstr))
+            idx = msg.t.tcm_ifindex
+            handle = msg.t.tcm_handle
+            kind = msg.rtattrs[TCA_KIND].strip('\0')
+            opts = msg.rtattrs.get(TCA_OPTIONS)
+
+            qdisc = {'index': idx,
+                     'handle': handle,
+                     'kind': kind,
+                     'options': opts}
+            qdiscs[idx] = qdisc
+
+        tcm = tcmsg()
+        if dev:
+            link = self.getlink(dev)
+            if not link:
+                # QdiscException is not defined or imported in this module,
+                # so raising it would fail with NameError; use this
+                # module's own exception type.
+                raise RTNLException('device %s not found' % dev)
+            tcm.tcm_ifindex = link['index']
+
+        msg = tcm.pack()
+        self._rth.dump_request(RTM_GETQDISC, msg)
+        self._rth.dump_filter(dumpfilter)
+        return qdiscs
+
+ # Passes req straight to the low-level handle's talk(); nothing is
+ # returned.
+ def talk(self, req):
+ self._rth.talk(req)
diff --git a/tools/python/xen/remus/profile.py b/tools/python/xen/remus/profile.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/profile.py
@@ -0,0 +1,56 @@
+"""Simple profiling module
+"""
+
+import time
+
+class ProfileBlock(object):
+ """A section of code to be profiled"""
+ def __init__(self, name):
+ self.name = name
+
+ def enter(self):
+ print "PROF: entered %s at %f" % (self.name, time.time())
+
+ def exit(self):
+ print "PROF: exited %s at %f" % (self.name, time.time())
+
+class NullProfiler(object):
+ def enter(self, name):
+ pass
+
+ def exit(self, name=None):
+ pass
+
+class Profiler(object):
+ def __init__(self):
+ self.blocks = {}
+ self.running = []
+
+ def enter(self, name):
+ try:
+ block = self.blocks[name]
+ except KeyError:
+ block = ProfileBlock(name)
+ self.blocks[name] = block
+
+ block.enter()
+ self.running.append(block)
+
+ def exit(self, name=None):
+ if name is not None:
+ block = None
+ while self.running:
+ tmp = self.running.pop()
+ if tmp.name == name:
+ block = tmp
+ break
+ tmp.exit()
+ if not block:
+ raise KeyError('block %s not running' % name)
+ else:
+ try:
+ block = self.running.pop()
+ except IndexError:
+ raise KeyError('no block running')
+
+ block.exit()
diff --git a/tools/python/xen/remus/qdisc.py b/tools/python/xen/remus/qdisc.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/qdisc.py
@@ -0,0 +1,178 @@
+import socket, struct
+
+import netlink
+
+qdisc_kinds = {}
+
+TC_H_ROOT = 0xFFFFFFFF
+
+class QdiscException(Exception): pass
+
+class request(object):
+ "qdisc request message"
+ def __init__(self, cmd, flags=0, dev=None, handle=0):
+ self.n = netlink.nlmsg()
+ self.t = netlink.tcmsg()
+
+ self.n.nlmsg_flags = netlink.NLM_F_REQUEST|flags
+ self.n.nlmsg_type = cmd
+ self.t.tcm_family = socket.AF_UNSPEC
+
+ if not handle:
+ handle = TC_H_ROOT
+ self.t.tcm_parent = handle
+
+ if dev:
+ self.t.tcm_ifindex = dev
+
+ def pack(self):
+ t = self.t.pack()
+ self.n.body = t
+ return self.n.pack()
+
+class addrequest(request):
+ def __init__(self, dev, handle, qdisc):
+ flags = netlink.NLM_F_EXCL|netlink.NLM_F_CREATE
+ super(addrequest, self).__init__(netlink.RTM_NEWQDISC, flags=flags,
+ dev=dev, handle=handle)
+ self.n.addattr(netlink.TCA_KIND, qdisc.kind)
+ opts = qdisc.pack()
+ if opts:
+ self.n.addattr(netlink.TCA_OPTIONS, opts)
+
+class delrequest(request):
+ def __init__(self, dev, handle):
+ super(delrequest, self).__init__(netlink.RTM_DELQDISC, dev=dev,
+ handle=handle)
+
+class changerequest(request):
+ def __init__(self, dev, handle, qdisc):
+ super(changerequest, self).__init__(netlink.RTM_NEWQDISC,
+ dev=dev, handle=handle)
+ self.n.addattr(netlink.TCA_KIND, qdisc.kind)
+ opts = qdisc.pack()
+ if opts:
+ self.n.addattr(netlink.TCA_OPTIONS, opts)
+
+class Qdisc(object):
+ def __new__(cls, qdict=None, *args, **opts):
+ if qdict:
+ kind = qdict.get('kind')
+ cls = qdisc_kinds.get(kind, cls)
+ obj = super(Qdisc, cls).__new__(cls, qdict=qdict, *args, **opts)
+ return obj
+
+ def __init__(self, qdict):
+ self._qdict = qdict
+ self.kind = qdict['kind']
+ self.handle = qdict['handle'] >> 16
+
+ def parse(self, opts):
+ if opts:
+ raise QdiscException('cannot parse qdisc parameters')
+
+ def optstr(self):
+ if self.qdict['options']:
+ return '[cannot parse qdisc parameters]'
+ else:
+ return ''
+
+ def pack(self):
+ return ''
+
+TC_PRIO_MAX = 15
+class PrioQdisc(Qdisc):
+ fmt = 'i%sB' % (TC_PRIO_MAX + 1)
+
+ def __init__(self, qdict):
+ super(PrioQdisc, self).__init__(qdict)
+
+ if qdict.get('options'):
+ self.unpack(qdict['options'])
+ else:
+ self.bands = 3
+ self.priomap = [1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+
+ def pack(self):
+ #return struct.pack(self.fmt, self.bands, *self.priomap)
+ return ''
+
+ def unpack(self, opts):
+ args = struct.unpack(self.fmt, opts)
+ self.bands = args[0]
+ self.priomap = args[1:]
+
+ def optstr(self):
+ mapstr = ' '.join([str(p) for p in self.priomap])
+ return 'bands %d priomap %s' % (self.bands, mapstr)
+
+qdisc_kinds['prio'] = PrioQdisc
+qdisc_kinds['pfifo_fast'] = PrioQdisc
+
+class CfifoQdisc(Qdisc):
+ fmt = 'II'
+
+ def __init__(self, qdict):
+ super(CfifoQdisc, self).__init__(qdict)
+
+ if qdict.get('options'):
+ self.unpack(qdict['options'])
+ else:
+ self.epoch = 0
+ self.vmid = 0
+
+ def pack(self):
+ return struct.pack(self.fmt, self.epoch, self.vmid)
+
+ def unpack(self, opts):
+ self.epoch, self.vmid = struct.unpack(self.fmt, opts)
+
+ def parse(self, opts):
+ args = list(opts)
+ try:
+ while args:
+ arg = args.pop(0)
+ if arg == 'epoch':
+ self.epoch = int(args.pop(0))
+ continue
+ if arg.lower() == 'vmid':
+ self.vmid = int(args.pop(0))
+ continue
+ except Exception, inst:
+ raise QdiscException(str(inst))
+
+ def optstr(self):
+ return 'epoch %d vmID %d' % (self.epoch, self.vmid)
+
+qdisc_kinds['cfifo'] = CfifoQdisc
+
+TC_QUEUE_CHECKPOINT = 0
+TC_QUEUE_RELEASE = 1
+
+class QueueQdisc(Qdisc):
+ fmt = 'I'
+
+ def __init__(self, qdict=None):
+ if not qdict:
+ qdict = {'kind': 'queue',
+ 'handle': TC_H_ROOT}
+ super(QueueQdisc, self).__init__(qdict)
+
+ self.action = 0
+
+ def pack(self):
+ return struct.pack(self.fmt, self.action)
+
+ def parse(self, args):
+ if not args:
+ raise QdiscException('no action given')
+ arg = args[0]
+
+ if arg == 'checkpoint':
+ self.action = TC_QUEUE_CHECKPOINT
+ elif arg == 'release':
+ self.action = TC_QUEUE_RELEASE
+ else:
+ raise QdiscException('unknown action')
+
+qdisc_kinds['queue'] = QueueQdisc
diff --git a/tools/python/xen/remus/save.py b/tools/python/xen/remus/save.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/save.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+
+import os, select, socket, threading, time, signal, xmlrpclib
+
+from xen.xend.XendClient import server
+from xen.xend.xenstore.xswatch import xswatch
+
+import xen.lowlevel.xc
+from xen.xend.xenstore import xsutil
+xc = xen.lowlevel.xc.xc()
+
+import xen.lowlevel.checkpoint
+
+import vm, image
+
+XCFLAGS_LIVE = 1
+
+xcsave = '/usr/lib/xen/bin/xc_save'
+
+class _proxy(object):
+ "proxy simulates an object without inheritance"
+ def __init__(self, obj):
+ self._obj = obj
+
+ def __getattr__(self, name):
+ return getattr(self._obj, name)
+
+ def proxy(self, obj):
+ self._obj = obj
+
+class CheckpointError(Exception): pass
+
+class CheckpointingFile(_proxy):
+ """Tee writes into separate file objects for each round.
+ This is necessary because xc_save gets a single file descriptor
+ for the duration of checkpointing.
+ """
+ def __init__(self, path):
+ self.path = path
+
+ self.round = 0
+ self.rfd, self.wfd = os.pipe()
+ self.fd = file(path, 'wb')
+
+ # this pipe is used to notify the writer thread of checkpoints
+ self.cprfd, self.cpwfd = os.pipe()
+
+ super(CheckpointingFile, self).__init__(self.fd)
+
+ wt = threading.Thread(target=self._wrthread, name='disk-write-thread')
+ wt.setDaemon(True)
+ wt.start()
+ self.wt = wt
+
+ def fileno(self):
+ return self.wfd
+
+ def close(self):
+ os.close(self.wfd)
+ # closing wfd should signal writer to stop
+ self.wt.join()
+ os.close(self.rfd)
+ os.close(self.cprfd)
+ os.close(self.cpwfd)
+ self.fd.close()
+ self.wt = None
+
+ def checkpoint(self):
+ os.write(self.cpwfd, '1')
+
+ def _wrthread(self):
+ while True:
+ r, o, e = select.select((self.rfd, self.cprfd), (), ())
+ if self.rfd in r:
+ data = os.read(self.rfd, 256 * 1024)
+ if not data:
+ break
+ self.fd.write(data)
+ if self.cprfd in r:
+ junk = os.read(self.cprfd, 1)
+ self.round += 1
+ self.fd = file('%s.%d' % (self.path, self.round), 'wb')
+ self.proxy(self.fd)
+
+class MigrationSocket(_proxy):
+ def __init__(self, address):
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect(address)
+
+ sock.send("receive\n")
+ sock.recv(80)
+
+ fd = os.fdopen(sock.fileno(), 'w+')
+
+ self.sock = sock
+ super(MigrationSocket, self).__init__(fd)
+
+class Keepalive(object):
+ "Call a keepalive method at intervals"
+ def __init__(self, method, interval=0.1):
+ self.keepalive = method
+ self.interval = interval
+
+ self.thread = None
+ self.running = False
+
+ def start(self):
+ if not self.interval:
+ return
+ self.thread = threading.Thread(target=self.run, name='keepalive-thread')
+ self.thread.setDaemon(True)
+ self.running = True
+ self.thread.start()
+
+ def stop(self):
+ if not self.thread:
+ return
+ self.running = False
+ self.thread.join()
+ self.thread = None
+
+ def run(self):
+ while self.running:
+ self.keepalive()
+ time.sleep(self.interval)
+ self.keepalive(stop=True)
+
+class Saver(object):
+ def __init__(self, domid, fd, suspendcb=None, resumecb=None,
+ checkpointcb=None, interval=0):
+ """Create a Saver object for taking guest checkpoints.
+ domid: name, number or UUID of a running domain
+ fd: a stream to which checkpoint data will be written.
+ suspendcb: callback invoked after guest is suspended
+ resumecb: callback invoked before guest resumes
+ checkpointcb: callback invoked when a checkpoint is complete. Return
+ True to take another checkpoint, or False to stop.
+ """
+ self.fd = fd
+ self.suspendcb = suspendcb
+ self.resumecb = resumecb
+ self.checkpointcb = checkpointcb
+ self.interval = interval
+
+ self.vm = vm.VM(domid)
+
+ self.checkpointer = None
+
+ def start(self):
+ vm.getshadowmem(self.vm)
+
+ hdr = image.makeheader(self.vm.dominfo)
+ self.fd.write(hdr)
+ self.fd.flush()
+
+ self.checkpointer = xen.lowlevel.checkpoint.checkpointer()
+ try:
+ self.checkpointer.open(self.vm.domid)
+ self.checkpointer.start(self.fd, self.suspendcb, self.resumecb,
+ self.checkpointcb, self.interval)
+ self.checkpointer.close()
+ except xen.lowlevel.checkpoint.error, e:
+ raise CheckpointError(e)
+
+ def _resume(self):
+ """low-overhead version of XendDomainInfo.resumeDomain"""
+ # TODO: currently assumes SUSPEND_CANCEL is available
+ if True:
+ xc.domain_resume(self.vm.domid, 1)
+ xsutil.ResumeDomain(self.vm.domid)
+ else:
+ server.xend.domain.resumeDomain(self.vm.domid)
diff --git a/tools/python/xen/remus/tapdisk.py b/tools/python/xen/remus/tapdisk.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/tapdisk.py
@@ -0,0 +1,4 @@
+import blkdev
+
+class TapDisk(BlkDev):
+ pass
diff --git a/tools/python/xen/remus/util.py b/tools/python/xen/remus/util.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/util.py
@@ -0,0 +1,31 @@
+# utility functions
+
+import os, subprocess
+
+class PipeException(Exception):
+ def __init__(self, message, errno):
+ self.errno = errno
+ message = '%s: %d, %s' % (message, errno, os.strerror(errno))
+ Exception.__init__(self, message)
+
+def canonifymac(mac):
+ return ':'.join(['%02x' % int(field, 16) for field in mac.split(':')])
+
+def runcmd(args, cwd=None):
+ # TODO: stdin handling
+ if type(args) == str:
+ args = args.split(' ')
+ try:
+ proc = subprocess.Popen(args, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, close_fds=True,
+ cwd=cwd)
+ stdout = proc.stdout.read()
+ stderr = proc.stderr.read()
+ proc.wait()
+ if proc.returncode:
+ print ' '.join(args)
+ print stderr.strip()
+ raise PipeException('%s failed' % args[0], proc.returncode)
+ return stdout
+ except (OSError, IOError), inst:
+ raise PipeException('could not run %s' % args[0], inst.errno)
diff --git a/tools/python/xen/remus/vbd.py b/tools/python/xen/remus/vbd.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/vbd.py
@@ -0,0 +1,9 @@
+import blkdev
+
+class VBD(blkdev.BlkDev):
+ def handles(self, **props):
+ uname = props.get('uname', '')
+ return uname.startswith('phy:')
+ handles = classmethod(handles)
+
+blkdev.register(VBD)
diff --git a/tools/python/xen/remus/vdi.py b/tools/python/xen/remus/vdi.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/vdi.py
@@ -0,0 +1,121 @@
+#code to play with vdis and snapshots
+
+import os
+
+def run(cmd):
+ fd = os.popen(cmd)
+ res = [l for l in fd if l.rstrip()]
+ return not fd.close(), res
+
+
+_blockstore = '/blockstore.dat'
+
+def set_blockstore(blockstore):
+ global _blockstore
+ __blockstore = blockstore
+
+
+class SnapShot:
+ def __init__(self, vdi, block, index):
+ self.__vdi = vdi
+ self.__block = block
+ self.__index = index
+
+ #TODO add snapshot date and radix
+
+ def __str__(self):
+ return '%d %d %d' % (self.__vdi.id(), self.__block, self.__index)
+
+ def vdi(self):
+ return self.__vdi
+
+ def block(self):
+ return self.__block
+
+ def index(self):
+ return self.__index
+
+ def match(self, block, index):
+ return self.__block == block and self.__index == index
+
+
+class VDIException(Exception):
+ pass
+
+
+class VDI:
+ def __init__(self, id, name):
+ self.__id = id
+ self.__name = name
+
+ def __str__(self):
+ return 'vdi: %d %s' % (self.__id, self.__name)
+
+ def id(self):
+ return self.__id
+
+ def name(self):
+ return self.__name
+
+ def list_snapshots(self):
+ res, ls = run('vdi_snap_list %s %d' % (_blockstore, self.__id))
+ if res:
+ return [SnapShot(self, int(l[0]), int(l[1])) for l in [l.split() for l in ls[1:]]]
+ else:
+ raise VDIException("Error reading snapshot list")
+
+ def snapshot(self):
+ res, ls = run('vdi_checkpoint %s %d' % (_blockstore, self.__id))
+ if res:
+ _, block, idx = ls[0].split()
+ return SnapShot(self, int(block), int(idx))
+ else:
+ raise VDIException("Error taking vdi snapshot")
+
+
+def create(name, snap):
+ res, _ = run('vdi_create %s %s %d %d'
+ % (_blockstore, name, snap.block(), snap.index()))
+ if res:
+ return lookup_by_name(name)
+ else:
+ raise VDIException('Unable to create vdi from snapshot')
+
+
+def fill(name, img_file):
+ res, _ = run('vdi_create %s %s' % (_blockstore, name))
+
+ if res:
+ vdi = lookup_by_name(name)
+ res, _ = run('vdi_fill %d %s' % (vdi.id(), img_file))
+ if res:
+ return vdi
+ raise VDIException('Unable to create vdi from disk img file')
+
+
+def list_vdis():
+ vdis = []
+ res, lines = run('vdi_list %s' % _blockstore)
+ if res:
+ for l in lines:
+ r = l.split()
+ vdis.append(VDI(int(r[0]), r[1]))
+ return vdis
+ else:
+ raise VDIException("Error doing vdi list")
+
+
+def lookup_by_id(id):
+ vdis = list_vdis()
+ for v in vdis:
+ if v.id() == id:
+ return v
+ raise VDIException("No match from vdi id")
+
+
+def lookup_by_name(name):
+ vdis = list_vdis()
+ for v in vdis:
+ if v.name() == name:
+ return v
+ raise VDIException("No match for vdi name")
diff --git a/tools/python/xen/remus/vif.py b/tools/python/xen/remus/vif.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/vif.py
@@ -0,0 +1,14 @@
+from xen.remus.util import canonifymac
+
+class VIF(object):
+ def __init__(self, **props):
+ self.__dict__.update(props)
+ if 'mac' in props:
+ self.mac = canonifymac(props['mac'])
+
+ def __str__(self):
+ return self.mac
+
+def parse(props):
+ "turn a vm device dictionary into a vif object"
+ return VIF(**props)
diff --git a/tools/python/xen/remus/vm.py b/tools/python/xen/remus/vm.py
new file mode 100644
--- /dev/null
+++ b/tools/python/xen/remus/vm.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+import xmlrpclib
+
+from xen.xend.XendClient import server
+from xen.xend import sxp
+# XXX XendDomain is voodoo to let balloon import succeed
+from xen.xend import XendDomain, balloon
+
+import vif
+import blkdev
+# need a nicer way to load disk drivers
+import vbd
+
+class VMException(Exception): pass
+
+class VM(object):
+ "Representation of a virtual machine"
+ def __init__(self, domid=None, dominfo=None):
+ self.dominfo = dominfo
+
+ self.domid = -1
+ self.name = 'unknown'
+ self.dom = {}
+ self.disks = []
+ self.vifs = []
+
+ if domid:
+ try:
+ self.dominfo = server.xend.domain(domid, 'all')
+ except xmlrpclib.Fault:
+ raise VMException('error looking up domain %s' % str(domid))
+
+ if self.dominfo:
+ self.loaddominfo()
+
+ def loaddominfo(self):
+ self.dom = parsedominfo(self.dominfo)
+ self.domid = self.dom['domid']
+ self.name = self.dom['name']
+
+ self.disks = getdisks(self.dom)
+ self.vifs = getvifs(self.dom)
+
+ def __str__(self):
+ return 'VM %d (%s), MACs: [%s], disks: [%s]' % \
+ (self.domid, self.name, self.epoch, ', '.join(self.macs),
+ ', '.join([str(d) for d in self.disks]))
+
+def parsedominfo(dominfo):
+ "parses a dominfo sexpression in the form of python lists of lists"
+ def s2d(s):
+ r = {}
+ for elem in s:
+ if len(elem) == 0:
+ continue
+ name = elem[0]
+ if len(elem) == 1:
+ val = None
+ else:
+ val = elem[1]
+ if isinstance(val, list):
+ val = s2d(elem[1:])
+ if isinstance(name, list):
+ # hack for ['cpus', [[1]]]
+ return s2d(elem)
+ if name in r:
+ for k, v in val.iteritems():
+ if k in r[name]:
+ if not isinstance(r[name][k], list):
+ r[name][k] = [r[name][k]]
+ r[name][k].append(v)
+ else:
+ r[name][k] = v
+ else:
+ r[name] = val
+ return r
+
+ return s2d(dominfo[1:])
+
+def domtosxpr(dom):
+ "convert a dominfo into a python sxpr"
+ def d2s(d):
+ r = []
+ for k, v in d.iteritems():
+ elem = [k]
+ if isinstance(v, dict):
+ elem.extend(d2s(v))
+ else:
+ if v is None:
+ v = ''
+ elem.append(v)
+ r.append(elem)
+ return r
+
+ sxpr = ['domain']
+ sxpr.extend(d2s(dom))
+ return sxpr
+
+def strtosxpr(s):
+ "convert a string to a python sxpr"
+ p = sxp.Parser()
+ p.input(s)
+ return p.get_val()
+
+def sxprtostr(sxpr):
+ "convert an sxpr to string"
+ return sxp.to_string(sxpr)
+
+def getvifs(dom):
+ "return vif objects for devices in dom"
+ vifs = dom['device'].get('vif', [])
+ if type(vifs) != list:
+ vifs = [vifs]
+
+ return [vif.parse(v) for v in vifs]
+
+def getdisks(dom):
+ "return block device objects for devices in dom"
+ disks = dom['device'].get('vbd', [])
+ if type(disks) != list:
+ disks = [disks]
+
+ # tapdisk1 devices
+ tap1s = dom['device'].get('tap', [])
+ if type(tap1s) != list:
+ disks.append(tap1s)
+ else:
+ disks.extend(tap1s)
+
+ # tapdisk2 devices
+ tap2s = dom['device'].get('tap2', [])
+ if type(tap2s) != list:
+ disks.append(tap2s)
+ else:
+ disks.extend(tap2s)
+
+ return [blkdev.parse(disk) for disk in disks]
+
+def fromxend(domid):
+ "create a VM object from xend information"
+ return VM(domid)
+
+def getshadowmem(vm):
+ "Balloon down domain0 to create free memory for shadow paging."
+ maxmem = int(vm.dom['maxmem'])
+ shadow = int(vm.dom['shadow_memory'])
+ vcpus = int(vm.dom['vcpus'])
+
+ # from XendDomainInfo.checkLiveMigrateMemory:
+ # 1MB per vcpu plus 4Kib/Mib of RAM. This is higher than
+ # the minimum that Xen would allocate if no value were given.
+ needed = vcpus * 1024 + maxmem * 4 - shadow * 1024
+ if needed > 0:
+ print "Freeing %d kB for shadow mode" % needed
+ balloon.free(needed, vm.dominfo)
^ permalink raw reply [flat|nested] 16+ messages in thread* [PATCH 3 of 3] Remus: support for network buffering
2009-11-13 1:10 [PATCH 0 of 3] Remus: control tool Brendan Cully
2009-11-13 1:10 ` [PATCH 1 of 3] Remus: add python control extensions Brendan Cully
2009-11-13 1:10 ` [PATCH 2 of 3] Remus: add control script to activate remus on a VM Brendan Cully
@ 2009-11-13 1:10 ` Brendan Cully
2009-11-13 14:18 ` [PATCH 0 of 3] Remus: control tool Konrad Rzeszutek Wilk
3 siblings, 0 replies; 16+ messages in thread
From: Brendan Cully @ 2009-11-13 1:10 UTC (permalink / raw)
To: xen-devel; +Cc: andy
# HG changeset patch
# User Brendan Cully <brendan@cs.ubc.ca>
# Date 1258074170 28800
# Node ID 6caed9eff54375d6fb561dab0ea1213e37e98339
# Parent 4e36da19dc8f433910be8adabd8a3e4e5cead5d6
Remus: support for network buffering
This currently relies on the third-party IMQ patch (linuximq.net) being present
in dom0. The plan is to replace this with a direct hook into netback eventually.
This patch includes a pared-down and patched copy of ebtables to install IMQ
on a VIF.
Signed-off-by: Brendan Cully <brendan@cs.ubc.ca>
diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -205,6 +205,8 @@
^tools/pygrub/build/.*$
^tools/python/build/.*$
^tools/python/xen/util/path\.py$
+^tools/remus/imqebt/imqebt$
+^tools/remus/kmod/.*(\.cmd|\.mod|\.ko|\.mod\.c|\.symvers|\.xen)$
^tools/security/secpol_tool$
^tools/security/xen/.*$
^tools/security/xensec_tool$
diff --git a/tools/remus/Makefile b/tools/remus/Makefile
--- a/tools/remus/Makefile
+++ b/tools/remus/Makefile
@@ -1,20 +1,17 @@
XEN_ROOT=../..
include $(XEN_ROOT)/tools/Rules.mk
+SUBDIRS-y := imqebt kmod
+
SCRIPTS = remus
.PHONY: all
-all: build
-
-.PHONY: build
-build:
- echo "Nothing to do"
+all: subdirs-all
.PHONY: install
-install:
+install: subdirs-install
$(INSTALL_DIR) $(DESTDIR)$(BINDIR)
$(INSTALL_PYTHON_PROG) $(SCRIPTS) $(DESTDIR)$(BINDIR)
.PHONY: clean
-clean:
- echo "Nothing to do"
+clean: subdirs-clean
diff --git a/tools/remus/imqebt/Makefile b/tools/remus/imqebt/Makefile
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/Makefile
@@ -0,0 +1,97 @@
+# ebtables Makefile (reworked for Remus IMQ control)
+
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+PROGNAME:=ebtables
+PROGRELEASE:=1
+PROGVERSION_:=2.0.9
+PROGVERSION:=$(PROGVERSION_)-$(PROGRELEASE)
+PROGDATE:=June\ 2009
+
+ifeq ($(shell uname -m),sparc64)
+CFLAGS+=-DEBT_MIN_ALIGN=8 -DKERNEL_64_USERSPACE_32
+endif
+
+include extensions/Makefile
+
+OBJECTS2:=getethertype.o communication.o libebtc.o \
+useful_functions.o ebtables.o
+
+OBJECTS:=$(OBJECTS2) $(EXT_OBJS) $(EXT_LIBS)
+
+KERNEL_INCLUDES?=include/
+
+ETHERTYPESPATH?=$(ETCDIR)
+ETHERTYPESFILE:=$(ETHERTYPESPATH)/ethertypes
+
+PIPE_DIR?=/tmp/$(PROGNAME)-v$(PROGVERSION)
+PIPE=$(PIPE_DIR)/ebtablesd_pipe
+EBTD_CMDLINE_MAXLN?=2048
+EBTD_ARGC_MAX?=50
+
+PROGSPECS:=-DPROGVERSION=\"$(PROGVERSION)\" \
+ -DPROGNAME=\"$(PROGNAME)\" \
+ -DPROGDATE=\"$(PROGDATE)\" \
+ -D_PATH_ETHERTYPES=\"$(ETHERTYPESFILE)\" \
+ -DEBTD_ARGC_MAX=$(EBTD_ARGC_MAX) \
+ -DEBTD_CMDLINE_MAXLN=$(EBTD_CMDLINE_MAXLN)
+
+# Uncomment for debugging (slower)
+#PROGSPECS+=-DEBT_DEBUG
+#CFLAGS+=-ggdb
+
+PROGRAMS = imqebt
+
+.PHONY: all
+all: build
+
+.PHONY: build
+build: $(PROGRAMS)
+
+# a little scripting for a static binary, making one for ebtables-restore
+# should be completely analogous
+imqebt: extensions/ebt_*.c extensions/ebtable_*.c ebtables.c communication.c ebtables-standalone.c getethertype.c libebtc.c useful_functions.c
+ cp ebtables-standalone.c ebtables-standalone.c_ ; \
+ cp include/ebtables_u.h include/ebtables_u.h_ ; \
+ sed "s/ main(/ pseudomain(/" ebtables-standalone.c > ebtables-standalone.c__ ; \
+ mv ebtables-standalone.c__ ebtables-standalone.c ; \
+ printf "\nint main(int argc, char *argv[])\n{\n " >> ebtables-standalone.c ; \
+ for arg in $(EXT_FUNC) \
+ ; do \
+ sed s/_init/_$${arg}_init/ extensions/ebt_$${arg}.c > extensions/ebt_$${arg}.c_ ; \
+ mv extensions/ebt_$${arg}.c_ extensions/ebt_$${arg}.c ; \
+ printf "\t%s();\n" _$${arg}_init >> ebtables-standalone.c ; \
+ printf "extern void %s(void);\n" _$${arg}_init >> include/ebtables_u.h ; \
+ done ; \
+ for arg in $(EXT_TABLES) \
+ ; do \
+ sed s/_init/_t_$${arg}_init/ extensions/ebtable_$${arg}.c > extensions/ebtable_$${arg}.c_ ; \
+ mv extensions/ebtable_$${arg}.c_ extensions/ebtable_$${arg}.c ; \
+ printf "\t%s();\n" _t_$${arg}_init >> ebtables-standalone.c ; \
+ printf "extern void %s(void);\n" _t_$${arg}_init >> include/ebtables_u.h ; \
+ done ; \
+ printf "\n\tpseudomain(argc, argv);\n\treturn 0;\n}\n" >> ebtables-standalone.c ;\
+ $(CC) $(CFLAGS) $(PROGSPECS) -o $@ $^ -I$(KERNEL_INCLUDES) -Iinclude ; \
+ for arg in $(EXT_FUNC) \
+ ; do \
+ sed "s/ .*_init/ _init/" extensions/ebt_$${arg}.c > extensions/ebt_$${arg}.c_ ; \
+ mv extensions/ebt_$${arg}.c_ extensions/ebt_$${arg}.c ; \
+ done ; \
+ for arg in $(EXT_TABLES) \
+ ; do \
+ sed "s/ .*_init/ _init/" extensions/ebtable_$${arg}.c > extensions/ebtable_$${arg}.c_ ; \
+ mv extensions/ebtable_$${arg}.c_ extensions/ebtable_$${arg}.c ; \
+ done ; \
+ mv ebtables-standalone.c_ ebtables-standalone.c ; \
+ mv include/ebtables_u.h_ include/ebtables_u.h
+
+.PHONY: install
+install: build
+ $(INSTALL_DIR) $(DESTDIR)$(PRIVATE_BINDIR)
+ $(INSTALL_PROG) $(PROGRAMS) $(DESTDIR)$(PRIVATE_BINDIR)
+
+.PHONY: clean
+clean:
+ rm -f imqebt
+ rm -f *.o *~ *.so
diff --git a/tools/remus/imqebt/README b/tools/remus/imqebt/README
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/README
@@ -0,0 +1,2 @@
+This is a fork of ebtables for installing IMQ on a bridged device.
+Like the original code, it is released under the GPL.
\ No newline at end of file
diff --git a/tools/remus/imqebt/communication.c b/tools/remus/imqebt/communication.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/communication.c
@@ -0,0 +1,762 @@
+/*
+ * communication.c, v2.0 July 2002
+ *
+ * Author: Bart De Schuymer
+ *
+ */
+
+/*
+ * All the userspace/kernel communication is in this file.
+ * The other code should not have to know anything about the way the
+ * kernel likes the structure of the table data.
+ * The other code works with linked lists. So, the translation is done here.
+ */
+
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include "include/ebtables_u.h"
+
+extern char* hooknames[NF_BR_NUMHOOKS];
+
+#ifdef KERNEL_64_USERSPACE_32
+#define sparc_cast (uint64_t)
+#else
+#define sparc_cast
+#endif
+
+int sockfd = -1;
+
+static int get_sockfd(void)
+{
+ int ret = 0;
+ if (sockfd == -1) {
+ sockfd = socket(AF_INET, SOCK_RAW, PF_INET);
+ if (sockfd < 0) {
+ ebt_print_error("Problem getting a socket, "
+ "you probably don't have the right "
+ "permissions");
+ ret = -1;
+ }
+ }
+ return ret;
+}
+
+static struct ebt_replace *translate_user2kernel(struct ebt_u_replace *u_repl)
+{
+ struct ebt_replace *new;
+ struct ebt_u_entry *e;
+ struct ebt_u_match_list *m_l;
+ struct ebt_u_watcher_list *w_l;
+ struct ebt_u_entries *entries;
+ char *p, *base;
+ int i, j;
+ unsigned int entries_size = 0, *chain_offsets;
+
+ new = (struct ebt_replace *)malloc(sizeof(struct ebt_replace));
+ if (!new)
+ ebt_print_memory();
+ new->valid_hooks = u_repl->valid_hooks;
+ strcpy(new->name, u_repl->name);
+ new->nentries = u_repl->nentries;
+ new->num_counters = u_repl->num_counters;
+ new->counters = sparc_cast u_repl->counters;
+ chain_offsets = (unsigned int *)malloc(u_repl->num_chains * sizeof(unsigned int));
+ /* Determine size */
+ for (i = 0; i < u_repl->num_chains; i++) {
+ if (!(entries = u_repl->chains[i]))
+ continue;
+ chain_offsets[i] = entries_size;
+ entries_size += sizeof(struct ebt_entries);
+ j = 0;
+ e = entries->entries->next;
+ while (e != entries->entries) {
+ j++;
+ entries_size += sizeof(struct ebt_entry);
+ m_l = e->m_list;
+ while (m_l) {
+ entries_size += m_l->m->match_size +
+ sizeof(struct ebt_entry_match);
+ m_l = m_l->next;
+ }
+ w_l = e->w_list;
+ while (w_l) {
+ entries_size += w_l->w->watcher_size +
+ sizeof(struct ebt_entry_watcher);
+ w_l = w_l->next;
+ }
+ entries_size += e->t->target_size +
+ sizeof(struct ebt_entry_target);
+ e = e->next;
+ }
+ /* A little sanity check */
+ if (j != entries->nentries)
+ ebt_print_bug("Wrong nentries: %d != %d, hook = %s", j,
+ entries->nentries, entries->name);
+ }
+
+ new->entries_size = entries_size;
+ p = (char *)malloc(entries_size);
+ if (!p)
+ ebt_print_memory();
+
+ /* Put everything in one block */
+ new->entries = sparc_cast p;
+ for (i = 0; i < u_repl->num_chains; i++) {
+ struct ebt_entries *hlp;
+
+ hlp = (struct ebt_entries *)p;
+ if (!(entries = u_repl->chains[i]))
+ continue;
+ if (i < NF_BR_NUMHOOKS)
+ new->hook_entry[i] = sparc_cast hlp;
+ hlp->nentries = entries->nentries;
+ hlp->policy = entries->policy;
+ strcpy(hlp->name, entries->name);
+ hlp->counter_offset = entries->counter_offset;
+ hlp->distinguisher = 0; /* Make the kernel see the light */
+ p += sizeof(struct ebt_entries);
+ e = entries->entries->next;
+ while (e != entries->entries) {
+ struct ebt_entry *tmp = (struct ebt_entry *)p;
+
+ tmp->bitmask = e->bitmask | EBT_ENTRY_OR_ENTRIES;
+ tmp->invflags = e->invflags;
+ tmp->ethproto = e->ethproto;
+ strcpy(tmp->in, e->in);
+ strcpy(tmp->out, e->out);
+ strcpy(tmp->logical_in, e->logical_in);
+ strcpy(tmp->logical_out, e->logical_out);
+ memcpy(tmp->sourcemac, e->sourcemac,
+ sizeof(tmp->sourcemac));
+ memcpy(tmp->sourcemsk, e->sourcemsk,
+ sizeof(tmp->sourcemsk));
+ memcpy(tmp->destmac, e->destmac, sizeof(tmp->destmac));
+ memcpy(tmp->destmsk, e->destmsk, sizeof(tmp->destmsk));
+
+ base = p;
+ p += sizeof(struct ebt_entry);
+ m_l = e->m_list;
+ while (m_l) {
+ memcpy(p, m_l->m, m_l->m->match_size +
+ sizeof(struct ebt_entry_match));
+ p += m_l->m->match_size +
+ sizeof(struct ebt_entry_match);
+ m_l = m_l->next;
+ }
+ tmp->watchers_offset = p - base;
+ w_l = e->w_list;
+ while (w_l) {
+ memcpy(p, w_l->w, w_l->w->watcher_size +
+ sizeof(struct ebt_entry_watcher));
+ p += w_l->w->watcher_size +
+ sizeof(struct ebt_entry_watcher);
+ w_l = w_l->next;
+ }
+ tmp->target_offset = p - base;
+ memcpy(p, e->t, e->t->target_size +
+ sizeof(struct ebt_entry_target));
+ if (!strcmp(e->t->u.name, EBT_STANDARD_TARGET)) {
+ struct ebt_standard_target *st =
+ (struct ebt_standard_target *)p;
+ /* Translate the jump to a udc */
+ if (st->verdict >= 0)
+ st->verdict = chain_offsets
+ [st->verdict + NF_BR_NUMHOOKS];
+ }
+ p += e->t->target_size +
+ sizeof(struct ebt_entry_target);
+ tmp->next_offset = p - base;
+ e = e->next;
+ }
+ }
+
+ /* Sanity check */
+ if (p - (char *)new->entries != new->entries_size)
+ ebt_print_bug("Entries_size bug");
+ free(chain_offsets);
+ return new;
+}
+
+static void store_table_in_file(char *filename, struct ebt_replace *repl)
+{
+	char *data;
+	int size;
+	int fd;
+
+	/* Start from an empty file with right privileges; creat() fails with -1 */
+	if ((fd = creat(filename, 0600)) < 0) {
+		ebt_print_error("Couldn't create file %s", filename);
+		return;
+	}
+
+	size = sizeof(struct ebt_replace) + repl->entries_size +
+	   repl->nentries * sizeof(struct ebt_counter);
+	data = (char *)malloc(size);
+	if (!data)
+		ebt_print_memory();
+	memcpy(data, repl, sizeof(struct ebt_replace));
+	memcpy(data + sizeof(struct ebt_replace), (char *)repl->entries,
+	   repl->entries_size);
+	/* Initialize counters to zero, deliver_counters() can update them */
+	memset(data + sizeof(struct ebt_replace) + repl->entries_size,
+	   0, repl->nentries * sizeof(struct ebt_counter));
+	if (write(fd, data, size) != size)
+		ebt_print_error("Couldn't write everything to file %s",
+				filename);
+	close(fd);
+	free(data);
+}
+
+void ebt_deliver_table(struct ebt_u_replace *u_repl)
+{
+ socklen_t optlen;
+ struct ebt_replace *repl;
+
+ /* Translate the struct ebt_u_replace to a struct ebt_replace */
+ repl = translate_user2kernel(u_repl);
+ if (u_repl->filename != NULL) {
+ store_table_in_file(u_repl->filename, repl);
+ goto free_repl;
+ }
+ /* Give the data to the kernel */
+ optlen = sizeof(struct ebt_replace) + repl->entries_size;
+ if (get_sockfd())
+ goto free_repl;
+ if (!setsockopt(sockfd, IPPROTO_IP, EBT_SO_SET_ENTRIES, repl, optlen))
+ goto free_repl;
+ if (u_repl->command == 8) { /* The ebtables module may not
+ * yet be loaded with --atomic-commit */
+ ebtables_insmod("ebtables");
+ if (!setsockopt(sockfd, IPPROTO_IP, EBT_SO_SET_ENTRIES,
+ repl, optlen))
+ goto free_repl;
+ }
+
+ ebt_print_error("The kernel doesn't support a certain ebtables"
+ " extension, consider recompiling your kernel or insmod"
+ " the extension");
+free_repl:
+ if (repl) {
+ free(repl->entries);
+ free(repl);
+ }
+}
+
+static int store_counters_in_file(char *filename, struct ebt_u_replace *repl)
+{
+	int size = repl->nentries * sizeof(struct ebt_counter), ret = 0;
+	unsigned int entries_size;
+	struct ebt_replace hlp;
+	FILE *file;
+
+	if (!(file = fopen(filename, "r+b"))) {
+		ebt_print_error("Could not open file %s", filename);
+		return -1;
+	}
+	/* Find out entries_size and then set the file pointer to the
+	 * counters */
+	if (fseek(file, (char *)(&hlp.entries_size) - (char *)(&hlp), SEEK_SET)
+	   || fread(&entries_size, sizeof(char), sizeof(unsigned int), file) !=
+	   sizeof(unsigned int) ||
+	   fseek(file, entries_size + sizeof(struct ebt_replace), SEEK_SET)) {
+		ebt_print_error("File %s is corrupt", filename);
+		ret = -1;
+		goto close_file;
+	}
+	if (fwrite(repl->counters, sizeof(char), size, file) != size) {
+		ebt_print_error("Could not write everything to file %s",
+				filename);
+		ret = -1;
+	}
+close_file:
+	fclose(file);
+	return ret; /* propagate the -1 set on the corrupt-file/short-write paths */
+}
+
+/* Gets executed after ebt_deliver_table. Delivers the counters to the kernel
+ * and resets the counterchanges to CNT_NORM.
+ *
+ * A fresh array of u_repl->nentries counters is built by walking the
+ * counter-change list (u_repl->cc) in step with the rules of all chains and
+ * the old counter array. The new array then replaces u_repl->counters, the
+ * change list is reset, and the counters are either written to
+ * u_repl->filename (when set) or handed to the kernel with
+ * EBT_SO_SET_COUNTERS. Nothing is done when the table has no entries. */
+void ebt_deliver_counters(struct ebt_u_replace *u_repl)
+{
+ struct ebt_counter *old, *new, *newcounters;
+ socklen_t optlen;
+ struct ebt_replace repl;
+ struct ebt_cntchanges *cc = u_repl->cc->next, *cc2;
+ struct ebt_u_entries *entries = NULL;
+ struct ebt_u_entry *next = NULL;
+ int i, chainnr = 0;
+
+ if (u_repl->nentries == 0)
+ return;
+
+ newcounters = (struct ebt_counter *)
+ malloc(u_repl->nentries * sizeof(struct ebt_counter));
+ if (!newcounters)
+ ebt_print_memory();
+ memset(newcounters, 0, u_repl->nentries * sizeof(struct ebt_counter));
+ /* old walks the previous counter array, new fills the fresh one */
+ old = u_repl->counters;
+ new = newcounters;
+ /* u_repl->cc is the list head (sentinel); stop when we are back at it */
+ while (cc != u_repl->cc) {
+ /* No current rule, or the end of the current chain's circular rule
+ * list was reached: advance to the first rule of the next chain that
+ * exists and is not empty */
+ if (!next || next == entries->entries) {
+ while (chainnr < u_repl->num_chains && (!(entries = u_repl->chains[chainnr++]) ||
+ (next = entries->entries->next) == entries->entries));
+ if (chainnr == u_repl->num_chains)
+ break;
+ }
+ if (cc->type == CNT_NORM) {
+ /* 'Normal' rule, meaning we didn't do anything to it
+ * So, we just copy */
+ *new = *old;
+ next->cnt = *new;
+ next->cnt_surplus.pcnt = next->cnt_surplus.bcnt = 0;
+ old++; /* We've used an old counter */
+ new++; /* We've set a new counter */
+ next = next->next;
+ } else if (cc->type == CNT_DEL) {
+ old++; /* Don't use this old counter */
+ } else {
+ if (cc->type == CNT_CHANGE) {
+ /* cc->change encodes two operations: change % 3 is applied to the
+ * packet counter and change / 3 to the byte counter. 1 adds the
+ * surplus to the old counter, 2 subtracts it, anything else takes
+ * the rule's own counter as the new value */
+ if (cc->change % 3 == 1)
+ new->pcnt = old->pcnt + next->cnt_surplus.pcnt;
+ else if (cc->change % 3 == 2)
+ new->pcnt = old->pcnt - next->cnt_surplus.pcnt;
+ else
+ new->pcnt = next->cnt.pcnt;
+ if (cc->change / 3 == 1)
+ new->bcnt = old->bcnt + next->cnt_surplus.bcnt;
+ else if (cc->change / 3 == 2)
+ new->bcnt = old->bcnt - next->cnt_surplus.bcnt;
+ else
+ new->bcnt = next->cnt.bcnt;
+ } else
+ *new = next->cnt;
+ next->cnt = *new;
+ next->cnt_surplus.pcnt = next->cnt_surplus.bcnt = 0;
+ /* An added rule has no old counter to consume; every other type
+ * reaching this branch uses up one old counter as well */
+ if (cc->type == CNT_ADD)
+ new++;
+ else {
+ old++;
+ new++;
+ }
+ next = next->next;
+ }
+ cc = cc->next;
+ }
+
+ /* The table now owns the freshly built array instead of the old one */
+ free(u_repl->counters);
+ u_repl->counters = newcounters;
+ u_repl->num_counters = u_repl->nentries;
+ /* Reset the counterchanges to CNT_NORM and delete the unused cc */
+ i = 0;
+ cc = u_repl->cc->next;
+ while (cc != u_repl->cc) {
+ if (cc->type == CNT_DEL) {
+ /* Unlink the node from the doubly linked list before freeing it */
+ cc->prev->next = cc->next;
+ cc->next->prev = cc->prev;
+ cc2 = cc->next;
+ free(cc);
+ cc = cc2;
+ } else {
+ cc->type = CNT_NORM;
+ cc->change = 0;
+ i++;
+ cc = cc->next;
+ }
+ }
+ /* Sanity check: one surviving change record per rule */
+ if (i != u_repl->nentries)
+ ebt_print_bug("i != u_repl->nentries");
+ if (u_repl->filename != NULL) {
+ /* File mode: the return value of store_counters_in_file() is not
+ * examined here */
+ store_counters_in_file(u_repl->filename, u_repl);
+ return;
+ }
+ optlen = u_repl->nentries * sizeof(struct ebt_counter) +
+ sizeof(struct ebt_replace);
+ /* Now put the stuff in the kernel's struct ebt_replace */
+ /* Only counters, num_counters and name of repl are filled in */
+ repl.counters = sparc_cast u_repl->counters;
+ repl.num_counters = u_repl->num_counters;
+ memcpy(repl.name, u_repl->name, sizeof(repl.name));
+
+ if (get_sockfd())
+ return;
+ if (setsockopt(sockfd, IPPROTO_IP, EBT_SO_SET_COUNTERS, &repl, optlen))
+ ebt_print_bug("Couldn't update kernel counters");
+}
+
+/*
+ * Append a private copy of kernel match m to a userspace match list.
+ * *l points at the list's tail link and is moved on to the new node's
+ * next link. Returns 0, or -1 when no userspace match is registered under
+ * the match's name (the copy stays on the list in that case).
+ */
+static int
+ebt_translate_match(struct ebt_entry_match *m, struct ebt_u_match_list ***l)
+{
+	size_t total = m->match_size + sizeof(struct ebt_entry_match);
+	struct ebt_u_match_list *node;
+
+	node = (struct ebt_u_match_list *)
+		malloc(sizeof(struct ebt_u_match_list));
+	if (!node)
+		ebt_print_memory();
+	node->m = (struct ebt_entry_match *)malloc(total);
+	if (!node->m)
+		ebt_print_memory();
+	/* Header and match-specific data are copied in one go */
+	memcpy(node->m, m, total);
+	node->next = NULL;
+	/* Hook the node onto the tail and advance the tail link */
+	**l = node;
+	*l = &node->next;
+
+	if (ebt_find_match(node->m->u.name) != NULL)
+		return 0;
+	ebt_print_error("Kernel match %s unsupported by userspace tool",
+			node->m->u.name);
+	return -1;
+}
+
+/*
+ * Append a private copy of kernel watcher w to a userspace watcher list.
+ * *l points at the list's tail link and is moved on to the new node's
+ * next link. Returns 0, or -1 when no userspace watcher is registered
+ * under the watcher's name (the copy stays on the list in that case).
+ */
+static int
+ebt_translate_watcher(struct ebt_entry_watcher *w,
+		struct ebt_u_watcher_list ***l)
+{
+	size_t total = w->watcher_size + sizeof(struct ebt_entry_watcher);
+	struct ebt_u_watcher_list *node;
+
+	node = (struct ebt_u_watcher_list *)
+		malloc(sizeof(struct ebt_u_watcher_list));
+	if (!node)
+		ebt_print_memory();
+	node->w = (struct ebt_entry_watcher *)malloc(total);
+	if (!node->w)
+		ebt_print_memory();
+	/* Header and watcher-specific data are copied in one go */
+	memcpy(node->w, w, total);
+	node->next = NULL;
+	/* Hook the node onto the tail and advance the tail link */
+	**l = node;
+	*l = &node->next;
+
+	if (ebt_find_watcher(node->w->u.name) != NULL)
+		return 0;
+	ebt_print_error("Kernel watcher %s unsupported by userspace "
+			"tool", node->w->u.name);
+	return -1;
+}
+
+/*
+ * Callback used with EBT_ENTRY_ITERATE by ebt_get_table(): translate one
+ * element of the kernel entries blob into its userspace form.
+ *
+ * e           element to translate; a rule when EBT_ENTRY_OR_ENTRIES is set
+ *             in its bitmask, otherwise a chain header (struct ebt_entries)
+ * hook        index of the chain currently being filled, updated on headers
+ * n           number of rules the current chain announced in its header
+ * cnt         number of rules translated so far in the current chain
+ * totalcnt    number of rules translated so far in the whole table
+ * u_e         last userspace rule of the current chain; new rules are
+ *             linked in right after it
+ * u_repl      userspace table being built
+ * valid_hooks bitmask of the standard hooks present in the table
+ * base        start of the kernel entries blob, used to resolve jumps
+ * cc          next unused counter-change record
+ *
+ * Returns 0 on success and -1 when the rule's target is unknown to the
+ * userspace tool. The results of the match and watcher iterations are not
+ * examined.
+ */
+static int
+ebt_translate_entry(struct ebt_entry *e, int *hook, int *n, int *cnt,
+ int *totalcnt, struct ebt_u_entry **u_e, struct ebt_u_replace *u_repl,
+ unsigned int valid_hooks, char *base, struct ebt_cntchanges **cc)
+{
+ /* An entry */
+ if (e->bitmask & EBT_ENTRY_OR_ENTRIES) {
+ struct ebt_u_entry *new;
+ struct ebt_u_match_list **m_l;
+ struct ebt_u_watcher_list **w_l;
+ struct ebt_entry_target *t;
+
+ new = (struct ebt_u_entry *)malloc(sizeof(struct ebt_u_entry));
+ if (!new)
+ ebt_print_memory();
+ new->bitmask = e->bitmask;
+ /*
+ * Plain userspace code doesn't know about
+ * EBT_ENTRY_OR_ENTRIES
+ */
+ new->bitmask &= ~EBT_ENTRY_OR_ENTRIES;
+ new->invflags = e->invflags;
+ new->ethproto = e->ethproto;
+ strcpy(new->in, e->in);
+ strcpy(new->out, e->out);
+ strcpy(new->logical_in, e->logical_in);
+ strcpy(new->logical_out, e->logical_out);
+ memcpy(new->sourcemac, e->sourcemac, sizeof(new->sourcemac));
+ memcpy(new->sourcemsk, e->sourcemsk, sizeof(new->sourcemsk));
+ memcpy(new->destmac, e->destmac, sizeof(new->destmac));
+ memcpy(new->destmsk, e->destmsk, sizeof(new->destmsk));
+ if (*totalcnt >= u_repl->nentries)
+ ebt_print_bug("*totalcnt >= u_repl->nentries");
+ /* Counters are stored in table order, one per rule */
+ new->cnt = u_repl->counters[*totalcnt];
+ new->cnt_surplus.pcnt = new->cnt_surplus.bcnt = 0;
+ /* Claim the next counter-change record for this rule */
+ new->cc = *cc;
+ *cc = (*cc)->next;
+ new->m_list = NULL;
+ new->w_list = NULL;
+ /* Insert the rule into the chain's doubly linked list right after
+ * *u_e and make it the new insertion point */
+ new->next = (*u_e)->next;
+ new->next->prev = new;
+ (*u_e)->next = new;
+ new->prev = *u_e;
+ *u_e = new;
+ m_l = &new->m_list;
+ EBT_MATCH_ITERATE(e, ebt_translate_match, &m_l);
+ w_l = &new->w_list;
+ EBT_WATCHER_ITERATE(e, ebt_translate_watcher, &w_l);
+
+ /* The target is found at target_offset bytes from the rule's start */
+ t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+ new->t = (struct ebt_entry_target *)
+ malloc(t->target_size + sizeof(struct ebt_entry_target));
+ if (!new->t)
+ ebt_print_memory();
+ /* NOTE(review): on this error path new->t has been allocated but not
+ * yet filled in, and the rule is already linked into the chain */
+ if (ebt_find_target(t->u.name) == NULL) {
+ ebt_print_error("Kernel target %s unsupported by "
+ "userspace tool", t->u.name);
+ return -1;
+ }
+ memcpy(new->t, t, t->target_size +
+ sizeof(struct ebt_entry_target));
+ /* Deal with jumps to udc */
+ if (!strcmp(t->u.name, EBT_STANDARD_TARGET)) {
+ char *tmp = base;
+ int verdict = ((struct ebt_standard_target *)t)->verdict;
+ int i;
+
+ /* A non-negative verdict is treated as a byte offset from base to
+ * the chain jumped to; it is replaced by that user defined chain's
+ * index counted from the first user defined chain */
+ if (verdict >= 0) {
+ tmp += verdict;
+ for (i = NF_BR_NUMHOOKS; i < u_repl->num_chains; i++)
+ if (u_repl->chains[i]->kernel_start == tmp)
+ break;
+ if (i == u_repl->num_chains)
+ ebt_print_bug("Can't find udc for jump");
+ ((struct ebt_standard_target *)new->t)->verdict = i-NF_BR_NUMHOOKS;
+ }
+ }
+
+ (*cnt)++;
+ (*totalcnt)++;
+ return 0;
+ } else { /* A new chain */
+ int i;
+ struct ebt_entries *entries = (struct ebt_entries *)e;
+
+ /* The previous chain must have delivered as many rules as announced */
+ if (*n != *cnt)
+ ebt_print_bug("Nr of entries in the chain is wrong");
+ *n = entries->nentries;
+ *cnt = 0;
+ /* Next chain index: the next valid standard hook, or simply the
+ * following index once the standard hooks are exhausted */
+ for (i = *hook + 1; i < NF_BR_NUMHOOKS; i++)
+ if (valid_hooks & (1 << i))
+ break;
+ *hook = i;
+ /* Rules that follow are appended after this chain's list head */
+ *u_e = u_repl->chains[*hook]->entries;
+ return 0;
+ }
+}
+
+/*
+ * Initialize all chain headers: for every chain header found in the kernel
+ * blob a struct ebt_u_entries with an empty circular rule list is created
+ * and stored in u_repl->chains. Rules are skipped. Always returns 0.
+ */
+static int
+ebt_translate_chains(struct ebt_entry *e, int *hook,
+		struct ebt_u_replace *u_repl, unsigned int valid_hooks)
+{
+	struct ebt_entries *hdr = (struct ebt_entries *)e;
+	struct ebt_u_entries *chain;
+	int nr;
+
+	/* Only chain headers are of interest in this pass */
+	if (e->bitmask & EBT_ENTRY_OR_ENTRIES)
+		return 0;
+
+	/* Next valid standard hook, or the following index once the
+	 * standard hooks are exhausted */
+	nr = *hook + 1;
+	while (nr < NF_BR_NUMHOOKS && !(valid_hooks & (1 << nr)))
+		nr++;
+
+	chain = (struct ebt_u_entries *)malloc(sizeof(struct ebt_u_entries));
+	if (!chain)
+		ebt_print_memory();
+	/* Grow the chain array when the slot lies just beyond its end */
+	if (nr == u_repl->max_chains)
+		ebt_double_chains(u_repl);
+	u_repl->chains[nr] = chain;
+	/* Remember where a user defined chain sits in the kernel blob */
+	if (nr >= NF_BR_NUMHOOKS)
+		chain->kernel_start = (char *)e;
+	*hook = nr;
+
+	chain->nentries = hdr->nentries;
+	chain->policy = hdr->policy;
+	/* The list head is a dummy rule that points at itself while empty */
+	chain->entries = (struct ebt_u_entry *)malloc(sizeof(struct ebt_u_entry));
+	if (!chain->entries)
+		ebt_print_memory();
+	chain->entries->next = chain->entries->prev = chain->entries;
+	chain->counter_offset = hdr->counter_offset;
+	strcpy(chain->name, hdr->name);
+	return 0;
+}
+
+/*
+ * Load a kernel-format table from a file into repl.
+ *
+ * The file is expected to hold a struct ebt_replace header, followed by
+ * entries_size bytes of entries, followed by nentries counters. On success
+ * repl->entries and repl->counters point to freshly allocated buffers
+ * (counters is NULL when the table has no entries).
+ *
+ * command is the ebtables command being executed: unless it is 'L' or 8
+ * (--atomic-commit) the table name stored in the file must equal the name
+ * found in repl->name on entry.
+ *
+ * Returns 0 on success, -1 on any error. When reading the entries or the
+ * counters fails, both buffers are released and the pointers in repl reset.
+ */
+static int retrieve_from_file(char *filename, struct ebt_replace *repl,
+		char command)
+{
+	FILE *file;
+	char *hlp = NULL, *entries;
+	struct ebt_counter *counters = NULL;
+	int size, ret = 0;
+
+	if (!(file = fopen(filename, "r+b"))) {
+		ebt_print_error("Could not open file %s", filename);
+		return -1;
+	}
+	/* Make sure table name is right if command isn't -L or --atomic-commit */
+	if (command != 'L' && command != 8) {
+		/* Keep the requested name: the header read below overwrites it */
+		hlp = (char *)malloc(strlen(repl->name) + 1);
+		if (!hlp)
+			ebt_print_memory();
+		strcpy(hlp, repl->name);
+	}
+	if (fread(repl, sizeof(char), sizeof(struct ebt_replace), file)
+	    != sizeof(struct ebt_replace)) {
+		ebt_print_error("File %s is corrupt", filename);
+		ret = -1;
+		goto close_file;
+	}
+	if (command != 'L' && command != 8 && strcmp(hlp, repl->name)) {
+		ebt_print_error("File %s contains wrong table name or is "
+				"corrupt", filename);
+		ret = -1;
+		goto close_file;
+	} else if (!ebt_find_table(repl->name)) {
+		ebt_print_error("File %s contains invalid table name",
+				filename);
+		ret = -1;
+		goto close_file;
+	}
+
+	/* The file must be exactly header + entries + counters long */
+	size = sizeof(struct ebt_replace) +
+	       repl->nentries * sizeof(struct ebt_counter) + repl->entries_size;
+	if (fseek(file, 0, SEEK_END) || size != ftell(file)) {
+		ebt_print_error("File %s has wrong size", filename);
+		ret = -1;
+		goto close_file;
+	}
+	entries = (char *)malloc(repl->entries_size);
+	if (!entries)
+		ebt_print_memory();
+	repl->entries = sparc_cast entries;
+	if (repl->nentries) {
+		counters = (struct ebt_counter *)
+			malloc(repl->nentries * sizeof(struct ebt_counter));
+		if (!counters)
+			ebt_print_memory();
+		repl->counters = sparc_cast counters;
+	} else
+		repl->counters = sparc_cast NULL;
+	/* Copy entries and counters */
+	if (fseek(file, sizeof(struct ebt_replace), SEEK_SET) ||
+	    fread((char *)repl->entries, sizeof(char), repl->entries_size, file)
+	    != repl->entries_size ||
+	    fseek(file, sizeof(struct ebt_replace) + repl->entries_size,
+		  SEEK_SET)
+	    || fread((char *)repl->counters, sizeof(char),
+		     repl->nentries * sizeof(struct ebt_counter), file)
+	    != repl->nentries * sizeof(struct ebt_counter)) {
+		ebt_print_error("File %s is corrupt", filename);
+		/* Release both buffers so nothing leaks and repl is left
+		 * without dangling pointers */
+		free(entries);
+		free(counters);
+		repl->entries = NULL;
+		repl->counters = sparc_cast NULL;
+		ret = -1;
+	}
+close_file:
+	fclose(file);
+	free(hlp);
+	return ret;
+}
+
+/*
+ * Fetch a table from the kernel into repl in two steps: first only the
+ * header (to learn entries_size and nentries), then, after buffers of the
+ * right size have been allocated, the entries and counters.
+ * With init set the kernel's initial table is requested
+ * (EBT_SO_GET_INIT_*) instead of the current one. command is not used.
+ * Returns 0 on success, -1 when the socket or the header query fails.
+ */
+static int retrieve_from_kernel(struct ebt_replace *repl, char command,
+		int init)
+{
+	socklen_t len = sizeof(struct ebt_replace);
+	struct ebt_counter *cnt_buf;
+	char *entry_buf;
+
+	if (get_sockfd())
+		return -1;
+	/* --atomic-init || --init-table */
+	if (getsockopt(sockfd, IPPROTO_IP,
+		       init ? EBT_SO_GET_INIT_INFO : EBT_SO_GET_INFO,
+		       repl, &len))
+		return -1;
+
+	entry_buf = (char *)malloc(repl->entries_size);
+	if (!entry_buf)
+		ebt_print_memory();
+	repl->entries = sparc_cast entry_buf;
+
+	if (repl->nentries) {
+		cnt_buf = (struct ebt_counter *)
+			malloc(repl->nentries * sizeof(struct ebt_counter));
+		if (!cnt_buf)
+			ebt_print_memory();
+		repl->counters = sparc_cast cnt_buf;
+	} else {
+		repl->counters = sparc_cast NULL;
+	}
+
+	/* We want to receive the counters */
+	repl->num_counters = repl->nentries;
+	len += repl->entries_size +
+	       repl->num_counters * sizeof(struct ebt_counter);
+	if (getsockopt(sockfd, IPPROTO_IP,
+		       init ? EBT_SO_GET_INIT_ENTRIES : EBT_SO_GET_ENTRIES,
+		       repl, &len))
+		ebt_print_bug("Hmm, what is wrong??? bug#1");
+
+	return 0;
+}
+
+/*
+ * Fill u_repl with the table named u_repl->name.
+ *
+ * The kernel-format table is read from u_repl->filename when that is set,
+ * otherwise from the kernel (the initial table when init is non-zero), and
+ * then translated into the userspace representation: a counter-change list
+ * with one CNT_NORM record per rule, the chain array, and the rules of
+ * every chain.
+ *
+ * Returns 0 on success and -1 when the table could not be retrieved.
+ * Errors reported by the translation callbacks are not acted upon (see the
+ * FIXME below).
+ */
+int ebt_get_table(struct ebt_u_replace *u_repl, int init)
+{
+	int i, j, k, hook;
+	struct ebt_replace repl;
+	struct ebt_u_entry *u_e = NULL;
+	struct ebt_cntchanges *new_cc, *cc;
+
+	strcpy(repl.name, u_repl->name);
+	if (u_repl->filename != NULL) {
+		if (init)
+			ebt_print_bug("Getting initial table data from a file is impossible");
+		if (retrieve_from_file(u_repl->filename, &repl, u_repl->command))
+			return -1;
+		/* -L with a wrong table name should be dealt with silently */
+		strcpy(u_repl->name, repl.name);
+	} else if (retrieve_from_kernel(&repl, u_repl->command, init))
+		return -1;
+
+	/* Translate the struct ebt_replace to a struct ebt_u_replace */
+	u_repl->valid_hooks = repl.valid_hooks;
+	u_repl->nentries = repl.nentries;
+	u_repl->num_counters = repl.num_counters;
+	/* The counter buffer allocated by the retrieve function is handed
+	 * over to u_repl */
+	u_repl->counters = repl.counters;
+	/* u_repl->cc is the head of a circular, doubly linked list */
+	u_repl->cc = (struct ebt_cntchanges *)malloc(sizeof(struct ebt_cntchanges));
+	if (!u_repl->cc)
+		ebt_print_memory();
+	u_repl->cc->next = u_repl->cc->prev = u_repl->cc;
+	cc = u_repl->cc;
+	for (i = 0; i < repl.nentries; i++) {
+		new_cc = (struct ebt_cntchanges *)malloc(sizeof(struct ebt_cntchanges));
+		if (!new_cc)
+			ebt_print_memory();
+		new_cc->type = CNT_NORM;
+		new_cc->change = 0;
+		new_cc->prev = cc;
+		cc->next = new_cc;
+		cc = new_cc;
+	}
+	/* Close the circle: the last record links back to the head */
+	if (repl.nentries) {
+		new_cc->next = u_repl->cc;
+		u_repl->cc->prev = new_cc;
+	}
+	u_repl->chains = (struct ebt_u_entries **)calloc(EBT_ORI_MAX_CHAINS, sizeof(void *));
+	/* Checked like every other allocation here: the chain array is
+	 * written to by the first iteration below */
+	if (!u_repl->chains)
+		ebt_print_memory();
+	u_repl->max_chains = EBT_ORI_MAX_CHAINS;
+	hook = -1;
+	/* FIXME: Clean up when an error is encountered */
+	/* First pass: create the chain headers */
+	EBT_ENTRY_ITERATE(repl.entries, repl.entries_size, ebt_translate_chains,
+	   &hook, u_repl, u_repl->valid_hooks);
+	/* hook now holds the index of the last chain created */
+	if (hook >= NF_BR_NUMHOOKS)
+		u_repl->num_chains = hook + 1;
+	else
+		u_repl->num_chains = NF_BR_NUMHOOKS;
+	i = 0; /* Holds the expected nr. of entries for the chain */
+	j = 0; /* Holds the up to now counted entries for the chain */
+	k = 0; /* Holds the total nr. of entries, should equal u_repl->nentries afterwards */
+	cc = u_repl->cc->next;
+	hook = -1;
+	/* Second pass: translate the rules into their chains */
+	EBT_ENTRY_ITERATE((char *)repl.entries, repl.entries_size,
+	   ebt_translate_entry, &hook, &i, &j, &k, &u_e, u_repl,
+	   u_repl->valid_hooks, (char *)repl.entries, &cc);
+	if (k != u_repl->nentries)
+		ebt_print_bug("Wrong total nentries");
+	/* The kernel-format blob is no longer needed once translated */
+	free(repl.entries);
+	return 0;
+}
diff --git a/tools/remus/imqebt/ebtables-standalone.c b/tools/remus/imqebt/ebtables-standalone.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/ebtables-standalone.c
@@ -0,0 +1,14 @@
+#include <string.h>
+#include "include/ebtables_u.h"
+
+/* Table state handed to do_command(); static storage, so it starts zeroed */
+static struct ebt_u_replace replace;
+/* Defined in ebtables.c. Declared with a real prototype so that a call with
+ * arguments is rejected by the compiler */
+void ebt_early_init_once(void);
+
+/*
+ * Stand-alone front end: run one ebtables command taken from the command
+ * line against the "filter" table unless the arguments select another one.
+ * The result of do_command() is not propagated; the exit status is 0.
+ */
+int main(int argc, char *argv[])
+{
+	/* Program mode: do not suppress messages */
+	ebt_silent = 0;
+	/* Merge the extensions' options into the option table once */
+	ebt_early_init_once();
+	strcpy(replace.name, "filter");
+	do_command(argc, argv, EXEC_STYLE_PRG, &replace);
+	return 0;
+}
diff --git a/tools/remus/imqebt/ebtables.c b/tools/remus/imqebt/ebtables.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/ebtables.c
@@ -0,0 +1,1233 @@
+/*
+ * ebtables.c, v2.0 July 2002
+ *
+ * Author: Bart De Schuymer
+ *
+ * This code was strongly inspired by the iptables code which is
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <getopt.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include "include/ebtables_u.h"
+#include "include/ethernetdb.h"
+
+/* Checks whether a command has already been specified.
+ * Expands to an expression on the file-scope 'replace' pointer, so it can
+ * only be used once that pointer has been set. */
+#define OPT_COMMANDS (replace->flags & OPT_COMMAND || replace->flags & OPT_ZERO)
+
+/* Bit flags for replace->flags. OPT_COMMAND is set by do_command() once a
+ * command has been parsed; the other flags presumably mark their option as
+ * seen -- confirm in do_command(). */
+#define OPT_COMMAND 0x01
+#define OPT_TABLE 0x02
+#define OPT_IN 0x04
+#define OPT_OUT 0x08
+#define OPT_JUMP 0x10
+#define OPT_PROTOCOL 0x20
+#define OPT_SOURCE 0x40
+#define OPT_DEST 0x80
+#define OPT_ZERO 0x100
+#define OPT_LOGICALIN 0x200
+#define OPT_LOGICALOUT 0x400
+#define OPT_KERNELDATA 0x800 /* This value is also defined in ebtablesd.c */
+#define OPT_COUNT 0x1000 /* This value is also defined in libebtc.c */
+#define OPT_CNT_INCR 0x2000 /* This value is also defined in libebtc.c */
+#define OPT_CNT_DECR 0x4000 /* This value is also defined in libebtc.c */
+
+/* Default command line options. Do not mess around with the already
+ * assigned numbers unless you know what you are doing.
+ * Options that also have a short form return that letter; options that only
+ * exist in long form return a small integer code (2 to 12) instead.
+ * The table ends with an all-zero entry, which merge_options() relies on to
+ * count the entries. */
+static struct option ebt_original_options[] =
+{
+ { "append" , required_argument, 0, 'A' },
+ { "insert" , required_argument, 0, 'I' },
+ { "delete" , required_argument, 0, 'D' },
+ { "list" , optional_argument, 0, 'L' },
+ { "Lc" , no_argument , 0, 4 },
+ { "Ln" , no_argument , 0, 5 },
+ { "Lx" , no_argument , 0, 6 },
+ { "Lmac2" , no_argument , 0, 12 },
+ { "zero" , optional_argument, 0, 'Z' },
+ { "flush" , optional_argument, 0, 'F' },
+ { "policy" , required_argument, 0, 'P' },
+ { "in-interface" , required_argument, 0, 'i' },
+ { "in-if" , required_argument, 0, 'i' },
+ { "logical-in" , required_argument, 0, 2 },
+ { "logical-out" , required_argument, 0, 3 },
+ { "out-interface" , required_argument, 0, 'o' },
+ { "out-if" , required_argument, 0, 'o' },
+ { "version" , no_argument , 0, 'V' },
+ { "help" , no_argument , 0, 'h' },
+ { "jump" , required_argument, 0, 'j' },
+ { "set-counters" , required_argument, 0, 'c' },
+ { "change-counters", required_argument, 0, 'C' },
+ { "proto" , required_argument, 0, 'p' },
+ { "protocol" , required_argument, 0, 'p' },
+ { "db" , required_argument, 0, 'b' },
+ { "source" , required_argument, 0, 's' },
+ { "src" , required_argument, 0, 's' },
+ { "destination" , required_argument, 0, 'd' },
+ { "dst" , required_argument, 0, 'd' },
+ { "table" , required_argument, 0, 't' },
+ { "modprobe" , required_argument, 0, 'M' },
+ { "new-chain" , required_argument, 0, 'N' },
+ { "rename-chain" , required_argument, 0, 'E' },
+ { "delete-chain" , optional_argument, 0, 'X' },
+ { "atomic-init" , no_argument , 0, 7 },
+ { "atomic-commit" , no_argument , 0, 8 },
+ { "atomic-file" , required_argument, 0, 9 },
+ { "atomic-save" , no_argument , 0, 10 },
+ { "init-table" , no_argument , 0, 11 },
+ { 0 }
+};
+
+/* Option table passed to getopt_long() by do_command(). Starts out as the
+ * built-in table and is replaced by a merged copy each time merge_match(),
+ * merge_watcher() or merge_target() runs. */
+static struct option *ebt_options = ebt_original_options;
+
+/* Holds all the data. Set by do_command() from its replace_ argument. */
+static struct ebt_u_replace *replace;
+
+/* The chosen table */
+static struct ebt_u_table *table;
+
+/* The pointers in here are special:
+ * The struct ebt_target pointer is actually a struct ebt_u_target pointer.
+ * I do not feel like using a union.
+ * We need a struct ebt_u_target pointer because we know the address of the data
+ * they point to won't change. We want to allow that the struct ebt_u_target.t
+ * member can change.
+ * The same holds for the struct ebt_match and struct ebt_watcher pointers.
+ * Allocated by do_command() on first use. */
+static struct ebt_u_entry *new_entry;
+
+
+/* Base value handed to the most recently merged extension; grows by
+ * OPTION_OFFSET with every call of merge_options() */
+static int global_option_offset;
+#define OPTION_OFFSET 256
+/*
+ * Build a new option table holding all of oldopts followed by all of
+ * newopts. The 'val' of every appended option is shifted by a fresh offset,
+ * which is also stored in *options_offset. The old table is freed unless
+ * it is the static built-in one. Returns the merged, zero-terminated
+ * table.
+ */
+static struct option *merge_options(struct option *oldopts,
+		const struct option *newopts, unsigned int *options_offset)
+{
+	unsigned int old_cnt = 0, new_cnt = 0, k;
+	struct option *combined;
+
+	if (!newopts || !oldopts || !options_offset)
+		ebt_print_bug("merge wrong");
+	/* Both tables are terminated by an entry without a name */
+	while (oldopts[old_cnt].name)
+		old_cnt++;
+	while (newopts[new_cnt].name)
+		new_cnt++;
+
+	global_option_offset += OPTION_OFFSET;
+	*options_offset = global_option_offset;
+
+	combined = malloc(sizeof(struct option) * (new_cnt + old_cnt + 1));
+	if (!combined)
+		ebt_print_memory();
+	memcpy(combined, oldopts, old_cnt * sizeof(struct option));
+	for (k = 0; k < new_cnt; k++) {
+		struct option *dst = &combined[old_cnt + k];
+
+		*dst = newopts[k];
+		dst->val += *options_offset;
+	}
+	/* All-zero terminator */
+	memset(&combined[old_cnt + new_cnt], 0, sizeof(struct option));
+	/* Only free dynamically allocated stuff */
+	if (oldopts != ebt_original_options)
+		free(oldopts);
+
+	return combined;
+}
+
+/* Add match m's extra options to the global option table; the offset
+ * assigned to them is stored in m->option_offset */
+static void merge_match(struct ebt_u_match *m)
+{
+	struct option *merged;
+
+	merged = merge_options(ebt_options, m->extra_ops, &(m->option_offset));
+	ebt_options = merged;
+}
+
+/* Add watcher w's extra options to the global option table; the offset
+ * assigned to them is stored in w->option_offset */
+static void merge_watcher(struct ebt_u_watcher *w)
+{
+	struct option *merged;
+
+	merged = merge_options(ebt_options, w->extra_ops, &(w->option_offset));
+	ebt_options = merged;
+}
+
+/* Add target t's extra options to the global option table; the offset
+ * assigned to them is stored in t->option_offset */
+static void merge_target(struct ebt_u_target *t)
+{
+	struct option *merged;
+
+	merged = merge_options(ebt_options, t->extra_ops, &(t->option_offset));
+	ebt_options = merged;
+}
+
+/* Be backwards compatible, so don't use '+' in kernel */
+#define IF_WILDCARD 1
+/*
+ * Print an interface name followed by a space, showing the first internal
+ * wildcard byte (IF_WILDCARD) as the user-visible '+'.
+ * The name is only read: the text before the wildcard, a '+', and the text
+ * after it are printed separately, so the caller's string is never
+ * modified through the const pointer.
+ */
+static void print_iface(const char *iface)
+{
+	const char *wild = strchr(iface, IF_WILDCARD);
+
+	if (wild)
+		printf("%.*s+%s ", (int)(wild - iface), iface, wild + 1);
+	else
+		printf("%s ", iface);
+}
+
+/* We use replace->flags, so we can't use the following values:
+ * 0x01 == OPT_COMMAND, 0x02 == OPT_TABLE, 0x100 == OPT_ZERO.
+ * Note that 0x04 to 0x20 are the same bits as OPT_IN, OPT_OUT, OPT_JUMP
+ * and OPT_PROTOCOL. list_em() tests these flags to choose its output
+ * format. */
+#define LIST_N 0x04
+#define LIST_C 0x08
+#define LIST_X 0x10
+#define LIST_MAC2 0x20
+
+/*
+ * Helper function for list_rules(): print one chain and all its rules.
+ *
+ * The output format is selected by bits in replace->flags:
+ *   LIST_X     print the rules as ebtables commands instead of a listing
+ *   LIST_N     prefix every rule with its right-aligned number
+ *   LIST_C     append the packet and byte counters
+ *   LIST_MAC2  sets ebt_printstyle_mac to 2 (otherwise 0)
+ */
+static void list_em(struct ebt_u_entries *entries)
+{
+	int i, j, space = 0, digits;
+	struct ebt_u_entry *hlp;
+	struct ebt_u_match_list *m_l;
+	struct ebt_u_watcher_list *w_l;
+	struct ebt_u_match *m;
+	struct ebt_u_watcher *w;
+	struct ebt_u_target *t;
+
+	if (replace->flags & LIST_MAC2)
+		ebt_printstyle_mac = 2;
+	else
+		ebt_printstyle_mac = 0;
+	/* entries->entries is the list head; the first rule follows it */
+	hlp = entries->entries->next;
+	/* The policy is turned into an index into ebt_standard_targets by
+	 * negating it and subtracting one. In command form it is only
+	 * printed when it differs from EBT_ACCEPT */
+	if (replace->flags & LIST_X && entries->policy != EBT_ACCEPT) {
+		printf("ebtables -t %s -P %s %s\n", replace->name,
+		       entries->name, ebt_standard_targets[-entries->policy - 1]);
+	} else if (!(replace->flags & LIST_X)) {
+		printf("\nBridge chain: %s, entries: %d, policy: %s\n",
+		       entries->name, entries->nentries,
+		       ebt_standard_targets[-entries->policy - 1]);
+	}
+
+	/* space = number of digits of the highest rule number, minus one */
+	if (replace->flags & LIST_N) {
+		i = entries->nentries;
+		while (i > 9) {
+			space++;
+			i /= 10;
+		}
+	}
+
+	for (i = 0; i < entries->nentries; i++) {
+		if (replace->flags & LIST_N) {
+			digits = 0;
+			/* A little work to get nice rule numbers. */
+			j = i + 1;
+			while (j > 9) {
+				digits++;
+				j /= 10;
+			}
+			for (j = 0; j < space - digits; j++)
+				printf(" ");
+			printf("%d. ", i + 1);
+		}
+		if (replace->flags & LIST_X)
+			printf("ebtables -t %s -A %s ",
+			       replace->name, entries->name);
+
+		/* The standard target's print() uses this to find out
+		 * the name of a udc */
+		hlp->replace = replace;
+
+		/* Don't print anything about the protocol if no protocol was
+		 * specified, obviously this means any protocol will do. */
+		if (!(hlp->bitmask & EBT_NOPROTO)) {
+			printf("-p ");
+			if (hlp->invflags & EBT_IPROTO)
+				printf("! ");
+			if (hlp->bitmask & EBT_802_3)
+				printf("Length ");
+			else {
+				struct ethertypeent *ent;
+
+				/* Prefer the symbolic name, fall back to hex */
+				ent = getethertypebynumber(ntohs(hlp->ethproto));
+				if (!ent)
+					printf("0x%x ", ntohs(hlp->ethproto));
+				else
+					printf("%s ", ent->e_name);
+			}
+		}
+		if (hlp->bitmask & EBT_SOURCEMAC) {
+			printf("-s ");
+			if (hlp->invflags & EBT_ISOURCE)
+				printf("! ");
+			ebt_print_mac_and_mask(hlp->sourcemac, hlp->sourcemsk);
+			printf(" ");
+		}
+		if (hlp->bitmask & EBT_DESTMAC) {
+			printf("-d ");
+			if (hlp->invflags & EBT_IDEST)
+				printf("! ");
+			ebt_print_mac_and_mask(hlp->destmac, hlp->destmsk);
+			printf(" ");
+		}
+		if (hlp->in[0] != '\0') {
+			printf("-i ");
+			if (hlp->invflags & EBT_IIN)
+				printf("! ");
+			print_iface(hlp->in);
+		}
+		if (hlp->logical_in[0] != '\0') {
+			printf("--logical-in ");
+			if (hlp->invflags & EBT_ILOGICALIN)
+				printf("! ");
+			print_iface(hlp->logical_in);
+		}
+		if (hlp->logical_out[0] != '\0') {
+			printf("--logical-out ");
+			if (hlp->invflags & EBT_ILOGICALOUT)
+				printf("! ");
+			print_iface(hlp->logical_out);
+		}
+		if (hlp->out[0] != '\0') {
+			printf("-o ");
+			if (hlp->invflags & EBT_IOUT)
+				printf("! ");
+			print_iface(hlp->out);
+		}
+
+		/* Each extension prints its own options */
+		m_l = hlp->m_list;
+		while (m_l) {
+			m = ebt_find_match(m_l->m->u.name);
+			if (!m)
+				ebt_print_bug("Match not found");
+			m->print(hlp, m_l->m);
+			m_l = m_l->next;
+		}
+		w_l = hlp->w_list;
+		while (w_l) {
+			w = ebt_find_watcher(w_l->w->u.name);
+			if (!w)
+				ebt_print_bug("Watcher not found");
+			w->print(hlp, w_l->w);
+			w_l = w_l->next;
+		}
+
+		printf("-j ");
+		/* The standard target's name is not printed; its print()
+		 * outputs the verdict itself */
+		if (strcmp(hlp->t->u.name, EBT_STANDARD_TARGET))
+			printf("%s ", hlp->t->u.name);
+		t = ebt_find_target(hlp->t->u.name);
+		if (!t)
+			ebt_print_bug("Target '%s' not found", hlp->t->u.name);
+		t->print(hlp, hlp->t);
+		if (replace->flags & LIST_C) {
+			uint64_t pcnt = hlp->cnt.pcnt;
+			uint64_t bcnt = hlp->cnt.bcnt;
+
+			/* PRIu64 matches uint64_t on every platform; %llu
+			 * does not where uint64_t is unsigned long */
+			if (replace->flags & LIST_X)
+				printf("-c %"PRIu64" %"PRIu64, pcnt, bcnt);
+			else
+				printf(", pcnt = %"PRIu64" -- bcnt = %"PRIu64, pcnt, bcnt);
+		}
+		printf("\n");
+		hlp = hlp->next;
+	}
+}
+
+/*
+ * Print the version banner and the built-in usage text, then the help of
+ * every match and watcher attached to new_entry, the help of new_entry's
+ * target and, when the chosen table provides one, the table's own help.
+ */
+static void print_help(void)
+{
+	struct ebt_u_match_list *match_node;
+	struct ebt_u_watcher_list *watcher_node;
+
+	PRINT_VERSION;
+	printf(
+"Usage:\n"
+"ebtables -[ADI] chain rule-specification [options]\n"
+"ebtables -P chain target\n"
+"ebtables -[LFZ] [chain]\n"
+"ebtables -[NX] [chain]\n"
+"ebtables -E old-chain-name new-chain-name\n\n"
+"Commands:\n"
+"--append -A chain : append to chain\n"
+"--delete -D chain : delete matching rule from chain\n"
+"--delete -D chain rulenum : delete rule at position rulenum from chain\n"
+"--change-counters -C chain\n"
+" [rulenum] pcnt bcnt : change counters of existing rule\n"
+"--insert -I chain rulenum : insert rule at position rulenum in chain\n"
+"--list -L [chain] : list the rules in a chain or in all chains\n"
+"--flush -F [chain] : delete all rules in chain or in all chains\n"
+"--init-table : replace the kernel table with the initial table\n"
+"--zero -Z [chain] : put counters on zero in chain or in all chains\n"
+"--policy -P chain target : change policy on chain to target\n"
+"--new-chain -N chain : create a user defined chain\n"
+"--rename-chain -E old new : rename a chain\n"
+"--delete-chain -X [chain] : delete a user defined chain\n"
+"--atomic-commit : update the kernel w/t table contained in <FILE>\n"
+"--atomic-init : put the initial kernel table into <FILE>\n"
+"--atomic-save : put the current kernel table into <FILE>\n"
+"--atomic-file file : set <FILE> to file\n\n"
+"Options:\n"
+"--proto -p [!] proto : protocol hexadecimal, by name or LENGTH\n"
+"--src -s [!] address[/mask]: source mac address\n"
+"--dst -d [!] address[/mask]: destination mac address\n"
+"--in-if -i [!] name[+] : network input interface name\n"
+"--out-if -o [!] name[+] : network output interface name\n"
+"--logical-in [!] name[+] : logical bridge input interface name\n"
+"--logical-out [!] name[+] : logical bridge output interface name\n"
+"--set-counters -c chain\n"
+" pcnt bcnt : set the counters of the to be added rule\n"
+"--modprobe -M program : try to insert modules using this program\n"
+"--version -V : print package version\n\n"
+"Environment variable:\n"
+ATOMIC_ENV_VARIABLE " : if set <FILE> (see above) will equal its value"
+"\n\n");
+
+	/* In new_entry the m, w and t pointers are used as pointers to the
+	 * userspace extension descriptors, hence the casts */
+	for (match_node = new_entry->m_list; match_node;
+	     match_node = match_node->next) {
+		((struct ebt_u_match *)match_node->m)->help();
+		printf("\n");
+	}
+	for (watcher_node = new_entry->w_list; watcher_node;
+	     watcher_node = watcher_node->next) {
+		((struct ebt_u_watcher *)watcher_node->w)->help();
+		printf("\n");
+	}
+	((struct ebt_u_target *)new_entry->t)->help();
+	printf("\n");
+	if (table->help)
+		table->help(ebt_hooknames);
+}
+
+/*
+ * Execute command L: list the selected chain, or every existing chain when
+ * none is selected. With LIST_X the output consists of ebtables commands,
+ * preceded by the commands that create the user defined chains and rename
+ * the standard chains whose name differs from the default.
+ */
+static void list_rules(void)
+{
+	int as_commands = replace->flags & LIST_X;
+	int nr;
+
+	if (!as_commands)
+		printf("Bridge table: %s\n", table->name);
+
+	if (replace->selected_chain != -1) {
+		list_em(ebt_to_chain(replace));
+		return;
+	}
+
+	/* Create new chains and rename standard chains when necessary */
+	if (as_commands && replace->num_chains > NF_BR_NUMHOOKS) {
+		for (nr = NF_BR_NUMHOOKS; nr < replace->num_chains; nr++)
+			printf("ebtables -t %s -N %s\n", replace->name, replace->chains[nr]->name);
+		for (nr = 0; nr < NF_BR_NUMHOOKS; nr++) {
+			if (!replace->chains[nr])
+				continue;
+			if (strcmp(replace->chains[nr]->name, ebt_hooknames[nr]))
+				printf("ebtables -t %s -E %s %s\n", replace->name, ebt_hooknames[nr], replace->chains[nr]->name);
+		}
+	}
+	for (nr = 0; nr < replace->num_chains; nr++) {
+		if (replace->chains[nr])
+			list_em(replace->chains[nr]);
+	}
+}
+
+/*
+ * Parse a rule number specification of the form "N", "N:M", ":M" or "N:".
+ * A missing start means rule 1, a missing end means -1 (until the last
+ * rule); without a colon start and end are the same number.
+ * A colon in the string is overwritten with '\0'.
+ * Returns 0 on success, -1 when a number is malformed or zero.
+ */
+static int parse_rule_range(const char *argv, int *rule_nr, int *rule_nr_end)
+{
+	char *sep = strchr(argv, ':');
+	char *end;
+
+	/* A single rule number */
+	if (sep == NULL) {
+		*rule_nr = strtol(argv, &end, 10);
+		if (*end != '\0' || *rule_nr == 0)
+			return -1;
+		*rule_nr_end = *rule_nr;
+		return 0;
+	}
+
+	/* Split the string in two at the colon */
+	*sep = '\0';
+	if (sep[1] == '\0') {
+		*rule_nr_end = -1; /* Until the last rule */
+	} else {
+		*rule_nr_end = strtol(sep + 1, &end, 10);
+		if (*end != '\0' || *rule_nr_end == 0)
+			return -1;
+	}
+
+	if (sep == argv) {
+		*rule_nr = 1; /* Beginning with the first rule */
+		return 0;
+	}
+	*rule_nr = strtol(argv, &end, 10);
+	if (*end != '\0' || *rule_nr == 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Parse the arguments of -C: "[start_nr[:end_nr]] pcnt bcnt", starting at
+ * argv[optind]. The counter values are stored in new_entry->cnt_surplus
+ * and optind is moved past the consumed arguments.
+ *
+ * Each counter may be prefixed with '+' (increment) or '-' (decrement).
+ * The return value encodes the requested operations: 0, 1 or 2 for an
+ * absolute, incremented or decremented packet counter, plus 0, 3 or 6 for
+ * the byte counter.
+ *
+ * Incrementing or decrementing rules in daemon mode is not supported as the
+ * involved code overload is not worth it (too annoying to take the increased
+ * counters in the kernel into account).
+ *
+ * NOTE(review): the error paths rely on ebt_print_error2() leaving this
+ * function by itself (there is no other return after it, e.g. at the
+ * 'invalid' label) -- confirm against its definition.
+ */
+static int parse_change_counters_rule(int argc, char **argv, int *rule_nr, int *rule_nr_end, int exec_style)
+{
+	char *buffer;
+	int ret = 0;
+
+	/* Both counter arguments must be present, and neither may be an
+	 * option: a leading '-' is only accepted when a digit follows. The
+	 * test on the second argument needs '||' like the first one; with
+	 * '&&' it could never be true */
+	if (optind + 1 >= argc || (argv[optind][0] == '-' && (argv[optind][1] < '0' || argv[optind][1] > '9')) ||
+	    (argv[optind + 1][0] == '-' && (argv[optind + 1][1] < '0' || argv[optind + 1][1] > '9')))
+		ebt_print_error2("The command -C needs at least 2 arguments");
+	/* A third non-option argument means the first one is a rule range */
+	if (optind + 2 < argc && (argv[optind + 2][0] != '-' || (argv[optind + 2][1] >= '0' && argv[optind + 2][1] <= '9'))) {
+		if (optind + 3 != argc)
+			ebt_print_error2("No extra options allowed with -C start_nr[:end_nr] pcnt bcnt");
+		if (parse_rule_range(argv[optind], rule_nr, rule_nr_end))
+			ebt_print_error2("Something is wrong with the rule number specification '%s'", argv[optind]);
+		optind++;
+	}
+
+	/* Packet counter. The labels below sit on the error statements so
+	 * that the byte counter code can jump to the same messages */
+	if (argv[optind][0] == '+') {
+		if (exec_style == EXEC_STYLE_DAEMON)
+daemon_incr:
+			ebt_print_error2("Incrementing rule counters (%s) not allowed in daemon mode", argv[optind]);
+		ret += 1;
+		new_entry->cnt_surplus.pcnt = strtoull(argv[optind] + 1, &buffer, 10);
+	} else if (argv[optind][0] == '-') {
+		if (exec_style == EXEC_STYLE_DAEMON)
+daemon_decr:
+			ebt_print_error2("Decrementing rule counters (%s) not allowed in daemon mode", argv[optind]);
+		ret += 2;
+		new_entry->cnt_surplus.pcnt = strtoull(argv[optind] + 1, &buffer, 10);
+	} else
+		new_entry->cnt_surplus.pcnt = strtoull(argv[optind], &buffer, 10);
+
+	if (*buffer != '\0')
+		goto invalid;
+	optind++;
+	/* Byte counter */
+	if (argv[optind][0] == '+') {
+		if (exec_style == EXEC_STYLE_DAEMON)
+			goto daemon_incr;
+		ret += 3;
+		new_entry->cnt_surplus.bcnt = strtoull(argv[optind] + 1, &buffer, 10);
+	} else if (argv[optind][0] == '-') {
+		if (exec_style == EXEC_STYLE_DAEMON)
+			goto daemon_decr;
+		ret += 6;
+		new_entry->cnt_surplus.bcnt = strtoull(argv[optind] + 1, &buffer, 10);
+	} else
+		new_entry->cnt_surplus.bcnt = strtoull(argv[optind], &buffer, 10);
+
+	if (*buffer != '\0')
+		goto invalid;
+	optind++;
+	return ret;
+invalid:
+	ebt_print_error2("Packet counter '%s' invalid", argv[optind]);
+}
+
+/*
+ * Convert a trailing '+' wildcard in an interface name to the internal
+ * IF_WILDCARD byte, in place. option is only used in the error message.
+ * Returns 0 on success (also when the name has no wildcard), -1 when
+ * characters follow the '+'.
+ */
+static int parse_iface(char *iface, char *option)
+{
+	char *plus = strchr(iface, '+');
+
+	if (plus == NULL)
+		return 0;
+	if (plus[1] != '\0') {
+		ebt_print_error("Spurious characters after '+' wildcard for '%s'", option);
+		return -1;
+	}
+	*plus = IF_WILDCARD;
+	return 0;
+}
+
+/* One-time setup: merge the extra command line options of the matches,
+ * watchers and targets, in that order, into the global option table. Each
+ * merge hands out a new option offset, so the order of these calls decides
+ * which offset an extension gets. */
+void ebt_early_init_once(void)
+{
+ ebt_iterate_matches(merge_match);
+ ebt_iterate_watchers(merge_watcher);
+ ebt_iterate_targets(merge_target);
+}
+
+/* Parse one ebtables command line and apply it to *replace_; returns 0 on
+ * success and -1 on error (a few paths call exit() instead of returning).
+ * We use exec_style instead of #ifdef's because ebtables.so is a shared object. */
+int do_command(int argc, char *argv[], int exec_style,
+	       struct ebt_u_replace *replace_)
+{
+	char *buffer;
+	int c, i;
+	int zerochain = -1; /* Needed for the -Z option (we can have -Z <this> -L <that>) */
+	int chcounter = 0; /* Needed for -C */
+	int policy = 0;
+	int rule_nr = 0;
+	int rule_nr_end = 0;
+	struct ebt_u_target *t;
+	struct ebt_u_match *m;
+	struct ebt_u_watcher *w;
+	struct ebt_u_match_list *m_l;
+	struct ebt_u_watcher_list *w_l;
+	struct ebt_u_entries *entries;
+
+	opterr = 0;
+	ebt_modprobe = NULL;
+	replace = replace_;
+	/* The daemon doesn't use the environment variable */
+	if (exec_style == EXEC_STYLE_PRG) {
+		buffer = getenv(ATOMIC_ENV_VARIABLE);
+		if (buffer) {
+			replace->filename = malloc(strlen(buffer) + 1);
+			if (!replace->filename)
+				ebt_print_memory();
+			strcpy(replace->filename, buffer);
+			buffer = NULL;
+		}
+	}
+
+	replace->flags &= OPT_KERNELDATA; /* ebtablesd needs OPT_KERNELDATA */
+	replace->selected_chain = -1;
+	replace->command = 'h';
+
+	if (!new_entry) {
+		new_entry = (struct ebt_u_entry *)malloc(sizeof(struct ebt_u_entry));
+		if (!new_entry)
+			ebt_print_memory();
+	}
+	/* Put some sane values in our new entry */
+	ebt_initialize_entry(new_entry);
+	new_entry->replace = replace;
+
+	/* The scenario induced by this loop makes that:
+	 * '-t' ,'-M' and --atomic (if specified) have to come
+	 * before '-A' and the like */
+
+	/* Getopt saves the day */
+	while ((c = getopt_long(argc, argv,
+	   "-A:D:C:I:N:E:X::L::Z::F::P:Vhi:o:j:c:p:s:d:t:M:", ebt_options, NULL)) != -1) {
+		switch (c) {
+
+		case 'A': /* Add a rule */
+		case 'D': /* Delete a rule */
+		case 'C': /* Change counters */
+		case 'P': /* Define policy */
+		case 'I': /* Insert a rule */
+		case 'N': /* Make a user defined chain */
+		case 'E': /* Rename chain */
+		case 'X': /* Delete chain */
+			/* We allow -N chainname -P policy */
+			if (replace->command == 'N' && c == 'P') {
+				replace->command = c;
+				optind--; /* No table specified */
+				goto handle_P;
+			}
+			if (OPT_COMMANDS)
+				ebt_print_error2("Multiple commands are not allowed");
+
+			replace->command = c;
+			replace->flags |= OPT_COMMAND;
+			if (!(replace->flags & OPT_KERNELDATA))
+				ebt_get_kernel_table(replace, 0);
+			if (optarg && (optarg[0] == '-' || !strcmp(optarg, "!")))
+				ebt_print_error2("No chain name specified");
+			if (c == 'N') {
+				if (ebt_get_chainnr(replace, optarg) != -1)
+					ebt_print_error2("Chain %s already exists", optarg);
+				else if (ebt_find_target(optarg))
+					ebt_print_error2("Target with name %s exists", optarg);
+				else if (strlen(optarg) >= EBT_CHAIN_MAXNAMELEN)
+					ebt_print_error2("Chain name length can't exceed %d",
+							 EBT_CHAIN_MAXNAMELEN - 1);
+				else if (strchr(optarg, ' ') != NULL)
+					ebt_print_error2("Use of ' ' not allowed in chain names");
+				ebt_new_chain(replace, optarg, EBT_ACCEPT);
+				/* This is needed to get -N x -P y working */
+				replace->selected_chain = ebt_get_chainnr(replace, optarg);
+				break;
+			} else if (c == 'X') {
+				if (optind >= argc) {
+					replace->selected_chain = -1;
+					ebt_delete_chain(replace);
+					break;
+				}
+
+				if (optind < argc - 1)
+					ebt_print_error2("No extra options allowed with -X");
+
+				if ((replace->selected_chain = ebt_get_chainnr(replace, argv[optind])) == -1)
+					ebt_print_error2("Chain '%s' doesn't exist", argv[optind]);
+				ebt_delete_chain(replace);
+				if (ebt_errormsg[0] != '\0')
+					return -1;
+				optind++;
+				break;
+			}
+
+			if ((replace->selected_chain = ebt_get_chainnr(replace, optarg)) == -1)
+				ebt_print_error2("Chain '%s' doesn't exist", optarg);
+			if (c == 'E') {
+				if (optind >= argc)
+					ebt_print_error2("No new chain name specified");
+				else if (optind < argc - 1)
+					ebt_print_error2("No extra options allowed with -E");
+				else if (strlen(argv[optind]) >= EBT_CHAIN_MAXNAMELEN)
+					ebt_print_error2("Chain name length can't exceed %d characters", EBT_CHAIN_MAXNAMELEN - 1);
+				else if (ebt_get_chainnr(replace, argv[optind]) != -1)
+					ebt_print_error2("Chain '%s' already exists", argv[optind]);
+				else if (ebt_find_target(argv[optind]))
+					ebt_print_error2("Target with name '%s' exists", argv[optind]);
+				else if (strchr(argv[optind], ' ') != NULL)
+					ebt_print_error2("Use of ' ' not allowed in chain names");
+				ebt_rename_chain(replace, argv[optind]);
+				optind++;
+				break;
+			} else if (c == 'D' && optind < argc && (argv[optind][0] != '-' || (argv[optind][1] >= '0' && argv[optind][1] <= '9'))) {
+				if (optind != argc - 1)
+					ebt_print_error2("No extra options allowed with -D start_nr[:end_nr]");
+				if (parse_rule_range(argv[optind], &rule_nr, &rule_nr_end))
+					ebt_print_error2("Problem with the specified rule number(s) '%s'", argv[optind]);
+				optind++;
+			} else if (c == 'C') {
+				if ((chcounter = parse_change_counters_rule(argc, argv, &rule_nr, &rule_nr_end, exec_style)) == -1)
+					return -1;
+			} else if (c == 'I') {
+				if (optind >= argc || (argv[optind][0] == '-' && (argv[optind][1] < '0' || argv[optind][1] > '9')))
+					rule_nr = 1;
+				else {
+					rule_nr = strtol(argv[optind], &buffer, 10);
+					if (*buffer != '\0')
+						ebt_print_error2("Problem with the specified rule number '%s'", argv[optind]);
+					optind++;
+				}
+			} else if (c == 'P') {
+handle_P:
+				if (optind >= argc)
+					ebt_print_error2("No policy specified");
+				for (i = 0; i < NUM_STANDARD_TARGETS; i++)
+					if (!strcmp(argv[optind], ebt_standard_targets[i])) {
+						policy = -i -1; /* index i in ebt_standard_targets[] maps to verdict -i - 1 */
+						if (policy == EBT_CONTINUE)
+							ebt_print_error2("Wrong policy '%s'", argv[optind]);
+						break;
+					}
+				if (i == NUM_STANDARD_TARGETS)
+					ebt_print_error2("Unknown policy '%s'", argv[optind]);
+				optind++;
+			}
+			break;
+		case 'L': /* List */
+		case 'F': /* Flush */
+		case 'Z': /* Zero counters */
+			if (c == 'Z') {
+				if ((replace->flags & OPT_ZERO) || (replace->flags & OPT_COMMAND && replace->command != 'L'))
+print_zero:
+					ebt_print_error2("Command -Z only allowed together with command -L");
+				replace->flags |= OPT_ZERO;
+			} else {
+				if (replace->flags & OPT_COMMAND)
+					ebt_print_error2("Multiple commands are not allowed");
+				replace->command = c;
+				replace->flags |= OPT_COMMAND;
+				if (replace->flags & OPT_ZERO && c != 'L')
+					goto print_zero;
+			}
+
+#ifdef SILENT_DAEMON
+			if (c== 'L' && exec_style == EXEC_STYLE_DAEMON)
+				ebt_print_error2("-L not supported in daemon mode");
+#endif
+
+			if (!(replace->flags & OPT_KERNELDATA))
+				ebt_get_kernel_table(replace, 0);
+			i = -1; /* -1: no chain name follows the command */
+			if (optind < argc && argv[optind][0] != '-') {
+				if ((i = ebt_get_chainnr(replace, argv[optind])) == -1)
+					ebt_print_error2("Chain '%s' doesn't exist", argv[optind]);
+				optind++;
+			}
+			if (i != -1) {
+				if (c == 'Z')
+					zerochain = i;
+				else
+					replace->selected_chain = i;
+			}
+			break;
+		case 'V': /* Version */
+			if (OPT_COMMANDS)
+				ebt_print_error2("Multiple commands are not allowed");
+			replace->command = 'V';
+			if (exec_style == EXEC_STYLE_DAEMON)
+				ebt_print_error2(PROGNAME" v"PROGVERSION" ("PROGDATE")\n");
+			PRINT_VERSION;
+			exit(0);
+		case 'M': /* Modprobe */
+			if (OPT_COMMANDS)
+				ebt_print_error2("Please put the -M option earlier");
+			/* The old value is NULL or an earlier optarg (argv memory we do not own): never free() it */
+			ebt_modprobe = optarg;
+			break;
+		case 'h': /* Help */
+#ifdef SILENT_DAEMON
+			if (exec_style == EXEC_STYLE_DAEMON)
+				ebt_print_error2("-h not supported in daemon mode");
+#endif
+			if (OPT_COMMANDS)
+				ebt_print_error2("Multiple commands are not allowed");
+			replace->command = 'h';
+
+			/* All other arguments should be extension names */
+			while (optind < argc) {
+				struct ebt_u_match *m;
+				struct ebt_u_watcher *w;
+
+				if (!strcasecmp("list_extensions", argv[optind])) {
+					ebt_list_extensions();
+					exit(0);
+				}
+				if ((m = ebt_find_match(argv[optind])))
+					ebt_add_match(new_entry, m);
+				else if ((w = ebt_find_watcher(argv[optind])))
+					ebt_add_watcher(new_entry, w);
+				else {
+					if (!(t = ebt_find_target(argv[optind])))
+						ebt_print_error2("Extension '%s' not found", argv[optind]);
+					if (replace->flags & OPT_JUMP)
+						ebt_print_error2("Sorry, you can only see help for one target extension at a time");
+					replace->flags |= OPT_JUMP;
+					new_entry->t = (struct ebt_entry_target *)t;
+				}
+				optind++;
+			}
+			break;
+		case 't': /* Table */
+			if (OPT_COMMANDS)
+				ebt_print_error2("Please put the -t option first");
+			ebt_check_option2(&(replace->flags), OPT_TABLE);
+			if (strlen(optarg) > EBT_TABLE_MAXNAMELEN - 1)
+				ebt_print_error2("Table name length cannot exceed %d characters", EBT_TABLE_MAXNAMELEN - 1);
+			strcpy(replace->name, optarg);
+			break;
+		case 'i': /* Input interface */
+		case 2  : /* Logical input interface */
+		case 'o': /* Output interface */
+		case 3  : /* Logical output interface */
+		case 'j': /* Target */
+		case 'p': /* Net family protocol */
+		case 's': /* Source mac */
+		case 'd': /* Destination mac */
+		case 'c': /* Set counters */
+			if (!OPT_COMMANDS)
+				ebt_print_error2("No command specified");
+			if (replace->command != 'A' && replace->command != 'D' && replace->command != 'I' && replace->command != 'C')
+				ebt_print_error2("Command and option do not match");
+			if (c == 'i') {
+				ebt_check_option2(&(replace->flags), OPT_IN);
+				if (replace->selected_chain > 2 && replace->selected_chain < NF_BR_BROUTING)
+					ebt_print_error2("Use -i only in INPUT, FORWARD, PREROUTING and BROUTING chains");
+				if (ebt_check_inverse2(optarg))
+					new_entry->invflags |= EBT_IIN;
+
+				if (strlen(optarg) >= IFNAMSIZ)
+big_iface_length:
+					ebt_print_error2("Interface name length cannot exceed %d characters", IFNAMSIZ - 1);
+				strcpy(new_entry->in, optarg);
+				if (parse_iface(new_entry->in, "-i"))
+					return -1;
+				break;
+			} else if (c == 2) {
+				ebt_check_option2(&(replace->flags), OPT_LOGICALIN);
+				if (replace->selected_chain > 2 && replace->selected_chain < NF_BR_BROUTING)
+					ebt_print_error2("Use --logical-in only in INPUT, FORWARD, PREROUTING and BROUTING chains");
+				if (ebt_check_inverse2(optarg))
+					new_entry->invflags |= EBT_ILOGICALIN;
+
+				if (strlen(optarg) >= IFNAMSIZ)
+					goto big_iface_length;
+				strcpy(new_entry->logical_in, optarg);
+				if (parse_iface(new_entry->logical_in, "--logical-in"))
+					return -1;
+				break;
+			} else if (c == 'o') {
+				ebt_check_option2(&(replace->flags), OPT_OUT);
+				if (replace->selected_chain < 2 || replace->selected_chain == NF_BR_BROUTING)
+					ebt_print_error2("Use -o only in OUTPUT, FORWARD and POSTROUTING chains");
+				if (ebt_check_inverse2(optarg))
+					new_entry->invflags |= EBT_IOUT;
+
+				if (strlen(optarg) >= IFNAMSIZ)
+					goto big_iface_length;
+				strcpy(new_entry->out, optarg);
+				if (parse_iface(new_entry->out, "-o"))
+					return -1;
+				break;
+			} else if (c == 3) {
+				ebt_check_option2(&(replace->flags), OPT_LOGICALOUT);
+				if (replace->selected_chain < 2 || replace->selected_chain == NF_BR_BROUTING)
+					ebt_print_error2("Use --logical-out only in OUTPUT, FORWARD and POSTROUTING chains");
+				if (ebt_check_inverse2(optarg))
+					new_entry->invflags |= EBT_ILOGICALOUT;
+
+				if (strlen(optarg) >= IFNAMSIZ)
+					goto big_iface_length;
+				strcpy(new_entry->logical_out, optarg);
+				if (parse_iface(new_entry->logical_out, "--logical-out"))
+					return -1;
+				break;
+			} else if (c == 'j') {
+				ebt_check_option2(&(replace->flags), OPT_JUMP);
+				for (i = 0; i < NUM_STANDARD_TARGETS; i++)
+					if (!strcmp(optarg, ebt_standard_targets[i])) {
+						t = ebt_find_target(EBT_STANDARD_TARGET);
+						((struct ebt_standard_target *) t->t)->verdict = -i - 1;
+						break;
+					}
+				if (-i - 1 == EBT_RETURN && replace->selected_chain < NF_BR_NUMHOOKS) {
+					ebt_print_error2("Return target only for user defined chains");
+				} else if (i != NUM_STANDARD_TARGETS)
+					break;
+
+				if ((i = ebt_get_chainnr(replace, optarg)) != -1) {
+					if (i < NF_BR_NUMHOOKS)
+						ebt_print_error2("Don't jump to a standard chain");
+					t = ebt_find_target(EBT_STANDARD_TARGET);
+					((struct ebt_standard_target *) t->t)->verdict = i - NF_BR_NUMHOOKS; /* jump verdicts count user defined chains from 0 */
+					break;
+				} else {
+					/* Must be an extension then */
+					struct ebt_u_target *t;
+
+					t = ebt_find_target(optarg);
+					/* -j standard not allowed either */
+					if (!t || t == (struct ebt_u_target *)new_entry->t)
+						ebt_print_error2("Illegal target name '%s'", optarg);
+					new_entry->t = (struct ebt_entry_target *)t;
+					ebt_find_target(EBT_STANDARD_TARGET)->used = 0;
+					t->used = 1;
+				}
+				break;
+			} else if (c == 's') {
+				ebt_check_option2(&(replace->flags), OPT_SOURCE);
+				if (ebt_check_inverse2(optarg))
+					new_entry->invflags |= EBT_ISOURCE;
+
+				if (ebt_get_mac_and_mask(optarg, new_entry->sourcemac, new_entry->sourcemsk))
+					ebt_print_error2("Problem with specified source mac '%s'", optarg);
+				new_entry->bitmask |= EBT_SOURCEMAC;
+				break;
+			} else if (c == 'd') {
+				ebt_check_option2(&(replace->flags), OPT_DEST);
+				if (ebt_check_inverse2(optarg))
+					new_entry->invflags |= EBT_IDEST;
+
+				if (ebt_get_mac_and_mask(optarg, new_entry->destmac, new_entry->destmsk))
+					ebt_print_error2("Problem with specified destination mac '%s'", optarg);
+				new_entry->bitmask |= EBT_DESTMAC;
+				break;
+			} else if (c == 'c') {
+				ebt_check_option2(&(replace->flags), OPT_COUNT);
+				if (ebt_check_inverse2(optarg))
+					ebt_print_error2("Unexpected '!' after -c");
+				if (optind >= argc || optarg[0] == '-' || argv[optind][0] == '-')
+					ebt_print_error2("Option -c needs 2 arguments");
+
+				new_entry->cnt.pcnt = strtoull(optarg, &buffer, 10);
+				if (*buffer != '\0')
+					ebt_print_error2("Packet counter '%s' invalid", optarg);
+				new_entry->cnt.bcnt = strtoull(argv[optind], &buffer, 10);
+				if (*buffer != '\0')
+					ebt_print_error2("Packet counter '%s' invalid", argv[optind]);
+				optind++;
+				break;
+			}
+			ebt_check_option2(&(replace->flags), OPT_PROTOCOL); /* only -p is left at this point */
+			if (ebt_check_inverse2(optarg))
+				new_entry->invflags |= EBT_IPROTO;
+
+			new_entry->bitmask &= ~((unsigned int)EBT_NOPROTO);
+			i = strtol(optarg, &buffer, 16);
+			if (*buffer == '\0' && (i < 0 || i > 0xFFFF))
+				ebt_print_error2("Problem with the specified protocol");
+			if (*buffer != '\0') {
+				struct ethertypeent *ent;
+
+				if (!strcasecmp(optarg, "LENGTH")) {
+					new_entry->bitmask |= EBT_802_3;
+					break;
+				}
+				ent = getethertypebyname(optarg);
+				if (!ent)
+					ebt_print_error2("Problem with the specified Ethernet protocol '%s', perhaps "_PATH_ETHERTYPES " is missing", optarg);
+				new_entry->ethproto = ent->e_ethertype;
+			} else
+				new_entry->ethproto = i;
+
+			if (new_entry->ethproto < 0x0600)
+				ebt_print_error2("Sorry, protocols have values above or equal to 0x0600");
+			break;
+		case 4  : /* Lc */
+#ifdef SILENT_DAEMON
+			if (exec_style == EXEC_STYLE_DAEMON)
+				ebt_print_error2("--Lc is not supported in daemon mode");
+#endif
+			ebt_check_option2(&(replace->flags), LIST_C);
+			if (replace->command != 'L')
+				ebt_print_error2("Use --Lc with -L");
+			replace->flags |= LIST_C;
+			break;
+		case 5  : /* Ln */
+#ifdef SILENT_DAEMON
+			if (exec_style == EXEC_STYLE_DAEMON)
+				ebt_print_error2("--Ln is not supported in daemon mode");
+#endif
+			ebt_check_option2(&(replace->flags), LIST_N);
+			if (replace->command != 'L')
+				ebt_print_error2("Use --Ln with -L");
+			if (replace->flags & LIST_X)
+				ebt_print_error2("--Lx is not compatible with --Ln");
+			replace->flags |= LIST_N;
+			break;
+		case 6  : /* Lx */
+#ifdef SILENT_DAEMON
+			if (exec_style == EXEC_STYLE_DAEMON)
+				ebt_print_error2("--Lx is not supported in daemon mode");
+#endif
+			ebt_check_option2(&(replace->flags), LIST_X);
+			if (replace->command != 'L')
+				ebt_print_error2("Use --Lx with -L");
+			if (replace->flags & LIST_N)
+				ebt_print_error2("--Lx is not compatible with --Ln");
+			replace->flags |= LIST_X;
+			break;
+		case 12 : /* Lmac2 */
+#ifdef SILENT_DAEMON
+			if (exec_style == EXEC_STYLE_DAEMON)
+				ebt_print_error2("--Lmac2 is not supported in daemon mode");
+#endif
+			ebt_check_option2(&(replace->flags), LIST_MAC2);
+			if (replace->command != 'L')
+				ebt_print_error2("Use --Lmac2 with -L");
+			replace->flags |= LIST_MAC2;
+			break;
+		case 8 : /* atomic-commit */
+			if (exec_style == EXEC_STYLE_DAEMON)
+				ebt_print_error2("--atomic-commit is not supported in daemon mode");
+			replace->command = c;
+			if (OPT_COMMANDS)
+				ebt_print_error2("Multiple commands are not allowed");
+			replace->flags |= OPT_COMMAND;
+			if (!replace->filename)
+				ebt_print_error2("No atomic file specified");
+			/* Get the information from the file */
+			ebt_get_table(replace, 0);
+			/* We don't want the kernel giving us its counters,
+			 * they would overwrite the counters extracted from
+			 * the file */
+			replace->num_counters = 0;
+			/* Make sure the table will be written to the kernel */
+			free(replace->filename);
+			replace->filename = NULL;
+			break;
+		case 7 : /* atomic-init */
+		case 10: /* atomic-save */
+		case 11: /* init-table */
+			if (exec_style == EXEC_STYLE_DAEMON) {
+				if (c == 7) {
+					ebt_print_error2("--atomic-init is not supported in daemon mode");
+				} else if (c == 10)
+					ebt_print_error2("--atomic-save is not supported in daemon mode");
+				ebt_print_error2("--init-table is not supported in daemon mode");
+			}
+			replace->command = c;
+			if (OPT_COMMANDS)
+				ebt_print_error2("Multiple commands are not allowed");
+			if (c != 11 && !replace->filename)
+				ebt_print_error2("No atomic file specified");
+			replace->flags |= OPT_COMMAND;
+			{
+				char *tmp = replace->filename;
+
+				/* Get the kernel table */
+				replace->filename = NULL;
+				ebt_get_kernel_table(replace, c == 10 ? 0 : 1);
+				replace->filename = tmp;
+			}
+			break;
+		case 9 : /* atomic */
+			if (exec_style == EXEC_STYLE_DAEMON)
+				ebt_print_error2("--atomic is not supported in daemon mode");
+			if (OPT_COMMANDS)
+				ebt_print_error2("--atomic has to come before the command");
+			/* A possible memory leak here, but this is not executed in daemon mode */
+			if (!(replace->filename = (char *)malloc(strlen(optarg) + 1)))
+				ebt_print_memory();
+			strcpy(replace->filename, optarg);
+			break;
+		case 1 :
+			if (!strcmp(optarg, "!"))
+				ebt_check_inverse2(optarg);
+			else
+				ebt_print_error2("Bad argument : '%s'", optarg);
+			/* ebt_check_inverse() did optind++ */
+			optind--;
+			continue;
+		default:
+			/* Is it a target option? */
+			t = (struct ebt_u_target *)new_entry->t;
+			if ((t->parse(c - t->option_offset, argv, argc, new_entry, &t->flags, &t->t))) {
+				if (ebt_errormsg[0] != '\0')
+					return -1;
+				goto check_extension;
+			}
+
+			/* Is it a match_option? */
+			for (m = ebt_matches; m; m = m->next)
+				if (m->parse(c - m->option_offset, argv, argc, new_entry, &m->flags, &m->m))
+					break;
+
+			if (m != NULL) {
+				if (ebt_errormsg[0] != '\0')
+					return -1;
+				if (m->used == 0) {
+					ebt_add_match(new_entry, m);
+					m->used = 1;
+				}
+				goto check_extension;
+			}
+
+			/* Is it a watcher option? */
+			for (w = ebt_watchers; w; w = w->next)
+				if (w->parse(c - w->option_offset, argv, argc, new_entry, &w->flags, &w->w))
+					break;
+
+			if (w == NULL && c == '?')
+				ebt_print_error2("Unknown argument: '%s'", argv[optind - 1], (char)optopt, (char)c);
+			else if (w == NULL) {
+				if (!strcmp(t->name, "standard"))
+					ebt_print_error2("Unknown argument: don't forget the -t option");
+				else
+					ebt_print_error2("Target-specific option does not correspond with specified target");
+			}
+			if (ebt_errormsg[0] != '\0')
+				return -1;
+			if (w->used == 0) { /* NOTE(review): w is non-NULL here only if ebt_print_error2() leaves the function - confirm */
+				ebt_add_watcher(new_entry, w);
+				w->used = 1;
+			}
+check_extension:
+			if (replace->command != 'A' && replace->command != 'I' &&
+			    replace->command != 'D' && replace->command != 'C')
+				ebt_print_error2("Extensions only for -A, -I, -D and -C");
+		}
+		ebt_invert = 0;
+	}
+
+	/* Just in case we didn't catch an error */
+	if (ebt_errormsg[0] != '\0')
+		return -1;
+
+	if (!(table = ebt_find_table(replace->name)))
+		ebt_print_error2("Bad table name");
+
+	if (replace->command == 'h' && !(replace->flags & OPT_ZERO)) {
+		print_help();
+		if (exec_style == EXEC_STYLE_PRG)
+			exit(0);
+	}
+
+	/* Do the final checks */
+	if (replace->command == 'A' || replace->command == 'I' ||
+	    replace->command == 'D' || replace->command == 'C') {
+		/* This will put the hook_mask right for the chains */
+		ebt_check_for_loops(replace);
+		if (ebt_errormsg[0] != '\0')
+			return -1;
+		entries = ebt_to_chain(replace);
+		m_l = new_entry->m_list;
+		w_l = new_entry->w_list;
+		t = (struct ebt_u_target *)new_entry->t;
+		while (m_l) {
+			m = (struct ebt_u_match *)(m_l->m);
+			m->final_check(new_entry, m->m, replace->name,
+				       entries->hook_mask, 0);
+			if (ebt_errormsg[0] != '\0')
+				return -1;
+			m_l = m_l->next;
+		}
+		while (w_l) {
+			w = (struct ebt_u_watcher *)(w_l->w);
+			w->final_check(new_entry, w->w, replace->name,
+				       entries->hook_mask, 0);
+			if (ebt_errormsg[0] != '\0')
+				return -1;
+			w_l = w_l->next;
+		}
+		t->final_check(new_entry, t->t, replace->name,
+			       entries->hook_mask, 0);
+		if (ebt_errormsg[0] != '\0')
+			return -1;
+	}
+	/* So, the extensions can work with the host endian.
+	 * The kernel does not have to do this of course */
+	new_entry->ethproto = htons(new_entry->ethproto);
+
+	if (replace->command == 'P') {
+		if (replace->selected_chain < NF_BR_NUMHOOKS && policy == EBT_RETURN)
+			ebt_print_error2("Policy RETURN only allowed for user defined chains");
+		ebt_change_policy(replace, policy);
+		if (ebt_errormsg[0] != '\0')
+			return -1;
+	} else if (replace->command == 'L') {
+		list_rules();
+		if (!(replace->flags & OPT_ZERO) && exec_style == EXEC_STYLE_PRG)
+			exit(0);
+	}
+	if (replace->flags & OPT_ZERO) {
+		replace->selected_chain = zerochain;
+		ebt_zero_counters(replace);
+	} else if (replace->command == 'F') {
+		ebt_flush_chains(replace);
+	} else if (replace->command == 'A' || replace->command == 'I') {
+		ebt_add_rule(replace, new_entry, rule_nr);
+		if (ebt_errormsg[0] != '\0')
+			return -1;
+		/* Makes undoing the add easier (jumps to delete_the_rule) */
+		if (rule_nr <= 0)
+			rule_nr--;
+		rule_nr_end = rule_nr;
+
+		/* a jump to a udc requires checking for loops */
+		if (!strcmp(new_entry->t->u.name, EBT_STANDARD_TARGET) &&
+		    ((struct ebt_standard_target *)(new_entry->t))->verdict >= 0) {
+			/* FIXME: this can be done faster */
+			ebt_check_for_loops(replace);
+			if (ebt_errormsg[0] != '\0')
+				goto delete_the_rule;
+		}
+
+		/* Do the final_check(), for all entries.
+		 * This is needed when adding a rule that has a chain target */
+		i = -1;
+		while (++i != replace->num_chains) {
+			struct ebt_u_entry *e;
+
+			entries = replace->chains[i];
+			if (!entries) {
+				if (i < NF_BR_NUMHOOKS)
+					continue;
+				else
+					ebt_print_bug("whoops\n");
+			}
+			e = entries->entries->next;
+			while (e != entries->entries) {
+				/* Userspace extensions use host endian */
+				e->ethproto = ntohs(e->ethproto);
+				ebt_do_final_checks(replace, e, entries);
+				if (ebt_errormsg[0] != '\0')
+					goto delete_the_rule;
+				e->ethproto = htons(e->ethproto);
+				e = e->next;
+			}
+		}
+		/* Don't reuse the added rule */
+		new_entry = NULL;
+	} else if (replace->command == 'D') {
+delete_the_rule:
+		ebt_delete_rule(replace, new_entry, rule_nr, rule_nr_end);
+		if (ebt_errormsg[0] != '\0')
+			return -1;
+	} else if (replace->command == 'C') {
+		ebt_change_counters(replace, new_entry, rule_nr, rule_nr_end, &(new_entry->cnt_surplus), chcounter);
+		if (ebt_errormsg[0] != '\0')
+			return -1;
+	}
+	/* Commands -N, -E, -X, --atomic-init, --atomic-commit, --atomic-save,
+	 * --init-table fall through */
+
+	if (ebt_errormsg[0] != '\0')
+		return -1;
+	if (table->check)
+		table->check(replace);
+
+	if (exec_style == EXEC_STYLE_PRG) {/* Implies ebt_errormsg[0] == '\0' */
+		ebt_deliver_table(replace);
+
+		if (replace->nentries)
+			ebt_deliver_counters(replace);
+	}
+	return 0;
+}
diff --git a/tools/remus/imqebt/extensions/Makefile b/tools/remus/imqebt/extensions/Makefile
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/extensions/Makefile
@@ -0,0 +1,29 @@
+#! /usr/bin/make
+# NOTE(review): every path below is spelled extensions/..., so this fragment looks meant to be included from the parent directory's Makefile - confirm.
+EXT_FUNC+=standard imq
+EXT_TABLES+=filter
+EXT_OBJS+=$(foreach T,$(EXT_FUNC), extensions/ebt_$(T).o)
+EXT_OBJS+=$(foreach T,$(EXT_TABLES), extensions/ebtable_$(T).o)
+EXT_LIBS+=$(foreach T,$(EXT_FUNC), extensions/libebt_$(T).so)
+EXT_LIBS+=$(foreach T,$(EXT_TABLES), extensions/libebtable_$(T).so)
+EXT_LIBSI+=$(foreach T,$(EXT_FUNC), -lebt_$(T))
+EXT_LIBSI+=$(foreach T,$(EXT_TABLES), -lebtable_$(T))
+# Link each extension object into a shared library with the same base name.
+extensions/ebt_%.so: extensions/ebt_%.o
+ $(CC) -shared -o $@ -lc $< -nostartfiles
+# Rename it to the lib-prefixed file name that the -lebt_<name> flags in EXT_LIBSI refer to.
+extensions/libebt_%.so: extensions/ebt_%.so
+ mv $< $@
+# Same two steps for the table libraries (-lebtable_<name>).
+extensions/ebtable_%.so: extensions/ebtable_%.o
+ $(CC) -shared -o $@ -lc $< -nostartfiles
+
+extensions/libebtable_%.so: extensions/ebtable_%.so
+ mv $< $@
+# Compile rules; only the ebt_*.o objects list include/ebtables_u.h as a prerequisite.
+extensions/ebt_%.o: extensions/ebt_%.c include/ebtables_u.h
+ $(CC) $(CFLAGS) $(CFLAGS_SH_LIB) $(PROGSPECS) -c -o $@ $< -I$(KERNEL_INCLUDES)
+
+extensions/ebtable_%.o: extensions/ebtable_%.c
+ $(CC) $(CFLAGS) $(CFLAGS_SH_LIB) $(PROGSPECS) -c -o $@ $< -I$(KERNEL_INCLUDES)
+
diff --git a/tools/remus/imqebt/extensions/ebt_imq.c b/tools/remus/imqebt/extensions/ebt_imq.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/extensions/ebt_imq.c
@@ -0,0 +1,84 @@
+#include <stdio.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include "../include/ebtables_u.h"
+#include <linux/netfilter_bridge/ebt_imq.h>
+
+#define IMQ_TODEV '1' /* option code for --todev; parse() switches on it */
+
+static struct option opts[] = /* long options of the imq target, published through imq_target.extra_ops */
+{
+ { "todev" , required_argument, 0, IMQ_TODEV },
+ { 0 } /* zeroed terminator entry */
+};
+
+static void help(void)
+{
+	/* Usage text of the imq target, written to stdout as a single string. */
+	fputs("IMQ options:\n"
+	      " --todev <N> enqueue to imq<N>, defaults to 0\n", stdout);
+}
+
+/* Give a fresh imq target its default device number, 0. */
+static void init(struct ebt_entry_target *target)
+{
+	/* target->data holds this extension's struct ebt_imq_info */
+	((struct ebt_imq_info *)target->data)->todev = 0;
+}
+
+static int parse(int c, char **argv, int argc, const struct ebt_u_entry *entry,
+	unsigned int *flags, struct ebt_entry_target **target)
+{	/* Returns 1 when c was handled here, 0 when c is not an imq option. */
+	struct ebt_imq_info *imqinfo = (struct ebt_imq_info *)(*target)->data;
+	char *end; /* first character strtol() could not convert */
+	if (c != IMQ_TODEV)
+		return 0; /* decline, so do_command() can try matches/watchers or report the option */
+	imqinfo->todev = strtol(optarg, &end, 10);
+	if (*optarg < '0' || *optarg > '9' || *end != '\0') /* digits only: no sign, blanks or trailing text */
+		ebt_print_error2("Bad --todev device number '%s'", optarg);
+	return 1;
+}
+
+static void final_check(const struct ebt_u_entry *entry,
+ const struct ebt_entry_target *target, const char *name,
+ unsigned int hookmask, unsigned int time)
+{ /* intentionally empty: this target performs no final validation; do_command() calls the hook after option parsing */
+}
+
+/* Print the target's option in command-line syntax ("--todev <N>", no newline). */
+static void print(const struct ebt_u_entry *entry,
+	const struct ebt_entry_target *target)
+{
+	/* target->data holds this extension's struct ebt_imq_info */
+	printf("--todev %d", ((const struct ebt_imq_info *)target->data)->todev);
+}
+
+/*
+ * Two imq targets are equal (return 1) when they name the same device, else 0.
+ */
+static int compare(const struct ebt_entry_target *t1,
+	const struct ebt_entry_target *t2)
+{
+	const struct ebt_imq_info *lhs = (const struct ebt_imq_info *)t1->data;
+	const struct ebt_imq_info *rhs = (const struct ebt_imq_info *)t2->data;
+
+	return lhs->todev == rhs->todev;
+}
+
+static struct ebt_u_target imq_target = /* descriptor passed to ebt_register_target() by _init() */
+{
+ .name = "imq",
+ .size = sizeof(struct ebt_imq_info), /* size of the data the callbacks read through target->data */
+ .help = help,
+ .init = init,
+ .parse = parse,
+ .final_check = final_check,
+ .print = print,
+ .compare = compare,
+ .extra_ops = opts, /* the --todev long option */
+};
+
+void _init(void) /* NOTE(review): the extensions Makefile links with -nostartfiles, so this is presumably run as the library's initializer on load - confirm */
+{
+ ebt_register_target(&imq_target); /* announce the imq target to the ebtables core */
+}
diff --git a/tools/remus/imqebt/extensions/ebt_standard.c b/tools/remus/imqebt/extensions/ebt_standard.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/extensions/ebt_standard.c
@@ -0,0 +1,90 @@
+/* ebt_standard
+ *
+ * Authors:
+ * Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * April, 2002
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include "../include/ebtables_u.h"
+
+static struct option opts[] = /* option table published through standard.extra_ops */
+{
+ {0} /* terminator only: the standard target defines no options of its own */
+};
+
+static void print_help(void)
+{	/* Help text: the built-in verdict names, plus jumps to user defined chains. */
+	fputs("Standard targets: DROP, ACCEPT, RETURN or CONTINUE;\n"
+	      "The target can also be a user defined chain.\n", stdout);
+}
+
+static void init(struct ebt_entry_target *t) /* set the initial verdict of a standard target */
+{
+ ((struct ebt_standard_target *)t)->verdict = EBT_CONTINUE; /* stays CONTINUE until do_command()'s -j handling stores another verdict */
+}
+
+static int parse(int c, char **argv, int argc, const struct ebt_u_entry *entry,
+ unsigned int *flags, struct ebt_entry_target **target)
+{
+ return 0; /* never claims an option, so do_command() goes on to try matches and watchers */
+}
+
+static void final_check(const struct ebt_u_entry *entry,
+ const struct ebt_entry_target *target, const char *name,
+ unsigned int hookmask, unsigned int time)
+{ /* intentionally empty: the standard target performs no final validation */
+}
+
+/* Print the verdict: the destination chain's name for a jump, else the built-in verdict keyword. */
+static void print(const struct ebt_u_entry *entry,
+	const struct ebt_entry_target *target)
+{
+	const int verdict = ((struct ebt_standard_target *)target)->verdict;
+	const char *keyword;
+
+	if (verdict >= 0) {
+		/* a jump: verdict counts chains from the slot after the NF_BR_NUMHOOKS standard ones */
+		printf("%s", entry->replace->chains[verdict + NF_BR_NUMHOOKS]->name);
+		return;
+	}
+
+	/* negative verdicts are the built-in ones; anything else is reported as a bug */
+	keyword = verdict == EBT_CONTINUE ? "CONTINUE " :
+		  verdict == EBT_ACCEPT ? "ACCEPT " :
+		  verdict == EBT_DROP ? "DROP " :
+		  verdict == EBT_RETURN ? "RETURN " : NULL;
+	if (keyword != NULL)
+		fputs(keyword, stdout);
+	else
+		ebt_print_bug("Bad standard target");
+}
+
+static int compare(const struct ebt_entry_target *t1, const struct ebt_entry_target *t2)
+{
+	const struct ebt_standard_target *lhs = (const struct ebt_standard_target *)t1;
+	const struct ebt_standard_target *rhs = (const struct ebt_standard_target *)t2;
+	return lhs->verdict == rhs->verdict; /* 1 exactly when both targets carry the same verdict */
+}
+
+static struct ebt_u_target standard = /* descriptor passed to ebt_register_target() by _init() */
+{
+ .name = "standard", /* do_command() compares t->name with this string when reporting unknown options */
+ .size = sizeof(struct ebt_standard_target) -
+ sizeof(struct ebt_entry_target), /* only the bytes beyond the common ebt_entry_target part */
+ .help = print_help,
+ .init = init,
+ .parse = parse,
+ .final_check = final_check,
+ .print = print,
+ .compare = compare,
+ .extra_ops = opts,
+};
+
+void _init(void) /* NOTE(review): the extensions Makefile links with -nostartfiles, so this is presumably run as the library's initializer on load - confirm */
+{
+ ebt_register_target(&standard); /* announce the standard target to the ebtables core */
+}
diff --git a/tools/remus/imqebt/extensions/ebtable_filter.c b/tools/remus/imqebt/extensions/ebtable_filter.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/extensions/ebtable_filter.c
@@ -0,0 +1,35 @@
+/* ebtable_filter
+ *
+ * Authors:
+ * Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * April, 2002
+ */
+
+#include <stdio.h>
+#include "../include/ebtables_u.h"
+
+#define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \
+ (1 << NF_BR_LOCAL_OUT)) /* bit i set means hook number i is listed as a filter-table chain by print_help() */
+
+/* List the filter table's chains; hn[] is indexed by hook number. */
+static void print_help(const char **hn)
+{
+	int hook;
+	fputs("Supported chains for the filter table:\n", stdout);
+	for (hook = 0; hook < NF_BR_NUMHOOKS; hook++)
+		if ((FILTER_VALID_HOOKS >> hook) & 1)
+			printf("%s ", hn[hook]);
+	putchar('\n');
+}
+
+static struct ebt_u_table table = /* descriptor passed to ebt_register_table() by _init() */
+{
+ .name = "filter",
+ .help = print_help, /* prints the chains selected by FILTER_VALID_HOOKS */
+};
+
+void _init(void) /* NOTE(review): the extensions Makefile links with -nostartfiles, so this is presumably run as the library's initializer on load - confirm */
+{
+ ebt_register_table(&table); /* announce the filter table to the ebtables core */
+}
diff --git a/tools/remus/imqebt/getethertype.c b/tools/remus/imqebt/getethertype.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/getethertype.c
@@ -0,0 +1,162 @@
+/*
+* getethertype.c
+*
+* This file was part of the NYS Library.
+*
+** The NYS Library is free software; you can redistribute it and/or
+** modify it under the terms of the GNU Library General Public License as
+** published by the Free Software Foundation; either version 2 of the
+** License, or (at your option) any later version.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/********************************************************************
+* Description: Ethertype name service switch and the ethertypes
+* database access functions
+* Author: Nick Fedchik <fnm@ukrsat.com>
+* Checker: Bart De Schuymer <bdschuym@pandora.be>
+* Origin: uClibc-0.9.16/libc/inet/getproto.c
+* Created at: Mon Nov 11 12:20:11 EET 2002
+********************************************************************/
+
+
+#include <ctype.h>
+#include <features.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netinet/ether.h>
+#include <net/ethernet.h>
+
+#include "ethernetdb.h"
+
+#define MAXALIASES 35 /* slots in ethertype_aliases[]; the last one is kept for the NULL terminator */
+
+static FILE *etherf = NULL; /* open handle on _PATH_ETHERTYPES, NULL while closed */
+static char line[BUFSIZ + 1]; /* current database line; the strings in et_ent point into it */
+static struct ethertypeent et_ent; /* the one entry whose address getethertypeent() returns */
+static char *ethertype_aliases[MAXALIASES]; /* NULL-terminated alias list backing et_ent.e_aliases */
+static int ethertype_stayopen; /* non-zero: the by-name/by-number lookups leave etherf open */
+
+void setethertypeent(int f)
+{	/* Start reading at the top of the database; a non-zero f turns stay-open mode on. */
+	ethertype_stayopen = ethertype_stayopen | f;
+	if (etherf != NULL)
+		rewind(etherf);
+	else
+		etherf = fopen(_PATH_ETHERTYPES, "r");
+}
+
+/* Close the database if it is open and switch stay-open mode off. */
+void endethertypeent(void)
+{
+	if (etherf != NULL)
+		fclose(etherf);
+	etherf = NULL;
+	ethertype_stayopen = 0;
+}
+
+/* Return the next valid database entry (static storage, overwritten by the next call), or NULL at end of file or if the file cannot be opened. */
+struct ethertypeent *getethertypeent(void)
+{
+	char *e, *endptr;
+	register char *cp, **q;
+
+	if (etherf == NULL
+	    && (etherf = fopen(_PATH_ETHERTYPES, "r")) == NULL) {
+		return (NULL);
+	}
+
+again:
+	if ((e = fgets(line, BUFSIZ, etherf)) == NULL) {
+		return (NULL);
+	}
+	if (*e == '#')
+		goto again;
+	cp = strpbrk(e, "#\n"); /* cut the line at a trailing comment or at the newline */
+	if (cp == NULL)
+		goto again;
+	*cp = '\0';
+	et_ent.e_name = e; /* first field: the name */
+	cp = strpbrk(e, " \t");
+	if (cp == NULL)
+		goto again;
+	*cp++ = '\0';
+	while (*cp == ' ' || *cp == '\t')
+		cp++;
+	e = strpbrk(cp, " \t"); /* e: start of the alias list, or NULL when there is none */
+	if (e != NULL)
+		*e++ = '\0';
+	/* second field: the ethertype as a hexadecimal number */
+	et_ent.e_ethertype = strtol(cp, &endptr, 16);
+	if (*endptr != '\0'
+	    || (et_ent.e_ethertype < ETH_ZLEN
+		|| et_ent.e_ethertype > 0xFFFF))
+		goto again; // Skip invalid etherproto type entry
+	q = et_ent.e_aliases = ethertype_aliases;
+	if (e != NULL) {
+		cp = e;
+		while (cp && *cp) {
+			if (*cp == ' ' || *cp == '\t') {
+				cp++;
+				continue;
+			}
+			if (q < &ethertype_aliases[MAXALIASES - 1]) /* aliases beyond the array's capacity are dropped */
+				*q++ = cp;
+			cp = strpbrk(cp, " \t");
+			if (cp != NULL)
+				*cp++ = '\0';
+		}
+	}
+	*q = NULL;
+	return (&et_ent);
+}
+
+
+/* Look up the entry whose name or one of whose aliases equals name, ignoring case; NULL if none. */
+struct ethertypeent *getethertypebyname(const char *name)
+{
+	struct ethertypeent *ent;
+	char **alias;
+	int hit = 0;
+
+	setethertypeent(ethertype_stayopen);
+	while (!hit && (ent = getethertypeent()) != NULL) {
+		/* primary name first, then the aliases until one matches */
+		hit = strcasecmp(ent->e_name, name) == 0;
+		for (alias = ent->e_aliases; !hit && *alias != NULL; alias++)
+			hit = strcasecmp(*alias, name) == 0;
+	}
+	if (!ethertype_stayopen)
+		endethertypeent();
+	return ent;
+}
+
+/* Look up the first entry whose e_ethertype equals type; NULL if none. */
+struct ethertypeent *getethertypebynumber(int type)
+{
+	struct ethertypeent *ent;
+	setethertypeent(ethertype_stayopen);
+	do {
+		ent = getethertypeent();
+	} while (ent != NULL && ent->e_ethertype != type);
+	if (!ethertype_stayopen)
+		endethertypeent();
+	return ent;
+}
diff --git a/tools/remus/imqebt/include/ebtables_u.h b/tools/remus/imqebt/include/ebtables_u.h
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/include/ebtables_u.h
@@ -0,0 +1,379 @@
+/*
+ * $Id: ebtables.c,v 1.03 2002/01/19
+ *
+ * Copyright (C) 2001-2002 Bart De Schuymer
+ *
+ * This code is strongly inspired by the iptables code which is
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EBTABLES_U_H
+#define EBTABLES_U_H
+#include <netinet/in.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter/x_tables.h>
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132
+#endif
+#ifndef IPPROTO_DCCP
+#define IPPROTO_DCCP 33
+#endif
+
+#define EXEC_STYLE_PRG 0
+#define EXEC_STYLE_DAEMON 1
+
+#ifndef EBT_MIN_ALIGN
+#define EBT_MIN_ALIGN (__alignof__(struct _xt_align))
+#endif
+#define EBT_ALIGN(s) (((s) + (EBT_MIN_ALIGN-1)) & ~(EBT_MIN_ALIGN-1))
+#define ERRORMSG_MAXLEN 128
+
+struct ebt_u_entries
+{
+ int policy;
+ unsigned int nentries;
+ /* counter offset for this chain */
+ unsigned int counter_offset;
+ /* used for udc */
+ unsigned int hook_mask;
+ char *kernel_start;
+ char name[EBT_CHAIN_MAXNAMELEN];
+ struct ebt_u_entry *entries;
+};
+
+struct ebt_cntchanges
+{
+ unsigned short type;
+ unsigned short change; /* determines incremental/decremental/change */
+ struct ebt_cntchanges *prev;
+ struct ebt_cntchanges *next;
+};
+
+#define EBT_ORI_MAX_CHAINS 10
+struct ebt_u_replace
+{
+ char name[EBT_TABLE_MAXNAMELEN];
+ unsigned int valid_hooks;
+ /* nr of rules in the table */
+ unsigned int nentries;
+ unsigned int num_chains;
+ unsigned int max_chains;
+ struct ebt_u_entries **chains;
+ /* nr of counters userspace expects back */
+ unsigned int num_counters;
+ /* where the kernel will put the old counters */
+ struct ebt_counter *counters;
+ /*
+ * can be used e.g. to know if a standard option
+ * has been specified twice
+ */
+ unsigned int flags;
+ /* we stick the specified command (e.g. -A) in here */
+ char command;
+ /*
+ * here we stick the chain to do our thing on (can be -1 if unspecified)
+ */
+ int selected_chain;
+ /* used for the atomic option */
+ char *filename;
+ /* tells what happened to the old rules (counter changes) */
+ struct ebt_cntchanges *cc;
+};
+
+struct ebt_u_table
+{
+ char name[EBT_TABLE_MAXNAMELEN];
+ void (*check)(struct ebt_u_replace *repl);
+ void (*help)(const char **);
+ struct ebt_u_table *next;
+};
+
+struct ebt_u_match_list
+{
+ struct ebt_u_match_list *next;
+ struct ebt_entry_match *m;
+};
+
+struct ebt_u_watcher_list
+{
+ struct ebt_u_watcher_list *next;
+ struct ebt_entry_watcher *w;
+};
+
+struct ebt_u_entry
+{
+ unsigned int bitmask;
+ unsigned int invflags;
+ uint16_t ethproto;
+ char in[IFNAMSIZ];
+ char logical_in[IFNAMSIZ];
+ char out[IFNAMSIZ];
+ char logical_out[IFNAMSIZ];
+ unsigned char sourcemac[ETH_ALEN];
+ unsigned char sourcemsk[ETH_ALEN];
+ unsigned char destmac[ETH_ALEN];
+ unsigned char destmsk[ETH_ALEN];
+ struct ebt_u_match_list *m_list;
+ struct ebt_u_watcher_list *w_list;
+ struct ebt_entry_target *t;
+ struct ebt_u_entry *prev;
+ struct ebt_u_entry *next;
+ struct ebt_counter cnt;
+ struct ebt_counter cnt_surplus; /* for increasing/decreasing a counter and for option 'C' */
+ struct ebt_cntchanges *cc;
+ /* the standard target needs this to know the name of a udc when
+ * printing out rules. */
+ struct ebt_u_replace *replace;
+};
+
+struct ebt_u_match
+{
+ char name[EBT_FUNCTION_MAXNAMELEN];
+ /* size of the real match data */
+ unsigned int size;
+ void (*help)(void);
+ void (*init)(struct ebt_entry_match *m);
+ int (*parse)(int c, char **argv, int argc,
+ const struct ebt_u_entry *entry, unsigned int *flags,
+ struct ebt_entry_match **match);
+ void (*final_check)(const struct ebt_u_entry *entry,
+ const struct ebt_entry_match *match,
+ const char *name, unsigned int hookmask, unsigned int time);
+ void (*print)(const struct ebt_u_entry *entry,
+ const struct ebt_entry_match *match);
+ int (*compare)(const struct ebt_entry_match *m1,
+ const struct ebt_entry_match *m2);
+ const struct option *extra_ops;
+ /*
+ * can be used e.g. to check for multiple occurrences of the same option
+ */
+ unsigned int flags;
+ unsigned int option_offset;
+ struct ebt_entry_match *m;
+ /*
+ * if used == 1 we no longer have to add it to
+ * the match chain of the new entry
+ * be sure to put it back on 0 when finished
+ */
+ unsigned int used;
+ struct ebt_u_match *next;
+};
+
+struct ebt_u_watcher
+{
+ char name[EBT_FUNCTION_MAXNAMELEN];
+ unsigned int size;
+ void (*help)(void);
+ void (*init)(struct ebt_entry_watcher *w);
+ int (*parse)(int c, char **argv, int argc,
+ const struct ebt_u_entry *entry, unsigned int *flags,
+ struct ebt_entry_watcher **watcher);
+ void (*final_check)(const struct ebt_u_entry *entry,
+ const struct ebt_entry_watcher *watch, const char *name,
+ unsigned int hookmask, unsigned int time);
+ void (*print)(const struct ebt_u_entry *entry,
+ const struct ebt_entry_watcher *watcher);
+ int (*compare)(const struct ebt_entry_watcher *w1,
+ const struct ebt_entry_watcher *w2);
+ const struct option *extra_ops;
+ unsigned int flags;
+ unsigned int option_offset;
+ struct ebt_entry_watcher *w;
+ unsigned int used;
+ struct ebt_u_watcher *next;
+};
+
+struct ebt_u_target
+{
+ char name[EBT_FUNCTION_MAXNAMELEN];
+ unsigned int size;
+ void (*help)(void);
+ void (*init)(struct ebt_entry_target *t);
+ int (*parse)(int c, char **argv, int argc,
+ const struct ebt_u_entry *entry, unsigned int *flags,
+ struct ebt_entry_target **target);
+ void (*final_check)(const struct ebt_u_entry *entry,
+ const struct ebt_entry_target *target, const char *name,
+ unsigned int hookmask, unsigned int time);
+ void (*print)(const struct ebt_u_entry *entry,
+ const struct ebt_entry_target *target);
+ int (*compare)(const struct ebt_entry_target *t1,
+ const struct ebt_entry_target *t2);
+ const struct option *extra_ops;
+ unsigned int option_offset;
+ unsigned int flags;
+ struct ebt_entry_target *t;
+ unsigned int used;
+ struct ebt_u_target *next;
+};
+
+/* libebtc.c */
+
+extern struct ebt_u_table *ebt_tables;
+extern struct ebt_u_match *ebt_matches;
+extern struct ebt_u_watcher *ebt_watchers;
+extern struct ebt_u_target *ebt_targets;
+
+void ebt_register_table(struct ebt_u_table *);
+void ebt_register_match(struct ebt_u_match *);
+void ebt_register_watcher(struct ebt_u_watcher *);
+void ebt_register_target(struct ebt_u_target *t);
+int ebt_get_kernel_table(struct ebt_u_replace *replace, int init);
+struct ebt_u_target *ebt_find_target(const char *name);
+struct ebt_u_match *ebt_find_match(const char *name);
+struct ebt_u_watcher *ebt_find_watcher(const char *name);
+struct ebt_u_table *ebt_find_table(const char *name);
+int ebtables_insmod(const char *modname);
+void ebt_list_extensions(void);
+void ebt_initialize_entry(struct ebt_u_entry *e);
+void ebt_cleanup_replace(struct ebt_u_replace *replace);
+void ebt_reinit_extensions(void);
+void ebt_double_chains(struct ebt_u_replace *replace);
+void ebt_free_u_entry(struct ebt_u_entry *e);
+struct ebt_u_entries *ebt_name_to_chain(const struct ebt_u_replace *replace,
+ const char* arg);
+struct ebt_u_entries *ebt_name_to_chain(const struct ebt_u_replace *replace,
+ const char* arg);
+int ebt_get_chainnr(const struct ebt_u_replace *replace, const char* arg);
+/**/
+void ebt_change_policy(struct ebt_u_replace *replace, int policy);
+void ebt_flush_chains(struct ebt_u_replace *replace);
+int ebt_check_rule_exists(struct ebt_u_replace *replace,
+ struct ebt_u_entry *new_entry);
+void ebt_add_rule(struct ebt_u_replace *replace, struct ebt_u_entry *new_entry,
+ int rule_nr);
+void ebt_delete_rule(struct ebt_u_replace *replace,
+ struct ebt_u_entry *new_entry, int begin, int end);
+void ebt_zero_counters(struct ebt_u_replace *replace);
+void ebt_change_counters(struct ebt_u_replace *replace,
+ struct ebt_u_entry *new_entry, int begin, int end,
+ struct ebt_counter *cnt, int mask);
+void ebt_new_chain(struct ebt_u_replace *replace, const char *name, int policy);
+void ebt_delete_chain(struct ebt_u_replace *replace);
+void ebt_rename_chain(struct ebt_u_replace *replace, const char *name);
+/**/
+void ebt_do_final_checks(struct ebt_u_replace *replace, struct ebt_u_entry *e,
+ struct ebt_u_entries *entries);
+int ebt_check_for_references(struct ebt_u_replace *replace, int print_err);
+int ebt_check_for_references2(struct ebt_u_replace *replace, int chain_nr,
+ int print_err);
+void ebt_check_for_loops(struct ebt_u_replace *replace);
+void ebt_add_match(struct ebt_u_entry *new_entry, struct ebt_u_match *m);
+void ebt_add_watcher(struct ebt_u_entry *new_entry, struct ebt_u_watcher *w);
+void ebt_iterate_matches(void (*f)(struct ebt_u_match *));
+void ebt_iterate_watchers(void (*f)(struct ebt_u_watcher *));
+void ebt_iterate_targets(void (*f)(struct ebt_u_target *));
+void __ebt_print_bug(char *file, int line, char *format, ...);
+void __ebt_print_error(char *format, ...);
+
+/* communication.c */
+
+int ebt_get_table(struct ebt_u_replace *repl, int init);
+void ebt_deliver_counters(struct ebt_u_replace *repl);
+void ebt_deliver_table(struct ebt_u_replace *repl);
+
+/* useful_functions.c */
+
+extern int ebt_invert;
+void ebt_check_option(unsigned int *flags, unsigned int mask);
+#define ebt_check_inverse(arg) _ebt_check_inverse(arg, argc, argv)
+int _ebt_check_inverse(const char option[], int argc, char **argv);
+void ebt_print_mac(const unsigned char *mac);
+void ebt_print_mac_and_mask(const unsigned char *mac, const unsigned char *mask);
+int ebt_get_mac_and_mask(const char *from, unsigned char *to, unsigned char *mask);
+void ebt_parse_ip_address(char *address, uint32_t *addr, uint32_t *msk);
+char *ebt_mask_to_dotted(uint32_t mask);
+void ebt_parse_ip6_address(char *address, struct in6_addr *addr,
+ struct in6_addr *msk);
+char *ebt_ip6_to_numeric(const struct in6_addr *addrp);
+
+
+int do_command(int argc, char *argv[], int exec_style,
+ struct ebt_u_replace *replace_);
+
+struct ethertypeent *parseethertypebynumber(int type);
+
+#define ebt_to_chain(repl) \
+({struct ebt_u_entries *_ch = NULL; /* stays NULL when no chain is selected (-1) */ \
+if ((repl)->selected_chain != -1) \
+ _ch = (repl)->chains[(repl)->selected_chain]; \
+_ch;})
+#define ebt_print_bug(format, args...) \
+ __ebt_print_bug(__FILE__, __LINE__, format, ##args)
+#define ebt_print_error(format,args...) __ebt_print_error(format, ##args);
+#define ebt_print_error2(format, args...) do {__ebt_print_error(format, ##args); \
+ return -1;} while (0)
+#define ebt_check_option2(flags,mask) \
+({ebt_check_option(flags,mask); \
+ if (ebt_errormsg[0] != '\0') \
+ return -1;})
+#define ebt_check_inverse2(option) \
+({int __ret = ebt_check_inverse(option); \
+if (ebt_errormsg[0] != '\0') \
+ return -1; \
+if (!optarg) { \
+ __ebt_print_error("Option without (mandatory) argument"); \
+ return -1; \
+} \
+__ret;})
+#define ebt_print_memory() do {printf("Ebtables: " __FILE__ \
+ " %s %d :Out of memory.\n", __FUNCTION__, __LINE__); exit(-1);} while (0)
+
+/* used for keeping the rule counters right during rule adds or deletes */
+#define CNT_NORM 0
+#define CNT_DEL 1
+#define CNT_ADD 2
+#define CNT_CHANGE 3
+
+extern const char *ebt_hooknames[NF_BR_NUMHOOKS];
+extern const char *ebt_standard_targets[NUM_STANDARD_TARGETS];
+extern char ebt_errormsg[ERRORMSG_MAXLEN];
+extern char *ebt_modprobe;
+extern int ebt_silent;
+extern int ebt_printstyle_mac;
+
+/*
+ * Transforms a target string into the right integer,
+ * returns 0 on success.
+ */
+#define FILL_TARGET(_str, _pos) ({ \
+ int _i, _ret = 0; \
+ for (_i = 0; _i < NUM_STANDARD_TARGETS; _i++) \
+ if (!strcmp(_str, ebt_standard_targets[_i])) {\
+ _pos = -_i - 1; \
+ break; \
+ } \
+ if (_i == NUM_STANDARD_TARGETS) \
+ _ret = 1; \
+ _ret; \
+})
+
+/* Maps a standard-target value (-1, -2, ...) to its index (0, 1, ...) in ebt_standard_targets[] */
+#define TARGET_INDEX(_value) (-(_value) - 1)
+/* Returns a target string corresponding to the value */
+#define TARGET_NAME(_value) (ebt_standard_targets[TARGET_INDEX(_value)])
+/* True if the hook mask denotes that the rule is in a base chain */
+#define BASE_CHAIN (hookmask & (1 << NF_BR_NUMHOOKS))
+/* Clear the bit in the hook_mask that tells if the rule is on a base chain */
+#define CLEAR_BASE_CHAIN_BIT (hookmask &= ~(1 << NF_BR_NUMHOOKS))
+#define PRINT_VERSION printf(PROGNAME" v"PROGVERSION" ("PROGDATE")\n")
+#ifndef PROC_SYS_MODPROBE
+#define PROC_SYS_MODPROBE "/proc/sys/kernel/modprobe"
+#endif
+#define ATOMIC_ENV_VARIABLE "EBTABLES_ATOMIC_FILE"
+#endif /* EBTABLES_U_H */
diff --git a/tools/remus/imqebt/include/ethernetdb.h b/tools/remus/imqebt/include/ethernetdb.h
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/include/ethernetdb.h
@@ -0,0 +1,58 @@
+/*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/* All data returned by the network data base library are supplied in
+ host order and returned in network order (suitable for use in
+ system calls). */
+
+#ifndef _ETHERNETDB_H
+#define _ETHERNETDB_H 1
+
+#include <features.h>
+#include <netinet/in.h>
+#include <stdint.h>
+
+/* Absolute file name for network data base files. */
+#ifndef _PATH_ETHERTYPES
+#define _PATH_ETHERTYPES "/etc/ethertypes"
+#endif /* _PATH_ETHERTYPES */
+
+struct ethertypeent {
+ char *e_name; /* Official ethernet type name. */
+ char **e_aliases; /* Alias list. */
+ int e_ethertype; /* Ethernet type number. */
+};
+
+/* Open ethertype data base files and mark them as staying open even
+ after a later search if STAY_OPEN is non-zero. */
+extern void setethertypeent(int __stay_open) __THROW;
+
+/* Close ethertype data base files and clear `stay open' flag. */
+extern void endethertypeent(void) __THROW;
+
+/* Get next entry from ethertype data base file. Open data base if
+ necessary. */
+extern struct ethertypeent *getethertypeent(void) __THROW;
+
+/* Return entry from ethertype data base for the ethertype named NAME. */
+extern struct ethertypeent *getethertypebyname(__const char *__name)
+ __THROW;
+
+/* Return entry from ethertype data base whose number is ETHERTYPE. */
+extern struct ethertypeent *getethertypebynumber(int __ethertype) __THROW;
+
+
+#endif /* ethernetdb.h */
diff --git a/tools/remus/imqebt/include/linux/if_ether.h b/tools/remus/imqebt/include/linux/if_ether.h
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/include/linux/if_ether.h
@@ -0,0 +1,146 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Global definitions for the Ethernet IEEE 802.3 interface.
+ *
+ * Version: @(#)if_ether.h 1.0.1a 02/08/94
+ *
+ * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Donald Becker, <becker@super.org>
+ * Alan Cox, <alan@lxorguk.ukuu.org.uk>
+ * Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_IF_ETHER_H
+#define _LINUX_IF_ETHER_H
+
+#include <linux/types.h>
+
+/*
+ * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
+ * and FCS/CRC (frame check sequence).
+ */
+
+#define ETH_ALEN 6 /* Octets in one ethernet addr */
+#define ETH_HLEN 14 /* Total octets in header. */
+#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */
+#define ETH_DATA_LEN 1500 /* Max. octets in payload */
+#define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */
+#define ETH_FCS_LEN 4 /* Octets in the FCS */
+
+/*
+ * These are the defined Ethernet Protocol ID's.
+ */
+
+#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */
+#define ETH_P_PUP 0x0200 /* Xerox PUP packet */
+#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */
+#define ETH_P_IP 0x0800 /* Internet Protocol packet */
+#define ETH_P_X25 0x0805 /* CCITT X.25 */
+#define ETH_P_ARP 0x0806 /* Address Resolution packet */
+#define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */
+#define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */
+#define ETH_P_DEC 0x6000 /* DEC Assigned proto */
+#define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */
+#define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */
+#define ETH_P_DNA_RT 0x6003 /* DEC DNA Routing */
+#define ETH_P_LAT 0x6004 /* DEC LAT */
+#define ETH_P_DIAG 0x6005 /* DEC Diagnostics */
+#define ETH_P_CUST 0x6006 /* DEC Customer use */
+#define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */
+#define ETH_P_TEB 0x6558 /* Trans Ether Bridging */
+#define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */
+#define ETH_P_ATALK 0x809B /* Appletalk DDP */
+#define ETH_P_AARP 0x80F3 /* Appletalk AARP */
+#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */
+#define ETH_P_IPX 0x8137 /* IPX over DIX */
+#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
+#define ETH_P_PAUSE 0x8808 /* IEEE Pause frames. See 802.3 31B */
+#define ETH_P_SLOW 0x8809 /* Slow Protocol. See 802.3ad 43B */
+#define ETH_P_WCCP 0x883E /* Web-cache coordination protocol
+ * defined in draft-wilson-wrec-wccp-v2-00.txt */
+#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */
+#define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */
+#define ETH_P_MPLS_UC 0x8847 /* MPLS Unicast traffic */
+#define ETH_P_MPLS_MC 0x8848 /* MPLS Multicast traffic */
+#define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */
+#define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport
+ * over Ethernet
+ */
+#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */
+#define ETH_P_AOE 0x88A2 /* ATA over Ethernet */
+#define ETH_P_TIPC 0x88CA /* TIPC */
+#define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */
+#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+
+/*
+ * Non DIX types. Won't clash for 1500 types.
+ */
+
+#define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */
+#define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */
+#define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */
+#define ETH_P_802_2 0x0004 /* 802.2 frames */
+#define ETH_P_SNAP 0x0005 /* Internal only */
+#define ETH_P_DDCMP 0x0006 /* DEC DDCMP: Internal only */
+#define ETH_P_WAN_PPP 0x0007 /* Dummy type for WAN PPP frames*/
+#define ETH_P_PPP_MP 0x0008 /* Dummy type for PPP MP frames */
+#define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */
+#define ETH_P_CAN 0x000C /* Controller Area Network */
+#define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/
+#define ETH_P_TR_802_2 0x0011 /* 802.2 frames */
+#define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */
+#define ETH_P_CONTROL 0x0016 /* Card specific control frames */
+#define ETH_P_IRDA 0x0017 /* Linux-IrDA */
+#define ETH_P_ECONET 0x0018 /* Acorn Econet */
+#define ETH_P_HDLC 0x0019 /* HDLC frames */
+#define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */
+#define ETH_P_DSA 0x001B /* Distributed Switch Arch. */
+#define ETH_P_TRAILER 0x001C /* Trailer switch tagging */
+#define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */
+
+/*
+ * This is an Ethernet frame header.
+ */
+
+struct ethhdr {
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+ unsigned char h_source[ETH_ALEN]; /* source ether addr */
+ __be16 h_proto; /* packet type ID field */
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
+{
+ return (struct ethhdr *)skb_mac_header(skb);
+}
+
+int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
+
+#ifdef CONFIG_SYSCTL
+extern struct ctl_table ether_table[];
+#endif
+
+extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len);
+
+/*
+ * Display a 6 byte device address (MAC) in a readable format.
+ */
+extern char *print_mac(char *buf, const unsigned char *addr);
+#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define MAC_BUF_SIZE 18
+#define DECLARE_MAC_BUF(var) char var[MAC_BUF_SIZE] __maybe_unused
+
+#endif
+
+#endif /* _LINUX_IF_ETHER_H */
diff --git a/tools/remus/imqebt/include/linux/netfilter_bridge.h b/tools/remus/imqebt/include/linux/netfilter_bridge.h
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/include/linux/netfilter_bridge.h
@@ -0,0 +1,91 @@
+#ifndef __LINUX_BRIDGE_NETFILTER_H
+#define __LINUX_BRIDGE_NETFILTER_H
+
+/* bridge-specific defines for netfilter.
+ */
+
+/* Bridge Hooks */
+/* After promisc drops, checksum checks. */
+#define NF_BR_PRE_ROUTING 0
+/* If the packet is destined for this box. */
+#define NF_BR_LOCAL_IN 1
+/* If the packet is destined for another interface. */
+#define NF_BR_FORWARD 2
+/* Packets coming from a local process. */
+#define NF_BR_LOCAL_OUT 3
+/* Packets about to hit the wire. */
+#define NF_BR_POST_ROUTING 4
+/* Not really a hook, but used for the ebtables broute table */
+#define NF_BR_BROUTING 5
+#define NF_BR_NUMHOOKS 6
+
+#ifdef __KERNEL__
+#include <linux/netfilter.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
+
+enum nf_br_hook_priorities {
+ NF_BR_PRI_FIRST = INT_MIN,
+ NF_BR_PRI_NAT_DST_BRIDGED = -300,
+ NF_BR_PRI_FILTER_BRIDGED = -200,
+ NF_BR_PRI_BRNF = 0,
+ NF_BR_PRI_NAT_DST_OTHER = 100,
+ NF_BR_PRI_FILTER_OTHER = 200,
+ NF_BR_PRI_NAT_SRC = 300,
+ NF_BR_PRI_LAST = INT_MAX,
+};
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+
+#define BRNF_PKT_TYPE 0x01
+#define BRNF_BRIDGED_DNAT 0x02
+#define BRNF_DONT_TAKE_PARENT 0x04
+#define BRNF_BRIDGED 0x08
+#define BRNF_NF_BRIDGE_PREROUTING 0x10
+
+
+/* Only used in br_forward.c */
+extern int nf_bridge_copy_header(struct sk_buff *skb);
+static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
+{
+ if (skb->nf_bridge &&
+ skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
+ return nf_bridge_copy_header(skb);
+ return 0;
+}
+
+static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case __cpu_to_be16(ETH_P_8021Q):
+ return VLAN_HLEN;
+ case __cpu_to_be16(ETH_P_PPP_SES):
+ return PPPOE_SES_HLEN;
+ default:
+ return 0;
+ }
+}
+
+/* This is called by the IP fragmenting code and it ensures there is
+ * enough room for the encapsulating header (if there is one). */
+static inline unsigned int nf_bridge_pad(const struct sk_buff *skb)
+{
+ if (skb->nf_bridge)
+ return nf_bridge_encap_header_len(skb);
+ return 0;
+}
+
+struct bridge_skb_cb {
+ union {
+ __be32 ipv4;
+ } daddr;
+};
+
+#else
+#define nf_bridge_maybe_copy_header(skb) (0)
+#define nf_bridge_pad(skb) (0)
+#endif /* CONFIG_BRIDGE_NETFILTER */
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/tools/remus/imqebt/include/linux/netfilter_bridge/ebt_imq.h b/tools/remus/imqebt/include/linux/netfilter_bridge/ebt_imq.h
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/include/linux/netfilter_bridge/ebt_imq.h
@@ -0,0 +1,8 @@
+#ifndef __LINUX_BRIDGE_EBT_IMQ_H
+#define __LINUX_BRIDGE_EBT_IMQ_H
+
+struct ebt_imq_info
+{
+ unsigned int todev;
+};
+#endif
diff --git a/tools/remus/imqebt/include/linux/netfilter_bridge/ebtables.h b/tools/remus/imqebt/include/linux/netfilter_bridge/ebtables.h
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/include/linux/netfilter_bridge/ebtables.h
@@ -0,0 +1,276 @@
+/*
+ * ebtables
+ *
+ * Authors:
+ * Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * ebtables.c,v 2.0, April, 2002
+ *
+ * This code is strongly inspired by the iptables code which is
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ */
+
+/* Local copy of the kernel file, needed for Sparc64 support */
+#ifndef __LINUX_BRIDGE_EFF_H
+#define __LINUX_BRIDGE_EFF_H
+#include <linux/if.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/if_ether.h>
+
+#define EBT_TABLE_MAXNAMELEN 32
+#define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN
+#define EBT_FUNCTION_MAXNAMELEN EBT_TABLE_MAXNAMELEN
+
+/* verdicts >0 are "branches" */
+#define EBT_ACCEPT -1
+#define EBT_DROP -2
+#define EBT_CONTINUE -3
+#define EBT_RETURN -4
+#define NUM_STANDARD_TARGETS 4
+/* ebtables target modules store the verdict inside an int. We can
+ * reclaim a part of this int for backwards compatible extensions.
+ * The 4 lsb are more than enough to store the verdict. */
+#define EBT_VERDICT_BITS 0x0000000F
+
+struct ebt_counter
+{
+ uint64_t pcnt;
+ uint64_t bcnt;
+};
+
+struct ebt_replace
+{
+ char name[EBT_TABLE_MAXNAMELEN];
+ unsigned int valid_hooks;
+ /* nr of rules in the table */
+ unsigned int nentries;
+ /* total size of the entries */
+ unsigned int entries_size;
+ /* start of the chains */
+#ifdef KERNEL_64_USERSPACE_32
+ uint64_t hook_entry[NF_BR_NUMHOOKS];
+#else
+ struct ebt_entries *hook_entry[NF_BR_NUMHOOKS];
+#endif
+ /* nr of counters userspace expects back */
+ unsigned int num_counters;
+ /* where the kernel will put the old counters */
+#ifdef KERNEL_64_USERSPACE_32
+ uint64_t counters;
+ uint64_t entries;
+#else
+ struct ebt_counter *counters;
+ char *entries;
+#endif
+};
+
+struct ebt_entries {
+ /* this field is always set to zero
+ * See EBT_ENTRY_OR_ENTRIES.
+ * Must be same size as ebt_entry.bitmask */
+ unsigned int distinguisher;
+ /* the chain name */
+ char name[EBT_CHAIN_MAXNAMELEN];
+ /* counter offset for this chain */
+ unsigned int counter_offset;
+ /* one standard (accept, drop, return) per hook */
+ int policy;
+ /* nr. of entries */
+ unsigned int nentries;
+ /* entry list */
+ char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
+};
+
+/* used for the bitmask of struct ebt_entry */
+
+/* This is a hack to make a difference between an ebt_entry struct and an
+ * ebt_entries struct when traversing the entries from start to end.
+ * Using this simplifies the code a lot, while still being able to use
+ * ebt_entries.
+ * Contrary, iptables doesn't use something like ebt_entries and therefore uses
+ * different techniques for naming the policy and such. So, iptables doesn't
+ * need a hack like this.
+ */
+#define EBT_ENTRY_OR_ENTRIES 0x01
+/* these are the normal masks */
+#define EBT_NOPROTO 0x02
+#define EBT_802_3 0x04
+#define EBT_SOURCEMAC 0x08
+#define EBT_DESTMAC 0x10
+#define EBT_F_MASK (EBT_NOPROTO | EBT_802_3 | EBT_SOURCEMAC | EBT_DESTMAC \
+ | EBT_ENTRY_OR_ENTRIES)
+
+#define EBT_IPROTO 0x01
+#define EBT_IIN 0x02
+#define EBT_IOUT 0x04
+#define EBT_ISOURCE 0x8
+#define EBT_IDEST 0x10
+#define EBT_ILOGICALIN 0x20
+#define EBT_ILOGICALOUT 0x40
+#define EBT_INV_MASK (EBT_IPROTO | EBT_IIN | EBT_IOUT | EBT_ILOGICALIN \
+ | EBT_ILOGICALOUT | EBT_ISOURCE | EBT_IDEST)
+
+struct ebt_entry_match
+{
+ union {
+ char name[EBT_FUNCTION_MAXNAMELEN];
+ struct ebt_match *match;
+ } u;
+ /* size of data */
+ unsigned int match_size;
+#ifdef KERNEL_64_USERSPACE_32
+ unsigned int pad;
+#endif
+ unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
+};
+
+struct ebt_entry_watcher
+{
+ union {
+ char name[EBT_FUNCTION_MAXNAMELEN];
+ struct ebt_watcher *watcher;
+ } u;
+ /* size of data */
+ unsigned int watcher_size;
+#ifdef KERNEL_64_USERSPACE_32
+ unsigned int pad;
+#endif
+ unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
+};
+
+struct ebt_entry_target
+{
+ union {
+ char name[EBT_FUNCTION_MAXNAMELEN];
+ struct ebt_target *target;
+ } u;
+ /* size of data */
+ unsigned int target_size;
+#ifdef KERNEL_64_USERSPACE_32
+ unsigned int pad;
+#endif
+ unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
+};
+
+#define EBT_STANDARD_TARGET "standard"
+struct ebt_standard_target
+{
+ struct ebt_entry_target target;
+ int verdict;
+#ifdef KERNEL_64_USERSPACE_32
+ unsigned int pad;
+#endif
+};
+
+/* one entry */
+struct ebt_entry {
+ /* this needs to be the first field */
+ unsigned int bitmask;
+ unsigned int invflags;
+ uint16_t ethproto;
+ /* the physical in-dev */
+ char in[IFNAMSIZ];
+ /* the logical in-dev */
+ char logical_in[IFNAMSIZ];
+ /* the physical out-dev */
+ char out[IFNAMSIZ];
+ /* the logical out-dev */
+ char logical_out[IFNAMSIZ];
+ unsigned char sourcemac[ETH_ALEN];
+ unsigned char sourcemsk[ETH_ALEN];
+ unsigned char destmac[ETH_ALEN];
+ unsigned char destmsk[ETH_ALEN];
+ /* sizeof ebt_entry + matches */
+ unsigned int watchers_offset;
+ /* sizeof ebt_entry + matches + watchers */
+ unsigned int target_offset;
+ /* sizeof ebt_entry + matches + watchers + target */
+ unsigned int next_offset;
+ unsigned char elems[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
+};
+
+/* {g,s}etsockopt numbers */
+#define EBT_BASE_CTL 128
+
+#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL)
+#define EBT_SO_SET_COUNTERS (EBT_SO_SET_ENTRIES+1)
+#define EBT_SO_SET_MAX (EBT_SO_SET_COUNTERS+1)
+
+#define EBT_SO_GET_INFO (EBT_BASE_CTL)
+#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO+1)
+#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES+1)
+#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO+1)
+#define EBT_SO_GET_MAX (EBT_SO_GET_INIT_ENTRIES+1)
+
+/* blatantly stolen from ip_tables.h
+ * fn returns 0 to continue iteration */
+#define EBT_MATCH_ITERATE(e, fn, args...) \
+({ \
+ unsigned int __i; \
+ int __ret = 0; \
+ struct ebt_entry_match *__match; \
+ \
+ for (__i = sizeof(struct ebt_entry); \
+ __i < (e)->watchers_offset; \
+ __i += __match->match_size + \
+ sizeof(struct ebt_entry_match)) { \
+ __match = (void *)(e) + __i; \
+ \
+ __ret = fn(__match , ## args); \
+ if (__ret != 0) \
+ break; \
+ } \
+ if (__ret == 0) { \
+ if (__i != (e)->watchers_offset) \
+ __ret = -EINVAL; \
+ } \
+ __ret; \
+})
+
+#define EBT_WATCHER_ITERATE(e, fn, args...) \
+({ /* calls fn(watcher, args...) on each watcher of entry e; yields the first nonzero result */ \
+ unsigned int __i; \
+ int __ret = 0; \
+ struct ebt_entry_watcher *__watcher; \
+ /* watchers occupy [watchers_offset, target_offset) of the entry */ \
+ for (__i = (e)->watchers_offset; \
+ __i < (e)->target_offset; \
+ __i += __watcher->watcher_size + \
+ sizeof(struct ebt_entry_watcher)) { \
+ __watcher = (void *)(e) + __i; \
+ \
+ __ret = fn(__watcher , ## args); \
+ if (__ret != 0) \
+ break; \
+ } \
+ if (__ret == 0) { /* the walk must land exactly on target_offset */ \
+ if (__i != (e)->target_offset) \
+ __ret = -EINVAL; \
+ } \
+ __ret; \
+})
+
+#define EBT_ENTRY_ITERATE(entries, size, fn, args...) \
+({ \
+ unsigned int __i; \
+ int __ret = 0; \
+ struct ebt_entry *__entry; \
+ \
+ for (__i = 0; __i < (size);) { \
+ __entry = (void *)(entries) + __i; \
+ __ret = fn(__entry , ## args); \
+ if (__ret != 0) \
+ break; \
+ if (__entry->bitmask != 0) \
+ __i += __entry->next_offset; \
+ else \
+ __i += sizeof(struct ebt_entries); \
+ } \
+ if (__ret == 0) { \
+ if (__i != (size)) \
+ __ret = -EINVAL; \
+ } \
+ __ret; \
+})
+
+#endif
diff --git a/tools/remus/imqebt/include/linux/types.h b/tools/remus/imqebt/include/linux/types.h
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/include/linux/types.h
@@ -0,0 +1,209 @@
+#ifndef _LINUX_TYPES_H
+#define _LINUX_TYPES_H
+
+#include <asm/types.h>
+
+#ifndef __ASSEMBLY__
+#ifdef __KERNEL__
+
+#define DECLARE_BITMAP(name,bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+#endif
+
+#include <linux/posix_types.h>
+
+#ifdef __KERNEL__
+
+typedef __u32 __kernel_dev_t;
+
+typedef __kernel_fd_set fd_set;
+typedef __kernel_dev_t dev_t;
+typedef __kernel_ino_t ino_t;
+typedef __kernel_mode_t mode_t;
+typedef __kernel_nlink_t nlink_t;
+typedef __kernel_off_t off_t;
+typedef __kernel_pid_t pid_t;
+typedef __kernel_daddr_t daddr_t;
+typedef __kernel_key_t key_t;
+typedef __kernel_suseconds_t suseconds_t;
+typedef __kernel_timer_t timer_t;
+typedef __kernel_clockid_t clockid_t;
+typedef __kernel_mqd_t mqd_t;
+
+typedef _Bool bool;
+
+typedef __kernel_uid32_t uid_t;
+typedef __kernel_gid32_t gid_t;
+typedef __kernel_uid16_t uid16_t;
+typedef __kernel_gid16_t gid16_t;
+
+typedef unsigned long uintptr_t;
+
+#ifdef CONFIG_UID16
+/* This is defined by include/asm-{arch}/posix_types.h */
+typedef __kernel_old_uid_t old_uid_t;
+typedef __kernel_old_gid_t old_gid_t;
+#endif /* CONFIG_UID16 */
+
+#if defined(__GNUC__)
+typedef __kernel_loff_t loff_t;
+#endif
+
+/*
+ * The following typedefs are also protected by individual ifdefs for
+ * historical reasons:
+ */
+#ifndef _SIZE_T
+#define _SIZE_T
+typedef __kernel_size_t size_t;
+#endif
+
+#ifndef _SSIZE_T
+#define _SSIZE_T
+typedef __kernel_ssize_t ssize_t;
+#endif
+
+#ifndef _PTRDIFF_T
+#define _PTRDIFF_T
+typedef __kernel_ptrdiff_t ptrdiff_t;
+#endif
+
+#ifndef _TIME_T
+#define _TIME_T
+typedef __kernel_time_t time_t;
+#endif
+
+#ifndef _CLOCK_T
+#define _CLOCK_T
+typedef __kernel_clock_t clock_t;
+#endif
+
+#ifndef _CADDR_T
+#define _CADDR_T
+typedef __kernel_caddr_t caddr_t;
+#endif
+
+/* bsd */
+typedef unsigned char u_char;
+typedef unsigned short u_short;
+typedef unsigned int u_int;
+typedef unsigned long u_long;
+
+/* sysv */
+typedef unsigned char unchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+typedef __u8 u_int8_t;
+typedef __s8 int8_t;
+typedef __u16 u_int16_t;
+typedef __s16 int16_t;
+typedef __u32 u_int32_t;
+typedef __s32 int32_t;
+
+#endif /* !(__BIT_TYPES_DEFINED__) */
+
+typedef __u8 uint8_t;
+typedef __u16 uint16_t;
+typedef __u32 uint32_t;
+
+#if defined(__GNUC__)
+typedef __u64 uint64_t;
+typedef __u64 u_int64_t;
+typedef __s64 int64_t;
+#endif
+
+/* this is a special 64bit data type that is 8-byte aligned */
+#define aligned_u64 __u64 __attribute__((aligned(8)))
+#define aligned_be64 __be64 __attribute__((aligned(8)))
+#define aligned_le64 __le64 __attribute__((aligned(8)))
+
+/**
+ * The type used for indexing onto a disc or disc partition.
+ *
+ * Linux always considers sectors to be 512 bytes long independently
+ * of the devices real block size.
+ *
+ * blkcnt_t is the type of the inode's block count.
+ */
+#ifdef CONFIG_LBD
+typedef u64 sector_t;
+typedef u64 blkcnt_t;
+#else
+typedef unsigned long sector_t;
+typedef unsigned long blkcnt_t;
+#endif
+
+/*
+ * The type of an index into the pagecache. Use a #define so asm/types.h
+ * can override it.
+ */
+#ifndef pgoff_t
+#define pgoff_t unsigned long
+#endif
+
+#endif /* __KERNEL__ */
+
+/*
+ * Below are truly Linux-specific types that should never collide with
+ * any application/library that wants linux/types.h.
+ */
+
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+#ifdef __CHECK_ENDIAN__
+#define __bitwise __bitwise__
+#else
+#define __bitwise
+#endif
+
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
+#ifdef __KERNEL__
+typedef unsigned __bitwise__ gfp_t;
+typedef unsigned __bitwise__ fmode_t;
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+typedef u64 phys_addr_t;
+#else
+typedef u32 phys_addr_t;
+#endif
+
+typedef phys_addr_t resource_size_t;
+
+typedef struct {
+ volatile int counter;
+} atomic_t;
+
+#ifdef CONFIG_64BIT
+typedef struct {
+ volatile long counter;
+} atomic64_t;
+#endif
+
+struct ustat {
+ __kernel_daddr_t f_tfree;
+ __kernel_ino_t f_tinode;
+ char f_fname[6];
+ char f_fpack[6];
+};
+
+#endif /* __KERNEL__ */
+#endif /* __ASSEMBLY__ */
+#endif /* _LINUX_TYPES_H */
diff --git a/tools/remus/imqebt/libebtc.c b/tools/remus/imqebt/libebtc.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/libebtc.c
@@ -0,0 +1,1280 @@
+/*
+ * libebtc.c, January 2004
+ *
+ * Contains the functions with which to make a table in userspace.
+ *
+ * Author: Bart De Schuymer
+ *
+ * This code is strongly inspired by the iptables code which is
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include "include/ebtables_u.h"
+#include "include/ethernetdb.h"
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+
+static void decrease_chain_jumps(struct ebt_u_replace *replace);
+static int iterate_entries(struct ebt_u_replace *replace, int type);
+
+/* The standard names, indexed by the NF_BR_* hook numbers */
+const char *ebt_hooknames[NF_BR_NUMHOOKS] =
+{
+	[NF_BR_PRE_ROUTING] = "PREROUTING",
+	[NF_BR_LOCAL_IN] = "INPUT",
+	[NF_BR_FORWARD] = "FORWARD",
+	[NF_BR_LOCAL_OUT] = "OUTPUT",
+	[NF_BR_POST_ROUTING] = "POSTROUTING",
+	[NF_BR_BROUTING] = "BROUTING"
+};
+
+/* The four target names */
+const char* ebt_standard_targets[NUM_STANDARD_TARGETS] =
+{
+ "ACCEPT",
+ "DROP",
+ "CONTINUE",
+ "RETURN",
+};
+
+/* The lists of supported tables, matches, watchers and targets */
+struct ebt_u_table *ebt_tables;
+struct ebt_u_match *ebt_matches;
+struct ebt_u_watcher *ebt_watchers;
+struct ebt_u_target *ebt_targets;
+
+/* Find the right structure belonging to a name */
+struct ebt_u_target *ebt_find_target(const char *name)
+{
+	struct ebt_u_target *cur; /* ends up NULL when no target is called name */
+
+	for (cur = ebt_targets; cur && strcmp(cur->name, name); cur = cur->next)
+		;
+	return cur;
+}
+
+struct ebt_u_match *ebt_find_match(const char *name)
+{
+	struct ebt_u_match *cur; /* ends up NULL when no match is called name */
+
+	for (cur = ebt_matches; cur && strcmp(cur->name, name); cur = cur->next)
+		;
+	return cur;
+}
+
+struct ebt_u_watcher *ebt_find_watcher(const char *name)
+{
+	struct ebt_u_watcher *cur; /* ends up NULL when no watcher is called name */
+
+	for (cur = ebt_watchers; cur && strcmp(cur->name, name); cur = cur->next)
+		;
+	return cur;
+}
+
+struct ebt_u_table *ebt_find_table(const char *name)
+{
+	struct ebt_u_table *cur; /* ends up NULL when no table is called name */
+
+	for (cur = ebt_tables; cur && strcmp(cur->name, name); cur = cur->next)
+		;
+	return cur;
+}
+
+/* Prints all registered extensions */
+void ebt_list_extensions()
+{
+ struct ebt_u_table *tbl = ebt_tables;
+ struct ebt_u_target *t = ebt_targets;
+ struct ebt_u_match *m = ebt_matches;
+ struct ebt_u_watcher *w = ebt_watchers;
+
+ PRINT_VERSION;
+ printf("Loaded userspace extensions:\n\nLoaded tables:\n");
+ while (tbl) {
+ printf("%s\n", tbl->name);
+ tbl = tbl->next;
+ }
+ printf("\nLoaded targets:\n");
+ while (t) {
+ printf("%s\n", t->name);
+ t = t->next;
+ }
+ printf("\nLoaded matches:\n");
+ while (m) {
+ printf("%s\n", m->name);
+ m = m->next;
+ }
+ printf("\nLoaded watchers:\n");
+ while (w) {
+ printf("%s\n", w->name);
+ w = w->next;
+ }
+}
+
+/* Get the table from the kernel or from a binary file
+ * init: 1 = ask the kernel for the initial contents of a table, i.e. the
+ * way it looks when the table is insmod'ed
+ * 0 = get the current data in the table */
+int ebt_get_kernel_table(struct ebt_u_replace *replace, int init)
+{
+ if (!ebt_find_table(replace->name)) {
+ ebt_print_error("Bad table name '%s'", replace->name);
+ return -1;
+ }
+ /* Get the kernel's information */
+ if (ebt_get_table(replace, init)) {
+ if (ebt_errormsg[0] != '\0')
+ return -1;
+ ebtables_insmod("ebtables");
+ if (ebt_get_table(replace, init)) {
+ ebt_print_error("The kernel doesn't support the ebtables '%s' table", replace->name);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/* Put sane values into a new entry; e->t starts as the standard target */
+void ebt_initialize_entry(struct ebt_u_entry *e)
+{
+	struct ebt_u_target *std = ebt_find_target(EBT_STANDARD_TARGET);
+
+	e->bitmask = EBT_NOPROTO;
+	e->invflags = 0;
+	e->ethproto = 0;
+	e->in[0] = e->out[0] = e->logical_in[0] = e->logical_out[0] = '\0';
+	e->m_list = NULL;
+	e->w_list = NULL;
+	e->cnt.pcnt = e->cnt.bcnt = e->cnt_surplus.pcnt = e->cnt_surplus.bcnt = 0;
+	e->t = (struct ebt_entry_target *)std; /* u_target until ebt_add_rule() */
+	if (!std) { /* test the lookup before std is dereferenced below */
+		ebt_print_bug("Couldn't load standard target");
+		return;
+	}
+	std->used = 1;
+	((struct ebt_standard_target *)std->t)->verdict = EBT_CONTINUE;
+}
+
+/* Free up the memory of the table held in userspace, *replace can be reused */
+void ebt_cleanup_replace(struct ebt_u_replace *replace)
+{
+ int i;
+ struct ebt_u_entries *entries;
+ struct ebt_cntchanges *cc1, *cc2;
+ struct ebt_u_entry *u_e1, *u_e2;
+
+ replace->name[0] = '\0';
+ replace->valid_hooks = 0;
+ replace->nentries = 0;
+ replace->num_counters = 0;
+ replace->flags = 0;
+ replace->command = 0;
+ replace->selected_chain = -1;
+ free(replace->filename);
+ replace->filename = NULL;
+ free(replace->counters);
+ replace->counters = NULL;
+
+ for (i = 0; i < replace->num_chains; i++) {
+ if (!(entries = replace->chains[i]))
+ continue;
+ u_e1 = entries->entries->next;
+ while (u_e1 != entries->entries) {
+ ebt_free_u_entry(u_e1);
+ u_e2 = u_e1->next;
+ free(u_e1);
+ u_e1 = u_e2;
+ }
+ free(entries->entries);
+ free(entries);
+ replace->chains[i] = NULL;
+ }
+ cc1 = replace->cc->next;
+ while (cc1 != replace->cc) {
+ cc2 = cc1->next;
+ free(cc1);
+ cc1 = cc2;
+ }
+ replace->cc->next = replace->cc->prev = replace->cc;
+}
+
+/* Should be called, e.g., between 2 rule adds */
+void ebt_reinit_extensions()
+{
+ struct ebt_u_match *m;
+ struct ebt_u_watcher *w;
+ struct ebt_u_target *t;
+ int size;
+
+ /* The init functions should determine by themselves whether they are
+ * called for the first time or not (when necessary). */
+ for (m = ebt_matches; m; m = m->next) {
+ if (m->used) {
+ size = EBT_ALIGN(m->size) + sizeof(struct ebt_entry_match);
+ m->m = (struct ebt_entry_match *)malloc(size);
+ if (!m->m)
+ ebt_print_memory();
+ strcpy(m->m->u.name, m->name);
+ m->m->match_size = EBT_ALIGN(m->size);
+ m->used = 0;
+ }
+ m->flags = 0; /* An error can occur before used is set, while flags is changed. */
+ m->init(m->m);
+ }
+ for (w = ebt_watchers; w; w = w->next) {
+ if (w->used) {
+ size = EBT_ALIGN(w->size) + sizeof(struct ebt_entry_watcher);
+ w->w = (struct ebt_entry_watcher *)malloc(size);
+ if (!w->w)
+ ebt_print_memory();
+ strcpy(w->w->u.name, w->name);
+ w->w->watcher_size = EBT_ALIGN(w->size);
+ w->used = 0;
+ }
+ w->flags = 0;
+ w->init(w->w);
+ }
+ for (t = ebt_targets; t; t = t->next) {
+ if (t->used) {
+ size = EBT_ALIGN(t->size) + sizeof(struct ebt_entry_target);
+ t->t = (struct ebt_entry_target *)malloc(size);
+ if (!t->t)
+ ebt_print_memory();
+ strcpy(t->t->u.name, t->name);
+ t->t->target_size = EBT_ALIGN(t->size);
+ t->used = 0;
+ }
+ t->flags = 0;
+ t->init(t->t);
+ }
+}
+
+/* This doesn't free e, because the calling function might need e->next */
+void ebt_free_u_entry(struct ebt_u_entry *e)
+{
+ struct ebt_u_match_list *m_l, *m_l2;
+ struct ebt_u_watcher_list *w_l, *w_l2;
+
+ m_l = e->m_list;
+ while (m_l) {
+ m_l2 = m_l->next;
+ free(m_l->m);
+ free(m_l);
+ m_l = m_l2;
+ }
+ w_l = e->w_list;
+ while (w_l) {
+ w_l2 = w_l->next;
+ free(w_l->w);
+ free(w_l);
+ w_l = w_l2;
+ }
+ free(e->t);
+}
+
+static char *get_modprobe(void)
+{
+	int procfile;
+	char *ret; /* malloc'ed modprobe path handed to the caller, or NULL */
+	ssize_t len;
+
+	procfile = open(PROC_SYS_MODPROBE, O_RDONLY);
+	if (procfile < 0)
+		return NULL;
+	ret = malloc(1024);
+	if (ret) {
+		if ((len = read(procfile, ret, 1023)) <= 0)
+			goto fail; /* read error or empty file: no usable path */
+		ret[len] = '\0'; /* read() does not terminate the buffer */
+		/* The kernel adds a '\n' */
+		ret[strcspn(ret, "\n")] = '\0';
+		close(procfile);
+		return ret;
+	}
+ fail:
+	free(ret);
+	close(procfile);
+	return NULL;
+}
+
+char *ebt_modprobe;
+/* Try to load the kernel module, analogous to ip_tables.c
+ * Returns -1 when no modprobe path is known or fork() fails, else 0 */
+int ebtables_insmod(const char *modname)
+{
+	char *argv[3];
+	pid_t pid;
+
+	/* If they don't explicitly set it, read out of /proc */
+	if (!ebt_modprobe) {
+		ebt_modprobe = get_modprobe(); /* kept for possible later use */
+		if (!ebt_modprobe)
+			return -1;
+	}
+
+	switch (pid = fork()) {
+	case 0:
+		argv[0] = (char *)ebt_modprobe;
+		argv[1] = (char *)modname;
+		argv[2] = NULL;
+		execv(argv[0], argv);
+		/* Only reached when execv() failed: quit without flushing the
+		 * stdio buffers copied from the parent */
+		_exit(1);
+	case -1:
+		return -1;
+
+	default: /* Parent: reap the child forked above, status is not inspected */
+		waitpid(pid, NULL, 0);
+	}
+
+	return 0;
+}
+
+/* Return the chain of replace whose name equals arg,
+ * or NULL when no such chain exists. */
+struct ebt_u_entries *ebt_name_to_chain(const struct ebt_u_replace *replace, const char* arg)
+{
+	int nr = 0;
+
+	while (nr < replace->num_chains) {
+		struct ebt_u_entries *cand = replace->chains[nr++];
+
+		/* NULL slots in the chain array are skipped */
+		if (cand && strcmp(arg, cand->name) == 0)
+			return cand;
+	}
+	return NULL;
+}
+
+/* Return the index in replace->chains of the chain named arg,
+ * or -1 when no such chain exists */
+int ebt_get_chainnr(const struct ebt_u_replace *replace, const char* arg)
+{
+	int nr = 0;
+
+	while (nr < replace->num_chains) {
+		/* NULL slots in the chain array are skipped */
+		if (replace->chains[nr] && !strcmp(arg, replace->chains[nr]->name))
+			return nr;
+		nr++;
+	}
+	return -1;
+}
+
+ /*
+************
+************
+**COMMANDS**
+************
+************
+ */
+
+/* Change the policy of selected_chain.
+ * Handing a bad policy to this function is a bug. */
+void ebt_change_policy(struct ebt_u_replace *replace, int policy)
+{
+ struct ebt_u_entries *entries = ebt_to_chain(replace);
+
+ if (policy < -NUM_STANDARD_TARGETS || policy == EBT_CONTINUE)
+ ebt_print_bug("Wrong policy: %d", policy);
+ entries->policy = policy;
+}
+
+void ebt_delete_cc(struct ebt_cntchanges *cc)
+{
+	if (cc->type == CNT_ADD) { /* unlink the node from its list and release it */
+		cc->prev->next = cc->next;
+		cc->next->prev = cc->prev;
+		free(cc);
+	} else /* cc is freed in the branch above, so only surviving nodes are marked */
+		cc->type = CNT_DEL;
+}
+
+void ebt_empty_chain(struct ebt_u_entries *entries)
+{
+ struct ebt_u_entry *u_e = entries->entries->next, *tmp;
+ while (u_e != entries->entries) {
+ ebt_delete_cc(u_e->cc);
+ ebt_free_u_entry(u_e);
+ tmp = u_e->next;
+ free(u_e);
+ u_e = tmp;
+ }
+ entries->entries->next = entries->entries->prev = entries->entries;
+ entries->nentries = 0;
+}
+
+/* Flush one chain or the complete table
+ * If selected_chain == -1 then flush the complete table */
+void ebt_flush_chains(struct ebt_u_replace *replace)
+{
+ int i, numdel;
+ struct ebt_u_entries *entries = ebt_to_chain(replace);
+
+ /* Flush whole table */
+ if (!entries) {
+ if (replace->nentries == 0)
+ return;
+ replace->nentries = 0;
+
+ /* Free everything and zero (n)entries */
+ for (i = 0; i < replace->num_chains; i++) {
+ if (!(entries = replace->chains[i]))
+ continue;
+ entries->counter_offset = 0;
+ ebt_empty_chain(entries);
+ }
+ return;
+ }
+
+ if (entries->nentries == 0)
+ return;
+ replace->nentries -= entries->nentries;
+ numdel = entries->nentries;
+
+ /* Update counter_offset */
+ for (i = replace->selected_chain+1; i < replace->num_chains; i++) {
+ if (!(entries = replace->chains[i]))
+ continue;
+ entries->counter_offset -= numdel;
+ }
+
+ entries = ebt_to_chain(replace);
+ ebt_empty_chain(entries);
+}
+
+#define OPT_COUNT 0x1000 /* This value is also defined in ebtables.c */
+/* Returns the rule number on success (starting from 0), -1 on failure
+ *
+ * This function expects the ebt_{match,watcher,target} members of new_entry
+ * to contain pointers to ebt_u_{match,watcher,target} */
+int ebt_check_rule_exists(struct ebt_u_replace *replace,
+ struct ebt_u_entry *new_entry)
+{
+ struct ebt_u_entry *u_e;
+ struct ebt_u_match_list *m_l, *m_l2;
+ struct ebt_u_match *m;
+ struct ebt_u_watcher_list *w_l, *w_l2;
+ struct ebt_u_watcher *w;
+ struct ebt_u_target *t = (struct ebt_u_target *)new_entry->t;
+ struct ebt_u_entries *entries = ebt_to_chain(replace);
+ int i, j, k;
+
+ u_e = entries->entries->next;
+ /* Check for an existing rule (if there are duplicate rules,
+ * take the first occurrence) */
+ for (i = 0; i < entries->nentries; i++, u_e = u_e->next) {
+ if (u_e->ethproto != new_entry->ethproto)
+ continue;
+ if (strcmp(u_e->in, new_entry->in))
+ continue;
+ if (strcmp(u_e->out, new_entry->out))
+ continue;
+ if (strcmp(u_e->logical_in, new_entry->logical_in))
+ continue;
+ if (strcmp(u_e->logical_out, new_entry->logical_out))
+ continue;
+ if (new_entry->bitmask & EBT_SOURCEMAC &&
+ memcmp(u_e->sourcemac, new_entry->sourcemac, ETH_ALEN))
+ continue;
+ if (new_entry->bitmask & EBT_DESTMAC &&
+ memcmp(u_e->destmac, new_entry->destmac, ETH_ALEN))
+ continue;
+ if (new_entry->bitmask != u_e->bitmask ||
+ new_entry->invflags != u_e->invflags)
+ continue;
+ if (replace->flags & OPT_COUNT && (new_entry->cnt.pcnt !=
+ u_e->cnt.pcnt || new_entry->cnt.bcnt != u_e->cnt.bcnt))
+ continue;
+ /* Compare all matches */
+ m_l = new_entry->m_list;
+ j = 0;
+ while (m_l) {
+ m = (struct ebt_u_match *)(m_l->m);
+ m_l2 = u_e->m_list;
+ while (m_l2 && strcmp(m_l2->m->u.name, m->m->u.name))
+ m_l2 = m_l2->next;
+ if (!m_l2 || !m->compare(m->m, m_l2->m))
+ goto letscontinue;
+ j++;
+ m_l = m_l->next;
+ }
+ /* Now be sure they have the same nr of matches */
+ k = 0;
+ m_l = u_e->m_list;
+ while (m_l) {
+ k++;
+ m_l = m_l->next;
+ }
+ if (j != k)
+ continue;
+
+ /* Compare all watchers */
+ w_l = new_entry->w_list;
+ j = 0;
+ while (w_l) {
+ w = (struct ebt_u_watcher *)(w_l->w);
+ w_l2 = u_e->w_list;
+ while (w_l2 && strcmp(w_l2->w->u.name, w->w->u.name))
+ w_l2 = w_l2->next;
+ if (!w_l2 || !w->compare(w->w, w_l2->w))
+ goto letscontinue;
+ j++;
+ w_l = w_l->next;
+ }
+ k = 0;
+ w_l = u_e->w_list;
+ while (w_l) {
+ k++;
+ w_l = w_l->next;
+ }
+ if (j != k)
+ continue;
+ if (strcmp(t->t->u.name, u_e->t->u.name))
+ continue;
+ if (!t->compare(t->t, u_e->t))
+ continue;
+ return i;
+letscontinue:;
+ }
+ return -1;
+}
+
+/* Add a rule, rule_nr is the rule to update
+ * rule_nr specifies where the rule should be inserted
+ * rule_nr > 0 : insert the rule right before the rule_nr'th rule
+ * (the first rule is rule 1)
+ * rule_nr < 0 : insert the rule right before the (n+rule_nr+1)'th rule,
+ * where n denotes the number of rules in the chain
+ * rule_nr == 0: add a new rule at the end of the chain
+ *
+ * This function expects the ebt_{match,watcher,target} members of new_entry
+ * to contain pointers to ebt_u_{match,watcher,target} and updates these
+ * pointers so that they point to ebt_{match,watcher,target}, before adding
+ * the rule to the chain. Don't free() the ebt_{match,watcher,target} and
+ * don't reuse the new_entry after a successful call to ebt_add_rule() */
+void ebt_add_rule(struct ebt_u_replace *replace, struct ebt_u_entry *new_entry, int rule_nr)
+{
+ int i;
+ struct ebt_u_entry *u_e;
+ struct ebt_u_match_list *m_l;
+ struct ebt_u_watcher_list *w_l;
+ struct ebt_u_entries *entries = ebt_to_chain(replace);
+ struct ebt_cntchanges *cc, *new_cc;
+
+ if (rule_nr <= 0)
+ rule_nr += entries->nentries;
+ else
+ rule_nr--;
+ if (rule_nr > entries->nentries || rule_nr < 0) {
+ ebt_print_error("The specified rule number is incorrect");
+ return;
+ }
+ /* Go to the right position in the chain */
+ if (rule_nr == entries->nentries)
+ u_e = entries->entries;
+ else {
+ u_e = entries->entries->next;
+ for (i = 0; i < rule_nr; i++)
+ u_e = u_e->next;
+ }
+ /* We're adding one rule */
+ replace->nentries++;
+ entries->nentries++;
+ /* Insert the rule */
+ new_entry->next = u_e;
+ new_entry->prev = u_e->prev;
+ u_e->prev->next = new_entry;
+ u_e->prev = new_entry;
+ new_cc = (struct ebt_cntchanges *)malloc(sizeof(struct ebt_cntchanges));
+ if (!new_cc)
+ ebt_print_memory();
+ new_cc->type = CNT_ADD;
+ new_cc->change = 0;
+ if (new_entry->next == entries->entries) {
+ for (i = replace->selected_chain+1; i < replace->num_chains; i++)
+ if (!replace->chains[i] || replace->chains[i]->nentries == 0)
+ continue;
+ else
+ break;
+ if (i == replace->num_chains)
+ cc = replace->cc;
+ else
+ cc = replace->chains[i]->entries->next->cc;
+ } else
+ cc = new_entry->next->cc;
+ new_cc->next = cc;
+ new_cc->prev = cc->prev;
+ cc->prev->next = new_cc;
+ cc->prev = new_cc;
+ new_entry->cc = new_cc;
+
+ /* Put the ebt_{match, watcher, target} pointers in place */
+ m_l = new_entry->m_list;
+ while (m_l) {
+ m_l->m = ((struct ebt_u_match *)m_l->m)->m;
+ m_l = m_l->next;
+ }
+ w_l = new_entry->w_list;
+ while (w_l) {
+ w_l->w = ((struct ebt_u_watcher *)w_l->w)->w;
+ w_l = w_l->next;
+ }
+ new_entry->t = ((struct ebt_u_target *)new_entry->t)->t;
+ /* Update the counter_offset of chains behind this one */
+ for (i = replace->selected_chain+1; i < replace->num_chains; i++) {
+ entries = replace->chains[i];
+ if (!(entries = replace->chains[i]))
+ continue;
+ entries->counter_offset++;
+ }
+}
+
+/* If *begin==*end==0 then find the rule corresponding to new_entry,
+ * else make the rule numbers positive (starting from 0) and check
+ * for bad rule numbers. */
+static int check_and_change_rule_number(struct ebt_u_replace *replace,
+ struct ebt_u_entry *new_entry, int *begin, int *end)
+{
+ struct ebt_u_entries *entries = ebt_to_chain(replace);
+
+ if (*begin < 0)
+ *begin += entries->nentries + 1;
+ if (*end < 0)
+ *end += entries->nentries + 1;
+
+ if (*begin < 0 || *begin > *end || *end > entries->nentries) {
+ ebt_print_error("Sorry, wrong rule numbers");
+ return -1;
+ }
+
+ if ((*begin * *end == 0) && (*begin + *end != 0))
+ ebt_print_bug("begin and end should be either both zero, "
+ "either both non-zero");
+ if (*begin != 0) {
+ (*begin)--;
+ (*end)--;
+ } else {
+ *begin = ebt_check_rule_exists(replace, new_entry);
+ *end = *begin;
+ if (*begin == -1) {
+ ebt_print_error("Sorry, rule does not exist");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/* Delete a rule or rules
+ * begin == end == 0: delete the rule corresponding to new_entry
+ *
+ * The first rule has rule nr 1, the last rule has rule nr -1, etc.
+ * This function expects the ebt_{match,watcher,target} members of new_entry
+ * to contain pointers to ebt_u_{match,watcher,target}. */
+void ebt_delete_rule(struct ebt_u_replace *replace,
+ struct ebt_u_entry *new_entry, int begin, int end)
+{
+ int i, nr_deletes;
+ struct ebt_u_entry *u_e, *u_e2, *u_e3;
+ struct ebt_u_entries *entries = ebt_to_chain(replace);
+
+ if (check_and_change_rule_number(replace, new_entry, &begin, &end))
+ return;
+ /* We're deleting rules */
+ nr_deletes = end - begin + 1;
+ replace->nentries -= nr_deletes;
+ entries->nentries -= nr_deletes;
+ /* Go to the right position in the chain */
+ u_e = entries->entries->next;
+ for (i = 0; i < begin; i++)
+ u_e = u_e->next;
+ u_e3 = u_e->prev;
+ /* Remove the rules */
+ for (i = 0; i < nr_deletes; i++) {
+ u_e2 = u_e;
+ ebt_delete_cc(u_e2->cc);
+ u_e = u_e->next;
+ /* Free everything */
+ ebt_free_u_entry(u_e2);
+ free(u_e2);
+ }
+ u_e3->next = u_e;
+ u_e->prev = u_e3;
+ /* Update the counter_offset of chains behind this one */
+ for (i = replace->selected_chain+1; i < replace->num_chains; i++) {
+ if (!(entries = replace->chains[i]))
+ continue;
+ entries->counter_offset -= nr_deletes;
+ }
+}
+
+/* Change the counters of a rule or rules
+ * begin == end == 0: change counters of the rule corresponding to new_entry
+ *
+ * The first rule has rule nr 1, the last rule has rule nr -1, etc.
+ * This function expects the ebt_{match,watcher,target} members of new_entry
+ * to contain pointers to ebt_u_{match,watcher,target}.
+ * The mask denotes the following:
+ * pcnt: mask % 3 = 0 : change; = 1: increment; = 2: decrement
+ * bcnt: mask / 3 = 0 : change; = 1: increment; = 2: decrement
+ * In daemon mode, mask==0 must hold */
+void ebt_change_counters(struct ebt_u_replace *replace,
+ struct ebt_u_entry *new_entry, int begin, int end,
+ struct ebt_counter *cnt, int mask)
+{
+ int i;
+ struct ebt_u_entry *u_e;
+ struct ebt_u_entries *entries = ebt_to_chain(replace);
+
+ if (check_and_change_rule_number(replace, new_entry, &begin, &end))
+ return;
+ u_e = entries->entries->next;
+ for (i = 0; i < begin; i++)
+ u_e = u_e->next;
+ for (i = end-begin+1; i > 0; i--) {
+ if (mask % 3 == 0) {
+ u_e->cnt.pcnt = (*cnt).pcnt;
+ u_e->cnt_surplus.pcnt = 0;
+ } else {
+#ifdef EBT_DEBUG
+ if (u_e->cc->type != CNT_NORM)
+ ebt_print_bug("cc->type != CNT_NORM");
+#endif
+ u_e->cnt_surplus.pcnt = (*cnt).pcnt;
+ }
+
+ if (mask / 3 == 0) {
+ u_e->cnt.bcnt = (*cnt).bcnt;
+ u_e->cnt_surplus.bcnt = 0;
+ } else {
+#ifdef EBT_DEBUG
+ if (u_e->cc->type != CNT_NORM)
+ ebt_print_bug("cc->type != CNT_NORM");
+#endif
+ u_e->cnt_surplus.bcnt = (*cnt).bcnt;
+ }
+ if (u_e->cc->type != CNT_ADD)
+ u_e->cc->type = CNT_CHANGE;
+ u_e->cc->change = mask;
+ u_e = u_e->next;
+ }
+}
+
+/* Zero the counters of every rule in one chain. cc->change is cleared
+ * too, so that an increment/decrement mask stored earlier by
+ * ebt_change_counters() cannot outlive the reset. */
+static void zero_chain_counters(struct ebt_u_entries *entries)
+{
+	struct ebt_u_entry *u_e;
+
+	for (u_e = entries->entries->next; u_e != entries->entries;
+	     u_e = u_e->next) {
+		/* only CNT_NORM entries change type */
+		if (u_e->cc->type == CNT_NORM)
+			u_e->cc->type = CNT_CHANGE;
+		u_e->cnt.bcnt = u_e->cnt.pcnt = 0;
+		u_e->cc->change = 0;
+	}
+}
+
+/* If selected_chain == -1 then zero all counters,
+ * otherwise, zero the counters of selected_chain */
+void ebt_zero_counters(struct ebt_u_replace *replace)
+{
+	struct ebt_u_entries *entries = ebt_to_chain(replace);
+	int i;
+
+	if (entries) {
+		zero_chain_counters(entries);
+		return;
+	}
+
+	/* No chain selected: walk the whole table, skipping NULL slots */
+	for (i = 0; i < replace->num_chains; i++)
+		if (replace->chains[i])
+			zero_chain_counters(replace->chains[i]);
+}
+
+/* Add a new chain and specify its policy */
+void ebt_new_chain(struct ebt_u_replace *replace, const char *name, int policy)
+{
+ struct ebt_u_entries *new;
+
+ if (replace->num_chains == replace->max_chains)
+ ebt_double_chains(replace);
+ new = (struct ebt_u_entries *)malloc(sizeof(struct ebt_u_entries));
+ if (!new)
+ ebt_print_memory();
+ replace->chains[replace->num_chains++] = new;
+ new->nentries = 0;
+ new->policy = policy;
+ new->counter_offset = replace->nentries;
+ new->hook_mask = 0;
+ strcpy(new->name, name);
+ new->entries = (struct ebt_u_entry *)malloc(sizeof(struct ebt_u_entry));
+ if (!new->entries)
+ ebt_print_memory();
+ new->entries->next = new->entries->prev = new->entries;
+ new->kernel_start = NULL;
+}
+
+/* returns -1 if the chain is referenced, 0 on success */
+static int ebt_delete_a_chain(struct ebt_u_replace *replace, int chain, int print_err)
+{
+	int tmp = replace->selected_chain;
+	/* A referenced chain is kept; jumps behind the deleted one are decremented */
+	replace->selected_chain = chain;
+	if (ebt_check_for_references(replace, print_err)) {
+		replace->selected_chain = tmp; /* don't leak the temporary selection */
+		return -1;
+	}
+	decrease_chain_jumps(replace);
+	ebt_flush_chains(replace);
+	replace->selected_chain = tmp;
+	free(replace->chains[chain]->entries);
+	free(replace->chains[chain]);
+	memmove(replace->chains+chain, replace->chains+chain+1, (replace->num_chains-chain-1)*sizeof(void *));
+	replace->num_chains--;
+	return 0;
+}
+
+/* Selected_chain == -1: delete all non-referenced udc
+ * selected_chain < NF_BR_NUMHOOKS is illegal */
+void ebt_delete_chain(struct ebt_u_replace *replace)
+{
+ if (replace->selected_chain != -1 && replace->selected_chain < NF_BR_NUMHOOKS)
+ ebt_print_bug("You can't remove a standard chain");
+ if (replace->selected_chain == -1) {
+ int i = NF_BR_NUMHOOKS;
+
+ while (i < replace->num_chains)
+ if (ebt_delete_a_chain(replace, i, 0))
+ i++;
+ } else
+ ebt_delete_a_chain(replace, replace->selected_chain, 1);
+}
+
+/* Rename an existing chain. */
+void ebt_rename_chain(struct ebt_u_replace *replace, const char *name)
+{
+ struct ebt_u_entries *entries = ebt_to_chain(replace);
+
+ if (!entries)
+ ebt_print_bug("ebt_rename_chain: entries == NULL");
+ strcpy(entries->name, name);
+}
+
+
+ /*
+*************************
+*************************
+**SPECIALIZED*FUNCTIONS**
+*************************
+*************************
+ */
+
+
+void ebt_double_chains(struct ebt_u_replace *replace)
+{
+ struct ebt_u_entries **new;
+
+ replace->max_chains *= 2;
+ new = (struct ebt_u_entries **)malloc(replace->max_chains*sizeof(void *));
+ if (!new)
+ ebt_print_memory();
+ memcpy(new, replace->chains, replace->max_chains/2*sizeof(void *));
+ free(replace->chains);
+ replace->chains = new;
+}
+
+/* Executes the final_check() function for all extensions used by the rule
+ * ebt_check_for_loops should have been executed earlier, to make sure the
+ * hook_mask is correct. The time argument to final_check() is set to 1,
+ * meaning it's the second time the final_check() function is executed. */
+void ebt_do_final_checks(struct ebt_u_replace *replace, struct ebt_u_entry *e,
+ struct ebt_u_entries *entries)
+{
+ struct ebt_u_match_list *m_l;
+ struct ebt_u_watcher_list *w_l;
+ struct ebt_u_target *t;
+ struct ebt_u_match *m;
+ struct ebt_u_watcher *w;
+
+ m_l = e->m_list;
+ w_l = e->w_list;
+ while (m_l) {
+ m = ebt_find_match(m_l->m->u.name);
+ m->final_check(e, m_l->m, replace->name,
+ entries->hook_mask, 1);
+ if (ebt_errormsg[0] != '\0')
+ return;
+ m_l = m_l->next;
+ }
+ while (w_l) {
+ w = ebt_find_watcher(w_l->w->u.name);
+ w->final_check(e, w_l->w, replace->name,
+ entries->hook_mask, 1);
+ if (ebt_errormsg[0] != '\0')
+ return;
+ w_l = w_l->next;
+ }
+ t = ebt_find_target(e->t->u.name);
+ t->final_check(e, e->t, replace->name,
+ entries->hook_mask, 1);
+}
+
+/* Report whether the selected chain of replace is referenced: 1 (if the
+ * call returns) when it is, 0 otherwise. print_err != 0 asks for an error. */
+int ebt_check_for_references(struct ebt_u_replace *replace, int print_err)
+{
+	/* iterate_entries() gets type 1 when an error is wanted, 2 otherwise */
+	const int type = print_err ? 1 : 2;
+
+	return iterate_entries(replace, type);
+}
+
+/* Same check as ebt_check_for_references(), but for the udc with number
+ * chain_nr (>= NF_BR_NUMHOOKS) instead of the currently selected chain.
+ * Returns 1 (if it returns) when referenced, 0 otherwise; print_err as there */
+int ebt_check_for_references2(struct ebt_u_replace *replace, int chain_nr,
+			      int print_err)
+{
+	const int saved = replace->selected_chain;
+	int referenced;
+
+	/* Select chain_nr only for the duration of the scan */
+	replace->selected_chain = chain_nr;
+	referenced = iterate_entries(replace, print_err ? 1 : 2);
+	replace->selected_chain = saved;
+
+	return referenced;
+}
+
+struct ebt_u_stack
+{
+ int chain_nr;
+ int n;
+ struct ebt_u_entry *e;
+ struct ebt_u_entries *entries;
+};
+
+/* Checks for loops
+ * As a by-product, the hook_mask member of each chain is filled in
+ * correctly. The check functions of the extensions need this hook_mask
+ * to know from which standard chains they can be called. */
+void ebt_check_for_loops(struct ebt_u_replace *replace)
+{
+ int chain_nr , i, j , k, sp = 0, verdict;
+ struct ebt_u_entries *entries, *entries2;
+ struct ebt_u_stack *stack = NULL;
+ struct ebt_u_entry *e;
+
+ /* Initialize hook_mask to 0 */
+ for (i = 0; i < replace->num_chains; i++) {
+ if (!(entries = replace->chains[i]))
+ continue;
+ if (i < NF_BR_NUMHOOKS)
+ /* (1 << NF_BR_NUMHOOKS) implies it's a standard chain
+ * (useful in the final_check() functions) */
+ entries->hook_mask = (1 << i) | (1 << NF_BR_NUMHOOKS);
+ else
+ entries->hook_mask = 0;
+ }
+ if (replace->num_chains == NF_BR_NUMHOOKS)
+ return;
+ stack = (struct ebt_u_stack *)malloc((replace->num_chains - NF_BR_NUMHOOKS) * sizeof(struct ebt_u_stack));
+ if (!stack)
+ ebt_print_memory();
+
+ /* Check for loops, starting from every base chain */
+ for (i = 0; i < NF_BR_NUMHOOKS; i++) {
+ if (!(entries = replace->chains[i]))
+ continue;
+ chain_nr = i;
+
+ e = entries->entries->next;
+ for (j = 0; j < entries->nentries; j++) {
+ if (strcmp(e->t->u.name, EBT_STANDARD_TARGET))
+ goto letscontinue;
+ verdict = ((struct ebt_standard_target *)(e->t))->verdict;
+ if (verdict < 0)
+ goto letscontinue;
+ /* Now see if we've been here before */
+ for (k = 0; k < sp; k++)
+ if (stack[k].chain_nr == verdict + NF_BR_NUMHOOKS) {
+ ebt_print_error("Loop from chain '%s' to chain '%s'",
+ replace->chains[chain_nr]->name,
+ replace->chains[stack[k].chain_nr]->name);
+ goto free_stack;
+ }
+ entries2 = replace->chains[verdict + NF_BR_NUMHOOKS];
+ /* check if we've dealt with this chain already */
+ if (entries2->hook_mask & (1<<i))
+ goto letscontinue;
+ entries2->hook_mask |= entries->hook_mask;
+ /* Jump to the chain, make sure we know how to get back */
+ stack[sp].chain_nr = chain_nr;
+ stack[sp].n = j;
+ stack[sp].entries = entries;
+ stack[sp].e = e;
+ sp++;
+ j = -1;
+ e = entries2->entries->next;
+ chain_nr = verdict + NF_BR_NUMHOOKS;
+ entries = entries2;
+ continue;
+letscontinue:
+ e = e->next;
+ }
+ /* We are at the end of a standard chain */
+ if (sp == 0)
+ continue;
+ /* Go back to the chain one level higher */
+ sp--;
+ j = stack[sp].n;
+ chain_nr = stack[sp].chain_nr;
+ e = stack[sp].e;
+ entries = stack[sp].entries;
+ goto letscontinue;
+ }
+free_stack:
+ free(stack);
+ return;
+}
+
+/* The user will use the match, so put it in new_entry. The ebt_u_match
+ * pointer is put in the ebt_entry_match pointer. ebt_add_rule will
+ * fill in the final value for new->m. Unless the rule is added to a chain,
+ * the pointer will keep pointing to the ebt_u_match (until the new_entry
+ * is freed). I know, I should use a union for these 2 pointer types... */
+void ebt_add_match(struct ebt_u_entry *new_entry, struct ebt_u_match *m)
+{
+ struct ebt_u_match_list **m_list, *new;
+
+ for (m_list = &new_entry->m_list; *m_list; m_list = &(*m_list)->next);
+ new = (struct ebt_u_match_list *)
+ malloc(sizeof(struct ebt_u_match_list));
+ if (!new)
+ ebt_print_memory();
+ *m_list = new;
+ new->next = NULL;
+ new->m = (struct ebt_entry_match *)m;
+}
+
+void ebt_add_watcher(struct ebt_u_entry *new_entry, struct ebt_u_watcher *w)
+{
+ struct ebt_u_watcher_list **w_list;
+ struct ebt_u_watcher_list *new;
+
+ for (w_list = &new_entry->w_list; *w_list; w_list = &(*w_list)->next);
+ new = (struct ebt_u_watcher_list *)
+ malloc(sizeof(struct ebt_u_watcher_list));
+ if (!new)
+ ebt_print_memory();
+ *w_list = new;
+ new->next = NULL;
+ new->w = (struct ebt_entry_watcher *)w;
+}
+
+
+ /*
+*******************
+*******************
+**OTHER*FUNCTIONS**
+*******************
+*******************
+ */
+
+
+/* type = 0 => update chain jumps
+ * type = 1 => check for reference, print error when referenced
+ * type = 2 => check for reference, don't print error when referenced
+ *
+ * Returns 1 when type is 1 or 2 and the chain is referenced
+ * returns 0 otherwise */
+static int iterate_entries(struct ebt_u_replace *replace, int type)
+{
+ int i, j, chain_nr = replace->selected_chain - NF_BR_NUMHOOKS;
+ struct ebt_u_entries *entries;
+ struct ebt_u_entry *e;
+
+ if (chain_nr < 0)
+ ebt_print_bug("iterate_entries: udc = %d < 0", chain_nr);
+ for (i = 0; i < replace->num_chains; i++) {
+ if (!(entries = replace->chains[i]))
+ continue;
+ e = entries->entries->next;
+ for (j = 0; j < entries->nentries; j++) {
+ int chain_jmp;
+
+ if (strcmp(e->t->u.name, EBT_STANDARD_TARGET)) {
+ e = e->next;
+ continue;
+ }
+ chain_jmp = ((struct ebt_standard_target *)e->t)->
+ verdict;
+ switch (type) {
+ case 1:
+ case 2:
+ if (chain_jmp == chain_nr) {
+ if (type == 2)
+ return 1;
+ ebt_print_error("Can't delete the chain '%s', it's referenced in chain '%s', rule %d",
+ replace->chains[chain_nr + NF_BR_NUMHOOKS]->name, entries->name, j);
+ return 1;
+ }
+ break;
+ case 0:
+ /* Adjust the chain jumps when necessary */
+ if (chain_jmp > chain_nr)
+ ((struct ebt_standard_target *)e->t)->verdict--;
+ break;
+ } /* End switch */
+ e = e->next;
+ }
+ }
+ return 0;
+}
+
+static void decrease_chain_jumps(struct ebt_u_replace *replace)
+{
+ iterate_entries(replace, 0);
+}
+
+/* Used in initialization code of modules */
+void ebt_register_match(struct ebt_u_match *m)
+{
+ int size = EBT_ALIGN(m->size) + sizeof(struct ebt_entry_match);
+ struct ebt_u_match **i;
+
+ m->m = (struct ebt_entry_match *)malloc(size);
+ if (!m->m)
+ ebt_print_memory();
+ strcpy(m->m->u.name, m->name);
+ m->m->match_size = EBT_ALIGN(m->size);
+ m->init(m->m);
+
+ for (i = &ebt_matches; *i; i = &((*i)->next));
+ m->next = NULL;
+ *i = m;
+}
+
+void ebt_register_watcher(struct ebt_u_watcher *w)
+{
+ int size = EBT_ALIGN(w->size) + sizeof(struct ebt_entry_watcher);
+ struct ebt_u_watcher **i;
+
+ w->w = (struct ebt_entry_watcher *)malloc(size);
+ if (!w->w)
+ ebt_print_memory();
+ strcpy(w->w->u.name, w->name);
+ w->w->watcher_size = EBT_ALIGN(w->size);
+ w->init(w->w);
+
+ for (i = &ebt_watchers; *i; i = &((*i)->next));
+ w->next = NULL;
+ *i = w;
+}
+
+void ebt_register_target(struct ebt_u_target *t)
+{
+ int size = EBT_ALIGN(t->size) + sizeof(struct ebt_entry_target);
+ struct ebt_u_target **i;
+
+ t->t = (struct ebt_entry_target *)malloc(size);
+ if (!t->t)
+ ebt_print_memory();
+ strcpy(t->t->u.name, t->name);
+ t->t->target_size = EBT_ALIGN(t->size);
+ t->init(t->t);
+
+ for (i = &ebt_targets; *i; i = &((*i)->next));
+ t->next = NULL;
+ *i = t;
+}
+
+void ebt_register_table(struct ebt_u_table *t)
+{
+ t->next = ebt_tables;
+ ebt_tables = t;
+}
+
+void ebt_iterate_matches(void (*f)(struct ebt_u_match *))
+{
+ struct ebt_u_match *i;
+
+ for (i = ebt_matches; i; i = i->next)
+ f(i);
+}
+
+void ebt_iterate_watchers(void (*f)(struct ebt_u_watcher *))
+{
+ struct ebt_u_watcher *i;
+
+ for (i = ebt_watchers; i; i = i->next)
+ f(i);
+}
+
+void ebt_iterate_targets(void (*f)(struct ebt_u_target *))
+{
+ struct ebt_u_target *i;
+
+ for (i = ebt_targets; i; i = i->next)
+ f(i);
+}
+
+/* Don't use this function, use ebt_print_bug() */
+void __ebt_print_bug(char *file, int line, char *format, ...)
+{
+ va_list l;
+
+ va_start(l, format);
+ fprintf(stderr, PROGNAME" v"PROGVERSION":%s:%d:--BUG--: \n", file, line);
+ vfprintf(stderr, format, l);
+ fprintf(stderr, "\n");
+ va_end(l);
+ exit (-1);
+}
+
+/* The error messages are put in here when ebt_silent == 1
+ * ebt_errormsg[0] == '\0' implies there was no error */
+char ebt_errormsg[ERRORMSG_MAXLEN];
+/* When error messages should not be printed on the screen, after which
+ * the program exit()s, set ebt_silent to 1. */
+int ebt_silent;
+/* Don't use this function, use ebt_print_error() */
+void __ebt_print_error(char *format, ...)
+{
+ va_list l;
+
+ va_start(l, format);
+ if (ebt_silent && ebt_errormsg[0] == '\0') {
+ vsnprintf(ebt_errormsg, ERRORMSG_MAXLEN, format, l);
+ va_end(l);
+ } else {
+ vfprintf(stderr, format, l);
+ fprintf(stderr, ".\n");
+ va_end(l);
+ exit (-1);
+ }
+}
diff --git a/tools/remus/imqebt/useful_functions.c b/tools/remus/imqebt/useful_functions.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/imqebt/useful_functions.c
@@ -0,0 +1,413 @@
+/*
+ * useful_functions.c, January 2004
+ *
+ * Random collection of functions that can be used by extensions.
+ *
+ * Author: Bart De Schuymer
+ *
+ * This code is strongly inspired by the iptables code which is
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include "include/ebtables_u.h"
+#include "include/ethernetdb.h"
+#include <stdio.h>
+#include <netinet/ether.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+const unsigned char mac_type_unicast[ETH_ALEN] = {0,0,0,0,0,0};
+const unsigned char msk_type_unicast[ETH_ALEN] = {1,0,0,0,0,0};
+const unsigned char mac_type_multicast[ETH_ALEN] = {1,0,0,0,0,0};
+const unsigned char msk_type_multicast[ETH_ALEN] = {1,0,0,0,0,0};
+const unsigned char mac_type_broadcast[ETH_ALEN] = {255,255,255,255,255,255};
+const unsigned char msk_type_broadcast[ETH_ALEN] = {255,255,255,255,255,255};
+const unsigned char mac_type_bridge_group[ETH_ALEN] = {0x01,0x80,0xc2,0,0,0};
+const unsigned char msk_type_bridge_group[ETH_ALEN] = {255,255,255,255,255,255};
+
+/* 0: default, print only 2 digits if necessary
+ * 2: always print 2 digits, a printed mac address
+ * then always has the same length */
+int ebt_printstyle_mac;
+
+void ebt_print_mac(const unsigned char *mac)
+{
+ if (ebt_printstyle_mac == 2) {
+ int j;
+ for (j = 0; j < ETH_ALEN; j++)
+ printf("%02x%s", mac[j],
+ (j==ETH_ALEN-1) ? "" : ":");
+ } else
+ printf("%s", ether_ntoa((struct ether_addr *) mac));
+}
+
+void ebt_print_mac_and_mask(const unsigned char *mac, const unsigned char *mask)
+{
+ char hlpmsk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+ if (!memcmp(mac, mac_type_unicast, 6) &&
+ !memcmp(mask, msk_type_unicast, 6))
+ printf("Unicast");
+ else if (!memcmp(mac, mac_type_multicast, 6) &&
+ !memcmp(mask, msk_type_multicast, 6))
+ printf("Multicast");
+ else if (!memcmp(mac, mac_type_broadcast, 6) &&
+ !memcmp(mask, msk_type_broadcast, 6))
+ printf("Broadcast");
+ else if (!memcmp(mac, mac_type_bridge_group, 6) &&
+ !memcmp(mask, msk_type_bridge_group, 6))
+ printf("BGA");
+ else {
+ ebt_print_mac(mac);
+ if (memcmp(mask, hlpmsk, 6)) {
+ printf("/");
+ ebt_print_mac(mask);
+ }
+ }
+}
+
+/* Checks the type for validity and calls getethertypebynumber(). */
+struct ethertypeent *parseethertypebynumber(int type)
+{
+ if (type < 1536)
+ ebt_print_error("Ethernet protocols have values >= 0x0600");
+ if (type > 0xffff)
+ ebt_print_error("Ethernet protocols have values <= 0xffff");
+ return getethertypebynumber(type);
+}
+
+/* Put the mac address into 6 (ETH_ALEN) bytes returns 0 on success. */
+int ebt_get_mac_and_mask(const char *from, unsigned char *to,
+ unsigned char *mask)
+{
+ char *p;
+ int i;
+ struct ether_addr *addr;
+
+ if (strcasecmp(from, "Unicast") == 0) {
+ memcpy(to, mac_type_unicast, ETH_ALEN);
+ memcpy(mask, msk_type_unicast, ETH_ALEN);
+ return 0;
+ }
+ if (strcasecmp(from, "Multicast") == 0) {
+ memcpy(to, mac_type_multicast, ETH_ALEN);
+ memcpy(mask, msk_type_multicast, ETH_ALEN);
+ return 0;
+ }
+ if (strcasecmp(from, "Broadcast") == 0) {
+ memcpy(to, mac_type_broadcast, ETH_ALEN);
+ memcpy(mask, msk_type_broadcast, ETH_ALEN);
+ return 0;
+ }
+ if (strcasecmp(from, "BGA") == 0) {
+ memcpy(to, mac_type_bridge_group, ETH_ALEN);
+ memcpy(mask, msk_type_bridge_group, ETH_ALEN);
+ return 0;
+ }
+ if ( (p = strrchr(from, '/')) != NULL) {
+ *p = '\0';
+ if (!(addr = ether_aton(p + 1)))
+ return -1;
+ memcpy(mask, addr, ETH_ALEN);
+ } else
+ memset(mask, 0xff, ETH_ALEN);
+ if (!(addr = ether_aton(from)))
+ return -1;
+ memcpy(to, addr, ETH_ALEN);
+ for (i = 0; i < ETH_ALEN; i++)
+ to[i] &= mask[i];
+ return 0;
+}
+
+/* 0: default
+ * 1: the inverse '!' of the option has already been specified */
+int ebt_invert = 0;
+
+/*
+ * Check if the inverse of the option is specified. This is used
+ * in the parse functions of the extensions and ebtables.c
+ */
+int _ebt_check_inverse(const char option[], int argc, char **argv)
+{
+ if (!option)
+ return ebt_invert;
+ if (strcmp(option, "!") == 0) {
+ if (ebt_invert == 1)
+ ebt_print_error("Double use of '!' not allowed");
+ if (optind >= argc)
+ optarg = NULL;
+ else
+ optarg = argv[optind];
+ optind++;
+ ebt_invert = 1;
+ return 1;
+ }
+ return ebt_invert;
+}
+
+/* Make sure the same option wasn't specified twice. This is used
+ * in the parse functions of the extensions and ebtables.c */
+void ebt_check_option(unsigned int *flags, unsigned int mask)
+{
+ if (*flags & mask)
+ ebt_print_error("Multiple use of same option not allowed");
+ *flags |= mask;
+}
+
+/* Put the dotted-quad ip string into 4 bytes (ip2). Returns 0 on success,
+ * -1 on malformed input. Works on a local copy, 'ip' is not modified. */
+static int undot_ip(char *ip, unsigned char *ip2)
+{
+	char *p, *q, *end;
+	long int onebyte;
+	int i;
+	char buf[20];
+
+	strncpy(buf, ip, sizeof(buf) - 1);
+	buf[sizeof(buf) - 1] = '\0'; /* strncpy() may not terminate */
+	p = buf;
+	for (i = 0; i < 3; i++) {
+		if ((q = strchr(p, '.')) == NULL)
+			return -1;
+		*q = '\0';
+		onebyte = strtol(p, &end, 10);
+		if (*end != '\0' || onebyte > 255 || onebyte < 0)
+			return -1;
+		ip2[i] = (unsigned char)onebyte;
+		p = q + 1;
+	}
+	onebyte = strtol(p, &end, 10);
+	if (*end != '\0' || onebyte > 255 || onebyte < 0)
+		return -1;
+	ip2[3] = (unsigned char)onebyte;
+
+	return 0;
+}
+
+/* Put the mask into 4 bytes. */
+static int ip_mask(char *mask, unsigned char *mask2)
+{
+ char *end;
+ long int bits;
+ uint32_t mask22;
+
+ if (undot_ip(mask, mask2)) {
+ /* not the /a.b.c.e format, maybe the /x format */
+ bits = strtol(mask, &end, 10);
+ if (*end != '\0' || bits > 32 || bits < 0)
+ return -1;
+ if (bits != 0) {
+ mask22 = htonl(0xFFFFFFFF << (32 - bits));
+ memcpy(mask2, &mask22, 4);
+ } else {
+ mask22 = 0xFFFFFFFF;
+ memcpy(mask2, &mask22, 4);
+ }
+ }
+ return 0;
+}
+
+/* Set the ip mask and ip address. Callers should check ebt_errormsg[0].
+ * The string pointed to by address can be altered. */
+void ebt_parse_ip_address(char *address, uint32_t *addr, uint32_t *msk)
+{
+ char *p;
+
+ /* first the mask */
+ if ((p = strrchr(address, '/')) != NULL) {
+ *p = '\0';
+ if (ip_mask(p + 1, (unsigned char *)msk)) {
+ ebt_print_error("Problem with the IP mask '%s'", p + 1);
+ return;
+ }
+ } else
+ *msk = 0xFFFFFFFF;
+
+ if (undot_ip(address, (unsigned char *)addr)) {
+ ebt_print_error("Problem with the IP address '%s'", address);
+ return;
+ }
+ *addr = *addr & *msk;
+}
+
+
+/* Transform the ip mask into a string ready for output. */
+char *ebt_mask_to_dotted(uint32_t mask)
+{
+ int i;
+ static char buf[20];
+ uint32_t maskaddr, bits;
+
+ maskaddr = ntohl(mask);
+
+ /* don't print /32 */
+ if (mask == 0xFFFFFFFFL) {
+ *buf = '\0';
+ return buf;
+ }
+
+ i = 32;
+ bits = 0xFFFFFFFEL; /* Case 0xFFFFFFFF has just been dealt with */
+ while (--i >= 0 && maskaddr != bits)
+ bits <<= 1;
+
+ if (i > 0)
+ sprintf(buf, "/%d", i);
+ else if (!i)
+ *buf = '\0';
+ else
+ /* Mask was not a decent combination of 1's and 0's */
+ sprintf(buf, "/%d.%d.%d.%d", ((unsigned char *)&mask)[0],
+ ((unsigned char *)&mask)[1], ((unsigned char *)&mask)[2],
+ ((unsigned char *)&mask)[3]);
+
+ return buf;
+}
+
+/* Most of the following code is derived from iptables */
+static void
+in6addrcpy(struct in6_addr *dst, struct in6_addr *src)
+{
+ memcpy(dst, src, sizeof(struct in6_addr));
+}
+
+int string_to_number_ll(const char *s, unsigned long long min,
+ unsigned long long max, unsigned long long *ret)
+{
+ unsigned long long number;
+ char *end;
+
+ /* Handle hex, octal, etc. */
+ errno = 0;
+ number = strtoull(s, &end, 0);
+ if (*end == '\0' && end != s) {
+ /* we parsed a number, let's see if we want this */
+ if (errno != ERANGE && min <= number && (!max || number <= max)) {
+ *ret = number;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+/* unsigned long variant of string_to_number_ll(); *ret is set only on success */
+int string_to_number_l(const char *s, unsigned long min, unsigned long max,
+	unsigned long *ret)
+{
+	unsigned long long number;
+	int result = string_to_number_ll(s, min, max, &number);
+
+	if (result == 0)
+		*ret = (unsigned long)number;
+	return result;
+}
+
+/* unsigned int variant of string_to_number_l(); *ret is set only on success */
+int string_to_number(const char *s, unsigned int min, unsigned int max,
+	unsigned int *ret)
+{
+	unsigned long number;
+	int result = string_to_number_l(s, min, max, &number);
+
+	if (result == 0)
+		*ret = (unsigned int)number;
+	return result;
+}
+
+/* Convert a textual IPv6 address; NULL if inet_pton() does not accept it. */
+static struct in6_addr *numeric_to_addr(const char *num)
+{
+	static struct in6_addr ap;	/* result buffer, reused on every call */
+
+	if (inet_pton(AF_INET6, num, &ap) == 1)
+		return &ap;
+	return (struct in6_addr *)NULL;
+}
+
+/* Parse an IPv6 mask given in address form or as a prefix length (0..128);
+ * a NULL mask means /128. The returned pointer refers to static storage. */
+static struct in6_addr *parse_ip6_mask(char *mask)
+{
+	static struct in6_addr maskaddr;
+	struct in6_addr *addrp;
+	unsigned int bits;
+
+	if (mask == NULL) {
+		/* no mask at all defaults to 128 bits */
+		memset(&maskaddr, 0xff, sizeof maskaddr);
+		return &maskaddr;
+	}
+	if ((addrp = numeric_to_addr(mask)) != NULL)
+		return addrp;
+	if (string_to_number(mask, 0, 128, &bits) == -1) {
+		ebt_print_error("Invalid IPv6 Mask '%s' specified", mask);
+		bits = 0; /* if ebt_print_error() returns, stay in bounds */
+	}
+	/* bits % 8 == 0 has no partial byte (for 128 it would be past the end) */
+	memset(&maskaddr, 0, sizeof maskaddr);
+	memset(&maskaddr, 0xff, bits / 8);
+	if (bits & 7)
+		((unsigned char *)&maskaddr)[bits / 8] = 0xff << (8 - (bits & 7));
+	return &maskaddr;
+}
+
+/* Set the ipv6 mask and address. Callers should check ebt_errormsg[0].
+ * The string pointed to by address can be altered. */
+void ebt_parse_ip6_address(char *address, struct in6_addr *addr,
+ struct in6_addr *msk)
+{
+ struct in6_addr *tmp_addr;
+ char buf[256];
+ char *p;
+ int i;
+ int err;
+
+ strncpy(buf, address, sizeof(buf) - 1);
+ /* first the mask */
+ buf[sizeof(buf) - 1] = '\0';
+ if ((p = strrchr(buf, '/')) != NULL) {
+ *p = '\0';
+ tmp_addr = parse_ip6_mask(p + 1);
+ } else
+ tmp_addr = parse_ip6_mask(NULL);
+ in6addrcpy(msk, tmp_addr);
+
+ /* if a null mask is given, the name is ignored, like in "any/0" */
+ if (!memcmp(msk, &in6addr_any, sizeof(in6addr_any)))
+ strcpy(buf, "::");
+
+ if ((err=inet_pton(AF_INET6, buf, addr)) < 1) {
+ ebt_print_error("Invalid IPv6 Address '%s' specified", buf);
+ return;
+ }
+
+ for (i = 0; i < 4; i++)
+ addr->s6_addr32[i] &= msk->s6_addr32[i];
+}
+
+/* Transform the ip6 addr into a string ready for output. */
+char *ebt_ip6_to_numeric(const struct in6_addr *addrp)
+{
+ /* 0000:0000:0000:0000:0000:000.000.000.000
+ * 0000:0000:0000:0000:0000:0000:0000:0000 */
+ static char buf[50+1];
+ return (char *)inet_ntop(AF_INET6, addrp, buf, sizeof(buf));
+}
diff --git a/tools/remus/kmod/Kbuild b/tools/remus/kmod/Kbuild
new file mode 100644
--- /dev/null
+++ b/tools/remus/kmod/Kbuild
@@ -0,0 +1,1 @@
+obj-m := sch_queue.o ebt_imq.o
diff --git a/tools/remus/kmod/Makefile b/tools/remus/kmod/Makefile
new file mode 100644
--- /dev/null
+++ b/tools/remus/kmod/Makefile
@@ -0,0 +1,24 @@
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# Should make makefiles export linux build directory!
+# This is a fragile hack to tide us over
+ifeq ($(KERNELS),linux-2.6-xen)
+LINUX_VER=2.6.18-xen
+endif
+ifeq ($(KERNELS),linux-2.6-xen0)
+LINUX_VER=2.6.18-xen0
+endif
+
+KERNELDIR ?= $(XEN_ROOT)/build-linux-$(LINUX_VER)_$(XEN_TARGET_ARCH)
+
+.PHONY: all
+all:
+ if test -d $(KERNELDIR); then $(MAKE) -C $(KERNELDIR) SUBDIRS=`pwd` modules; fi
+
+.PHONY: install
+install:
+ if test -d $(KERNELDIR); then $(MAKE) -C $(KERNELDIR) SUBDIRS=`pwd` INSTALL_MOD_PATH=$(DESTDIR) modules_install; fi
+
+clean::
+ -rm -rf *.o *.ko *.mod.c *.mod.o Module.symvers .*.cmd .tmp_versions
diff --git a/tools/remus/kmod/ebt_imq.c b/tools/remus/kmod/ebt_imq.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/kmod/ebt_imq.c
@@ -0,0 +1,45 @@
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netdevice.h>
+#include "ebt_imq.h"
+
+static int ebt_target_imq(struct sk_buff **pskb, unsigned int hooknr,
+ const struct net_device *in, const struct net_device *out,
+ const void *data, unsigned int datalen)
+{
+ struct ebt_imq_info *info = (struct ebt_imq_info *) data;
+
+ (*pskb)->imq_flags = info->todev | IMQ_F_ENQUEUE;
+
+ return EBT_CONTINUE;
+}
+
+static int ebt_target_imq_check(const char *tablename, unsigned int hookmask,
+ const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+ return 0;
+}
+
+static struct ebt_target imq_target =
+{
+ .name = "imq",
+ .target = ebt_target_imq,
+ .check = ebt_target_imq_check,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ebt_register_target(&imq_target);
+}
+
+static void __exit fini(void)
+{
+ ebt_unregister_target(&imq_target);
+}
+
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/tools/remus/kmod/ebt_imq.h b/tools/remus/kmod/ebt_imq.h
new file mode 100644
--- /dev/null
+++ b/tools/remus/kmod/ebt_imq.h
@@ -0,0 +1,10 @@
+#ifndef __LINUX_BRIDGE_EBT_IMQ_H
+#define __LINUX_BRIDGE_EBT_IMQ_H
+
+#define IMQ_F_ENQUEUE 0x80
+
+struct ebt_imq_info
+{
+ unsigned int todev;
+};
+#endif
diff --git a/tools/remus/kmod/sch_queue.c b/tools/remus/kmod/sch_queue.c
new file mode 100644
--- /dev/null
+++ b/tools/remus/kmod/sch_queue.c
@@ -0,0 +1,208 @@
+/*
+ * sch_queue.c Queue traffic until an explicit release command
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * The operation of the buffer is as follows:
+ * When a checkpoint begins, a barrier is inserted into the
+ * network queue by a netlink request (it operates by storing
+ * a pointer to the next packet which arrives and blocking dequeue
+ * when that packet is at the head of the queue).
+ * When a checkpoint completes (the backup acknowledges receipt),
+ * currently-queued packets are released.
+ * So it supports two operations, barrier and release.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+
+/* backlog limit (bytes) checked by queue_enqueue() */
+#define FIFO_BUF (10*1024*1024)
+
+#define TCQ_CHECKPOINT 0
+#define TCQ_DEQUEUE 1
+
+struct queue_sched_data {
+ /* this packet is the first packet which should not be delivered.
+ * If it is NULL, queue_enqueue will set it to the next packet it sees. */
+ struct sk_buff *stop;
+};
+
+struct tc_queue_qopt {
+ /* 0: reset stop packet pointer
+ * 1: dequeue to stop pointer */
+ int action;
+};
+
+/* borrowed from drivers/xen/netback/loopback.c */
+static int is_foreign(unsigned long pfn)
+{
+ /* NB. Play it safe for auto-translation mode. */
+ return (xen_feature(XENFEAT_auto_translated_physmap) ||
+ (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT));
+}
+
+static int skb_remove_foreign_references(struct sk_buff *skb)
+{
+ struct page *page;
+ unsigned long pfn;
+ int i, off;
+ char *vaddr;
+
+ BUG_ON(skb_shinfo(skb)->frag_list);
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page);
+ if (!is_foreign(pfn))
+ continue;
+ /*
+ printk("foreign ref found\n");
+ */
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!page))
+ return 0;
+
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+ off = skb_shinfo(skb)->frags[i].page_offset;
+ memcpy(page_address(page) + off, vaddr + off,
+ skb_shinfo(skb)->frags[i].size);
+ kunmap_skb_frag(vaddr);
+
+ put_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb)->frags[i].page = page;
+ }
+
+ return 1;
+}
+
+/* Queue skb if the backlog limit allows; the first skb accepted while
+ * q->stop is NULL becomes the stop packet that queue_dequeue() blocks on. */
+static int queue_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct queue_sched_data *q = qdisc_priv(sch);
+
+	if (likely(sch->qstats.backlog + skb->len <= FIFO_BUF))
+	{
+		if (!skb_remove_foreign_references(skb)) {
+			printk("error removing foreign ref\n");
+			return qdisc_reshape_fail(skb, sch);
+		}
+		/* set the barrier only for an skb that is really queued */
+		if (!q->stop)
+			q->stop = skb;
+		return qdisc_enqueue_tail(skb, sch);
+	}
+	printk("queue reported full: %d,%d\n", sch->qstats.backlog, skb->len);
+	return qdisc_reshape_fail(skb, sch);
+}
+
+/* dequeue doesn't actually dequeue until the release command is
+ * received. */
+static inline struct sk_buff *queue_dequeue(struct Qdisc* sch)
+{
+ struct queue_sched_data *q = qdisc_priv(sch);
+ struct sk_buff* peek;
+ /*
+ struct timeval tv;
+
+ if (!q->stop) {
+ do_gettimeofday(&tv);
+ printk("packet dequeued at %lu.%06lu\n", tv.tv_sec, tv.tv_usec);
+ }
+ */
+
+ if (sch->flags & TCQ_F_THROTTLED)
+ return NULL;
+
+ peek = (struct sk_buff *)((sch->q).next);
+
+ /* this pointer comparison may be shady */
+ if (peek == q->stop) {
+ /*
+ do_gettimeofday(&tv);
+ printk("stop packet at %lu.%06lu\n", tv.tv_sec, tv.tv_usec);
+ */
+
+ /* this is the tail of the last round. Release it and block the queue */
+ sch->flags |= TCQ_F_THROTTLED;
+ return NULL;
+ }
+
+ return qdisc_dequeue_head(sch);
+}
+
+static int queue_init(struct Qdisc *sch, struct rtattr *opt)
+{
+ sch->flags |= TCQ_F_THROTTLED;
+
+ return 0;
+}
+
+/* receives two messages:
+ * 0: checkpoint queue (set stop to next packet)
+ * 1: dequeue until stop */
+static int queue_change(struct Qdisc* sch, struct rtattr* opt)
+{
+ struct queue_sched_data *q = qdisc_priv(sch);
+ struct tc_queue_qopt* msg;
+ /*
+ struct timeval tv;
+ */
+
+ if (!opt || RTA_PAYLOAD(opt) < sizeof(*msg))
+ return -EINVAL;
+
+ msg = RTA_DATA(opt);
+
+ if (msg->action == TCQ_CHECKPOINT) {
+ /* reset stop */
+ q->stop = NULL;
+ } else if (msg->action == TCQ_DEQUEUE) {
+ /* dequeue */
+ sch->flags &= ~TCQ_F_THROTTLED;
+ netif_schedule(sch->dev);
+ /*
+ do_gettimeofday(&tv);
+ printk("queue release at %lu.%06lu (%d bytes)\n", tv.tv_sec, tv.tv_usec,
+ sch->qstats.backlog);
+ */
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct Qdisc_ops queue_qdisc_ops = {
+ .id = "queue",
+ .priv_size = sizeof(struct queue_sched_data),
+ .enqueue = queue_enqueue,
+ .dequeue = queue_dequeue,
+ .init = queue_init,
+ .change = queue_change,
+ .owner = THIS_MODULE,
+};
+
+static int __init queue_module_init(void)
+{
+ printk("loading queue\n");
+ return register_qdisc(&queue_qdisc_ops);
+}
+
+static void __exit queue_module_exit(void)
+{
+ printk("queue unloaded\n");
+ unregister_qdisc(&queue_qdisc_ops);
+}
+module_init(queue_module_init)
+module_exit(queue_module_exit)
+MODULE_LICENSE("GPL");
^ permalink raw reply [flat|nested] 16+ messages in thread