xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Zheng Li <dev@zheng.li>
To: xen-devel@lists.xenproject.org
Cc: Dave Scott <Dave.Scott@citrix.com>, Joe Jin <joe.jin@oracle.com>,
	"Luis R. Rodriguez" <mcgrof@suse.com>,
	Luonengjun <luonengjun@huawei.com>, Zheng Li <dev@zheng.li>,
	Fanhenglong <fanhenglong@huawei.com>,
	"Liuqiming (John)" <john.liuqiming@huawei.com>,
	Ian Jackson <Ian.Jackson@citrix.com>
Subject: [PATCH 1/8] oxenstored: add a poll-based select mechanism
Date: Mon, 15 Sep 2014 23:39:13 +0100	[thread overview]
Message-ID: <1410820760-7994-2-git-send-email-dev@zheng.li> (raw)
In-Reply-To: <1410820760-7994-1-git-send-email-dev@zheng.li>

Currently, oxenstored uses Unix.select underneath, so it doesn't work properly
if given a FD number >= 1024. This is a scalability bottleneck for hosts
running a large number of VMs.

To remove this limitation, we implement a poll-based mechanism with the same
type signature as the Unix.select currently in use, so the two functions are
interchangeable at any stage.

Signed-off-by: Zheng Li <dev@zheng.li>
---
 tools/ocaml/xenstored/Makefile       |  9 +++--
 tools/ocaml/xenstored/select.ml      | 49 ++++++++++++++++++++++++++
 tools/ocaml/xenstored/select.mli     | 20 +++++++++++
 tools/ocaml/xenstored/select_stubs.c | 68 ++++++++++++++++++++++++++++++++++++
 tools/ocaml/xenstored/xenstored.ml   |  2 +-
 5 files changed, 144 insertions(+), 4 deletions(-)
 create mode 100644 tools/ocaml/xenstored/select.ml
 create mode 100644 tools/ocaml/xenstored/select.mli
 create mode 100644 tools/ocaml/xenstored/select_stubs.c

diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile
index 068e04a..47d5303 100644
--- a/tools/ocaml/xenstored/Makefile
+++ b/tools/ocaml/xenstored/Makefile
@@ -15,10 +15,12 @@ OCAMLINCLUDE += \
 	-I $(OCAML_TOPLEVEL)/libs/xc \
 	-I $(OCAML_TOPLEVEL)/libs/eventchn
 
-LIBS = syslog.cma syslog.cmxa
+LIBS = syslog.cma syslog.cmxa select.cma select.cmxa
 syslog_OBJS = syslog
 syslog_C_OBJS = syslog_stubs
-OCAML_LIBRARY = syslog
+select_OBJS = select
+select_C_OBJS = select_stubs
+OCAML_LIBRARY = syslog select
 
 LIBS += systemd.cma systemd.cmxa
 systemd_OBJS = systemd
@@ -46,12 +48,13 @@ OBJS = define \
 	process \
 	xenstored
 
-INTF = symbol.cmi trie.cmi syslog.cmi systemd.cmi
+INTF = symbol.cmi trie.cmi syslog.cmi systemd.cmi select.cmi
 
 XENSTOREDLIBS = \
 	unix.cmxa \
 	-ccopt -L -ccopt . syslog.cmxa \
 	-ccopt -L -ccopt . systemd.cmxa \
+	-ccopt -L -ccopt . select.cmxa \
 	-ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/mmap $(OCAML_TOPLEVEL)/libs/mmap/xenmmap.cmxa \
 	-ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/eventchn $(OCAML_TOPLEVEL)/libs/eventchn/xeneventchn.cmxa \
 	-ccopt -L -ccopt $(OCAML_TOPLEVEL)/libs/xc $(OCAML_TOPLEVEL)/libs/xc/xenctrl.cmxa \
diff --git a/tools/ocaml/xenstored/select.ml b/tools/ocaml/xenstored/select.ml
new file mode 100644
index 0000000..2c18c70
--- /dev/null
+++ b/tools/ocaml/xenstored/select.ml
@@ -0,0 +1,49 @@
+(*
+ * Copyright (C) 2014 Zheng Li <dev@zheng.li>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *)
+
+
+type event = { (* per-fd flags: set by [select] to request events, rewritten by the C stub to report them *)
+	mutable read: bool; (* the stub maps this to POLLIN *)
+	mutable write: bool; (* the stub maps this to POLLOUT *)
+	mutable except: bool; (* the stub maps this to POLLPRI *)
+}
+(* C stub: takes an array of (fd, event) pairs and an int timeout, returns an int that [select] compares against 0. *)
+external select_on_poll: (Unix.file_descr * event) array -> int -> int = "stub_select_on_poll"
+(* Build a fresh event record with no flag set. *)
+let init_event () = {read = false; write = false; except = false}
+
+let select in_fds out_fds exc_fds timeout = (* returns (readable, writable, exceptional) fd lists; [timeout] is scaled by 1000 below *)
+	let h = Hashtbl.create 57 in (* fd -> event record, so an fd present in several lists gets a single entry *)
+	let add_event event_set fd = (* find or create the record for [fd], then apply [event_set] to it *)
+		let e =
+			try Hashtbl.find h fd
+			with Not_found ->
+				let e = init_event () in
+				Hashtbl.add h fd e; e in
+		event_set e in
+	List.iter (add_event (fun x -> x.read <- true)) in_fds;
+	List.iter (add_event (fun x -> x.write <- true)) out_fds;
+	List.iter (add_event (fun x -> x.except <- true)) exc_fds;
+	let a = Array.make (Hashtbl.length h) (Unix.stdin, init_event ()) in (* placeholder cells; each one is overwritten by the iteration below *)
+	let i = ref (-1) in
+	Hashtbl.iter (fun fd event -> incr i; Array.set a !i (fd, event)) h;
+	let n = select_on_poll a (int_of_float (timeout *. 1000.)) in (* timeout multiplied by 1000 and truncated to an int for the stub *)
+	let r = [], [], [] in
+	if n = 0 then r else (* on 0 the stub does not rewrite the records, which still hold the requested flags, so they are not read *)
+		Array.fold_right (* otherwise sort each fd into the lists matching the flags left set in its record *)
+			(fun (fd, event) (r, w, x) ->
+			 (if event.read then fd :: r else r),
+			 (if event.write then fd :: w else w),
+			 (if event.except then fd :: x else x))
+			a r
diff --git a/tools/ocaml/xenstored/select.mli b/tools/ocaml/xenstored/select.mli
new file mode 100644
index 0000000..1253d4e
--- /dev/null
+++ b/tools/ocaml/xenstored/select.mli
@@ -0,0 +1,20 @@
+(*
+ * Copyright (C) 2014 Zheng Li <dev@zheng.li>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *)
+
+
+(** Same interface and semantics as [Unix.select] but with an extra alternative
+    implementation based on poll. *)
+val select:
+	Unix.file_descr list -> Unix.file_descr list -> Unix.file_descr list -> float
+	-> Unix.file_descr list * Unix.file_descr list * Unix.file_descr list
diff --git a/tools/ocaml/xenstored/select_stubs.c b/tools/ocaml/xenstored/select_stubs.c
new file mode 100644
index 0000000..a50f417
--- /dev/null
+++ b/tools/ocaml/xenstored/select_stubs.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014 Zheng Li <dev@zheng.li>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include <poll.h>
+#include <errno.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <caml/mlvalues.h>
+#include <caml/memory.h>
+#include <caml/fail.h>
+#include <caml/alloc.h>
+#include <caml/signals.h>
+#include <caml/unixsupport.h>
+
+CAMLprim value stub_select_on_poll(value fd_events, value timeo) {
+	/* poll(2) backend of Select.select: fd_events is an array of (fd, event)
+	 * pairs, timeo is in milliseconds (negative blocks indefinitely).  When
+	 * poll reports activity, each event record {read; write; except} is
+	 * overwritten with what fired.  Returns poll's result (0 on timeout);
+	 * raises Unix_error if poll fails or an fd is invalid (EBADF). */
+	CAMLparam2(fd_events, timeo);
+	CAMLlocal1(events);
+	int i, rc, c_len = Wosize_val(fd_events), c_timeo = Int_val(timeo);
+	/* C99 forbids zero-length VLAs, so reserve one slot for an empty array. */
+	struct pollfd c_fds[c_len > 0 ? c_len : 1];
+
+	for (i = 0; i < c_len; i++) {
+		events = Field(Field(fd_events, i), 1);
+		c_fds[i].fd = Int_val(Field(Field(fd_events, i), 0));
+		/* Translate the requested flags into poll's event mask. */
+		c_fds[i].events = c_fds[i].revents = 0;
+		c_fds[i].events |= Bool_val(Field(events, 0)) ? POLLIN : 0;
+		c_fds[i].events |= Bool_val(Field(events, 1)) ? POLLOUT: 0;
+		c_fds[i].events |= Bool_val(Field(events, 2)) ? POLLPRI: 0;
+	}
+
+	/* Let other OCaml threads run while this one is blocked in poll. */
+	caml_enter_blocking_section();
+	rc = poll(c_fds, c_len, c_timeo);
+	caml_leave_blocking_section();
+
+	if (rc < 0) uerror("select", Nothing);
+
+	if (rc > 0) {
+		for (i = 0; i < c_len; i++) {
+			events = Field(Field(fd_events, i), 1);
+
+			if (c_fds[i].revents & POLLNVAL) unix_error(EBADF, "select", Nothing);
+			/* Test the requested bit with '&' ('|' would always be true), so
+			 * hang-up and error only wake the directions that were asked for. */
+			Field(events, 0) = Val_bool((c_fds[i].events & POLLIN)  && (c_fds[i].revents & (POLLIN |POLLHUP|POLLERR)));
+			Field(events, 1) = Val_bool((c_fds[i].events & POLLOUT) && (c_fds[i].revents & (POLLOUT|POLLHUP|POLLERR)));
+			Field(events, 2) = Val_bool(c_fds[i].revents & POLLPRI);
+		}
+	}
+
+	CAMLreturn(Val_int(rc));
+}
diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
index 1c02f2f..bfa488f 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -368,7 +368,7 @@ let _ =
 		let timeout = if List.length mw > 0 then 0. else -1. in
 		let rset, wset, _ =
 		try
-			Unix.select (spec_fds @ inset) outset [] timeout
+			Select.select (spec_fds @ inset) outset [] timeout
 		with Unix.Unix_error(Unix.EINTR, _, _) ->
 			[], [], [] in
 		let sfds, cfds =
-- 
2.1.0

  reply	other threads:[~2014-09-15 22:41 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-15 22:39 Some oxenstored improvements Zheng Li
2014-09-15 22:39 ` Zheng Li [this message]
2014-09-16  9:01   ` [PATCH 1/8] oxenstored: add a poll-based select mechanism David Scott
2014-09-16 13:00     ` Zheng Li
2014-09-15 22:39 ` [PATCH 2/8] oxenstored: add facilities to raise the max open fds uplimit Zheng Li
2014-09-16  9:05   ` Dave Scott
2014-09-16  9:38   ` David Vrabel
2014-09-16 13:05     ` Zheng Li
2014-09-15 22:39 ` [PATCH 3/8] oxenstored: add a --use-select command line flag Zheng Li
2014-09-15 22:39 ` [PATCH 4/8] oxenstored: catch the error when a connection is already deleted Zheng Li
2014-09-16  9:08   ` Dave Scott
2014-09-15 22:39 ` [PATCH 5/8] oxenstored: use hash table to store socket connections Zheng Li
2014-09-15 22:39 ` [PATCH 6/8] oxenstored: enable domain connection indexing based on eventchn port Zheng Li
2014-09-16  9:16   ` Dave Scott
2014-09-16 13:13     ` Zheng Li
2014-09-15 22:39 ` [PATCH 7/8] oxenstored: only process domain connections that notify us by events Zheng Li
2014-09-16  9:19   ` Dave Scott
2014-09-16 13:34     ` Zheng Li
2014-09-15 22:39 ` [PATCH 8/8] oxenstored: fine tunning the recognition of domain connections with queued input/output Zheng Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1410820760-7994-2-git-send-email-dev@zheng.li \
    --to=dev@zheng.li \
    --cc=Dave.Scott@citrix.com \
    --cc=Ian.Jackson@citrix.com \
    --cc=fanhenglong@huawei.com \
    --cc=joe.jin@oracle.com \
    --cc=john.liuqiming@huawei.com \
    --cc=luonengjun@huawei.com \
    --cc=mcgrof@suse.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).