* [v2 PATCH 0/3] Improve performance when reading stdin
@ 2024-05-19 5:22 Herbert Xu
2024-05-19 5:22 ` [v2 PATCH 1/3] input: Move newline loop into preadbuffer Herbert Xu
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Herbert Xu @ 2024-05-19 5:22 UTC (permalink / raw)
To: DASH Mailing List
v2 rebases on the new multibyte patch-set.
The performance when reading stdin has regressed after we started
reading one byte at a time. This patch series uses lseek(2) and
tee(2) to recover performance where possible.
Herbert Xu (3):
input: Move newline loop into preadbuffer
input: Use lseek on stdin when possible
input: Use tee(2) for stdin pipe
configure.ac | 2 +-
src/eval.c | 3 +
src/init.h | 1 +
src/input.c | 208 +++++++++++++++++++++++++++++++++++++++-----------
src/input.h | 4 +
src/jobs.c | 3 +
src/mkinit.c | 6 ++
src/options.c | 2 +-
src/redir.c | 26 ++++---
src/system.h | 7 ++
src/trap.c | 1 +
11 files changed, 205 insertions(+), 58 deletions(-)
--
2.39.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* [v2 PATCH 1/3] input: Move newline loop into preadbuffer
2024-05-19 5:22 [v2 PATCH 0/3] Improve performance when reading stdin Herbert Xu
@ 2024-05-19 5:22 ` Herbert Xu
2024-05-19 5:23 ` [v2 PATCH 2/3] input: Use lseek on stdin when possible Herbert Xu
2024-05-19 5:23 ` [v2 PATCH 3/3] input: Use tee(2) for stdin pipe Herbert Xu
2 siblings, 0 replies; 4+ messages in thread
From: Herbert Xu @ 2024-05-19 5:22 UTC (permalink / raw)
To: DASH Mailing List
As it stands preadfd tries to fetch a whole line when we're reading
one byte at a time. However, this is wrong because how many bytes
we're trying to read has nothing to do with whether we get a whole
line or not.
Move the loop into preadbuffer alongside the other history support
code.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
src/input.c | 47 +++++++++++++++++++++++++++++++----------------
1 file changed, 31 insertions(+), 16 deletions(-)
diff --git a/src/input.c b/src/input.c
index 6779069..193235d 100644
--- a/src/input.c
+++ b/src/input.c
@@ -192,16 +192,27 @@ preadfd(void)
{
char *buf = parsefile->buf;
int unget;
+ int pnr;
int nr;
+ nr = input_get_lleft(parsefile);
+
unget = parsefile->nextc - buf;
if (unget > PUNGETC_MAX)
unget = PUNGETC_MAX;
- memmove(buf, parsefile->nextc - unget, unget);
- parsefile->nextc = buf += unget;
+ memmove(buf, parsefile->nextc - unget, unget + nr);
+ buf += unget;
+ parsefile->nextc = buf;
+ buf += nr;
+ nr = BUFSIZ - nr;
+ if (!IS_DEFINED_SMALL && !nr)
+ return nr;
+
+ pnr = nr;
retry:
+ nr = pnr;
#ifndef SMALL
if (parsefile->fd == 0 && el) {
static const char *rl_cp;
@@ -216,9 +227,8 @@ retry:
if (rl_cp == NULL)
nr = 0;
else {
- nr = el_len;
- if (nr > BUFSIZ)
- nr = BUFSIZ;
+ if (nr > el_len)
+ nr = el_len;
memcpy(buf, rl_cp, nr);
if (nr != el_len) {
el_len -= nr;
@@ -230,10 +240,8 @@ retry:
} else
#endif
if (parsefile->fd)
- nr = read(parsefile->fd, buf, BUFSIZ);
+ nr = read(parsefile->fd, buf, nr);
else {
- unsigned len = BUFSIZ;
-
nr = 0;
do {
@@ -255,7 +263,7 @@ retry:
}
nr++;
- } while (!IS_DEFINED_SMALL && *buf++ != '\n' && --len);
+ } while (0);
}
if (nr < 0) {
@@ -290,19 +298,26 @@ static int preadbuffer(void)
return PEOF;
flushall();
+ q = parsefile->nextc;
+ something = !first;
+
more = input_get_lleft(parsefile);
if (more <= 0) {
+ int nr;
+
again:
- if ((more = preadfd()) <= 0) {
+ nr = q - parsefile->nextc;
+ more = preadfd();
+ q = parsefile->nextc + nr;
+ if (more <= 0) {
input_set_lleft(parsefile, parsefile->nleft = 0);
+ if (!IS_DEFINED_SMALL && nr > 0)
+ goto save;
return PEOF;
}
}
- q = parsefile->nextc;
-
/* delete nul characters */
- something = !first;
for (;;) {
int c;
@@ -321,7 +336,6 @@ again:
switch (c) {
case '\n':
- parsefile->nleft = q - parsefile->nextc - 1;
goto done;
default:
@@ -335,8 +349,7 @@ again:
check:
if (more <= 0) {
- parsefile->nleft = q - parsefile->nextc - 1;
- if (parsefile->nleft < 0)
+ if (!IS_DEFINED_SMALL)
goto again;
break;
}
@@ -344,6 +357,8 @@ check:
done:
input_set_lleft(parsefile, more);
+save:
+ parsefile->nleft = q - parsefile->nextc - 1;
if (!IS_DEFINED_SMALL)
savec = *q;
*q = '\0';
--
2.39.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [v2 PATCH 2/3] input: Use lseek on stdin when possible
2024-05-19 5:22 [v2 PATCH 0/3] Improve performance when reading stdin Herbert Xu
2024-05-19 5:22 ` [v2 PATCH 1/3] input: Move newline loop into preadbuffer Herbert Xu
@ 2024-05-19 5:23 ` Herbert Xu
2024-05-19 5:23 ` [v2 PATCH 3/3] input: Use tee(2) for stdin pipe Herbert Xu
2 siblings, 0 replies; 4+ messages in thread
From: Herbert Xu @ 2024-05-19 5:23 UTC (permalink / raw)
To: DASH Mailing List
For files that can be sought, use lseek instead of reading one
byte at a time.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
src/eval.c | 3 ++
src/init.h | 1 +
src/input.c | 108 +++++++++++++++++++++++++++++++++++---------------
src/input.h | 4 ++
src/jobs.c | 3 ++
src/mkinit.c | 6 +++
src/options.c | 2 +-
src/redir.c | 26 +++++++-----
src/trap.c | 1 +
9 files changed, 111 insertions(+), 43 deletions(-)
diff --git a/src/eval.c b/src/eval.c
index 32f1e64..c785edb 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -896,6 +896,8 @@ bail:
goto bail;
default:
+ flush_input();
+
/* Fork off a child process if necessary. */
if (!(flags & EV_EXIT) || have_traps()) {
INTOFF;
@@ -1131,6 +1133,7 @@ execcmd(int argc, char **argv)
iflag = 0; /* exit on error */
mflag = 0;
optschanged();
+ flush_input();
shellexec(argv + 1, pathval(), 0);
}
return 0;
diff --git a/src/init.h b/src/init.h
index 4f98b5d..e117895 100644
--- a/src/init.h
+++ b/src/init.h
@@ -39,4 +39,5 @@ union node;
void init(void);
void exitreset(void);
void forkreset(union node *);
+void postexitreset(void);
void reset(void);
diff --git a/src/input.c b/src/input.c
index 193235d..b84ecec 100644
--- a/src/input.c
+++ b/src/input.c
@@ -32,11 +32,13 @@
* SUCH DAMAGE.
*/
-#include <stdio.h> /* defines BUFSIZ */
#include <fcntl.h>
-#include <unistd.h>
+#include <stdbool.h>
+#include <stdio.h> /* defines BUFSIZ */
#include <stdlib.h>
#include <string.h>
+#include <termios.h>
+#include <unistd.h>
/*
* This file implements the input routines used by the parser.
@@ -59,12 +61,21 @@
#define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1)
+struct stdin_state {
+ tcflag_t canon;
+ off_t seekable;
+ struct termios tios;
+};
MKINIT struct parsefile basepf; /* top level input file */
MKINIT char basebuf[IBUFSIZ]; /* buffer for top level input file */
MKINIT struct parsefile *toppf = &basepf;
+MKINIT struct stdin_state stdin_state;
struct parsefile *parsefile = &basepf; /* current input file */
int whichprompt; /* 1 == PS1, 2 == PS2 */
+int stdin_istty;
+
+MKINIT void input_init(void);
STATIC void pushfile(void);
static void popstring(void);
@@ -74,6 +85,7 @@ static int preadbuffer(void);
#ifdef mkinit
INCLUDE <stdio.h>
+INCLUDE <termios.h>
INCLUDE <unistd.h>
INCLUDE "input.h"
INCLUDE "error.h"
@@ -82,6 +94,8 @@ INCLUDE "syntax.h"
INIT {
basepf.nextc = basepf.buf = basebuf;
basepf.linno = 1;
+
+ input_init();
}
RESET {
@@ -104,8 +118,32 @@ FORKRESET {
parsefile->fd = 0;
}
}
+
+POSTEXITRESET {
+ flush_input();
+}
#endif
+void input_init(void)
+{
+ struct stdin_state *st = &stdin_state;
+ int istty;
+
+ istty = tcgetattr(0, &st->tios) + 1;
+ st->seekable = istty ? 0 : lseek(0, 0, SEEK_CUR) + 1;
+ st->canon = istty ? st->tios.c_lflag & ICANON : 0;
+ stdin_istty = istty;
+}
+
+static bool stdin_bufferable(void)
+{
+ struct stdin_state *st = &stdin_state;
+
+ if (stdin_istty < 0)
+ input_init();
+
+ return st->canon || st->seekable;
+}
static void freestrings(struct strpush *sp)
{
@@ -191,6 +229,7 @@ static int
preadfd(void)
{
char *buf = parsefile->buf;
+ int fd = parsefile->fd;
int unget;
int pnr;
int nr;
@@ -214,7 +253,7 @@ preadfd(void)
retry:
nr = pnr;
#ifndef SMALL
- if (parsefile->fd == 0 && el) {
+ if (fd == 0 && el) {
static const char *rl_cp;
static int el_len;
@@ -237,38 +276,23 @@ retry:
rl_cp = 0;
}
- } else
-#endif
- if (parsefile->fd)
- nr = read(parsefile->fd, buf, nr);
- else {
- nr = 0;
-
- do {
- int err;
-
- err = read(0, buf, 1);
- if (err <= 0) {
- if (nr)
- break;
-
- nr = err;
- if (errno != EWOULDBLOCK)
- break;
- if (stdin_clear_nonblock() < 0)
- break;
-
- out2str("sh: turning off NDELAY mode\n");
- goto retry;
- }
-
- nr++;
- } while (0);
+ return nr;
}
+#endif
+
+ if (!fd && !stdin_bufferable())
+ nr = 1;
+
+ nr = read(fd, buf, nr);
if (nr < 0) {
if (errno == EINTR && !(basepf.prev && pending_sig))
goto retry;
+ if (fd == 0 && errno == EWOULDBLOCK &&
+ stdin_clear_nonblock() >= 0) {
+ out2str("sh: turning off NDELAY mode\n");
+ goto retry;
+ }
}
return nr;
}
@@ -302,6 +326,8 @@ static int preadbuffer(void)
something = !first;
more = input_get_lleft(parsefile);
+
+ INTOFF;
if (more <= 0) {
int nr;
@@ -313,6 +339,7 @@ again:
input_set_lleft(parsefile, parsefile->nleft = 0);
if (!IS_DEFINED_SMALL && nr > 0)
goto save;
+ INTON;
return PEOF;
}
}
@@ -365,11 +392,10 @@ save:
if (parsefile->fd == 0 && hist && something) {
HistEvent he;
- INTOFF;
history(hist, &he, first ? H_ENTER : H_APPEND,
parsefile->nextc);
- INTON;
}
+ INTON;
if (vflag) {
out2str(parsefile->nextc);
@@ -590,3 +616,21 @@ popallfiles(void)
{
unwindfiles(toppf);
}
+
+void __attribute__((noinline)) flush_input(void)
+{
+ int left = basepf.nleft + input_get_lleft(&basepf);
+
+ if (stdin_state.seekable && left) {
+ INTOFF;
+ lseek(0, -left, SEEK_CUR);
+ input_set_lleft(&basepf, basepf.nleft = 0);
+ INTON;
+ }
+}
+
+void reset_input(void)
+{
+ flush_input();
+ stdin_istty = -1;
+}
diff --git a/src/input.h b/src/input.h
index c59d784..af1c1be 100644
--- a/src/input.h
+++ b/src/input.h
@@ -35,6 +35,7 @@
*/
#include <limits.h>
+#include <stdbool.h>
#ifdef SMALL
#define IS_DEFINED_SMALL 1
@@ -94,6 +95,7 @@ struct parsefile {
};
extern struct parsefile *parsefile;
+extern int stdin_istty;
/*
* The input line number. Input.c just defines this variable, and saves
@@ -113,6 +115,8 @@ void pushstdin(void);
void popfile(void);
void unwindfiles(struct parsefile *);
void popallfiles(void);
+void flush_input(void);
+void reset_input(void);
static inline int input_get_lleft(struct parsefile *pf)
{
diff --git a/src/jobs.c b/src/jobs.c
index 4cf6b8c..5765e6d 100644
--- a/src/jobs.c
+++ b/src/jobs.c
@@ -968,6 +968,9 @@ forkshell(struct job *jp, union node *n, int mode)
int pid;
TRACE(("forkshell(%%%d, %p, %d) called\n", jobno(jp), n, mode));
+
+ flush_input();
+
pid = fork();
if (pid == 0)
forkchild(jp, n, mode);
diff --git a/src/mkinit.c b/src/mkinit.c
index 870b64d..2514ebf 100644
--- a/src/mkinit.c
+++ b/src/mkinit.c
@@ -119,6 +119,11 @@ char forkreset[] = "\
* This routine is called when we enter a subshell.\n\
*/\n";
+char postexitreset[] = "\
+/*\n\
+ * This routine is called in exitshell.\n\
+ */\n";
+
char reset[] = "\
/*\n\
* This routine is called when an error or an interrupt occurs in an\n\
@@ -130,6 +135,7 @@ struct event event[] = {
{"INIT", "init", init},
{"EXITRESET", "exitreset", exitreset},
{"FORKRESET", "forkreset", forkreset, "union node *n"},
+ {"POSTEXITRESET", "postexitreset", postexitreset},
{"RESET", "reset", reset},
{NULL, NULL}
};
diff --git a/src/options.c b/src/options.c
index f157321..4d0a53a 100644
--- a/src/options.c
+++ b/src/options.c
@@ -142,7 +142,7 @@ procargs(int argc, char **argv)
sh_error("-c requires an argument");
sflag = 1;
}
- if (iflag == 2 && sflag == 1 && isatty(0) && isatty(1))
+ if (iflag == 2 && sflag == 1 && stdin_istty && isatty(1))
iflag = 1;
if (mflag == 2)
mflag = iflag;
diff --git a/src/redir.c b/src/redir.c
index c57d745..8d1c8f6 100644
--- a/src/redir.c
+++ b/src/redir.c
@@ -46,16 +46,17 @@
* Code for dealing with input/output redirection.
*/
-#include "main.h"
-#include "shell.h"
-#include "nodes.h"
-#include "jobs.h"
-#include "options.h"
-#include "expand.h"
-#include "redir.h"
-#include "output.h"
-#include "memalloc.h"
#include "error.h"
+#include "expand.h"
+#include "input.h"
+#include "jobs.h"
+#include "main.h"
+#include "memalloc.h"
+#include "nodes.h"
+#include "options.h"
+#include "output.h"
+#include "redir.h"
+#include "shell.h"
#include "system.h"
#include "trap.h"
@@ -142,6 +143,8 @@ redirect(union node *redir, int flags)
continue;
fd = n->nfile.fd;
+ if (fd == 0)
+ reset_input();
if (sv) {
int closed;
@@ -415,8 +418,11 @@ popredir(int drop)
close(i);
break;
default:
- if (!drop)
+ if (!drop) {
+ if (i == 0)
+ reset_input();
dup2(rp->renamed[i], i);
+ }
close(rp->renamed[i]);
break;
}
diff --git a/src/trap.c b/src/trap.c
index f871656..2351b61 100644
--- a/src/trap.c
+++ b/src/trap.c
@@ -426,6 +426,7 @@ exitshell(void)
}
out:
exitreset();
+ postexitreset();
/*
* Disable job control so that whoever had the foreground before we
* started can get it back.
--
2.39.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [v2 PATCH 3/3] input: Use tee(2) for stdin pipe
2024-05-19 5:22 [v2 PATCH 0/3] Improve performance when reading stdin Herbert Xu
2024-05-19 5:22 ` [v2 PATCH 1/3] input: Move newline loop into preadbuffer Herbert Xu
2024-05-19 5:23 ` [v2 PATCH 2/3] input: Use lseek on stdin when possible Herbert Xu
@ 2024-05-19 5:23 ` Herbert Xu
2 siblings, 0 replies; 4+ messages in thread
From: Herbert Xu @ 2024-05-19 5:23 UTC (permalink / raw)
To: DASH Mailing List
use tee(2) to peek at pipes in order to avoid reading one byte at
a time.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
configure.ac | 2 +-
src/input.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++------
src/system.h | 7 ++++++
3 files changed, 72 insertions(+), 8 deletions(-)
diff --git a/configure.ac b/configure.ac
index cb55c3f..50effc0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -89,7 +89,7 @@ dnl Checks for library functions.
AC_CHECK_FUNCS(bsearch faccessat getpwnam getrlimit isalpha killpg \
memfd_create mempcpy \
sigsetmask stpcpy strchrnul strsignal strtod strtoimax \
- strtoumax sysconf)
+ strtoumax sysconf tee)
dnl Check whether it's worth working around FreeBSD PR kern/125009.
dnl The traditional behavior of access/faccessat is crazy, but
diff --git a/src/input.c b/src/input.c
index b84ecec..8f8c173 100644
--- a/src/input.c
+++ b/src/input.c
@@ -57,14 +57,18 @@
#include "redir.h"
#include "shell.h"
#include "syntax.h"
+#include "system.h"
#include "trap.h"
#define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1)
+MKINIT
struct stdin_state {
tcflag_t canon;
off_t seekable;
struct termios tios;
+ int pip[2];
+ int pending;
};
MKINIT struct parsefile basepf; /* top level input file */
@@ -85,6 +89,7 @@ static int preadbuffer(void);
#ifdef mkinit
INCLUDE <stdio.h>
+INCLUDE <string.h>
INCLUDE <termios.h>
INCLUDE <unistd.h>
INCLUDE "input.h"
@@ -117,6 +122,11 @@ FORKRESET {
close(parsefile->fd);
parsefile->fd = 0;
}
+ if (stdin_state.pip[0]) {
+ close(stdin_state.pip[0]);
+ close(stdin_state.pip[1]);
+ memset(stdin_state.pip, 0, sizeof(stdin_state.pip));
+ }
}
POSTEXITRESET {
@@ -145,6 +155,43 @@ static bool stdin_bufferable(void)
return st->canon || st->seekable;
}
+static void flush_tee(void *buf, int nr, int pending)
+{
+ while (pending > 0) {
+ int err;
+
+ err = read(0, buf, nr > pending ? pending : nr);
+ if (err > 0)
+ pending -= err;
+ }
+}
+
+static int stdin_tee(void *buf, int nr)
+{
+ int err;
+
+ if (stdin_istty)
+ return 0;
+
+ if (!stdin_state.pip[0]) {
+ err = pipe(stdin_state.pip);
+ if (err < 0)
+ return err;
+ if (stdin_state.pip[0] < 10)
+ stdin_state.pip[0] = savefd(stdin_state.pip[0],
+ stdin_state.pip[0]);
+ if (stdin_state.pip[1] < 10)
+ stdin_state.pip[1] = savefd(stdin_state.pip[1],
+ stdin_state.pip[1]);
+ }
+
+ flush_tee(buf, nr, stdin_state.pending);
+
+ err = tee(0, stdin_state.pip[1], nr, 0);
+ stdin_state.pending = err;
+ return err;
+}
+
static void freestrings(struct strpush *sp)
{
INTOFF;
@@ -280,10 +327,17 @@ retry:
}
#endif
- if (!fd && !stdin_bufferable())
- nr = 1;
+ if (!fd && !stdin_bufferable()) {
+ nr = stdin_tee(buf, nr);
+ fd = stdin_state.pip[0];
+ if (nr <= 0) {
+ fd = 0;
+ nr = 1;
+ }
+ }
- nr = read(fd, buf, nr);
+ if (nr >= 0)
+ nr = read(fd, buf, nr);
if (nr < 0) {
if (errno == EINTR && !(basepf.prev && pending_sig))
@@ -621,12 +675,15 @@ void __attribute__((noinline)) flush_input(void)
{
int left = basepf.nleft + input_get_lleft(&basepf);
- if (stdin_state.seekable && left) {
- INTOFF;
+ INTOFF;
+ if (stdin_state.seekable && left)
lseek(0, -left, SEEK_CUR);
- input_set_lleft(&basepf, basepf.nleft = 0);
- INTON;
+ else if (stdin_state.pending > left) {
+ flush_tee(basebuf, BUFSIZ, stdin_state.pending - left);
+ stdin_state.pending = 0;
}
+ input_set_lleft(&basepf, basepf.nleft = 0);
+ INTON;
}
void reset_input(void)
diff --git a/src/system.h b/src/system.h
index 6b31d52..e7f968b 100644
--- a/src/system.h
+++ b/src/system.h
@@ -118,6 +118,13 @@ long sysconf(int) __attribute__((__noreturn__));
int isblank(int c);
#endif
+#ifndef HAVE_TEE
+static inline ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags)
+{
+ return -1;
+}
+#endif
+
#ifndef HAVE_FNMATCH
static inline int fnmatch(const char *pattern, const char *string, int flags)
{
--
2.39.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-05-19 5:23 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-05-19 5:22 [v2 PATCH 0/3] Improve performance when reading stdin Herbert Xu
2024-05-19 5:22 ` [v2 PATCH 1/3] input: Move newline loop into preadbuffer Herbert Xu
2024-05-19 5:23 ` [v2 PATCH 2/3] input: Use lseek on stdin when possible Herbert Xu
2024-05-19 5:23 ` [v2 PATCH 3/3] input: Use tee(2) for stdin pipe Herbert Xu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox