Netdev List
 help / color / mirror / Atom feed
* [PATCH V9 03/13] posix clocks: introduce a syscall for clock tuning.
From: Richard Cochran @ 2011-01-13 11:32 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-api-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Alan Cox, Arnd Bergmann, Christoph Lameter, David Miller,
	John Stultz, Krzysztof Halasa, Peter Zijlstra, Rodolfo Giometti,
	Thomas Gleixner
In-Reply-To: <cover.1294917347.git.richard.cochran-3mrvs1K0uXizZXS1Dc/lvw@public.gmane.org>

A new syscall is introduced that allows tuning of a POSIX clock. The
new call, clock_adjtime, takes two parameters, the clock ID and a
pointer to a struct timex. Any ADJTIMEX(2) operation may be requested
via this system call, but various POSIX clocks may or may not support
tuning.

Signed-off-by: Richard Cochran <richard.cochran-3mrvs1K0uXizZXS1Dc/lvw@public.gmane.org>
---
 drivers/char/mmtimer.c       |    1 +
 include/linux/posix-timers.h |    4 +
 include/linux/syscalls.h     |    2 +
 kernel/compat.c              |  136 +++++++++++++++++++++++++++---------------
 kernel/posix-cpu-timers.c    |    6 ++
 kernel/posix-timers.c        |   35 +++++++++++
 6 files changed, 136 insertions(+), 48 deletions(-)

diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index e6d7562..98f2488 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -767,6 +767,7 @@ static struct k_clock sgi_clock = {
 	.res = 0,
 	.clock_set = sgi_clock_set,
 	.clock_get = sgi_clock_get,
+	.clock_adj = do_posix_clock_noadjtime,
 	.timer_create = sgi_timer_create,
 	.nsleep = do_posix_clock_nonanosleep,
 	.timer_set = sgi_timer_set,
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 3e23844..b05d9b8 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/sched.h>
+#include <linux/timex.h>
 
 union cpu_time_count {
 	cputime_t cpu;
@@ -71,6 +72,7 @@ struct k_clock {
 	int (*clock_getres) (const clockid_t which_clock, struct timespec *tp);
 	int (*clock_set) (const clockid_t which_clock, struct timespec * tp);
 	int (*clock_get) (const clockid_t which_clock, struct timespec * tp);
+	int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
 	int (*timer_create) (struct k_itimer *timer);
 	int (*nsleep) (const clockid_t which_clock, int flags,
 		       struct timespec *, struct timespec __user *);
@@ -90,6 +92,7 @@ void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock);
 int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *,
 			       struct timespec __user *);
 int do_posix_clock_nosettime(const clockid_t, struct timespec *tp);
+int do_posix_clock_noadjtime(const clockid_t, struct timex *tx);
 
 /* function to call to trigger timer event */
 int posix_timer_event(struct k_itimer *timr, int si_private);
@@ -97,6 +100,7 @@ int posix_timer_event(struct k_itimer *timr, int si_private);
 int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *ts);
 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts);
 int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts);
+int posix_cpu_clock_adj(const clockid_t which_clock, struct timex *tx);
 int posix_cpu_timer_create(struct k_itimer *timer);
 int posix_cpu_nsleep(const clockid_t which_clock, int flags,
 		     struct timespec *rqtp, struct timespec __user *rmtp);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 18cd068..bfacab9 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -311,6 +311,8 @@ asmlinkage long sys_clock_settime(clockid_t which_clock,
 				const struct timespec __user *tp);
 asmlinkage long sys_clock_gettime(clockid_t which_clock,
 				struct timespec __user *tp);
+asmlinkage long sys_clock_adjtime(clockid_t which_clock,
+				struct timex __user *tx);
 asmlinkage long sys_clock_getres(clockid_t which_clock,
 				struct timespec __user *tp);
 asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags,
diff --git a/kernel/compat.c b/kernel/compat.c
index c9e2ec0..38b1d2c 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -52,6 +52,64 @@ static int compat_put_timeval(struct compat_timeval __user *o,
 		put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
 }
 
+static int compat_get_timex(struct timex *txc, struct compat_timex __user *utp)
+{
+	memset(txc, 0, sizeof(struct timex));
+
+	if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
+			__get_user(txc->modes, &utp->modes) ||
+			__get_user(txc->offset, &utp->offset) ||
+			__get_user(txc->freq, &utp->freq) ||
+			__get_user(txc->maxerror, &utp->maxerror) ||
+			__get_user(txc->esterror, &utp->esterror) ||
+			__get_user(txc->status, &utp->status) ||
+			__get_user(txc->constant, &utp->constant) ||
+			__get_user(txc->precision, &utp->precision) ||
+			__get_user(txc->tolerance, &utp->tolerance) ||
+			__get_user(txc->time.tv_sec, &utp->time.tv_sec) ||
+			__get_user(txc->time.tv_usec, &utp->time.tv_usec) ||
+			__get_user(txc->tick, &utp->tick) ||
+			__get_user(txc->ppsfreq, &utp->ppsfreq) ||
+			__get_user(txc->jitter, &utp->jitter) ||
+			__get_user(txc->shift, &utp->shift) ||
+			__get_user(txc->stabil, &utp->stabil) ||
+			__get_user(txc->jitcnt, &utp->jitcnt) ||
+			__get_user(txc->calcnt, &utp->calcnt) ||
+			__get_user(txc->errcnt, &utp->errcnt) ||
+			__get_user(txc->stbcnt, &utp->stbcnt))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc)
+{
+	if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
+			__put_user(txc->modes, &utp->modes) ||
+			__put_user(txc->offset, &utp->offset) ||
+			__put_user(txc->freq, &utp->freq) ||
+			__put_user(txc->maxerror, &utp->maxerror) ||
+			__put_user(txc->esterror, &utp->esterror) ||
+			__put_user(txc->status, &utp->status) ||
+			__put_user(txc->constant, &utp->constant) ||
+			__put_user(txc->precision, &utp->precision) ||
+			__put_user(txc->tolerance, &utp->tolerance) ||
+			__put_user(txc->time.tv_sec, &utp->time.tv_sec) ||
+			__put_user(txc->time.tv_usec, &utp->time.tv_usec) ||
+			__put_user(txc->tick, &utp->tick) ||
+			__put_user(txc->ppsfreq, &utp->ppsfreq) ||
+			__put_user(txc->jitter, &utp->jitter) ||
+			__put_user(txc->shift, &utp->shift) ||
+			__put_user(txc->stabil, &utp->stabil) ||
+			__put_user(txc->jitcnt, &utp->jitcnt) ||
+			__put_user(txc->calcnt, &utp->calcnt) ||
+			__put_user(txc->errcnt, &utp->errcnt) ||
+			__put_user(txc->stbcnt, &utp->stbcnt) ||
+			__put_user(txc->tai, &utp->tai))
+		return -EFAULT;
+	return 0;
+}
+
 asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
 		struct timezone __user *tz)
 {
@@ -617,6 +675,29 @@ long compat_sys_clock_gettime(clockid_t which_clock,
 	return err;
 }
 
+long compat_sys_clock_adjtime(clockid_t which_clock,
+		struct compat_timex __user *utp)
+{
+	struct timex txc;
+	mm_segment_t oldfs;
+	int err, ret;
+
+	err = compat_get_timex(&txc, utp);
+	if (err)
+		return err;
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	ret = sys_clock_adjtime(which_clock, (struct timex __user *) &txc);
+	set_fs(oldfs);
+
+	err = compat_put_timex(utp, &txc);
+	if (err)
+		return err;
+
+	return ret;
+}
+
 long compat_sys_clock_getres(clockid_t which_clock,
 		struct compat_timespec __user *tp)
 {
@@ -951,58 +1032,17 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat
 asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
 {
 	struct timex txc;
-	int ret;
-
-	memset(&txc, 0, sizeof(struct timex));
+	int err, ret;
 
-	if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
-			__get_user(txc.modes, &utp->modes) ||
-			__get_user(txc.offset, &utp->offset) ||
-			__get_user(txc.freq, &utp->freq) ||
-			__get_user(txc.maxerror, &utp->maxerror) ||
-			__get_user(txc.esterror, &utp->esterror) ||
-			__get_user(txc.status, &utp->status) ||
-			__get_user(txc.constant, &utp->constant) ||
-			__get_user(txc.precision, &utp->precision) ||
-			__get_user(txc.tolerance, &utp->tolerance) ||
-			__get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
-			__get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
-			__get_user(txc.tick, &utp->tick) ||
-			__get_user(txc.ppsfreq, &utp->ppsfreq) ||
-			__get_user(txc.jitter, &utp->jitter) ||
-			__get_user(txc.shift, &utp->shift) ||
-			__get_user(txc.stabil, &utp->stabil) ||
-			__get_user(txc.jitcnt, &utp->jitcnt) ||
-			__get_user(txc.calcnt, &utp->calcnt) ||
-			__get_user(txc.errcnt, &utp->errcnt) ||
-			__get_user(txc.stbcnt, &utp->stbcnt))
-		return -EFAULT;
+	err = compat_get_timex(&txc, utp);
+	if (err)
+		return err;
 
 	ret = do_adjtimex(&txc);
 
-	if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
-			__put_user(txc.modes, &utp->modes) ||
-			__put_user(txc.offset, &utp->offset) ||
-			__put_user(txc.freq, &utp->freq) ||
-			__put_user(txc.maxerror, &utp->maxerror) ||
-			__put_user(txc.esterror, &utp->esterror) ||
-			__put_user(txc.status, &utp->status) ||
-			__put_user(txc.constant, &utp->constant) ||
-			__put_user(txc.precision, &utp->precision) ||
-			__put_user(txc.tolerance, &utp->tolerance) ||
-			__put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
-			__put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
-			__put_user(txc.tick, &utp->tick) ||
-			__put_user(txc.ppsfreq, &utp->ppsfreq) ||
-			__put_user(txc.jitter, &utp->jitter) ||
-			__put_user(txc.shift, &utp->shift) ||
-			__put_user(txc.stabil, &utp->stabil) ||
-			__put_user(txc.jitcnt, &utp->jitcnt) ||
-			__put_user(txc.calcnt, &utp->calcnt) ||
-			__put_user(txc.errcnt, &utp->errcnt) ||
-			__put_user(txc.stbcnt, &utp->stbcnt) ||
-			__put_user(txc.tai, &utp->tai))
-		ret = -EFAULT;
+	err = compat_put_timex(utp, &txc);
+	if (err)
+		return err;
 
 	return ret;
 }
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 05bb717..0206116 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -207,6 +207,10 @@ int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
 	return error;
 }
 
+int posix_cpu_clock_adj(const clockid_t which_clock, struct timex *tx)
+{
+	return -EOPNOTSUPP;
+}
 
 /*
  * Sample a per-thread clock for the given task.
@@ -1610,6 +1614,7 @@ static __init int init_posix_cpu_timers(void)
 		.clock_getres = process_cpu_clock_getres,
 		.clock_get = process_cpu_clock_get,
 		.clock_set = do_posix_clock_nosettime,
+		.clock_adj = do_posix_clock_noadjtime,
 		.timer_create = process_cpu_timer_create,
 		.nsleep = process_cpu_nsleep,
 		.nsleep_restart = process_cpu_nsleep_restart,
@@ -1618,6 +1623,7 @@ static __init int init_posix_cpu_timers(void)
 		.clock_getres = thread_cpu_clock_getres,
 		.clock_get = thread_cpu_clock_get,
 		.clock_set = do_posix_clock_nosettime,
+		.clock_adj = do_posix_clock_noadjtime,
 		.timer_create = thread_cpu_timer_create,
 		.nsleep = thread_cpu_nsleep,
 		.nsleep_restart = thread_cpu_nsleep_restart,
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 93bd2eb..91f9b4b 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -203,6 +203,11 @@ static int common_timer_create(struct k_itimer *new_timer)
 	return 0;
 }
 
+static inline int common_clock_adj(const clockid_t which_clock, struct timex *t)
+{
+	return do_adjtimex(t);
+}
+
 static int no_timer_create(struct k_itimer *new_timer)
 {
 	return -EOPNOTSUPP;
@@ -279,11 +284,13 @@ static __init int init_posix_timers(void)
 		.clock_getres = hrtimer_get_res,
 		.clock_get = posix_ktime_get_ts,
 		.clock_set = do_posix_clock_nosettime,
+		.clock_adj = do_posix_clock_noadjtime,
 	};
 	struct k_clock clock_monotonic_raw = {
 		.clock_getres = hrtimer_get_res,
 		.clock_get = posix_get_monotonic_raw,
 		.clock_set = do_posix_clock_nosettime,
+		.clock_adj = do_posix_clock_noadjtime,
 		.timer_create = no_timer_create,
 		.nsleep = no_nsleep,
 	};
@@ -291,6 +298,7 @@ static __init int init_posix_timers(void)
 		.clock_getres = posix_get_coarse_res,
 		.clock_get = posix_get_realtime_coarse,
 		.clock_set = do_posix_clock_nosettime,
+		.clock_adj = do_posix_clock_noadjtime,
 		.timer_create = no_timer_create,
 		.nsleep = no_nsleep,
 	};
@@ -298,6 +306,7 @@ static __init int init_posix_timers(void)
 		.clock_getres = posix_get_coarse_res,
 		.clock_get = posix_get_monotonic_coarse,
 		.clock_set = do_posix_clock_nosettime,
+		.clock_adj = do_posix_clock_noadjtime,
 		.timer_create = no_timer_create,
 		.nsleep = no_nsleep,
 	};
@@ -934,6 +943,12 @@ int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
 }
 EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
 
+int do_posix_clock_noadjtime(const clockid_t which_clock, struct timex *t)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL_GPL(do_posix_clock_noadjtime);
+
 int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
 			       struct timespec *t, struct timespec __user *r)
 {
@@ -975,6 +990,26 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
 
 }
 
+SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
+		struct timex __user *, utx)
+{
+	struct timex ktx;
+	int err;
+
+	if (copy_from_user(&ktx, utx, sizeof(ktx)))
+		return -EFAULT;
+
+	if (invalid_clockid(which_clock))
+		return -EINVAL;
+
+	err = CLOCK_DISPATCH(which_clock, clock_adj, (which_clock, &ktx));
+
+	if (copy_to_user(utx, &ktx, sizeof(ktx)))
+		return -EFAULT;
+
+	return err;
+}
+
 SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
 		struct timespec __user *, tp)
 {
-- 
1.7.0.4

^ permalink raw reply related

* [PATCH V9 02/13] ntp: add ADJ_SETOFFSET mode bit
From: Richard Cochran @ 2011-01-13 11:32 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-api-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Alan Cox, Arnd Bergmann, Christoph Lameter, David Miller,
	John Stultz, Krzysztof Halasa, Peter Zijlstra, Rodolfo Giometti,
	Thomas Gleixner
In-Reply-To: <cover.1294917347.git.richard.cochran-3mrvs1K0uXizZXS1Dc/lvw@public.gmane.org>

This patch adds a new mode bit into the timex structure. When set, the bit
instructs the kernel to add the given time value to the current time.

Signed-off-by: Richard Cochran <richard.cochran-3mrvs1K0uXizZXS1Dc/lvw@public.gmane.org>
---
 include/linux/timex.h |    3 ++-
 kernel/time/ntp.c     |   11 +++++++++++
 2 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 32d852f..800a2c8 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -73,7 +73,7 @@ struct timex {
 	long tolerance;		/* clock frequency tolerance (ppm)
 				 * (read only)
 				 */
-	struct timeval time;	/* (read only) */
+	struct timeval time;	/* (read only, except for ADJ_SETOFFSET) */
 	long tick;		/* (modified) usecs between clock ticks */
 
 	long ppsfreq;           /* pps frequency (scaled ppm) (ro) */
@@ -102,6 +102,7 @@ struct timex {
 #define ADJ_STATUS		0x0010	/* clock status */
 #define ADJ_TIMECONST		0x0020	/* pll time constant */
 #define ADJ_TAI			0x0080	/* set TAI offset */
+#define ADJ_SETOFFSET		0x0100  /* add 'time' to current time */
 #define ADJ_MICRO		0x1000	/* select microsecond resolution */
 #define ADJ_NANO		0x2000	/* select nanosecond resolution */
 #define ADJ_TICK		0x4000	/* tick value */
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index d232189..0c87858 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -482,6 +482,17 @@ int do_adjtimex(struct timex *txc)
 			hrtimer_cancel(&leap_timer);
 	}
 
+	if (txc->modes & ADJ_SETOFFSET) {
+		struct timespec delta;
+		if ((unsigned long)txc->time.tv_usec >= NSEC_PER_SEC)
+			return -EINVAL;
+		delta.tv_sec  = txc->time.tv_sec;
+		delta.tv_nsec = txc->time.tv_usec;
+		if (!(txc->modes & ADJ_NANO))
+			delta.tv_nsec *= 1000;
+		timekeeping_inject_offset(&delta);
+	}
+
 	getnstimeofday(&ts);
 
 	write_seqlock_irq(&xtime_lock);
-- 
1.7.0.4

^ permalink raw reply related

* [PATCH V9 01/13] time: Introduce timekeeping_inject_offset
From: Richard Cochran @ 2011-01-13 11:31 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-api-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Alan Cox, Arnd Bergmann, Christoph Lameter, David Miller,
	John Stultz, Krzysztof Halasa, Peter Zijlstra, Rodolfo Giometti,
	Thomas Gleixner
In-Reply-To: <cover.1294917347.git.richard.cochran-3mrvs1K0uXizZXS1Dc/lvw@public.gmane.org>

From: John Stultz <john.stultz-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>

This adds a kernel-internal timekeeping interface to add or subtract
a fixed amount from CLOCK_REALTIME. This makes it so kernel users or
interfaces trying to do so do not have to read the time, then add an
offset and then call settimeofday(), which adds some extra error in
comparison to just simply adding the offset in the kernel timekeeping
core.

CC: Thomas Gleixner <tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org>
Signed-off-by: John Stultz <john.stultz-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
Signed-off-by: Richard Cochran <richard.cochran-3mrvs1K0uXizZXS1Dc/lvw@public.gmane.org>
---
 include/linux/time.h      |    1 +
 kernel/time/timekeeping.c |   36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 0 deletions(-)

diff --git a/include/linux/time.h b/include/linux/time.h
index 9f15ac7..b402134 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -166,6 +166,7 @@ extern int timekeeping_valid_for_hres(void);
 extern u64 timekeeping_max_deferment(void);
 extern void update_wall_time(void);
 extern void timekeeping_leap_insert(int leapsecond);
+extern int timekeeping_inject_offset(struct timespec *ts);
 
 struct tms;
 extern void do_sys_times(struct tms *);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5bb86da..bc10622 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -344,6 +344,42 @@ int do_settimeofday(struct timespec *tv)
 
 EXPORT_SYMBOL(do_settimeofday);
 
+
+/**
+ * timekeeping_inject_offset - Adds or subtracts from the current time.
+ * @ts:		pointer to the timespec variable containing the offset
+ *
+ * Adds or subtracts an offset value from the current time.
+ */
+int timekeeping_inject_offset(struct timespec *ts)
+{
+	unsigned long flags;
+
+	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	write_seqlock_irqsave(&xtime_lock, flags);
+
+	timekeeping_forward_now();
+
+	xtime = timespec_add(xtime, *ts);
+	wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
+
+	timekeeper.ntp_error = 0;
+	ntp_clear();
+
+	update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+				timekeeper.mult);
+
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+
+	/* signal hrtimers about time change */
+	clock_was_set();
+
+	return 0;
+}
+EXPORT_SYMBOL(timekeeping_inject_offset);
+
 /**
  * change_clocksource - Swaps clocksources if a new one is available
  *
-- 
1.7.0.4

^ permalink raw reply related

* Re: [PATCH 00/22] ipvs namespaces v3.3
From: Patrick McHardy @ 2011-01-13 11:31 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Simon Horman, netfilter-devel, lvs-devel, netdev,
	Julian Anastasov, Hans Schillstrom
In-Reply-To: <4D2EDDD0.8060903@netfilter.org>

On 13.01.2011 12:11, Pablo Neira Ayuso wrote:
> On 13/01/11 02:52, Simon Horman wrote:
>> In order to create this series I merged net-next-2.6 into nf-next-2.6.
>> The result is at
>> git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-test-2.6 ipvs-netns3.3
>>
>> However, I guess that you have already done your own merge and simply
>> pulling the branch above will create a bit of a mess. Please let me know
>> if you have a tree/branch that I should use as a base for a pull request.
> 
> I have pulled it, everything was fine. Thanks Simon!
> 

Thanks Pablo. I'm back up to speed, if you want, I can pull your tree
into mine.

^ permalink raw reply

* [PATCH V9 00/13] ptp: IEEE 1588 hardware clock support
From: Richard Cochran @ 2011-01-13 11:31 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-api-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Alan Cox, Arnd Bergmann, Christoph Lameter, David Miller,
	John Stultz, Krzysztof Halasa, Peter Zijlstra, Rodolfo Giometti,
	Thomas Gleixner

* Introduction

  The aim of this patch set is to add support for PTP Hardware Clocks
  (PHCs) into the Linux kernel.

  The first nine patches provide infrastructure supporting dynamic
  POSIX clock devices. This new code will be useful for other kinds of
  new clocks, not just PHCs.

  The last four patches implement the PHC code.

  Support for obtaining timestamps from a PHC already exists via the
  SO_TIMESTAMPING socket option, integrated in kernel version 2.6.30.
  This patch set completes the picture by allowing user space programs to
  adjust the PHC and to control its ancillary features.

* Patch ChangeLog

** v9
*** dynamic posix clocks
    - changed the kernel interface to be less idiosyncratic
*** ptp hardware clocks
    - changed the kernel interface to be less idiosyncratic
*** posix clock/ntp syscalls
    - completely removed the CLOCK_DISPATCH macro
    - pick a bit for ADJ_SETOFFSET that is unused by BSD
*** ixp driver
    - resolved todo regarding npe to channel mapping
    - removed weird CamelCase from register definitions
    - removed wrapper functions for reading/writing registers

* Previous Discussions

  This patch set previously appeared on the netdev list. Since V5 of
  the character device patch set, the discussion has moved to the
  lkml.

  - IEEE 1588 hardware clock support [V5]
    http://lkml.org/lkml/2010/8/16/90

  - POSIX clock tuning syscall with static clock ids
    http://lkml.org/lkml/2010/8/23/49

  - POSIX clock tuning syscall with dynamic clock ids
    http://lkml.org/lkml/2010/9/3/119

  - IEEE 1588 hardware clock support [V6]
    http://lkml.org/lkml/2010/9/23/310

  - Dynamic clock devices [RFC]
    http://lkml.org/lkml/2010/11/4/290

  - IEEE 1588 hardware clock support [V7]
    http://lkml.org/lkml/2010/12/16/195

  - IEEE 1588 hardware clock support [V8]
    http://lkml.org/lkml/2010/12/31/128

Thanks for your comments.

Enjoy,
Richard


John Stultz (1):
  time: Introduce timekeeping_inject_offset

Richard Cochran (12):
  ntp: add ADJ_SETOFFSET mode bit
  posix clocks: introduce a syscall for clock tuning.
  posix_clocks: add clock_adjtime for arm
  posix_clocks: add clock_adjtime for blackfin
  posix_clocks: add clock_adjtime for powerpc
  posix_clocks: add clock_adjtime for x86
  posix clocks: cleanup the CLOCK_DISPTACH macro
  posix clocks: introduce dynamic clocks
  ptp: Added a brand new class driver for ptp clocks.
  ptp: Added a clock that uses the eTSEC found on the MPC85xx.
  ptp: Added a clock driver for the IXP46x.
  ptp: Added a clock driver for the National Semiconductor PHYTER.

 Documentation/ABI/testing/sysfs-ptp             |   97 +++
 Documentation/powerpc/dts-bindings/fsl/tsec.txt |   57 ++
 Documentation/ptp/ptp.txt                       |   93 +++
 Documentation/ptp/testptp.c                     |  352 +++++++++
 Documentation/ptp/testptp.mk                    |   33 +
 arch/arm/include/asm/unistd.h                   |    1 +
 arch/arm/kernel/calls.S                         |    1 +
 arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h   |   78 ++
 arch/blackfin/include/asm/unistd.h              |    3 +-
 arch/blackfin/mach-common/entry.S               |    1 +
 arch/powerpc/boot/dts/mpc8313erdb.dts           |   14 +
 arch/powerpc/boot/dts/mpc8572ds.dts             |   14 +
 arch/powerpc/boot/dts/p2020ds.dts               |   14 +
 arch/powerpc/boot/dts/p2020rdb.dts              |   14 +
 arch/powerpc/include/asm/systbl.h               |    1 +
 arch/powerpc/include/asm/unistd.h               |    3 +-
 arch/x86/ia32/ia32entry.S                       |    1 +
 arch/x86/include/asm/unistd_32.h                |    3 +-
 arch/x86/include/asm/unistd_64.h                |    2 +
 arch/x86/kernel/syscall_table_32.S              |    1 +
 drivers/Kconfig                                 |    2 +
 drivers/Makefile                                |    1 +
 drivers/char/mmtimer.c                          |    1 +
 drivers/net/Makefile                            |    1 +
 drivers/net/arm/ixp4xx_eth.c                    |  192 +++++-
 drivers/net/gianfar_ptp.c                       |  448 +++++++++++
 drivers/net/gianfar_ptp_reg.h                   |  113 +++
 drivers/net/phy/Kconfig                         |   29 +
 drivers/net/phy/Makefile                        |    1 +
 drivers/net/phy/dp83640.c                       |  896 +++++++++++++++++++++++
 drivers/net/phy/dp83640_reg.h                   |  261 +++++++
 drivers/ptp/Kconfig                             |   53 ++
 drivers/ptp/Makefile                            |    7 +
 drivers/ptp/ptp_chardev.c                       |  144 ++++
 drivers/ptp/ptp_clock.c                         |  319 ++++++++
 drivers/ptp/ptp_ixp46x.c                        |  332 +++++++++
 drivers/ptp/ptp_private.h                       |   85 +++
 drivers/ptp/ptp_sysfs.c                         |  226 ++++++
 include/linux/Kbuild                            |    1 +
 include/linux/posix-clock.h                     |  150 ++++
 include/linux/posix-timers.h                    |   25 +-
 include/linux/ptp_clock.h                       |   79 ++
 include/linux/ptp_clock_kernel.h                |  141 ++++
 include/linux/syscalls.h                        |    2 +
 include/linux/time.h                            |    3 +
 include/linux/timex.h                           |    3 +-
 kernel/compat.c                                 |  136 +++--
 kernel/posix-cpu-timers.c                       |    8 +-
 kernel/posix-timers.c                           |  257 +++++---
 kernel/time/Makefile                            |    3 +-
 kernel/time/ntp.c                               |   11 +
 kernel/time/posix-clock-syscalls.h              |   39 +
 kernel/time/posix-clock.c                       |  432 +++++++++++
 kernel/time/timekeeping.c                       |   36 +
 54 files changed, 5068 insertions(+), 152 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-ptp
 create mode 100644 Documentation/ptp/ptp.txt
 create mode 100644 Documentation/ptp/testptp.c
 create mode 100644 Documentation/ptp/testptp.mk
 create mode 100644 arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h
 create mode 100644 drivers/net/gianfar_ptp.c
 create mode 100644 drivers/net/gianfar_ptp_reg.h
 create mode 100644 drivers/net/phy/dp83640.c
 create mode 100644 drivers/net/phy/dp83640_reg.h
 create mode 100644 drivers/ptp/Kconfig
 create mode 100644 drivers/ptp/Makefile
 create mode 100644 drivers/ptp/ptp_chardev.c
 create mode 100644 drivers/ptp/ptp_clock.c
 create mode 100644 drivers/ptp/ptp_ixp46x.c
 create mode 100644 drivers/ptp/ptp_private.h
 create mode 100644 drivers/ptp/ptp_sysfs.c
 create mode 100644 include/linux/posix-clock.h
 create mode 100644 include/linux/ptp_clock.h
 create mode 100644 include/linux/ptp_clock_kernel.h
 create mode 100644 kernel/time/posix-clock-syscalls.h
 create mode 100644 kernel/time/posix-clock.c

^ permalink raw reply

* Re: [PATCH] netfilter: ipt_CLUSTERIP: dont flood with "no conntrack!"
From: Pablo Neira Ayuso @ 2011-01-13 11:29 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Eric Dumazet, Netfilter Development Mailinglist, netdev
In-Reply-To: <4D2EE1F8.2040206@trash.net>

On 13/01/11 12:28, Patrick McHardy wrote:
> On 13.01.2011 12:23, Pablo Neira Ayuso wrote:
>> Hi Eric,
>>
>> On 13/01/11 12:13, Eric Dumazet wrote:
>>> ipt_CLUSTERIP users might hit this annoying printk, if they forgot an
>>> "iptables -I INPUT -m state --state INVALID -j DROP" before CLUSTERIP
>>> rule. We could use net_ratelimit() here, or not log the message at all.
>>> I chose to log it once per config.
>>
>> I think that this should be converted to pr_debug() instead, there's
>> also another reference to "unknown protocol" that should be converted as
>> well.
> 
> I think the FIXME could also be removed, we *do* drop invalid
> packets in CLUSTERIP.

Hey! You're back! :-)

^ permalink raw reply

* Re: [PATCH] netfilter: ipt_CLUSTERIP: dont flood with "no conntrack!"
From: Patrick McHardy @ 2011-01-13 11:28 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Eric Dumazet, Netfilter Development Mailinglist, netdev
In-Reply-To: <4D2EE09A.1010409@netfilter.org>

On 13.01.2011 12:23, Pablo Neira Ayuso wrote:
> Hi Eric,
> 
> On 13/01/11 12:13, Eric Dumazet wrote:
>> ipt_CLUSTERIP users might hit this annoying printk, if they forgot an
>> "iptables -I INPUT -m state --state INVALID -j DROP" before CLUSTERIP
>> rule. We could use net_ratelimit() here, or not log the message at all.
>> I chose to log it once per config.
> 
> I think that this should be converted to pr_debug() instead, there's
> also another reference to "unknown protocol" that should be converted as
> well.

I think the FIXME could also be removed, we *do* drop invalid
packets in CLUSTERIP.

^ permalink raw reply

* Re: [PATCH V8 08/13] posix clocks: cleanup the CLOCK_DISPTACH macro
From: Thomas Gleixner @ 2011-01-13 11:25 UTC (permalink / raw)
  To: Richard Cochran
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Alan Cox, Arnd Bergmann, Christoph Lameter, David Miller,
	John Stultz, Krzysztof Halasa, Peter Zijlstra, Rodolfo Giometti
In-Reply-To: <20110113043037.GA17726-7KxsofuKt4IfAd9E5cN8NEzG7cXyKsk/@public.gmane.org>

On Thu, 13 Jan 2011, Richard Cochran wrote:
> On Tue, Jan 11, 2011 at 01:57:23PM +0100, Thomas Gleixner wrote:
> > 
> > static clockid_t clock_get_array_id(const clockid_t id)
> > {
> > 	if (id >= 0)
> > 	       return id < MAX_CLOCKS ? id : POSIX_INV_CLOCK_ID;
> > 
> >       	if (clock_is_posix_cpu(id))
> > 		return POSIX_CPU_CLOCK_ID;
> > 
> > 	return POSIX_INV_CLOCK_ID;
> > }
> > 
> > static inline int dispatch_clock_getres(const clockid_t id, struct timespec *ts)
> > {
> > 	struct k_clock *clk = &posix_clocks[clock_get_array_id(id)];
> > 
> > 	return clk->clock_getres ? clk->clock_getres(id, ts) : -EINVAL;
> > }
> 
> I would like to take this idea one step further, like so:
> 
> static struct k_clock *clockid_to_kclock(const clockid_t id)
> {
> 	if (id >= 0)
> 		return id < MAX_CLOCKS ?
> 			&posix_clocks[id] : &posix_clocks[POSIX_INV_CLOCK_ID];
> 	...
> }
> 
> SYSCALL( ... , const clockid_t id, struct timespec *ts)
> {
> 	struct k_clock *clk = clockid_to_kclock(id);
> 
> 	return clk->clock_getres ? clk->clock_getres(id, ts) : -EINVAL;
> }
> 
> What do you think?

Yeah, that's even better!

^ permalink raw reply

* Re: [PATCH] netfilter: ipt_CLUSTERIP: dont flood with "no conntrack!"
From: Pablo Neira Ayuso @ 2011-01-13 11:23 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Netfilter Development Mailinglist, netdev, Patrick McHardy
In-Reply-To: <1294917210.3570.48.camel@edumazet-laptop>

Hi Eric,

On 13/01/11 12:13, Eric Dumazet wrote:
> ipt_CLUSTERIP users might hit this annoying printk, if they forgot an
> "iptables -I INPUT -m state --state INVALID -j DROP" before CLUSTERIP
> rule. We could use net_ratelimit() here, or not log the message at all.
> I chose to log it once per config.

I think that this should be converted to pr_debug() instead, there's
also another reference to "unknown protocol" that should be converted as
well.

^ permalink raw reply

* [PATCH] netfilter: ipt_CLUSTERIP: dont flood with "no conntrack!"
From: Eric Dumazet @ 2011-01-13 11:13 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Netfilter Development Mailinglist, netdev, Patrick McHardy
In-Reply-To: <4D2E1A74.5080102@netfilter.org>

ipt_CLUSTERIP users might hit this annoying printk, if they forgot an
"iptables -I INPUT -m state --state INVALID -j DROP" before CLUSTERIP
rule. We could use net_ratelimit() here, or not log the message at all.
I chose to log it once per config.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c |   13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1e26a48..bac8739 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -47,6 +47,7 @@ struct clusterip_config {
 	u_int8_t clustermac[ETH_ALEN];		/* the MAC address */
 	struct net_device *dev;			/* device */
 	u_int16_t num_total_nodes;		/* total number of nodes */
+	bool warned_no_conntrack;
 	unsigned long local_nodes;		/* node number array */
 
 #ifdef CONFIG_PROC_FS
@@ -301,10 +302,14 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 	ct = nf_ct_get(skb, &ctinfo);
 	if (ct == NULL) {
-		pr_info("no conntrack!\n");
-			/* FIXME: need to drop invalid ones, since replies
-			 * to outgoing connections of other nodes will be
-			 * marked as INVALID */
+		if (unlikely(!cipinfo->config->warned_no_conntrack)) {
+			cipinfo->config->warned_no_conntrack = true;
+			pr_info("no conntrack!\n");
+		}
+		/* FIXME: need to drop invalid ones, since replies
+		 * to outgoing connections of other nodes will be
+		 * marked as INVALID
+		 */
 		return NF_DROP;
 	}
 



^ permalink raw reply related

* Re: [PATCH 00/22] ipvs namespaces v3.3
From: Pablo Neira Ayuso @ 2011-01-13 11:11 UTC (permalink / raw)
  To: Simon Horman
  Cc: netfilter-devel, lvs-devel, netdev, Patrick McHardy,
	Julian Anastasov, Hans Schillstrom
In-Reply-To: <1294883588-5683-1-git-send-email-horms@verge.net.au>

On 13/01/11 02:52, Simon Horman wrote:
> Hi Pablo,
> 
> this changeset includes the following changes since the v3.2 series
> which was most recently posted as "[GIT PULL nf-next-2.6] ipvs namespaces".
> 
> * Remove several hunks that only make whitespace changes

Thanks a lot for doing this.

> * Add Acked-by: Julian Anastasov <ja@ssi.bg>
>   (It was an omission from v3.2)
> * Fix merge conflicts
> 
> There are two changes that produce conflicts
> * In the current net-next-2.6 tree but absent from the current nf-next-2.6 tree
>   there is "workqueue: convert
>   cancel_rearming_delayed_work[queue]() users to cancel_delayed_work_sync()"
> * And in the current nf-next-2.6 tree  but absent from the current
>   net-next-2.6 tree there is "net: use the macros defined for the members
>   of flowi"

nf-*-2.6 are Patrick's trees. My trees are here:

http://1984.lsi.us.es/git/

> In order to create this series I merged net-next-2.6 into nf-next-2.6.
> The result is at
> git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-test-2.6 ipvs-netns3.3
> 
> However, I guess that you have already done your own merge and simply
> pulling the branch above will create a bit of a mess. Please let me know
> if you have a tree/branch that I should use as a base for a pull request.

I have pulled it, everything was fine. Thanks Simon!

^ permalink raw reply

* Re: [PATCH] ipv4: devconf: start IPV4_DEVCONF_* from 0
From: Thomas Graf @ 2011-01-13 10:02 UTC (permalink / raw)
  To: Lucian Adrian Grijincu
  Cc: David Miller, netdev, kuznet, pekkas, jmorris, yoshfuji, kaber,
	opurdila, ddvlad
In-Reply-To: <AANLkTimLyNf-sgLtzf0i3k7om9MEQ2t8tSOwDcpS66dv@mail.gmail.com>

On Thu, Jan 13, 2011 at 09:50:14AM +0200, Lucian Adrian Grijincu wrote:
> Yes it works, but there does not seem to be a good reason why to
> complicate things like this (again the sentinel nature of zero is not
> used in any place here).

The reason I didn't change anything was the same as Dave's reply, I
thought it must have been done on purpose. It probably was but I can't
spot any reason now either.

Also, IPv6 is doing just fine with using '0' as its first devconf id.

I have no objections to changing this at all, but we don't gain much either.

^ permalink raw reply

* RE: [E1000-devel] [e100] Page allocation failure warning(?) in 2.6.36.3
From: Chris Rankin @ 2011-01-13  9:24 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: JesseBrandeburg, David Miller, e1000-devel@lists.sourceforge.net,
	Tushar NDave, netdev@vger.kernel.org, Jeffrey TKirsher
In-Reply-To: <1294909556.3570.25.camel@edumazet-laptop>

-- On Thu, 13/1/11, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> If you care of performance, dont unload/reload your driver
> all the time, and dont use modules (this matter on old hardware because
> of TLB misses)

As long as I can route the full bandwidth of my ADSLv2 connection then it's fine.

Cheers,
Chris



      

^ permalink raw reply

* Re: what are txqueuelen and nic ring parameters exactly?
From: Jesper Dangaard Brouer @ 2011-01-13  9:21 UTC (permalink / raw)
  To: MK; +Cc: netdev
In-Reply-To: <AANLkTi=_hbmvgO9wrX6sg7Q=Wh+iVaqGdga08M-ypqae@mail.gmail.com>

On Mon, 10 Jan 2011, MK wrote:

> I often come across two variables that can be tuned for networking -
>
> 1) txqueuelen (via ifconfig )
> 2) NIC ring parameters for tx and rx (via ethtool)
>
> Can someone please tell me where these queues are exactly? Are both
> the same (seems not since their current values are different on my
> computer) . Is txqueuelen somehow part of the linux networking
> subsystem whereas the other is purely a h/w device construct?

You are spot on.

The txqueuelen is a Linux network stack thing, and is related to the 
traffic control subsystem, BUT only when using the default qdisc 
(pfifo_fast or mq).
If you add another qdisc, then it's that specific qdisc's limits which 
count, not the device txqueuelen.

When tuning these queue lengths, you probably decrease these queue sizes, 
NOT increase!

See the bufferbloat debate:
   http://netoptimizer.blogspot.com/2010/12/buffer-bloat-calculations.html
   http://netoptimizer.blogspot.com/2011/01/bufferbloat-wireless-is-worse-than.html
   http://gettys.wordpress.com/bufferbloat-faq/

Cheers,
   Jesper Brouer

--
-------------------------------------------------------------------
MSc. Master of Computer Science
Dept. of Computer Science, University of Copenhagen
Author of http://www.adsl-optimizer.dk
-------------------------------------------------------------------

^ permalink raw reply

* STMMAC driver: NFS Problem on 2.6.37
From: deepaksi @ 2011-01-13  9:09 UTC (permalink / raw)
  To: Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, linux-nfs-u79uwXL29TY76Z2rM5mHXA,
	Armando VISCONTI, Shiraz HASHIM, Viresh KUMAR

Hi

I am facing a problem related to nfs boot, while using the stmmac driver
ported on 2.6.37 kernel. When we use a JFFS2 file system and mount the kernel,
the network driver works fine.

I have been following the mailing list and could find some issues with NFS 
on 2.6.37 but I am not too sure whether the kernel crash I am getting is 
related to that.

The driver worked fine on 2.6.32 kernel, but while booting the 2.6.37
kernel I get the following log messages:

stmmac: Rx Checksum Offload Engine supported
        TX Checksum insertion supported
IP-Config: Complete:
     device=eth0, addr=192.168.1.10, mask=255.255.255.0, gw=255.255.255.255,
     host=192.168.1.10, domain=, nis-domain=(none),
     bootserver=192.168.1.1, rootserver=192.168.1.1, rootpath=
VFS: Unable to mount root fs via NFS, trying floppy.
VFS: Cannot open root device "nfs" or unknown-block(2,0)
Please append a correct "root=" boot option; here are the available
partitions:
1f00              64 mtdblock0  (driver?)
1f01             256 mtdblock1  (driver?)
1f02            2816 mtdblock2  (driver?)
1f03            5056 mtdblock3  (driver?)
Kernel panic - not syncing: VFS: Unable to mount root fs on
unknown-block(2,0)
Backtrace:
[<c00370f0>] (dump_backtrace+0x0/0x110) from [<c0037234>]
(dump_stack+0x18/0x1c)
 r7:c7b5b000 r6:00000000 r5:c7b5b015 r4:c04296b8
[<c003721c>] (dump_stack+0x0/0x1c) from [<c004ebf8>] (panic+0x60/0x180)
[<c004eb98>] (panic+0x0/0x180) from [<c0009114>]
(mount_block_root+0x1d4/0x214)
 r3:00000000 r2:00000001 r1:c782bf50 r0:c0394851
[<c0008f40>] (mount_block_root+0x0/0x214) from [<c00091fc>]
(mount_root+0xa8/0xc8)
[<c0009154>] (mount_root+0x0/0xc8) from [<c0009388>]
(prepare_namespace+0x16c/0x1d0)
 r4:c04288c0
[<c000921c>] (prepare_namespace+0x0/0x1d0) from [<c0008904>]
(kernel_init+0x1cc/0x220)
 r5:c0402048 r4:c0428860
[<c0008738>] (kernel_init+0x0/0x220) from [<c00522a8>] (do_exit+0x0/0x5e0)
 r7:00000013 r6:c00522a8 r5:c0008738 r4:00000000
CPU0: stopping
Backtrace:
[<c00370f0>] (dump_backtrace+0x0/0x110) from [<c0037234>]
(dump_stack+0x18/0x1c)
 r7:c0405484 r6:00000406 r5:00000000 r4:00000000
[<c003721c>] (dump_stack+0x0/0x1c) from [<c002d334>] (do_IPI+0xb4/0x124)
[<c002d280>] (do_IPI+0x0/0x124) from [<c0032bb4>] (__irq_svc+0x34/0xc0)
Exception stack(0xc03f3f50 to 0xc03f3f98)
3f40:                                     c0402048 00000000 c03f3f98
00000000
3f60: c03f2000 c04288dc c0027290 c0405484 000258e8 411fc091 00000000
c03f3fa4
3f80: c03f3fa8 c03f3f98 c0034a24 c0034a28 60000013 ffffffff
 r5:fc800100 r4:ffffffff
[<c00349fc>] (default_idle+0x0/0x30) from [<c0034874>] (cpu_idle+0x80/0xc0)
[<c00347f4>] (cpu_idle+0x0/0xc0) from [<c030602c>] (rest_init+0x64/0x7c)
 r5:c04288dc r4:c04020b0
[<c0305fc8>] (rest_init+0x0/0x7c) from [<c0008bd4>]
(start_kernel+0x27c/0x2d8)
[<c0008958>] (start_kernel+0x0/0x2d8) from [<00008038>] (0x8038)
 r5:c0401fac r4:10c5387d

I have tried the same over latest source picked from linus tree,
4162cf64973df51fc885825bc9ca4d055891c49f
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6

We are using version 3 of the NFS protocol in the kernel's NFS client.


Regards
Deepak
ST Microelectronics

.


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: [E1000-devel] [e100] Page allocation failure warning(?) in 2.6.36.3
From: Eric Dumazet @ 2011-01-13  9:05 UTC (permalink / raw)
  To: Chris Rankin
  Cc: JesseBrandeburg, David Miller, e1000-devel@lists.sourceforge.net,
	Tushar NDave, netdev@vger.kernel.org, Jeffrey TKirsher
In-Reply-To: <895344.13845.qm@web121707.mail.ne1.yahoo.com>

Le jeudi 13 janvier 2011 à 01:00 -0800, Chris Rankin a écrit :
> --- On Thu, 13/1/11, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Problem is e100 allocates an order-6 page in DMA zone
> > (a 256 KB contigous area of ram)
> > 
> > This contigous area of ram is not available but just after
> > booting...
> 
> I suspected as much. Fortunately, this machine has no function apart from routing and can happily left untouched for extended periods of time.
> 
> > On such small router, I doubt you need more than 64 slots
> > in TX ring buffer.
> 
> But what would the effect of that change be to the interfaces' performance, please?

If you care about performance, don't unload/reload your driver all the time,
and don't use modules (this matters on old hardware because of TLB misses).

Anyway, the change ( 128 -> 64 ) is not needed, since the kernel message
is a warning only. The allocation is retried and apparently succeeds.

The __GFP_NOWARN should make the failed allocation not noticed at all.




^ permalink raw reply

* RE: [E1000-devel] [e100] Page allocation failure warning(?) in 2.6.36.3
From: Chris Rankin @ 2011-01-13  9:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: JesseBrandeburg, David Miller, e1000-devel@lists.sourceforge.net,
	Tushar NDave, netdev@vger.kernel.org, Jeffrey TKirsher
In-Reply-To: <1294894536.3335.510.camel@edumazet-laptop>

--- On Thu, 13/1/11, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Problem is e100 allocates an order-6 page in DMA zone
> (a 256 KB contiguous area of ram)
> 
> This contiguous area of ram is not available but just after
> booting...

I suspected as much. Fortunately, this machine has no function apart from routing and can happily be left untouched for extended periods of time.

> On such small router, I doubt you need more than 64 slots
> in TX ring buffer.

But what would the effect of that change be to the interfaces' performance, please?

Cheers,
Chris


      

^ permalink raw reply

* [PATCH 10/10] GRETH: resolve SMP issues and other problems
From: Daniel Hellstrom @ 2011-01-13  8:25 UTC (permalink / raw)
  To: davem; +Cc: netdev, kristoffer
In-Reply-To: <1294907135-24884-1-git-send-email-daniel@gaisler.com>

Fixes the following:
1. POLL should not enable IRQ when work is not completed
2. No locking between TX descriptor cleaning and XMIT descriptor handling
3. No locking between RX POLL and XMIT modifying control register
4. Since TX cleaning (called from POLL) runs in parallel with XMIT,
   otherwise unnecessary locking is needed.
5. IRQ handler looks at RX frame status solely, this is wrong when IRQ is
   temporarily disabled (in POLL), and when IRQ is shared.
6. IRQ handler clears IRQ status, which is unnecessary
7. TX queue was stopped preventively when fewer than MAX_SKB_FRAGS+1
   descriptors were available after an SKB had been scheduled by XMIT.
   Instead the TX queue is now stopped only when not enough descriptors are
   available upon entering XMIT.

It was hard to split up this patch in smaller pieces since all are tied
together somehow.

Note that the RX flag used in the interrupt handler does not signal that an
interrupt was asserted, but that a frame was received. The same goes for TX.
Also, if the RX flag is already set before the IRQ enable bit is set, no IRQ
is asserted until a new frame is received. So extra care must be taken to
avoid enabling the IRQ when all descriptors are already used, otherwise a
deadlock results. See the new POLL implementation, which enables the IRQ and
then looks at the RX flag to determine if one or more IRQs may have been
missed. The TX/RX flags are cleared before handling previously enabled
descriptors; this ensures that the RX/TX flags are valid when determining
whether the IRQ should be turned on again.

Moving TX cleaning from POLL to XMIT in the standard case removes some
locking trouble. Enabling TX cleaning from poll only when not enough TX
descriptors are available is safe because the TX queue is at the same time
stopped, thus XMIT will not be called. The TX queue is woken up again when
enough descriptors are available.

TX Frames are always enabled with IRQ, however the TX IRQ Enable flag will
not be enabled until XMIT must wait for free descriptors.

Locking RX and XMIT parts of the driver from each other is needed because
the RX/TX enable bits share the same register.

Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
---
 drivers/net/greth.c |  159 +++++++++++++++++++++++++++++---------------------
 1 files changed, 92 insertions(+), 67 deletions(-)

diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index b9623d2..954f65a 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -1,7 +1,7 @@
 /*
  * Aeroflex Gaisler GRETH 10/100/1G Ethernet MAC.
  *
- * 2005-2009 (c) Aeroflex Gaisler AB
+ * 2005-2010 (c) Aeroflex Gaisler AB
  *
  * This driver supports GRETH 10/100 and GRETH 10/100/1G Ethernet MACs
  * available in the GRLIB VHDL IP core library.
@@ -402,12 +402,20 @@ greth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct greth_private *greth = netdev_priv(dev);
 	struct greth_bd *bdp;
 	int err = NETDEV_TX_OK;
-	u32 status, dma_addr;
+	u32 status, dma_addr, ctrl;
+	unsigned long flags;
 
-	bdp = greth->tx_bd_base + greth->tx_next;
+	/* Clean TX Ring */
+	greth_clean_tx(greth->netdev);
 
 	if (unlikely(greth->tx_free <= 0)) {
+		spin_lock_irqsave(&greth->devlock, flags);/*save from poll/irq*/
+		ctrl = GRETH_REGLOAD(greth->regs->control);
+		/* Enable TX IRQ only if not already in poll() routine */
+		if (ctrl & GRETH_RXI)
+			GRETH_REGSAVE(greth->regs->control, ctrl | GRETH_TXI);
 		netif_stop_queue(dev);
+		spin_unlock_irqrestore(&greth->devlock, flags);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -420,13 +428,14 @@ greth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto out;
 	}
 
+	bdp = greth->tx_bd_base + greth->tx_next;
 	dma_addr = greth_read_bd(&bdp->addr);
 
 	memcpy((unsigned char *) phys_to_virt(dma_addr), skb->data, skb->len);
 
 	dma_sync_single_for_device(greth->dev, dma_addr, skb->len, DMA_TO_DEVICE);
 
-	status = GRETH_BD_EN | (skb->len & GRETH_BD_LEN);
+	status = GRETH_BD_EN | GRETH_BD_IE | (skb->len & GRETH_BD_LEN);
 
 	/* Wrap around descriptor ring */
 	if (greth->tx_next == GRETH_TXBD_NUM_MASK) {
@@ -436,22 +445,11 @@ greth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	greth->tx_next = NEXT_TX(greth->tx_next);
 	greth->tx_free--;
 
-	/* No more descriptors */
-	if (unlikely(greth->tx_free == 0)) {
-
-		/* Free transmitted descriptors */
-		greth_clean_tx(dev);
-
-		/* If nothing was cleaned, stop queue & wait for irq */
-		if (unlikely(greth->tx_free == 0)) {
-			status |= GRETH_BD_IE;
-			netif_stop_queue(dev);
-		}
-	}
-
 	/* Write descriptor control word and enable transmission */
 	greth_write_bd(&bdp->stat, status);
+	spin_lock_irqsave(&greth->devlock, flags); /*save from poll/irq*/
 	greth_enable_tx(greth);
+	spin_unlock_irqrestore(&greth->devlock, flags);
 
 out:
 	dev_kfree_skb(skb);
@@ -464,13 +462,23 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct greth_private *greth = netdev_priv(dev);
 	struct greth_bd *bdp;
-	u32 status = 0, dma_addr;
+	u32 status = 0, dma_addr, ctrl;
 	int curr_tx, nr_frags, i, err = NETDEV_TX_OK;
+	unsigned long flags;
 
 	nr_frags = skb_shinfo(skb)->nr_frags;
 
+	/* Clean TX Ring */
+	greth_clean_tx_gbit(dev);
+
 	if (greth->tx_free < nr_frags + 1) {
+		spin_lock_irqsave(&greth->devlock, flags);/*save from poll/irq*/
+		ctrl = GRETH_REGLOAD(greth->regs->control);
+		/* Enable TX IRQ only if not already in poll() routine */
+		if (ctrl & GRETH_RXI)
+			GRETH_REGSAVE(greth->regs->control, ctrl | GRETH_TXI);
 		netif_stop_queue(dev);
+		spin_unlock_irqrestore(&greth->devlock, flags);
 		err = NETDEV_TX_BUSY;
 		goto out;
 	}
@@ -523,14 +531,8 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev)
 		/* More fragments left */
 		if (i < nr_frags - 1)
 			status |= GRETH_TXBD_MORE;
-
-		/* ... last fragment, check if out of descriptors  */
-		else if (greth->tx_free - nr_frags - 1 < (MAX_SKB_FRAGS + 1)) {
-
-			/* Enable interrupts and stop queue */
-			status |= GRETH_BD_IE;
-			netif_stop_queue(dev);
-		}
+		else
+			status |= GRETH_BD_IE; /* enable IRQ on last fragment */
 
 		greth_write_bd(&bdp->stat, status);
 
@@ -558,7 +560,9 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev)
 
 	wmb();
 
+	spin_lock_irqsave(&greth->devlock, flags); /*save from poll/irq*/
 	greth_enable_tx(greth);
+	spin_unlock_irqrestore(&greth->devlock, flags);
 
 	return NETDEV_TX_OK;
 
@@ -580,12 +584,11 @@ out:
 	return err;
 }
 
-
 static irqreturn_t greth_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct greth_private *greth;
-	u32 status;
+	u32 status, ctrl;
 	irqreturn_t retval = IRQ_NONE;
 
 	greth = netdev_priv(dev);
@@ -595,14 +598,15 @@ static irqreturn_t greth_interrupt(int irq, void *dev_id)
 	/* Get the interrupt events that caused us to be here. */
 	status = GRETH_REGLOAD(greth->regs->status);
 
-	/* Handle rx and tx interrupts through poll */
-	if (status & (GRETH_INT_RE | GRETH_INT_RX |
-		      GRETH_INT_TE | GRETH_INT_TX)) {
+	/* Must see if interrupts are enabled also, INT_TX|INT_RX flags may be
+	 * set regardless of whether IRQ is enabled or not. Especially
+	 * important when shared IRQ.
+	 */
+	ctrl = GRETH_REGLOAD(greth->regs->control);
 
-		/* Clear interrupt status */
-		GRETH_REGSAVE(greth->regs->status,
-			      status & (GRETH_INT_RE | GRETH_INT_RX |
-					GRETH_INT_TE | GRETH_INT_TX));
+	/* Handle rx and tx interrupts through poll */
+	if (((status & (GRETH_INT_RE | GRETH_INT_RX)) && (ctrl & GRETH_RXI)) ||
+	    ((status & (GRETH_INT_TE | GRETH_INT_TX)) && (ctrl & GRETH_TXI))) {
 		retval = IRQ_HANDLED;
 
 		/* Disable interrupts and schedule poll() */
@@ -626,6 +630,8 @@ static void greth_clean_tx(struct net_device *dev)
 
 	while (1) {
 		bdp = greth->tx_bd_base + greth->tx_last;
+		GRETH_REGSAVE(greth->regs->status, GRETH_INT_TE | GRETH_INT_TX);
+		mb();
 		stat = greth_read_bd(&bdp->stat);
 
 		if (unlikely(stat & GRETH_BD_EN))
@@ -686,7 +692,10 @@ static void greth_clean_tx_gbit(struct net_device *dev)
 
 		/* We only clean fully completed SKBs */
 		bdp_last_frag = greth->tx_bd_base + SKIP_TX(greth->tx_last, nr_frags);
-		stat = bdp_last_frag->stat;
+
+		GRETH_REGSAVE(greth->regs->status, GRETH_INT_TE | GRETH_INT_TX);
+		mb();
+		stat = greth_read_bd(&bdp_last_frag->stat);
 
 		if (stat & GRETH_BD_EN)
 			break;
@@ -718,21 +727,9 @@ static void greth_clean_tx_gbit(struct net_device *dev)
 		greth->tx_free += nr_frags+1;
 		dev_kfree_skb(skb);
 	}
-	if (greth->tx_free > (MAX_SKB_FRAGS + 1)) {
-		netif_wake_queue(dev);
-	}
-}
 
-static int greth_pending_packets(struct greth_private *greth)
-{
-	struct greth_bd *bdp;
-	u32 status;
-	bdp = greth->rx_bd_base + greth->rx_cur;
-	status = greth_read_bd(&bdp->stat);
-	if (status & GRETH_BD_EN)
-		return 0;
-	else
-		return 1;
+	if (netif_queue_stopped(dev) && (greth->tx_free > (MAX_SKB_FRAGS+1)))
+		netif_wake_queue(dev);
 }
 
 static int greth_rx(struct net_device *dev, int limit)
@@ -743,20 +740,24 @@ static int greth_rx(struct net_device *dev, int limit)
 	int pkt_len;
 	int bad, count;
 	u32 status, dma_addr;
+	unsigned long flags;
 
 	greth = netdev_priv(dev);
 
 	for (count = 0; count < limit; ++count) {
 
 		bdp = greth->rx_bd_base + greth->rx_cur;
+		GRETH_REGSAVE(greth->regs->status, GRETH_INT_RE | GRETH_INT_RX);
+		mb();
 		status = greth_read_bd(&bdp->stat);
-		dma_addr = greth_read_bd(&bdp->addr);
-		bad = 0;
 
 		if (unlikely(status & GRETH_BD_EN)) {
 			break;
 		}
 
+		dma_addr = greth_read_bd(&bdp->addr);
+		bad = 0;
+
 		/* Check status for errors. */
 		if (unlikely(status & GRETH_RXBD_STATUS)) {
 			if (status & GRETH_RXBD_ERR_FT) {
@@ -818,7 +819,9 @@ static int greth_rx(struct net_device *dev, int limit)
 
 		dma_sync_single_for_device(greth->dev, dma_addr, MAX_FRAME_SIZE, DMA_FROM_DEVICE);
 
+		spin_lock_irqsave(&greth->devlock, flags); /* save from XMIT */
 		greth_enable_rx(greth);
+		spin_unlock_irqrestore(&greth->devlock, flags);
 
 		greth->rx_cur = NEXT_RX(greth->rx_cur);
 	}
@@ -852,6 +855,7 @@ static int greth_rx_gbit(struct net_device *dev, int limit)
 	int pkt_len;
 	int bad, count = 0;
 	u32 status, dma_addr;
+	unsigned long flags;
 
 	greth = netdev_priv(dev);
 
@@ -859,6 +863,8 @@ static int greth_rx_gbit(struct net_device *dev, int limit)
 
 		bdp = greth->rx_bd_base + greth->rx_cur;
 		skb = greth->rx_skbuff[greth->rx_cur];
+		GRETH_REGSAVE(greth->regs->status, GRETH_INT_RE | GRETH_INT_RX);
+		mb();
 		status = greth_read_bd(&bdp->stat);
 		bad = 0;
 
@@ -946,7 +952,9 @@ static int greth_rx_gbit(struct net_device *dev, int limit)
 
 		wmb();
 		greth_write_bd(&bdp->stat, status);
+		spin_lock_irqsave(&greth->devlock, flags);
 		greth_enable_rx(greth);
+		spin_unlock_irqrestore(&greth->devlock, flags);
 		greth->rx_cur = NEXT_RX(greth->rx_cur);
 	}
 
@@ -958,15 +966,18 @@ static int greth_poll(struct napi_struct *napi, int budget)
 {
 	struct greth_private *greth;
 	int work_done = 0;
+	unsigned long flags;
+	u32 mask, ctrl;
 	greth = container_of(napi, struct greth_private, napi);
 
-	if (greth->gbit_mac) {
-		greth_clean_tx_gbit(greth->netdev);
-	} else {
-		greth_clean_tx(greth->netdev);
+restart_txrx_poll:
+	if (netif_queue_stopped(greth->netdev)) {
+		if (greth->gbit_mac)
+			greth_clean_tx_gbit(greth->netdev);
+		else
+			greth_clean_tx(greth->netdev);
 	}
 
-restart_poll:
 	if (greth->gbit_mac) {
 		work_done += greth_rx_gbit(greth->netdev, budget - work_done);
 	} else {
@@ -975,15 +986,29 @@ restart_poll:
 
 	if (work_done < budget) {
 
-		napi_complete(napi);
+		spin_lock_irqsave(&greth->devlock, flags);
 
-		if (greth_pending_packets(greth)) {
-			napi_reschedule(napi);
-			goto restart_poll;
+		ctrl = GRETH_REGLOAD(greth->regs->control);
+		if (netif_queue_stopped(greth->netdev)) {
+			GRETH_REGSAVE(greth->regs->control,
+					ctrl | GRETH_TXI | GRETH_RXI);
+			mask = GRETH_INT_RX | GRETH_INT_RE |
+			       GRETH_INT_TX | GRETH_INT_TE;
+		} else {
+			GRETH_REGSAVE(greth->regs->control, ctrl | GRETH_RXI);
+			mask = GRETH_INT_RX | GRETH_INT_RE;
+		}
+
+		if (GRETH_REGLOAD(greth->regs->status) & mask) {
+			GRETH_REGSAVE(greth->regs->control, ctrl);
+			spin_unlock_irqrestore(&greth->devlock, flags);
+			goto restart_txrx_poll;
+		} else {
+			__napi_complete(napi);
+			spin_unlock_irqrestore(&greth->devlock, flags);
 		}
 	}
 
-	greth_enable_irqs(greth);
 	return work_done;
 }
 
@@ -1178,11 +1203,11 @@ static const struct ethtool_ops greth_ethtool_ops = {
 };
 
 static struct net_device_ops greth_netdev_ops = {
-	.ndo_open = greth_open,
-	.ndo_stop = greth_close,
-	.ndo_start_xmit = greth_start_xmit,
-	.ndo_set_mac_address = greth_set_mac_add,
-	.ndo_validate_addr 	= eth_validate_addr,
+	.ndo_open		= greth_open,
+	.ndo_stop		= greth_close,
+	.ndo_start_xmit		= greth_start_xmit,
+	.ndo_set_mac_address	= greth_set_mac_add,
+	.ndo_validate_addr	= eth_validate_addr,
 };
 
 static inline int wait_for_mdio(struct greth_private *greth)
-- 
1.5.4


^ permalink raw reply related

* [PATCH 03/10] GRETH: added no_gbit option
From: Daniel Hellstrom @ 2011-01-13  8:25 UTC (permalink / raw)
  To: davem; +Cc: netdev, kristoffer
In-Reply-To: <1294907135-24884-1-git-send-email-daniel@gaisler.com>

For debug only. The driver does not report that it is GBit capable, instead
it will report 10/100 mode to the generic PHY layer.

Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
---
 drivers/net/greth.c |   15 +++++++++++++--
 drivers/net/greth.h |    1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index 1b10186..ef8da22 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -66,6 +66,10 @@ static int greth_edcl = 1;
 module_param(greth_edcl, int, 0);
 MODULE_PARM_DESC(greth_edcl, "GRETH EDCL usage indicator. Set to 1 if EDCL is used.");
 
+static int no_gbit = 0;
+module_param(no_gbit, int, S_IRUGO);
+MODULE_PARM_DESC(no_gbit, "GRETH reports only 10/100 support to PHY layer if set to 1. Only affects GRETH GBit MAC, default 0 (off).");
+
 static int greth_open(struct net_device *dev);
 static netdev_tx_t greth_start_xmit(struct sk_buff *skb,
 	   struct net_device *dev);
@@ -1284,7 +1288,7 @@ static int greth_mdio_probe(struct net_device *dev)
 	}
 
 	ret = phy_connect_direct(dev, phy, &greth_link_change,
-			0, greth->gbit_mac ?
+			0, greth->gbit_phy_support ?
 			PHY_INTERFACE_MODE_GMII :
 			PHY_INTERFACE_MODE_MII);
 	if (ret) {
@@ -1293,7 +1297,7 @@ static int greth_mdio_probe(struct net_device *dev)
 		return ret;
 	}
 
-	if (greth->gbit_mac)
+	if (greth->gbit_phy_support)
 		phy->supported &= PHY_GBIT_FEATURES;
 	else
 		phy->supported &= PHY_BASIC_FEATURES;
@@ -1447,6 +1451,13 @@ static int __devinit greth_of_probe(struct platform_device *ofdev, const struct
 	tmp = GRETH_REGLOAD(regs->control);
 	greth->gbit_mac = (tmp >> 27) & 1;
 
+	/* Let user skip GBit link mode by telling MDIO layer that MAC does
+	 * not support GBIT (for debug) */
+	if (greth->gbit_mac && !no_gbit)
+		greth->gbit_phy_support = 1;
+	else
+		greth->gbit_phy_support = 0;
+
 	/* Check for multicast capability */
 	greth->multicast = (tmp >> 25) & 1;
 
diff --git a/drivers/net/greth.h b/drivers/net/greth.h
index 03ad903..9414169 100644
--- a/drivers/net/greth.h
+++ b/drivers/net/greth.h
@@ -138,6 +138,7 @@ struct greth_private {
 	u8 gbit_mac;
 	u8 mdio_int_en;
 	u8 edcl;
+	u8 gbit_phy_support;
 };
 
 #endif
-- 
1.5.4


^ permalink raw reply related

* [PATCH 06/10] GRETH: GBit transmit descriptor handling optimization
From: Daniel Hellstrom @ 2011-01-13  8:25 UTC (permalink / raw)
  To: davem; +Cc: netdev, kristoffer
In-Reply-To: <1294907135-24884-1-git-send-email-daniel@gaisler.com>

It is safe to enable all fragments before enabling the first descriptor;
this way the descriptors don't have to be processed twice. An extra memory
barrier is also added.

Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
---
 drivers/net/greth.c |   19 ++++++++++---------
 1 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index 27578c9..72a4317 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -513,7 +513,7 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev)
 		greth->tx_skbuff[curr_tx] = NULL;
 		bdp = greth->tx_bd_base + curr_tx;
 
-		status = GRETH_TXBD_CSALL;
+		status = GRETH_TXBD_CSALL | GRETH_BD_EN;
 		status |= frag->size & GRETH_BD_LEN;
 
 		/* Wrap around descriptor ring */
@@ -550,26 +550,27 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev)
 
 	wmb();
 
-	/* Enable the descriptors that we configured ...  */
-	for (i = 0; i < nr_frags + 1; i++) {
-		bdp = greth->tx_bd_base + greth->tx_next;
-		greth_write_bd(&bdp->stat, greth_read_bd(&bdp->stat) | GRETH_BD_EN);
-		greth->tx_next = NEXT_TX(greth->tx_next);
-		greth->tx_free--;
-	}
+	/* Enable the descriptor chain by enabling the first descriptor */
+	bdp = greth->tx_bd_base + greth->tx_next;
+	greth_write_bd(&bdp->stat, greth_read_bd(&bdp->stat) | GRETH_BD_EN);
+	greth->tx_next = curr_tx;
+	greth->tx_free -= nr_frags + 1;
+
+	wmb();
 
 	greth_enable_tx(greth);
 
 	return NETDEV_TX_OK;
 
 frag_map_error:
-	/* Unmap SKB mappings that succeeded */
+	/* Unmap SKB mappings that succeeded and disable descriptor */
 	for (i = 0; greth->tx_next + i != curr_tx; i++) {
 		bdp = greth->tx_bd_base + greth->tx_next + i;
 		dma_unmap_single(greth->dev,
 				 greth_read_bd(&bdp->addr),
 				 greth_read_bd(&bdp->stat) & GRETH_BD_LEN,
 				 DMA_TO_DEVICE);
+		greth_write_bd(&bdp->stat, 0);
 	}
 map_error:
 	if (net_ratelimit())
-- 
1.5.4


^ permalink raw reply related

* [PATCH 07/10] GRETH: fixed skb buffer memory leak on frame errors
From: Daniel Hellstrom @ 2011-01-13  8:25 UTC (permalink / raw)
  To: davem; +Cc: netdev, kristoffer
In-Reply-To: <1294907135-24884-1-git-send-email-daniel@gaisler.com>

A new SKB buffer should not be allocated when the old SKB is reused.

Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
---
 drivers/net/greth.c |   18 ++++++++++++++----
 1 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index 72a4317..888dc65 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -880,10 +880,9 @@ static int greth_rx_gbit(struct net_device *dev, int limit)
 			}
 		}
 
-		/* Allocate new skb to replace current */
-		newskb = netdev_alloc_skb(dev, MAX_FRAME_SIZE + NET_IP_ALIGN);
-
-		if (!bad && newskb) {
+		/* Allocate new skb to replace current, not needed if the
+		 * current skb can be reused */
+		if (!bad && (newskb=netdev_alloc_skb(dev, MAX_FRAME_SIZE + NET_IP_ALIGN))) {
 			skb_reserve(newskb, NET_IP_ALIGN);
 
 			dma_addr = dma_map_single(greth->dev,
@@ -920,11 +919,22 @@ static int greth_rx_gbit(struct net_device *dev, int limit)
 				if (net_ratelimit())
 					dev_warn(greth->dev, "Could not create DMA mapping, dropping packet\n");
 				dev_kfree_skb(newskb);
+				/* reusing current skb, so it is a drop */
 				dev->stats.rx_dropped++;
 			}
+		} else if (bad) {
+			/* Bad Frame transfer, the skb is reused */
+			dev->stats.rx_dropped++;
 		} else {
+			/* Failed Allocating a new skb. This is rather stupid
+			 * but the current "filled" skb is reused, as if
+			 * transfer failure. One could argue that RX descriptor
+			 * table handling should be divided into cleaning and
+			 * filling as the TX part of the driver
+			 */
 			if (net_ratelimit())
 				dev_warn(greth->dev, "Could not allocate SKB, dropping packet\n");
+			/* reusing current skb, so it is a drop */
 			dev->stats.rx_dropped++;
 		}
 
-- 
1.5.4


^ permalink raw reply related

* [PATCH 09/10] GRETH: handle frame error interrupts
From: Daniel Hellstrom @ 2011-01-13  8:25 UTC (permalink / raw)
  To: davem; +Cc: netdev, kristoffer
In-Reply-To: <1294907135-24884-1-git-send-email-daniel@gaisler.com>

Frame error interrupts must also be handled since the RX flag only indicates
successful reception. It is unlikely, but the old code may lead to deadlock
if 128 error frames are received in a row.

Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
---
 drivers/net/greth.c |    9 +++++----
 drivers/net/greth.h |    2 ++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index fea1e20..b9623d2 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -596,12 +596,13 @@ static irqreturn_t greth_interrupt(int irq, void *dev_id)
 	status = GRETH_REGLOAD(greth->regs->status);
 
 	/* Handle rx and tx interrupts through poll */
-	if (status & (GRETH_INT_RX | GRETH_INT_TX)) {
+	if (status & (GRETH_INT_RE | GRETH_INT_RX |
+		      GRETH_INT_TE | GRETH_INT_TX)) {
 
 		/* Clear interrupt status */
-		GRETH_REGORIN(greth->regs->status,
-			      status & (GRETH_INT_RX | GRETH_INT_TX));
-
+		GRETH_REGSAVE(greth->regs->status,
+			      status & (GRETH_INT_RE | GRETH_INT_RX |
+					GRETH_INT_TE | GRETH_INT_TX));
 		retval = IRQ_HANDLED;
 
 		/* Disable interrupts and schedule poll() */
diff --git a/drivers/net/greth.h b/drivers/net/greth.h
index 9414169..f97f553 100644
--- a/drivers/net/greth.h
+++ b/drivers/net/greth.h
@@ -23,6 +23,7 @@
 #define GRETH_BD_LEN 0x7FF
 
 #define GRETH_TXEN 0x1
+#define GRETH_INT_TE 0x2
 #define GRETH_INT_TX 0x8
 #define GRETH_TXI 0x4
 #define GRETH_TXBD_STATUS 0x0001C000
@@ -35,6 +36,7 @@
 #define GRETH_TXBD_ERR_UE 0x4000
 #define GRETH_TXBD_ERR_AL 0x8000
 
+#define GRETH_INT_RE         0x1
 #define GRETH_INT_RX         0x4
 #define GRETH_RXEN           0x2
 #define GRETH_RXI            0x8
-- 
1.5.4


^ permalink raw reply related

* [PATCH 04/10] GRETH: added greth_compat_mode module parameter
From: Daniel Hellstrom @ 2011-01-13  8:25 UTC (permalink / raw)
  To: davem; +Cc: netdev, kristoffer
In-Reply-To: <1294907135-24884-1-git-send-email-daniel@gaisler.com>

The greth_compat_mode option can be used to make a GRETH GBit capable MAC
operate as if the GRETH 10/100 device was found. The GRETH GBit supports
TCP/UDP checksum offloading, unaligned frame buffers, scatter gather etc.
Enabling this mode allows the developer to test the GRETH 10/100 device
without all features mentioned above on a GBit MAC capable of the above.

Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
---
 drivers/net/greth.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index ef8da22..775dc24 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -70,6 +70,12 @@ static int no_gbit = 0;
 module_param(no_gbit, int, S_IRUGO);
 MODULE_PARM_DESC(no_gbit, "GRETH reports only 10/100 support to PHY layer if set to 1. Only affects GRETH GBit MAC, default 0 (off).");
 
+/* Use this option to enable GRETH 10/100 code on GRETH_GBIT hardware
+ * (debug legacy code option) */
+static int compat_mode = 0;
+module_param(compat_mode, int, S_IRUGO);
+MODULE_PARM_DESC(compat_mode, "GRETH 10/100 legacy mode enable. Only affects GRETH GBit MAC, default 0 (off).");
+
 static int greth_open(struct net_device *dev);
 static netdev_tx_t greth_start_xmit(struct sk_buff *skb,
 	   struct net_device *dev);
@@ -1458,6 +1464,10 @@ static int __devinit greth_of_probe(struct platform_device *ofdev, const struct
 	else
 		greth->gbit_phy_support = 0;
 
+	/* Force GBit MAC in legacy 10/100 mode (no offloading etc.) */
+	if (compat_mode == 1)
+		greth->gbit_mac = 0;
+
 	/* Check for multicast capability */
 	greth->multicast = (tmp >> 25) & 1;
 
-- 
1.5.4


^ permalink raw reply related

* [PATCH 08/10] GRETH: avoid writing bad speed/duplex when setting transfer mode
From: Daniel Hellstrom @ 2011-01-13  8:25 UTC (permalink / raw)
  To: davem; +Cc: netdev, kristoffer
In-Reply-To: <1294907135-24884-1-git-send-email-daniel@gaisler.com>

Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
---
 drivers/net/greth.c |   19 ++++++++-----------
 1 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index 888dc65..fea1e20 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -1242,29 +1242,26 @@ static void greth_link_change(struct net_device *dev)
 	struct greth_private *greth = netdev_priv(dev);
 	struct phy_device *phydev = greth->phy;
 	unsigned long flags;
-
 	int status_change = 0;
+	u32 ctrl;
 
 	spin_lock_irqsave(&greth->devlock, flags);
 
 	if (phydev->link) {
 
 		if ((greth->speed != phydev->speed) || (greth->duplex != phydev->duplex)) {
-
-			GRETH_REGANDIN(greth->regs->control,
-				       ~(GRETH_CTRL_FD | GRETH_CTRL_SP | GRETH_CTRL_GB));
+			ctrl = GRETH_REGLOAD(greth->regs->control) &
+			       ~(GRETH_CTRL_FD | GRETH_CTRL_SP | GRETH_CTRL_GB);
 
 			if (phydev->duplex)
-				GRETH_REGORIN(greth->regs->control, GRETH_CTRL_FD);
-
-			if (phydev->speed == SPEED_100) {
-
-				GRETH_REGORIN(greth->regs->control, GRETH_CTRL_SP);
-			}
+				ctrl |= GRETH_CTRL_FD;
 
+			if (phydev->speed == SPEED_100)
+				ctrl |= GRETH_CTRL_SP;
 			else if (phydev->speed == SPEED_1000)
-				GRETH_REGORIN(greth->regs->control, GRETH_CTRL_GB);
+				ctrl |= GRETH_CTRL_GB;
 
+			GRETH_REGSAVE(greth->regs->control, ctrl);
 			greth->speed = phydev->speed;
 			greth->duplex = phydev->duplex;
 			status_change = 1;
-- 
1.5.4


^ permalink raw reply related

* [PATCH 05/10] GRETH: fix opening/closing
From: Daniel Hellstrom @ 2011-01-13  8:25 UTC (permalink / raw)
  To: davem; +Cc: netdev, kristoffer
In-Reply-To: <1294907135-24884-1-git-send-email-daniel@gaisler.com>

When NAPI is disabled there is no point in having IRQs enabled. TX/RX
should be off before clearing the TX/RX descriptor rings.

Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
---
 drivers/net/greth.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index 775dc24..27578c9 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -366,6 +366,8 @@ static int greth_open(struct net_device *dev)
 		dev_dbg(&dev->dev, " starting queue\n");
 	netif_start_queue(dev);
 
+	GRETH_REGSAVE(greth->regs->status, 0xFF);
+
 	napi_enable(&greth->napi);
 
 	greth_enable_irqs(greth);
@@ -381,7 +383,9 @@ static int greth_close(struct net_device *dev)
 
 	napi_disable(&greth->napi);
 
+	greth_disable_irqs(greth);
 	greth_disable_tx(greth);
+	greth_disable_rx(greth);
 
 	netif_stop_queue(dev);
 
-- 
1.5.4


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox