* [RFC 1/2] serial/8250.c: Change to singly linked handler chain.
[not found] ` <20081114053042.11532.qmail@science.horizon.com>
@ 2008-11-14 21:17 ` George Spelvin
[not found] ` <20081114053314.12093.qmail@science.horizon.com>
1 sibling, 0 replies; 10+ messages in thread
From: George Spelvin @ 2008-11-14 21:17 UTC (permalink / raw)
To: linux-kernel; +Cc: linux
I posted this to linux-serial yesterday, but it has attracted no comment
yet, so I'm submitting it to a wider audience.
This is a prerequisite to the interesting patch 2/2. This boots and
runs, but only on a single-port interrupt, so that's not very good
testing. It's out here for style comments.
This just converts the list of struct uart_8250_port attached to a single
IRQ from doubly linked to singly. It should have no behavior differences.
---
drivers/serial/8250.c | 93 +++++++++++++++++++++++++++----------------------
1 files changed, 51 insertions(+), 42 deletions(-)
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 18caa95..7f66335 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -116,7 +116,7 @@ static unsigned int probe_rsa_count;
struct uart_8250_port {
struct uart_port port;
struct timer_list timer; /* "no irq" timer */
- struct list_head list; /* ports on this IRQ */
+ struct uart_8250_port *next; /* ports on this IRQ */
unsigned short capabilities; /* port capabilities */
unsigned short bugs; /* port bugs */
unsigned int tx_loadsz; /* transmit fifo load size */
@@ -146,7 +146,7 @@ struct uart_8250_port {
struct irq_info {
spinlock_t lock;
- struct list_head *head;
+ struct uart_8250_port *head;
};
static struct irq_info irq_lists[NR_IRQS];
@@ -1458,23 +1458,39 @@ serial8250_handle_port(struct uart_8250_port *up)
*
* This means we need to loop through all ports. checking that they
* don't have an interrupt pending.
+ *
+ * Fencepost consideration: serial8250_handle_port only polls each
+ * interrupt source once; it does not itself loop until the port
+ * has deasserted the interrupt line. Therefore, it is necessary to
+ * loop here until each port has had its IIR read in the idle state.
+ * "end" points to the beginning of the most recent run of idle ports.
+ * It is NULL if the current port is not idle. The loop ends when we
+ * are about to check end again.
*/
static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
{
struct irq_info *i = dev_id;
- struct list_head *l, *end = NULL;
+ struct uart_8250_port *up, *end = NULL;
int pass_counter = 0, handled = 0;
DEBUG_INTR("serial8250_interrupt(%d)...", irq);
spin_lock(&i->lock);
- l = i->head;
+ up = i->head;
do {
- struct uart_8250_port *up;
unsigned int iir;
- up = list_entry(l, struct uart_8250_port, list);
+ if (up == NULL) {
+ if (pass_counter++ > PASS_LIMIT) {
+ /* If we hit this, we're dead. */
+ printk(KERN_ERR "serial8250: too much work for "
+ "irq%d\n", irq);
+ break;
+ }
+ up = i->head;
+ continue;
+ }
iir = serial_in(up, UART_IIR);
if (!(iir & UART_IIR_NO_INT)) {
@@ -1486,9 +1505,10 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
} else if (up->port.iotype == UPIO_DWAPB &&
(iir & UART_IIR_BUSY) == UART_IIR_BUSY) {
/* The DesignWare APB UART has an Busy Detect (0x07)
- * interrupt meaning an LCR write attempt occured while the
- * UART was busy. The interrupt must be cleared by reading
- * the UART status register (USR) and the LCR re-written. */
+ * interrupt meaning an LCR write attempt occured
+ * while the UART was busy. The interrupt must
+ * be cleared by reading the UART status register
+ * (USR) and re-writing the LCR. */
unsigned int status;
status = *(volatile u32 *)up->port.private_data;
serial_out(up, UART_LCR, up->lcr);
@@ -1497,17 +1517,10 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
end = NULL;
} else if (end == NULL)
- end = l;
+ end = up;
- l = l->next;
-
- if (l == i->head && pass_counter++ > PASS_LIMIT) {
- /* If we hit this, we're dead. */
- printk(KERN_ERR "serial8250: too much work for "
- "irq%d\n", irq);
- break;
- }
- } while (l != end);
+ up = up->next;
+ } while (up != end);
spin_unlock(&i->lock);
@@ -1525,39 +1538,35 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
*/
static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
{
+ struct uart_8250_port *p, **pptr = &i->head;
+
spin_lock_irq(&i->lock);
- if (!list_empty(i->head)) {
- if (i->head == &up->list)
- i->head = i->head->next;
- list_del(&up->list);
- } else {
- BUG_ON(i->head != &up->list);
- i->head = NULL;
+ while ((p = *pptr) != up) {
+ BUG_ON(p == NULL);
+ pptr = &p->next;
}
+ *pptr = up->next;
+
spin_unlock_irq(&i->lock);
}
static int serial_link_irq_chain(struct uart_8250_port *up)
{
struct irq_info *i = irq_lists + up->port.irq;
- int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
+ int ret = 0;
+ struct uart_8250_port *head;
spin_lock_irq(&i->lock);
+ up->next = head = i->head;
+ i->head = up;
+ spin_unlock_irq(&i->lock);
- if (i->head) {
- list_add(&up->list, i->head);
- spin_unlock_irq(&i->lock);
-
- ret = 0;
- } else {
- INIT_LIST_HEAD(&up->list);
- i->head = &up->list;
- spin_unlock_irq(&i->lock);
-
+ if (!head) {
+ int irq_flag = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
ret = request_irq(up->port.irq, serial8250_interrupt,
- irq_flags, "serial", i);
+ irq_flag, "serial", i);
if (ret < 0)
serial_do_unlink(i, up);
}
@@ -1570,8 +1579,8 @@ static void serial_unlink_irq_chain(struct uart_8250_port *up)
struct irq_info *i = irq_lists + up->port.irq;
BUG_ON(i->head == NULL);
-
- if (list_empty(i->head))
+ serial_do_unlink(i, up);
+ if (i->head == NULL);
free_irq(up->port.irq, i);
serial_do_unlink(i, up);
--
1.5.6.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order.
[not found] ` <20081114053314.12093.qmail@science.horizon.com>
@ 2008-11-14 21:34 ` George Spelvin
2008-11-14 21:47 ` Alan Cox
0 siblings, 1 reply; 10+ messages in thread
From: George Spelvin @ 2008-11-14 21:34 UTC (permalink / raw)
To: linux-kernel; +Cc: linux
I posted this to linux-serial yesterday, but it has attracted no comment
yet, so I'm submitting it to a wider audience. It boots and runs on
amd64 with a standard (non-shared) serial port, but I haven't subjected
it to detailed testing. Comments about the code style and general idea
are requested.
This is the interesting patch. It changes the uart_8250_port list to
a self-adjusting one. Busy ports move to the front, and idle ones
migrate to the end. This reduces overall polling effort when there is
a mix of busy and idle ports.
Any time you're dealing with an edge-sensitive interrupt, it is essential
that the interrupt is turned off before returning from the handler.
With a level-sensitive one, you'll just loop back into the handler,
but an edge-sensitive one will never respond again.
When there are multiple independent sources, that means that you have to
poll them all repeatedly until everyone reports idle. Since interrupts don't
go away by themselves, that means that there was an instant, just before
the final pass started, when there were no active interrupt sources.
Now on modern computers even PCI bus reads are painfully slow, and
super-I/O chips connected via the "LPC bus" low-pin count emulated ISA
bus (basically a 4-bit/33 MHz bus which acts like a 16-bit/8.33 MHz
ISA bus) are truly agonizing.
We can't get around the need to poll every port sharing the same IRQ line,
but if we can predict which ports will be busy and poll them first,
we can start the final verification pass as early as possible.
This code does that by using the previous poll cycles as a hint.
If a port is idle, it will migrate to the end of the list and
only have to be checked once.
Part 1 changed the list to singly-linked to make the list shuffling easier.
Comments? Please?
---
drivers/serial/8250.c | 49 ++++++++++++++++++++++++++++++++++---------------
1 files changed, 34 insertions(+), 15 deletions(-)
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 7f66335..96e784f 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -1469,18 +1469,27 @@ serial8250_handle_port(struct uart_8250_port *up)
* "end" points to the beginning of the most recent run of idle ports.
* It is NULL if the current port is not idle. The loop ends when we
* are about to check end again.
+ *
+ * Optimization: this will finish sooner if we can check all busy
+ * ports first, starting the final verify-idle pass as soon as possible.
+ * To achieve this, busy ports are moved to the front of the list.
+ * *tailp marks the boundary between the front and the back of the list;
+ * when a port is found to be busy, it is removed from between *upp
+ * and upp->next, and reinserted at *tailp.
*/
static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
{
struct irq_info *i = dev_id;
- struct uart_8250_port *up, *end = NULL;
+ struct uart_8250_port *up, **upp, *end = NULL, **tailp;
int pass_counter = 0, handled = 0;
DEBUG_INTR("serial8250_interrupt(%d)...", irq);
+ tailp = upp = &i->head;
+
spin_lock(&i->lock);
- up = i->head;
+ up = *upp;
do {
unsigned int iir;
@@ -1491,7 +1500,7 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
"irq%d\n", irq);
break;
}
- up = i->head;
+ tailp = upp = &i->head;
continue;
}
@@ -1499,11 +1508,8 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
if (!(iir & UART_IIR_NO_INT)) {
serial8250_handle_port(up);
- handled = 1;
-
- end = NULL;
- } else if (up->port.iotype == UPIO_DWAPB &&
- (iir & UART_IIR_BUSY) == UART_IIR_BUSY) {
+ } else if ((iir & UART_IIR_BUSY) == UART_IIR_BUSY &&
+ up->port.iotype == UPIO_DWAPB) {
/* The DesignWare APB UART has an Busy Detect (0x07)
* interrupt meaning an LCR write attempt occured
* while the UART was busy. The interrupt must
@@ -1513,14 +1519,27 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
status = *(volatile u32 *)up->port.private_data;
serial_out(up, UART_LCR, up->lcr);
- handled = 1;
-
- end = NULL;
- } else if (end == NULL)
- end = up;
+ } else {
+ /* Port does not have interrupt asserted. */
+ if (end == NULL)
+ end = up; /* Start of run of idle ports */
+ upp = &up->next; /* Do not move anything */
+ continue;
+ }
- up = up->next;
- } while (up != end);
+ /* up had activity */
+ handled = 1;
+ end = NULL;
+ /* Move to front of list; append after *tailp */
+ if (tailp != upp) {
+ *upp = up->next;
+ up->next = *tailp;
+ *tailp = up;
+ tailp = &up->next;
+ } else {
+ tailp = upp = &up->next;
+ }
+ } while ((up = *upp) != end);
spin_unlock(&i->lock);
--
1.5.6.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order.
2008-11-14 21:34 ` [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order George Spelvin
@ 2008-11-14 21:47 ` Alan Cox
2008-11-15 0:10 ` Theodore Tso
0 siblings, 1 reply; 10+ messages in thread
From: Alan Cox @ 2008-11-14 21:47 UTC (permalink / raw)
To: George Spelvin; +Cc: linux-kernel, linux
> This code does that by using the previous poll cycles as a hint.
> If a port is idle, it will migrate to the end of the list and
> only have to be checked once.
>
> Part 1 changed the list to singly-linked to make the list shuffling easier.
>
> Comments? Please?
Is it really worth the complexity
- PCI ports are shared IRQ always
- Legacy ports are almost never shared IRQ on the LPC bus (and are
increasingly going away)
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order.
2008-11-14 21:47 ` Alan Cox
@ 2008-11-15 0:10 ` Theodore Tso
2008-11-16 15:23 ` George Spelvin
0 siblings, 1 reply; 10+ messages in thread
From: Theodore Tso @ 2008-11-15 0:10 UTC (permalink / raw)
To: Alan Cox; +Cc: George Spelvin, linux-kernel
On Fri, Nov 14, 2008 at 09:47:11PM +0000, Alan Cox wrote:
> > This code does that by using the previous poll cycles as a hint.
> > If a port is idle, it will migrate to the end of the list and
> > only have to be checked once.
> >
> > Part 1 changed the list to singly-linked to make the list shuffling easier.
> >
> > Comments? Please?
>
> Is it really worth the complexity
>
> - PCI ports are shared IRQ always
> - Legacy ports are almost never shared IRQ on the LPC bus (and are
> increasingly going away)
It's worth the complexity only *if* you have enough ports shared on a
single IRQ and simultaneously such that there a risk that if you don't
poll them quickly enough, characters will actually get dropped from
the UART's FIFO. The question is whether that is likely to happen on
modern CPU's. I worred about such things when I tried to make 16
115kbps serial ports work at full-speed using relatively primitive
16550A UART's with 16 character FIFO's on a 40 MHz 386. But (a)
UART's generally have deeper FIFO's these days, and (b) CPU's have
gotten a wee bit faster since 1992.
So color me dubious that this is actually necessary....
- Ted
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order.
2008-11-15 0:10 ` Theodore Tso
@ 2008-11-16 15:23 ` George Spelvin
2008-11-16 15:51 ` Alan Cox
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: George Spelvin @ 2008-11-16 15:23 UTC (permalink / raw)
To: tytso, alan; +Cc: linux, linux-kernel
Theodore Tso <tytso@mit.edu> wrote:
> On Fri, Nov 14, 2008 at 09:47:11PM +0000, Alan Cox wrote:
>>> This code does that by using the previous poll cycles as a hint.
>>> If a port is idle, it will migrate to the end of the list and
>>> only have to be checked once.
>>
>> Is it really worth the complexity?
>>
>> - PCI ports are shared IRQ always
>> - Legacy ports are almost never shared IRQ on the LPC bus (and are
>> increasingly going away)
>
> It's worth the complexity only *if* you have enough ports shared on a
> single IRQ and simultaneously such that there a risk that if you don't
> poll them quickly enough, characters will actually get dropped from
> the UART's FIFO. The question is whether that is likely to happen on
> modern CPUs. I worred about such things when I tried to make 16
> 115kbps serial ports work at full-speed using relatively primitive
> 16550A UARTs with 16 character FIFOs on a 40 MHz 386. But (a)
> UARTs generally have deeper FIFOs these days, and (b) CPUs have
> gotten a wee bit faster since 1992.
>
> So color me dubious that this is actually necessary....
First of all, thank you both for the feedback.
Re: shared IRQs and PCI bus...
In desktop hardware, yes. But PC-104 equipment (for the benefit of
confused bystandaers, that's classic ISA bus in a different form factor)
is still going strong; I just added an 8-port serial card to a little
embedded box.
But even PCI reads are slow, and the current code does not optimize the
case when interrupts are level-sensitive. (The optimization is trivial,
but detecting whether the interrupt is edge- or level-sensitive looks
messy.)
Regarding the purpose of the patch...
The goal is not fewer dropped characters (although there could be a small
benefit in that direction), and it doesn't improve worst-case timing;
the goal is to reduce the time spent in the interrupt handler _on average_
and thereby make more CPU available for other work.
The idea is that, if you have 4 ports sharing an interrupt, and the fourth
is the one that's busy, you'll check every port twice. If you could check
the busy port first, you'd only need to to do 5 checks.
The real over-optimization argument that I can think of is that emptying 8 bytes
out of a FIFO involves 16 register reads (LSR + receive data for each byte),
compared to which the time to poll a few IIRs is not too much. (Of course, then
there's low_latency mode.)
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order.
2008-11-16 15:23 ` George Spelvin
@ 2008-11-16 15:51 ` Alan Cox
2008-11-16 16:57 ` George Spelvin
2008-11-16 16:23 ` Theodore Tso
2008-11-16 16:49 ` Theodore Tso
2 siblings, 1 reply; 10+ messages in thread
From: Alan Cox @ 2008-11-16 15:51 UTC (permalink / raw)
To: George Spelvin; +Cc: tytso, linux, linux-kernel
> In desktop hardware, yes. But PC-104 equipment (for the benefit of
> confused bystandaers, that's classic ISA bus in a different form factor)
> is still going strong; I just added an 8-port serial card to a little
Oh dear. I thought the compact PCI people had exterminated that dinosaur.
> But even PCI reads are slow, and the current code does not optimize the
> case when interrupts are level-sensitive. (The optimization is trivial,
> but detecting whether the interrupt is edge- or level-sensitive looks
> messy.)
I think you would need to remember that when the port was configured
initially, but yes that would be a much more useful change for most users.
> The real over-optimization argument that I can think of is that emptying 8 bytes
> out of a FIFO involves 16 register reads (LSR + receive data for each byte),
> compared to which the time to poll a few IIRs is not too much. (Of course, then
> there's low_latency mode.)
So perhaps the right way to approach this is to use two IRQ handlers, one
for level triggered interrupts which doesn't keep polling, and one for
edge triggered which expects the single device case but can re-order your
device scans as you propose.
That would make PCI handling faster, keep normal ISA handling the same
(and with no extra complexity for a list of length one device) and speed
up the case you care about ?
Does that seem practical ?
Alan
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order.
2008-11-16 15:23 ` George Spelvin
2008-11-16 15:51 ` Alan Cox
@ 2008-11-16 16:23 ` Theodore Tso
2008-11-16 18:02 ` George Spelvin
2008-11-16 16:49 ` Theodore Tso
2 siblings, 1 reply; 10+ messages in thread
From: Theodore Tso @ 2008-11-16 16:23 UTC (permalink / raw)
To: George Spelvin; +Cc: alan, linux-kernel
On Sun, Nov 16, 2008 at 10:23:52AM -0500, George Spelvin wrote:
> Re: shared IRQs and PCI bus...
> In desktop hardware, yes. But PC-104 equipment (for the benefit of
> confused bystandaers, that's classic ISA bus in a different form factor)
> is still going strong; I just added an 8-port serial card to a little
> embedded box.
>
> But even PCI reads are slow, and the current code does not optimize the
> case when interrupts are level-sensitive. (The optimization is trivial,
> but detecting whether the interrupt is edge- or level-sensitive looks
> messy.)
> The idea is that, if you have 4 ports sharing an interrupt, and the fourth
> is the one that's busy, you'll check every port twice. If you could check
> the busy port first, you'd only need to to do 5 checks.
Um, but the ISA bus was edge sensitive. So presumably PC-104 would be
as well. So you *have* to scan all of the ports twice, because you
want to keep the race window as narrow as possible. If you don't
service incoming characters for a previously idle port (because of the
thought that you only have to check currently busy ports as an
optimization), you'll never get an interrupt on that IRQ again, and
you'll have to wait for the serial timeout to save you (and in the
meantime, you'll likely be losing characters as the FIFO's overflow).
- Ted
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order.
2008-11-16 15:23 ` George Spelvin
2008-11-16 15:51 ` Alan Cox
2008-11-16 16:23 ` Theodore Tso
@ 2008-11-16 16:49 ` Theodore Tso
2 siblings, 0 replies; 10+ messages in thread
From: Theodore Tso @ 2008-11-16 16:49 UTC (permalink / raw)
To: George Spelvin; +Cc: alan, linux-kernel
On Sun, Nov 16, 2008 at 10:23:52AM -0500, George Spelvin wrote:
> The goal is not fewer dropped characters (although there could be a small
> benefit in that direction), and it doesn't improve worst-case timing;
> the goal is to reduce the time spent in the interrupt handler _on average_
> and thereby make more CPU available for other work.
Have you actually measured how much CPU is currently being burned by
the interrupt handler? And does it actually make a difference with
your optimization? I did a lot of measurements of this back in the
day of the 40 MHz 386 and 16 serial ports running at 115kbps. CPU's
have gotten *so* much faster, and as you have pointed the out, the PCI
bus accesses are also faster (and on the ISA bus, given edge-triggered
interrupts, you have to scan all of the ports any way) --- so it's not
obvious to me that it's actually worth it.
There were programs to measure CPU overhead; they normally worked by
doing a certain amount of work (i.e., seeing how many interations of
some mathematical calculation) in a given amount of clock time both
with and without the serial ports being busy. It might be worthwhile
to see whether for your workload how measurrable the CPU reduction
really is, given modern hardware. I'm not convinced given the number
of Moore's law doublings since 1992, that it's really going to be
worth it for a rational number of serial ports being serviced by a
modern Linux machine.
- Ted
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order.
2008-11-16 15:51 ` Alan Cox
@ 2008-11-16 16:57 ` George Spelvin
0 siblings, 0 replies; 10+ messages in thread
From: George Spelvin @ 2008-11-16 16:57 UTC (permalink / raw)
To: alan; +Cc: tytso, linux, linux-kernel
Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>> In desktop hardware, yes. But PC-104 equipment (for the benefit of
>> confused bystandaers, that's classic ISA bus in a different form factor)
>> is still going strong; I just added an 8-port serial card to a little
>
> Oh dear. I thought the compact PCI people had exterminated that dinosaur.
Oh, I agree that it's dying, but it'll take longer to die that the third
act of Tristan und Isolde. (Or the even more gruesome tale of Erasmus
of Formiae.)
This is PC hardware; the long tail is very long indeed. Didn't you just
read that Microsoft stopped selling Windows 3.1 licenses just this month?
And the people upset because they're still using it in production?
http://news.bbc.co.uk/1/hi/technology/7707016.stm
>> But even PCI reads are slow, and the current code does not optimize the
>> case when interrupts are level-sensitive. (The optimization is trivial,
>> but detecting whether the interrupt is edge- or level-sensitive looks
>> messy.)
>
> I think you would need to remember that when the port was configured
> initially, but yes that would be a much more useful change for most users.
>
>> The real over-optimization argument that I can think of is that emptying 8 bytes
>> out of a FIFO involves 16 register reads (LSR + receive data for each byte),
>> compared to which the time to poll a few IIRs is not too much. (Of course, then
>> there's low_latency mode.)
>
> So perhaps the right way to approach this is to use two IRQ handlers, one
> for level triggered interrupts which doesn't keep polling, and one for
> edge triggered which expects the single device case but can re-order your
> device scans as you propose.
That's possible, but is it really worth two separate code paths?
The entire idea is based on the fact that even uncached memory accesses
are faster than I/O reads, so a few more conditions in the polling loop
should not be a problem. The rearrangement is already conditional (on
whether it would just be an effective no-op), and there is already a
test for reaching the end of the ports list.
> That would make PCI handling faster, keep normal ISA handling the same
> (and with no extra complexity for a list of length one device) and speed
> up the case you care about ?
>
> Does that seem practical ?
Yes it does, but I could use some help figuring out how to distinguish
different interrupt types at run-time. Is there a way that doesn't involve
arch-specific code?
Thank you!
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order.
2008-11-16 16:23 ` Theodore Tso
@ 2008-11-16 18:02 ` George Spelvin
0 siblings, 0 replies; 10+ messages in thread
From: George Spelvin @ 2008-11-16 18:02 UTC (permalink / raw)
To: tytso, linux; +Cc: linux-kernel, alan
Theodore Tso <tytso@mit.edu> wrote:
>> The idea is that, if you have 4 ports sharing an interrupt, and the fourth
>> is the one that's busy, you'll check every port twice. If you could check
>> the busy port first, you'd only need to to do 5 checks.
>
> Um, but the ISA bus was edge sensitive. So presumably PC-104 would be
> as well. So you *have* to scan all of the ports twice, because you
> want to keep the race window as narrow as possible. If you don't
> service incoming characters for a previously idle port (because of the
> thought that you only have to check currently busy ports as an
> optimization), you'll never get an interrupt on that IRQ again, and
> you'll have to wait for the serial timeout to save you (and in the
> meantime, you'll likely be losing characters as the FIFO's overflow).
Er, no, you seem to be missing something. You do NOT have to scan all
the ports twice. You have to scan until you observe all ports idle.
So the minimum is to scan each busy port twice and each idle port once.
The only problem is that you have to observe all ports idle *at the same
time*, meaning that the "all ports idle" part of the scan starts after the
last busy port is found.
Doing this actually completely closes the race. If you poll, in
succession, ports 1 though N, and observe them all not requesting
an interrupt, then you know there was an interval, between when you
serviced the last busy port and polled port 1, during which the IRQ line
was not asserted.
If this is confusing, I can expand the relevant comments further.
On Sun, Nov 16, 2008 at 10:23:52AM -0500, George Spelvin wrote:
>> The goal is not fewer dropped characters (although there could be a small
>> benefit in that direction), and it doesn't improve worst-case timing;
>> the goal is to reduce the time spent in the interrupt handler _on average_
>> and thereby make more CPU available for other work.
> Have you actually measured how much CPU is currently being burned by
> the interrupt handler? And does it actually make a difference with
> your optimization?
No, I confess that I haven't. I was playing with PPS timing code and
noticed the polling loop. After studying it, it dawned on me that it
could be optimized further.
> I did a lot of measurements of this back in the day of the 40 MHz 386 and
> 16 serial ports running at 115kbps. CPU's have gotten *so* much faster,
> and as you have pointed the out, the PCI bus accesses are also faster
> (and on the ISA bus, given edge-triggered interrupts, you have to scan
> all of the ports any way) --- so it's not obvious to me that it's actually
> worth it.
> There were programs to measure CPU overhead; they normally worked by
> doing a certain amount of work (i.e., seeing how many interations of
> some mathematical calculation) in a given amount of clock time both
> with and without the serial ports being busy. It might be worthwhile
> to see whether for your workload how measurrable the CPU reduction
> really is, given modern hardware. I'm not convinced given the number
> of Moore's law doublings since 1992, that it's really going to be
> worth it for a rational number of serial ports being serviced by a
> modern Linux machine.
I'll try to get some measurements, but the idea is that Moore's law has
dome wonders for CPU performance but has NOT sped up the PCI bus since
1992, so the cost in CPU cycles has actually gone up. Even the ISA to
PCI bus transition wasn't that big an improvement, especially for
single-byte reads. (The 16550D data sheet claims a minimum read cycle
time of 280 ns, maybe 3 cycles at 120 ns = 360 ns in practice, while a
PCI bus access to an OC16PCI954 is 5 PCI bus cycles, 150 ns)
Thus, it's worth more CPU effort to save I/O bus cycles.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2008-11-16 18:03 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20081113150308.3590.qmail@science.horizon.com>
[not found] ` <20081114053042.11532.qmail@science.horizon.com>
2008-11-14 21:17 ` [RFC 1/2] serial/8250.c: Change to singly linked handler chain George Spelvin
[not found] ` <20081114053314.12093.qmail@science.horizon.com>
2008-11-14 21:34 ` [RFC 2/2] serial/8250.c: Use self-adjusting list for port poll order George Spelvin
2008-11-14 21:47 ` Alan Cox
2008-11-15 0:10 ` Theodore Tso
2008-11-16 15:23 ` George Spelvin
2008-11-16 15:51 ` Alan Cox
2008-11-16 16:57 ` George Spelvin
2008-11-16 16:23 ` Theodore Tso
2008-11-16 18:02 ` George Spelvin
2008-11-16 16:49 ` Theodore Tso
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox