All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
@ 2005-09-12 12:18 Karsten Wiese
  2005-09-12 12:35 ` Takashi Iwai
  0 siblings, 1 reply; 14+ messages in thread
From: Karsten Wiese @ 2005-09-12 12:18 UTC (permalink / raw)
  To: Takashi Iwai; +Cc: alsa-devel

[-- Attachment #1: Type: text/plain, Size: 64 bytes --]

Hi Takashi

Please apply, if you agree.

   Thanks,
   Karsten


[-- Attachment #2: via82xx_fast_pointer.patch --]
[-- Type: text/x-diff, Size: 4835 bytes --]

From: Karsten Wiese <annabellesgarden@yahoo.de>


Reduce interrupt latency in sound/pci/via82xx.c


The change only affects the via823x kind of chips.
Here the  via8233_pcm_pointer_hw() function
(named snd_via8233_pcm_pointer() before)
needs to loop until a non zero position is red from the chip.

Measurements have shown that more than 200 loops are typically needed on
an Athlon64. 
As io-reads cost many cycles, those loops sum up huge.
via8233_pcm_pointer_hw() runs either in interrupt or with interrupts
disabled. So it introduces significant interrupt latency.

The patch introduces a calculated position value hwptr_done,
that is updated by the interrupt routine when a period is completed.
This works much faster at the cost of less precise positions.
Position precision is period_size, which is ok for nearly all applications.

The actual method chosen depends on the period_size used and its
relation to the module parameter hw_position[]:
	if (period_size >= hw_position[device_index]))
		Use chip based precise position calculation
	else
		Use fast coarse position calculation


Signed-off-by: Karsten Wiese <annabellesgarden@yahoo.de>


--- alsa/alsa-kernel/pci/via82xx.c	9 Sep 2005 13:21:46 -0000	1.167
+++ alsa/alsa-kernel/pci/via82xx.c	12 Sep 2005 11:30:05 -0000
@@ -83,6 +83,7 @@
 static int ac97_clock[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 48000};
 static char *ac97_quirk[SNDRV_CARDS];
 static int dxs_support[SNDRV_CARDS];
+static int hw_position[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 8192};
 
 module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for VIA 82xx bridge.");
@@ -102,6 +103,8 @@
 MODULE_PARM_DESC(ac97_quirk, "AC'97 workaround for strange hardware.");
 module_param_array(dxs_support, int, NULL, 0444);
 MODULE_PARM_DESC(dxs_support, "Support for DXS channels (0 = auto, 1 = enable, 2 = disable, 3 = 48k only, 4 = no VRA, 5 = enable any sample rate)");
+module_param_array(hw_position, int, NULL, 0444);
+MODULE_PARM_DESC(hw_position, "If actual frames per period is bigger or equal, position is red from hardware. Else position resolution is period size.");
 
 
 /* revision numbers for via686 */
@@ -328,6 +331,7 @@
 	unsigned int fragsize;
 	unsigned int bufsize;
 	unsigned int bufsize2;
+	int hwptr_done;		/* processed frame position in the buffer */
 };
 
 
@@ -596,6 +600,7 @@
 	outb(0x00, VIADEV_REG(viadev, OFFSET_TYPE)); /* for via686 */
 	// outl(0, VIADEV_REG(viadev, OFFSET_CURR_PTR));
 	viadev->lastpos = 0;
+	viadev->hwptr_done = 0;
 }
 
 
@@ -621,13 +626,26 @@
 	spin_lock(&chip->reg_lock);
 	for (i = 0; i < chip->num_devs; i++) {
 		viadev_t *viadev = &chip->devs[i];
+		snd_pcm_substream_t *substream;
 		unsigned char c_status = inb(VIADEV_REG(viadev, OFFSET_STATUS));
 		c_status &= (VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG|VIA_REG_STAT_STOPPED);
 		if (! c_status)
 			continue;
-		if (viadev->substream && viadev->running) {
+		substream = viadev->substream;
+		if (substream && viadev->running) {
 			spin_unlock(&chip->reg_lock);
-			snd_pcm_period_elapsed(viadev->substream);
+
+			/*
+			 * Update period bound position.
+			 * Its used by via8233 when hw_pointer[dev] > period_size.
+			 * To check for this condition would take as much cycles as to just do it.
+			 */
+			if (c_status & VIA_REG_STAT_EOL)
+				viadev->hwptr_done = 0;
+			else
+				viadev->hwptr_done += substream->runtime->period_size;
+
+			snd_pcm_period_elapsed(substream);
 			spin_lock(&chip->reg_lock);
 		}
 		outb(c_status, VIADEV_REG(viadev, OFFSET_STATUS)); /* ack */
@@ -764,12 +782,12 @@
 }
 
 /*
- * get the current pointer on via823x
+ * get the current pointer on via823x from chip
+ * exact, but slow due to excessive looping 
  */
-static snd_pcm_uframes_t snd_via8233_pcm_pointer(snd_pcm_substream_t *substream)
+static snd_pcm_uframes_t via8233_pcm_pointer_hw(snd_pcm_substream_t *substream, viadev_t *viadev)
 {
 	via82xx_t *chip = snd_pcm_substream_chip(substream);
-	viadev_t *viadev = (viadev_t *)substream->runtime->private_data;
 	unsigned int idx, count, res;
 	int timeout = 5000;
 	
@@ -800,6 +818,26 @@
 	return bytes_to_frames(substream->runtime, res);
 }
 
+/*
+ * get the current pointer on via82xx from elapsed periods.
+ * resolution limited to period_size. fast. 
+ */
+static snd_pcm_uframes_t via82xx_pcm_pointer(snd_pcm_substream_t *substream, viadev_t *viadev)
+{
+	return viadev->hwptr_done;
+}
+
+/*
+ * get the current pointer on via823x
+ */
+static snd_pcm_uframes_t snd_via8233_pcm_pointer(snd_pcm_substream_t *substream)
+{
+	viadev_t *viadev = (viadev_t *)substream->runtime->private_data;
+	if (likely(hw_position[substream->pcm->device] > substream->runtime->period_size))
+			return via82xx_pcm_pointer(substream, viadev);
+		else
+			return via8233_pcm_pointer_hw(substream, viadev);
+}
 
 /*
  * hw_params callback:

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-12 12:18 [PATCH] Reduce interrupt latency in sound/pci/via82xx.c Karsten Wiese
@ 2005-09-12 12:35 ` Takashi Iwai
  2005-09-12 13:09   ` Karsten Wiese
  0 siblings, 1 reply; 14+ messages in thread
From: Takashi Iwai @ 2005-09-12 12:35 UTC (permalink / raw)
  To: Karsten Wiese; +Cc: alsa-devel

At Mon, 12 Sep 2005 14:18:04 +0200,
Karsten Wiese wrote:
> 
> From: Karsten Wiese <annabellesgarden@yahoo.de>
> 
> 
> Reduce interrupt latency in sound/pci/via82xx.c
> 
> 
> The change only affects the via823x kind of chips.
> Here the  via8233_pcm_pointer_hw() function
> (named snd_via8233_pcm_pointer() before)
> needs to loop until a non zero position is red from the chip.
> 
> Measurements have shown that more than 200 loops are typically needed on
> an Athlon64. 
> As io-reads cost many cycles, those loops sum up huge.
> via8233_pcm_pointer_hw() runs either in interrupt or with interrupts
> disabled. So it introduces significant interrupt latency.
> 
> The patch introduces a calculated position value hwptr_done,
> that is updated by the interrupt routine when a period is completed.
> This works much faster at the cost of less precise positions.
> Position precision is period_size, which is ok for nearly all applications.
> 
> The actual method chosen depends on the period_size used and its
> relation to the module parameter hw_position[]:
> 	if (period_size >= hw_position[device_index]))
> 		Use chip based precise position calculation
> 	else
> 		Use fast coarse position calculation
> 
> 
> Signed-off-by: Karsten Wiese <annabellesgarden@yahoo.de>

I don't like that the behavior depends on the period size.
If it's bad to loop too much, we should fix this behavior.

The question is how to handle the value 0 of CUR_COUNT register.
Usually this occurs in a boundary, i.e. very likely only in
snd_pcm_period_elapsed().

So, for example, add a flag indicating that the position callback is
being called in the irq handler.  And the position callback assumes
the exact next period if CUR_COUNT is 0 and this flag is set.  If the
flag isn't set, the driver loops probing to get an accurate position.


Takashi


-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-12 12:35 ` Takashi Iwai
@ 2005-09-12 13:09   ` Karsten Wiese
  2005-09-12 13:09     ` Takashi Iwai
  0 siblings, 1 reply; 14+ messages in thread
From: Karsten Wiese @ 2005-09-12 13:09 UTC (permalink / raw)
  To: Takashi Iwai; +Cc: alsa-devel

Am Montag, 12. September 2005 14:35 schrieb Takashi Iwai:
> 
> I don't like that the behavior depends on the period size.
> If it's bad to loop too much, we should fix this behavior.
> 
> The question is how to handle the value 0 of CUR_COUNT register.
> Usually this occurs in a boundary, i.e. very likely only in
> snd_pcm_period_elapsed().
> 
> So, for example, add a flag indicating that the position callback is
> being called in the irq handler.  And the position callback assumes
> the exact next period if CUR_COUNT is 0 and this flag is set.  If the
> flag isn't set, the driver loops probing to get an accurate position.

This doesn't help in the worst case:
An application requests position when CUR_COUNT is already 0 immediately
before the interrupt callback runs. Thus it disables interrupt
(or blocks the callback by spinlock) until it reads != 0 resulting in
the same interrupt latency.

How about a bool module_parameter to choose the pointer calculation
method then?
Do you know any application benefitting from the sub period position
precision?
If not so, we could even use #ifdef to choose the method at compile
time and default to the new fast method.
Thus we'd still have the know how in the code and default to optimal
interrupt performance.

   Karsten  

	

	
		
___________________________________________________________ 
Gesendet von Yahoo! Mail - Jetzt mit 1GB Speicher kostenlos - Hier anmelden: http://mail.yahoo.de



-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-12 13:09   ` Karsten Wiese
@ 2005-09-12 13:09     ` Takashi Iwai
  2005-09-12 14:31       ` Karsten Wiese
  0 siblings, 1 reply; 14+ messages in thread
From: Takashi Iwai @ 2005-09-12 13:09 UTC (permalink / raw)
  To: Karsten Wiese; +Cc: alsa-devel

At Mon, 12 Sep 2005 15:09:27 +0200,
Karsten Wiese wrote:
> 
> Am Montag, 12. September 2005 14:35 schrieb Takashi Iwai:
> > 
> > I don't like that the behavior depends on the period size.
> > If it's bad to loop too much, we should fix this behavior.
> > 
> > The question is how to handle the value 0 of CUR_COUNT register.
> > Usually this occurs in a boundary, i.e. very likely only in
> > snd_pcm_period_elapsed().
> > 
> > So, for example, add a flag indicating that the position callback is
> > being called in the irq handler.  And the position callback assumes
> > the exact next period if CUR_COUNT is 0 and this flag is set.  If the
> > flag isn't set, the driver loops probing to get an accurate position.
> 
> This doesn't help in the worst case:
> An application requests position when CUR_COUNT is already 0 immediately
> before the interrupt callback runs. Thus it disables interrupt
> (or blocks the callback by spinlock) until it reads != 0 resulting in
> the same interrupt latency.

Hmm, then let's fix the handling of 0 read in all cases.
We can simply assume the next period in this case as an exception.

> How about a bool module_parameter to choose the pointer calculation
> method then?
> Do you know any application benefitting from the sub period position
> precision?

dmix/dsnoop/dshare.


Takashi


-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-12 13:09     ` Takashi Iwai
@ 2005-09-12 14:31       ` Karsten Wiese
  2005-09-12 15:07         ` Takashi Iwai
  2005-09-12 22:00         ` James Courtier-Dutton
  0 siblings, 2 replies; 14+ messages in thread
From: Karsten Wiese @ 2005-09-12 14:31 UTC (permalink / raw)
  To: Takashi Iwai; +Cc: alsa-devel

Am Montag, 12. September 2005 15:09 schrieb Takashi Iwai:
> Hmm, then let's fix the handling of 0 read in all cases.
> We can simply assume the next period in this case as an exception.

No. I've looked at the change sequence of CUR_COUNT through an iopl(3)ed
app running as root polling CUR_COUNT.
The problem case looks like this:
step    value
======  ==========
0	0x01000020	// page index 1, 32 bytes left to transfer
1	0x01000000	// page index 1 complete
2	0x02000000	// switched to page index 2, looks like complete
			// _but hasn't yet started!_
3	0x02001000	// page index 2, 4096 bytes left to transfer


Step 1 values could be red up to ~270 times in a row.
Step 2 values occurred maximum 2 times in a row so far.

If above Step 2 repetition count would reflect real worst case,
we could handle (CUR_COUNT & 0xFFFFFF)==0 like this:

	read CUR_COUNT 3 times in a row.
	index page of lowest (CUR_COUNT >> 24) has completed

Problem is that 3 times in a row might not be enough for all machines.
So we'd assume 6 times to be safe and that would give significant
latency again, though still better than what we have now.

How about:
	- when in interrupt, read CUR_COUNT 1 time. 
	  if (CUR_COUNT & 0xFFFFFF)==0 use fast position with period
	  resolution
	- when not in interrupt get position from chip by reading
	  CUR_COUNT maximum 6 times
	- provide default off module_parameter "fast_position"
          to always only use fast position

   Karsten

	

	
		
___________________________________________________________ 
Gesendet von Yahoo! Mail - Jetzt mit 1GB Speicher kostenlos - Hier anmelden: http://mail.yahoo.de



-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-12 14:31       ` Karsten Wiese
@ 2005-09-12 15:07         ` Takashi Iwai
  2005-09-12 22:00         ` James Courtier-Dutton
  1 sibling, 0 replies; 14+ messages in thread
From: Takashi Iwai @ 2005-09-12 15:07 UTC (permalink / raw)
  To: Karsten Wiese; +Cc: alsa-devel

At Mon, 12 Sep 2005 16:31:31 +0200,
Karsten Wiese wrote:
> 
> Am Montag, 12. September 2005 15:09 schrieb Takashi Iwai:
> > Hmm, then let's fix the handling of 0 read in all cases.
> > We can simply assume the next period in this case as an exception.
> 
> No. I've looked at the change sequence of CUR_COUNT through an iopl(3)ed
> app running as root polling CUR_COUNT.
> The problem case looks like this:
> step    value
> ======  ==========
> 0	0x01000020	// page index 1, 32 bytes left to transfer
> 1	0x01000000	// page index 1 complete
> 2	0x02000000	// switched to page index 2, looks like complete
> 			// _but hasn't yet started!_
> 3	0x02001000	// page index 2, 4096 bytes left to transfer
> 
> 
> Step 1 values could be red up to ~270 times in a row.
> Step 2 values occurred maximum 2 times in a row so far.

Well, we can check the validity of the assumed position by comparing
with the last position.  An untested patch is below.

> If above Step 2 repetition count would reflect real worst case,
> we could handle (CUR_COUNT & 0xFFFFFF)==0 like this:
> 
> 	read CUR_COUNT 3 times in a row.
> 	index page of lowest (CUR_COUNT >> 24) has completed
> 
> Problem is that 3 times in a row might not be enough for all machines.
> So we'd assume 6 times to be safe and that would give significant
> latency again, though still better than what we have now.
> 
> How about:
> 	- when in interrupt, read CUR_COUNT 1 time. 
> 	  if (CUR_COUNT & 0xFFFFFF)==0 use fast position with period
> 	  resolution
> 	- when not in interrupt get position from chip by reading
> 	  CUR_COUNT maximum 6 times

This sounds not bad.

> 	- provide default off module_parameter "fast_position"
>           to always only use fast position

... if the solution witout loop doesn't work.


Takashi


Index: alsa-kernel/pci/via82xx.c
===================================================================
RCS file: /home/iwai/cvs/alsa/alsa-kernel/pci/via82xx.c,v
retrieving revision 1.178
diff -u -r1.178 via82xx.c
--- alsa-kernel/pci/via82xx.c	12 Sep 2005 10:27:50 -0000	1.178
+++ alsa-kernel/pci/via82xx.c	12 Sep 2005 15:03:34 -0000
@@ -325,6 +325,7 @@
 	struct snd_via_sg_table *idx_table;
 	/* for recovery from the unexpected pointer */
 	unsigned int lastpos;
+	unsigned int hwpos;
 	unsigned int fragsize;
 	unsigned int bufsize;
 	unsigned int bufsize2;
@@ -596,6 +597,7 @@
 	outb(0x00, VIADEV_REG(viadev, OFFSET_TYPE)); /* for via686 */
 	// outl(0, VIADEV_REG(viadev, OFFSET_CURR_PTR));
 	viadev->lastpos = 0;
+	viadev->hwpos = 0;
 }
 
 
@@ -626,6 +628,8 @@
 		if (! c_status)
 			continue;
 		if (viadev->substream && viadev->running) {
+			viadev->hwpos += viadev->fragsize;
+			viadev->hwpos %= viadev->bufsize;
 			spin_unlock(&chip->reg_lock);
 			snd_pcm_period_elapsed(viadev->substream);
 			spin_lock(&chip->reg_lock);
@@ -771,29 +775,34 @@
 	via82xx_t *chip = snd_pcm_substream_chip(substream);
 	viadev_t *viadev = (viadev_t *)substream->runtime->private_data;
 	unsigned int idx, count, res;
-	int timeout = 5000;
 	
 	snd_assert(viadev->tbl_entries, return 0);
 	if (!(inb(VIADEV_REG(viadev, OFFSET_STATUS)) & VIA_REG_STAT_ACTIVE))
 		return 0;
 	spin_lock(&chip->reg_lock);
-	do {
-		count = inl(VIADEV_REG(viadev, OFFSET_CURR_COUNT));
-		/* some mobos read 0 count */
-		if ((count & 0xffffff) || ! viadev->running)
-			break;
-	} while (--timeout);
-	if (! timeout)
-		snd_printd(KERN_ERR "zero position is read\n");
-	idx = count >> 24;
-	if (idx >= viadev->tbl_entries) {
+	count = inl(VIADEV_REG(viadev, OFFSET_CURR_COUNT));
+	/* some mobos read 0 count
+	 *
+	 * This happens when the register is being updated.  We assume the
+	 * new period here
+	 */
+	if (! (count & 0xffffff)) {
+		res = viadev->hwpos;
+		if (check_invalid_pos(viadev, res))
+			res = viadev->lastpos;
+		else
+			viadev->lastpos = res;
+	} else {
+		idx = count >> 24;
+		if (idx >= viadev->tbl_entries) {
 #ifdef POINTER_DEBUG
-		printk("fail: invalid idx = %i/%i\n", idx, viadev->tbl_entries);
+			printk("fail: invalid idx = %i/%i\n", idx, viadev->tbl_entries);
 #endif
-		res = viadev->lastpos;
-	} else {
-		count &= 0xffffff;
-		res = calc_linear_pos(viadev, idx, count);
+			res = viadev->lastpos;
+		} else {
+			count &= 0xffffff;
+			res = calc_linear_pos(viadev, idx, count);
+		}
 	}
 	spin_unlock(&chip->reg_lock);
 


-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-12 14:31       ` Karsten Wiese
  2005-09-12 15:07         ` Takashi Iwai
@ 2005-09-12 22:00         ` James Courtier-Dutton
  2005-09-13 10:42           ` Takashi Iwai
  2005-09-13 11:18           ` Karsten Wiese
  1 sibling, 2 replies; 14+ messages in thread
From: James Courtier-Dutton @ 2005-09-12 22:00 UTC (permalink / raw)
  To: Karsten Wiese; +Cc: Takashi Iwai, alsa-devel

Karsten Wiese wrote:
> Am Montag, 12. September 2005 15:09 schrieb Takashi Iwai:
> 
>>Hmm, then let's fix the handling of 0 read in all cases.
>>We can simply assume the next period in this case as an exception.
> 
> 
> No. I've looked at the change sequence of CUR_COUNT through an iopl(3)ed
> app running as root polling CUR_COUNT.
> The problem case looks like this:
> step    value
> ======  ==========
> 0	0x01000020	// page index 1, 32 bytes left to transfer
> 1	0x01000000	// page index 1 complete
> 2	0x02000000	// switched to page index 2, looks like complete
> 			// _but hasn't yet started!_
> 3	0x02001000	// page index 2, 4096 bytes left to transfer
> 
> 
> Step 1 values could be red up to ~270 times in a row.
> Step 2 values occurred maximum 2 times in a row so far.
> 

That looks truely horrible.
So upper byte in counting up, and lower 3 bytes are counting down!!!

Can you please give me some indication of how long the register stays at 
each value, and at what point the interrupt happens.

Are you also saying that the count can proceed like this:
0x01000000
0x02000000
0x02001000
0x02000fff
...
0x02000001
0x02000000
0x03000000

So, here we actually have 0x0200000 twice in the same sequence. That 
sort of situation will never work well and there is really not a lot 
that can be done except to say that whenever ((value & 0xffffff) == 0) 
we don't have a clue where we are in the buffer!

You say the Step 1 values could be red up to ~270 times in a row but 
Step 2 values occured max 2 times in a row. Can we assume therefore that 
the counter is getting stuck st Step 1 for some reason?

James


-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-12 22:00         ` James Courtier-Dutton
@ 2005-09-13 10:42           ` Takashi Iwai
  2005-09-13 11:18           ` Karsten Wiese
  1 sibling, 0 replies; 14+ messages in thread
From: Takashi Iwai @ 2005-09-13 10:42 UTC (permalink / raw)
  To: James Courtier-Dutton; +Cc: Karsten Wiese, alsa-devel

At Mon, 12 Sep 2005 23:00:16 +0100,
James Courtier-Dutton wrote:
> 
> Karsten Wiese wrote:
> > Am Montag, 12. September 2005 15:09 schrieb Takashi Iwai:
> > 
> >>Hmm, then let's fix the handling of 0 read in all cases.
> >>We can simply assume the next period in this case as an exception.
> > 
> > 
> > No. I've looked at the change sequence of CUR_COUNT through an iopl(3)ed
> > app running as root polling CUR_COUNT.
> > The problem case looks like this:
> > step    value
> > ======  ==========
> > 0	0x01000020	// page index 1, 32 bytes left to transfer
> > 1	0x01000000	// page index 1 complete
> > 2	0x02000000	// switched to page index 2, looks like complete
> > 			// _but hasn't yet started!_
> > 3	0x02001000	// page index 2, 4096 bytes left to transfer
> > 
> > 
> > Step 1 values could be red up to ~270 times in a row.
> > Step 2 values occurred maximum 2 times in a row so far.
> > 
> 
> That looks truely horrible.
> So upper byte in counting up, and lower 3 bytes are counting down!!!

It's the correct behavior.

The lower 3 bytes are the _residual_ counter, and the upper byte is
the index of the SG buffer descriptor.


Takashi


-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-12 22:00         ` James Courtier-Dutton
  2005-09-13 10:42           ` Takashi Iwai
@ 2005-09-13 11:18           ` Karsten Wiese
  2005-09-26 18:52             ` Karsten Wiese
  1 sibling, 1 reply; 14+ messages in thread
From: Karsten Wiese @ 2005-09-13 11:18 UTC (permalink / raw)
  To: James Courtier-Dutton; +Cc: Takashi Iwai, alsa-devel

Am Dienstag, 13. September 2005 00:00 schrieb James Courtier-Dutton:
> Karsten Wiese wrote:
> > Am Montag, 12. September 2005 15:09 schrieb Takashi Iwai:
> > 
> >>Hmm, then let's fix the handling of 0 read in all cases.
> >>We can simply assume the next period in this case as an exception.
> > 
> > 
> > No. I've looked at the change sequence of CUR_COUNT through an iopl(3)ed
> > app running as root polling CUR_COUNT.
> > The problem case looks like this:
> > step    value
> > ======  ==========
> > 0	0x01000020	// page index 1, 32 bytes left to transfer
> > 1	0x01000000	// page index 1 complete
> > 2	0x02000000	// switched to page index 2, looks like complete
> > 			// _but hasn't yet started!_
> > 3	0x02001000	// page index 2, 4096 bytes left to transfer
> > 
> > 
> > Step 1 values could be red up to ~270 times in a row.
> > Step 2 values occurred maximum 2 times in a row so far.
> > 
> 
> That looks truely horrible.
> So upper byte in counting up, and lower 3 bytes are counting down!!!

bits 31-24 hold s(catter)g(ather)d(ma) page index,
bits 23-0  show the count of bytes still to transfer.

> 
> Can you please give me some indication of how long the register stays at 
> each value, and at what point the interrupt happens.

I think the interrupt line is activated, where the first Step 1 value
is red. Haven't checked the exact times in useconds.

> 
> Are you also saying that the count can proceed like this:
> 0x01000000
> 0x02000000
> 0x02001000
> 0x02000fff
> ...
> 0x02000001
> 0x02000000
> 0x03000000

yes
> 
> So, here we actually have 0x0200000 twice in the same sequence. That 
> sort of situation will never work well and there is really not a lot 
> that can be done except to say that whenever ((value & 0xffffff) == 0) 
> we don't have a clue where we are in the buffer!
> 
> You say the Step 1 values could be red up to ~270 times in a row but 
> Step 2 values occured max 2 times in a row. Can we assume therefore that 
> the counter is getting stuck st Step 1 for some reason?

Yes, it waits shortly when it has finished a period.

So if above assumption about when the interrupt triggers is correct,
another strategy to solve the ((value & 0xffffff) == 0) situation,
when not in interrupt would look like:
	int c_status = inb(VIADEV_REG(viadev, OFFSET_STATUS))
	if (c_status & (VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG))
		we have finished the period
	else
		we are still at the start of the period,
		interrupt routine has just run,
		use last interrupt position

Will take some time before I try that. Urgent stuff piling up here...

   Karsten

	

	
		
___________________________________________________________ 
Gesendet von Yahoo! Mail - Jetzt mit 1GB Speicher kostenlos - Hier anmelden: http://mail.yahoo.de



-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-13 11:18           ` Karsten Wiese
@ 2005-09-26 18:52             ` Karsten Wiese
  2005-09-29 10:44               ` Takashi Iwai
  0 siblings, 1 reply; 14+ messages in thread
From: Karsten Wiese @ 2005-09-26 18:52 UTC (permalink / raw)
  To: James Courtier-Dutton, Takashi Iwai; +Cc: alsa-devel

[-- Attachment #1: Type: text/plain, Size: 511 bytes --]

Am Dienstag, 13. September 2005 13:18 schrieb Karsten Wiese:
> another strategy to solve the ((value & 0xffffff) == 0) situation,
> when not in interrupt would look like:
> 	int c_status = inb(VIADEV_REG(viadev, OFFSET_STATUS))
> 	if (c_status & (VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG))
> 		we have finished the period
> 	else
> 		we are still at the start of the period,
> 		interrupt routine has just run,
> 		use last interrupt position

Attached patch implements above idea and works fine here.

      Karsten

[-- Attachment #2: patch-snd-via82xx-noloop --]
[-- Type: message/rfc822, Size: 8352 bytes --]

From: Karsten Wiese <annabellesgarden@yahoo.de>


Reduce interrupt latency in sound/pci/via82xx.c


The change only affects the via823x kind of chips.
Here the  via8233_pcm_pointer_hw() function
(named snd_via8233_pcm_pointer() before)
needed to loop until a non zero position is red from the chip.

Measurements have shown that more than 200 loops are typically needed on
an Athlon64. 
As io-reads cost many cycles, those loops sum up huge.
via8233_pcm_pointer_hw() runs either in interrupt or with interrupts
disabled. So it introduces significant interrupt latency.

The patch introduces a calculated position value hwptr_done,
that is updated by the interrupt routine when a period is completed.
It is only used, if the 823x chip returns a zero position, which can't
be interpreted reliably.

Further optimisation is applied on the 8233 chip's interrupt routine:
Only the SGD_SHADOW is read, as it contains all infos needed.
We ommit ~5 more register reads that way.


Signed-off-by: Karsten Wiese <annabellesgarden@yahoo.de>






--- linux-2.6.13/sound/pci/via82xx.c	2005-08-29 01:41:01.000000000 +0200
+++ linux-2.6.13-RT-kw/sound/pci/via82xx.c	2005-09-26 20:30:38.000000000 +0200
@@ -41,6 +41,9 @@
  *	  device for applications.
  *	- clean up the code, separate low-level initialization
  *	  routines for each chipset.
+ *
+ * Sep. 26, 2005	Karsten Wiese <annabellesgarden@yahoo.de>
+ *	- Optimize position calculation for the 823x chips. 
  */
 
 #include <sound/driver.h>
@@ -336,6 +339,8 @@
 	unsigned int fragsize;
 	unsigned int bufsize;
 	unsigned int bufsize2;
+	int hwptr_done;		/* processed frame position in the buffer */
+	int in_interrupt;
 };
 
 
@@ -401,8 +406,8 @@
 };
 
 static struct pci_device_id snd_via82xx_ids[] = {
-	{ 0x1106, 0x3058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA686, },	/* 686A */
-	{ 0x1106, 0x3059, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA8233, },	/* VT8233 */
+	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA686, },	/* 686A */
+	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8233_5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA8233, },	/* VT8233 */
 	{ 0, }
 };
 
@@ -556,7 +561,7 @@
 {
 	via82xx_t *chip = ac97->private_data;
 	unsigned int xval;
-	
+
 	xval = !ac97->num ? VIA_REG_AC97_CODEC_ID_PRIMARY : VIA_REG_AC97_CODEC_ID_SECONDARY;
 	xval <<= VIA_REG_AC97_CODEC_ID_SHIFT;
 	xval |= reg << VIA_REG_AC97_CMD_SHIFT;
@@ -604,14 +609,15 @@
 	outb(0x00, VIADEV_REG(viadev, OFFSET_TYPE)); /* for via686 */
 	// outl(0, VIADEV_REG(viadev, OFFSET_CURR_PTR));
 	viadev->lastpos = 0;
+	viadev->hwptr_done = 0;
 }
 
 
 /*
  *  Interrupt handler
+ *  Used for 686 and 8233A
  */
-
-static irqreturn_t snd_via82xx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t snd_via686_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	via82xx_t *chip = dev_id;
 	unsigned int status;
@@ -630,13 +636,23 @@
 	for (i = 0; i < chip->num_devs; i++) {
 		viadev_t *viadev = &chip->devs[i];
 		unsigned char c_status = inb(VIADEV_REG(viadev, OFFSET_STATUS));
-		c_status &= (VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG|VIA_REG_STAT_STOPPED);
-		if (! c_status)
+		if (! (c_status & (VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG|VIA_REG_STAT_STOPPED)))
 			continue;
 		if (viadev->substream && viadev->running) {
+			/*
+			 * Update hwptr_done based on 'period elapsed'
+			 * interrupts. We'll use it, when the chip returns 0 
+			 * for OFFSET_CURR_COUNT.
+			 */
+			if (c_status & VIA_REG_STAT_EOL)
+				viadev->hwptr_done = 0;
+			else
+				viadev->hwptr_done += viadev->fragsize;
+			viadev->in_interrupt = c_status;
 			spin_unlock(&chip->reg_lock);
 			snd_pcm_period_elapsed(viadev->substream);
 			spin_lock(&chip->reg_lock);
+			viadev->in_interrupt = 0;
 		}
 		outb(c_status, VIADEV_REG(viadev, OFFSET_STATUS)); /* ack */
 	}
@@ -645,6 +661,61 @@
 }
 
 /*
+ *  Interrupt handler
+ */
+static irqreturn_t snd_via8233_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	via82xx_t *chip = dev_id;
+	unsigned int status;
+	unsigned int i;
+	unsigned int intr_mask;
+	irqreturn_t irqreturn = IRQ_NONE;
+
+
+	/* check status for each stream */
+	intr_mask = chip->intr_mask;
+	spin_lock(&chip->reg_lock);
+	status = inl(VIAREG(chip, SGD_SHADOW));
+	for (i = 0; i < chip->num_devs;
+	     i++, intr_mask >>= 4, status >>= 4) {
+		viadev_t *viadev = &chip->devs[i];
+		snd_pcm_substream_t *substream;
+		unsigned char c_status;
+		while (!(intr_mask & 3)) {
+			intr_mask >>= 4;
+			status >>= 4;
+		}
+		c_status = status & (VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG);
+		if (!c_status)
+			continue;
+		c_status |= ((status & 8) << 4);		// ACTIVE Flag
+		substream = viadev->substream;
+		if (substream && viadev->running) {
+			/*
+			 * Update hwptr_done based on 'period elapsed'
+			 * interrupts. We'll use it, when the chip returns 0 
+			 * for OFFSET_CURR_COUNT.
+			 */
+			if (c_status & VIA_REG_STAT_EOL)
+				viadev->hwptr_done = 0;
+			else
+				viadev->hwptr_done += viadev->fragsize;
+			viadev->in_interrupt = c_status;
+			spin_unlock(&chip->reg_lock);
+
+			snd_pcm_period_elapsed(substream);
+
+			spin_lock(&chip->reg_lock);
+			viadev->in_interrupt = 0;
+		}
+		outb(c_status, VIADEV_REG(viadev, OFFSET_STATUS)); /* ack */
+		irqreturn = IRQ_HANDLED;
+	}
+	spin_unlock(&chip->reg_lock);
+	return irqreturn;
+}
+
+/*
  *  PCM callbacks
  */
 
@@ -705,6 +776,8 @@
 	size = viadev->idx_table[idx].size;
 	base = viadev->idx_table[idx].offset;
 	res = base + size - count;
+	if (res >= viadev->bufsize)
+		res -= viadev->bufsize;
 
 	/* check the validity of the calculated position */
 	if (size < count) {
@@ -734,9 +807,6 @@
 			}
 		}
 	}
-	viadev->lastpos = res; /* remember the last position */
-	if (res >= viadev->bufsize)
-		res -= viadev->bufsize;
 	return res;
 }
 
@@ -764,6 +834,7 @@
 	else /* CURR_PTR holds the address + 8 */
 		idx = ((ptr - (unsigned int)viadev->table.addr) / 8 - 1) % viadev->tbl_entries;
 	res = calc_linear_pos(viadev, idx, count);
+	viadev->lastpos = res; /* remember the last position */
 	spin_unlock(&chip->reg_lock);
 
 	return bytes_to_frames(substream->runtime, res);
@@ -777,36 +848,49 @@
 	via82xx_t *chip = snd_pcm_substream_chip(substream);
 	viadev_t *viadev = (viadev_t *)substream->runtime->private_data;
 	unsigned int idx, count, res;
-	int timeout = 5000;
+	int status;
 	
 	snd_assert(viadev->tbl_entries, return 0);
-	if (!(inb(VIADEV_REG(viadev, OFFSET_STATUS)) & VIA_REG_STAT_ACTIVE))
-		return 0;
+
 	spin_lock(&chip->reg_lock);
-	do {
-		count = inl(VIADEV_REG(viadev, OFFSET_CURR_COUNT));
-		/* some mobos read 0 count */
-		if ((count & 0xffffff) || ! viadev->running)
-			break;
-	} while (--timeout);
-	if (! timeout)
-		snd_printd(KERN_ERR "zero position is read\n");
-	idx = count >> 24;
-	if (idx >= viadev->tbl_entries) {
+	count = inl(VIADEV_REG(viadev, OFFSET_CURR_COUNT));
+	status = viadev->in_interrupt;
+	if (!status)
+		status = inb(VIADEV_REG(viadev, OFFSET_STATUS));
+
+	if (!(status & VIA_REG_STAT_ACTIVE)) {
+		res = 0;
+		goto unlock;
+	}
+	if (count & 0xffffff) {
+		idx = count >> 24;
+		if (idx >= viadev->tbl_entries) {
 #ifdef POINTER_DEBUG
-		printk("fail: invalid idx = %i/%i\n", idx, viadev->tbl_entries);
+			printk("fail: invalid idx = %i/%i\n", idx, viadev->tbl_entries);
 #endif
-		res = viadev->lastpos;
+			res = viadev->lastpos;
+		} else {
+			count &= 0xffffff;
+			res = calc_linear_pos(viadev, idx, count);
+		}
 	} else {
-		count &= 0xffffff;
-		res = calc_linear_pos(viadev, idx, count);
-	}
+		res = viadev->hwptr_done;
+		if (!viadev->in_interrupt) {
+			if (status & VIA_REG_STAT_EOL) {
+				res = 0;
+			} else
+				if (status & VIA_REG_STAT_FLAG) {
+					res += viadev->fragsize;
+				}
+		}
+	}			    
+unlock:
+	viadev->lastpos = res;
 	spin_unlock(&chip->reg_lock);
 
 	return bytes_to_frames(substream->runtime, res);
 }
 
-
 /*
  * hw_params callback:
  * allocate the buffer and build up the buffer description table
@@ -2089,7 +2173,10 @@
 		return err;
 	}
 	chip->port = pci_resource_start(pci, 0);
-	if (request_irq(pci->irq, snd_via82xx_interrupt, SA_INTERRUPT|SA_SHIRQ,
+	if (request_irq(pci->irq,
+			chip_type == TYPE_VIA8233 ?
+			snd_via8233_interrupt :	snd_via686_interrupt,
+			SA_INTERRUPT|SA_SHIRQ,
 			card->driver, (void *)chip)) {
 		snd_printk("unable to grab IRQ %d\n", pci->irq);
 		snd_via82xx_free(chip);

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-26 18:52             ` Karsten Wiese
@ 2005-09-29 10:44               ` Takashi Iwai
  2005-09-29 13:56                 ` Takashi Iwai
  2005-10-18 12:42                 ` Karsten Wiese
  0 siblings, 2 replies; 14+ messages in thread
From: Takashi Iwai @ 2005-09-29 10:44 UTC (permalink / raw)
  To: Karsten Wiese; +Cc: James Courtier-Dutton, alsa-devel

At some time ago,
Karsten Wiese wrote:
> 
> 
> @@ -401,8 +406,8 @@
>  };
>  
>  static struct pci_device_id snd_via82xx_ids[] = {
> -	{ 0x1106, 0x3058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA686, },	/* 686A */
> -	{ 0x1106, 0x3059, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA8233, },	/* VT8233 */
> +	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA686, },	/* 686A */
> +	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8233_5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA8233, },	/* VT8233 */
>  	{ 0, }
>  };

I like to keep the numbers here to check the support PCI IDs easier
from the driver code.  I don't think changing this to PCI_* would
improve the readability.
The only drawback to use the numbers is possible typos, though.

>  /*
> + *  Interrupt handler
> + */
> +static irqreturn_t snd_via8233_interrupt(int irq, void *dev_id, struct pt_regs *regs)
> +{
> +	via82xx_t *chip = dev_id;
> +	unsigned int status;
> +	unsigned int i;
> +	unsigned int intr_mask;
> +	irqreturn_t irqreturn = IRQ_NONE;
> +
> +
> +	/* check status for each stream */
> +	intr_mask = chip->intr_mask;
> +	spin_lock(&chip->reg_lock);
> +	status = inl(VIAREG(chip, SGD_SHADOW));
> +	for (i = 0; i < chip->num_devs;
> +	     i++, intr_mask >>= 4, status >>= 4) {
> +		viadev_t *viadev = &chip->devs[i];
> +		snd_pcm_substream_t *substream;
> +		unsigned char c_status;
> +		while (!(intr_mask & 3)) {
> +			intr_mask >>= 4;
> +			status >>= 4;
> +		}

It'd be better to have a new field in viadev struct to indicate the
shift bits rather than this loop.  The loop looks ad-hoc.

Also, could you use tabs instead of spaces, as mentioned in
CodingStyle?

Otherwise, it looks fine to me.

Thanks!


Takashi


-------------------------------------------------------
This SF.Net email is sponsored by:
Power Architecture Resource Center: Free content, downloads, discussions,
and more. http://solutions.newsforge.com/ibmarch.tmpl

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-29 10:44               ` Takashi Iwai
@ 2005-09-29 13:56                 ` Takashi Iwai
  2005-10-18 12:42                 ` Karsten Wiese
  1 sibling, 0 replies; 14+ messages in thread
From: Takashi Iwai @ 2005-09-29 13:56 UTC (permalink / raw)
  To: Karsten Wiese; +Cc: James Courtier-Dutton, alsa-devel

At Thu, 29 Sep 2005 12:44:59 +0200,
I wrote:
> 
> Otherwise, it looks fine to me.

Oh, also it'd be helpful to write irq handler like below:

irqreturn_t foo()
{
	int handled = 0;
	...
	if (something)
		handled = 1;
	...
	return IRQ_RETVAL(handled);
}

This makes possible to compile even on 2.2/2.4 kernels without
patches.


thanks,

Takashi


-------------------------------------------------------
This SF.Net email is sponsored by:
Power Architecture Resource Center: Free content, downloads, discussions,
and more. http://solutions.newsforge.com/ibmarch.tmpl

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-09-29 10:44               ` Takashi Iwai
  2005-09-29 13:56                 ` Takashi Iwai
@ 2005-10-18 12:42                 ` Karsten Wiese
  2005-10-18 12:54                   ` Takashi Iwai
  1 sibling, 1 reply; 14+ messages in thread
From: Karsten Wiese @ 2005-10-18 12:42 UTC (permalink / raw)
  To: Takashi Iwai; +Cc: James Courtier-Dutton, alsa-devel

[-- Attachment #1: Type: text/plain, Size: 1759 bytes --]

Am Donnerstag, 29. September 2005 12:44 schrieb Takashi Iwai:
> >  
> >  static struct pci_device_id snd_via82xx_ids[] = {
> > -	{ 0x1106, 0x3058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA686, },	/* 686A */
> > -	{ 0x1106, 0x3059, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA8233, },	/* VT8233 */
> > +	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA686, },	/* 686A */
> > +	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8233_5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA8233, },	/* VT8233 */
> >  	{ 0, }
> >  };
> 
> I like to keep the numbers here to check the support PCI IDs easier
> from the driver code.  I don't think changing this to PCI_* would
> improve the readability.
> The only drawback to use the numbers is possible typos, though.

hmm, symbolic names make it easier to grep/cscope the tree
for occurrences of the IDs.
So in this patch the structures are initialized symbolically
plus the hex-IDs are there as comment.

> > +	/* check status for each stream */
> > +	intr_mask = chip->intr_mask;
> > +	spin_lock(&chip->reg_lock);
> > +	status = inl(VIAREG(chip, SGD_SHADOW));
> > +	for (i = 0; i < chip->num_devs;
> > +	     i++, intr_mask >>= 4, status >>= 4) {
> > +		viadev_t *viadev = &chip->devs[i];
> > +		snd_pcm_substream_t *substream;
> > +		unsigned char c_status;
> > +		while (!(intr_mask & 3)) {
> > +			intr_mask >>= 4;
> > +			status >>= 4;
> > +		}
> 
> It'd be better to have a new field in viadev struct to indicate the
> shift bits rather than this loop.  The loop looks ad-hoc.

Done.
> 
> Also, could you use tabs instead of spaces, as mentioned in
> CodingStyle?

KMail fooled me once again. Better now?

Interrupt return macro for 2.4 compatibility is also there.

      Karsten

[-- Attachment #2: via82xx_fast_pointer.4.patch --]
[-- Type: text/x-diff, Size: 11683 bytes --]

From: Karsten Wiese <annabellesgarden@yahoo.de>


Reduce interrupt latency in sound/pci/via82xx.c


The change only affects the via823x kind of chips.
Here the  via8233_pcm_pointer_hw() function
(named snd_via8233_pcm_pointer() before)
needed to loop until a non zero position is red from the chip.

Measurements have shown that more than 200 loops are typically needed on
an Athlon64. 
As io-reads cost many cycles, those loops sum up huge.
via8233_pcm_pointer_hw() runs either in interrupt or with interrupts
disabled. So it introduces significant interrupt latency.

The patch introduces a calculated position value hwptr_done,
that is updated by the interrupt routine when a period is completed.
It is only used, if the 823x chip returns a zero position, which can't
be interpreted reliably.

Further optimisation is applied on the 8233 chip's interrupt routine:
Only the SGD_SHADOW is read, as it contains all infos needed.
We ommit ~5 more register reads that way.


Signed-off-by: Karsten Wiese <annabellesgarden@yahoo.de>






--- linux-2.6.14-rc4/sound/pci/via82xx.c	2005-10-18 14:20:24.000000000 +0200
+++ linux-2.6.14-rc4-rt7-kw/sound/pci/via82xx.c	2005-10-18 14:20:24.000000000 +0200
@@ -41,6 +41,9 @@
  *	  device for applications.
  *	- clean up the code, separate low-level initialization
  *	  routines for each chipset.
+ *
+ * Sep. 26, 2005	Karsten Wiese <annabellesgarden@yahoo.de>
+ *	- Optimize position calculation for the 823x chips. 
  */
 
 #include <sound/driver.h>
@@ -130,6 +133,7 @@
 /* common offsets */
 #define VIA_REG_OFFSET_STATUS		0x00	/* byte - channel status */
 #define   VIA_REG_STAT_ACTIVE		0x80	/* RO */
+#define   VIA8233_SHADOW_STAT_ACTIVE	0x08	/* RO */
 #define   VIA_REG_STAT_PAUSED		0x40	/* RO */
 #define   VIA_REG_STAT_TRIGGER_QUEUED	0x08	/* RO */
 #define   VIA_REG_STAT_STOPPED		0x04	/* RWC */
@@ -328,6 +332,9 @@
 	unsigned int fragsize;
 	unsigned int bufsize;
 	unsigned int bufsize2;
+	int hwptr_done;		/* processed frame position in the buffer */
+	int in_interrupt;
+	int shadow_shift;
 };
 
 
@@ -393,8 +400,10 @@
 };
 
 static struct pci_device_id snd_via82xx_ids[] = {
-	{ 0x1106, 0x3058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA686, },	/* 686A */
-	{ 0x1106, 0x3059, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA8233, },	/* VT8233 */
+	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA686, },	/* 686A */
+	/*	0x1106			0x3058										*/
+	{ PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8233_5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, TYPE_CARD_VIA8233, },	/* VT8233 */
+	/*	0x1106			0x3059										*/
 	{ 0, }
 };
 
@@ -548,7 +557,7 @@
 {
 	via82xx_t *chip = ac97->private_data;
 	unsigned int xval;
-	
+
 	xval = !ac97->num ? VIA_REG_AC97_CODEC_ID_PRIMARY : VIA_REG_AC97_CODEC_ID_SECONDARY;
 	xval <<= VIA_REG_AC97_CODEC_ID_SHIFT;
 	xval |= reg << VIA_REG_AC97_CMD_SHIFT;
@@ -596,14 +605,15 @@
 	outb(0x00, VIADEV_REG(viadev, OFFSET_TYPE)); /* for via686 */
 	// outl(0, VIADEV_REG(viadev, OFFSET_CURR_PTR));
 	viadev->lastpos = 0;
+	viadev->hwptr_done = 0;
 }
 
 
 /*
  *  Interrupt handler
+ *  Used for 686 and 8233A
  */
-
-static irqreturn_t snd_via82xx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t snd_via686_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	via82xx_t *chip = dev_id;
 	unsigned int status;
@@ -622,18 +632,81 @@
 	for (i = 0; i < chip->num_devs; i++) {
 		viadev_t *viadev = &chip->devs[i];
 		unsigned char c_status = inb(VIADEV_REG(viadev, OFFSET_STATUS));
-		c_status &= (VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG|VIA_REG_STAT_STOPPED);
-		if (! c_status)
+		if (! (c_status & (VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG|VIA_REG_STAT_STOPPED)))
 			continue;
 		if (viadev->substream && viadev->running) {
+			/*
+			 * Update hwptr_done based on 'period elapsed'
+			 * interrupts. We'll use it, when the chip returns 0 
+			 * for OFFSET_CURR_COUNT.
+			 */
+			if (c_status & VIA_REG_STAT_EOL)
+				viadev->hwptr_done = 0;
+			else
+				viadev->hwptr_done += viadev->fragsize;
+			viadev->in_interrupt = c_status;
 			spin_unlock(&chip->reg_lock);
 			snd_pcm_period_elapsed(viadev->substream);
 			spin_lock(&chip->reg_lock);
+			viadev->in_interrupt = 0;
 		}
 		outb(c_status, VIADEV_REG(viadev, OFFSET_STATUS)); /* ack */
 	}
 	spin_unlock(&chip->reg_lock);
-	return IRQ_HANDLED;
+	return IRQ_RETVAL(IRQ_HANDLED);
+}
+
+/*
+ *  Interrupt handler
+ */
+static irqreturn_t snd_via8233_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	via82xx_t *chip = dev_id;
+	unsigned int status;
+	unsigned int i;
+	irqreturn_t irqreturn = IRQ_NONE;
+
+
+	/* check status for each stream */
+	spin_lock(&chip->reg_lock);
+	status = inl(VIAREG(chip, SGD_SHADOW));
+
+	for (i = 0; i < chip->num_devs; i++) {
+		viadev_t *viadev = &chip->devs[i];
+		snd_pcm_substream_t *substream;
+		unsigned char c_status, shadow_status;
+
+		shadow_status = (status >> viadev->shadow_shift) & (VIA8233_SHADOW_STAT_ACTIVE|VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG);
+		c_status = shadow_status & (VIA_REG_STAT_EOL|VIA_REG_STAT_FLAG);
+		if (!c_status)
+			continue;
+
+		substream = viadev->substream;
+		if (substream && viadev->running) {
+			/*
+			 * Update hwptr_done based on 'period elapsed'
+			 * interrupts. We'll use it, when the chip returns 0 
+			 * for OFFSET_CURR_COUNT.
+			 */
+			if (c_status & VIA_REG_STAT_EOL)
+				viadev->hwptr_done = 0;
+			else
+				viadev->hwptr_done += viadev->fragsize;
+			viadev->in_interrupt = c_status;
+			if (shadow_status & VIA8233_SHADOW_STAT_ACTIVE)
+				viadev->in_interrupt |= VIA_REG_STAT_ACTIVE;
+			spin_unlock(&chip->reg_lock);
+
+			snd_pcm_period_elapsed(substream);
+
+			spin_lock(&chip->reg_lock);
+			viadev->in_interrupt = 0;
+		}
+		outb(c_status, VIADEV_REG(viadev, OFFSET_STATUS)); /* ack */
+		irqreturn = IRQ_HANDLED;
+	}
+	spin_unlock(&chip->reg_lock);
+	return IRQ_RETVAL(irqreturn);
 }
 
 /*
@@ -699,6 +772,8 @@
 	size = viadev->idx_table[idx].size;
 	base = viadev->idx_table[idx].offset;
 	res = base + size - count;
+	if (res >= viadev->bufsize)
+		res -= viadev->bufsize;
 
 	/* check the validity of the calculated position */
 	if (size < count) {
@@ -728,9 +803,6 @@
 			}
 		}
 	}
-	viadev->lastpos = res; /* remember the last position */
-	if (res >= viadev->bufsize)
-		res -= viadev->bufsize;
 	return res;
 }
 
@@ -758,6 +830,7 @@
 	else /* CURR_PTR holds the address + 8 */
 		idx = ((ptr - (unsigned int)viadev->table.addr) / 8 - 1) % viadev->tbl_entries;
 	res = calc_linear_pos(viadev, idx, count);
+	viadev->lastpos = res; /* remember the last position */
 	spin_unlock(&chip->reg_lock);
 
 	return bytes_to_frames(substream->runtime, res);
@@ -771,30 +844,44 @@
 	via82xx_t *chip = snd_pcm_substream_chip(substream);
 	viadev_t *viadev = (viadev_t *)substream->runtime->private_data;
 	unsigned int idx, count, res;
-	int timeout = 5000;
+	int status;
 	
 	snd_assert(viadev->tbl_entries, return 0);
-	if (!(inb(VIADEV_REG(viadev, OFFSET_STATUS)) & VIA_REG_STAT_ACTIVE))
-		return 0;
+
 	spin_lock(&chip->reg_lock);
-	do {
-		count = inl(VIADEV_REG(viadev, OFFSET_CURR_COUNT));
-		/* some mobos read 0 count */
-		if ((count & 0xffffff) || ! viadev->running)
-			break;
-	} while (--timeout);
-	if (! timeout)
-		snd_printd(KERN_ERR "zero position is read\n");
-	idx = count >> 24;
-	if (idx >= viadev->tbl_entries) {
+	count = inl(VIADEV_REG(viadev, OFFSET_CURR_COUNT));
+	status = viadev->in_interrupt;
+	if (!status)
+		status = inb(VIADEV_REG(viadev, OFFSET_STATUS));
+
+	if (!(status & VIA_REG_STAT_ACTIVE)) {
+		res = 0;
+		goto unlock;
+	}
+	if (count & 0xffffff) {
+		idx = count >> 24;
+		if (idx >= viadev->tbl_entries) {
 #ifdef POINTER_DEBUG
-		printk("fail: invalid idx = %i/%i\n", idx, viadev->tbl_entries);
+			printk("fail: invalid idx = %i/%i\n", idx, viadev->tbl_entries);
 #endif
-		res = viadev->lastpos;
+			res = viadev->lastpos;
+		} else {
+			count &= 0xffffff;
+			res = calc_linear_pos(viadev, idx, count);
+		}
 	} else {
-		count &= 0xffffff;
-		res = calc_linear_pos(viadev, idx, count);
-	}
+		res = viadev->hwptr_done;
+		if (!viadev->in_interrupt) {
+			if (status & VIA_REG_STAT_EOL) {
+				res = 0;
+			} else
+				if (status & VIA_REG_STAT_FLAG) {
+					res += viadev->fragsize;
+				}
+		}
+	}			    
+unlock:
+	viadev->lastpos = res;
 	spin_unlock(&chip->reg_lock);
 
 	return bytes_to_frames(substream->runtime, res);
@@ -1239,9 +1326,10 @@
 };
 
 
-static void init_viadev(via82xx_t *chip, int idx, unsigned int reg_offset, int direction)
+static void init_viadev(via82xx_t *chip, int idx, unsigned int reg_offset, int shadow_pos, int direction)
 {
 	chip->devs[idx].reg_offset = reg_offset;
+	chip->devs[idx].shadow_shift = shadow_pos * 4;
 	chip->devs[idx].direction = direction;
 	chip->devs[idx].port = chip->port + reg_offset;
 }
@@ -1271,9 +1359,9 @@
 	chip->pcms[0] = pcm;
 	/* set up playbacks */
 	for (i = 0; i < 4; i++)
-		init_viadev(chip, i, 0x10 * i, 0);
+		init_viadev(chip, i, 0x10 * i, i, 0);
 	/* capture */
-	init_viadev(chip, chip->capture_devno, VIA_REG_CAPTURE_8233_STATUS, 1);
+	init_viadev(chip, chip->capture_devno, VIA_REG_CAPTURE_8233_STATUS, 6, 1);
 
 	if ((err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV_SG,
 							 snd_dma_pci_data(chip->pci), 64*1024, 128*1024)) < 0)
@@ -1289,9 +1377,9 @@
 	strcpy(pcm->name, chip->card->shortname);
 	chip->pcms[1] = pcm;
 	/* set up playback */
-	init_viadev(chip, chip->multi_devno, VIA_REG_MULTPLAY_STATUS, 0);
+	init_viadev(chip, chip->multi_devno, VIA_REG_MULTPLAY_STATUS, 4, 0);
 	/* set up capture */
-	init_viadev(chip, chip->capture_devno + 1, VIA_REG_CAPTURE_8233_STATUS + 0x10, 1);
+	init_viadev(chip, chip->capture_devno + 1, VIA_REG_CAPTURE_8233_STATUS + 0x10, 7, 1);
 
 	if ((err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV_SG,
 						         snd_dma_pci_data(chip->pci), 64*1024, 128*1024)) < 0)
@@ -1324,9 +1412,9 @@
 	strcpy(pcm->name, chip->card->shortname);
 	chip->pcms[0] = pcm;
 	/* set up playback */
-	init_viadev(chip, chip->multi_devno, VIA_REG_MULTPLAY_STATUS, 0);
+	init_viadev(chip, chip->multi_devno, VIA_REG_MULTPLAY_STATUS, 4, 0);
 	/* capture */
-	init_viadev(chip, chip->capture_devno, VIA_REG_CAPTURE_8233_STATUS, 1);
+	init_viadev(chip, chip->capture_devno, VIA_REG_CAPTURE_8233_STATUS, 6, 1);
 
 	if ((err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV_SG,
 							 snd_dma_pci_data(chip->pci), 64*1024, 128*1024)) < 0)
@@ -1345,7 +1433,7 @@
 	strcpy(pcm->name, chip->card->shortname);
 	chip->pcms[1] = pcm;
 	/* set up playback */
-	init_viadev(chip, chip->playback_devno, 0x30, 0);
+	init_viadev(chip, chip->playback_devno, 0x30, 3, 0);
 
 	if ((err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV_SG,
 							 snd_dma_pci_data(chip->pci), 64*1024, 128*1024)) < 0)
@@ -1375,8 +1463,8 @@
 	pcm->private_data = chip;
 	strcpy(pcm->name, chip->card->shortname);
 	chip->pcms[0] = pcm;
-	init_viadev(chip, 0, VIA_REG_PLAYBACK_STATUS, 0);
-	init_viadev(chip, 1, VIA_REG_CAPTURE_STATUS, 1);
+	init_viadev(chip, 0, VIA_REG_PLAYBACK_STATUS, 0, 0);
+	init_viadev(chip, 1, VIA_REG_CAPTURE_STATUS, 0, 1);
 
 	if ((err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV_SG,
 							 snd_dma_pci_data(chip->pci), 64*1024, 128*1024)) < 0)
@@ -2084,7 +2172,10 @@
 		return err;
 	}
 	chip->port = pci_resource_start(pci, 0);
-	if (request_irq(pci->irq, snd_via82xx_interrupt, SA_INTERRUPT|SA_SHIRQ,
+	if (request_irq(pci->irq,
+			chip_type == TYPE_VIA8233 ?
+			snd_via8233_interrupt :	snd_via686_interrupt,
+			SA_INTERRUPT|SA_SHIRQ,
 			card->driver, (void *)chip)) {
 		snd_printk("unable to grab IRQ %d\n", pci->irq);
 		snd_via82xx_free(chip);

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Reduce interrupt latency in sound/pci/via82xx.c
  2005-10-18 12:42                 ` Karsten Wiese
@ 2005-10-18 12:54                   ` Takashi Iwai
  0 siblings, 0 replies; 14+ messages in thread
From: Takashi Iwai @ 2005-10-18 12:54 UTC (permalink / raw)
  To: Karsten Wiese; +Cc: James Courtier-Dutton, alsa-devel

At Tue, 18 Oct 2005 14:42:10 +0200,
Karsten Wiese wrote:
> 
> > 
> > Also, could you use tabs instead of spaces, as mentioned in
> > CodingStyle?
> 
> KMail fooled me once again. Better now?

Yep.

> Interrupt return macro for 2.4 compatibility is also there.

Well....

> -static irqreturn_t snd_via82xx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
> +static irqreturn_t snd_via686_interrupt(int irq, void *dev_id, struct pt_regs *regs)
>  {
(snip)
> -	return IRQ_HANDLED;
> +	return IRQ_RETVAL(IRQ_HANDLED);

This change is useless.

> +}
> +
> +/*
> + *  Interrupt handler
> + */
> +static irqreturn_t snd_via8233_interrupt(int irq, void *dev_id, struct pt_regs *regs)
> +{
> +	via82xx_t *chip = dev_id;
> +	unsigned int status;
> +	unsigned int i;
> +	irqreturn_t irqreturn = IRQ_NONE;

It should be just an int:

	int irqreturn = 0;

(snip)
> +		outb(c_status, VIADEV_REG(viadev, OFFSET_STATUS)); /* ack */
> +		irqreturn = IRQ_HANDLED;
And
		irqreturn = 1;


Others look fine.
I'll commit your patch with these fixes.


Thanks!

Takashi


-------------------------------------------------------
This SF.Net email is sponsored by:
Power Architecture Resource Center: Free content, downloads, discussions,
and more. http://solutions.newsforge.com/ibmarch.tmpl

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2005-10-18 12:54 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-09-12 12:18 [PATCH] Reduce interrupt latency in sound/pci/via82xx.c Karsten Wiese
2005-09-12 12:35 ` Takashi Iwai
2005-09-12 13:09   ` Karsten Wiese
2005-09-12 13:09     ` Takashi Iwai
2005-09-12 14:31       ` Karsten Wiese
2005-09-12 15:07         ` Takashi Iwai
2005-09-12 22:00         ` James Courtier-Dutton
2005-09-13 10:42           ` Takashi Iwai
2005-09-13 11:18           ` Karsten Wiese
2005-09-26 18:52             ` Karsten Wiese
2005-09-29 10:44               ` Takashi Iwai
2005-09-29 13:56                 ` Takashi Iwai
2005-10-18 12:42                 ` Karsten Wiese
2005-10-18 12:54                   ` Takashi Iwai

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.