linux-bluetooth.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Siarhei Siamashka <siarhei.siamashka@gmail.com>
To: linux-bluetooth@vger.kernel.org
Cc: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Subject: [PATCH 4/5] sbc: faster 'sbc_calculate_bits' function
Date: Fri,  2 Jul 2010 15:25:41 +0300	[thread overview]
Message-ID: <1278073542-14859-5-git-send-email-siarhei.siamashka@gmail.com> (raw)
In-Reply-To: <1278073542-14859-1-git-send-email-siarhei.siamashka@gmail.com>

From: Siarhei Siamashka <siarhei.siamashka@nokia.com>

By using the SBC_ALWAYS_INLINE trick, the implementation of the
'sbc_calculate_bits' function is split into two branches, each having the
value of the 'subbands' variable known at compile time. This helps the
compiler generate more optimal code by saving at least one extra register,
and also provides more obvious opportunities for loop unrolling.

Benchmarked on ARM Cortex-A8:

== Before: ==

$ time ./sbcenc -b53 -s8 -j test.au > /dev/null

real    0m3.989s
user    0m3.602s
sys     0m0.391s

samples  %        image name               symbol name
26057    32.6128  sbcenc                   sbc_pack_frame
20003    25.0357  sbcenc                   sbc_analyze_4b_8s_neon
14220    17.7977  sbcenc                   sbc_calculate_bits
8498     10.6361  no-vmlinux               /no-vmlinux
5300      6.6335  sbcenc                   sbc_calc_scalefactors_j_neon
3235      4.0489  sbcenc                   sbc_enc_process_input_8s_be_neon
2172      2.7185  sbcenc                   sbc_encode

== After: ==

$ time ./sbcenc -b53 -s8 -j test.au > /dev/null

real    0m3.652s
user    0m3.195s
sys     0m0.445s

samples  %        image name               symbol name
26207    36.0095  sbcenc                   sbc_pack_frame
19820    27.2335  sbcenc                   sbc_analyze_4b_8s_neon
8629     11.8566  no-vmlinux               /no-vmlinux
6988      9.6018  sbcenc                   sbc_calculate_bits
5094      6.9994  sbcenc                   sbc_calc_scalefactors_j_neon
3351      4.6044  sbcenc                   sbc_enc_process_input_8s_be_neon
2182      2.9982  sbcenc                   sbc_encode
---
 sbc/sbc.c |   43 ++++++++++++++++++++++++++++---------------
 1 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/sbc/sbc.c b/sbc/sbc.c
index 1921585..a6391ae 100644
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -160,7 +160,8 @@ static uint8_t sbc_crc8(const uint8_t *data, size_t len)
  * Takes a pointer to the frame in question, a pointer to the bits array and
  * the sampling frequency (as 2 bit integer)
  */
-static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
+static SBC_ALWAYS_INLINE void sbc_calculate_bits_internal(
+		const struct sbc_frame *frame, int (*bits)[8], int subbands)
 {
 	uint8_t sf = frame->frequency;
 
@@ -171,17 +172,17 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 		for (ch = 0; ch < frame->channels; ch++) {
 			max_bitneed = 0;
 			if (frame->allocation == SNR) {
-				for (sb = 0; sb < frame->subbands; sb++) {
+				for (sb = 0; sb < subbands; sb++) {
 					bitneed[ch][sb] = frame->scale_factor[ch][sb];
 					if (bitneed[ch][sb] > max_bitneed)
 						max_bitneed = bitneed[ch][sb];
 				}
 			} else {
-				for (sb = 0; sb < frame->subbands; sb++) {
+				for (sb = 0; sb < subbands; sb++) {
 					if (frame->scale_factor[ch][sb] == 0)
 						bitneed[ch][sb] = -5;
 					else {
-						if (frame->subbands == 4)
+						if (subbands == 4)
 							loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
 						else
 							loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
@@ -202,7 +203,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 				bitslice--;
 				bitcount += slicecount;
 				slicecount = 0;
-				for (sb = 0; sb < frame->subbands; sb++) {
+				for (sb = 0; sb < subbands; sb++) {
 					if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
 						slicecount++;
 					else if (bitneed[ch][sb] == bitslice + 1)
@@ -215,7 +216,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 				bitslice--;
 			}
 
-			for (sb = 0; sb < frame->subbands; sb++) {
+			for (sb = 0; sb < subbands; sb++) {
 				if (bitneed[ch][sb] < bitslice + 2)
 					bits[ch][sb] = 0;
 				else {
@@ -225,7 +226,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 				}
 			}
 
-			for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) {
+			for (sb = 0; bitcount < frame->bitpool &&
+							sb < subbands; sb++) {
 				if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
 					bits[ch][sb]++;
 					bitcount++;
@@ -235,7 +237,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 				}
 			}
 
-			for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) {
+			for (sb = 0; bitcount < frame->bitpool &&
+							sb < subbands; sb++) {
 				if (bits[ch][sb] < 16) {
 					bits[ch][sb]++;
 					bitcount++;
@@ -251,7 +254,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 		max_bitneed = 0;
 		if (frame->allocation == SNR) {
 			for (ch = 0; ch < 2; ch++) {
-				for (sb = 0; sb < frame->subbands; sb++) {
+				for (sb = 0; sb < subbands; sb++) {
 					bitneed[ch][sb] = frame->scale_factor[ch][sb];
 					if (bitneed[ch][sb] > max_bitneed)
 						max_bitneed = bitneed[ch][sb];
@@ -259,11 +262,11 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 			}
 		} else {
 			for (ch = 0; ch < 2; ch++) {
-				for (sb = 0; sb < frame->subbands; sb++) {
+				for (sb = 0; sb < subbands; sb++) {
 					if (frame->scale_factor[ch][sb] == 0)
 						bitneed[ch][sb] = -5;
 					else {
-						if (frame->subbands == 4)
+						if (subbands == 4)
 							loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
 						else
 							loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
@@ -286,7 +289,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 			bitcount += slicecount;
 			slicecount = 0;
 			for (ch = 0; ch < 2; ch++) {
-				for (sb = 0; sb < frame->subbands; sb++) {
+				for (sb = 0; sb < subbands; sb++) {
 					if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
 						slicecount++;
 					else if (bitneed[ch][sb] == bitslice + 1)
@@ -301,7 +304,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 		}
 
 		for (ch = 0; ch < 2; ch++) {
-			for (sb = 0; sb < frame->subbands; sb++) {
+			for (sb = 0; sb < subbands; sb++) {
 				if (bitneed[ch][sb] < bitslice + 2) {
 					bits[ch][sb] = 0;
 				} else {
@@ -325,7 +328,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 			if (ch == 1) {
 				ch = 0;
 				sb++;
-				if (sb >= frame->subbands) break;
+				if (sb >= subbands)
+					break;
 			} else
 				ch = 1;
 		}
@@ -340,7 +344,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 			if (ch == 1) {
 				ch = 0;
 				sb++;
-				if (sb >= frame->subbands) break;
+				if (sb >= subbands)
+					break;
 			} else
 				ch = 1;
 		}
@@ -349,6 +354,14 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
 
 }
 
+static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
+{
+	if (frame->subbands == 4)
+		sbc_calculate_bits_internal(frame, bits, 4);
+	else
+		sbc_calculate_bits_internal(frame, bits, 8);
+}
+
 /*
  * Unpacks a SBC frame at the beginning of the stream in data,
  * which has at most len bytes into frame.
-- 
1.6.4.4


  parent reply	other threads:[~2010-07-02 12:25 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-02 12:25 [PATCH 0/5] SBC encoder optimizations for ARM processors Siarhei Siamashka
2010-07-02 12:25 ` [PATCH 1/5] sbc: ARM NEON optimized joint stereo processing in SBC encoder Siarhei Siamashka
2010-07-02 12:25 ` [PATCH 2/5] sbc: ARM NEON optimizations for input permutation " Siarhei Siamashka
2010-07-02 12:25 ` [PATCH 3/5] sbc: slightly faster 'sbc_calc_scalefactors_neon' Siarhei Siamashka
2010-07-02 12:25 ` Siarhei Siamashka [this message]
2010-07-02 12:25 ` [PATCH 5/5] sbc: ARMv6 optimized version of analysis filter for SBC encoder Siarhei Siamashka
2010-07-02 19:04 ` [PATCH 0/5] SBC encoder optimizations for ARM processors Johan Hedberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1278073542-14859-5-git-send-email-siarhei.siamashka@gmail.com \
    --to=siarhei.siamashka@gmail.com \
    --cc=linux-bluetooth@vger.kernel.org \
    --cc=siarhei.siamashka@nokia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).