From: Siarhei Siamashka <siarhei.siamashka@gmail.com>
To: linux-bluetooth@vger.kernel.org
Cc: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Subject: [PATCH 4/5] sbc: faster 'sbc_calculate_bits' function
Date: Fri, 2 Jul 2010 15:25:41 +0300 [thread overview]
Message-ID: <1278073542-14859-5-git-send-email-siarhei.siamashka@gmail.com> (raw)
In-Reply-To: <1278073542-14859-1-git-send-email-siarhei.siamashka@gmail.com>
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
By using SBC_ALWAYS_INLINE trick, the implementation of 'sbc_calculate_bits'
function is split into two branches, each having 'subband' variable value
known at compile time. It helps the compiler to generate more optimal code
by saving at least one extra register, and also provides more obvious
opportunities for loops unrolling.
Benchmarked on ARM Cortex-A8:
== Before: ==
$ time ./sbcenc -b53 -s8 -j test.au > /dev/null
real 0m3.989s
user 0m3.602s
sys 0m0.391s
samples % image name symbol name
26057 32.6128 sbcenc sbc_pack_frame
20003 25.0357 sbcenc sbc_analyze_4b_8s_neon
14220 17.7977 sbcenc sbc_calculate_bits
8498 10.6361 no-vmlinux /no-vmlinux
5300 6.6335 sbcenc sbc_calc_scalefactors_j_neon
3235 4.0489 sbcenc sbc_enc_process_input_8s_be_neon
2172 2.7185 sbcenc sbc_encode
== After: ==
$ time ./sbcenc -b53 -s8 -j test.au > /dev/null
real 0m3.652s
user 0m3.195s
sys 0m0.445s
samples % image name symbol name
26207 36.0095 sbcenc sbc_pack_frame
19820 27.2335 sbcenc sbc_analyze_4b_8s_neon
8629 11.8566 no-vmlinux /no-vmlinux
6988 9.6018 sbcenc sbc_calculate_bits
5094 6.9994 sbcenc sbc_calc_scalefactors_j_neon
3351 4.6044 sbcenc sbc_enc_process_input_8s_be_neon
2182 2.9982 sbcenc sbc_encode
---
sbc/sbc.c | 43 ++++++++++++++++++++++++++++---------------
1 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/sbc/sbc.c b/sbc/sbc.c
index 1921585..a6391ae 100644
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -160,7 +160,8 @@ static uint8_t sbc_crc8(const uint8_t *data, size_t len)
* Takes a pointer to the frame in question, a pointer to the bits array and
* the sampling frequency (as 2 bit integer)
*/
-static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
+static SBC_ALWAYS_INLINE void sbc_calculate_bits_internal(
+ const struct sbc_frame *frame, int (*bits)[8], int subbands)
{
uint8_t sf = frame->frequency;
@@ -171,17 +172,17 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
for (ch = 0; ch < frame->channels; ch++) {
max_bitneed = 0;
if (frame->allocation == SNR) {
- for (sb = 0; sb < frame->subbands; sb++) {
+ for (sb = 0; sb < subbands; sb++) {
bitneed[ch][sb] = frame->scale_factor[ch][sb];
if (bitneed[ch][sb] > max_bitneed)
max_bitneed = bitneed[ch][sb];
}
} else {
- for (sb = 0; sb < frame->subbands; sb++) {
+ for (sb = 0; sb < subbands; sb++) {
if (frame->scale_factor[ch][sb] == 0)
bitneed[ch][sb] = -5;
else {
- if (frame->subbands == 4)
+ if (subbands == 4)
loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
else
loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
@@ -202,7 +203,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
bitslice--;
bitcount += slicecount;
slicecount = 0;
- for (sb = 0; sb < frame->subbands; sb++) {
+ for (sb = 0; sb < subbands; sb++) {
if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
slicecount++;
else if (bitneed[ch][sb] == bitslice + 1)
@@ -215,7 +216,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
bitslice--;
}
- for (sb = 0; sb < frame->subbands; sb++) {
+ for (sb = 0; sb < subbands; sb++) {
if (bitneed[ch][sb] < bitslice + 2)
bits[ch][sb] = 0;
else {
@@ -225,7 +226,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
}
- for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) {
+ for (sb = 0; bitcount < frame->bitpool &&
+ sb < subbands; sb++) {
if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
bits[ch][sb]++;
bitcount++;
@@ -235,7 +237,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
}
- for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) {
+ for (sb = 0; bitcount < frame->bitpool &&
+ sb < subbands; sb++) {
if (bits[ch][sb] < 16) {
bits[ch][sb]++;
bitcount++;
@@ -251,7 +254,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
max_bitneed = 0;
if (frame->allocation == SNR) {
for (ch = 0; ch < 2; ch++) {
- for (sb = 0; sb < frame->subbands; sb++) {
+ for (sb = 0; sb < subbands; sb++) {
bitneed[ch][sb] = frame->scale_factor[ch][sb];
if (bitneed[ch][sb] > max_bitneed)
max_bitneed = bitneed[ch][sb];
@@ -259,11 +262,11 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
} else {
for (ch = 0; ch < 2; ch++) {
- for (sb = 0; sb < frame->subbands; sb++) {
+ for (sb = 0; sb < subbands; sb++) {
if (frame->scale_factor[ch][sb] == 0)
bitneed[ch][sb] = -5;
else {
- if (frame->subbands == 4)
+ if (subbands == 4)
loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
else
loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
@@ -286,7 +289,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
bitcount += slicecount;
slicecount = 0;
for (ch = 0; ch < 2; ch++) {
- for (sb = 0; sb < frame->subbands; sb++) {
+ for (sb = 0; sb < subbands; sb++) {
if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
slicecount++;
else if (bitneed[ch][sb] == bitslice + 1)
@@ -301,7 +304,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
for (ch = 0; ch < 2; ch++) {
- for (sb = 0; sb < frame->subbands; sb++) {
+ for (sb = 0; sb < subbands; sb++) {
if (bitneed[ch][sb] < bitslice + 2) {
bits[ch][sb] = 0;
} else {
@@ -325,7 +328,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
if (ch == 1) {
ch = 0;
sb++;
- if (sb >= frame->subbands) break;
+ if (sb >= subbands)
+ break;
} else
ch = 1;
}
@@ -340,7 +344,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
if (ch == 1) {
ch = 0;
sb++;
- if (sb >= frame->subbands) break;
+ if (sb >= subbands)
+ break;
} else
ch = 1;
}
@@ -349,6 +354,14 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
}
+static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
+{
+ if (frame->subbands == 4)
+ sbc_calculate_bits_internal(frame, bits, 4);
+ else
+ sbc_calculate_bits_internal(frame, bits, 8);
+}
+
/*
* Unpacks a SBC frame at the beginning of the stream in data,
* which has at most len bytes into frame.
--
1.6.4.4
next prev parent reply other threads:[~2010-07-02 12:25 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-07-02 12:25 [PATCH 0/5] SBC encoder optimizations for ARM processors Siarhei Siamashka
2010-07-02 12:25 ` [PATCH 1/5] sbc: ARM NEON optimized joint stereo processing in SBC encoder Siarhei Siamashka
2010-07-02 12:25 ` [PATCH 2/5] sbc: ARM NEON optimizations for input permutation " Siarhei Siamashka
2010-07-02 12:25 ` [PATCH 3/5] sbc: slightly faster 'sbc_calc_scalefactors_neon' Siarhei Siamashka
2010-07-02 12:25 ` Siarhei Siamashka [this message]
2010-07-02 12:25 ` [PATCH 5/5] sbc: ARMv6 optimized version of analysis filter for SBC encoder Siarhei Siamashka
2010-07-02 19:04 ` [PATCH 0/5] SBC encoder optimizations for ARM processors Johan Hedberg
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1278073542-14859-5-git-send-email-siarhei.siamashka@gmail.com \
--to=siarhei.siamashka@gmail.com \
--cc=linux-bluetooth@vger.kernel.org \
--cc=siarhei.siamashka@nokia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).