[PATCH 3/3] lib: lzo: Improve decompression performance

From: zengzhaoxiu
Date: Mon Nov 09 2020 - 14:31:28 EST


From: Zhaoxiu Zeng <zhaoxiu.zeng@xxxxxxxxx>

This patch does the following:
1. Clean up the code
2. Use copy_from_back to copy the matched bytes from earlier positions in the output buffer

I tested on 5.8.18-300.fc33.x86_64.
The performance of the lzo1x_decompress_safe function is improved by about 5%.
Without the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS feature, the performance is improved by 60%!

Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@xxxxxxxxx>
---
lib/lzo/lzo1x_decompress_safe.c | 140 ++++++++++++++------------------
1 file changed, 59 insertions(+), 81 deletions(-)

diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index 7892a40cf765..afef64cedc51 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#endif
#include <asm/unaligned.h>
+#include <asm/copy_from_back.h>
#include <linux/lzo.h>
#include "lzodefs.h"

@@ -43,7 +44,7 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
const unsigned char *ip;
size_t t, next;
size_t state = 0;
- const unsigned char *m_pos;
+ size_t dist;
const unsigned char * const ip_end = in + in_len;
unsigned char * const op_end = out + *out_len;

@@ -117,29 +118,31 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
}
state = 4;
continue;
- } else if (state != 4) {
- next = t & 3;
- m_pos = op - 1;
- m_pos -= t >> 2;
- m_pos -= *ip++ << 2;
- TEST_LB(m_pos);
+ }
+
+ next = t & 3;
+ dist = t >> 2;
+ dist += *ip++ << 2;
+ if (state != 4) {
+ dist += 1;
+ TEST_LB(op - dist);
NEED_OP(2);
- op[0] = m_pos[0];
- op[1] = m_pos[1];
op += 2;
- goto match_next;
} else {
- next = t & 3;
- m_pos = op - (1 + M2_MAX_OFFSET);
- m_pos -= t >> 2;
- m_pos -= *ip++ << 2;
- t = 3;
+ dist += (1 + M2_MAX_OFFSET);
+ TEST_LB(op - dist);
+ NEED_OP(3);
+ op += 3;
+ op[-3] = op[-3 - dist];
}
+ op[-2] = op[-2 - dist];
+ op[-1] = op[-1 - dist];
+ goto match_next;
} else if (t >= 64) {
next = t & 3;
- m_pos = op - 1;
- m_pos -= (t >> 2) & 7;
- m_pos -= *ip++ << 3;
+ dist = 1;
+ dist += (t >> 2) & 7;
+ dist += *ip++ << 3;
t = (t >> 5) - 1 + (3 - 1);
} else if (t >= 32) {
t = (t & 31) + (3 - 1);
@@ -159,14 +162,15 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
t += offset + 31 + *ip++;
NEED_IP(2);
}
- m_pos = op - 1;
+ dist = 1;
next = get_unaligned_le16(ip);
ip += 2;
- m_pos -= next >> 2;
+ dist += next >> 2;
next &= 3;
} else {
NEED_IP(2);
next = get_unaligned_le16(ip);
+
if (((next & 0xfffc) == 0xfffc) &&
((t & 0xf8) == 0x18) &&
likely(bitstream_version)) {
@@ -180,74 +184,48 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
next &= 3;
ip += 3;
goto match_next;
- } else {
- m_pos = op;
- m_pos -= (t & 8) << 11;
- t = (t & 7) + (3 - 1);
- if (unlikely(t == 2)) {
- size_t offset;
- const unsigned char *ip_last = ip;
+ }

- while (unlikely(*ip == 0)) {
- ip++;
- NEED_IP(1);
- }
- offset = ip - ip_last;
- if (unlikely(offset > MAX_255_COUNT))
- return LZO_E_ERROR;
+ dist = (t & 8) << 11;
+ t = (t & 7) + (3 - 1);
+ if (unlikely(t == 2)) {
+ size_t offset;
+ const unsigned char *ip_last = ip;

- offset = (offset << 8) - offset;
- t += offset + 7 + *ip++;
- NEED_IP(2);
- next = get_unaligned_le16(ip);
+ while (unlikely(*ip == 0)) {
+ ip++;
+ NEED_IP(1);
}
- ip += 2;
- m_pos -= next >> 2;
- next &= 3;
- if (m_pos == op)
- goto eof_found;
- m_pos -= 0x4000;
+ offset = ip - ip_last;
+ if (unlikely(offset > MAX_255_COUNT))
+ return LZO_E_ERROR;
+
+ offset = (offset << 8) - offset;
+ t += offset + 7 + *ip++;
+ NEED_IP(2);
+ next = get_unaligned_le16(ip);
}
+ ip += 2;
+ dist += next >> 2;
+ if (dist == 0)
+ goto eof_found;
+ dist += M3_MAX_OFFSET;
+ next &= 3;
}
- TEST_LB(m_pos);
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
- if (op - m_pos >= 8) {
- unsigned char *oe = op + t;
- if (likely(HAVE_OP(t + 15))) {
- do {
- COPY8(op, m_pos);
- op += 8;
- m_pos += 8;
- COPY8(op, m_pos);
- op += 8;
- m_pos += 8;
- } while (op < oe);
- op = oe;
- if (HAVE_IP(6)) {
- state = next;
- COPY4(op, ip);
- op += next;
- ip += next;
- continue;
- }
- } else {
- NEED_OP(t);
- do {
- *op++ = *m_pos++;
- } while (op < oe);
+ TEST_LB(op - dist);
+ if (likely(HAVE_OP(t + FAST_COPY_SAFEGUARD_SIZE))) {
+ /* very common case */
+ op = copy_from_back_fast(op, dist, t);
+ if (HAVE_IP(6)) {
+ state = next;
+ COPY4(op, ip);
+ op += next;
+ ip += next;
+ continue;
}
- } else
-#endif
- {
- unsigned char *oe = op + t;
+ } else {
NEED_OP(t);
- op[0] = m_pos[0];
- op[1] = m_pos[1];
- op += 2;
- m_pos += 2;
- do {
- *op++ = *m_pos++;
- } while (op < oe);
+ op = copy_from_back(op, dist, t);
}
match_next:
state = next;
--
2.28.0