From e73d8c4d6e68f2ed2d1496c4debeb765f057424d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 12 Dec 2013 17:40:22 +0000 Subject: [PATCH 1/3] word-at-a-time: provide generic big-endian zero_bytemask implementation Whilst architectures may be able to do better than this (which they can, by simply defining their own macro), this is a generic stab at a zero_bytemask implementation for the asm-generic, big-endian word-at-a-time implementation. On arm64, a clz instruction is used to implement the fls efficiently. Signed-off-by: Will Deacon Signed-off-by: Linus Torvalds (cherry picked from commit 11ec50caedb56e3a87715edeff6a1852e6ae5416) Signed-off-by: Mark Brown --- include/asm-generic/word-at-a-time.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index 3f21f1b72e45..d3909effd725 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -49,4 +49,12 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct return (val + c->high_bits) & ~rhs; } +#ifndef zero_bytemask +#ifdef CONFIG_64BIT +#define zero_bytemask(mask) (~0ul << fls64(mask)) +#else +#define zero_bytemask(mask) (~0ul << fls(mask)) +#endif /* CONFIG_64BIT */ +#endif /* zero_bytemask */ + #endif /* _ASM_WORD_AT_A_TIME_H */ From 68813d8f7e4554f5e2a97e66c4cb7dcacfcc6ade Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 23 Apr 2014 17:52:52 +0100 Subject: [PATCH 2/3] word-at-a-time: avoid undefined behaviour in zero_bytemask macro The asm-generic, big-endian version of zero_bytemask creates a mask of bytes preceding the first zero-byte by left shifting ~0ul based on the position of the first zero byte. Unfortunately, if the first (top) byte is zero, the output of prep_zero_mask has only the top bit set, resulting in undefined C behaviour as we shift left by an amount equal to the width of the type. 
As it happens, GCC doesn't manage to spot this through the call to fls(), but the issue remains if architectures choose to implement their shift instructions differently. An example would be arch/arm/ (AArch32), where LSL Rd, Rn, #32 results in Rd == 0x0, whilst on arch/arm64 (AArch64) LSL Xd, Xn, #64 results in Xd == Xn. Rather than check explicitly for the problematic shift, this patch adds an extra shift by 1, replacing fls with __fls. Since zero_bytemask is never called with a zero argument (has_zero() is used to check the data first), we don't need to worry about calling __fls(0), which is undefined. Cc: <stable@vger.kernel.org> Cc: Victor Kamensky Signed-off-by: Will Deacon Signed-off-by: Linus Torvalds (cherry picked from commit ec6931b281797b69e6cf109f9cc94d5a2bf994e0) Signed-off-by: Mark Brown --- include/asm-generic/word-at-a-time.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index d3909effd725..d96deb443f18 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -50,11 +50,7 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct } #ifndef zero_bytemask -#ifdef CONFIG_64BIT -#define zero_bytemask(mask) (~0ul << fls64(mask)) -#else -#define zero_bytemask(mask) (~0ul << fls(mask)) -#endif /* CONFIG_64BIT */ -#endif /* zero_bytemask */ +#define zero_bytemask(mask) (~0ul << __fls(mask) << 1) +#endif #endif /* _ASM_WORD_AT_A_TIME_H */ From 8c8e31f98cae35666e0e619115ab1e1fc80b1835 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 30 Apr 2014 14:22:19 -0700 Subject: [PATCH 3/3] word-at-a-time: simplify big-endian zero_bytemask macro This is simpler and cleaner. Depending on architecture, a smart compiler may or may not generate the same code. 
Acked-by: Will Deacon Signed-off-by: Linus Torvalds (cherry picked from commit 789ce9dca8007ab5d7c72b9a174a29243817ac32) Signed-off-by: Mark Brown --- include/asm-generic/word-at-a-time.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index d96deb443f18..94f9ea8abcae 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -50,7 +50,7 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct } #ifndef zero_bytemask -#define zero_bytemask(mask) (~0ul << __fls(mask) << 1) +#define zero_bytemask(mask) (~1ul << __fls(mask)) #endif #endif /* _ASM_WORD_AT_A_TIME_H */