Re: [PATCH v3 1/4] find: Switch from inline to __always_inline

From: Nathan Chancellor
Date: Fri Aug 02 2024 - 19:07:24 EST


Hi Brian,

On Thu, Jul 18, 2024 at 05:50:37PM -0700, Brian Norris wrote:
> From: Yury Norov <yury.norov@xxxxxxxxx>
>
> The 'inline' keyword is only a recommendation to the compiler. If it decides
> not to inline the find_bit nodemask functions, the whole small_const_nbits()
> machinery doesn't work.
>
> This is what a standard GCC 11.3.0 does for my x86_64 build now. This patch
> replaces the 'inline' directive with an unconditional '__always_inline' to
> make sure that there's always a chance for compile-time optimization. It
> doesn't change the size of the kernel image, according to bloat-o-meter.
>
> [[ Brian: split out from:
> Subject: [PATCH 1/3] bitmap: switch from inline to __always_inline
> https://lore.kernel.org/all/20221027043810.350460-2-yury.norov@xxxxxxxxx/
> But rewritten, as there were too many conflicts. ]]
>
> Signed-off-by: Yury Norov <yury.norov@xxxxxxxxx>
> Co-developed-by: Brian Norris <briannorris@xxxxxxxxxxxx>
> Signed-off-by: Brian Norris <briannorris@xxxxxxxxxxxx>
> Reviewed-by: Kees Cook <kees@xxxxxxxxxx>

Sorry for taking some time to review this. Overall, this seems
reasonable, especially given the numbers that you provided in the third
patch. I would expect the compiler to be able to optimize better at some
callsites with this.

For the series:

Reviewed-by: Nathan Chancellor <nathan@xxxxxxxxxx>

> ---
>
> Changes in v3:
> - newly split out in v3
>
> include/linux/find.h | 50 ++++++++++++++++++++++----------------------
> 1 file changed, 25 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/find.h b/include/linux/find.h
> index 5dfca4225fef..68685714bc18 100644
> --- a/include/linux/find.h
> +++ b/include/linux/find.h
> @@ -52,7 +52,7 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned
> * Returns the bit number for the next set bit
> * If no bits are set, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
> unsigned long offset)
> {
> @@ -81,7 +81,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
> * Returns the bit number for the next set bit
> * If no bits are set, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_next_and_bit(const unsigned long *addr1,
> const unsigned long *addr2, unsigned long size,
> unsigned long offset)
> @@ -112,7 +112,7 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
> * Returns the bit number for the next set bit
> * If no bits are set, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_next_andnot_bit(const unsigned long *addr1,
> const unsigned long *addr2, unsigned long size,
> unsigned long offset)
> @@ -142,7 +142,7 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1,
> * Returns the bit number for the next set bit
> * If no bits are set, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_next_or_bit(const unsigned long *addr1,
> const unsigned long *addr2, unsigned long size,
> unsigned long offset)
> @@ -171,7 +171,7 @@ unsigned long find_next_or_bit(const unsigned long *addr1,
> * Returns the bit number of the next zero bit
> * If no bits are zero, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
> unsigned long offset)
> {
> @@ -198,7 +198,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
> * Returns the bit number of the first set bit.
> * If no bits are set, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
> {
> if (small_const_nbits(size)) {
> @@ -224,7 +224,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
> * Returns the bit number of the N'th set bit.
> * If no such, returns >= @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n)
> {
> if (n >= size)
> @@ -249,7 +249,7 @@ unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsign
> * Returns the bit number of the N'th set bit.
> * If no such, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2,
> unsigned long size, unsigned long n)
> {
> @@ -276,7 +276,7 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *
> * Returns the bit number of the N'th set bit.
> * If no such, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
> unsigned long size, unsigned long n)
> {
> @@ -332,7 +332,7 @@ unsigned long find_nth_and_andnot_bit(const unsigned long *addr1,
> * Returns the bit number for the next set bit
> * If no bits are set, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_first_and_bit(const unsigned long *addr1,
> const unsigned long *addr2,
> unsigned long size)
> @@ -357,7 +357,7 @@ unsigned long find_first_and_bit(const unsigned long *addr1,
> * Returns the bit number for the first set bit
> * If no bits are set, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_first_and_and_bit(const unsigned long *addr1,
> const unsigned long *addr2,
> const unsigned long *addr3,
> @@ -381,7 +381,7 @@ unsigned long find_first_and_and_bit(const unsigned long *addr1,
> * Returns the bit number of the first cleared bit.
> * If no bits are zero, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
> {
> if (small_const_nbits(size)) {
> @@ -402,7 +402,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
> *
> * Returns the bit number of the last set bit, or size.
> */
> -static inline
> +static __always_inline
> unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
> {
> if (small_const_nbits(size)) {
> @@ -425,7 +425,7 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
> * Returns the bit number for the next set bit, or first set bit up to @offset
> * If no bits are set, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_next_and_bit_wrap(const unsigned long *addr1,
> const unsigned long *addr2,
> unsigned long size, unsigned long offset)
> @@ -448,7 +448,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1,
> * Returns the bit number for the next set bit, or first set bit up to @offset
> * If no bits are set, returns @size.
> */
> -static inline
> +static __always_inline
> unsigned long find_next_bit_wrap(const unsigned long *addr,
> unsigned long size, unsigned long offset)
> {
> @@ -465,7 +465,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr,
> * Helper for for_each_set_bit_wrap(). Make sure you're doing right thing
> * before using it alone.
> */
> -static inline
> +static __always_inline
> unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size,
> unsigned long start, unsigned long n)
> {
> @@ -506,20 +506,20 @@ extern unsigned long find_next_clump8(unsigned long *clump,
>
> #if defined(__LITTLE_ENDIAN)
>
> -static inline unsigned long find_next_zero_bit_le(const void *addr,
> - unsigned long size, unsigned long offset)
> +static __always_inline
> +unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset)
> {
> return find_next_zero_bit(addr, size, offset);
> }
>
> -static inline unsigned long find_next_bit_le(const void *addr,
> - unsigned long size, unsigned long offset)
> +static __always_inline
> +unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset)
> {
> return find_next_bit(addr, size, offset);
> }
>
> -static inline unsigned long find_first_zero_bit_le(const void *addr,
> - unsigned long size)
> +static __always_inline
> +unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
> {
> return find_first_zero_bit(addr, size);
> }
> @@ -527,7 +527,7 @@ static inline unsigned long find_first_zero_bit_le(const void *addr,
> #elif defined(__BIG_ENDIAN)
>
> #ifndef find_next_zero_bit_le
> -static inline
> +static __always_inline
> unsigned long find_next_zero_bit_le(const void *addr, unsigned
> long size, unsigned long offset)
> {
> @@ -546,7 +546,7 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned
> #endif
>
> #ifndef find_first_zero_bit_le
> -static inline
> +static __always_inline
> unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
> {
> if (small_const_nbits(size)) {
> @@ -560,7 +560,7 @@ unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
> #endif
>
> #ifndef find_next_bit_le
> -static inline
> +static __always_inline
> unsigned long find_next_bit_le(const void *addr, unsigned
> long size, unsigned long offset)
> {
> --
> 2.45.2.1089.g2a221341d9-goog
>