Re: [PATCH 1/5] mean and variance: Promote to lib/math

From: Darrick J. Wong
Date: Fri Feb 02 2024 - 02:30:50 EST


On Fri, Jan 26, 2024 at 05:06:51PM -0500, Kent Overstreet wrote:
> Small statistics library, for taking in a series of value and computing
> mean, weighted mean, standard deviation and weighted deviation.
>
> The main use case is for statistics on latency measurements.

Oh, heh, I didn't realize this was a patch and not a cover letter.

A few things I noticed while adding timestats to xfs -- pahole says this
about the weighted mean and variance object:

struct mean_and_variance_weighted {
/* typedef bool */ _Bool init; /* 0 1 */
/* typedef u8 -> __u8 */ unsigned char weight; /* 1 1 */

/* XXX 6 bytes hole, try to pack */

/* typedef s64 -> __s64 */ long long int mean; /* 8 8 */
/* typedef u64 -> __u64 */ long long unsigned int variance; /* 16 8 */

/* size: 24, cachelines: 1, members: 4 */
/* sum members: 18, holes: 1, sum holes: 6 */
/* last cacheline: 24 bytes */
};

AFAICT the init flag isn't used, and the u8 wastes 6 bytes of space.
Two of these get plugged into struct time_stats, which means we waste 12
bytes of space on a ~464 byte structure. Removing quantiles support
from that shrinks the struct down to 208 bytes.

Any chance we could pass in the weights as a function parameter so that
we could shrink this to 16 bytes? If we do that and rearrange
time_stats, the whole thing goes down to 192 bytes, which means I can
spray in twice as many timestats.

I also noticed that struct time_stat_buffer is:

struct time_stat_buffer {
unsigned int nr; /* 0 4 */

/* XXX 4 bytes hole, try to pack */

struct time_stat_buffer_entry {
/* typedef u64 -> __u64 */ long long unsigned int start; /* 8 8 */
/* typedef u64 -> __u64 */ long long unsigned int end; /* 16 8 */
} entries[32]; /* 8 512 */

/* size: 520, cachelines: 9, members: 2 */
/* sum members: 516, holes: 1, sum holes: 4 */
/* last cacheline: 8 bytes */
};

I wonder, if entries[] shrank to 31 entries, this would align to 512b;
would that make for more efficient allocations? I tried to follow
alloc_percpu_gfp and got caught in a twisty mess of macros.

--D

FWIW the full time_stats struct ended up like this after I turned off
quantiles and did all the lazy rearranging I could do without removing
init or weight:

struct time_stats {
/* typedef spinlock_t */ struct spinlock {
union {
struct raw_spinlock {
/* typedef arch_spinlock_t */ struct qspinlock {
union {
/* typedef atomic_t */ struct {
int counter; /* 0 4 */
} val; /* 0 4 */
struct {
/* typedef u8 -> __u8 */ unsigned char locked; /* 0 1 */
/* typedef u8 -> __u8 */ unsigned char pending; /* 1 1 */
}; /* 0 2 */
struct {
/* typedef u16 -> __u16 */ short unsigned int locked_pending; /* 0 2 */
/* typedef u16 -> __u16 */ short unsigned int tail; /* 2 2 */
}; /* 0 4 */
}; /* 0 4 */
} raw_lock; /* 0 4 */
unsigned int magic; /* 4 4 */
unsigned int owner_cpu; /* 8 4 */

/* XXX 4 bytes hole, try to pack */

void * owner; /* 16 8 */
}rlock; /* 0 24 */
}; /* 0 24 */
} lock; /* 0 24 */
/* typedef u64 -> __u64 */ long long unsigned int min_duration; /* 24 8 */
/* typedef u64 -> __u64 */ long long unsigned int max_duration; /* 32 8 */
/* typedef u64 -> __u64 */ long long unsigned int total_duration; /* 40 8 */
/* typedef u64 -> __u64 */ long long unsigned int max_freq; /* 48 8 */
/* typedef u64 -> __u64 */ long long unsigned int min_freq; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
/* typedef u64 -> __u64 */ long long unsigned int last_event; /* 64 8 */
/* typedef u64 -> __u64 */ long long unsigned int last_event_start; /* 72 8 */
struct mean_and_variance {
/* typedef s64 -> __s64 */ long long int n; /* 80 8 */
/* typedef s64 -> __s64 */ long long int sum; /* 88 8 */
/* typedef u128_u */ struct {
__int128 unsigned v; /* 96 16 */
} sum_squares __attribute__((__aligned__(16))) __attribute__((__aligned__(16))); /* 96 16 */
} __attribute__((__aligned__(16)))duration_stats __attribute__((__aligned__(16))); /* 80 32 */
struct mean_and_variance {
/* typedef s64 -> __s64 */ long long int n; /* 112 8 */
/* typedef s64 -> __s64 */ long long int sum; /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
/* typedef u128_u */ struct {
__int128 unsigned v; /* 128 16 */
} sum_squares __attribute__((__aligned__(16))) __attribute__((__aligned__(16))); /* 128 16 */
} __attribute__((__aligned__(16)))freq_stats __attribute__((__aligned__(16))); /* 112 32 */
struct mean_and_variance_weighted {
/* typedef bool */ _Bool init; /* 144 1 */
/* typedef u8 -> __u8 */ unsigned char weight; /* 145 1 */

/* XXX 6 bytes hole, try to pack */

/* typedef s64 -> __s64 */ long long int mean; /* 152 8 */
/* typedef u64 -> __u64 */ long long unsigned int variance; /* 160 8 */
}duration_stats_weighted; /* 144 24 */
struct mean_and_variance_weighted {
/* typedef bool */ _Bool init; /* 168 1 */
/* typedef u8 -> __u8 */ unsigned char weight; /* 169 1 */

/* XXX 6 bytes hole, try to pack */

/* typedef s64 -> __s64 */ long long int mean; /* 176 8 */
/* typedef u64 -> __u64 */ long long unsigned int variance; /* 184 8 */
}freq_stats_weighted; /* 168 24 */
/* --- cacheline 3 boundary (192 bytes) --- */
struct time_stat_buffer * buffer; /* 192 8 */

/* size: 208, cachelines: 4, members: 13 */
/* padding: 8 */
/* forced alignments: 2 */
/* last cacheline: 16 bytes */
} __attribute__((__aligned__(16)));


> Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx>
> Cc: Daniel Hill <daniel@xxxxxxx>
> Cc: Darrick J. Wong <djwong@xxxxxxxxxx>
> ---
> MAINTAINERS | 9 +++++++++
> fs/bcachefs/Kconfig | 10 +---------
> fs/bcachefs/Makefile | 3 ---
> fs/bcachefs/util.c | 2 +-
> fs/bcachefs/util.h | 3 +--
> {fs/bcachefs => include/linux}/mean_and_variance.h | 0
> lib/Kconfig.debug | 9 +++++++++
> lib/math/Kconfig | 3 +++
> lib/math/Makefile | 2 ++
> {fs/bcachefs => lib/math}/mean_and_variance.c | 3 +--
> {fs/bcachefs => lib/math}/mean_and_variance_test.c | 3 +--
> 11 files changed, 28 insertions(+), 19 deletions(-)
> rename {fs/bcachefs => include/linux}/mean_and_variance.h (100%)
> rename {fs/bcachefs => lib/math}/mean_and_variance.c (99%)
> rename {fs/bcachefs => lib/math}/mean_and_variance_test.c (99%)
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 8d1052fa6a69..de635cfd354d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -13379,6 +13379,15 @@ S: Maintained
> F: drivers/net/mdio/mdio-regmap.c
> F: include/linux/mdio/mdio-regmap.h
>
> +MEAN AND VARIANCE LIBRARY
> +M: Daniel B. Hill <daniel@xxxxxxx>
> +M: Kent Overstreet <kent.overstreet@xxxxxxxxx>
> +S: Maintained
> +T: git https://github.com/YellowOnion/linux/
> +F: include/linux/mean_and_variance.h
> +F: lib/math/mean_and_variance.c
> +F: lib/math/mean_and_variance_test.c
> +
> MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
> M: William Breathitt Gray <william.gray@xxxxxxxxxx>
> L: linux-iio@xxxxxxxxxxxxxxx
> diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
> index 5cdfef3b551a..72d1179262b3 100644
> --- a/fs/bcachefs/Kconfig
> +++ b/fs/bcachefs/Kconfig
> @@ -24,6 +24,7 @@ config BCACHEFS_FS
> select XXHASH
> select SRCU
> select SYMBOLIC_ERRNAME
> + select MEAN_AND_VARIANCE
> help
> The bcachefs filesystem - a modern, copy on write filesystem, with
> support for multiple devices, compression, checksumming, etc.
> @@ -86,12 +87,3 @@ config BCACHEFS_SIX_OPTIMISTIC_SPIN
> Instead of immediately sleeping when attempting to take a six lock that
> is held by another thread, spin for a short while, as long as the
> thread owning the lock is running.
> -
> -config MEAN_AND_VARIANCE_UNIT_TEST
> - tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
> - depends on KUNIT
> - depends on BCACHEFS_FS
> - default KUNIT_ALL_TESTS
> - help
> - This option enables the kunit tests for mean_and_variance module.
> - If unsure, say N.
> diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
> index 1a05cecda7cc..b11ba74b8ad4 100644
> --- a/fs/bcachefs/Makefile
> +++ b/fs/bcachefs/Makefile
> @@ -57,7 +57,6 @@ bcachefs-y := \
> keylist.o \
> logged_ops.o \
> lru.o \
> - mean_and_variance.o \
> migrate.o \
> move.o \
> movinggc.o \
> @@ -88,5 +87,3 @@ bcachefs-y := \
> util.o \
> varint.o \
> xattr.o
> -
> -obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o
> diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
> index 56b815fd9fc6..d7ea95abb9df 100644
> --- a/fs/bcachefs/util.c
> +++ b/fs/bcachefs/util.c
> @@ -22,9 +22,9 @@
> #include <linux/string.h>
> #include <linux/types.h>
> #include <linux/sched/clock.h>
> +#include <linux/mean_and_variance.h>
>
> #include "eytzinger.h"
> -#include "mean_and_variance.h"
> #include "util.h"
>
> static const char si_units[] = "?kMGTPEZY";
> diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
> index b414736d59a5..0059481995ef 100644
> --- a/fs/bcachefs/util.h
> +++ b/fs/bcachefs/util.h
> @@ -17,8 +17,7 @@
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
> #include <linux/workqueue.h>
> -
> -#include "mean_and_variance.h"
> +#include <linux/mean_and_variance.h>
>
> #include "darray.h"
>
> diff --git a/fs/bcachefs/mean_and_variance.h b/include/linux/mean_and_variance.h
> similarity index 100%
> rename from fs/bcachefs/mean_and_variance.h
> rename to include/linux/mean_and_variance.h
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 975a07f9f1cc..817ddfe132cd 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2191,6 +2191,15 @@ config CPUMASK_KUNIT_TEST
>
> If unsure, say N.
>
> +config MEAN_AND_VARIANCE_UNIT_TEST
> + tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
> + depends on KUNIT
> + select MEAN_AND_VARIANCE
> + default KUNIT_ALL_TESTS
> + help
> + This option enables the kunit tests for mean_and_variance module.
> + If unsure, say N.
> +
> config TEST_LIST_SORT
> tristate "Linked list sorting test" if !KUNIT_ALL_TESTS
> depends on KUNIT
> diff --git a/lib/math/Kconfig b/lib/math/Kconfig
> index 0634b428d0cb..7530ae9a3584 100644
> --- a/lib/math/Kconfig
> +++ b/lib/math/Kconfig
> @@ -15,3 +15,6 @@ config PRIME_NUMBERS
>
> config RATIONAL
> tristate
> +
> +config MEAN_AND_VARIANCE
> + tristate
> diff --git a/lib/math/Makefile b/lib/math/Makefile
> index 91fcdb0c9efe..8cdfa13a67ce 100644
> --- a/lib/math/Makefile
> +++ b/lib/math/Makefile
> @@ -4,6 +4,8 @@ obj-y += div64.o gcd.o lcm.o int_log.o int_pow.o int_sqrt.o reciprocal_div.o
> obj-$(CONFIG_CORDIC) += cordic.o
> obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
> obj-$(CONFIG_RATIONAL) += rational.o
> +obj-$(CONFIG_MEAN_AND_VARIANCE) += mean_and_variance.o
>
> obj-$(CONFIG_TEST_DIV64) += test_div64.o
> obj-$(CONFIG_RATIONAL_KUNIT_TEST) += rational-test.o
> +obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o
> diff --git a/fs/bcachefs/mean_and_variance.c b/lib/math/mean_and_variance.c
> similarity index 99%
> rename from fs/bcachefs/mean_and_variance.c
> rename to lib/math/mean_and_variance.c
> index bf0ef668fd38..ba90293204ba 100644
> --- a/fs/bcachefs/mean_and_variance.c
> +++ b/lib/math/mean_and_variance.c
> @@ -40,10 +40,9 @@
> #include <linux/limits.h>
> #include <linux/math.h>
> #include <linux/math64.h>
> +#include <linux/mean_and_variance.h>
> #include <linux/module.h>
>
> -#include "mean_and_variance.h"
> -
> u128_u u128_div(u128_u n, u64 d)
> {
> u128_u r;
> diff --git a/fs/bcachefs/mean_and_variance_test.c b/lib/math/mean_and_variance_test.c
> similarity index 99%
> rename from fs/bcachefs/mean_and_variance_test.c
> rename to lib/math/mean_and_variance_test.c
> index 019583c3ca0e..f45591a169d8 100644
> --- a/fs/bcachefs/mean_and_variance_test.c
> +++ b/lib/math/mean_and_variance_test.c
> @@ -1,7 +1,6 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <kunit/test.h>
> -
> -#include "mean_and_variance.h"
> +#include <linux/mean_and_variance.h>
>
> #define MAX_SQR (SQRT_U64_MAX*SQRT_U64_MAX)
>
> --
> 2.43.0
>
>