^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Benchmark find_next_bit and related bit operations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright 2020 Google LLC.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <stdlib.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include "bench.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include "../util/stat.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/bitmap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/bitops.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/time64.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <subcmd/parse-options.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) static unsigned int outer_iterations = 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) static unsigned int inner_iterations = 100000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) static const struct option options[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) "Number of outer iterations used"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) "Number of inner iterations used"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) OPT_END()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) static const char *const bench_usage[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) "perf bench mem find_bit <options>",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) static unsigned int accumulator;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) static unsigned int use_of_val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) static noinline void workload(int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) use_of_val += val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) accumulator++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) static bool asm_test_bit(long nr, const unsigned long *addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) bool oldbit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) asm volatile("bt %2,%1"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) : "=@ccc" (oldbit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) return oldbit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #define asm_test_bit test_bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) static int do_for_each_set_bit(unsigned int num_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) unsigned long *to_test = bitmap_alloc(num_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) struct timeval start, end, diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) u64 runtime_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) struct stats fb_time_stats, tb_time_stats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) double time_average, time_stddev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) unsigned int bit, i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) unsigned int set_bits, skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) unsigned int old;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) init_stats(&fb_time_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) init_stats(&tb_time_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) bitmap_zero(to_test, num_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) skip = num_bits / set_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) for (i = 0; i < num_bits; i += skip)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) set_bit(i, to_test);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) for (i = 0; i < outer_iterations; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) old = accumulator;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) gettimeofday(&start, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) for (j = 0; j < inner_iterations; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) for_each_set_bit(bit, to_test, num_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) workload(bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) gettimeofday(&end, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) assert(old + (inner_iterations * set_bits) == accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) timersub(&end, &start, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) update_stats(&fb_time_stats, runtime_us);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) old = accumulator;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) gettimeofday(&start, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) for (j = 0; j < inner_iterations; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) for (bit = 0; bit < num_bits; bit++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) if (asm_test_bit(bit, to_test))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) workload(bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) gettimeofday(&end, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) assert(old + (inner_iterations * set_bits) == accumulator);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) timersub(&end, &start, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) update_stats(&tb_time_stats, runtime_us);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) printf("%d operations %d bits set of %d bits\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) inner_iterations, set_bits, num_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) time_average = avg_stats(&fb_time_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) time_stddev = stddev_stats(&fb_time_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) time_average, time_stddev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) time_average = avg_stats(&tb_time_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) time_stddev = stddev_stats(&tb_time_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) time_average, time_stddev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) if (use_of_val == accumulator) /* Try to avoid compiler tricks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) printf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) bitmap_free(to_test);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) int bench_mem_find_bit(int argc, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) int err = 0, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) argc = parse_options(argc, argv, options, bench_usage, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) if (argc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) usage_with_options(bench_usage, options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) exit(EXIT_FAILURE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) for (i = 1; i <= 2048; i <<= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) do_for_each_set_bit(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) }