^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * mem-memcpy.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Simple memcpy() and memset() benchmarks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include "debug.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include "../perf-sys.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <subcmd/parse-options.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include "../util/header.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include "../util/cloexec.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "../util/string2.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include "bench.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include "mem-memcpy-arch.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "mem-memset-arch.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <stdio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <stdlib.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <unistd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <sys/time.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <linux/time64.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <linux/zalloc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
/* Step between the units (bytes, KB, MB, GB) used when printing throughput. */
#define K 1024

/*
 * Command-line tunables. The initial values below are what a run uses
 * when the corresponding option is not given.
 */
static const char	*size_str	= "1MB";	/* -s, --size */
static const char	*function_str	= "all";	/* -f, --function */
static int		nr_loops	= 1;		/* -l, --nr_loops */
static bool		use_cycles;			/* -c, --cycles */

/* Descriptor of the cycles event; assigned by init_cycles(). */
static int		cycles_fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) static const struct option options[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) OPT_STRING('s', "size", &size_str, "1MB",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) "Specify the size of the memory buffers. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) "Available units: B, KB, MB, GB and TB (case insensitive)"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) OPT_STRING('f', "function", &function_str, "all",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) OPT_INTEGER('l', "nr_loops", &nr_loops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) "Specify the number of loops to run. (default: 1)"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) OPT_BOOLEAN('c', "cycles", &use_cycles,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) "Use a cycles event instead of gettimeofday() to measure performance"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) OPT_END()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
/* Call signatures of the routines under test; same shape as memcpy() and memset(). */
typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);
typedef void *(*memset_t)(void *dst, int value, size_t len);

/*
 * One benchmarkable routine. Tables of these are terminated by an entry
 * whose name is NULL.
 */
struct function {
	const char *name;	/* compared against the -f/--function argument */
	const char *desc;	/* free-form description printed next to the name */
	union {
		memcpy_t memcpy;	/* set in the memcpy table */
		memset_t memset;	/* set in the memset table */
	} fn;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) static struct perf_event_attr cycle_attr = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) .type = PERF_TYPE_HARDWARE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) .config = PERF_COUNT_HW_CPU_CYCLES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) static int init_cycles(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) if (cycles_fd < 0 && errno == ENOSYS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) return cycles_fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) static u64 get_cycles(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) u64 clk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) ret = read(cycles_fd, &clk, sizeof(u64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) BUG_ON(ret != sizeof(u64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) return clk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) static double timeval2double(struct timeval *ts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
/*
 * Print a throughput given in bytes per second on one line of stdout.
 *
 * The value is compared against K, K*K and K*K*K and printed as
 * bytes/sec, KB/sec, MB/sec or GB/sec accordingly.
 *
 * The argument is evaluated exactly once and converted to double, so an
 * expression with side effects is safe to pass. The KB branch used to
 * carry a stray 'd' after the conversion ("%14lfd"), which showed up in
 * the output as e.g. "2.000000d KB/sec"; it is removed here.
 */
#define print_bps(x) do {						\
		double bps_ = (x);					\
									\
		if (bps_ < K)						\
			printf(" %14lf bytes/sec\n", bps_);		\
		else if (bps_ < K * K)					\
			printf(" %14lf KB/sec\n", bps_ / K);		\
		else if (bps_ < K * K * K)				\
			printf(" %14lf MB/sec\n", bps_ / K / K);	\
		else							\
			printf(" %14lf GB/sec\n", bps_ / K / K / K);	\
	} while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) struct bench_mem_info {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) const struct function *functions;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) const char *const *usage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) bool alloc_src;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) const struct function *r = &info->functions[r_idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) double result_bps = 0.0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) u64 result_cycles = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) void *src = NULL, *dst = zalloc(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) printf("# function '%s' (%s)\n", r->name, r->desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) if (dst == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) goto out_alloc_failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) if (info->alloc_src) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) src = zalloc(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) if (src == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) goto out_alloc_failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) if (bench_format == BENCH_FORMAT_DEFAULT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) printf("# Copying %s bytes ...\n\n", size_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) if (use_cycles) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) result_cycles = info->do_cycles(r, size, src, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) result_bps = info->do_gettimeofday(r, size, src, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) switch (bench_format) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) case BENCH_FORMAT_DEFAULT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) if (use_cycles) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) print_bps(result_bps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) case BENCH_FORMAT_SIMPLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) if (use_cycles) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) printf("%lf\n", (double)result_cycles/size_total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) printf("%lf\n", result_bps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) BUG_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) free(src);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) free(dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) out_alloc_failed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) double size_total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) argc = parse_options(argc, argv, options, info->usage, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) if (use_cycles) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) i = init_cycles();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) if (i < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) fprintf(stderr, "Failed to open cycles counter\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) size = (size_t)perf_atoll((char *)size_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) size_total = (double)size * nr_loops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) if ((s64)size <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) fprintf(stderr, "Invalid size:%s\n", size_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) if (!strncmp(function_str, "all", 3)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) for (i = 0; info->functions[i].name; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) __bench_mem_function(info, i, size, size_total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) for (i = 0; info->functions[i].name; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) if (!strcmp(info->functions[i].name, function_str))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) if (!info->functions[i].name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) if (strcmp(function_str, "help") && strcmp(function_str, "h"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) printf("Unknown function: %s\n", function_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) printf("Available functions:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) for (i = 0; info->functions[i].name; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) printf("\t%s ... %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) info->functions[i].name, info->functions[i].desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) __bench_mem_function(info, i, size, size_total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) memset(src, 0, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) * We prefault the freshly allocated memory range here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) * to not measure page fault overhead:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) fn(dst, src, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) u64 cycle_start = 0ULL, cycle_end = 0ULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) memcpy_t fn = r->fn.memcpy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) memcpy_prefault(fn, size, src, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) cycle_start = get_cycles();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) for (i = 0; i < nr_loops; ++i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) fn(dst, src, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) cycle_end = get_cycles();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) return cycle_end - cycle_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) struct timeval tv_start, tv_end, tv_diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) memcpy_t fn = r->fn.memcpy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) memcpy_prefault(fn, size, src, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) BUG_ON(gettimeofday(&tv_start, NULL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) for (i = 0; i < nr_loops; ++i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) fn(dst, src, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) BUG_ON(gettimeofday(&tv_end, NULL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) timersub(&tv_end, &tv_start, &tv_diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) struct function memcpy_functions[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) { .name = "default",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) .desc = "Default memcpy() provided by glibc",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) .fn.memcpy = memcpy },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) #ifdef HAVE_ARCH_X86_64_SUPPORT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) # include "mem-memcpy-x86-64-asm-def.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) # undef MEMCPY_FN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) { .name = NULL, }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285)
/* NULL-terminated usage lines for the memcpy benchmark, handed to parse_options(). */
static const char * const bench_mem_memcpy_usage[] = { "perf bench mem memcpy <options>", NULL };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) int bench_mem_memcpy(int argc, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) struct bench_mem_info info = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) .functions = memcpy_functions,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) .do_cycles = do_memcpy_cycles,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) .do_gettimeofday = do_memcpy_gettimeofday,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) .usage = bench_mem_memcpy_usage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) .alloc_src = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) return bench_mem_common(argc, argv, &info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) u64 cycle_start = 0ULL, cycle_end = 0ULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) memset_t fn = r->fn.memset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) * We prefault the freshly allocated memory range here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) * to not measure page fault overhead:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) fn(dst, -1, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) cycle_start = get_cycles();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) for (i = 0; i < nr_loops; ++i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) fn(dst, i, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) cycle_end = get_cycles();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) return cycle_end - cycle_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) struct timeval tv_start, tv_end, tv_diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) memset_t fn = r->fn.memset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) * We prefault the freshly allocated memory range here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) * to not measure page fault overhead:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) fn(dst, -1, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) BUG_ON(gettimeofday(&tv_start, NULL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) for (i = 0; i < nr_loops; ++i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) fn(dst, i, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) BUG_ON(gettimeofday(&tv_end, NULL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) timersub(&tv_end, &tv_start, &tv_diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
/* NULL-terminated usage lines for the memset benchmark, handed to parse_options(). */
static const char * const bench_mem_memset_usage[] = { "perf bench mem memset <options>", NULL };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) static const struct function memset_functions[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) { .name = "default",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) .desc = "Default memset() provided by glibc",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) .fn.memset = memset },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) #ifdef HAVE_ARCH_X86_64_SUPPORT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) # include "mem-memset-x86-64-asm-def.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) # undef MEMSET_FN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) { .name = NULL, }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) int bench_mem_memset(int argc, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) struct bench_mem_info info = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) .functions = memset_functions,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) .do_cycles = do_memset_cycles,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) .do_gettimeofday = do_memset_gettimeofday,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) .usage = bench_mem_memset_usage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) return bench_mem_common(argc, argv, &info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) }