// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * membarrier system call
 */
#include "sched.h"

/*
 * Bitmask made from an OR of all commands within enum membarrier_cmd,
 * except MEMBARRIER_CMD_QUERY.
 */
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK			\
	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE			\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
#else
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK	0
#endif

#ifdef CONFIG_RSEQ
#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK		\
	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ			\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
#else
#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK	0
#endif

#define MEMBARRIER_CMD_BITMASK						\
	(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED	\
	| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED			\
	| MEMBARRIER_CMD_PRIVATE_EXPEDITED				\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED			\
	| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK		\
	| MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
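
/*
 * Illustrative only (not part of the kernel build): user space can probe
 * which of the commands in the bitmask above are available by issuing
 * MEMBARRIER_CMD_QUERY, which returns this bitmask (possibly with
 * MEMBARRIER_CMD_GLOBAL cleared, see sys_membarrier() below). A minimal
 * sketch, assuming glibc provides no membarrier() wrapper so syscall(2)
 * is used:
 *
 *	#include <linux/membarrier.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		int mask = syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0, 0);
 *
 *		if (mask < 0)
 *			return 1;	// membarrier(2) not implemented
 *		if (mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED)
 *			printf("private expedited membarrier supported\n");
 *		return 0;
 *	}
 */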

static void ipi_mb(void *info)
{
	smp_mb();	/* IPIs should be serializing but paranoid. */
}

static void ipi_sync_core(void *info)
{
	/*
	 * The smp_mb() in membarrier after all the IPIs is supposed to
	 * ensure that memory accesses on remote CPUs that occur before
	 * the IPI become visible to membarrier()'s caller -- see
	 * scenario B in the big comment at the top of this file.
	 *
	 * A sync_core() would provide this guarantee, but
	 * sync_core_before_usermode() might end up being deferred until
	 * after membarrier()'s smp_mb().
	 */
	smp_mb();	/* IPIs should be serializing but paranoid. */

	sync_core_before_usermode();
}

static void ipi_rseq(void *info)
{
	/*
	 * Ensure that all stores done by the calling thread are visible
	 * to the current task before the current task resumes. We could
	 * probably optimize this away on most architectures, but by the
	 * time we've already sent an IPI, the cost of the extra smp_mb()
	 * is negligible.
	 */
	smp_mb();
	rseq_preempt(current);
}

static void ipi_sync_rq_state(void *info)
{
	struct mm_struct *mm = (struct mm_struct *) info;

	if (current->mm != mm)
		return;
	this_cpu_write(runqueues.membarrier_state,
		       atomic_read(&mm->membarrier_state));
	/*
	 * Issue a memory barrier after setting
	 * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
	 * guarantee that no memory access following registration is reordered
	 * before registration.
	 */
	smp_mb();
}

void membarrier_exec_mmap(struct mm_struct *mm)
{
	/*
	 * Issue a memory barrier before clearing membarrier_state to
	 * guarantee that no memory access prior to exec is reordered after
	 * clearing this state.
	 */
	smp_mb();
	atomic_set(&mm->membarrier_state, 0);
	/*
	 * Keep the runqueue membarrier_state in sync with this mm
	 * membarrier_state.
	 */
	this_cpu_write(runqueues.membarrier_state, 0);
}

static int membarrier_global_expedited(void)
{
	int cpu;
	cpumask_var_t tmpmask;

	if (num_online_cpus() == 1)
		return 0;

	/*
	 * Matches memory barriers around rq->curr modification in
	 * scheduler.
	 */
	smp_mb();	/* system call entry is not a mb. */

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	cpus_read_lock();
	rcu_read_lock();
	for_each_online_cpu(cpu) {
		struct task_struct *p;

		/*
		 * Skipping the current CPU is OK even though we can be
		 * migrated at any point. The current CPU, at the point
		 * where we read raw_smp_processor_id(), is ensured to
		 * be in program order with respect to the caller
		 * thread. Therefore, we can skip this CPU from the
		 * iteration.
		 */
		if (cpu == raw_smp_processor_id())
			continue;

		if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
		    MEMBARRIER_STATE_GLOBAL_EXPEDITED))
			continue;

		/*
		 * Skip the CPU if it runs a kernel thread. The scheduler
		 * leaves the prior task mm in place as an optimization when
		 * scheduling a kthread.
		 */
		p = rcu_dereference(cpu_rq(cpu)->curr);
		if (p->flags & PF_KTHREAD)
			continue;

		__cpumask_set_cpu(cpu, tmpmask);
	}
	rcu_read_unlock();

	preempt_disable();
	smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
	preempt_enable();

	free_cpumask_var(tmpmask);
	cpus_read_unlock();

	/*
	 * Memory barrier on the caller thread _after_ we finished
	 * waiting for the last IPI. Matches memory barriers around
	 * rq->curr modification in scheduler.
	 */
	smp_mb();	/* exit from system call is not a mb */
	return 0;
}
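
/*
 * Illustrative only (not part of the kernel build): a hedged sketch of how
 * the global expedited command handled above pairs with the registration
 * done in membarrier_register_global_expedited() further down. A process
 * that wants to be targeted registers once; afterwards, any process may
 * issue the global expedited command to have all running threads of the
 * registered processes execute a full memory barrier. Glibc provides no
 * wrapper, so syscall(2) is used; error handling is elided:
 *
 *	#include <linux/membarrier.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	// In each process that must observe the barrier (once, early):
 *	static void register_for_global_expedited(void)
 *	{
 *		syscall(__NR_membarrier,
 *			MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 0, 0);
 *	}
 *
 *	// In the process issuing the barrier (need not be registered):
 *	static void issue_global_expedited(void)
 *	{
 *		syscall(__NR_membarrier,
 *			MEMBARRIER_CMD_GLOBAL_EXPEDITED, 0, 0);
 *	}
 */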

static int membarrier_private_expedited(int flags, int cpu_id)
{
	cpumask_var_t tmpmask;
	struct mm_struct *mm = current->mm;
	smp_call_func_t ipi_func = ipi_mb;

	if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
			return -EINVAL;
		if (!(atomic_read(&mm->membarrier_state) &
		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
			return -EPERM;
		ipi_func = ipi_sync_core;
	} else if (flags == MEMBARRIER_FLAG_RSEQ) {
		if (!IS_ENABLED(CONFIG_RSEQ))
			return -EINVAL;
		if (!(atomic_read(&mm->membarrier_state) &
		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY))
			return -EPERM;
		ipi_func = ipi_rseq;
	} else {
		WARN_ON_ONCE(flags);
		if (!(atomic_read(&mm->membarrier_state) &
		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
			return -EPERM;
	}

	if (flags != MEMBARRIER_FLAG_SYNC_CORE &&
	    (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1))
		return 0;

	/*
	 * Matches memory barriers around rq->curr modification in
	 * scheduler.
	 */
	smp_mb();	/* system call entry is not a mb. */

	if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	cpus_read_lock();

	if (cpu_id >= 0) {
		struct task_struct *p;

		if (cpu_id >= nr_cpu_ids || !cpu_online(cpu_id))
			goto out;
		rcu_read_lock();
		p = rcu_dereference(cpu_rq(cpu_id)->curr);
		if (!p || p->mm != mm) {
			rcu_read_unlock();
			goto out;
		}
		rcu_read_unlock();
	} else {
		int cpu;

		rcu_read_lock();
		for_each_online_cpu(cpu) {
			struct task_struct *p;

			p = rcu_dereference(cpu_rq(cpu)->curr);
			if (p && p->mm == mm)
				__cpumask_set_cpu(cpu, tmpmask);
		}
		rcu_read_unlock();
	}

	if (cpu_id >= 0) {
		/*
		 * smp_call_function_single() will call ipi_func() if cpu_id
		 * is the calling CPU.
		 */
		smp_call_function_single(cpu_id, ipi_func, NULL, 1);
	} else {
		/*
		 * For regular membarrier, we can save a few cycles by
		 * skipping the current cpu -- we're about to do smp_mb()
		 * below, and if we migrate to a different cpu, this cpu
		 * and the new cpu will execute a full barrier in the
		 * scheduler.
		 *
		 * For SYNC_CORE, we do need a barrier on the current cpu --
		 * otherwise, if we are migrated and replaced by a different
		 * task in the same mm just before, during, or after
		 * membarrier, we will end up with some thread in the mm
		 * running without a core sync.
		 *
		 * For RSEQ, don't rseq_preempt() the caller. User code
		 * is not supposed to issue syscalls at all from inside an
		 * rseq critical section.
		 */
		if (flags != MEMBARRIER_FLAG_SYNC_CORE) {
			preempt_disable();
			smp_call_function_many(tmpmask, ipi_func, NULL, true);
			preempt_enable();
		} else {
			on_each_cpu_mask(tmpmask, ipi_func, NULL, true);
		}
	}

out:
	if (cpu_id < 0)
		free_cpumask_var(tmpmask);
	cpus_read_unlock();

	/*
	 * Memory barrier on the caller thread _after_ we finished
	 * waiting for the last IPI. Matches memory barriers around
	 * rq->curr modification in scheduler.
	 */
	smp_mb();	/* exit from system call is not a mb */

	return 0;
}
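
/*
 * Illustrative only (not part of the kernel build): a hedged sketch of the
 * single-CPU path handled above (cpu_id >= 0), which user space reaches via
 * the MEMBARRIER_CMD_FLAG_CPU flag documented with sys_membarrier() below.
 * This assumes the process already registered with
 * MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ; glibc provides no wrapper,
 * so syscall(2) is used:
 *
 *	#include <linux/membarrier.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	// Restart any rseq critical section currently running on @cpu in
 *	// this process, without interrupting the other CPUs.
 *	static int restart_rseq_cs_on_cpu(int cpu)
 *	{
 *		return syscall(__NR_membarrier,
 *			       MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
 *			       MEMBARRIER_CMD_FLAG_CPU, cpu);
 *	}
 */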

static int sync_runqueues_membarrier_state(struct mm_struct *mm)
{
	int membarrier_state = atomic_read(&mm->membarrier_state);
	cpumask_var_t tmpmask;
	int cpu;

	if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
		this_cpu_write(runqueues.membarrier_state, membarrier_state);

		/*
		 * For single mm user, we can simply issue a memory barrier
		 * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
		 * mm and in the current runqueue to guarantee that no memory
		 * access following registration is reordered before
		 * registration.
		 */
		smp_mb();
		return 0;
	}

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * For mm with multiple users, we need to ensure all future
	 * scheduler executions will observe @mm's new membarrier
	 * state.
	 */
	synchronize_rcu();

	/*
	 * For each cpu runqueue, if the task's mm matches @mm, ensure that
	 * all of @mm's membarrier state set bits are also set in the
	 * runqueue's membarrier state. This ensures that a runqueue
	 * scheduling between threads which are users of @mm has its
	 * membarrier state updated.
	 */
	cpus_read_lock();
	rcu_read_lock();
	for_each_online_cpu(cpu) {
		struct rq *rq = cpu_rq(cpu);
		struct task_struct *p;

		p = rcu_dereference(rq->curr);
		if (p && p->mm == mm)
			__cpumask_set_cpu(cpu, tmpmask);
	}
	rcu_read_unlock();

	on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true);

	free_cpumask_var(tmpmask);
	cpus_read_unlock();

	return 0;
}

static int membarrier_register_global_expedited(void)
{
	struct task_struct *p = current;
	struct mm_struct *mm = p->mm;
	int ret;

	if (atomic_read(&mm->membarrier_state) &
	    MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
		return 0;
	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
	ret = sync_runqueues_membarrier_state(mm);
	if (ret)
		return ret;
	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
		  &mm->membarrier_state);

	return 0;
}

static int membarrier_register_private_expedited(int flags)
{
	struct task_struct *p = current;
	struct mm_struct *mm = p->mm;
	int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
	    set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
	    ret;

	if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
			return -EINVAL;
		ready_state =
			MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
	} else if (flags == MEMBARRIER_FLAG_RSEQ) {
		if (!IS_ENABLED(CONFIG_RSEQ))
			return -EINVAL;
		ready_state =
			MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY;
	} else {
		WARN_ON_ONCE(flags);
	}

	/*
	 * We need to consider threads belonging to different thread
	 * groups which use the same mm (CLONE_VM but not CLONE_THREAD).
	 */
	if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
		return 0;
	if (flags & MEMBARRIER_FLAG_SYNC_CORE)
		set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
	if (flags & MEMBARRIER_FLAG_RSEQ)
		set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ;
	atomic_or(set_state, &mm->membarrier_state);
	ret = sync_runqueues_membarrier_state(mm);
	if (ret)
		return ret;
	atomic_or(ready_state, &mm->membarrier_state);

	return 0;
}

/**
 * sys_membarrier - issue memory barriers on a set of threads
 * @cmd:    Takes command values defined in enum membarrier_cmd.
 * @flags:  Currently needs to be 0 for all commands other than
 *          MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ: in the latter
 *          case it can be MEMBARRIER_CMD_FLAG_CPU, indicating that @cpu_id
 *          contains the CPU on which to interrupt (= restart)
 *          the RSEQ critical section.
 * @cpu_id: if @flags == MEMBARRIER_CMD_FLAG_CPU, indicates the CPU on which
 *          the RSEQ critical section should be interrupted (@cmd must be
 *          MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ).
 *
 * If this system call is not implemented, -ENOSYS is returned. If the
 * command specified does not exist, is not available on the running
 * kernel, or if the command argument is invalid, this system call
 * returns -EINVAL. For a given command, with flags argument set to 0,
 * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
 * always return the same value until reboot. In addition, it can return
 * -ENOMEM if there is not enough memory available to perform the system
 * call.
 *
 * All memory accesses performed in program order from each targeted thread
 * are guaranteed to be ordered with respect to sys_membarrier(). If we use
 * the semantic "barrier()" to represent a compiler barrier forcing memory
 * accesses to be performed in program order across the barrier, and
 * smp_mb() to represent explicit memory barriers forcing full memory
 * ordering across the barrier, we have the following ordering table for
 * each pair of barrier(), sys_membarrier() and smp_mb():
 *
 * The pair ordering is detailed as (O: ordered, X: not ordered):
 *
 *                        barrier()   smp_mb()   sys_membarrier()
 *        barrier()          X           X              O
 *        smp_mb()           X           O              O
 *        sys_membarrier()   O           O              O
 */
SYSCALL_DEFINE3(membarrier, int, cmd, unsigned int, flags, int, cpu_id)
{
	switch (cmd) {
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
		if (unlikely(flags && flags != MEMBARRIER_CMD_FLAG_CPU))
			return -EINVAL;
		break;
	default:
		if (unlikely(flags))
			return -EINVAL;
	}

	if (!(flags & MEMBARRIER_CMD_FLAG_CPU))
		cpu_id = -1;

	switch (cmd) {
	case MEMBARRIER_CMD_QUERY:
	{
		int cmd_mask = MEMBARRIER_CMD_BITMASK;

		if (tick_nohz_full_enabled())
			cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
		return cmd_mask;
	}
	case MEMBARRIER_CMD_GLOBAL:
		/* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
		if (tick_nohz_full_enabled())
			return -EINVAL;
		if (num_online_cpus() > 1)
			synchronize_rcu();
		return 0;
	case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
		return membarrier_global_expedited();
	case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
		return membarrier_register_global_expedited();
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
		return membarrier_private_expedited(0, cpu_id);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
		return membarrier_register_private_expedited(0);
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
		return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE, cpu_id);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
		return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
		return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, cpu_id);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
		return membarrier_register_private_expedited(MEMBARRIER_FLAG_RSEQ);
	default:
		return -EINVAL;
	}
}
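
/*
 * Illustrative only (not part of the kernel build): a minimal user-space
 * sketch of the registration-then-use pattern for the private expedited
 * command documented above. Per the ordering table in the kernel-doc, a
 * plain compiler barrier on the fast path pairs with sys_membarrier() on
 * the slow path. Glibc provides no membarrier() wrapper, so syscall(2) is
 * used; error handling is elided:
 *
 *	#include <linux/membarrier.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static void setup(void)
 *	{
 *		// Register once per process, before the first use.
 *		syscall(__NR_membarrier,
 *			MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0);
 *	}
 *
 *	static void writer_slow_path(void)
 *	{
 *		// By the time this call returns, all other running threads
 *		// of this process have executed a full memory barrier.
 *		syscall(__NR_membarrier,
 *			MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0);
 *	}
 *
 *	static void reader_fast_path(void)
 *	{
 *		// Only a compiler barrier is needed here; it pairs with
 *		// the sys_membarrier() call in writer_slow_path().
 *		__asm__ __volatile__("" ::: "memory");
 *	}
 */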