// SPDX-License-Identifier: GPL-2.0-only
/*
 * kernel/sched/cpupri.c
 *
 * CPU priority management
 *
 * Copyright (C) 2007-2008 Novell
 *
 * Author: Gregory Haskins <ghaskins@novell.com>
 *
 * This code tracks the priority of each CPU so that global migration
 * decisions are easy to calculate. Each CPU can be in a state as follows:
 *
 *             (INVALID), IDLE, NORMAL, RT1, ... RT99
 *
 * going from the lowest priority to the highest. CPUs in the INVALID state
 * are not eligible for routing. The system maintains this state with
 * a 2 dimensional bitmap (the first for priority class, the second for CPUs
 * in that class). Therefore a typical application without affinity
 * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
 * searches). For tasks with affinity restrictions, the algorithm has a
 * worst case complexity of O(min(102, nr_domcpus)), though the scenario that
 * yields the worst case search is fairly contrived.
 */
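
/*
 * For reference, the backing data structures (declared in cpupri.h) look
 * roughly like this:
 *
 *	struct cpupri_vec {
 *		atomic_t	count;
 *		cpumask_var_t	mask;
 *	};
 *
 *	struct cpupri {
 *		struct cpupri_vec	pri_to_cpu[CPUPRI_NR_PRIORITIES];
 *		int			*cpu_to_pri;
 *	};
 *
 * pri_to_cpu[] is the "2 dimensional bitmap" described above: one vector per
 * cpupri level, holding a count of the CPUs at that level plus the mask of
 * which CPUs they are. cpu_to_pri[] is the reverse map used by cpupri_set().
 */
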
#include "sched.h"

/* Convert between a 140 based task->prio, and our 102 based cpupri */
static int convert_prio(int prio)
{
	int cpupri;

	if (prio == CPUPRI_INVALID)
		cpupri = CPUPRI_INVALID;
	else if (prio == MAX_PRIO)
		cpupri = CPUPRI_IDLE;
	else if (prio >= MAX_RT_PRIO)
		cpupri = CPUPRI_NORMAL;
	else
		cpupri = MAX_RT_PRIO - prio + 1;

	return cpupri;
}
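
/*
 * Worked example of the mapping above, assuming the usual values
 * MAX_RT_PRIO == 100 and MAX_PRIO == 140:
 *
 *	task->prio			cpupri
 *	--------------------------	--------------------
 *	CPUPRI_INVALID (-1)		CPUPRI_INVALID (-1)
 *	140 (MAX_PRIO, idle)		CPUPRI_IDLE   (0)
 *	100..139 (normal tasks)		CPUPRI_NORMAL (1)
 *	99 (lowest RT priority)		2
 *	0  (highest RT priority)	101
 *
 * Higher cpupri values mean higher priority, giving the 102 levels (0..101)
 * plus INVALID referred to in the comment above.
 */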

#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION
/**
 * drop_nopreempt_cpus - remove likely nonpreemptible cpus from the mask
 * @lowest_mask: mask with selected CPUs (non-NULL)
 */
static void
drop_nopreempt_cpus(struct cpumask *lowest_mask)
{
	unsigned int cpu = cpumask_first(lowest_mask);

	while (cpu < nr_cpu_ids) {
		/* unlocked access */
		struct task_struct *task = READ_ONCE(cpu_rq(cpu)->curr);

		if (task_may_not_preempt(task, cpu))
			cpumask_clear_cpu(cpu, lowest_mask);

		cpu = cpumask_next(cpu, lowest_mask);
	}
}
#endif

static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
				struct cpumask *lowest_mask, int idx,
				bool drop_nopreempts)
{
	struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
	int skip = 0;

	if (!atomic_read(&(vec)->count))
		skip = 1;
	/*
	 * When looking at the vector, we need to read the counter,
	 * do a memory barrier, then read the mask.
	 *
	 * Note: This is still all racy, but we can deal with it.
	 * Ideally, we only want to look at masks that are set.
	 *
	 * If a mask is not set, then the only thing wrong is that we
	 * did a little more work than necessary.
	 *
	 * If we read a zero count but the mask is set, because of the
	 * memory barriers, that can only happen when the highest prio
	 * task for a run queue has left the run queue, in which case,
	 * it will be followed by a pull. If the task we are processing
	 * fails to find a proper place to go, that pull request will
	 * pull this task if the run queue is running at a lower
	 * priority.
	 */
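	/*
	 * The read side here pairs with the write side in cpupri_set():
	 *
	 *	cpupri_set() (writer)		__cpupri_find() (reader)
	 *	---------------------		------------------------
	 *	cpumask_set_cpu(cpu, mask);	atomic_read(&count);
	 *	smp_mb__before_atomic();	smp_rmb();
	 *	atomic_inc(&count);		cpumask_any_and(..., mask);
	 *
	 * So a count increment published by cpupri_set() guarantees that the
	 * corresponding mask bit is visible by the time we read the mask.
	 */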
	smp_rmb();

	/* Need to do the rmb for every iteration */
	if (skip)
		return 0;

	if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
		return 0;

	if (lowest_mask) {
		cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
		cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);

#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION
		if (drop_nopreempts)
			drop_nopreempt_cpus(lowest_mask);
#endif

		/*
		 * We have to ensure that we have at least one bit
		 * still set in the array, since the map could have
		 * been concurrently emptied between the first and
		 * second reads of vec->mask. If we hit this
		 * condition, simply act as though we never hit this
		 * priority level and continue on.
		 */
		if (cpumask_empty(lowest_mask))
			return 0;
	}

	return 1;
}

int cpupri_find(struct cpupri *cp, struct task_struct *p,
		struct cpumask *lowest_mask)
{
	return cpupri_find_fitness(cp, p, lowest_mask, NULL);
}

/**
 * cpupri_find_fitness - find the best (lowest-pri) CPU in the system
 * @cp: The cpupri context
 * @p: The task
 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
 * @fitness_fn: A pointer to a function to do custom checks of whether the
 *              CPU fits specific criteria, so that we only return those CPUs.
 *
 * Note: This function returns the recommended CPUs as calculated during the
 * current invocation. By the time the call returns, the CPUs may have in
 * fact changed priorities any number of times. While not ideal, it is not
 * an issue of correctness since the normal rebalancer logic will correct
 * any discrepancies created by racing against the uncertainty of the current
 * priority configuration.
 *
 * Return: (int)bool - CPUs were found
 */
int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,
			struct cpumask *lowest_mask,
			bool (*fitness_fn)(struct task_struct *p, int cpu))
{
	int task_pri = convert_prio(p->prio);
	int idx, cpu;
	bool drop_nopreempts = task_pri <= MAX_RT_PRIO;

	BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);

#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION
retry:
#endif
	for (idx = 0; idx < task_pri; idx++) {

		if (!__cpupri_find(cp, p, lowest_mask, idx, drop_nopreempts))
			continue;

		if (!lowest_mask || !fitness_fn)
			return 1;

		/* Ensure the capacity of the CPUs fits the task */
		for_each_cpu(cpu, lowest_mask) {
			if (!fitness_fn(p, cpu))
				cpumask_clear_cpu(cpu, lowest_mask);
		}

		/*
		 * If no CPU at the current priority can fit the task,
		 * continue looking.
		 */
		if (cpumask_empty(lowest_mask))
			continue;

		return 1;
	}

	/*
	 * If no CPU was found while skipping likely non-preemptible CPUs,
	 * retry without skipping them so we can still find the lowest
	 * priority target and avoid priority inversion.
	 */
#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION
	if (drop_nopreempts) {
		drop_nopreempts = false;
		goto retry;
	}
#endif

	/*
	 * If we failed to find a fitting lowest_mask, kick off a new search
	 * but without taking into account any fitness criteria this time.
	 *
	 * This rule favours honouring priority over fitting the task in the
	 * correct CPU (Capacity Awareness being the only user now).
	 * The idea is that if a higher priority task can run, then it should
	 * run even if this ends up being on an unfitting CPU.
	 *
	 * The cost of this trade-off is not entirely clear and will probably
	 * be good for some workloads and bad for others.
	 *
	 * The main idea here is that if some CPUs were overcommitted, we try
	 * to spread, which is what the scheduler traditionally did. Sys admins
	 * must do proper RT planning to avoid overloading the system if they
	 * really care.
	 */
	if (fitness_fn)
		return cpupri_find(cp, p, lowest_mask);

	return 0;
}
EXPORT_SYMBOL_GPL(cpupri_find_fitness);
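
/*
 * Illustrative only (not part of this file's API): a minimal sketch of how a
 * caller might combine cpupri_find_fitness() with a custom fitness callback.
 * The helper names and the capacity threshold below are made up for the
 * example; the in-tree RT scheduler passes its own fitness function (or NULL
 * via cpupri_find()).
 */
#if 0
static bool example_cpu_fits(struct task_struct *p, int cpu)
{
	/* e.g. only accept CPUs with at least half of the maximum capacity */
	return arch_scale_cpu_capacity(cpu) >= SCHED_CAPACITY_SCALE / 2;
}

static int example_pick_lowest_cpu(struct cpupri *cp, struct task_struct *p,
				   struct cpumask *scratch_mask)
{
	/* scratch_mask is a preallocated cpumask owned by the caller */
	if (!cpupri_find_fitness(cp, p, scratch_mask, example_cpu_fits))
		return -1;

	/* Any CPU left in the mask runs at a lower priority than p */
	return cpumask_first(scratch_mask);
}
#endif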

/**
 * cpupri_set - update the CPU priority setting
 * @cp: The cpupri context
 * @cpu: The target CPU
 * @newpri: The priority (INVALID-RT99) to assign to this CPU
 *
 * Note: Assumes cpu_rq(cpu)->lock is locked
 *
 * Returns: (void)
 */
void cpupri_set(struct cpupri *cp, int cpu, int newpri)
{
	int *currpri = &cp->cpu_to_pri[cpu];
	int oldpri = *currpri;
	int do_mb = 0;

	newpri = convert_prio(newpri);

	BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);

	if (newpri == oldpri)
		return;

	/*
	 * If the CPU was currently mapped to a different value, we
	 * need to map it to the new value then remove the old value.
	 * Note, we must add the new value first, otherwise we risk the
	 * cpu being missed by the priority loop in cpupri_find.
	 */
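	/*
	 * Concrete example: CPU 3 goes from CPUPRI_NORMAL (1) to RT prio 50
	 * (cpupri index 51). We first set bit 3 in pri_to_cpu[51].mask and
	 * bump its count, and only then drop CPU 3 from pri_to_cpu[1]. A
	 * concurrent cpupri_find() walking the levels therefore always sees
	 * CPU 3 in at least one of the two vectors.
	 */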
	if (likely(newpri != CPUPRI_INVALID)) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];

		cpumask_set_cpu(cpu, vec->mask);
		/*
		 * When adding a new vector, we update the mask first,
		 * do a write memory barrier, and then update the count, to
		 * make sure the vector is visible when count is set.
		 */
		smp_mb__before_atomic();
		atomic_inc(&(vec)->count);
		do_mb = 1;
	}
	if (likely(oldpri != CPUPRI_INVALID)) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];

		/*
		 * Because the order of modification of the vec->count
		 * is important, we must make sure that the update
		 * of the new prio is seen before we decrement the
		 * old prio. This makes sure that the loop sees
		 * one or the other when we raise the priority of
		 * the run queue. We don't care about when we lower the
		 * priority, as that will trigger an rt pull anyway.
		 *
		 * We only need to do a memory barrier if we updated
		 * the new priority vec.
		 */
		if (do_mb)
			smp_mb__after_atomic();

		/*
		 * When removing from the vector, we decrement the counter
		 * first, do a memory barrier, and then clear the mask.
		 */
		atomic_dec(&(vec)->count);
		smp_mb__after_atomic();
		cpumask_clear_cpu(cpu, vec->mask);
	}

	*currpri = newpri;
}

/**
 * cpupri_init - initialize the cpupri structure
 * @cp: The cpupri context
 *
 * Return: -ENOMEM on memory allocation failure.
 */
int cpupri_init(struct cpupri *cp)
{
	int i;

	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[i];

		atomic_set(&vec->count, 0);
		if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
			goto cleanup;
	}

	cp->cpu_to_pri = kcalloc(nr_cpu_ids, sizeof(int), GFP_KERNEL);
	if (!cp->cpu_to_pri)
		goto cleanup;

	for_each_possible_cpu(i)
		cp->cpu_to_pri[i] = CPUPRI_INVALID;

	return 0;

cleanup:
	for (i--; i >= 0; i--)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
	return -ENOMEM;
}

/**
 * cpupri_cleanup - clean up the cpupri structure
 * @cp: The cpupri context
 */
void cpupri_cleanup(struct cpupri *cp)
{
	int i;

	kfree(cp->cpu_to_pri);
	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
}
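
/*
 * Illustrative only: a minimal sketch of the expected lifecycle of a
 * struct cpupri, roughly as the root-domain code uses it. The function name
 * and the priority values below are made up for the example; real callers of
 * cpupri_set() also hold the relevant rq lock, as documented above.
 */
#if 0
static int example_lifecycle(void)
{
	struct cpupri cp;

	if (cpupri_init(&cp))
		return -ENOMEM;

	cpupri_set(&cp, 0, 50);		/* CPU 0 now runs an RT prio-50 task */
	cpupri_set(&cp, 0, MAX_PRIO);	/* CPU 0 went idle */

	cpupri_cleanup(&cp);
	return 0;
}
#endif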

#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION
/*
 * cpupri_check_rt - check if the current CPU has an RT task.
 * Should be called from within an RCU-sched read-side critical section.
 */
bool cpupri_check_rt(void)
{
	int cpu = raw_smp_processor_id();

	return cpu_rq(cpu)->rd->cpupri.cpu_to_pri[cpu] > CPUPRI_NORMAL;
}
#endif